From f151ffee86d172dba18e5fa507bd28440fa45bb0 Mon Sep 17 00:00:00 2001
From: aperez
Date: Mon, 17 Nov 2025 17:18:47 +0300
Subject: [PATCH] Updated current version of v1 and v2 dags, bin/ytops_client, ansible individual services

---
 .gitignore | 1 +
 VERSION | 2 +-
 airflow/Dockerfile | 102 +-
 airflow/Dockerfile.old | 125 +
 airflow/camoufox/Dockerfile | 3 +
 airflow/config/custom_task_hooks.py | 11 +-
 airflow/configs/docker-compose-dl.yaml.j2 | 65 +-
 airflow/configs/docker-compose-dl.yaml.v1.j2 | 151 +
 airflow/configs/docker-compose-master.yaml.j2 | 20 +-
 .../configs/docker-compose-ytdlp-ops.yaml.j2 | 71 +-
 airflow/dags/scripts/regression.py | 636 ++++
 airflow/dags/ytdlp_mgmt_proxy_account.py | 157 +-
 airflow/dags/ytdlp_mgmt_queues.py | 103 +-
 airflow/dags/ytdlp_ops_account_maintenance.py | 116 +-
 ...patcher.py => ytdlp_ops_v01_dispatcher.py} | 10 +-
 airflow/dags/ytdlp_ops_v01_orchestrator.py | 444 +++
 airflow/dags/ytdlp_ops_v01_worker_per_url.py | 1794 +++++++++++
 airflow/dags/ytdlp_ops_v02_dispatcher_auth.py | 98 +
 airflow/dags/ytdlp_ops_v02_dispatcher_dl.py | 89 +
 ....py => ytdlp_ops_v02_orchestrator_auth.py} | 120 +-
 airflow/dags/ytdlp_ops_v02_orchestrator_dl.py | 302 ++
 ...y => ytdlp_ops_v02_worker_per_url_auth.py} | 619 +++-
 .../dags/ytdlp_ops_v02_worker_per_url_dl.py | 895 ++++++
 ansible/MIGRATION.md | 9 +
 ansible/README-yt.md | 120 +
 ansible/group_vars/all/vault.yml | 2 +
 ansible/playbook-dags.yml | 1 +
 ansible/playbook-full.yml | 47 +
 ansible/playbook-hook.yml | 2 +-
 ansible/playbook-master.yml | 56 +
 ansible/playbook-sync-local.yml | 108 +
 ansible/playbook-update-regression-script.yml | 27 +
 ansible/playbook-worker.yml | 92 +-
 ansible/playbook-ytdlp-master-only.yml | 22 +
 ansible/playbooks/playbook-bgutils-start.yml | 19 +
 ansible/playbooks/playbook-bgutils-stop.yml | 19 +
 ansible/playbooks/restart_worker.yml | 53 +
 ansible/roles/ytdlp-worker/defaults/main.yml | 3 +
 ansible/roles/ytdlp-worker/tasks/main.yml | 52 +
 ansible/templates/.env.j2 | 8 +
 bin/ytops-client | 10 +
 cli.config | 35 +
 get_info_json_client.py | 150 -
 package_client.py | 117 +
 pangramia/__init__.py | 0
 pangramia/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 148 bytes
 pangramia/base_service/BaseService-remote | 131 +
 pangramia/base_service/BaseService.py | 564 ++++
 pangramia/base_service/__init__.py | 1 +
 pangramia/base_service/constants.py | 14 +
 pangramia/base_service/ttypes.py | 20 +
 pangramia/yt/__init__.py | 0
 pangramia/yt/common/__init__.py | 1 +
 pangramia/yt/common/constants.py | 14 +
 pangramia/yt/common/ttypes.py | 1403 ++++++++
 pangramia/yt/exceptions/__init__.py | 1 +
 .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 195 bytes
 .../__pycache__/ttypes.cpython-39.pyc | Bin 0 -> 7599 bytes
 pangramia/yt/exceptions/constants.py | 14 +
 pangramia/yt/exceptions/ttypes.py | 254 ++
 .../yt/management/YTManagementService-remote | 215 ++
 .../yt/management/YTManagementService.py | 2816 +++++++++++++++++
 pangramia/yt/management/__init__.py | 1 +
 pangramia/yt/management/constants.py | 14 +
 pangramia/yt/management/ttypes.py | 21 +
 .../yt/tokens_ops/YTTokenOpService-remote | 257 ++
 pangramia/yt/tokens_ops/YTTokenOpService.py | 1719 ++++++++++
 pangramia/yt/tokens_ops/__init__.py | 1 +
 pangramia/yt/tokens_ops/constants.py | 14 +
 pangramia/yt/tokens_ops/ttypes.py | 21 +
 playbooks/playbook-bgutils-start.yml | 0
 playbooks/playbook-bgutils-stop.yml | 0
 policies/1_fetch_only_policies.yaml | 155 +
 policies/2_download_only_policies.yaml | 58 +
 policies/3_full_stack_policies.yaml | 158 +
 policies/README.md | 28 +
 setup.py | 1 -
thrift_model/.gitignore | 1 + thrift_model/data/common.thrift | 145 + thrift_model/data/exceptions.thrift | 14 + .../gen_py/pangramia/yt/common/ttypes.py | 213 +- .../yt/tokens_ops/YTTokenOpService-remote | 23 +- .../yt/tokens_ops/YTTokenOpService.py | 338 +- thrift_model/pom.xml | 2 +- thrift_model/services/base_service.thrift | 19 + thrift_model/services/yt_admin_ops.thrift | 63 + thrift_model/services/yt_management.thrift | 27 + thrift_model/services/yt_tokens_ops.thrift | 50 + tools/generate-inventory.py | 6 +- tools/{sync-to-tower.sh => sync-to-jump.sh} | 18 +- .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 224 bytes .../__pycache__/client_utils.cpython-39.pyc | Bin 0 -> 1402 bytes .../__pycache__/version.cpython-39.pyc | Bin 0 -> 512 bytes 93 files changed, 15201 insertions(+), 500 deletions(-) create mode 100644 airflow/Dockerfile.old create mode 100644 airflow/configs/docker-compose-dl.yaml.v1.j2 create mode 100644 airflow/dags/scripts/regression.py rename airflow/dags/{ytdlp_ops_dispatcher.py => ytdlp_ops_v01_dispatcher.py} (91%) create mode 100644 airflow/dags/ytdlp_ops_v01_orchestrator.py create mode 100644 airflow/dags/ytdlp_ops_v01_worker_per_url.py create mode 100644 airflow/dags/ytdlp_ops_v02_dispatcher_auth.py create mode 100644 airflow/dags/ytdlp_ops_v02_dispatcher_dl.py rename airflow/dags/{ytdlp_ops_orchestrator.py => ytdlp_ops_v02_orchestrator_auth.py} (72%) create mode 100644 airflow/dags/ytdlp_ops_v02_orchestrator_dl.py rename airflow/dags/{ytdlp_ops_worker_per_url.py => ytdlp_ops_v02_worker_per_url_auth.py} (63%) create mode 100644 airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py create mode 100644 ansible/MIGRATION.md create mode 100644 ansible/README-yt.md create mode 100644 ansible/playbook-sync-local.yml create mode 100644 ansible/playbook-update-regression-script.yml create mode 100644 ansible/playbook-ytdlp-master-only.yml create mode 100644 ansible/playbooks/playbook-bgutils-start.yml create mode 100644 ansible/playbooks/playbook-bgutils-stop.yml create mode 100644 ansible/playbooks/restart_worker.yml create mode 100644 ansible/roles/ytdlp-worker/defaults/main.yml create mode 100755 bin/ytops-client create mode 100644 cli.config delete mode 100644 get_info_json_client.py create mode 100755 package_client.py create mode 100644 pangramia/__init__.py create mode 100644 pangramia/__pycache__/__init__.cpython-39.pyc create mode 100755 pangramia/base_service/BaseService-remote create mode 100644 pangramia/base_service/BaseService.py create mode 100644 pangramia/base_service/__init__.py create mode 100644 pangramia/base_service/constants.py create mode 100644 pangramia/base_service/ttypes.py create mode 100644 pangramia/yt/__init__.py create mode 100644 pangramia/yt/common/__init__.py create mode 100644 pangramia/yt/common/constants.py create mode 100644 pangramia/yt/common/ttypes.py create mode 100644 pangramia/yt/exceptions/__init__.py create mode 100644 pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc create mode 100644 pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc create mode 100644 pangramia/yt/exceptions/constants.py create mode 100644 pangramia/yt/exceptions/ttypes.py create mode 100755 pangramia/yt/management/YTManagementService-remote create mode 100644 pangramia/yt/management/YTManagementService.py create mode 100644 pangramia/yt/management/__init__.py create mode 100644 pangramia/yt/management/constants.py create mode 100644 pangramia/yt/management/ttypes.py create mode 100755 pangramia/yt/tokens_ops/YTTokenOpService-remote create 
mode 100644 pangramia/yt/tokens_ops/YTTokenOpService.py create mode 100644 pangramia/yt/tokens_ops/__init__.py create mode 100644 pangramia/yt/tokens_ops/constants.py create mode 100644 pangramia/yt/tokens_ops/ttypes.py create mode 100644 playbooks/playbook-bgutils-start.yml create mode 100644 playbooks/playbook-bgutils-stop.yml create mode 100644 policies/1_fetch_only_policies.yaml create mode 100644 policies/2_download_only_policies.yaml create mode 100644 policies/3_full_stack_policies.yaml create mode 100644 policies/README.md create mode 100644 thrift_model/data/common.thrift create mode 100644 thrift_model/data/exceptions.thrift create mode 100644 thrift_model/services/base_service.thrift create mode 100644 thrift_model/services/yt_admin_ops.thrift create mode 100644 thrift_model/services/yt_management.thrift create mode 100644 thrift_model/services/yt_tokens_ops.thrift rename tools/{sync-to-tower.sh => sync-to-jump.sh} (76%) create mode 100644 yt_ops_services/__pycache__/__init__.cpython-39.pyc create mode 100644 yt_ops_services/__pycache__/client_utils.cpython-39.pyc create mode 100644 yt_ops_services/__pycache__/version.cpython-39.pyc diff --git a/.gitignore b/.gitignore index b0ac3ed..4d50ae9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ +**/__pycache__/* .aider* diff --git a/VERSION b/VERSION index 406729f..12f67a4 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.10.1-exp +3.11.3-exp diff --git a/airflow/Dockerfile b/airflow/Dockerfile index 5242560..3d34a26 100644 --- a/airflow/Dockerfile +++ b/airflow/Dockerfile @@ -18,54 +18,95 @@ RUN apt-get update && \ iputils-ping \ curl \ traceroute \ - tcpdump && \ + tcpdump \ + unzip \ + git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base +# Ensure the airflow user and group exist with the correct UID/GID and permissions. +# This is done early to allow `COPY --chown` to work correctly. +RUN if ! getent group airflow > /dev/null 2>&1; then \ + groupadd -g 50000 airflow; \ + fi && \ + if ! id -u airflow > /dev/null 2>&1; then \ + useradd -u 50000 -g 50000 -m -s /bin/bash airflow; \ + else \ + usermod -g 50000 airflow; \ + fi && \ + chown -R airflow:airflow /app && \ + chmod -R g+w /app + # Download and install mc (MinIO client) RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \ chmod +x /usr/local/bin/mc -# Download and install custom FFmpeg build from yt-dlp's recommended source +# Install FFmpeg RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \ - echo "Downloading FFmpeg from $FFMPEG_URL" && \ wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \ mkdir -p /opt/ffmpeg && \ tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \ ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \ ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \ - rm -rf /tmp/ffmpeg.tar.xz && \ - ffmpeg -version + rm -rf /tmp/ffmpeg.tar.xz -# Check if airflow group exists, create it if it doesn't, then ensure proper setup -RUN if ! 
getent group airflow > /dev/null 2>&1; then \ - groupadd -g 1001 airflow; \ - fi && \ - # Check if airflow user exists and is in the airflow group - if id -u airflow > /dev/null 2>&1; then \ - usermod -a -G airflow airflow; \ - else \ - useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \ - fi && \ - chown -R airflow:airflow /app && \ - chmod g+w /app +# Install yt-dlp from master +# Temporarily rename pip to bypass the root check in the base image's pip wrapper, +# ensuring a system-wide installation. +RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \ + python3 -m pip install --no-cache-dir -U pip hatchling wheel && \ + python3 -m pip install --no-cache-dir --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" && \ + chmod a+x "$(which yt-dlp)" && \ + mv /usr/local/bin/pip.orig /usr/local/bin/pip -# Switch to airflow user for package installation -USER airflow +# Install Deno +RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \ + unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip -# Install base Airflow dependencies +# Install aria2c and gost +RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash + +# Install gost (direct download of binary) +RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \ + tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \ + rm gost_2.12.0_linux_amd64.tar.gz + +# Verify installations +RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V + +# Create version information files +RUN ( \ + echo "--- yt-dlp ---" && \ + yt-dlp --version && \ + echo "" && \ + echo "--- deno ---" && \ + deno --version && \ + echo "" && \ + echo "--- ffmpeg ---" && \ + ffmpeg -version | head -n 1 \ +) > VERSION-airflow-latest.txt && \ +cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt + + +# Install base Airflow dependencies as root (system-wide) # [FIX] Explicitly install a version of botocore compatible with Python 3.12 # to fix a RecursionError when handling S3 remote logs. -RUN pip install --no-cache-dir \ +# Temporarily rename pip to bypass the root check in the base image's pip wrapper. +RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \ + python3 -m pip install --no-cache-dir \ "apache-airflow==${AIRFLOW_VERSION}" \ apache-airflow-providers-docker \ apache-airflow-providers-http \ apache-airflow-providers-amazon \ + "apache-airflow-providers-celery>=3.3.0" \ + apache-airflow-providers-redis \ "botocore>=1.34.118" \ psycopg2-binary \ "gunicorn==20.1.0" \ "python-ffmpeg==2.0.12" \ - "ffprobe3" + "ffprobe3" \ + "python-dotenv" && \ + mv /usr/local/bin/pip.orig /usr/local/bin/pip # --- Install the custom yt_ops_services package --- # Copy all the necessary source code for the package. @@ -78,17 +119,24 @@ COPY --chown=airflow:airflow pangramia ./pangramia/ # Install the package in editable mode. This runs setup.py and installs all dependencies # listed in `install_requires`, making the `yt_ops_services` module available everywhere. -RUN pip install --no-cache-dir -e . +# Bypass the pip root check again. +RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \ + python3 -m pip install --no-cache-dir -e . 
&& \ + mv /usr/local/bin/pip.orig /usr/local/bin/pip # Copy token generator scripts and utils with correct permissions # COPY --chown=airflow:airflow generate_tokens_direct.mjs ./ # COPY --chown=airflow:airflow utils ./utils/ # COPY --chown=airflow:airflow token_generator ./token_generator/ -# --- Always update yt-dlp to latest nightly on container start --- -# This is done in the entrypoint so every worker run uses the freshest build -COPY --chown=airflow:airflow update-yt-dlp.sh /usr/local/bin/update-yt-dlp.sh -RUN chmod +x /usr/local/bin/update-yt-dlp.sh +# Ensure the home directory and all its contents are owned by the airflow user before switching to it. +# This fixes permission issues that can occur if previous RUN commands created files in /home/airflow as root. +# We also make it world-writable to accommodate running the container with a different user ID, which can +# happen in some environments (e.g., OpenShift or with docker-compose user overrides). +RUN chown -R airflow:airflow /home/airflow && chmod -R 777 /home/airflow + +# Switch to airflow user for all subsequent operations +USER airflow # Expose bgutil plugin to worker path ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH diff --git a/airflow/Dockerfile.old b/airflow/Dockerfile.old new file mode 100644 index 0000000..5b56f60 --- /dev/null +++ b/airflow/Dockerfile.old @@ -0,0 +1,125 @@ + + + +FROM apache/airflow:2.10.3 +ENV AIRFLOW_VERSION=2.10.3 + +WORKDIR /app + +# Install system dependencies +USER root +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + vim \ + mc \ + jq \ + build-essential \ + python3-dev \ + wget \ + tar \ + xz-utils \ + iputils-ping \ + curl \ + traceroute \ + tcpdump \ + unzip \ + git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base + +# Download and install mc (MinIO client) +RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \ + chmod +x /usr/local/bin/mc + +# Install FFmpeg +RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \ + wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \ + mkdir -p /opt/ffmpeg && \ + tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \ + ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \ + ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \ + rm -rf /tmp/ffmpeg.tar.xz + +# Install yt-dlp from master +RUN python3 -m pip install -U pip hatchling wheel && \ + python3 -m pip install --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" + +# Install Deno +RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \ + unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip + +# Install aria2c and gost +RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash + +# Install gost (direct download of binary) +RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \ + tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \ + rm gost_2.12.0_linux_amd64.tar.gz + +# Verify installations +RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V + +# Check if airflow group exists, create it if it doesn't, then ensure proper setup +RUN if ! 
getent group airflow > /dev/null 2>&1; then \ + groupadd -g 1001 airflow; \ + fi && \ + # Check if airflow user exists and is in the airflow group + if id -u airflow > /dev/null 2>&1; then \ + usermod -a -G airflow airflow; \ + else \ + useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \ + fi && \ + chown -R airflow:airflow /app && \ + chmod g+w /app + +# Install base Airflow dependencies +# [FIX] Explicitly install a version of botocore compatible with Python 3.12 +# to fix a RecursionError when handling S3 remote logs. +RUN pip install --no-cache-dir \ + "apache-airflow==${AIRFLOW_VERSION}" \ + apache-airflow-providers-docker \ + apache-airflow-providers-http \ + apache-airflow-providers-amazon \ + "botocore>=1.34.118" \ + psycopg2-binary \ + "gunicorn==20.1.0" \ + "python-ffmpeg==2.0.12" \ + "ffprobe3" \ + "python-dotenv" + +# Switch to airflow user for package installation +USER airflow + +# --- Install the custom yt_ops_services package --- +# Copy all the necessary source code for the package. +# The deploy script ensures these files are in the build context. +COPY --chown=airflow:airflow setup.py ./ +COPY --chown=airflow:airflow VERSION ./ +COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/ +COPY --chown=airflow:airflow thrift_model ./thrift_model/ +COPY --chown=airflow:airflow pangramia ./pangramia/ + +# Install the package in editable mode. This runs setup.py and installs all dependencies +# listed in `install_requires`, making the `yt_ops_services` module available everywhere. +RUN pip install --no-cache-dir -e . + +# Copy token generator scripts and utils with correct permissions +# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./ +# COPY --chown=airflow:airflow utils ./utils/ +# COPY --chown=airflow:airflow token_generator ./token_generator/ + +# Create version information files +RUN ( \ + echo "--- yt-dlp ---" && \ + yt-dlp --version && \ + echo "" && \ + echo "--- deno ---" && \ + deno --version && \ + echo "" && \ + echo "--- ffmpeg ---" && \ + ffmpeg -version | head -n 1 \ +) > VERSION-airflow-latest.txt && \ +cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt + +# Expose bgutil plugin to worker path +ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH diff --git a/airflow/camoufox/Dockerfile b/airflow/camoufox/Dockerfile index 207faca..fef9cd1 100644 --- a/airflow/camoufox/Dockerfile +++ b/airflow/camoufox/Dockerfile @@ -62,6 +62,9 @@ RUN conda run -n camo pip install --no-cache-dir -r requirements.txt # Install Playwright browsers for version 1.49 RUN conda run -n camo playwright install --with-deps +# Pre-download and cache Camoufox to speed up startup +RUN conda run -n camo camoufox fetch + # Copy the server script into the image COPY camoufox_server.py . diff --git a/airflow/config/custom_task_hooks.py b/airflow/config/custom_task_hooks.py index 47c2a97..a2e2dd6 100644 --- a/airflow/config/custom_task_hooks.py +++ b/airflow/config/custom_task_hooks.py @@ -14,7 +14,8 @@ def task_instance_mutation_hook(ti): to be set by the dispatcher DAG. This avoids database race conditions. """ logger.debug(f"MUTATION HOOK: Running for dag '{ti.dag_id}', task '{ti.task_id}'.") - if ti.dag_id == 'ytdlp_ops_worker_per_url': + # This hook targets all worker DAGs, which follow a naming convention. + if 'worker_per_url' in ti.dag_id: # If the run_id isn't populated yet, just return. The hook may be called again. if not ti.run_id: logger.debug(f"MUTATION HOOK: run_id not yet available for task '{ti.task_id}'. 
Skipping this invocation.") @@ -26,7 +27,8 @@ def task_instance_mutation_hook(ti): if ti.run_id and '_q_' in ti.run_id: try: parsed_queue = ti.run_id.split('_q_')[-1] - if parsed_queue.startswith('queue-dl-'): + # Check for valid v1 (dl) or v2 (auth/dl) queue prefixes. + if parsed_queue.startswith(('queue-dl-', 'queue-auth-')): worker_queue = parsed_queue except Exception as e: logger.error(f"MUTATION HOOK: CRITICAL: Error parsing queue from run_id '{ti.run_id}': {e}.", exc_info=True) @@ -37,8 +39,9 @@ def task_instance_mutation_hook(ti): else: # If the queue is not found, it's a critical failure in the dispatching logic. # We fall back to the default queue but log it as a high-severity warning. - logger.warning(f"MUTATION HOOK: Could not find worker queue in run_id '{ti.run_id}'. Falling back to 'queue-dl'. Pinning will fail.") - ti.queue = 'queue-dl' + fallback_queue = 'queue-auth' if 'auth' in ti.dag_id else 'queue-dl' + logger.warning(f"MUTATION HOOK: Could not find worker queue in run_id '{ti.run_id}'. Falling back to '{fallback_queue}'. Pinning will fail.") + ti.queue = fallback_queue # --- Hook Registration --- diff --git a/airflow/configs/docker-compose-dl.yaml.j2 b/airflow/configs/docker-compose-dl.yaml.j2 index 75e714a..2adfcb5 100644 --- a/airflow/configs/docker-compose-dl.yaml.j2 +++ b/airflow/configs/docker-compose-dl.yaml.j2 @@ -14,7 +14,6 @@ x-airflow-common: # If you built a custom image for master, you need to push it to a registry # and reference it here. image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest} - build: . # Add extra hosts here to allow workers to resolve other hosts by name. # This section is auto-generated by Ansible from the inventory. extra_hosts: @@ -30,7 +29,7 @@ x-airflow-common: AIRFLOW__CORE__PARALLELISM: 128 AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64 - AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4 + AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8 AIRFLOW__WEBSERVER__WORKERS: 5 AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent" @@ -75,21 +74,21 @@ x-airflow-common: - ${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles - ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles - ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles + # Mount the generated pangramia package to ensure workers have the latest version + - ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia # Use AIRFLOW_UID from .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image. user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0" services: - airflow-worker: + airflow-worker-dl: <<: *airflow-common - container_name: airflow-dl-worker-1 + container_name: airflow-worker-dl-1 hostname: ${HOSTNAME:-dl001} - # The worker now listens on the generic queue AND its own dedicated queue. - # The hostname is dynamically inserted into the queue name. + # The DL worker listens on the generic dl queue AND its own dedicated queue. command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001} deploy: resources: limits: - # Increased from 4G to 8G to support higher memory per child process. 
memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G} reservations: memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G} @@ -103,26 +102,18 @@ services: start_period: 30s environment: <<: *airflow-common-env - HOSTNAME: ${HOSTNAME:-dl001} # Explicitly set inside container + HOSTNAME: ${HOSTNAME:-dl001} DUMB_INIT_SETSID: "0" AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}" AIRFLOW__CELERY__WORKER_TAGS: "dl" AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - # Use autoscaling to adjust number of workers based on load. - # Format is max_concurrency,min_concurrency. - AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,4" - # Use prefork pool for better compatibility with blocking libraries. + AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8" AIRFLOW__CELERY__POOL: "prefork" AIRFLOW__CELERY__TASK_ACKS_LATE: "False" AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h" AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - # Increased from 256MB to 512MB for memory-intensive yt-dlp tasks. - # This value is in KB. 512 * 1024 = 524288. AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512MB - # The hostname is now managed by Docker Compose to ensure uniqueness when scaling. - # It will be generated based on project, service, and replica number (e.g., airflow-airflow-dl-worker-1). - # hostname: "dl-worker-${HOSTNAME_SUFFIX:-$$(hostname)}" ports: - "8793:8793" networks: @@ -130,6 +121,46 @@ services: - proxynet restart: always + airflow-worker-auth: + <<: *airflow-common + container_name: airflow-worker-auth-1 + hostname: ${HOSTNAME:-auth001} + # The Auth worker listens on the generic auth queue AND its own dedicated queue. + command: airflow celery worker -q queue-auth,queue-auth-${HOSTNAME:-auth001} + deploy: + resources: + limits: + memory: ${AIRFLOW_WORKER_AUTH_MEM_LIMIT:-4G} + reservations: + memory: ${AIRFLOW_WORKER_AUTH_MEM_RESERV:-1G} + healthcheck: + test: + - "CMD-SHELL" + - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-auth@$$(hostname)"' + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s + environment: + <<: *airflow-common-env + HOSTNAME: ${HOSTNAME:-auth001} + DUMB_INIT_SETSID: "0" + AIRFLOW__CELERY__WORKER_QUEUES: "queue-auth,queue-auth-${HOSTNAME:-auth001}" + AIRFLOW__CELERY__WORKER_TAGS: "auth" + AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" + # Auth tasks are less resource intensive but we want fewer of them to avoid service overload. + AIRFLOW__CELERY__WORKER_AUTOSCALE: "2,1" + AIRFLOW__CELERY__POOL: "prefork" + AIRFLOW__CELERY__TASK_ACKS_LATE: "False" + AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" + AIRFLOW__CELERY__WORKER_NAME: "worker-auth@%h" + AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" + AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB + networks: + - default + - proxynet + restart: always + docker-socket-proxy: profiles: - disabled diff --git a/airflow/configs/docker-compose-dl.yaml.v1.j2 b/airflow/configs/docker-compose-dl.yaml.v1.j2 new file mode 100644 index 0000000..b073039 --- /dev/null +++ b/airflow/configs/docker-compose-dl.yaml.v1.j2 @@ -0,0 +1,151 @@ +# Airflow remote DL worker configuration. +# This file should be used on a remote machine to run a download worker. +# It requires a master Airflow instance running with services exposed. +# +# Before running, create a .env file in this directory with: +# MASTER_HOST_IP=... a.b.c.d ... # IP address of the machine running docker-compose-master.yaml +# POSTGRES_PASSWORD=... 
# The password for the PostgreSQL database from the master compose file +# REDIS_PASSWORD=... # The password for Redis from the master compose file +# AIRFLOW_UID=... # User ID for file permissions, should match master +--- +x-airflow-common: + &airflow-common + # This should point to the same image used by the master. + # If you built a custom image for master, you need to push it to a registry + # and reference it here. + image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest} + # Add extra hosts here to allow workers to resolve other hosts by name. + # This section is auto-generated by Ansible from the inventory. + extra_hosts: +{% for host in groups['all'] %} + - "{{ hostvars[host]['inventory_hostname'] }}:{{ hostvars[host]['ansible_host'] | default(hostvars[host]['inventory_hostname']) }}" +{% endfor %} + env_file: + # The .env file is located in the project root (e.g., /srv/airflow_dl_worker), + # so we provide an absolute path to it. + - "{{ airflow_worker_dir }}/.env" + environment: + &airflow-common-env + + AIRFLOW__CORE__PARALLELISM: 128 + AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64 + AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8 + AIRFLOW__WEBSERVER__WORKERS: 5 + AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent" + + AIRFLOW__LOGGING__SECRET_MASK_EXCEPTION_ARGS: False + + + # Prevent slow webserver when low memory? + GUNICORN_CMD_ARGS: --max-requests 20 --max-requests-jitter 3 --worker-tmp-dir /dev/shm + + + # Airflow Core + AIRFLOW__CORE__EXECUTOR: CeleryExecutor + AIRFLOW__CORE__LOAD_EXAMPLES: 'false' + AIRFLOW__CORE__FERNET_KEY: '' # Should be same as master, but worker does not need it. + + # Backend connections - These should point to the master node + # Set MASTER_HOST_IP, POSTGRES_PASSWORD, and REDIS_PASSWORD in your .env file + AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow + AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow + AIRFLOW__CELERY__BROKER_URL: redis://:${REDIS_PASSWORD}@${MASTER_HOST_IP}:{{ redis_port }}/0 + + # Remote Logging - connection is configured directly via environment variables + #_PIP_ADDITIONAL_REQUIREMENTS: ${{ '{' }}_PIP_ADDITIONAL_REQUIREMENTS:- apache-airflow-providers-docker apache-airflow-providers-http thrift>=0.16.0,<=0.20.0 backoff>=2.2.1 python-dotenv==1.0.1 psutil>=5.9.0 apache-airflow-providers-amazon{{ '}' }} + AIRFLOW__LOGGING__REMOTE_LOGGING: "True" + AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://airflow-logs" + AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: minio_default + AIRFLOW__LOGGING__ENCRYPT_S3_LOGS: "False" + #AIRFLOW__LOGGING__LOG_ID_TEMPLATE: "{dag_id}-{task_id}-{run_id}-{try_number}" + AIRFLOW__WEBSERVER__SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' + AIRFLOW__CORE__INTERNAL_API_SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' + AIRFLOW__CORE__LOCAL_SETTINGS_PATH: "/opt/airflow/config/custom_task_hooks.py" + + volumes: + # Mount dags to get any utility scripts, but the worker will pull the DAG from the DB + - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags + # Mount logs locally in case remote logging fails + - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs + # Mount config for local settings and other configurations + - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config + - ${AIRFLOW_PROJ_DIR:-.}/config/airflow.cfg:/opt/airflow/airflow.cfg + # Mount download directories + - 
${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles + - ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles + - ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles + # Mount the generated pangramia package to ensure workers have the latest version + - ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia + # Use AIRFLOW_UID from .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image. + user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0" + +services: + airflow-worker: + <<: *airflow-common + container_name: airflow-dl-worker-1 + hostname: ${HOSTNAME:-dl001} + # The worker now listens on the generic queue AND its own dedicated queue. + # The hostname is dynamically inserted into the queue name. + command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001} + deploy: + resources: + limits: + # Increased from 4G to 8G to support higher memory per child process. + memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G} + reservations: + memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G} + healthcheck: + test: + - "CMD-SHELL" + - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-dl@$$(hostname)"' + interval: 30s + timeout: 30s + retries: 5 + start_period: 30s + environment: + <<: *airflow-common-env + HOSTNAME: ${HOSTNAME:-dl001} # Explicitly set inside container + DUMB_INIT_SETSID: "0" + AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}" + AIRFLOW__CELERY__WORKER_TAGS: "dl" + AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" + # Use autoscaling to adjust number of workers based on load. + # Format is max_concurrency,min_concurrency. + AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8" + # Use prefork pool for better compatibility with blocking libraries. + AIRFLOW__CELERY__POOL: "prefork" + AIRFLOW__CELERY__TASK_ACKS_LATE: "False" + AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" + AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h" + AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" + # Increased from 256MB to 512MB for memory-intensive yt-dlp tasks. + # This value is in KB. 512 * 1024 = 524288. + AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512MB + # The hostname is now managed by Docker Compose to ensure uniqueness when scaling. + # It will be generated based on project, service, and replica number (e.g., airflow-airflow-dl-worker-1). 
+ # hostname: "dl-worker-${HOSTNAME_SUFFIX:-$$(hostname)}" + ports: + - "8793:8793" + networks: + - default + - proxynet + restart: always + + docker-socket-proxy: + profiles: + - disabled + image: tecnativa/docker-socket-proxy:0.1.1 + environment: + CONTAINERS: 1 + IMAGES: 1 + AUTH: 1 + POST: 1 + privileged: true + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + restart: always + +networks: + proxynet: + name: airflow_proxynet + external: true diff --git a/airflow/configs/docker-compose-master.yaml.j2 b/airflow/configs/docker-compose-master.yaml.j2 index 22e7ec0..110d119 100644 --- a/airflow/configs/docker-compose-master.yaml.j2 +++ b/airflow/configs/docker-compose-master.yaml.j2 @@ -112,6 +112,8 @@ x-airflow-common: - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/downloadfiles:/opt/airflow/downloadfiles - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/addfiles:/opt/airflow/addfiles - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/inputfiles:/opt/airflow/inputfiles + # Mount the generated pangramia package to ensure master services have the latest version + - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/pangramia:/app/pangramia user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0" depends_on: &airflow-common-depends-on @@ -142,7 +144,7 @@ services: volumes: - ./postgres-data:/var/lib/postgresql/data ports: - - "{{ postgres_port }}:5432" + - "${{ '{' }}POSTGRES_PORT:-5432{{ '}' }}:5432" healthcheck: test: ["CMD", "pg_isready", "-U", "airflow"] interval: 10s @@ -179,7 +181,7 @@ services: expose: - 6379 ports: - - "{{ redis_port }}:6379" + - "${{ '{' }}REDIS_PORT:-6379{{ '}' }}:6379" healthcheck: test: ["CMD", "redis-cli", "-a", "${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }}", "ping"] interval: 10s @@ -405,6 +407,20 @@ services: airflow-init: condition: service_completed_successfully + airflow-regression-runner: + <<: *airflow-common + entrypoint: "" + container_name: airflow-regression-runner + command: ["tail", "-f", "/dev/null"] + hostname: ${{ '{' }}HOSTNAME{{ '}' }} + environment: + <<: *airflow-common-env + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + airflow-init: <<: *airflow-common depends_on: diff --git a/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 b/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 index 936653e..171c97f 100644 --- a/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 +++ b/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 @@ -8,6 +8,34 @@ include: {% endif %} services: + bgutil-provider: + image: brainicism/bgutil-ytdlp-pot-provider + container_name: bgutil-provider + init: true + ports: + - "4416:4416" + restart: unless-stopped + networks: + - proxynet + + context-prepper: + image: busybox:latest + restart: "no" + volumes: + - ./context:/app/context + networks: + - proxynet + command: + - "/bin/sh" + - "-c" + - | + set -e + CONTEXT_BASE_DIR="/app/context" + TIMESTAMP_DIR="$${CONTEXT_BASE_DIR}/context-data_$$(date +%Y%m%d_%H%M%S)" + mkdir -p "$${TIMESTAMP_DIR}" + ln -sfn "$${TIMESTAMP_DIR}" "$${CONTEXT_BASE_DIR}/context-data" + echo "Context prepper finished. Data will be in: $${TIMESTAMP_DIR}" + envoy: image: envoyproxy/envoy:v1.29-latest {% if service_role != 'management' %} @@ -35,16 +63,30 @@ services: # container_name is omitted; Docker will use the service name for DNS. # This service depends on the camoufox-group service, which ensures all camoufox # instances are started before this service. 
-{% if service_role is defined and service_role != 'management' %} depends_on: - - camoufox-group + context-prepper: + condition: service_completed_successfully +{% if service_role is defined and service_role != 'management' %} + camoufox-group: + condition: service_started {% endif %} # Ports are no longer exposed directly. Envoy will connect to them on the internal network. + # entrypoint: + # - /bin/sh + # - -c + # - | + # set -e + # echo "[$(date)] Updating yt-dlp to latest nightly master..." + # python3 -m pip install -U --pre "yt-dlp[default]" --upgrade-strategy eager --force-reinstall --no-cache-dir + # echo "[$(date)] yt-dlp updated to:" + # yt-dlp --version + # echo "[$(date)] Starting original entrypoint..." + # exec /usr/local/bin/docker-entrypoint.sh "$$@" env_file: - ./.env # Path is relative to the project directory volumes: - - context-data:/app/context-data - - ./logs/communication_logs:/app/communication_logs + - ./context:/app/context + - ./logs/yt-dlp-ops/communication_logs:/app/logs/yt-dlp-ops/communication_logs {% if service_role != 'management' %} # Mount the generated endpoints file to make it available to the server - ./configs/camoufox_endpoints.json:/app/config/camoufox_endpoints.json:ro @@ -72,19 +114,24 @@ services: - "${REDIS_PORT:-52909}" - "--redis-password" - "${REDIS_PASSWORD}" - - "--account-active-duration-min" - - "${ACCOUNT_ACTIVE_DURATION_MIN:-30}" - - "--account-cooldown-duration-min" - - "${ACCOUNT_COOLDOWN_DURATION_MIN:-60}" - "--service-role" - "{{ service_role }}" + # --- S3 Logging Parameters --- + - "--s3-endpoint-url" + - "${S3_ENDPOINT_URL}" + - "--s3-access-key-id" + - "${S3_ACCESS_KEY_ID}" + - "--s3-secret-access-key" + - "${S3_SECRET_ACCESS_KEY}" + - "--s3-region-name" + - "${S3_REGION_NAME}" {% if service_role is defined and service_role != 'management' %} # --- Parameters for worker/all-in-one roles ONLY --- - "--script-dir" - "/app" - "--context-dir" - - "/app/context-data" + - "/app/context/context-data" - "--clean-context-dir" - "--clients" - "${YT_CLIENTS:-web,mweb,ios,android}" @@ -94,13 +141,13 @@ services: - "/app/config/camoufox_endpoints.json" - "--print-tokens" - "--stop-if-no-proxy" + - "--comms-log-root-dir" + - "/app/logs/yt-dlp-ops/communication_logs" + - "--bgutils-no-innertube" {% endif %} restart: unless-stopped pull_policy: always -volumes: - context-data: - networks: proxynet: name: airflow_proxynet diff --git a/airflow/dags/scripts/regression.py b/airflow/dags/scripts/regression.py new file mode 100644 index 0000000..ae79946 --- /dev/null +++ b/airflow/dags/scripts/regression.py @@ -0,0 +1,636 @@ +# -*- coding: utf-8 -*- +""" +Regression testing script for the ytdlp-ops system. + +This script orchestrates a regression test by: +1. Populating a Redis queue with video URLs from an input file. +2. Triggering the `ytdlp_ops_orchestrator` Airflow DAG to start processing. +3. Monitoring the progress of the processing for a specified duration. +4. Generating a report of any failures. +5. Optionally cleaning up the Redis queues after the test. +""" + +import argparse +import csv +import json +import logging +import os +import re +import requests +import subprocess +import signal +import sys +import time +from datetime import datetime, timedelta +from pathlib import Path + +import redis +from tabulate import tabulate + +# It's safe to import these as the script runs in the same container as Airflow +# where the yt_ops_services package is installed. 
+try: + from yt_ops_services.client_utils import get_thrift_client, format_timestamp + from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException +except ImportError: + logging.error("Could not import Thrift modules. Ensure this script is run in the 'airflow-regression-runner' container.") + sys.exit(1) + +# --- Configuration --- +logging.basicConfig( + level=logging.INFO, + format="[%(asctime)s] [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) + +INTERRUPTED = False + +def signal_handler(sig, frame): + """Handles Ctrl+C interruption.""" + global INTERRUPTED + if not INTERRUPTED: + logging.warning("Ctrl+C detected. Initiating graceful shutdown...") + INTERRUPTED = True + else: + logging.warning("Second Ctrl+C detected. Forcing exit.") + sys.exit(1) + + +# --- Helper Functions --- + +def _get_redis_client(redis_url: str): + """Gets a Redis client from a URL.""" + try: + # from_url is the modern way to connect and handles password auth + client = redis.from_url(redis_url, decode_responses=True) + client.ping() + logging.info(f"Successfully connected to Redis at {client.connection_pool.connection_kwargs.get('host')}:{client.connection_pool.connection_kwargs.get('port')}") + return client + except redis.exceptions.ConnectionError as e: + logging.error(f"Failed to connect to Redis: {e}") + sys.exit(1) + except Exception as e: + logging.error(f"An unexpected error occurred while connecting to Redis: {e}") + sys.exit(1) + + +def _get_webserver_url(): + """ + Determines the Airflow webserver URL, prioritizing MASTER_HOST_IP from .env. + """ + master_host_ip = os.getenv("MASTER_HOST_IP") + if master_host_ip: + url = f"http://{master_host_ip}:8080" + logging.info(f"Using MASTER_HOST_IP for webserver URL: {url}") + return url + + # Fallback to AIRFLOW_WEBSERVER_URL or the default service name + url = os.getenv("AIRFLOW_WEBSERVER_URL", "http://airflow-webserver:8080") + logging.info(f"Using default webserver URL: {url}") + return url + +def _normalize_to_url(item: str) -> str | None: + """ + Validates if an item is a recognizable YouTube URL or video ID, + and normalizes it to a standard watch URL format. + """ + if not item: + return None + + video_id_pattern = r"^[a-zA-Z0-9_-]{11}$" + if re.match(video_id_pattern, item): + return f"https://www.youtube.com/watch?v={item}" + + url_patterns = [r"(?:v=|\/v\/|youtu\.be\/|embed\/|shorts\/)([a-zA-Z0-9_-]{11})"] + for pattern in url_patterns: + match = re.search(pattern, item) + if match: + return f"https://www.youtube.com/watch?v={match.group(1)}" + + logging.warning(f"Could not recognize '{item}' as a valid YouTube URL or video ID.") + return None + +def _read_input_file(file_path: str) -> list[str]: + """Reads video IDs/URLs from a file (CSV or JSON list).""" + path = Path(file_path) + if not path.is_file(): + logging.error(f"Input file not found: {file_path}") + sys.exit(1) + + content = path.read_text(encoding='utf-8') + + # Try parsing as JSON list first + if content.strip().startswith('['): + try: + data = json.loads(content) + if isinstance(data, list): + logging.info(f"Successfully parsed {file_path} as a JSON list.") + return [str(item) for item in data] + except json.JSONDecodeError: + logging.warning("File looks like JSON but failed to parse. 
Will try treating as CSV/text.") + + # Fallback to CSV/text (one item per line) + items = [] + # Use io.StringIO to handle the content as a file for the csv reader + from io import StringIO + # Sniff to see if it has a header + try: + has_header = csv.Sniffer().has_header(content) + except csv.Error: + has_header = False # Not a CSV, treat as plain text + + reader = csv.reader(StringIO(content)) + if has_header: + next(reader) # Skip header row + + for row in reader: + if row: + items.append(row[0].strip()) # Assume the ID/URL is in the first column + + logging.info(f"Successfully parsed {len(items)} items from {file_path} as CSV/text.") + return items + + +def _get_api_auth(): + """Gets Airflow API credentials from environment variables.""" + username = os.getenv("AIRFLOW_ADMIN_USERNAME", "admin") + password = os.getenv("AIRFLOW_ADMIN_PASSWORD") + if not password: + logging.error("AIRFLOW_ADMIN_PASSWORD not found in environment. Cannot interact with API.") + return None, None + return username, password + +def _pause_dag(dag_id: str, is_paused: bool = True): + """Pauses or unpauses an Airflow DAG via the REST API.""" + logging.info(f"Attempting to {'pause' if is_paused else 'unpause'} DAG: {dag_id}...") + username, password = _get_api_auth() + if not username: + return + + webserver_url = _get_webserver_url() + endpoint = f"{webserver_url}/api/v1/dags/{dag_id}" + payload = {"is_paused": is_paused} + + try: + response = requests.patch(endpoint, auth=(username, password), json=payload, timeout=30) + response.raise_for_status() + logging.info(f"Successfully {'paused' if is_paused else 'unpaused'} DAG '{dag_id}'.") + except requests.exceptions.RequestException as e: + logging.error(f"Failed to {'pause' if is_paused else 'unpause'} DAG '{dag_id}': {e}") + if e.response is not None: + logging.error(f"Response: {e.response.text}") + +def _fail_running_dag_runs(dag_id: str): + """Finds all running DAG runs for a given DAG and marks them as failed.""" + logging.info(f"Attempting to fail all running instances of DAG '{dag_id}'...") + username, password = _get_api_auth() + if not username: + return + + webserver_url = _get_webserver_url() + list_endpoint = f"{webserver_url}/api/v1/dags/{dag_id}/dagRuns?state=running" + + try: + # Get running DAGs + response = requests.get(list_endpoint, auth=(username, password), timeout=30) + response.raise_for_status() + running_runs = response.json().get("dag_runs", []) + + if not running_runs: + logging.info(f"No running DAG runs found for '{dag_id}'.") + return + + logging.info(f"Found {len(running_runs)} running DAG run(s) to fail.") + + for run in running_runs: + dag_run_id = run["dag_run_id"] + update_endpoint = f"{webserver_url}/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}" + payload = {"state": "failed"} + try: + update_response = requests.patch(update_endpoint, auth=(username, password), json=payload, timeout=30) + update_response.raise_for_status() + logging.info(f" - Successfully marked DAG run '{dag_run_id}' as failed.") + except requests.exceptions.RequestException as e: + logging.error(f" - Failed to mark DAG run '{dag_run_id}' as failed: {e}") + + except requests.exceptions.RequestException as e: + logging.error(f"Failed to list running DAG runs for '{dag_id}': {e}") + if e.response is not None: + logging.error(f"Response: {e.response.text}") + + +# --- Core Logic Functions --- + +def step_0_populate_queue(redis_client, queue_name: str, input_file: str): + """Reads URLs from a file and populates the Redis inbox queue.""" + logging.info("--- Step 0: 
Populating Redis Queue ---") + raw_items = _read_input_file(input_file) + if not raw_items: + logging.error("No items found in the input file. Aborting.") + sys.exit(1) + + valid_urls = [] + for item in raw_items: + url = _normalize_to_url(item) + if url and url not in valid_urls: + valid_urls.append(url) + + if not valid_urls: + logging.error("No valid YouTube URLs or IDs were found in the input file. Aborting.") + sys.exit(1) + + inbox_queue = f"{queue_name}_inbox" + logging.info(f"Adding {len(valid_urls)} unique and valid URLs to Redis queue '{inbox_queue}'...") + + with redis_client.pipeline() as pipe: + for url in valid_urls: + pipe.rpush(inbox_queue, url) + pipe.execute() + + logging.info(f"Successfully populated queue. Total items in '{inbox_queue}': {redis_client.llen(inbox_queue)}") + return len(valid_urls) + + +def step_1_trigger_orchestrator(args: argparse.Namespace): + """Triggers the ytdlp_ops_orchestrator DAG using the Airflow REST API.""" + logging.info("--- Step 1: Triggering Orchestrator DAG via REST API ---") + + # Get API details from environment variables + webserver_url = _get_webserver_url() + api_endpoint = f"{webserver_url}/api/v1/dags/ytdlp_ops_orchestrator/dagRuns" + + # Default admin user is 'admin' + username = os.getenv("AIRFLOW_ADMIN_USERNAME", "admin") + password = os.getenv("AIRFLOW_ADMIN_PASSWORD") + + if not password: + logging.error("AIRFLOW_ADMIN_PASSWORD not found in environment. Please set it in your .env file.") + sys.exit(1) + + # Construct the configuration for the DAG run + conf = { + "total_workers": args.workers, + "workers_per_bunch": args.workers_per_bunch, + "clients": args.client, + } + + payload = { + "conf": conf + } + + logging.info(f"Triggering DAG at endpoint: {api_endpoint}") + + try: + response = requests.post( + api_endpoint, + auth=(username, password), + json=payload, + timeout=30 # 30 second timeout + ) + response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx) + + logging.info("Successfully triggered the orchestrator DAG.") + logging.debug(f"Airflow API response:\n{response.json()}") + + except requests.exceptions.RequestException as e: + logging.error("Failed to trigger the orchestrator DAG via REST API.") + logging.error(f"Error: {e}") + if e.response is not None: + logging.error(f"Response status code: {e.response.status_code}") + logging.error(f"Response text: {e.response.text}") + sys.exit(1) + + +def step_2_monitor_progress(args: argparse.Namespace, redis_client, queue_name: str, total_urls: int, run_time_min: int, interval_min: int, show_status: bool): + """Monitors the Redis queues for the duration of the test.""" + logging.info("--- Step 2: Monitoring Progress ---") + + end_time = datetime.now() + timedelta(minutes=run_time_min) + inbox_q = f"{queue_name}_inbox" + progress_q = f"{queue_name}_progress" + result_q = f"{queue_name}_result" + fail_q = f"{queue_name}_fail" + + while datetime.now() < end_time and not INTERRUPTED: + try: + inbox_len = redis_client.llen(inbox_q) + progress_len = redis_client.hlen(progress_q) + result_len = redis_client.hlen(result_q) + fail_len = redis_client.hlen(fail_q) + + processed = result_len + fail_len + success_len = 0 + if result_len > 0: + # This is inefficient but gives a more accurate success count + results = redis_client.hgetall(result_q) + success_len = sum(1 for v in results.values() if '"status": "success"' in v) + + logging.info( + f"Progress: {processed}/{total_urls} | " + f"Success: {success_len} | Failed: {fail_len} | " + f"In Progress: {progress_len} 
| Inbox: {inbox_len}" + ) + if show_status: + # This function now connects directly to services to get status + get_system_status(args, redis_client) + except Exception as e: + logging.error(f"Error while querying Redis for progress: {e}") + + # Wait for the interval, but check for interruption every second + # for a more responsive shutdown. + wait_until = time.time() + interval_min * 60 + while time.time() < wait_until and not INTERRUPTED: + # Check if we are past the main end_time + if datetime.now() >= end_time: + break + time.sleep(1) + + if INTERRUPTED: + logging.info("Monitoring interrupted.") + else: + logging.info("Monitoring period has ended.") + + +# --- System Status Functions (Direct Connect) --- + +def _list_proxy_statuses(client, server_identity=None): + """Lists proxy statuses by connecting directly to the Thrift service.""" + logging.info(f"--- Proxy Statuses (Server: {server_identity or 'ALL'}) ---") + try: + statuses = client.getProxyStatus(server_identity) + if not statuses: + logging.info("No proxy statuses found.") + return + + status_list = [] + headers = ["Server", "Proxy URL", "Status", "Success", "Failures", "Last Success", "Last Failure"] + for s in statuses: + status_list.append({ + "Server": s.serverIdentity, "Proxy URL": s.proxyUrl, "Status": s.status, + "Success": s.successCount, "Failures": s.failureCount, + "Last Success": format_timestamp(s.lastSuccessTimestamp), + "Last Failure": format_timestamp(s.lastFailureTimestamp), + }) + logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid')) + except (PBServiceException, PBUserException) as e: + logging.error(f"Failed to get proxy statuses: {e.message}") + except Exception as e: + logging.error(f"An unexpected error occurred while getting proxy statuses: {e}", exc_info=True) + +def _list_account_statuses(client, redis_client, account_id=None): + """Lists account statuses from Thrift, enriched with live Redis data.""" + logging.info(f"--- Account Statuses (Account: {account_id or 'ALL'}) ---") + try: + statuses = client.getAccountStatus(accountId=account_id, accountPrefix=None) + if not statuses: + logging.info("No account statuses found.") + return + + status_list = [] + for s in statuses: + status_str = s.status + if 'RESTING' in status_str: + try: + expiry_ts_bytes = redis_client.hget(f"account_status:{s.accountId}", "resting_until") + if expiry_ts_bytes: + expiry_ts = float(expiry_ts_bytes) + now = datetime.now().timestamp() + if now < expiry_ts: + remaining_seconds = int(expiry_ts - now) + status_str = f"RESTING ({remaining_seconds}s left)" + except Exception: + pass # Ignore if parsing fails + + last_success = float(s.lastSuccessTimestamp) if s.lastSuccessTimestamp else 0 + last_failure = float(s.lastFailureTimestamp) if s.lastFailureTimestamp else 0 + last_activity = max(last_success, last_failure) + + status_list.append({ + "Account ID": s.accountId, "Status": status_str, "Success": s.successCount, + "Failures": s.failureCount, "Last Success": format_timestamp(s.lastSuccessTimestamp), + "Last Failure": format_timestamp(s.lastFailureTimestamp), "Last Proxy": s.lastUsedProxy or "N/A", + "_last_activity": last_activity, + }) + + status_list.sort(key=lambda item: item.get('_last_activity', 0), reverse=True) + for item in status_list: + del item['_last_activity'] + + logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid')) + except (PBServiceException, PBUserException) as e: + logging.error(f"Failed to get account statuses: {e.message}") + except Exception as e: + 
logging.error(f"An unexpected error occurred while getting account statuses: {e}", exc_info=True) + +def _list_client_statuses(redis_client): + """Lists client statistics from Redis.""" + logging.info("--- Client Statuses ---") + try: + stats_key = "client_stats" + all_stats_raw = redis_client.hgetall(stats_key) + if not all_stats_raw: + logging.info("No client stats found in Redis.") + return + + status_list = [] + for client, stats_json in all_stats_raw.items(): + try: + stats = json.loads(stats_json) + def format_latest(data): + if not data: return "N/A" + ts = format_timestamp(data.get('timestamp')) + url = data.get('url', 'N/A') + video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url) + video_id = video_id_match.group(1) if video_id_match else 'N/A' + return f"{ts} ({video_id})" + + status_list.append({ + "Client": client, "Success": stats.get('success_count', 0), + "Failures": stats.get('failure_count', 0), + "Last Success": format_latest(stats.get('latest_success')), + "Last Failure": format_latest(stats.get('latest_failure')), + }) + except (json.JSONDecodeError, AttributeError): + status_list.append({"Client": client, "Success": "ERROR", "Failures": "ERROR", "Last Success": "Parse Error", "Last Failure": "Parse Error"}) + + status_list.sort(key=lambda item: item.get('Client', '')) + logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid')) + except Exception as e: + logging.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True) + +def get_system_status(args: argparse.Namespace, redis_client): + """Connects to services and prints status tables.""" + logging.info("--- Getting System Status ---") + client, transport = None, None + try: + client, transport = get_thrift_client(args.management_host, args.management_port) + _list_proxy_statuses(client) + _list_account_statuses(client, redis_client) + _list_client_statuses(redis_client) + except Exception as e: + logging.error(f"Could not get system status: {e}") + finally: + if transport and transport.isOpen(): + transport.close() + + +def step_3_generate_report(redis_client, queue_name: str, report_file: str | None): + """Generates a CSV report of failed items.""" + logging.info("--- Step 3: Generating Report ---") + fail_q = f"{queue_name}_fail" + + failed_items = redis_client.hgetall(fail_q) + if not failed_items: + logging.info("No items found in the fail queue. No report will be generated.") + return + + logging.info(f"Found {len(failed_items)} failed items. 
Writing to report...") + + report_data = [] + for url, data_json in failed_items.items(): + try: + data = json.loads(data_json) + error_details = data.get('error_details', {}) + report_data.append({ + 'url': url, + 'video_id': _normalize_to_url(url).split('v=')[-1] if _normalize_to_url(url) else 'N/A', + 'error_message': error_details.get('error_message', 'N/A'), + 'error_code': error_details.get('error_code', 'N/A'), + 'proxy_url': error_details.get('proxy_url', 'N/A'), + 'timestamp': datetime.fromtimestamp(data.get('end_time', 0)).isoformat(), + }) + except (json.JSONDecodeError, AttributeError): + report_data.append({'url': url, 'video_id': 'N/A', 'error_message': 'Could not parse error data', 'error_code': 'PARSE_ERROR', 'proxy_url': 'N/A', 'timestamp': 'N/A'}) + + if report_file: + try: + with open(report_file, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=report_data[0].keys()) + writer.writeheader() + writer.writerows(report_data) + logging.info(f"Successfully wrote report to {report_file}") + except IOError as e: + logging.error(f"Could not write report to file {report_file}: {e}") + else: + # Print to stdout if no file is specified + logging.info("--- Failure Report (stdout) ---") + for item in report_data: + logging.info(f"URL: {item['url']}, Error: {item['error_code']} - {item['error_message']}") + logging.info("--- End of Report ---") + + +def handle_interruption(redis_client, queue_name, report_file): + """Graceful shutdown logic for when the script is interrupted.""" + logging.warning("--- Interruption Detected: Starting Shutdown Procedure ---") + + # 1. Pause DAGs + _pause_dag("ytdlp_ops_orchestrator") + _pause_dag("ytdlp_ops_dispatcher") + + # 2. Fail running per_url jobs + _fail_running_dag_runs("ytdlp_ops_worker_per_url") + + # 3. Generate report + logging.info("Generating final report due to interruption...") + step_3_generate_report(redis_client, queue_name, report_file) + # Also print to stdout if a file was specified, so user sees it immediately + if report_file: + logging.info("Printing report to stdout as well...") + step_3_generate_report(redis_client, queue_name, None) + + +def step_4_cleanup_queues(redis_client, queue_name: str): + """Cleans up the Redis queues used by the test.""" + logging.info("--- Step 4: Cleaning Up Queues ---") + queues_to_delete = [ + f"{queue_name}_inbox", + f"{queue_name}_progress", + f"{queue_name}_result", + f"{queue_name}_fail", + ] + logging.warning(f"This will delete the following Redis keys: {queues_to_delete}") + + deleted_count = redis_client.delete(*queues_to_delete) + logging.info(f"Cleanup complete. Deleted {deleted_count} key(s).") + + +def main(): + """Main function to parse arguments and run the regression test.""" + # Register the signal handler for Ctrl+C + signal.signal(signal.SIGINT, signal_handler) + + parser = argparse.ArgumentParser(description="Run a regression test for the ytdlp-ops system.") + + # Environment + parser.add_argument("--redis-host", type=str, default="redis", help="Hostname or IP address of the Redis server. 
Defaults to 'redis' for in-container execution.") + parser.add_argument("--management-host", type=str, default=os.getenv("MANAGEMENT_SERVICE_HOST", "envoy-thrift-lb"), help="Hostname of the management Thrift service.") + parser.add_argument("--management-port", type=int, default=int(os.getenv("MANAGEMENT_SERVICE_PORT", 9080)), help="Port of the management Thrift service.") + + # Test Configuration + parser.add_argument("--client", type=str, required=True, help="Client persona to test (e.g., 'mweb').") + parser.add_argument("--workers", type=int, required=True, help="Total number of worker loops to start.") + parser.add_argument("--workers-per-bunch", type=int, default=1, help="Number of workers per bunch.") + parser.add_argument("--run-time-min", type=int, required=True, help="How long to let the test run, in minutes.") + parser.add_argument("--input-file", type=str, help="Path to a file containing video IDs/URLs. If not provided, the existing queue will be used.") + + # Monitoring & Reporting + parser.add_argument("--progress-interval-min", type=int, default=2, help="How often to query and print progress, in minutes.") + parser.add_argument("--report-file", type=str, help="Path to a CSV file to write the list of failed URLs to.") + parser.add_argument("--show-status", action="store_true", help="If set, show proxy and account statuses during progress monitoring.") + + # Actions + parser.add_argument("--cleanup", action="store_true", help="If set, clear the Redis queues after the test completes.") + parser.add_argument("--skip-populate", action="store_true", help="If set, skip populating the queue (assumes it's already populated).") + parser.add_argument("--skip-trigger", action="store_true", help="If set, skip triggering the orchestrator (assumes it's already running).") + + args = parser.parse_args() + + # --- Setup --- + redis_password = os.getenv("REDIS_PASSWORD") + if not redis_password: + logging.error("REDIS_PASSWORD not found in environment. Please set it in your .env file.") + sys.exit(1) + + # Use the provided redis-host, defaulting to 'redis' for in-container execution + redis_url = f"redis://:{redis_password}@{args.redis_host}:6379/0" + redis_client = _get_redis_client(redis_url) + + queue_name = "video_queue" # Hardcoded for now, could be an arg + total_urls = 0 + + # --- Execution --- + if not args.skip_populate: + if args.input_file: + total_urls = step_0_populate_queue(redis_client, queue_name, args.input_file) + else: + logging.info("No input file provided, using existing queue.") + total_urls = redis_client.llen(f"{queue_name}_inbox") + if total_urls == 0: + logging.warning("Queue is empty and no input file was provided. The test may not have any work to do.") + else: + total_urls = redis_client.llen(f"{queue_name}_inbox") + logging.info(f"Skipping population. 
Found {total_urls} URLs in the inbox.") + + if not args.skip_trigger: + step_1_trigger_orchestrator(args) + else: + logging.info("Skipping orchestrator trigger.") + + step_2_monitor_progress(args, redis_client, queue_name, total_urls, args.run_time_min, args.progress_interval_min, args.show_status) + + if INTERRUPTED: + handle_interruption(redis_client, queue_name, args.report_file) + else: + step_3_generate_report(redis_client, queue_name, args.report_file) + + if args.cleanup: + step_4_cleanup_queues(redis_client, queue_name) + + if INTERRUPTED: + logging.warning("Regression test script finished due to user interruption.") + sys.exit(130) # Standard exit code for Ctrl+C + else: + logging.info("Regression test script finished.") + +if __name__ == "__main__": + main() diff --git a/airflow/dags/ytdlp_mgmt_proxy_account.py b/airflow/dags/ytdlp_mgmt_proxy_account.py index 3599c61..0e180f3 100644 --- a/airflow/dags/ytdlp_mgmt_proxy_account.py +++ b/airflow/dags/ytdlp_mgmt_proxy_account.py @@ -4,6 +4,9 @@ DAG to manage the state of proxies and accounts used by the ytdlp-ops-server. from __future__ import annotations import logging +import json +import re +import time from datetime import datetime import socket @@ -208,6 +211,112 @@ def _list_account_statuses(client, account_id, redis_conn_id): print(f"\nERROR: An unexpected error occurred: {e}\n") +def _list_client_statuses(redis_conn_id): + """Lists the status of different client types from Redis.""" + logger.info("Listing client statuses from Redis key 'client_stats'") + + try: + redis_client = _get_redis_client(redis_conn_id) + stats_key = "client_stats" + all_stats_raw = redis_client.hgetall(stats_key) + + if not all_stats_raw: + print("\n--- Client Statuses ---\nNo client stats found in Redis.\n-----------------------\n") + return + + from tabulate import tabulate + status_list = [] + + for client_bytes, stats_json_bytes in all_stats_raw.items(): + client_name = client_bytes.decode('utf-8') + try: + stats = json.loads(stats_json_bytes.decode('utf-8')) + + def format_latest(data): + if not data: return "N/A" + ts = format_timestamp(data.get('timestamp')) + url = data.get('url') or 'N/A' + machine = data.get('machine_id', 'N/A') + video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url) + video_id = video_id_match.group(1) if video_id_match else 'N/A' + return f"{ts}\nMachine: {machine}\nVideo ID: {video_id}" + + status_item = { + "Client": client_name, + "Success": stats.get('success_count', 0), + "Failures": stats.get('failure_count', 0), + "Last Success": format_latest(stats.get('latest_success')), + "Last Failure": format_latest(stats.get('latest_failure')), + } + status_list.append(status_item) + except (json.JSONDecodeError, AttributeError) as e: + logger.error(f"Could not parse stats for client '{client_name}': {e}") + status_list.append({ + "Client": client_name, "Success": "ERROR", "Failures": "ERROR", + "Last Success": "Could not parse data", "Last Failure": "Could not parse data" + }) + + status_list.sort(key=lambda item: item.get('Client', '')) + + print("\n--- Client Statuses ---") + print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}") + print("-----------------------\n") + + except Exception as e: + logger.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True) + print(f"\nERROR: An unexpected error occurred: {e}\n") + + +def _list_activity_counters(redis_conn_id: str): + """Lists current activity rates for proxies and accounts from Redis.""" + logger.info("Listing activity 
counters from Redis keys 'activity:per_proxy:*' and 'activity:per_account:*'") + + try: + redis_client = _get_redis_client(redis_conn_id) + from tabulate import tabulate + now = time.time() + + def process_keys(pattern, entity_name): + keys = redis_client.scan_iter(pattern) + status_list = [] + for key_bytes in keys: + key = key_bytes.decode('utf-8') + entity_id = key.split(':', 2)[-1] + + # Clean up old entries before counting + redis_client.zremrangebyscore(key, '-inf', now - 3660) # Clean up > 1hr old + + count_1m = redis_client.zcount(key, now - 60, now) + count_5m = redis_client.zcount(key, now - 300, now) + count_1h = redis_client.zcount(key, now - 3600, now) + + if count_1h == 0: # Don't show entities with no recent activity + continue + + status_list.append({ + entity_name: entity_id, + "Activity (Last 1m)": count_1m, + "Activity (Last 5m)": count_5m, + "Activity (Last 1h)": count_1h, + }) + + status_list.sort(key=lambda item: item.get(entity_name, '')) + + print(f"\n--- {entity_name} Activity Counters ---") + if not status_list: + print(f"No recent activity found for {entity_name.lower()}s.") + else: + print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}") + print("-----------------------------------\n") + + process_keys("activity:per_proxy:*", "Proxy URL") + process_keys("activity:per_account:*", "Account ID") + + except Exception as e: + logger.error(f"An unexpected error occurred while getting activity counters: {e}", exc_info=True) + print(f"\nERROR: An unexpected error occurred: {e}\n") + + def manage_system_callable(**context): """Main callable to interact with the system management endpoints.""" # Log version for debugging @@ -218,7 +327,7 @@ def manage_system_callable(**context): action = params["action"] # For Thrift actions, use the new management host/port - if entity not in ["airflow_meta"]: + if entity not in ["airflow_meta", "activity_counters"]: host = params["management_host"] port = params["management_port"] else: @@ -232,8 +341,10 @@ def manage_system_callable(**context): valid_actions = { "proxy": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"], "account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"], + "client": ["list_with_status", "delete_from_redis"], "accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"], "airflow_meta": ["clear_dag_runs"], + "activity_counters": ["list_with_status"], } if action not in valid_actions.get(entity, []): @@ -287,7 +398,15 @@ def manage_system_callable(**context): # The session is committed automatically by the `with create_session()` context manager. logger.info(f"Successfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.") print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.\n") - return # End execution + return # End execution + + # --- Handle Activity Counter action --- + if entity == "activity_counters": + if action == "list_with_status": + _list_activity_counters(params["redis_conn_id"]) + return # End execution + else: + raise ValueError(f"Action '{action}' is not valid for entity 'activity_counters'. 
Only 'list_with_status' is supported.") # Handle Thrift-based deletion actions if action == "delete_from_redis": @@ -355,6 +474,15 @@ def manage_system_callable(**context): print(f"\nSuccessfully deleted {proxy_result} proxy keys for server '{server_identity}' from Redis.\n") else: print(f"\nSuccessfully deleted {proxy_result} proxy keys from Redis across ALL servers.\n") + + elif entity == "client": + logger.info("Deleting all client stats from Redis...") + redis_client = _get_redis_client(params["redis_conn_id"]) + result = redis_client.delete("client_stats") + if result > 0: + print(f"\nSuccessfully deleted 'client_stats' key from Redis.\n") + else: + print(f"\nKey 'client_stats' not found in Redis. Nothing to delete.\n") except (PBServiceException, PBUserException) as e: logger.error(f"Thrift error performing delete action: {e.message}", exc_info=True) @@ -374,7 +502,10 @@ def manage_system_callable(**context): try: client, transport = get_thrift_client(host, port) - if entity == "proxy": + if entity == "client": + if action == "list_with_status": + _list_client_statuses(params["redis_conn_id"]) + elif entity == "proxy": if action == "list_with_status": _list_proxy_statuses(client, server_identity) elif action == "ban": @@ -497,6 +628,13 @@ def manage_system_callable(**context): _list_account_statuses(client, account_prefix, params["redis_conn_id"]) elif entity == "accounts_and_proxies": + if action == "list_with_status": + print("\n--- Listing statuses for Proxies, Accounts, and Clients ---") + _list_proxy_statuses(client, server_identity) + _list_account_statuses(client, account_id, params["redis_conn_id"]) + _list_client_statuses(params["redis_conn_id"]) + return # End execution for list_with_status + print(f"\n--- Performing action '{action}' on BOTH Proxies and Accounts ---") # --- Proxy Action --- @@ -674,7 +812,7 @@ with DAG( "entity": Param( "accounts_and_proxies", type="string", - enum=["account", "proxy", "accounts_and_proxies", "airflow_meta"], + enum=["account", "proxy", "client", "accounts_and_proxies", "activity_counters", "airflow_meta"], description="The type of entity to manage.", ), "action": Param( @@ -698,6 +836,13 @@ with DAG( - `unban_all`: Sets the status of all accounts (or those matching a prefix in `account_id`) to `ACTIVE`. - `delete_from_redis`: **(Destructive)** Deletes account status from Redis via Thrift service. This permanently removes the account from being tracked by the system. If `account_id` is provided, it deletes that specific account. If `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts. + #### Actions for `entity: client` + - `list_with_status`: View success/failure statistics for each client type. + - `delete_from_redis`: **(Destructive)** Deletes all client stats from Redis. + + #### Actions for `entity: activity_counters` + - `list_with_status`: View current activity rates (ops/min, ops/hr) for proxies and accounts. + #### Actions for `entity: accounts_and_proxies` - This entity performs the selected action on **both** proxies and accounts where applicable. - `list_with_status`: View statuses for both proxies and accounts. 
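A minimal sketch (not part of this patch) of how an activity event could be recorded so that the `activity_counters` listing above has data to count. It assumes the producer writes one sorted-set member per event with the event time as the score, matching the ZCOUNT/ZREMRANGEBYSCORE reads in `_list_activity_counters`; the actual ytdlp-ops-server recording code may differ.

```python
import time
import uuid

import redis


def record_activity(r: redis.Redis, entity_kind: str, entity_id: str) -> None:
    """Append one activity event for a proxy or account as a sorted-set member."""
    key = f"activity:per_{entity_kind}:{entity_id}"  # e.g. activity:per_proxy:http://1.2.3.4:3128
    now = time.time()
    # Unique member per event, timestamp as score, so sliding-window counts work via ZCOUNT.
    r.zadd(key, {f"{now}:{uuid.uuid4().hex[:8]}": now})
    # Expire the whole key after ~1h of inactivity; the reader also trims old members by score.
    r.expire(key, 3660)


if __name__ == "__main__":
    client = redis.Redis(host="localhost", port=6379, db=0)  # connection details are illustrative
    record_activity(client, "proxy", "http://proxy-1:3128")
```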
@@ -735,9 +880,9 @@ with DAG( description="The Airflow connection ID for the Redis server (used for 'delete_from_redis' and for fetching detailed account status).", ), "dag_id_to_manage": Param( - "ytdlp_ops_worker_per_url", + "ytdlp_ops_v01_worker_per_url", type="string", - enum=["ytdlp_ops_worker_per_url", "ytdlp_ops_orchestrator"], + enum=["ytdlp_ops_v01_orchestrator", "ytdlp_ops_v01_dispatcher", "ytdlp_ops_v01_worker_per_url", "ytdlp_ops_v02_orchestrator_auth", "ytdlp_ops_v02_dispatcher_auth", "ytdlp_ops_v02_worker_per_url_auth", "ytdlp_ops_v02_orchestrator_dl", "ytdlp_ops_v02_dispatcher_dl", "ytdlp_ops_v02_worker_per_url_dl"], title="[Airflow Meta] DAG ID", description="The DAG ID to perform the action on.", ), diff --git a/airflow/dags/ytdlp_mgmt_queues.py b/airflow/dags/ytdlp_mgmt_queues.py index fef9f76..930b821 100644 --- a/airflow/dags/ytdlp_mgmt_queues.py +++ b/airflow/dags/ytdlp_mgmt_queues.py @@ -254,7 +254,18 @@ def clear_queue_callable(**context): ti = context['task_instance'] logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") redis_conn_id = params['redis_conn_id'] - queue_base_name = params['queue_base_name'] + + queue_system = params.get('queue_system', 'v1_monolithic') + if queue_system == 'v1_monolithic': + queue_base_name = params['queue_base_name'] + elif queue_system == 'v2_separated_auth': + queue_base_name = 'queue2_auth' + elif queue_system == 'v2_separated_dl': + queue_base_name = 'queue2_dl' + else: + raise ValueError(f"Invalid queue_system: {queue_system}") + logger.info(f"Operating on queue system '{queue_system}' with base name '{queue_base_name}'.") + queues_to_clear_options = params.get('queues_to_clear_options', []) confirm_clear = params.get('confirm_clear', False) dump_queues = params['dump_queues'] @@ -386,50 +397,77 @@ def check_status_callable(**context): ti = context['task_instance'] logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") redis_conn_id = params['redis_conn_id'] - queue_name = params.get('queue_base_name', DEFAULT_QUEUE_NAME) - queue_suffixes = ['_inbox', '_progress', '_result', '_fail'] + queue_system = params.get('queue_system', 'v1_monolithic') + + queue_base_names_to_check = [] + if queue_system == 'v1_monolithic': + queue_base_names_to_check.append(params.get('queue_base_name', DEFAULT_QUEUE_NAME)) + elif queue_system.startswith('v2_'): + # For v2, always check both auth and dl queues for a complete picture. 
+ queue_base_names_to_check.extend(['queue2_auth', 'queue2_dl']) + else: + raise ValueError(f"Invalid queue_system: {queue_system}") - logger.info(f"--- Checking Status for Queues with Base Name: '{queue_name}' ---") + queue_suffixes = ['_inbox', '_progress', '_result', '_fail'] + + logger.info(f"--- Checking Status for Queue System: '{queue_system}' ---") try: redis_client = _get_redis_client(redis_conn_id) - for suffix in queue_suffixes: - queue_to_check = f"{queue_name}{suffix}" - key_type = redis_client.type(queue_to_check).decode('utf-8') - size = 0 - if key_type == 'list': - size = redis_client.llen(queue_to_check) - elif key_type == 'hash': - size = redis_client.hlen(queue_to_check) + for queue_name in queue_base_names_to_check: + logger.info(f"--- Base Name: '{queue_name}' ---") + for suffix in queue_suffixes: + queue_to_check = f"{queue_name}{suffix}" + key_type = redis_client.type(queue_to_check).decode('utf-8') + size = 0 + if key_type == 'list': + size = redis_client.llen(queue_to_check) + elif key_type == 'hash': + size = redis_client.hlen(queue_to_check) - if key_type != 'none': - logger.info(f" - Queue '{queue_to_check}': Type='{key_type.upper()}', Size={size}") - else: - logger.info(f" - Queue '{queue_to_check}': Does not exist.") + if key_type != 'none': + logger.info(f" - Queue '{queue_to_check}': Type='{key_type.upper()}', Size={size}") + else: + logger.info(f" - Queue '{queue_to_check}': Does not exist.") logger.info(f"--- End of Status Check ---") except Exception as e: - logger.error(f"Failed to check queue status for base name '{queue_name}': {e}", exc_info=True) + logger.error(f"Failed to check queue status for system '{queue_system}': {e}", exc_info=True) raise AirflowException(f"Failed to check queue status: {e}") def requeue_failed_callable(**context): """ Copies all URLs from the fail hash to the inbox list and optionally clears the fail hash. + Adapts behavior for v1 and v2 queue systems. """ params = context['params'] ti = context['task_instance'] logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") redis_conn_id = params['redis_conn_id'] - queue_name = params['queue_base_name'] clear_fail_queue = params['clear_fail_queue_after_requeue'] + queue_system = params.get('queue_system', 'v1_monolithic') - fail_queue_name = f"{queue_name}_fail" - inbox_queue_name = f"{queue_name}_inbox" + fail_queue_name = "" + inbox_queue_name = "" - logger.info(f"Requeuing failed URLs from '{fail_queue_name}' to '{inbox_queue_name}'.") + if queue_system == 'v1_monolithic': + queue_name = params['queue_base_name'] + fail_queue_name = f"{queue_name}_fail" + inbox_queue_name = f"{queue_name}_inbox" + elif queue_system == 'v2_separated_auth': + fail_queue_name = "queue2_auth_fail" + inbox_queue_name = "queue2_auth_inbox" + elif queue_system == 'v2_separated_dl': + fail_queue_name = "queue2_dl_fail" + # DL failures must be re-authenticated, so they go back to the auth inbox. 
+ inbox_queue_name = "queue2_auth_inbox" + else: + raise ValueError(f"Invalid queue_system: {queue_system}") + + logger.info(f"Requeuing failed URLs from '{fail_queue_name}' to '{inbox_queue_name}' (system: {queue_system}).") redis_client = _get_redis_client(redis_conn_id) @@ -478,7 +516,15 @@ def add_videos_to_queue_callable(**context): params = context["params"] ti = context['task_instance'] logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") - queue_name = params["queue_base_name"] + + queue_system = params.get('queue_system', 'v1_monolithic') + if queue_system.startswith('v2_'): + # For v2 systems, raw URLs are always added to the auth queue. + queue_name = 'queue2_auth' + logger.info(f"Queue system is '{queue_system}'. Adding URLs to '{queue_name}_inbox'.") + else: + queue_name = params["queue_base_name"] + redis_conn_id = params["redis_conn_id"] dry_run = params["dry_run"] @@ -565,11 +611,18 @@ with DAG( title="Action", description="The management action to perform.", ), + "queue_system": Param( + "v1_monolithic", + type="string", + enum=["v1_monolithic", "v2_separated_auth", "v2_separated_dl"], + title="Queue System", + description="Select the target queue system to manage. This choice affects which queues are targeted by actions.", + ), "queue_base_name": Param( DEFAULT_QUEUE_NAME, type="string", - title="Queue Base Name", - description="Base name for queues used in actions like 'add_videos', 'check_status', 'clear_queue', 'requeue_failed'.", + title="Queue Base Name (v1 only)", + description="Base name for queues. Only used when 'Queue System' is 'v1_monolithic'.", ), # --- Params for 'add_videos' --- "input_source": Param( @@ -644,7 +697,7 @@ with DAG( ), # --- Params for 'list_contents' --- "queue_to_list": Param( - 'video_queue_inbox,video_queue_result,video_queue_fail', + 'video_queue_inbox,queue2_auth_inbox,queue2_dl_result', type="string", title="[list_contents] Queues to List", description="Comma-separated list of exact Redis key names to list.", diff --git a/airflow/dags/ytdlp_ops_account_maintenance.py b/airflow/dags/ytdlp_ops_account_maintenance.py index 5ba4216..abfa8f6 100644 --- a/airflow/dags/ytdlp_ops_account_maintenance.py +++ b/airflow/dags/ytdlp_ops_account_maintenance.py @@ -22,6 +22,7 @@ from datetime import datetime from airflow.decorators import task from airflow.models import Variable from airflow.models.dag import DAG +from airflow.models.param import Param from airflow.utils.dates import days_ago # Import utility functions and Thrift modules @@ -42,7 +43,7 @@ DEFAULT_ARGS = { 'owner': 'airflow', 'retries': 1, 'retry_delay': 30, - 'queue': 'maintenance', + 'queue': 'default', } @@ -61,38 +62,76 @@ def _get_thrift_client(host, port, timeout=60): @task -def manage_account_states(): +def manage_account_states(**context): """ - Fetches all account statuses and performs necessary state transitions. + Fetches all account statuses and performs necessary state transitions + based on time durations configured in the DAG parameters. """ + params = context['params'] + requests_limit = params['account_requests_limit'] + cooldown_duration_s = params['account_cooldown_duration_min'] * 60 + ban_duration_s = params['account_ban_duration_hours'] * 3600 + host = DEFAULT_YT_AUTH_SERVICE_IP port = int(DEFAULT_YT_AUTH_SERVICE_PORT) redis_conn_id = DEFAULT_REDIS_CONN_ID + logger.info(f"Starting account maintenance. 
Service: {host}:{port}, Redis: {redis_conn_id}") + logger.info(f"Using limits: Requests={requests_limit}, Cooldown={params['account_cooldown_duration_min']}m, Ban={params['account_ban_duration_hours']}h") client, transport = None, None try: client, transport = _get_thrift_client(host, port) redis_client = _get_redis_client(redis_conn_id) - logger.info("Fetching all account statuses from the service...") - all_accounts = client.getAccountStatus(accountPrefix=None) - logger.info(f"Found {len(all_accounts)} accounts to process.") + logger.info(f"--- Step 1: Fetching all account statuses from the ytdlp-ops-server at {host}:{port}... ---") + all_accounts = client.getAccountStatus(accountId=None, accountPrefix=None) + logger.info(f"Found {len(all_accounts)} total accounts to process.") accounts_to_unban = [] accounts_to_activate = [] accounts_to_rest = [] + + now_ts = int(time.time()) for acc in all_accounts: - if acc.status == "BANNED (expired)": + # Thrift can return 0 for unset integer fields. + # The AccountStatus thrift object is missing status_changed_timestamp and active_since_timestamp. + # We use available timestamps as proxies. + last_failure_ts = int(acc.lastFailureTimestamp or 0) + last_success_ts = int(acc.lastSuccessTimestamp or 0) + last_usage_ts = max(last_failure_ts, last_success_ts) + + if acc.status == "BANNED" and last_failure_ts > 0 and (now_ts - last_failure_ts) >= ban_duration_s: accounts_to_unban.append(acc.accountId) - elif acc.status == "RESTING (expired)": + elif acc.status == "RESTING" and last_usage_ts > 0 and (now_ts - last_usage_ts) >= cooldown_duration_s: accounts_to_activate.append(acc.accountId) - elif acc.status == "ACTIVE (should be resting)": - accounts_to_rest.append(acc.accountId) + elif acc.status == "ACTIVE": + # For ACTIVE -> RESTING, check how many requests have been made since activation. + count_at_activation_raw = redis_client.hget(f"account_status:{acc.accountId}", "success_count_at_activation") + + if count_at_activation_raw is not None: + count_at_activation = int(count_at_activation_raw) + current_success_count = acc.successCount or 0 + requests_made = current_success_count - count_at_activation + + if requests_made >= requests_limit: + logger.info(f"Account {acc.accountId} reached request limit ({requests_made}/{requests_limit}). Moving to RESTING.") + accounts_to_rest.append(acc.accountId) + else: + # This is a fallback for accounts that were activated before this logic was deployed. + # We can activate them "fresh" by setting their baseline count now. + logger.info(f"Account {acc.accountId} is ACTIVE but has no 'success_count_at_activation'. Setting it now.") + redis_client.hset(f"account_status:{acc.accountId}", "success_count_at_activation", acc.successCount or 0) + + logger.info("--- Step 2: Analyzing accounts for state transitions ---") + logger.info(f"Found {len(accounts_to_unban)} accounts with expired bans to un-ban.") + logger.info(f"Found {len(accounts_to_activate)} accounts with expired rest periods to activate.") + logger.info(f"Found {len(accounts_to_rest)} accounts with expired active periods to put to rest.") # --- Perform State Transitions --- # 1. 
Un-ban accounts via Thrift call + logger.info("--- Step 3: Processing un-bans ---") if accounts_to_unban: logger.info(f"Un-banning {len(accounts_to_unban)} accounts: {accounts_to_unban}") for acc_id in accounts_to_unban: @@ -101,21 +140,30 @@ def manage_account_states(): logger.info(f"Successfully un-banned account '{acc_id}'.") except Exception as e: logger.error(f"Failed to un-ban account '{acc_id}': {e}") + else: + logger.info("No accounts to un-ban.") # 2. Activate resting accounts via direct Redis write + logger.info("--- Step 4: Processing activations ---") if accounts_to_activate: logger.info(f"Activating {len(accounts_to_activate)} accounts: {accounts_to_activate}") now_ts = int(time.time()) + account_map = {acc.accountId: acc for acc in all_accounts} with redis_client.pipeline() as pipe: for acc_id in accounts_to_activate: key = f"account_status:{acc_id}" + current_success_count = account_map[acc_id].successCount or 0 pipe.hset(key, "status", "ACTIVE") pipe.hset(key, "active_since_timestamp", now_ts) pipe.hset(key, "status_changed_timestamp", now_ts) + pipe.hset(key, "success_count_at_activation", current_success_count) pipe.execute() logger.info("Finished activating accounts.") + else: + logger.info("No accounts to activate.") # 3. Rest active accounts via direct Redis write + logger.info("--- Step 5: Processing rests ---") if accounts_to_rest: logger.info(f"Putting {len(accounts_to_rest)} accounts to rest: {accounts_to_rest}") now_ts = int(time.time()) @@ -124,8 +172,13 @@ def manage_account_states(): key = f"account_status:{acc_id}" pipe.hset(key, "status", "RESTING") pipe.hset(key, "status_changed_timestamp", now_ts) + pipe.hdel(key, "success_count_at_activation") pipe.execute() logger.info("Finished putting accounts to rest.") + else: + logger.info("No accounts to put to rest.") + + logger.info("--- Account maintenance run complete. ---") finally: if transport and transport.isOpen(): @@ -139,6 +192,47 @@ with DAG( start_date=days_ago(1), catchup=False, tags=['ytdlp', 'maintenance'], - doc_md=__doc__, + doc_md=""" + ### YT-DLP Account Maintenance: Time-Based State Transitions + + This DAG is the central authority for automated, **time-based** state management for ytdlp-ops accounts. + It runs periodically to fetch the status of all accounts and applies its own logic to determine if an account's state should change based on configurable time durations. + + The thresholds are defined as DAG parameters and can be configured via the Airflow UI: + - **Requests Limit**: How many successful requests an account can perform before it needs to rest. + - **Cooldown Duration**: How long an account must rest before it can be used again. + - **Ban Duration**: How long a ban lasts before the account is automatically un-banned. + + --- + + #### Separation of Concerns: Time vs. Errors + + It is critical to understand that this DAG primarily handles time-based state changes. Error-based banning may be handled by worker DAGs during URL processing. This separation ensures that maintenance is predictable and based on timers, while acute, error-driven actions are handled immediately by the workers that encounter them. + + --- + + #### State Transitions Performed by This DAG: + + On each run, this DAG fetches the raw status and timestamps for all accounts and performs the following checks: + + 1. **Un-banning (`BANNED` -> `ACTIVE`)**: + - **Condition**: An account has been in the `BANNED` state for longer than the configured `account_ban_duration_hours`. 
+ - **Action**: The DAG calls the `unbanAccount` service endpoint to lift the ban. + + 2. **Activation (`RESTING` -> `ACTIVE`)**: + - **Condition**: An account has been in the `RESTING` state for longer than the configured `account_cooldown_duration_min`. + - **Action**: The DAG updates the account's status to `ACTIVE` directly in Redis. + + 3. **Resting (`ACTIVE` -> `RESTING`)**: + - **Condition**: An account has performed more successful requests than the configured `account_requests_limit` since it was last activated. + - **Action**: The DAG updates the account's status to `RESTING` directly in Redis. + + This process gives full control over time-based account lifecycle management to the Airflow orchestrator. + """, + params={ + 'account_requests_limit': Param(250, type="integer", description="Number of successful requests an account can make before it is rested."), + 'account_cooldown_duration_min': Param(60, type="integer", description="Duration in minutes an account must rest before being activated again. Default is 1 hour."), + 'account_ban_duration_hours': Param(24, type="integer", description="Duration in hours an account stays banned before it can be un-banned."), + } ) as dag: manage_account_states() diff --git a/airflow/dags/ytdlp_ops_dispatcher.py b/airflow/dags/ytdlp_ops_v01_dispatcher.py similarity index 91% rename from airflow/dags/ytdlp_ops_dispatcher.py rename to airflow/dags/ytdlp_ops_v01_dispatcher.py index a6f50fa..0835603 100644 --- a/airflow/dags/ytdlp_ops_dispatcher.py +++ b/airflow/dags/ytdlp_ops_v01_dispatcher.py @@ -72,16 +72,16 @@ def dispatch_url_to_worker(**context): # The hook will parse the queue name from the run_id itself. run_id = f"worker_run_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}" - logger.info(f"Triggering 'ytdlp_ops_worker_per_url' with run_id '{run_id}'") + logger.info(f"Triggering 'ytdlp_ops_v01_worker_per_url' with run_id '{run_id}'") trigger_dag( - dag_id='ytdlp_ops_worker_per_url', + dag_id='ytdlp_ops_v01_worker_per_url', run_id=run_id, conf=conf_to_pass, replace_microseconds=False ) with DAG( - dag_id='ytdlp_ops_dispatcher', + dag_id='ytdlp_ops_v01_dispatcher', default_args={'owner': 'airflow', 'retries': 0}, schedule=None, # This DAG is only triggered by the orchestrator. start_date=days_ago(1), @@ -94,10 +94,10 @@ with DAG( 1. It pulls a single URL from the Redis `_inbox` queue. 2. It runs on the generic `queue-dl` to find any available worker. 3. It determines the worker's hostname and constructs a dedicated queue name (e.g., `queue-dl-dl-worker-1`). - 4. It triggers the `ytdlp_ops_worker_per_url` DAG, passing the URL and the dedicated queue name in the configuration. + 4. It triggers the `ytdlp_ops_v01_worker_per_url` DAG, passing the URL and the dedicated queue name in the configuration. This dispatcher-led affinity, combined with the `task_instance_mutation_hook` cluster policy, ensures that all subsequent processing for that URL happens on the same machine. - The `ytdlp_ops_orchestrator` is used to trigger a batch of these dispatcher runs. + The `ytdlp_ops_v01_orchestrator` is used to trigger a batch of these dispatcher runs. 
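    The affinity described above depends on the `task_instance_mutation_hook` cluster policy. A minimal sketch of what such a hook can look like, assuming the run_id convention shown here (`..._q_<dedicated-queue>`); the real implementation lives in `airflow/config/custom_task_hooks.py` and may differ in detail:

    ```python
    def task_instance_mutation_hook(task_instance):
        """Pin worker tasks to the per-host Celery queue encoded in their run_id."""
        run_id = task_instance.run_id or ""
        if task_instance.dag_id.startswith("ytdlp_ops_") and "_q_" in run_id:
            # run_id convention: worker_run_<dispatcher_run_id>_<ts>_q_<dedicated-queue>
            task_instance.queue = run_id.split("_q_")[-1]
    ```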
""", # All params are passed through from the orchestrator render_template_as_native_obj=True, diff --git a/airflow/dags/ytdlp_ops_v01_orchestrator.py b/airflow/dags/ytdlp_ops_v01_orchestrator.py new file mode 100644 index 0000000..3ea5aa0 --- /dev/null +++ b/airflow/dags/ytdlp_ops_v01_orchestrator.py @@ -0,0 +1,444 @@ +# -*- coding: utf-8 -*- +# vim:fenc=utf-8 +# +# Copyright © 2024 rl +# +# Distributed under terms of the MIT license. + +""" +DAG to orchestrate ytdlp_ops_dispatcher DAG runs based on a defined policy. +It fetches URLs from a Redis queue and launches dispatchers in controlled bunches, +which in turn trigger workers with affinity. +""" + +from airflow import DAG +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.operators.python import PythonOperator +from airflow.models.param import Param +from airflow.models.variable import Variable +from airflow.utils.dates import days_ago +from airflow.api.common.trigger_dag import trigger_dag +from airflow.models.dagrun import DagRun +from airflow.models.dag import DagModel +from datetime import timedelta +import logging +import random +import time +import json + +# Import utility functions +from utils.redis_utils import _get_redis_client + +# Import Thrift modules for proxy status check +from pangramia.yt.tokens_ops import YTTokenOpService +from thrift.protocol import TBinaryProtocol +from thrift.transport import TSocket, TTransport + +# Configure logging +logger = logging.getLogger(__name__) + +DEFAULT_REQUEST_PARAMS_JSON = """{ + "context_reuse_policy": { + "enabled": true, + "max_age_seconds": 86400, + "reuse_visitor_id": true, + "reuse_cookies": true + }, + "token_generation_strategy": { + "youtubei_js": { + "generate_po_token": true, + "generate_gvs_token": true + } + }, + "ytdlp_params": { + "use_curl_prefetch": false, + "token_supplement_strategy": { + "youtubepot_bgutilhttp_extractor": { + "enabled": true + } + }, + "visitor_id_override": { + "enabled": true + } + }, + "session_params": { + "lang": "en-US", + "location": "US", + "deviceCategory": "MOBILE", + "user_agents": { + "youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)", + "yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)" + } + } +}""" + +# Default settings +DEFAULT_QUEUE_NAME = 'video_queue' +DEFAULT_REDIS_CONN_ID = 'redis_default' +DEFAULT_TOTAL_WORKERS = 3 +DEFAULT_WORKERS_PER_BUNCH = 1 +DEFAULT_WORKER_DELAY_S = 5 +DEFAULT_BUNCH_DELAY_S = 20 + +DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") +DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) + +# --- Helper Functions --- + +def _check_application_queue(redis_client, queue_base_name: str) -> int: + """Checks and logs the length of the application's inbox queue.""" + inbox_queue_name = f"{queue_base_name}_inbox" + logger.info(f"--- Checking Application Work Queue ---") + try: + q_len = redis_client.llen(inbox_queue_name) + logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).") + return q_len + except Exception as e: + logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True) + return -1 # Indicate an error + +def _inspect_celery_queues(redis_client, queue_names: list): + """Inspects Celery queues in Redis and logs their status.""" + 
logger.info("--- Inspecting Celery Queues in Redis ---") + for queue_name in queue_names: + try: + q_len = redis_client.llen(queue_name) + logger.info(f"Queue '{queue_name}': Length = {q_len}") + + if q_len > 0: + logger.info(f"Showing up to 10 tasks in '{queue_name}':") + # Fetch up to 10 items from the start of the list (queue) + items_bytes = redis_client.lrange(queue_name, 0, 9) + for i, item_bytes in enumerate(items_bytes): + try: + # Celery tasks are JSON-encoded strings + task_data = json.loads(item_bytes.decode('utf-8')) + # Pretty print for readability in logs + pretty_task_data = json.dumps(task_data, indent=2) + logger.info(f" Task {i+1}:\n{pretty_task_data}") + except (json.JSONDecodeError, UnicodeDecodeError) as e: + logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}") + except Exception as e: + logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True) + logger.info("--- End of Queue Inspection ---") + + +# --- Main Orchestration Callable --- + +def orchestrate_workers_ignition_callable(**context): + """ + Main orchestration logic. Triggers a specified number of dispatcher DAGs + to initiate self-sustaining processing loops. + """ + params = context['params'] + ti = context['task_instance'] + logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.") + logger.info("Starting dispatcher ignition sequence.") + + dispatcher_dag_id = 'ytdlp_ops_v01_dispatcher' + worker_queue = 'queue-dl' + app_queue_name = params['queue_name'] + + logger.info(f"Running in v1 (monolithic) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'") + + dag_model = DagModel.get_dagmodel(dispatcher_dag_id) + if dag_model and dag_model.is_paused: + logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.") + raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.") + + total_workers = int(params['total_workers']) + workers_per_bunch = int(params['workers_per_bunch']) + + # --- Input Validation --- + if total_workers <= 0: + logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.") + raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).") + + if workers_per_bunch <= 0: + logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. Aborting.") + raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.") + # --- End Input Validation --- + + worker_delay = int(params['delay_between_workers_s']) + bunch_delay = int(params['delay_between_bunches_s']) + + # Create a list of worker numbers to trigger + worker_indices = list(range(total_workers)) + bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)] + + # --- Inspect Queues before starting --- + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + redis_client = _get_redis_client(redis_conn_id) + + # First, check the application queue for work + app_queue_len = _check_application_queue(redis_client, app_queue_name) + + if params.get('skip_if_queue_empty') and app_queue_len == 0: + logger.info("'skip_if_queue_empty' is True and application queue is empty. 
Skipping worker ignition.") + raise AirflowSkipException("Application work queue is empty.") + + # Then, inspect the target Celery queue for debugging + _inspect_celery_queues(redis_client, [worker_queue]) + except AirflowSkipException: + raise # Re-raise to let Airflow handle the skip + except Exception as e: + logger.error(f"Could not inspect queues due to an error: {e}. Continuing with ignition sequence.") + # --- End of Inspection --- + + logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.") + + dag_run_id = context['dag_run'].run_id + total_triggered = 0 + + for i, bunch in enumerate(bunches): + logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---") + for j, _ in enumerate(bunch): + # Create a unique run_id for each dispatcher run + run_id = f"dispatched_{dag_run_id}_{total_triggered}" + + # Pass all orchestrator params to the dispatcher, which will then pass them to the worker. + conf_to_pass = {p: params[p] for p in params} + + logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})") + logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") + + trigger_dag( + dag_id=dispatcher_dag_id, + run_id=run_id, + conf=conf_to_pass, + replace_microseconds=False + ) + total_triggered += 1 + + # Delay between dispatches in a bunch + if j < len(bunch) - 1: + logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...") + time.sleep(worker_delay) + + # Delay between bunches + if i < len(bunches) - 1: + logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---") + time.sleep(bunch_delay) + + logger.info(f"--- Ignition sequence complete. Total dispatcher runs triggered: {total_triggered}. ---") + + # --- Final Queue Inspection --- + final_check_delay = 30 # seconds + logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...") + time.sleep(final_check_delay) + + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + redis_client = _get_redis_client(redis_conn_id) + + # Log connection details for debugging broker mismatch issues + conn_kwargs = redis_client.connection_pool.connection_kwargs + logger.info(f"Final check using Redis connection '{redis_conn_id}': " + f"host={conn_kwargs.get('host')}, " + f"port={conn_kwargs.get('port')}, " + f"db={conn_kwargs.get('db')}") + + _inspect_celery_queues(redis_client, [worker_queue]) + logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. " + "If queues are empty, workers have started processing.") + except Exception as e: + logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.") + + + + +# ============================================================================= +# DAG Definition +# ============================================================================= + +default_args = { + 'owner': 'airflow', + 'depends_on_past': False, + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': timedelta(minutes=1), + 'start_date': days_ago(1), +} + +with DAG( + dag_id='ytdlp_ops_v01_orchestrator', + default_args=default_args, + schedule=None, # This DAG runs only when triggered. + max_active_runs=1, # Only one ignition process should run at a time. 
+ catchup=False, + description='Ignition system for ytdlp_ops_v01_dispatcher DAGs. Starts self-sustaining worker loops via dispatchers.', + doc_md=""" + ### YT-DLP v1 (Monolithic) Worker Ignition System + + This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v1 monolithic worker**. + It does **not** process URLs itself. Its only job is to trigger a specified number of `ytdlp_ops_v01_dispatcher` DAGs, + which in turn pull URLs and trigger `ytdlp_ops_v01_worker_per_url` with worker affinity. + + #### How it Works: + + 1. **Manual Trigger:** You manually trigger this DAG with parameters defining how many dispatcher loops to start (`total_workers`), in what configuration (`workers_per_bunch`, delays). + 2. **Ignition:** The orchestrator triggers the initial set of dispatcher DAGs in a "fire-and-forget" manner, passing all its configuration parameters to them. + 3. **Completion:** Once all initial dispatchers have been triggered, the orchestrator's job is complete. + + The dispatchers then take over, each pulling a URL, determining affinity, and triggering a worker DAG. + + #### Client Selection (`clients` parameter): + The `clients` parameter determines which YouTube client persona is used for token generation. Different clients have different capabilities and requirements. + + **Supported Clients:** + + | Client | Visitor ID | Player poToken | GVS poToken | Cookies Support | Notes | + | ---------------- | ------------ | -------------- | ------------ | --------------- | ------------------------------------------------------------------ | + | `tv` | Required | Not Required | Not Required | Supported | All formats may have DRM if you request too much. | + | `web_safari` | Required | Required | Required* | Supported | *Provides HLS (m3u8) formats which may not require a GVS token. | + | `mweb` | Required | Required | Required | Supported | | + | `web_camoufox` | Required | Required | Required | Supported | Camoufox variant of `web`. | + + **Untested / Not Recommended Clients:** + + | Client | Visitor ID | Player poToken | GVS poToken | Cookies Support | Notes | + | ---------------- | ------------ | -------------- | ------------ | --------------- | ------------------------------------------------------------------ | + | `web` | Required | Required | Required | Supported | Only SABR formats available. | + | `tv_simply` | Required | Not Required | Not Required | Not Supported | | + | `tv_embedded` | Required | Not Required | Not Required | Supported | Requires account cookies for most videos. | + | `web_embedded` | Required | Not Required | Not Required | Supported | Only for embeddable videos. | + | `web_music` | Required | Required | Required | Supported | | + | `web_creator` | Required | Required | Required | Supported | Requires account cookies. | + | `android` | Required | Required | Required | Not Supported | | + | `android_vr` | Required | Not Required | Not Required | Not Supported | YouTube Kids videos are not available. | + | `ios` | Required | Required | Required | Not Supported | | + + Other `_camoufox` variants are also available but untested. 
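    For reference, the ignition parameters translate into a simple bunching plan. A small illustration that mirrors the orchestrator's own slicing logic (values are examples only):

    ```python
    def plan_bunches(total_workers: int, workers_per_bunch: int) -> list[list[int]]:
        """Group worker indices into bunches, exactly as the ignition callable does."""
        indices = list(range(total_workers))
        return [indices[i:i + workers_per_bunch] for i in range(0, total_workers, workers_per_bunch)]

    # e.g. total_workers=5, workers_per_bunch=2 -> [[0, 1], [2, 3], [4]]
    # delay_between_workers_s applies inside each bunch, delay_between_bunches_s between bunches.
    print(plan_bunches(5, 2))
    ```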
+ """, + tags=['ytdlp', 'mgmt', 'master'], + params={ + # --- Ignition Control Parameters --- + 'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."), + 'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."), + 'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."), + 'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."), + 'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."), + + # --- Worker Passthrough Parameters --- + 'on_auth_failure': Param( + 'retry_with_new_account', + type="string", + enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'proceed_loop_under_manual_inspection'], + title="[Worker Param] On Authentication Failure Policy", + description="Policy for a worker when a bannable authentication error occurs. " + "'stop_loop': Ban the account, mark URL as failed, and stop the worker's loop. " + "'retry_with_new_account': (Default) Ban the failed account, retry ONCE with a new account. If retry fails, ban the second account and stop." + "'retry_without_ban': If a connection error (e.g. SOCKS timeout) occurs, retry with a new account but do NOT ban the first account/proxy. If retry fails, stop the loop without banning." + "'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene." + ), + 'on_download_failure': Param( + 'proceed_loop', + type="string", + enum=['stop_loop', 'proceed_loop', 'retry_with_new_token'], + title="[Worker Param] On Download Failure Policy", + description="Policy for a worker when a download or probe error occurs. " + "'stop_loop': Mark URL as failed and stop the worker's loop. " + "'proceed_loop': (Default) Mark URL as failed but continue the processing loop with a new URL. " + "'retry_with_new_token': Attempt to get a new token with a new account and retry the download once. If it fails again, proceed loop." + ), + 'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}), + 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."), + 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), + 'clients': Param( + 'mweb,web_camoufox,tv', + type="string", + enum=[ + 'mweb,web_camoufox,tv', + 'mweb', + 'web_camoufox', + 'tv', + 'custom', + 'tv,web_safari,mweb,web_camoufox', + 'web_safari', + 'web', + 'web_embedded', + 'web_music', + 'web_creator', + 'web_safari_camoufox', + 'web_embedded_camoufox', + 'web_music_camoufox', + 'web_creator_camoufox', + 'mweb_camoufox', + 'android', + 'android_music', + 'android_creator', + 'android_vr', + 'ios', + 'ios_music', + 'ios_creator', + 'tv_simply', + 'tv_embedded', + ], + title="[Worker Param] Clients", + description="[Worker Param] Comma-separated list of clients for token generation. 
Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details." + ), + 'account_pool': Param('ytdlp_account', type="string", description="[Worker Param] Account pool prefix or comma-separated list."), + 'account_pool_size': Param(10, type=["integer", "null"], description="[Worker Param] If using a prefix for 'account_pool', this specifies the number of accounts to generate (e.g., 10 for 'prefix_01' through 'prefix_10'). Required when using a prefix."), + 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode. Format: prefix_YYYYMMDDHHMMSS_client_XX."), + 'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string", description="[Worker Param] IP of the ytdlp-ops-server. Default is from Airflow variable YT_AUTH_SERVICE_IP or hardcoded."), + 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer", description="[Worker Param] Port of the Envoy load balancer. Default is from Airflow variable YT_AUTH_SERVICE_PORT or hardcoded."), + 'machine_id': Param("ytdlp-ops-airflow-service", type="string", description="[Worker Param] Identifier for the client machine."), + 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="A specific proxy URL to use for the request, overriding the server's proxy pool logic."), + 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="[Worker Param] If True and all accounts in a prefix-based pool are exhausted, create a new one automatically."), + # --- Download Control Parameters --- + 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. 
A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), + 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), + 'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), + 'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), + 'fragment_retries': Param(2, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."), + 'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."), + 'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."), + 'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."), + 'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."), + 'download_format_preset': Param( + 'formats_2', + type="string", + enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], + title="[Worker Param] Download Format Preset", + description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + ), + 'download_format_custom': Param( + '18,140,299/298/137/136/135/134/133', + type="string", + title="[Worker Param] Custom Download Format", + description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'." + ), + 'downloader': Param( + 'default', + type="string", + enum=['default', 'aria2c'], + title="[Worker Param] Downloader", + description="Choose the downloader for yt-dlp." + ), + 'downloader_args_aria2c': Param( + 'aria2c:-x 4 -k 2M --max-download-limit=3M', + type="string", + title="[Worker Param] Aria2c Downloader Arguments", + description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'." + ), + 'yt_dlp_extra_args': Param( + '--restrict-filenames', + type=["string", "null"], + title="[Worker Param] Extra yt-dlp arguments", + description="Extra command-line arguments for yt-dlp during download." + ), + } +) as dag: + + orchestrate_task = PythonOperator( + task_id='start_worker_loops', + python_callable=orchestrate_workers_ignition_callable, + ) + orchestrate_task.doc_md = """ + ### Start Worker Loops + This is the main task that executes the ignition policy. + - It triggers `ytdlp_ops_dispatcher` DAGs according to the batch settings. + - It passes all its parameters down to the dispatchers, which will use them to trigger workers. 
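    A condensed sketch of the fire-and-forget triggering this task performs, using the same `trigger_dag` API the DAG file imports. Run IDs and conf contents are illustrative; the real task also adds per-bunch delays and Celery queue inspection:

    ```python
    from airflow.api.common.trigger_dag import trigger_dag


    def ignite_dispatchers(base_run_id: str, conf: dict, count: int) -> None:
        """Trigger `count` dispatcher runs, each carrying the full orchestrator conf."""
        for n in range(count):
            trigger_dag(
                dag_id="ytdlp_ops_v01_dispatcher",
                run_id=f"dispatched_{base_run_id}_{n}",
                conf=conf,
                replace_microseconds=False,
            )
    ```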
+ """ diff --git a/airflow/dags/ytdlp_ops_v01_worker_per_url.py b/airflow/dags/ytdlp_ops_v01_worker_per_url.py new file mode 100644 index 0000000..9c01451 --- /dev/null +++ b/airflow/dags/ytdlp_ops_v01_worker_per_url.py @@ -0,0 +1,1794 @@ +# -*- coding: utf-8 -*- +# vim:fenc=utf-8 +# +# Copyright © 2024 rl +# +# Distributed under terms of the MIT license. + +""" +DAG for processing a single YouTube URL passed via DAG run configuration. +This is the "Worker" part of a Sensor/Worker pattern. +This DAG has been refactored to use the TaskFlow API to implement worker affinity, +ensuring all tasks for a single URL run on the same machine. +""" + +from __future__ import annotations + +from airflow.decorators import task, task_group +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.models import Variable +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.models.xcom_arg import XComArg +from airflow.operators.dummy import DummyOperator +from airflow.utils.dates import days_ago +from airflow.utils.task_group import TaskGroup +from airflow.api.common.trigger_dag import trigger_dag +from copy import copy +from datetime import datetime, timedelta +import concurrent.futures +import json +import logging +import os +import random +import re +import redis +import socket +import time +import traceback +import uuid + +# Import utility functions and Thrift modules +from utils.redis_utils import _get_redis_client +from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext +from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException +from pangramia.yt.tokens_ops import YTTokenOpService +from thrift.protocol import TBinaryProtocol +from thrift.transport import TSocket, TTransport +from thrift.transport.TTransport import TTransportException + +# Configure logging +logger = logging.getLogger(__name__) + + +# --- Client Stats Helper --- + +def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str): + """Updates success/failure statistics for a client type in Redis.""" + if not clients_str: + logger.warning("Cannot update client stats: 'clients' string is empty.") + return + + # Assumption: The service tries clients in the order provided. + # We attribute the result to the first client in the list. + primary_client = clients_str.split(',')[0].strip() + if not primary_client: + logger.warning("Cannot update client stats: could not determine primary client.") + return + + stats_key = "client_stats" + + try: + # Using a pipeline with WATCH for safe concurrent updates. + with redis_client.pipeline() as pipe: + pipe.watch(stats_key) + + current_stats_json = redis_client.hget(stats_key, primary_client) + stats = {} + if current_stats_json: + try: + stats = json.loads(current_stats_json) + except json.JSONDecodeError: + logger.warning(f"Could not parse existing stats for client '{primary_client}'. 
Resetting stats.") + stats = {} + + stats.setdefault('success_count', 0) + stats.setdefault('failure_count', 0) + + details = { + 'timestamp': time.time(), 'url': url, + 'machine_id': machine_id, 'dag_run_id': dag_run_id, + } + + if status == 'success': + stats['success_count'] += 1 + stats['latest_success'] = details + elif status == 'failure': + stats['failure_count'] += 1 + stats['latest_failure'] = details + + pipe.multi() + pipe.hset(stats_key, primary_client, json.dumps(stats)) + pipe.execute() + + logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.") + + except redis.exceptions.WatchError: + logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. Skipping this update.") + except Exception as e: + logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True) + + +# Default settings from Airflow Variables or hardcoded fallbacks +DEFAULT_QUEUE_NAME = 'video_queue' +DEFAULT_REDIS_CONN_ID = 'redis_default' +DEFAULT_TIMEOUT = 3600 +DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") +DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) + +# The queue is set to a fallback here. The actual worker-specific queue is +# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py), +# which parses the target queue from the DAG run_id. +DEFAULT_ARGS = { + 'owner': 'airflow', + 'retries': 0, + 'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook. +} + + +# --- Helper Functions --- + +def _get_thrift_client(host, port, timeout): + """Helper to create and connect a Thrift client.""" + transport = TSocket.TSocket(host, port) + transport.setTimeout(timeout * 1000) + transport = TTransport.TFramedTransport(transport) + protocol = TBinaryProtocol.TBinaryProtocol(transport) + client = YTTokenOpService.Client(protocol) + transport.open() + logger.info(f"Connected to Thrift server at {host}:{port}") + return client, transport + +def _extract_video_id(url): + """Extracts YouTube video ID from URL.""" + if not url or not isinstance(url, str): + return None + patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})'] + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + return None + +def _get_account_pool(params: dict) -> list: + """ + Gets the list of accounts to use for processing, filtering out banned/resting accounts. + Supports explicit list, prefix-based generation, and single account modes. 
+ """ + account_pool_str = params.get('account_pool', 'default_account') + accounts = [] + is_prefix_mode = False + + if ',' in account_pool_str: + accounts = [acc.strip() for acc in account_pool_str.split(',') if acc.strip()] + else: + prefix = account_pool_str + pool_size_param = params.get('account_pool_size') + if pool_size_param is not None: + is_prefix_mode = True + pool_size = int(pool_size_param) + + if params.get('prepend_client_to_account', True): + clients_str = params.get('clients', '') + primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown' + timestamp = datetime.now().strftime('%Y%m%d%H%M%S') + new_prefix = f"{prefix}_{timestamp}_{primary_client}" + accounts = [f"{new_prefix}_{i:02d}" for i in range(1, pool_size + 1)] + else: + accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)] + else: + accounts = [prefix] + + if not accounts: + raise AirflowException("Initial account pool is empty.") + + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + try: + redis_client = _get_redis_client(redis_conn_id) + active_accounts = [] + for account in accounts: + status_bytes = redis_client.hget(f"account_status:{account}", "status") + status = status_bytes.decode('utf-8') if status_bytes else "ACTIVE" + if status not in ['BANNED'] and 'RESTING' not in status: + active_accounts.append(account) + + if not active_accounts and accounts: + auto_create = params.get('auto_create_new_accounts_on_exhaustion', False) + if auto_create and is_prefix_mode: + new_account_id = f"{account_pool_str}-auto-{str(uuid.uuid4())[:8]}" + logger.warning(f"Account pool exhausted. Auto-creating new account: '{new_account_id}'") + active_accounts.append(new_account_id) + else: + raise AirflowException("All accounts in the configured pool are currently exhausted.") + accounts = active_accounts + except Exception as e: + logger.error(f"Could not filter accounts from Redis. Using unfiltered pool. Error: {e}", exc_info=True) + + if not accounts: + raise AirflowException("Account pool is empty after filtering.") + + logger.info(f"Final active account pool with {len(accounts)} accounts.") + return accounts + +# ============================================================================= +# TASK DEFINITIONS (TaskFlow API) +# ============================================================================= + +@task +def get_url_and_assign_account(**context): + """ + Gets the URL to process from the DAG run configuration and assigns an active account. + This is the first task in the pinned-worker DAG. + """ + params = context['params'] + ti = context['task_instance'] + + # --- Worker Pinning Verification --- + # This is a safeguard against a known Airflow issue where clearing a task + # can cause the task_instance_mutation_hook to be skipped, breaking pinning. + # See: https://github.com/apache/airflow/issues/20143 + expected_queue = None + if ti.run_id and '_q_' in ti.run_id: + expected_queue = ti.run_id.split('_q_')[-1] + + if not expected_queue: + # Fallback to conf if run_id parsing fails for some reason + expected_queue = params.get('worker_queue') + + if expected_queue and ti.queue != expected_queue: + error_msg = ( + f"WORKER PINNING FAILURE: Task is running on queue '{ti.queue}' but was expected on '{expected_queue}'. " + "This usually happens after manually clearing a task, which is not the recommended recovery method for this DAG. 
" + "To recover a failed URL, let the DAG run fail, use the 'ytdlp_mgmt_queues' DAG to requeue the URL, " + "and use the 'ytdlp_ops_orchestrator' to start a new worker loop if needed." + ) + logger.error(error_msg) + raise AirflowException(error_msg) + elif expected_queue: + logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.") + # --- End Verification --- + + # The URL is passed by the dispatcher DAG via 'url_to_process'. + # For manual runs, we fall back to 'manual_url_to_process'. + url_to_process = params.get('url_to_process') + if not url_to_process: + url_to_process = params.get('manual_url_to_process') + if url_to_process: + logger.info(f"Using URL from manual run parameter: '{url_to_process}'") + + if not url_to_process: + raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter.") + logger.info(f"Received URL '{url_to_process}' to process.") + + # Mark the URL as in-progress in Redis + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) + progress_queue = f"{queue_name}_progress" + client = _get_redis_client(redis_conn_id) + + progress_data = { + 'status': 'in_progress', + 'start_time': time.time(), + 'dag_run_id': context['dag_run'].run_id, + 'hostname': socket.gethostname(), + } + client.hset(progress_queue, url_to_process, json.dumps(progress_data)) + logger.info(f"Marked URL '{url_to_process}' as in-progress.") + except Exception as e: + logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True) + + # Account assignment logic is the same as before. + account_id = random.choice(_get_account_pool(params)) + logger.info(f"Selected account '{account_id}' for this run.") + + return { + 'url_to_process': url_to_process, + 'account_id': account_id, + 'accounts_tried': [account_id], + } + +@task +def get_token(initial_data: dict, **context): + """Makes a single attempt to get a token from the Thrift service.""" + ti = context['task_instance'] + params = context['params'] + + account_id = initial_data['account_id'] + url = initial_data['url_to_process'] + info_json_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles') + + host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT)) + machine_id = params.get('machine_id') or socket.gethostname() + clients = params.get('clients') + request_params_json = params.get('request_params_json', '{}') + assigned_proxy_url = params.get('assigned_proxy_url') + + # Pretty-print the request parameters for debugging + try: + pretty_request_params = json.dumps(json.loads(request_params_json), indent=2) + logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---") + except (json.JSONDecodeError, TypeError): + logger.warning("Could not parse request_params_json. 
Using raw content.") + logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---") + + # Construct Airflow log context to pass to the service + try: + from airflow.configuration import conf + remote_base = conf.get('logging', 'remote_base_log_folder') + log_path = ( + f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/" + f"task_id={ti.task_id}/attempt={ti.try_number}.log" + ) + airflow_log_context = AirflowLogContext( + logS3Path=log_path, + dagId=ti.dag_id, + runId=ti.run_id, + taskId=ti.task_id, + tryNumber=ti.try_number, + workerHostname=socket.gethostname(), + queue=ti.queue + ) + logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}") + except Exception as e: + logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.") + airflow_log_context = AirflowLogContext( + dagId=ti.dag_id, + runId=ti.run_id, + taskId=ti.task_id, + tryNumber=ti.try_number, + workerHostname=socket.gethostname(), + queue=ti.queue + ) + + logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---") + client, transport = None, None + try: + client, transport = _get_thrift_client(host, port, timeout) + token_data = client.getOrRefreshToken( + accountId=account_id, + updateType=TokenUpdateMode.AUTO, + url=url, + clients=clients, + machineId=machine_id, + airflowLogContext=airflow_log_context, + requestParamsJson=request_params_json, + assignedProxyUrl=assigned_proxy_url + ) + + # Log a compact summary of the Thrift response, omitting large/detailed fields. + summary_token_data = copy(token_data) + if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson: + summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..." + if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob: + summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..." + # These will be logged separately below. + if hasattr(summary_token_data, 'requestSummary'): + summary_token_data.requestSummary = "..." + if hasattr(summary_token_data, 'communicationLogs'): + summary_token_data.communicationLogs = "..." + logger.info(f"Thrift service response summary: {summary_token_data}") + + request_summary = getattr(token_data, 'requestSummary', None) + if request_summary: + # Prepending a newline for better separation in logs. + logger.info(f"\n--- Request Summary ---\n{request_summary}") + + communication_logs = getattr(token_data, 'communicationLogs', None) + if communication_logs: + logger.info("--- Communication Logs from Token Service ---") + logger.info(communication_logs) + logger.info("--- End of Communication Logs ---") + + info_json = getattr(token_data, 'infoJson', None) + if not (info_json and json.loads(info_json)): + raise AirflowException("Service returned success but info.json was empty or invalid.") + + video_id = _extract_video_id(url) + os.makedirs(info_json_dir, exist_ok=True) + # Use a readable timestamp for a unique filename on each attempt. 
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json") + with open(info_json_path, 'w', encoding='utf-8') as f: + f.write(info_json) + + proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None) + ytdlp_command = getattr(token_data, 'ytdlpCommand', None) + if ytdlp_command: + logger.info(f"--- YTDLP Command from Token Service ---\n{ytdlp_command}\n--- End of YTDLP Command ---") + + return { + 'info_json_path': info_json_path, + 'socks_proxy': getattr(token_data, proxy_attr) if proxy_attr else None, + 'ytdlp_command': ytdlp_command, + 'successful_account_id': account_id, + 'original_url': url, # Include original URL for fallback + } + except (PBServiceException, PBUserException, TTransportException) as e: + error_context = getattr(e, 'context', None) + if isinstance(error_context, str): + try: error_context = json.loads(error_context.replace("'", "\"")) + except: pass + + error_details = { + 'error_message': getattr(e, 'message', str(e)), + 'error_code': getattr(e, 'errorCode', 'TRANSPORT_ERROR'), + 'proxy_url': error_context.get('proxy_url') if isinstance(error_context, dict) else None + } + logger.error(f"Thrift call failed for account '{account_id}'. Details: {error_details}") + ti.xcom_push(key='error_details', value=error_details) + raise AirflowException(f"Thrift call failed: {error_details['error_message']}") + finally: + if transport and transport.isOpen(): + transport.close() + +@task.branch +def handle_bannable_error_branch(task_id_to_check: str, **context): + """ + Inspects a failed task and routes to retry logic if the error is retryable. + Routes to a fatal error handler for non-retryable infrastructure issues. + """ + ti = context['task_instance'] + params = context['params'] + error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details') + if not error_details: + logger.error(f"Task {task_id_to_check} failed without error details. Marking as fatal.") + return 'handle_fatal_error' + + error_message = error_details.get('error_message', '').strip() + error_code = error_details.get('error_code', '').strip() + policy = params.get('on_auth_failure', 'retry_with_new_account') + + # Check if this is an age confirmation error - should not stop the loop + if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower(): + logger.info(f"Age confirmation error detected for '{task_id_to_check}'. This is a content restriction, not a bot detection issue.") + return 'handle_age_restriction_error' + + # Fatal Thrift connection errors that should stop all processing. + if error_code == 'TRANSPORT_ERROR': + logger.error(f"Fatal Thrift connection error from '{task_id_to_check}'. Stopping processing.") + return 'handle_fatal_error' + + # Service-side connection errors that are potentially retryable. + connection_errors = ['SOCKS5_CONNECTION_FAILED', 'SOCKET_TIMEOUT', 'CAMOUFOX_TIMEOUT'] + if error_code in connection_errors: + logger.info(f"Handling connection error '{error_code}' from '{task_id_to_check}'. Policy: '{policy}'") + if policy == 'stop_loop': + logger.warning(f"Connection error with 'stop_loop' policy. 
Marking as fatal.") + return 'handle_fatal_error' + elif policy == 'retry_without_ban': + logger.info("Retrying with a new account without banning.") + return 'assign_new_account_for_direct_retry' + else: # 'retry_with_new_account' and 'proceed_loop_under_manual_inspection' should also retry without ban on connection error + logger.info(f"Connection error with policy '{policy}'. Retrying with a new account without banning.") + return 'assign_new_account_for_direct_retry' + + # Bannable errors (e.g., bot detection) that can be retried with a new account. + is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"] + logger.info(f"Handling failure from '{task_id_to_check}'. Error code: '{error_code}', Policy: '{policy}'") + if is_bannable: + if policy == 'retry_with_new_account': + return 'ban_account_and_prepare_for_retry' + if policy == 'retry_without_ban': + return 'assign_new_account_for_direct_retry' + if policy == 'stop_loop': + return 'ban_and_report_immediately' + if policy == 'proceed_loop_under_manual_inspection': + logger.warning(f"Bannable error with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.") + return 'report_bannable_and_continue' + + # Any other error is considered fatal for this run. + logger.error(f"Unhandled or non-retryable error '{error_code}' from '{task_id_to_check}'. Marking as fatal.") + return 'handle_fatal_error' + +@task_group(group_id='ban_and_retry_logic') +def ban_and_retry_logic(initial_data: dict): + """ + Task group that checks for sliding window failures before banning an account. + If the account meets ban criteria, it's banned. Otherwise, the ban is skipped + but the retry proceeds. + """ + + @task.branch + def check_sliding_window_for_ban(data: dict, **context): + """ + Checks Redis for recent failures. If thresholds are met, proceeds to ban. + Otherwise, proceeds to a dummy task to allow retry without ban. + """ + params = context['params'] + account_id = data['account_id'] + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + + # These thresholds should ideally be Airflow Variables to be configurable + failure_window_seconds = 3600 # 1 hour + failure_threshold_count = 5 + failure_threshold_unique_proxies = 3 + + try: + redis_client = _get_redis_client(redis_conn_id) + failure_key = f"account_failures:{account_id}" + now = time.time() + window_start = now - failure_window_seconds + + # 1. Remove old failures and get recent ones + redis_client.zremrangebyscore(failure_key, '-inf', window_start) + recent_failures = redis_client.zrange(failure_key, 0, -1) + + if len(recent_failures) >= failure_threshold_count: + # Decode from bytes to string for processing + recent_failures_str = [f.decode('utf-8') for f in recent_failures] + # Failure format is "context:job_id:timestamp" + unique_proxies = {f.split(':')[0] for f in recent_failures_str} + + if len(unique_proxies) >= failure_threshold_unique_proxies: + logger.warning( + f"Account {account_id} has failed {len(recent_failures)} times " + f"with {len(unique_proxies)} unique contexts in the last hour. Proceeding to ban." + ) + return 'ban_account_task' + else: + logger.info( + f"Account {account_id} has {len(recent_failures)} failures, but only " + f"from {len(unique_proxies)} unique contexts (threshold is {failure_threshold_unique_proxies}). Skipping ban." + ) + else: + logger.info(f"Account {account_id} has {len(recent_failures)} failures (threshold is {failure_threshold_count}). 
Skipping ban.") + + except Exception as e: + logger.error(f"Error during sliding window check for account {account_id}: {e}. Skipping ban as a precaution.", exc_info=True) + + return 'skip_ban_task' + + @task(task_id='ban_account_task') + def ban_account_task(data: dict, **context): + """Wrapper task to call the main ban_account function.""" + ban_account(initial_data=data, reason="Banned by Airflow worker after sliding window check", **context) + + @task(task_id='skip_ban_task') + def skip_ban_task(): + """Dummy task to represent the 'skip ban' path.""" + pass + + check_task = check_sliding_window_for_ban(data=initial_data) + ban_task_in_group = ban_account_task(data=initial_data) + skip_task = skip_ban_task() + + check_task >> [ban_task_in_group, skip_task] + + +@task +def ban_account(initial_data: dict, reason: str, **context): + """Bans a single account via the Thrift service.""" + params = context['params'] + account_id = initial_data['account_id'] + client, transport = None, None + try: + host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT)) + client, transport = _get_thrift_client(host, port, timeout) + logger.warning(f"Banning account '{account_id}'. Reason: {reason}") + client.banAccount(accountId=account_id, reason=reason) + except Exception as e: + logger.error(f"Failed to issue ban for account '{account_id}': {e}", exc_info=True) + finally: + if transport and transport.isOpen(): + transport.close() + +@task +def assign_new_account_for_direct_retry(initial_data: dict, **context): + """Selects a new, unused account for a direct retry (e.g., after connection error).""" + params = context['params'] + accounts_tried = initial_data['accounts_tried'] + account_pool = _get_account_pool(params) + available_for_retry = [acc for acc in account_pool if acc not in accounts_tried] + if not available_for_retry: + raise AirflowException("No other accounts available in the pool for a retry.") + + new_account_id = random.choice(available_for_retry) + accounts_tried.append(new_account_id) + logger.info(f"Selected new account for retry: '{new_account_id}'") + + # Return updated initial_data with new account + return { + 'url_to_process': initial_data['url_to_process'], + 'account_id': new_account_id, + 'accounts_tried': accounts_tried, + } + +@task +def assign_new_account_after_ban_check(initial_data: dict, **context): + """Selects a new, unused account for the retry attempt after a ban check.""" + params = context['params'] + accounts_tried = initial_data['accounts_tried'] + account_pool = _get_account_pool(params) + available_for_retry = [acc for acc in account_pool if acc not in accounts_tried] + if not available_for_retry: + raise AirflowException("No other accounts available in the pool for a retry.") + + new_account_id = random.choice(available_for_retry) + accounts_tried.append(new_account_id) + logger.info(f"Selected new account for retry: '{new_account_id}'") + + # Return updated initial_data with new account + return { + 'url_to_process': initial_data['url_to_process'], + 'account_id': new_account_id, + 'accounts_tried': accounts_tried, + } + +@task +def ban_and_report_immediately(initial_data: dict, reason: str, **context): + """Bans an account and prepares for failure reporting and continuing the loop.""" + ban_account(initial_data, reason, **context) + logger.info(f"Account '{initial_data.get('account_id')}' banned. 
Proceeding to report failure.") + # This task is a leaf in its path and is followed by the failure reporting task. + return initial_data # Pass data along if needed by reporting + +@task +def list_available_formats(token_data: dict, **context): + """ + Lists available formats for the given video using the info.json. + This is for debugging and informational purposes. + """ + import subprocess + import shlex + + info_json_path = token_data.get('info_json_path') + if not (info_json_path and os.path.exists(info_json_path)): + logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).") + return [] + + try: + cmd = [ + 'yt-dlp', + '--verbose', + '--list-formats', + '--load-info-json', info_json_path, + ] + + copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) + logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}") + + process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) + + if process.stderr: + logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}") + + if process.returncode != 0: + logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}") + + available_formats = [] + if process.stdout: + logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---") + # Parse the output to get format IDs + lines = process.stdout.split('\n') + header_found = False + for line in lines: + if line.startswith('ID '): + header_found = True + continue + if header_found and line.strip() and line.strip()[0].isdigit(): + format_id = line.split()[0] + available_formats.append(format_id) + logger.info(f"Parsed available format IDs: {available_formats}") + + return available_formats + + except Exception as e: + logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True) + return [] + + +@task +def download_and_probe(token_data: dict, available_formats: list[str], **context): + """ + Uses retrieved token data to download and probe media files. + Supports parallel downloading of specific, comma-separated format IDs. + If probing fails, retries downloading only the failed files. 
+ """ + import subprocess + import shlex + import concurrent.futures + + try: + params = context['params'] + info_json_path = token_data.get('info_json_path') + proxy = token_data.get('socks_proxy') + original_url = token_data.get('original_url') + download_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles/video') + + format_preset = params.get('download_format_preset', 'best_audio') + if format_preset == 'custom': + download_format = params.get('download_format_custom') + if not download_format: + raise AirflowException("Format preset is 'custom' but no custom format string was provided.") + elif format_preset == 'best_audio': + download_format = 'ba[ext=m4a]/bestaudio/best' + elif format_preset == 'formats_0': + download_format = '18,140' + elif format_preset == 'formats_2': + download_format = '18,140,299/298/137/136/135/134/133' + elif format_preset == 'formats_3': + download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318' + else: + download_format = 'ba[ext=m4a]/bestaudio/best' + + output_template = params.get('output_path_template', "%(title)s [%(id)s].f%(format_id)s.%(ext)s") + full_output_path = os.path.join(download_dir, output_template) + retry_on_probe_failure = params.get('retry_on_probe_failure', False) + + if not (info_json_path and os.path.exists(info_json_path)): + raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).") + + def run_yt_dlp_command(format_selector: str): + """Constructs and runs a yt-dlp command, returning a list of final filenames.""" + cmd = [ + 'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path, + '-f', format_selector, '-o', full_output_path, + '--print', 'filename', '--continue', '--no-progress', '--no-simulate', + '--no-write-info-json', '--ignore-errors', '--no-playlist', + ] + + if params.get('fragment_retries'): + cmd.extend(['--fragment-retries', str(params['fragment_retries'])]) + if params.get('limit_rate'): + cmd.extend(['--limit-rate', params['limit_rate']]) + if params.get('socket_timeout'): + cmd.extend(['--socket-timeout', str(params['socket_timeout'])]) + if params.get('min_sleep_interval'): + cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])]) + if params.get('max_sleep_interval'): + cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])]) + if params.get('yt_dlp_test_mode'): + cmd.append('--test') + + downloader = params.get('downloader', 'default') + if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')): + cmd.extend(['--proxy', proxy]) + + gost_process = None + try: + if downloader == 'aria2c': + cmd.extend(['--downloader', 'aria2c']) + downloader_args = params.get('downloader_args_aria2c') + if proxy and proxy.startswith('socks5://'): + import socket + from contextlib import closing + def find_free_port(): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] + local_port = find_free_port() + http_proxy = f"http://127.0.0.1:{local_port}" + logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}") + gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy] + gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + time.sleep(1) + if gost_process.poll() is not None: + stdout, stderr = gost_process.communicate() + logger.error(f"gost failed to 
start. Exit: {gost_process.returncode}. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}") + raise AirflowException("gost proxy tunnel failed to start.") + user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "") + final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}' + cmd.extend(['--downloader-args', final_args_str]) + elif downloader_args: + cmd.extend(['--downloader-args', downloader_args]) + + extra_args = params.get('yt_dlp_extra_args') + if extra_args: + cmd.extend(shlex.split(extra_args)) + if original_url: + cmd.append(original_url) + + copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) + logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}") + process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600) + + if process.stdout: + logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}") + if process.stderr: + # yt-dlp often prints progress and informational messages to stderr + logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}") + + if process.returncode != 0: + logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}") + # STDOUT and STDERR are already logged above. + raise AirflowException(f"yt-dlp command failed for format '{format_selector}'. {process.stderr}") + + # In test mode, files are not created, so we only check that yt-dlp returned filenames. + # Otherwise, we verify that the files actually exist on disk. + output_files = [f for f in process.stdout.strip().split('\n') if f] + if not params.get('yt_dlp_test_mode'): + output_files = [f for f in output_files if os.path.exists(f)] + + if not output_files: + log_msg = (f"Test run for format '{format_selector}' did not produce any filenames." + if params.get('yt_dlp_test_mode') else + f"Download for format '{format_selector}' finished but no output files exist.") + exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames." + if params.get('yt_dlp_test_mode') else + f"Download for format '{format_selector}' did not produce a file.") + + logger.error(log_msg) + logger.error(f"Full STDOUT:\n{process.stdout}") + logger.error(f"Full STDERR:\n{process.stderr}") + raise AirflowException(exc_msg) + + log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:" + logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}") + return output_files + finally: + if gost_process: + logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.") + gost_process.terminate() + try: + gost_process.wait(timeout=5) + except subprocess.TimeoutExpired: + gost_process.kill() + gost_process.wait() + + def run_ffmpeg_probe(filename): + """Probes a file with ffmpeg to check for corruption.""" + logger.info(f"Probing downloaded file: {filename}") + try: + subprocess.run(['ffmpeg', '-v', 'error', '-i', filename, '-f', 'null', '-'], check=True, capture_output=True, text=True) + logger.info(f"SUCCESS: Probe confirmed valid media file: {filename}") + except subprocess.CalledProcessError as e: + logger.error(f"ffmpeg probe failed for '{filename}'. 
File may be corrupt.") + logger.error(f"ffmpeg STDERR: {e.stderr}") + raise AirflowException(f"ffmpeg probe failed for {filename}.") + + def _download_and_probe_formats(formats_to_process: list[str] | str): + """ + Helper to download a list of format IDs (or a single complex selector) and probe the results. + Returns a tuple of (successful_files, failed_probe_files). + """ + all_downloaded_files = [] + delay_between_formats = params.get('delay_between_formats_s', 0) + + if isinstance(formats_to_process, list) and formats_to_process: + logger.info(f"Downloading {len(formats_to_process)} format(s) sequentially: {formats_to_process}") + for i, fid in enumerate(formats_to_process): + all_downloaded_files.extend(run_yt_dlp_command(fid)) + if delay_between_formats > 0 and i < len(formats_to_process) - 1: + logger.info(f"Waiting {delay_between_formats}s before next format download...") + time.sleep(delay_between_formats) + + elif isinstance(formats_to_process, str): + logger.info(f"Using complex format selector '{formats_to_process}'. Running as a single command.") + all_downloaded_files = run_yt_dlp_command(formats_to_process) + + if not all_downloaded_files: + logger.warning("Download process completed but produced no files.") + return [], [] + + if params.get('yt_dlp_test_mode'): + logger.info("Test mode is enabled. Skipping probe of output files.") + return all_downloaded_files, [] + + if params.get('skip_probe'): + logger.info("Skipping probe of output files as per configuration.") + return all_downloaded_files, [] + + successful_probes, failed_probes = [], [] + logger.info(f"Probing {len(all_downloaded_files)} downloaded file(s) sequentially...") + for filename in all_downloaded_files: + try: + run_ffmpeg_probe(filename) + successful_probes.append(filename) + except Exception: + failed_probes.append(filename) + + return successful_probes, failed_probes + + # --- Main Execution Logic --- + with open(info_json_path, 'r', encoding='utf-8') as f: + info = json.load(f) + + # Split the format string by commas to get a list of individual format selectors. + # This enables parallel downloads of different formats or format groups. + # For example, '18,140,299/298' becomes ['18', '140', '299/298'], + # and each item will be downloaded in a separate yt-dlp process. + if download_format and isinstance(download_format, str): + formats_to_download_initial = [selector.strip() for selector in download_format.split(',') if selector.strip()] + else: + # Fallback for safety, though download_format should always be a string. + formats_to_download_initial = [] + + if not formats_to_download_initial: + raise AirflowException("No valid download format selectors were found after parsing.") + + # --- Filter requested formats against available formats --- + final_formats_to_download = [] + if not available_formats: + logger.warning("List of available formats is empty. Will attempt to download all requested formats without validation.") + final_formats_to_download = formats_to_download_initial + else: + for selector in formats_to_download_initial: + # A selector can be '140' or '299/298/137' + individual_ids = re.split(r'[/+]', selector) + if any(fid in available_formats for fid in individual_ids): + final_formats_to_download.append(selector) + else: + logger.warning(f"Requested format selector '{selector}' contains no available formats. 
Skipping.") + + if not final_formats_to_download: + raise AirflowException("None of the requested formats are available for this video.") + + # --- Initial Download and Probe --- + successful_files, failed_files = _download_and_probe_formats(final_formats_to_download) + + if params.get('yt_dlp_test_mode'): + logger.info(f"Test mode: yt-dlp returned {len(successful_files)} filenames. Skipping probe failure checks.") + if not successful_files: + raise AirflowException("Test run did not produce any filenames.") + return successful_files + + if not failed_files: + if not successful_files: + raise AirflowException("Download and probe process completed but produced no valid files.") + return successful_files + + # --- Handle Probe Failures and Retry --- + if not retry_on_probe_failure: + raise AirflowException(f"Probe failed for {len(failed_files)} file(s) and retry is disabled: {failed_files}") + + logger.warning(f"Probe failed for {len(failed_files)} file(s). Attempting one re-download for failed files...") + + format_ids_to_retry = [] + # Since each download is now for a specific selector and the output template + # includes the format_id, we can always attempt to extract the format_id + # from the failed filename for a targeted retry. + for f in failed_files: + match = re.search(r'\.f([\d]+)\.', f) + if match: + format_ids_to_retry.append(match.group(1)) + else: + logger.error(f"Could not extract format_id from failed file '{f}'. Cannot retry this specific file.") + formats_to_download_retry = format_ids_to_retry + + if not formats_to_download_retry: + raise AirflowException("Probe failed, but could not determine which formats to retry.") + + # Rename failed files to allow for a fresh download attempt + for f in failed_files: + try: + failed_path = f"{f}.probe_failed_{int(time.time())}" + os.rename(f, failed_path) + logger.info(f"Renamed corrupted file to {failed_path}") + except OSError as rename_err: + logger.error(f"Could not rename corrupted file '{f}': {rename_err}") + + # --- Retry Download and Probe --- + retried_successful_files, retried_failed_files = _download_and_probe_formats(formats_to_download_retry) + + if retried_failed_files: + logger.error(f"Probe failed again for {len(retried_failed_files)} file(s) after retry: {retried_failed_files}") + + final_success_list = successful_files + retried_successful_files + if not final_success_list: + raise AirflowException("All files failed to download or probe correctly, even after retry.") + + logger.info(f"Retry complete. Final success count: {len(final_success_list)} file(s).") + + if params.get('yt_dlp_cleanup_mode', True): + logger.info(f"Cleanup mode is enabled. Creating .empty files and deleting originals for {len(final_success_list)} files.") + for f in final_success_list: + try: + empty_file_path = f"{f}.empty" + with open(empty_file_path, 'w') as fp: + pass # create empty file + logger.info(f"Created empty file: {empty_file_path}") + os.remove(f) + logger.info(f"Deleted original file: {f}") + except Exception as e: + logger.error(f"Error during cleanup for file {f}: {e}", exc_info=True) + # Do not fail the task for a cleanup error, just log it. + + return final_success_list + except Exception as e: + if 'HTTP Error 403: Forbidden' in str(e): + logger.warning("Detected 'HTTP Error 403: Forbidden' in download error. 
Pushing details to XCom for branching.") + ti = context['task_instance'] + ti.xcom_push(key='download_error_details', value={'error_code': 'HTTP_403_FORBIDDEN', 'error_message': str(e)}) + raise AirflowException(f"Download and probe failed: {e}") from e + +@task +def mark_url_as_success(initial_data: dict, downloaded_file_paths: list, token_data: dict, **context): + """Records the successful result in Redis.""" + params = context['params'] + url = initial_data['url_to_process'] + result_data = { + 'status': 'success', 'end_time': time.time(), 'url': url, + 'downloaded_file_paths': downloaded_file_paths, **token_data, + 'dag_run_id': context['dag_run'].run_id, + } + client = _get_redis_client(params['redis_conn_id']) + + # Update activity counters + try: + proxy_url = token_data.get('socks_proxy') + account_id = token_data.get('successful_account_id') + now = time.time() + # Use a unique member to prevent collisions, e.g., dag_run_id + member = context['dag_run'].run_id + + if proxy_url: + proxy_key = f"activity:per_proxy:{proxy_url}" + client.zadd(proxy_key, {member: now}) + client.expire(proxy_key, 3600 * 2) # Expire after 2 hours + if account_id: + account_key = f"activity:per_account:{account_id}" + client.zadd(account_key, {member: now}) + client.expire(account_key, 3600 * 2) # Expire after 2 hours + except Exception as e: + logger.error(f"Could not update activity counters: {e}", exc_info=True) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'success', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on success: {e}", exc_info=True) + + progress_queue = f"{params['queue_name']}_progress" + result_queue = f"{params['queue_name']}_result" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored success result for URL '{url}' and removed from progress queue.") + +@task(trigger_rule='one_failed') +def report_failure_and_stop(**context): + """ + Handles a failed URL processing attempt by recording a detailed error report to Redis + and stopping the worker loop. + """ + params = context['params'] + ti = context['task_instance'] + url = params.get('url_to_process', 'unknown') + + # Collect error details from XCom + error_details = {} + + # Check for error details from get_token tasks + first_token_task_id = 'get_token' + retry_token_task_id = 'retry_get_token' + + first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') + retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') + + # Use the most recent error details + if retry_token_error: + error_details = retry_token_error + elif first_token_error: + error_details = first_token_error + else: + pass + + logger.error(f"A failure occurred while processing URL '{url}'. 
Reporting to Redis and stopping loop.") + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error_details': error_details + } + + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on failure: {e}", exc_info=True) + + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") + except Exception as e: + logger.error(f"Could not report failure to Redis: {e}", exc_info=True) + + +@task(trigger_rule='one_failed') +def report_failure_and_continue(**context): + """ + Handles a failed URL processing attempt by recording a detailed error report to Redis. + This is a common endpoint for various failure paths that should not stop the overall dispatcher loop. + """ + params = context['params'] + ti = context['task_instance'] + url = params.get('url_to_process', 'unknown') + + # Collect error details from XCom + error_details = {} + + # Check for error details from get_token tasks + first_token_task_id = 'get_token' + retry_token_task_id = 'retry_get_token' + + first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') + retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') + + # Use the most recent error details + if retry_token_error: + error_details = retry_token_error + elif first_token_error: + error_details = first_token_error + else: + # Check for other possible error sources + # This is a simplified approach - in a real implementation you might want to + # check more task IDs or use a more sophisticated error collection mechanism + pass + + logger.error(f"A failure occurred while processing URL '{url}'. 
Reporting to Redis.") + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error_details': error_details + } + + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on failure: {e}", exc_info=True) + + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") + except Exception as e: + logger.error(f"Could not report failure to Redis: {e}", exc_info=True) + + +@task(trigger_rule='one_failed') +def handle_fatal_error(**context): + """ + Handles fatal, non-retryable errors (e.g., infrastructure issues). + This task reports the failure to Redis before failing the DAG run to ensure + failed URLs are queued for later reprocessing, then stops the processing loop. + """ + params = context['params'] + ti = context['task_instance'] + url = params.get('url_to_process', 'unknown') + + # Collect error details + error_details = {} + first_token_task_id = 'get_token' + retry_token_task_id = 'retry_get_token' + + first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') + retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') + + # Use the most recent error details + if retry_token_error: + error_details = retry_token_error + elif first_token_error: + error_details = first_token_error + + logger.error(f"A fatal, non-retryable error occurred for URL '{url}'. 
See previous task logs for details.") + + # Report failure to Redis so the URL can be reprocessed later + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True) + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error': 'fatal_error', + 'error_message': 'Fatal non-retryable error occurred', + 'error_details': error_details + } + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.") + except Exception as e: + logger.error(f"Could not report fatal error to Redis: {e}", exc_info=True) + + # Fail the DAG run to prevent automatic continuation of the processing loop + raise AirflowException("Failing DAG due to fatal error. The dispatcher loop will stop.") + + +@task(trigger_rule='one_success') +def continue_processing_loop(**context): + """ + After a successful run, triggers a new dispatcher to continue the processing loop, + effectively asking for the next URL to be processed. + """ + params = context['params'] + dag_run = context['dag_run'] + + # Do not continue the loop for manual runs of the worker DAG. + # A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'. + if not dag_run.run_id.startswith('worker_run_'): + logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") + return + + # Create a new unique run_id for the dispatcher. + # Using a timestamp and UUID ensures the ID is unique and does not grow in length over time, + # preventing database errors. + new_dispatcher_run_id = f"retriggered_by_worker_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}" + + # Pass all original parameters from the orchestrator through to the new dispatcher run. + conf_to_pass = {k: v for k, v in params.items() if v is not None} + + # The new dispatcher will pull its own URL and determine its own queue, so we don't pass these. + conf_to_pass.pop('url_to_process', None) + conf_to_pass.pop('worker_queue', None) + + logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.") + trigger_dag( + dag_id='ytdlp_ops_v01_dispatcher', + run_id=new_dispatcher_run_id, + conf=conf_to_pass, + replace_microseconds=False + ) + + +@task.branch(trigger_rule='one_failed') +def handle_retry_failure_branch(task_id_to_check: str, **context): + """ + Inspects a failed retry attempt and decides on the final action. + On retry, most errors are considered fatal for the URL, but not for the system. 
+ """ + ti = context['task_instance'] + params = context['params'] + error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details') + if not error_details: + return 'handle_fatal_error' + + error_message = error_details.get('error_message', '').strip() + error_code = error_details.get('error_code', '').strip() + + # Check if this is an age confirmation error - should not stop the loop + if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower(): + logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. This is a content restriction, not a bot detection issue.") + return 'handle_age_restriction_error' + + if error_code == 'TRANSPORT_ERROR': + logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.") + return 'handle_fatal_error' + + is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"] + if is_bannable: + policy = params.get('on_auth_failure', 'retry_with_new_account') + if policy == 'proceed_loop_under_manual_inspection': + logger.warning(f"Bannable error '{error_code}' on retry with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.") + return 'report_bannable_and_continue' + # On retry failure, we always ban and stop the loop for this URL. + logger.warning(f"Bannable error '{error_code}' on retry. Banning account and reporting failure.") + return 'ban_and_report_after_retry' + + logger.error(f"URL failed on retry with code '{error_code}'. Reporting failure and continuing loop.") + return 'report_failure_and_continue' + + +@task +def ban_and_report_after_retry(retry_data: dict, reason: str, **context): + """Bans the account used in a failed retry and prepares for failure reporting.""" + # The account to ban is the one from the retry attempt. + ban_account(retry_data, reason, **context) + logger.info(f"Account '{retry_data.get('account_id')}' banned after retry failed. Proceeding to report failure.") + return retry_data + + +@task.branch(trigger_rule='one_failed') +def handle_download_failure_branch(**context): + """ + If download or probe fails, decide whether to stop the loop, continue, or retry + based on the `on_download_failure` policy. + """ + params = context['params'] + policy = params.get('on_download_failure', 'proceed_loop') + ti = context['task_instance'] + + # The full task_id for download_and_probe is 'download_processing.download_and_probe' + download_error_details = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details') + + if policy == 'retry_with_new_token': + logger.info("Download failed. Policy is to retry with a new token. Branching to retry logic.") + return 'retry_logic_for_download' + + if policy == 'stop_loop': + logger.error(f"Download or probe failed with policy '{policy}'. Stopping loop by routing to fatal error handler.") + return 'handle_fatal_error' + + # Default policy is 'proceed_loop' + logger.warning(f"Download or probe failed with policy '{policy}'. Reporting failure and continuing loop.") + return 'report_failure_and_continue' + + +@task(trigger_rule='one_success') +def coalesce_token_data(get_token_result=None, retry_get_token_result=None): + """ + Selects the successful token data from either the first attempt or the retry. + The task that did not run or failed will have a result of None. 
+ """ + if retry_get_token_result: + logger.info("Using token data from retry attempt.") + return retry_get_token_result + if get_token_result: + logger.info("Using token data from initial attempt.") + return get_token_result + # This should not be reached if trigger_rule='one_success' is working correctly. + raise AirflowException("Could not find a successful token result from any attempt.") + + +@task +def report_bannable_and_continue(**context): + """ + Handles a bannable error by reporting it, but continues the loop + as per the 'proceed_loop_under_manual_inspection' policy. + """ + params = context['params'] + ti = context['task_instance'] + url = params.get('url_to_process', 'unknown') + + # Collect error details + error_details = {} + first_token_task_id = 'get_token' + retry_token_task_id = 'retry_get_token' + + first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') + retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') + + # Use the most recent error details + if retry_token_error: + error_details = retry_token_error + elif first_token_error: + error_details = first_token_error + + logger.error(f"Bannable error for URL '{url}'. Policy is to continue loop under manual supervision.") + + # Report failure to Redis + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on bannable error: {e}", exc_info=True) + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error': 'bannable_error_manual_override', + 'error_message': 'Bannable error occurred, but policy is set to continue loop under manual supervision.', + 'error_details': error_details + } + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored bannable error for URL '{url}' in '{result_queue}' and '{fail_queue}'.") + except Exception as e: + logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True) + + +@task +def handle_age_restriction_error(**context): + """ + Handles age restriction errors specifically. These are content restrictions + that cannot be bypassed by using different accounts, so we report the failure + and continue the processing loop rather than stopping it. + """ + params = context['params'] + ti = context['task_instance'] + url = params.get('url_to_process', 'unknown') + + # Collect error details + error_details = {} + first_token_task_id = 'get_token' + retry_token_task_id = 'retry_get_token' + + first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') + retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') + + # Use the most recent error details + if retry_token_error: + error_details = retry_token_error + elif first_token_error: + error_details = first_token_error + + logger.error(f"Age restriction error for URL '{url}'. 
This content requires age confirmation and cannot be bypassed.") + + # Report failure to Redis so the URL can be marked as failed + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on age restriction error: {e}", exc_info=True) + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error': 'age_restriction', + 'error_message': 'Content requires age confirmation', + 'error_details': error_details + } + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored age restriction error for URL '{url}' in '{result_queue}' and '{fail_queue}'.") + except Exception as e: + logger.error(f"Could not report age restriction error to Redis: {e}", exc_info=True) + + # This is NOT a fatal error for the processing loop - we just continue with the next URL + + +# ============================================================================= +# DAG Definition with TaskGroups +# ============================================================================= +with DAG( + dag_id='ytdlp_ops_v01_worker_per_url', + default_args=DEFAULT_ARGS, + schedule=None, + start_date=days_ago(1), + catchup=False, + tags=['ytdlp', 'worker'], + doc_md=__doc__, + render_template_as_native_obj=True, + params={ + 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"), + 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"), + 'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string"), + 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer"), + 'account_pool': Param('default_account', type="string"), + 'account_pool_size': Param(None, type=["integer", "null"]), + 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."), + 'machine_id': Param(None, type=["string", "null"]), + 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="A specific proxy URL to use for the request, overriding the server's proxy pool logic."), + 'clients': Param('mweb,web_camoufox,tv', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"), + 'timeout': Param(DEFAULT_TIMEOUT, type="integer"), + 'output_path_template': Param("%(title)s [%(id)s].f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."), + 'on_auth_failure': Param( + 'retry_with_new_account', + type="string", + enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'proceed_loop_under_manual_inspection'], + title="[Worker Param] On Authentication Failure Policy", + description="Policy for handling bannable authentication failures." 
+ ), + 'on_download_failure': Param( + 'proceed_loop', + type="string", + enum=['stop_loop', 'proceed_loop', 'retry_with_new_token'], + title="[Worker Param] On Download Failure Policy", + description="Policy for handling download or probe failures." + ), + 'request_params_json': Param('{}', type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."), + 'retry_on_probe_failure': Param(False, type="boolean"), + 'skip_probe': Param(False, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), + 'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), + 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"), + 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), + 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), + 'fragment_retries': Param(10, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."), + 'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."), + 'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."), + 'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."), + 'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."), + 'download_format_preset': Param( + 'custom', + type="string", + enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], + title="Download Format Preset", + description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + ), + 'download_format_custom': Param( + '18,140,299/298/137/136/135/134/133', + type="string", + title="Custom Download Format", + description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')." + ), + 'downloader': Param( + 'default', + type="string", + enum=['default', 'aria2c'], + title="Downloader", + description="Choose the downloader for yt-dlp." + ), + 'downloader_args_aria2c': Param( + 'aria2c:-x 4 -k 2M --max-download-limit=3M', + type="string", + title="Aria2c Downloader Arguments", + description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'." 
+ ), + 'yt_dlp_extra_args': Param( + '', + type=["string", "null"], + title="Extra yt-dlp arguments", + description="Extra command-line arguments for yt-dlp during download." + ), + # --- Manual Run / Internal Parameters --- + 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL to process. This is ignored if triggered by the dispatcher."), + 'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), + 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), + } +) as dag: + initial_data = get_url_and_assign_account() + + # --- Task Instantiation with TaskGroups --- + + # Main success/failure handlers (outside groups for clear end points) + fatal_error_task = handle_fatal_error() + report_failure_and_stop_task = report_failure_and_stop() + report_failure_task = report_failure_and_continue() + continue_loop_task = continue_processing_loop() + age_restriction_task = handle_age_restriction_error() + report_bannable_and_continue_task = report_bannable_and_continue() + + # --- Task Group 1: Initial Attempt --- + with TaskGroup("initial_attempt", tooltip="Initial token acquisition attempt") as initial_attempt_group: + first_token_attempt = get_token(initial_data) + initial_branch_task = handle_bannable_error_branch.override(trigger_rule='one_failed')( + task_id_to_check=first_token_attempt.operator.task_id + ) + + # Tasks for the "stop_loop" policy on initial attempt + ban_and_report_immediately_task = ban_and_report_immediately.override(task_id='ban_and_report_immediately')( + initial_data=initial_data, + reason="Banned by Airflow worker (policy is stop_loop)" + ) + + first_token_attempt >> initial_branch_task + initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task] + + # --- Task Group 2: Retry Logic --- + with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group: + # Retry path tasks + ban_and_retry_group = ban_and_retry_logic.override(group_id='ban_account_and_prepare_for_retry')( + initial_data=initial_data + ) + # This task is for retries after a ban check + after_ban_account_task = assign_new_account_after_ban_check.override(task_id='assign_new_account_after_ban_check')( + initial_data=initial_data + ) + # This task is for direct retries (e.g., on connection error) + direct_retry_account_task = assign_new_account_for_direct_retry.override(task_id='assign_new_account_for_direct_retry')( + initial_data=initial_data + ) + + @task(trigger_rule='one_success') + def coalesce_retry_data(direct_retry_data=None, after_ban_data=None): + """Coalesces account data from one of the two mutually exclusive retry paths.""" + if direct_retry_data: + return direct_retry_data + if after_ban_data: + return after_ban_data + raise AirflowException("Could not find valid account data for retry.") + + coalesced_retry_data = coalesce_retry_data( + direct_retry_data=direct_retry_account_task, + after_ban_data=after_ban_account_task + ) + + retry_token_task = get_token.override(task_id='retry_get_token')( + initial_data=coalesced_retry_data + ) + + # Retry failure branch and its tasks + retry_branch_task = 
handle_retry_failure_branch.override(trigger_rule='one_failed')( + task_id_to_check=retry_token_task.operator.task_id + ) + ban_after_retry_report_task = ban_and_report_after_retry.override(task_id='ban_and_report_after_retry')( + retry_data=coalesced_retry_data, + reason="Banned by Airflow worker after failed retry" + ) + + # Internal dependencies within retry group + ban_and_retry_group >> after_ban_account_task + after_ban_account_task >> coalesced_retry_data + direct_retry_account_task >> coalesced_retry_data + coalesced_retry_data >> retry_token_task + retry_token_task >> retry_branch_task + retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task, report_bannable_and_continue_task] + ban_after_retry_report_task >> report_failure_and_stop_task + + # --- Task Group 3: Download and Processing --- + with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group: + # Coalesce, download, and success tasks + token_data = coalesce_token_data( + get_token_result=first_token_attempt, + retry_get_token_result=retry_token_task + ) + list_formats_task = list_available_formats(token_data=token_data) + download_task = download_and_probe( + token_data=token_data, + available_formats=list_formats_task, + ) + download_branch_task = handle_download_failure_branch.override(trigger_rule='one_failed')() + + # Internal dependencies within download group + first_token_attempt >> token_data + retry_token_task >> token_data + token_data >> list_formats_task + list_formats_task >> download_task + download_task >> download_branch_task + + # --- Task Group 4: Download Retry Logic --- + with TaskGroup("retry_logic_for_download", tooltip="Retry download with a new account after a 403 error") as retry_logic_for_download_group: + new_account_data = assign_new_account_for_direct_retry.override(task_id='assign_new_account_for_download_retry')( + initial_data=initial_data + ) + new_token_data = get_token.override(task_id='get_token_for_download_retry')( + initial_data=new_account_data + ) + new_formats = list_available_formats.override(task_id='list_formats_for_download_retry')( + token_data=new_token_data + ) + retry_download_task = download_and_probe.override(task_id='retry_download_and_probe')( + token_data=new_token_data, + available_formats=new_formats + ) + + # If any task in this group fails, the entire group fails. + # The group's failure will trigger the top-level `report_failure_task`. + new_account_data >> new_token_data >> new_formats >> retry_download_task + + # --- Coalesce final results for success tasks --- + @task(trigger_rule='one_success') + def coalesce_final_download_files(initial_dl=None, retry_dl=None, **context): + """ + Selects the successful list of downloaded files from either the first attempt or the retry. + This version checks task instance states to be more robust against ambiguous None results. + """ + try: + ti = context['task_instance'] + dag_run = ti.get_dagrun() + + retry_dl_ti = dag_run.get_task_instance('retry_logic_for_download.retry_download_and_probe') + + if retry_dl_ti and retry_dl_ti.state == 'success': + logger.info("Using downloaded files from the retry download path.") + return retry_dl + except Exception as e: + logger.warning(f"Could not check state of retry download task, falling back to initial download result. 
Error: {e}") + + logger.info("Using downloaded files from the initial download path.") + return initial_dl + + @task(trigger_rule='one_success') + def coalesce_final_token_data(initial_token_task_result=None, download_retry_token_task_result=None, **context): + """ + Selects the correct token data for the success report. + It checks if the download retry path was taken by seeing if its token task ran and succeeded. + """ + # We can't just check for a result, as a skipped task might have a `None` result. + # We need to check the state of the task instance. + # If the download retry token task succeeded, it means that path was taken. + try: + ti = context['task_instance'] + retry_token_ti = ti.get_dagrun().get_task_instance('retry_logic_for_download.get_token_for_download_retry') + if retry_token_ti and retry_token_ti.state == 'success': + logger.info("Using token data from download retry path.") + return download_retry_token_task_result + except Exception as e: + logger.warning(f"Could not check state of retry token task, falling back to initial token. Error: {e}") + + logger.info("Using token data from initial auth path.") + return initial_token_task_result + + final_files = coalesce_final_download_files( + initial_dl=download_task, + retry_dl=retry_download_task + ) + final_token = coalesce_final_token_data( + initial_token_task_result=coalesce_token_data( + get_token_result=first_token_attempt, + retry_get_token_result=retry_token_task + ), + download_retry_token_task_result=new_token_data + ) + + # Final success task, fed by coalesced results + final_success_task = mark_url_as_success.override(task_id='final_success_report')( + initial_data=initial_data, + downloaded_file_paths=final_files, + token_data=final_token + ) + final_success_task >> continue_loop_task + + # --- DAG Dependencies between TaskGroups --- + # Initial attempt can lead to retry logic or direct failure + initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task] + + # Ban and report immediately leads to failure reporting + ban_and_report_immediately_task >> report_failure_and_stop_task + + # Age restriction error leads to failure reporting and continues the loop + age_restriction_task >> continue_loop_task + report_bannable_and_continue_task >> continue_loop_task + report_failure_task >> continue_loop_task + + # Connect download failure branch to the new retry group + download_branch_task >> [retry_logic_for_download_group, report_failure_task, fatal_error_task] + + # Connect success paths to the coalescing tasks + download_task >> final_files + retry_download_task >> final_files + + # The token from the initial auth path is one input to the final token coalesce + coalesce_token_data(get_token_result=first_token_attempt, retry_get_token_result=retry_token_task) >> final_token + # The token from the download retry path is the other input + new_token_data >> final_token + + # Connect coalesced results to the final success task + final_files >> final_success_task + final_token >> final_success_task + + # If the download retry group fails, it should trigger the generic failure handler + retry_logic_for_download_group >> report_failure_task diff --git a/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py b/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py new file mode 100644 index 0000000..5b899e8 --- /dev/null +++ b/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +DAG to dispatch work to 
ytdlp_ops_worker_per_url_auth DAGs. +It pulls a URL from Redis and triggers an auth worker with a pinned queue. +""" + +from __future__ import annotations +import logging +import os +import socket +from datetime import timedelta + +from airflow.decorators import task +from airflow.exceptions import AirflowSkipException +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.api.common.trigger_dag import trigger_dag +from airflow.utils.dates import days_ago + +from utils.redis_utils import _get_redis_client + +logger = logging.getLogger(__name__) + +DEFAULT_QUEUE_NAME = 'queue2_auth' +DEFAULT_REDIS_CONN_ID = 'redis_default' + +@task(queue='queue-auth') +def dispatch_url_to_auth_worker(**context): + """ + Pulls one URL from Redis, determines the current worker's dedicated queue, + and triggers the auth worker DAG to process the URL on that specific queue. + """ + ti = context['task_instance'] + logger.info(f"Auth Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.") + + # --- Check for worker pause lock file --- + lock_file_path = '/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile' + hostname = socket.gethostname() + if os.path.exists(lock_file_path): + logger.info(f"Worker '{hostname}' is paused. Lock file found at '{lock_file_path}'. Skipping URL pull.") + raise AirflowSkipException(f"Worker '{hostname}' is paused.") + else: + logger.info(f"Worker '{hostname}' is active (no lock file found at '{lock_file_path}'). Proceeding to pull URL.") + + params = context['params'] + redis_conn_id = params['redis_conn_id'] + queue_name = params['queue_name'] + inbox_queue = f"{queue_name}_inbox" + + logger.info(f"Attempting to pull one URL from Redis queue '{inbox_queue}'...") + client = _get_redis_client(redis_conn_id) + url_bytes = client.lpop(inbox_queue) + + if not url_bytes: + logger.info("Redis auth inbox queue is empty. No work to dispatch. Skipping task.") + raise AirflowSkipException("Redis auth inbox queue is empty. No work to dispatch.") + + url_to_process = url_bytes.decode('utf-8') + logger.info(f"Pulled URL '{url_to_process}' from the queue.") + + # Determine the worker-specific queue for affinity + hostname = socket.gethostname() + worker_queue = f"queue-auth-{hostname}" + logger.info(f"Running on worker '{hostname}'. Dispatching job to its dedicated queue '{worker_queue}'.") + + conf_to_pass = {**params, 'url_to_process': url_to_process, 'worker_queue': worker_queue} + + run_id = f"worker_run_auth_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}" + + logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_auth' with run_id '{run_id}'") + trigger_dag( + dag_id='ytdlp_ops_v02_worker_per_url_auth', + run_id=run_id, + conf=conf_to_pass, + replace_microseconds=False + ) + +with DAG( + dag_id='ytdlp_ops_v02_dispatcher_auth', + default_args={'owner': 'airflow', 'retries': 0}, + schedule=None, + start_date=days_ago(1), + catchup=False, + tags=['ytdlp', 'worker', 'dispatcher', 'auth'], + is_paused_upon_creation=True, + doc_md=""" + ### YT-DLP Auth URL Dispatcher + + This DAG dispatches a single URL to an auth worker with a pinned queue. + It pulls from the `queue2_auth_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_auth` DAG. 
+ """, + render_template_as_native_obj=True, + params={ + 'queue_name': Param(DEFAULT_QUEUE_NAME, type='string', title='Queue Name', description='The base name of the Redis queue to pull URLs from.'), + 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'), + }, +) as dag: + dispatch_url_to_auth_worker() diff --git a/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py b/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py new file mode 100644 index 0000000..c1cdd12 --- /dev/null +++ b/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" +DAG to dispatch download jobs to ytdlp_ops_worker_per_url_dl DAGs. +It pulls a job payload from Redis and triggers a download worker. +""" + +from __future__ import annotations +import logging +import os +import socket +from datetime import timedelta + +from airflow.decorators import task +from airflow.exceptions import AirflowSkipException +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.api.common.trigger_dag import trigger_dag +from airflow.utils.dates import days_ago + +from utils.redis_utils import _get_redis_client + +logger = logging.getLogger(__name__) + +DEFAULT_QUEUE_NAME = 'queue2_dl' +DEFAULT_REDIS_CONN_ID = 'redis_default' + +@task(queue='queue-dl') +def dispatch_job_to_dl_worker(**context): + """ + Pulls one job payload from Redis, determines the current worker's dedicated queue, + and triggers the download worker DAG to process the job on that specific queue. + """ + ti = context['task_instance'] + logger.info(f"Download Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.") + + params = context['params'] + redis_conn_id = params['redis_conn_id'] + queue_name = params['queue_name'] + inbox_queue = f"{queue_name}_inbox" + + logger.info(f"Attempting to pull one job from Redis queue '{inbox_queue}'...") + client = _get_redis_client(redis_conn_id) + job_bytes = client.lpop(inbox_queue) + + if not job_bytes: + logger.info("Redis download inbox queue is empty. No work to dispatch. Skipping task.") + raise AirflowSkipException("Redis download inbox queue is empty. No work to dispatch.") + + job_data_str = job_bytes.decode('utf-8') + logger.info(f"Pulled job from the queue.") + + # Determine the worker-specific queue for affinity + hostname = socket.gethostname() + worker_queue = f"queue-dl-{hostname}" + logger.info(f"Running on worker '{hostname}'. Dispatching job to its dedicated queue '{worker_queue}'.") + + conf_to_pass = {**params, 'job_data': job_data_str, 'worker_queue': worker_queue} + + run_id = f"worker_run_dl_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}" + + logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_dl' with run_id '{run_id}'") + trigger_dag( + dag_id='ytdlp_ops_v02_worker_per_url_dl', + run_id=run_id, + conf=conf_to_pass, + replace_microseconds=False + ) + +with DAG( + dag_id='ytdlp_ops_v02_dispatcher_dl', + default_args={'owner': 'airflow', 'retries': 0}, + schedule=None, + start_date=days_ago(1), + catchup=False, + tags=['ytdlp', 'worker', 'dispatcher', 'download'], + is_paused_upon_creation=True, + doc_md=""" + ### YT-DLP Download Job Dispatcher + + This DAG dispatches a single download job to a download worker with a pinned queue. + It pulls a JSON payload from the `queue2_dl_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_dl` DAG. 
+ """, + render_template_as_native_obj=True, + params={ + 'queue_name': Param(DEFAULT_QUEUE_NAME, type='string', title='Queue Name', description='The base name of the Redis queue to pull job payloads from.'), + 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'), + }, +) as dag: + dispatch_job_to_dl_worker() diff --git a/airflow/dags/ytdlp_ops_orchestrator.py b/airflow/dags/ytdlp_ops_v02_orchestrator_auth.py similarity index 72% rename from airflow/dags/ytdlp_ops_orchestrator.py rename to airflow/dags/ytdlp_ops_v02_orchestrator_auth.py index b7feff1..65432ea 100644 --- a/airflow/dags/ytdlp_ops_orchestrator.py +++ b/airflow/dags/ytdlp_ops_v02_orchestrator_auth.py @@ -6,9 +6,7 @@ # Distributed under terms of the MIT license. """ -DAG to orchestrate ytdlp_ops_dispatcher DAG runs based on a defined policy. -It fetches URLs from a Redis queue and launches dispatchers in controlled bunches, -which in turn trigger workers with affinity. +DAG to orchestrate ytdlp_ops_dispatcher_v2_auth DAG runs based on a defined policy. """ from airflow import DAG @@ -37,8 +35,42 @@ from thrift.transport import TSocket, TTransport # Configure logging logger = logging.getLogger(__name__) +DEFAULT_REQUEST_PARAMS_JSON = """{ + "context_reuse_policy": { + "enabled": true, + "max_age_seconds": 86400, + "reuse_visitor_id": true, + "reuse_cookies": true + }, + "token_generation_strategy": { + "youtubei_js": { + "generate_po_token": true, + "generate_gvs_token": true + } + }, + "ytdlp_params": { + "use_curl_prefetch": false, + "token_supplement_strategy": { + "youtubepot_bgutilhttp_extractor": { + "enabled": true + } + }, + "visitor_id_override": { + "enabled": true + } + }, + "session_params": { + "lang": "en-US", + "location": "US", + "deviceCategory": "MOBILE", + "user_agents": { + "youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)", + "yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)" + } + } +}""" + # Default settings -DEFAULT_QUEUE_NAME = 'video_queue' DEFAULT_REDIS_CONN_ID = 'redis_default' DEFAULT_TOTAL_WORKERS = 3 DEFAULT_WORKERS_PER_BUNCH = 1 @@ -100,7 +132,12 @@ def orchestrate_workers_ignition_callable(**context): logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.") logger.info("Starting dispatcher ignition sequence.") - dispatcher_dag_id = 'ytdlp_ops_dispatcher' + dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_auth' + worker_queue = 'queue-auth' + app_queue_name = 'queue2_auth' + + logger.info(f"Running in v2 (auth) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'") + dag_model = DagModel.get_dagmodel(dispatcher_dag_id) if dag_model and dag_model.is_paused: logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.") @@ -127,13 +164,12 @@ def orchestrate_workers_ignition_callable(**context): bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)] # --- Inspect Queues before starting --- - worker_queue = 'queue-dl' # The static queue the worker DAG uses. 
try: redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) redis_client = _get_redis_client(redis_conn_id) # First, check the application queue for work - app_queue_len = _check_application_queue(redis_client, params['queue_name']) + app_queue_len = _check_application_queue(redis_client, app_queue_name) if params.get('skip_if_queue_empty') and app_queue_len == 0: logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.") @@ -224,26 +260,17 @@ default_args = { } with DAG( - dag_id='ytdlp_ops_orchestrator', + dag_id='ytdlp_ops_v02_orchestrator_auth', default_args=default_args, - schedule_interval=None, # This DAG runs only when triggered. + schedule=None, # This DAG runs only when triggered. max_active_runs=1, # Only one ignition process should run at a time. catchup=False, - description='Ignition system for ytdlp_ops_dispatcher DAGs. Starts self-sustaining worker loops via dispatchers.', + description='Ignition system for ytdlp_ops_v02_dispatcher_auth DAGs.', doc_md=""" - ### YT-DLP Worker Ignition System + ### YT-DLP v2 (Auth) Worker Ignition System - This DAG acts as an "ignition system" to start one or more self-sustaining worker loops. - It does **not** process URLs itself. Its only job is to trigger a specified number of `ytdlp_ops_dispatcher` DAGs, - which in turn pull URLs and trigger `ytdlp_ops_worker_per_url` with worker affinity. - - #### How it Works: - - 1. **Manual Trigger:** You manually trigger this DAG with parameters defining how many dispatcher loops to start (`total_workers`), in what configuration (`workers_per_bunch`, delays). - 2. **Ignition:** The orchestrator triggers the initial set of dispatcher DAGs in a "fire-and-forget" manner, passing all its configuration parameters to them. - 3. **Completion:** Once all initial dispatchers have been triggered, the orchestrator's job is complete. - - The dispatchers then take over, each pulling a URL, determining affinity, and triggering a worker DAG. + This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v2 authentication worker**. + It triggers `ytdlp_ops_v02_dispatcher_auth` DAGs, which pull raw URLs from `queue2_auth_inbox` and trigger `ytdlp_ops_v02_worker_per_url_auth` workers. """, tags=['ytdlp', 'mgmt', 'master'], params={ @@ -256,25 +283,60 @@ with DAG( # --- Worker Passthrough Parameters --- 'on_bannable_failure': Param( - 'stop_loop', + 'stop_loop_on_auth_proceed_on_download_error', type="string", - enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error'], + enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error'], title="[Worker Param] On Bannable Failure Policy", description="Policy for a worker when a bannable error occurs. " - "'stop_loop': Ban the account, mark URL as failed, and stop the worker's loop. " + "'stop_loop': Ban the account, mark URL as failed, and stop the worker's loop on any failure (auth or download). " "'retry_with_new_account': Ban the failed account, retry ONCE with a new account. If retry fails, ban the second account and proxy, then stop." "'retry_on_connection_error': If a connection error (e.g. SOCKS timeout) occurs, retry with a new account but do NOT ban the first account/proxy. If retry fails, stop the loop without banning." 
+ "'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene by pausing the dispatcher DAG or creating a lock file (`/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile`) to prevent a runaway failure loop." + "'stop_loop_on_auth_proceed_on_download_error': **(Default)** Stops the loop on an authentication/token error (like 'stop_loop'), but continues the loop on a download/probe error (like 'proceed...')." ), - 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."), + 'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}), 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), - 'clients': Param('tv_sample,mweb,web_camoufox', type="string", description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_sample, tv_embedded"), + 'clients': Param( + 'mweb,web_camoufox,tv', + type="string", + enum=[ + 'mweb,web_camoufox,tv', + 'mweb', + 'web_camoufox', + 'tv', + 'custom', + 'tv,web_safari,mweb,web_camoufox', + 'web_safari', + 'web', + 'web_embedded', + 'web_music', + 'web_creator', + 'web_safari_camoufox', + 'web_embedded_camoufox', + 'web_music_camoufox', + 'web_creator_camoufox', + 'mweb_camoufox', + 'android', + 'android_music', + 'android_creator', + 'android_vr', + 'ios', + 'ios_music', + 'ios_creator', + 'tv_simply', + 'tv_embedded', + ], + title="[Worker Param] Clients", + description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details." + ), 'account_pool': Param('ytdlp_account', type="string", description="[Worker Param] Account pool prefix or comma-separated list."), 'account_pool_size': Param(10, type=["integer", "null"], description="[Worker Param] If using a prefix for 'account_pool', this specifies the number of accounts to generate (e.g., 10 for 'prefix_01' through 'prefix_10'). Required when using a prefix."), + 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode. Format: prefix_YYYYMMDDHHMMSS_client_XX."), 'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string", description="[Worker Param] IP of the ytdlp-ops-server. Default is from Airflow variable YT_AUTH_SERVICE_IP or hardcoded."), 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer", description="[Worker Param] Port of the Envoy load balancer. 
Default is from Airflow variable YT_AUTH_SERVICE_PORT or hardcoded."), 'machine_id': Param("ytdlp-ops-airflow-service", type="string", description="[Worker Param] Identifier for the client machine."), + 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="If provided, forces the token service to use this specific proxy for the request."), 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="[Worker Param] If True and all accounts in a prefix-based pool are exhausted, create a new one automatically."), - 'retrigger_delay_on_empty_s': Param(60, type="integer", description="[Worker Param] Delay in seconds before a worker re-triggers itself if the queue is empty. Set to -1 to stop the loop."), } ) as dag: @@ -285,6 +347,6 @@ with DAG( orchestrate_task.doc_md = """ ### Start Worker Loops This is the main task that executes the ignition policy. - - It triggers `ytdlp_ops_dispatcher` DAGs according to the batch settings. + - It triggers `ytdlp_ops_v02_dispatcher_auth` DAGs according to the batch settings. - It passes all its parameters down to the dispatchers, which will use them to trigger workers. """ diff --git a/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py b/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py new file mode 100644 index 0000000..df5e834 --- /dev/null +++ b/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py @@ -0,0 +1,302 @@ +# -*- coding: utf-8 -*- +# vim:fenc=utf-8 +# +# Copyright © 2024 rl +# +# Distributed under terms of the MIT license. + +""" +DAG to orchestrate ytdlp_ops_dispatcher_v2_dl DAG runs based on a defined policy. +""" + +from airflow import DAG +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.operators.python import PythonOperator +from airflow.models.param import Param +from airflow.models.variable import Variable +from airflow.utils.dates import days_ago +from airflow.api.common.trigger_dag import trigger_dag +from airflow.models.dagrun import DagRun +from airflow.models.dag import DagModel +from datetime import timedelta +import logging +import random +import time +import json + +# Import utility functions +from utils.redis_utils import _get_redis_client + +# Import Thrift modules for proxy status check +from pangramia.yt.tokens_ops import YTTokenOpService +from thrift.protocol import TBinaryProtocol +from thrift.transport import TSocket, TTransport + +# Configure logging +logger = logging.getLogger(__name__) + +# Default settings +DEFAULT_REDIS_CONN_ID = 'redis_default' +DEFAULT_TOTAL_WORKERS = 3 +DEFAULT_WORKERS_PER_BUNCH = 1 +DEFAULT_WORKER_DELAY_S = 5 +DEFAULT_BUNCH_DELAY_S = 20 + +# --- Helper Functions --- + +def _check_application_queue(redis_client, queue_base_name: str) -> int: + """Checks and logs the length of the application's inbox queue.""" + inbox_queue_name = f"{queue_base_name}_inbox" + logger.info(f"--- Checking Application Work Queue ---") + try: + q_len = redis_client.llen(inbox_queue_name) + logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).") + return q_len + except Exception as e: + logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True) + return -1 # Indicate an error + +def _inspect_celery_queues(redis_client, queue_names: list): + """Inspects Celery queues in Redis and logs their status.""" + logger.info("--- Inspecting Celery Queues in Redis ---") + for queue_name in queue_names: + try: + q_len = redis_client.llen(queue_name) + logger.info(f"Queue 
'{queue_name}': Length = {q_len}") + + if q_len > 0: + logger.info(f"Showing up to 10 tasks in '{queue_name}':") + # Fetch up to 10 items from the start of the list (queue) + items_bytes = redis_client.lrange(queue_name, 0, 9) + for i, item_bytes in enumerate(items_bytes): + try: + # Celery tasks are JSON-encoded strings + task_data = json.loads(item_bytes.decode('utf-8')) + # Pretty print for readability in logs + pretty_task_data = json.dumps(task_data, indent=2) + logger.info(f" Task {i+1}:\n{pretty_task_data}") + except (json.JSONDecodeError, UnicodeDecodeError) as e: + logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}") + except Exception as e: + logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True) + logger.info("--- End of Queue Inspection ---") + + +# --- Main Orchestration Callable --- + +def orchestrate_workers_ignition_callable(**context): + """ + Main orchestration logic. Triggers a specified number of dispatcher DAGs + to initiate self-sustaining processing loops. + """ + params = context['params'] + ti = context['task_instance'] + logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.") + logger.info("Starting dispatcher ignition sequence.") + + dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_dl' + worker_queue = 'queue-dl' + app_queue_name = 'queue2_dl' + + logger.info(f"Running in v2 (download) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'") + + dag_model = DagModel.get_dagmodel(dispatcher_dag_id) + if dag_model and dag_model.is_paused: + logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.") + raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.") + + total_workers = int(params['total_workers']) + workers_per_bunch = int(params['workers_per_bunch']) + + # --- Input Validation --- + if total_workers <= 0: + logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.") + raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).") + + if workers_per_bunch <= 0: + logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. Aborting.") + raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.") + # --- End Input Validation --- + + worker_delay = int(params['delay_between_workers_s']) + bunch_delay = int(params['delay_between_bunches_s']) + + # Create a list of worker numbers to trigger + worker_indices = list(range(total_workers)) + bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)] + + # --- Inspect Queues before starting --- + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + redis_client = _get_redis_client(redis_conn_id) + + # First, check the application queue for work + app_queue_len = _check_application_queue(redis_client, app_queue_name) + + if params.get('skip_if_queue_empty') and app_queue_len == 0: + logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.") + raise AirflowSkipException("Application work queue is empty.") + + # Then, inspect the target Celery queue for debugging + _inspect_celery_queues(redis_client, [worker_queue]) + except AirflowSkipException: + raise # Re-raise to let Airflow handle the skip + except Exception as e: + logger.error(f"Could not inspect queues due to an error: {e}. 
Continuing with ignition sequence.") + # --- End of Inspection --- + + logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.") + + dag_run_id = context['dag_run'].run_id + total_triggered = 0 + + for i, bunch in enumerate(bunches): + logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---") + for j, _ in enumerate(bunch): + # Create a unique run_id for each dispatcher run + run_id = f"dispatched_{dag_run_id}_{total_triggered}" + + # Pass all orchestrator params to the dispatcher, which will then pass them to the worker. + conf_to_pass = {p: params[p] for p in params} + + logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})") + logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") + + trigger_dag( + dag_id=dispatcher_dag_id, + run_id=run_id, + conf=conf_to_pass, + replace_microseconds=False + ) + total_triggered += 1 + + # Delay between dispatches in a bunch + if j < len(bunch) - 1: + logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...") + time.sleep(worker_delay) + + # Delay between bunches + if i < len(bunches) - 1: + logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---") + time.sleep(bunch_delay) + + logger.info(f"--- Ignition sequence complete. Total dispatcher runs triggered: {total_triggered}. ---") + + # --- Final Queue Inspection --- + final_check_delay = 30 # seconds + logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...") + time.sleep(final_check_delay) + + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + redis_client = _get_redis_client(redis_conn_id) + + # Log connection details for debugging broker mismatch issues + conn_kwargs = redis_client.connection_pool.connection_kwargs + logger.info(f"Final check using Redis connection '{redis_conn_id}': " + f"host={conn_kwargs.get('host')}, " + f"port={conn_kwargs.get('port')}, " + f"db={conn_kwargs.get('db')}") + + _inspect_celery_queues(redis_client, [worker_queue]) + logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. " + "If queues are empty, workers have started processing.") + except Exception as e: + logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.") + + + + +# ============================================================================= +# DAG Definition +# ============================================================================= + +default_args = { + 'owner': 'airflow', + 'depends_on_past': False, + 'email_on_failure': False, + 'email_on_retry': False, + 'retries': 1, + 'retry_delay': timedelta(minutes=1), + 'start_date': days_ago(1), +} + +with DAG( + dag_id='ytdlp_ops_v02_orchestrator_dl', + default_args=default_args, + schedule=None, # This DAG runs only when triggered. + max_active_runs=1, # Only one ignition process should run at a time. + catchup=False, + description='Ignition system for ytdlp_ops_v02_dispatcher_dl DAGs.', + doc_md=""" + ### YT-DLP v2 (Download) Worker Ignition System + + This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v2 download worker**. 
+ It triggers `ytdlp_ops_v02_dispatcher_dl` DAGs, which pull job payloads from `queue2_dl_inbox` and trigger `ytdlp_ops_v02_worker_per_url_dl` workers. + """, + tags=['ytdlp', 'mgmt', 'master'], + params={ + # --- Ignition Control Parameters --- + 'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."), + 'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."), + 'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."), + 'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."), + 'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."), + 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), + 'clients': Param('mweb,web_camoufox,tv', type="string", title="[Worker Param] Clients", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"), + + # --- Download Control Parameters --- + 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), + 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), + 'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), + 'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), + 'fragment_retries': Param(2, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."), + 'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."), + 'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."), + 'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."), + 'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."), + 'download_format_preset': Param( + 'formats_2', + type="string", + enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], + title="[Worker Param] Download Format Preset", + description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + ), + 'download_format_custom': Param( + '18,140,299/298/137/136/135/134/133', + type="string", + 
title="[Worker Param] Custom Download Format", + description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'." + ), + 'downloader': Param( + 'default', + type="string", + enum=['default', 'aria2c'], + title="[Worker Param] Downloader", + description="Choose the downloader for yt-dlp." + ), + 'downloader_args_aria2c': Param( + 'aria2c:-x 4 -k 2M --max-download-limit=3M', + type="string", + title="[Worker Param] Aria2c Downloader Arguments", + description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'." + ), + 'yt_dlp_extra_args': Param( + '--restrict-filenames', + type=["string", "null"], + title="[Worker Param] Extra yt-dlp arguments", + description="Extra command-line arguments for yt-dlp during download." + ), + } +) as dag: + + orchestrate_task = PythonOperator( + task_id='start_worker_loops', + python_callable=orchestrate_workers_ignition_callable, + ) + orchestrate_task.doc_md = """ + ### Start Worker Loops + This is the main task that executes the ignition policy. + - It triggers `ytdlp_ops_v02_dispatcher_dl` DAGs according to the batch settings. + - It passes all its parameters down to the dispatchers, which will use them to trigger workers. + """ diff --git a/airflow/dags/ytdlp_ops_worker_per_url.py b/airflow/dags/ytdlp_ops_v02_worker_per_url_auth.py similarity index 63% rename from airflow/dags/ytdlp_ops_worker_per_url.py rename to airflow/dags/ytdlp_ops_v02_worker_per_url_auth.py index 050ef1a..1939821 100644 --- a/airflow/dags/ytdlp_ops_worker_per_url.py +++ b/airflow/dags/ytdlp_ops_v02_worker_per_url_auth.py @@ -6,10 +6,10 @@ # Distributed under terms of the MIT license. """ -DAG for processing a single YouTube URL passed via DAG run configuration. -This is the "Worker" part of a Sensor/Worker pattern. -This DAG has been refactored to use the TaskFlow API to implement worker affinity, -ensuring all tasks for a single URL run on the same machine. +DAG for authenticating a single YouTube URL passed via DAG run configuration. +This is the "Auth Worker" part of a separated Auth/Download pattern. +It acquires a token, saves the info.json, and pushes the token data to a +Redis queue for the download worker. 
""" from __future__ import annotations @@ -24,12 +24,15 @@ from airflow.operators.dummy import DummyOperator from airflow.utils.dates import days_ago from airflow.utils.task_group import TaskGroup from airflow.api.common.trigger_dag import trigger_dag +from copy import copy from datetime import datetime, timedelta +import concurrent.futures import json import logging import os import random import re +import redis import socket import time import traceback @@ -37,7 +40,7 @@ import uuid # Import utility functions and Thrift modules from utils.redis_utils import _get_redis_client -from pangramia.yt.common.ttypes import TokenUpdateMode +from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException from pangramia.yt.tokens_ops import YTTokenOpService from thrift.protocol import TBinaryProtocol @@ -47,20 +50,114 @@ from thrift.transport.TTransport import TTransportException # Configure logging logger = logging.getLogger(__name__) + +# --- Client Stats Helper --- + +def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str): + """Updates success/failure statistics for a client type in Redis.""" + if not clients_str: + logger.warning("Cannot update client stats: 'clients' string is empty.") + return + + # Assumption: The service tries clients in the order provided. + # We attribute the result to the first client in the list. + primary_client = clients_str.split(',')[0].strip() + if not primary_client: + logger.warning("Cannot update client stats: could not determine primary client.") + return + + stats_key = "client_stats" + + try: + # Using a pipeline with WATCH for safe concurrent updates. + with redis_client.pipeline() as pipe: + pipe.watch(stats_key) + + current_stats_json = redis_client.hget(stats_key, primary_client) + stats = {} + if current_stats_json: + try: + stats = json.loads(current_stats_json) + except json.JSONDecodeError: + logger.warning(f"Could not parse existing stats for client '{primary_client}'. Resetting stats.") + stats = {} + + stats.setdefault('success_count', 0) + stats.setdefault('failure_count', 0) + + details = { + 'timestamp': time.time(), 'url': url, + 'machine_id': machine_id, 'dag_run_id': dag_run_id, + } + + if status == 'success': + stats['success_count'] += 1 + stats['latest_success'] = details + elif status == 'failure': + stats['failure_count'] += 1 + stats['latest_failure'] = details + + pipe.multi() + pipe.hset(stats_key, primary_client, json.dumps(stats)) + pipe.execute() + + logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.") + + except redis.exceptions.WatchError: + logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. 
Skipping this update.") + except Exception as e: + logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True) + + # Default settings from Airflow Variables or hardcoded fallbacks -DEFAULT_QUEUE_NAME = 'video_queue' +DEFAULT_QUEUE_NAME = 'queue2_auth' DEFAULT_REDIS_CONN_ID = 'redis_default' DEFAULT_TIMEOUT = 3600 DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) +DEFAULT_REQUEST_PARAMS = { + "context_reuse_policy": { + "enabled": True, + "max_age_seconds": 86400, + "reuse_visitor_id": True, + "reuse_cookies": True + }, + "token_generation_strategy": { + "youtubei_js": { + "generate_po_token": True, + "generate_gvs_token": True + } + }, + "ytdlp_params": { + "use_curl_prefetch": False, + "token_supplement_strategy": { + "youtubepot_bgutilhttp_extractor": { + "enabled": True + } + }, + "visitor_id_override": { + "enabled": True + } + }, + "session_params": { + "lang": "en-US", + "location": "US", + "deviceCategory": "MOBILE", + "user_agents": { + "youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)", + "yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)" + } + } +} + # The queue is set to a fallback here. The actual worker-specific queue is # assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py), # which parses the target queue from the DAG run_id. DEFAULT_ARGS = { 'owner': 'airflow', 'retries': 0, - 'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook. + 'queue': 'queue-auth', # Fallback queue. Will be overridden by the policy hook. } @@ -105,7 +202,15 @@ def _get_account_pool(params: dict) -> list: if pool_size_param is not None: is_prefix_mode = True pool_size = int(pool_size_param) - accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)] + + if params.get('prepend_client_to_account', True): + clients_str = params.get('clients', '') + primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown' + timestamp = datetime.now().strftime('%Y%m%d%H%M%S') + new_prefix = f"{prefix}_{timestamp}_{primary_client}" + accounts = [f"{new_prefix}_{i:02d}" for i in range(1, pool_size + 1)] + else: + accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)] else: accounts = [prefix] @@ -140,6 +245,61 @@ def _get_account_pool(params: dict) -> list: logger.info(f"Final active account pool with {len(accounts)} accounts.") return accounts +@task +def list_available_formats(token_data: dict, **context): + """ + Lists available formats for the given video using the info.json. + This is for debugging and informational purposes. 
+ """ + import subprocess + import shlex + + info_json_path = token_data.get('info_json_path') + if not (info_json_path and os.path.exists(info_json_path)): + logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).") + return [] + + try: + cmd = [ + 'yt-dlp', + '--verbose', + '--list-formats', + '--load-info-json', info_json_path, + ] + + copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) + logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}") + + process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) + + if process.stderr: + logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}") + + if process.returncode != 0: + logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}") + + available_formats = [] + if process.stdout: + logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---") + # Parse the output to get format IDs + lines = process.stdout.split('\n') + header_found = False + for line in lines: + if line.startswith('ID '): + header_found = True + continue + if header_found and line.strip() and line.strip()[0].isdigit(): + format_id = line.split()[0] + available_formats.append(format_id) + logger.info(f"Parsed available format IDs: {available_formats}") + + return available_formats + + except Exception as e: + logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True) + return [] + + # ============================================================================= # TASK DEFINITIONS (TaskFlow API) # ============================================================================= @@ -178,12 +338,36 @@ def get_url_and_assign_account(**context): logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.") # --- End Verification --- - # The URL is passed by the dispatcher DAG. + # The URL is passed by the dispatcher DAG via 'url_to_process'. + # For manual runs, we fall back to 'manual_url_to_process'. url_to_process = params.get('url_to_process') if not url_to_process: - raise AirflowException("'url_to_process' was not found in the DAG run configuration.") + url_to_process = params.get('manual_url_to_process') + if url_to_process: + logger.info(f"Using URL from manual run parameter: '{url_to_process}'") + + if not url_to_process: + raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter.") logger.info(f"Received URL '{url_to_process}' to process.") + # Mark the URL as in-progress in Redis + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) + progress_queue = f"{queue_name}_progress" + client = _get_redis_client(redis_conn_id) + + progress_data = { + 'status': 'in_progress', + 'start_time': time.time(), + 'dag_run_id': context['dag_run'].run_id, + 'hostname': socket.gethostname(), + } + client.hset(progress_queue, url_to_process, json.dumps(progress_data)) + logger.info(f"Marked URL '{url_to_process}' as in-progress.") + except Exception as e: + logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True) + # Account assignment logic is the same as before. 
account_id = random.choice(_get_account_pool(params)) logger.info(f"Selected account '{account_id}' for this run.") @@ -206,22 +390,100 @@ def get_token(initial_data: dict, **context): host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT)) machine_id = params.get('machine_id') or socket.gethostname() + clients = params.get('clients') + request_params_json = params.get('request_params_json', '{}') + assigned_proxy_url = params.get('assigned_proxy_url') + + # Pretty-print the request parameters for debugging + try: + pretty_request_params = json.dumps(json.loads(request_params_json), indent=2) + logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---") + except (json.JSONDecodeError, TypeError): + logger.warning("Could not parse request_params_json. Using raw content.") + logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---") + + # Construct Airflow log context to pass to the service + try: + from airflow.configuration import conf + remote_base = conf.get('logging', 'remote_base_log_folder') + log_path = ( + f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/" + f"task_id={ti.task_id}/attempt={ti.try_number}.log" + ) + airflow_log_context = AirflowLogContext( + logS3Path=log_path, + dagId=ti.dag_id, + runId=ti.run_id, + taskId=ti.task_id, + tryNumber=ti.try_number, + workerHostname=socket.gethostname(), + queue=ti.queue + ) + logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}") + except Exception as e: + logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.") + airflow_log_context = AirflowLogContext( + dagId=ti.dag_id, + runId=ti.run_id, + taskId=ti.task_id, + tryNumber=ti.try_number, + workerHostname=socket.gethostname(), + queue=ti.queue + ) - logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' ---") + logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---") client, transport = None, None try: client, transport = _get_thrift_client(host, port, timeout) - token_data = client.getOrRefreshToken(accountId=account_id, updateType=TokenUpdateMode.AUTO, url=url, clients=params.get('clients'), machineId=machine_id) + token_data = client.getOrRefreshToken( + accountId=account_id, + updateType=TokenUpdateMode.AUTO, + url=url, + clients=clients, + machineId=machine_id, + airflowLogContext=airflow_log_context, + requestParamsJson=request_params_json, + assignedProxyUrl=assigned_proxy_url + ) + + # Log a compact summary of the Thrift response, omitting large/detailed fields. + summary_token_data = copy(token_data) + if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson: + summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..." + if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob: + summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..." + # These will be logged separately below. + if hasattr(summary_token_data, 'requestSummary'): + summary_token_data.requestSummary = "..." + if hasattr(summary_token_data, 'communicationLogPaths'): + summary_token_data.communicationLogPaths = "..." 
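# Illustrative sketch (not part of this patch): the getOrRefreshToken call above
# can be exercised outside Airflow for debugging. Host/port defaults and the
# framed binary transport are assumptions modelled on this DAG's
# _get_thrift_client helper; swap in whatever that helper actually uses.
def _debug_get_or_refresh_token(url, account_id, host='172.17.0.1', port=9080):
    from thrift.transport import TSocket, TTransport
    from thrift.protocol import TBinaryProtocol
    sock = TSocket.TSocket(host, port)
    transport = TTransport.TFramedTransport(sock)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = YTTokenOpService.Client(protocol)
    transport.open()
    try:
        return client.getOrRefreshToken(
            accountId=account_id,
            updateType=TokenUpdateMode.AUTO,
            url=url,
            clients='mweb',
            machineId=socket.gethostname(),
            airflowLogContext=None,
            requestParamsJson='{}',
            assignedProxyUrl=None,
        )
    finally:
        transport.close()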
+ logger.info(f"Thrift service response summary: {summary_token_data}") + + request_summary = getattr(token_data, 'requestSummary', None) + if request_summary: + # Prepending a newline for better separation in logs. + logger.info(f"\n--- Request Summary ---\n{request_summary}") + + communication_log_paths = getattr(token_data, 'communicationLogPaths', None) + if communication_log_paths: + logger.info("--- Communication Log Paths ---") + for path in communication_log_paths: + logger.info(f" - {path}") info_json = getattr(token_data, 'infoJson', None) if not (info_json and json.loads(info_json)): raise AirflowException("Service returned success but info.json was empty or invalid.") video_id = _extract_video_id(url) - os.makedirs(info_json_dir, exist_ok=True) - # Use a readable timestamp for a unique filename on each attempt. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json") + + # Create a unique directory for this job's artifacts + job_dir_name = f"{timestamp}-{video_id or 'unknown'}" + job_dir_path = os.path.join(info_json_dir, job_dir_name) + os.makedirs(job_dir_path, exist_ok=True) + + info_json_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json" + info_json_path = os.path.join(job_dir_path, info_json_filename) with open(info_json_path, 'w', encoding='utf-8') as f: f.write(info_json) @@ -232,6 +494,7 @@ def get_token(initial_data: dict, **context): 'ytdlp_command': getattr(token_data, 'ytdlpCommand', None), 'successful_account_id': account_id, 'original_url': url, # Include original URL for fallback + 'clients': clients, # Pass clients string for accurate stats } except (PBServiceException, PBUserException, TTransportException) as e: error_context = getattr(e, 'context', None) @@ -297,8 +560,11 @@ def handle_bannable_error_branch(task_id_to_check: str, **context): return 'ban_account_and_prepare_for_retry' if policy in ['retry_on_connection_error', 'retry_without_ban']: return 'assign_new_account_for_direct_retry' - if policy == 'stop_loop': + if policy in ['stop_loop', 'stop_loop_on_auth_proceed_on_download_error']: return 'ban_and_report_immediately' + if policy == 'proceed_loop_under_manual_inspection': + logger.warning(f"Bannable error with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.") + return 'report_bannable_and_continue' # Any other error is considered fatal for this run. logger.error(f"Unhandled or non-retryable error '{error_code}' from '{task_id_to_check}'. Marking as fatal.") @@ -447,121 +713,43 @@ def ban_and_report_immediately(initial_data: dict, reason: str, **context): return initial_data # Pass data along if needed by reporting @task -def download_and_probe(token_data: dict, **context): +def push_auth_success_to_redis(initial_data: dict, token_data: dict, **context): """ - Uses the retrieved token data to download and probe the media file. - This version uses subprocess directly with an argument list for better security and clarity. + On successful token acquisition, pushes the complete token data to the + Redis queue for the download worker and records the auth success. 
""" - import subprocess - import shlex - - params = context['params'] - info_json_path = token_data.get('info_json_path') - proxy = token_data.get('socks_proxy') - original_url = token_data.get('original_url') - download_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles/video') - - download_format = params.get('download_format', 'ba[ext=m4a]/bestaudio/best') - output_template = params.get('output_path_template', "%(title)s [%(id)s].%(ext)s") - full_output_path = os.path.join(download_dir, output_template) - retry_on_probe_failure = params.get('retry_on_probe_failure', False) - - if not (info_json_path and os.path.exists(info_json_path)): - raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).") - - def run_yt_dlp(): - """Constructs and runs the yt-dlp command, returning the final filename.""" - cmd = [ - 'yt-dlp', - '--verbose', - '--load-info-json', info_json_path, - '-f', download_format, - '-o', full_output_path, - '--print', 'filename', - '--continue', - '--no-progress', - '--no-simulate', - '--no-write-info-json', - '--ignore-errors', - '--no-playlist', - ] - if proxy: - cmd.extend(['--proxy', proxy]) - - # Crucially, add the original URL to allow yt-dlp to refresh expired download links, - # which is the most common cause of HTTP 403 errors. - if original_url: - cmd.append(original_url) - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Executing yt-dlp command: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600) - - if process.returncode != 0: - logger.error(f"yt-dlp failed with exit code {process.returncode}") - logger.error(f"STDOUT: {process.stdout}") - logger.error(f"STDERR: {process.stderr}") - raise AirflowException("yt-dlp command failed.") - - # Get the last line of stdout, which should be the filename - final_filename = process.stdout.strip().split('\n')[-1] - if not (final_filename and os.path.exists(final_filename)): - logger.error(f"Download command finished but the output file does not exist: '{final_filename}'") - logger.error(f"Full STDOUT:\n{process.stdout}") - logger.error(f"Full STDERR:\n{process.stderr}") - raise AirflowException(f"Download failed or did not produce a file: {final_filename}") - - logger.info(f"SUCCESS: Download complete. Final file at: {final_filename}") - return final_filename - - def run_ffmpeg_probe(filename): - """Probes the given file with ffmpeg to check for corruption.""" - logger.info(f"Probing downloaded file: {filename}") - try: - subprocess.run(['ffmpeg', '-v', 'error', '-i', filename, '-f', 'null', '-'], check=True, capture_output=True, text=True) - logger.info("SUCCESS: Probe confirmed valid media file.") - except subprocess.CalledProcessError as e: - logger.error(f"ffmpeg probe check failed for '{filename}'. The file might be corrupt.") - logger.error(f"ffmpeg STDERR: {e.stderr}") - raise AirflowException("ffmpeg probe failed.") - - # --- Main Execution Logic --- - final_filename = run_yt_dlp() - try: - run_ffmpeg_probe(final_filename) - return final_filename - except AirflowException as e: - if "probe failed" in str(e) and retry_on_probe_failure: - logger.warning("Probe failed. 
Attempting one re-download...") - try: - # Rename the failed file to allow for a fresh download attempt - part_file = f"{final_filename}.part" - os.rename(final_filename, part_file) - logger.info(f"Renamed corrupted file to {part_file}") - except OSError as rename_err: - logger.error(f"Could not rename corrupted file: {rename_err}") - - final_filename_retry = run_yt_dlp() - run_ffmpeg_probe(final_filename_retry) - return final_filename_retry - else: - # Re-raise the original exception if no retry is attempted - raise - -@task -def mark_url_as_success(initial_data: dict, downloaded_file_path: str, token_data: dict, **context): - """Records the successful result in Redis.""" params = context['params'] url = initial_data['url_to_process'] - result_data = { - 'status': 'success', 'end_time': time.time(), 'url': url, - 'downloaded_file_path': downloaded_file_path, **token_data, - 'dag_run_id': context['dag_run'].run_id, - } + + # The download inbox queue is derived from the auth queue name. + dl_inbox_queue = f"{params['queue_name'].replace('_auth', '_dl')}_inbox" + auth_result_queue = f"{params['queue_name']}_result" + progress_queue = f"{params['queue_name']}_progress" + client = _get_redis_client(params['redis_conn_id']) - client.hset(f"{params['queue_name']}_result", url, json.dumps(result_data)) - logger.info(f"Stored success result for URL '{url}'.") + + payload = { + 'timestamp': time.time(), + 'dag_run_id': context['dag_run'].run_id, + **token_data + } + + result_data = { + 'status': 'success', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'token_data': token_data + } + + with client.pipeline() as pipe: + pipe.lpush(dl_inbox_queue, json.dumps(payload)) + pipe.hset(auth_result_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Pushed successful auth data for URL '{url}' to '{dl_inbox_queue}'.") + logger.info(f"Stored success result for auth on URL '{url}' in '{auth_result_queue}'.") @task(trigger_rule='one_failed') def report_failure_and_continue(**context): @@ -606,15 +794,26 @@ def report_failure_and_continue(**context): try: client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on failure: {e}", exc_info=True) + result_queue = f"{params['queue_name']}_result" fail_queue = f"{params['queue_name']}_fail" + progress_queue = f"{params['queue_name']}_progress" + with client.pipeline() as pipe: pipe.hset(result_queue, url, json.dumps(result_data)) pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) pipe.execute() - logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}'.") + logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") except Exception as e: logger.error(f"Could not report failure to Redis: {e}", exc_info=True) @@ -648,6 +847,15 @@ def handle_fatal_error(**context): # Report failure to Redis so the URL can be reprocessed later try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, 
machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True) + result_data = { 'status': 'failed', 'end_time': time.time(), @@ -657,13 +865,15 @@ def handle_fatal_error(**context): 'error_message': 'Fatal non-retryable error occurred', 'error_details': error_details } - client = _get_redis_client(params['redis_conn_id']) result_queue = f"{params['queue_name']}_result" fail_queue = f"{params['queue_name']}_fail" + progress_queue = f"{params['queue_name']}_progress" + with client.pipeline() as pipe: pipe.hset(result_queue, url, json.dumps(result_data)) pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) pipe.execute() logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.") @@ -683,6 +893,12 @@ def continue_processing_loop(**context): params = context['params'] dag_run = context['dag_run'] + # Do not continue the loop for manual runs of the worker DAG. + # A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'. + if not dag_run.run_id.startswith('worker_run_'): + logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") + return + # Create a new unique run_id for the dispatcher. # Using a timestamp and UUID ensures the ID is unique and does not grow in length over time, # preventing database errors. @@ -697,7 +913,7 @@ def continue_processing_loop(**context): logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.") trigger_dag( - dag_id='ytdlp_ops_dispatcher', + dag_id='ytdlp_ops_v02_dispatcher_auth', run_id=new_dispatcher_run_id, conf=conf_to_pass, replace_microseconds=False @@ -711,6 +927,7 @@ def handle_retry_failure_branch(task_id_to_check: str, **context): On retry, most errors are considered fatal for the URL, but not for the system. """ ti = context['task_instance'] + params = context['params'] error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details') if not error_details: return 'handle_fatal_error' @@ -720,8 +937,8 @@ def handle_retry_failure_branch(task_id_to_check: str, **context): # Check if this is an age confirmation error - should not stop the loop if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower(): - logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. Reporting failure and continuing loop.") - return 'report_failure_and_continue' + logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. This is a content restriction, not a bot detection issue.") + return 'handle_age_restriction_error' if error_code == 'TRANSPORT_ERROR': logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.") @@ -729,6 +946,11 @@ def handle_retry_failure_branch(task_id_to_check: str, **context): is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"] if is_bannable: + policy = params.get('on_bannable_failure', 'retry_with_new_account') + if policy == 'proceed_loop_under_manual_inspection': + logger.warning(f"Bannable error '{error_code}' on retry with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.") + return 'report_bannable_and_continue' + logger.warning(f"Bannable error '{error_code}' on retry. 
Banning account and reporting failure.") return 'ban_and_report_after_retry' @@ -745,11 +967,6 @@ def ban_and_report_after_retry(retry_data: dict, reason: str, **context): return retry_data -@task.branch(trigger_rule='one_failed') -def handle_download_failure_branch(**context): - """If download or probe fails, routes to the standard failure reporting.""" - logger.warning("Download or probe failed. Reporting failure and continuing loop.") - return 'report_failure_and_continue' @task(trigger_rule='one_success') @@ -768,7 +985,69 @@ def coalesce_token_data(get_token_result=None, retry_get_token_result=None): raise AirflowException("Could not find a successful token result from any attempt.") -@task(trigger_rule='one_failed') +@task +def report_bannable_and_continue(**context): + """ + Handles a bannable error by reporting it, but continues the loop + as per the 'proceed_loop_under_manual_inspection' policy. + """ + params = context['params'] + ti = context['task_instance'] + url = params.get('url_to_process', 'unknown') + + # Collect error details + error_details = {} + first_token_task_id = 'get_token' + retry_token_task_id = 'retry_get_token' + + first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') + retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') + + # Use the most recent error details + if retry_token_error: + error_details = retry_token_error + elif first_token_error: + error_details = first_token_error + + logger.error(f"Bannable error for URL '{url}'. Policy is to continue loop under manual supervision.") + + # Report failure to Redis + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on bannable error: {e}", exc_info=True) + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error': 'bannable_error_manual_override', + 'error_message': 'Bannable error occurred, but policy is set to continue loop under manual supervision.', + 'error_details': error_details + } + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored bannable error for URL '{url}' in '{result_queue}' and '{fail_queue}'.") + except Exception as e: + logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True) + + +@task def handle_age_restriction_error(**context): """ Handles age restriction errors specifically. 
These are content restrictions @@ -797,6 +1076,15 @@ def handle_age_restriction_error(**context): # Report failure to Redis so the URL can be marked as failed try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on age restriction error: {e}", exc_info=True) + result_data = { 'status': 'failed', 'end_time': time.time(), @@ -806,13 +1094,15 @@ def handle_age_restriction_error(**context): 'error_message': 'Content requires age confirmation', 'error_details': error_details } - client = _get_redis_client(params['redis_conn_id']) result_queue = f"{params['queue_name']}_result" fail_queue = f"{params['queue_name']}_fail" + progress_queue = f"{params['queue_name']}_progress" + with client.pipeline() as pipe: pipe.hset(result_queue, url, json.dumps(result_data)) pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) pipe.execute() logger.info(f"Stored age restriction error for URL '{url}' in '{result_queue}' and '{fail_queue}'.") @@ -826,7 +1116,7 @@ def handle_age_restriction_error(**context): # DAG Definition with TaskGroups # ============================================================================= with DAG( - dag_id='ytdlp_ops_worker_per_url', + dag_id='ytdlp_ops_v02_worker_per_url_auth', default_args=DEFAULT_ARGS, schedule=None, start_date=days_ago(1), @@ -834,6 +1124,7 @@ with DAG( tags=['ytdlp', 'worker'], doc_md=__doc__, render_template_as_native_obj=True, + is_paused_upon_creation=True, params={ 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"), 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"), @@ -841,17 +1132,18 @@ with DAG( 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer"), 'account_pool': Param('default_account', type="string"), 'account_pool_size': Param(None, type=["integer", "null"]), + 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."), 'machine_id': Param(None, type=["string", "null"]), - 'clients': Param('web', type="string"), + 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="If provided, forces the token service to use this specific proxy for the request."), + 'clients': Param('mweb', type="string", description="Comma-separated list of clients for token generation. e.g. 
mweb,tv,web_camoufox"), 'timeout': Param(DEFAULT_TIMEOUT, type="integer"), - 'download_format': Param('ba[ext=m4a]/bestaudio/best', type="string"), - 'output_path_template': Param("%(title)s [%(id)s].%(ext)s", type="string"), - 'on_bannable_failure': Param('retry_with_new_account', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error']), - 'retry_on_probe_failure': Param(False, type="boolean"), + 'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']), + 'request_params_json': Param(json.dumps(DEFAULT_REQUEST_PARAMS), type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."), 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"), - # Internal params passed from dispatcher - 'url_to_process': Param(None, type=["string", "null"]), - 'worker_queue': Param(None, type=["string", "null"]), + # --- Manual Run / Internal Parameters --- + 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL to process. This is ignored if triggered by the dispatcher."), + 'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), + 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), } ) as dag: initial_data = get_url_and_assign_account() @@ -863,6 +1155,7 @@ with DAG( report_failure_task = report_failure_and_continue() continue_loop_task = continue_processing_loop() age_restriction_task = handle_age_restriction_error() + report_bannable_and_continue_task = report_bannable_and_continue() # --- Task Group 1: Initial Attempt --- with TaskGroup("initial_attempt", tooltip="Initial token acquisition attempt") as initial_attempt_group: @@ -878,7 +1171,7 @@ with DAG( ) first_token_attempt >> initial_branch_task - initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task] + initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task] # --- Task Group 2: Retry Logic --- with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group: @@ -928,42 +1221,40 @@ with DAG( direct_retry_account_task >> coalesced_retry_data coalesced_retry_data >> retry_token_task retry_token_task >> retry_branch_task - retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task] + retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task, report_bannable_and_continue_task] ban_after_retry_report_task >> report_failure_task - # --- Task Group 3: Download and Processing --- - with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group: - # Coalesce, download, and success tasks + # --- Task Group 3: Success/Continuation Logic --- + with 
TaskGroup("success_and_continuation", tooltip="Push to DL queue and continue loop") as success_group: token_data = coalesce_token_data( get_token_result=first_token_attempt, retry_get_token_result=retry_token_task ) - download_task = download_and_probe(token_data=token_data) - download_branch_task = handle_download_failure_branch.override(trigger_rule='one_failed')() - success_task = mark_url_as_success( + list_formats_task = list_available_formats(token_data=token_data) + success_task = push_auth_success_to_redis( initial_data=initial_data, - downloaded_file_path=download_task, token_data=token_data ) - # Internal dependencies within download group first_token_attempt >> token_data retry_token_task >> token_data - token_data >> download_task - download_task >> download_branch_task - download_branch_task >> report_failure_task - download_task >> success_task + token_data >> list_formats_task >> success_task success_task >> continue_loop_task # --- DAG Dependencies between TaskGroups --- # Initial attempt can lead to retry logic or direct failure - initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task] + initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task] - # Retry logic leads to download processing on success or failure reporting on failure - retry_branch_task >> [download_processing_group, report_failure_task] + # A successful initial attempt bypasses retry and goes straight to the success group + initial_attempt_group >> success_group + + # Retry logic leads to success/continuation on success or failure reporting on failure + retry_branch_task >> [report_failure_task] # Handled within the group + retry_logic_group >> success_group # Ban and report immediately leads to failure reporting ban_and_report_immediately_task >> report_failure_task # Age restriction error leads to failure reporting and continues the loop age_restriction_task >> continue_loop_task + report_bannable_and_continue_task >> continue_loop_task diff --git a/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py b/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py new file mode 100644 index 0000000..68605bf --- /dev/null +++ b/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py @@ -0,0 +1,895 @@ +# -*- coding: utf-8 -*- +# vim:fenc=utf-8 +# +# Copyright © 2024 rl +# +# Distributed under terms of the MIT license. + +""" +DAG for downloading a single YouTube URL based on pre-fetched token data. +This is the "Download Worker" part of a separated Auth/Download pattern. +It receives a job payload with all necessary token info and handles only the +downloading and probing of media files. 
+""" + +from __future__ import annotations + +from airflow.decorators import task, task_group +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.models import Variable +from airflow.models.dag import DAG +from airflow.models.param import Param +from airflow.models.xcom_arg import XComArg +from airflow.operators.dummy import DummyOperator +from airflow.utils.dates import days_ago +from airflow.utils.task_group import TaskGroup +from airflow.api.common.trigger_dag import trigger_dag +from datetime import datetime, timedelta +import concurrent.futures +import json +import logging +import os +import random +import re +import redis +import socket +import time +import traceback +import uuid + +# Import utility functions and Thrift modules +from utils.redis_utils import _get_redis_client +from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext +from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException +from pangramia.yt.tokens_ops import YTTokenOpService +from thrift.protocol import TBinaryProtocol +from thrift.transport import TSocket, TTransport +from thrift.transport.TTransport import TTransportException + +# Configure logging +logger = logging.getLogger(__name__) + + +# --- Client Stats Helper --- + +def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str): + """Updates success/failure statistics for a client type in Redis.""" + if not clients_str: + logger.warning("Cannot update client stats: 'clients' string is empty.") + return + + # Assumption: The service tries clients in the order provided. + # We attribute the result to the first client in the list. + primary_client = clients_str.split(',')[0].strip() + if not primary_client: + logger.warning("Cannot update client stats: could not determine primary client.") + return + + stats_key = "client_stats" + + try: + # Using a pipeline with WATCH for safe concurrent updates. + with redis_client.pipeline() as pipe: + pipe.watch(stats_key) + + current_stats_json = redis_client.hget(stats_key, primary_client) + stats = {} + if current_stats_json: + try: + stats = json.loads(current_stats_json) + except json.JSONDecodeError: + logger.warning(f"Could not parse existing stats for client '{primary_client}'. Resetting stats.") + stats = {} + + stats.setdefault('success_count', 0) + stats.setdefault('failure_count', 0) + + details = { + 'timestamp': time.time(), 'url': url, + 'machine_id': machine_id, 'dag_run_id': dag_run_id, + } + + if status == 'success': + stats['success_count'] += 1 + stats['latest_success'] = details + elif status == 'failure': + stats['failure_count'] += 1 + stats['latest_failure'] = details + + pipe.multi() + pipe.hset(stats_key, primary_client, json.dumps(stats)) + pipe.execute() + + logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.") + + except redis.exceptions.WatchError: + logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. 
Skipping this update.") + except Exception as e: + logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True) + + +# Default settings from Airflow Variables or hardcoded fallbacks +DEFAULT_QUEUE_NAME = 'queue2_dl' +DEFAULT_REDIS_CONN_ID = 'redis_default' +DEFAULT_TIMEOUT = 3600 +DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") +DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) + +# The queue is set to a fallback here. The actual worker-specific queue is +# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py), +# which parses the target queue from the DAG run_id. +DEFAULT_ARGS = { + 'owner': 'airflow', + 'retries': 0, + 'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook. +} + + +# --- Helper Functions --- + +def _extract_video_id(url): + """Extracts YouTube video ID from URL.""" + if not url or not isinstance(url, str): + return None + patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})'] + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + return None + +# ============================================================================= +# TASK DEFINITIONS (TaskFlow API) +# ============================================================================= + +@task +def get_download_job_from_conf(**context): + """ + Gets the download job details (which includes token data) from the DAG run conf. + This is the first task in the download worker DAG. + """ + params = context['params'] + ti = context['task_instance'] + + # --- Worker Pinning Verification --- + # This is a safeguard against a known Airflow issue where clearing a task + # can cause the task_instance_mutation_hook to be skipped, breaking pinning. + # See: https://github.com/apache/airflow/issues/20143 + expected_queue = None + if ti.run_id and '_q_' in ti.run_id: + expected_queue = ti.run_id.split('_q_')[-1] + + if not expected_queue: + # Fallback to conf if run_id parsing fails for some reason + expected_queue = params.get('worker_queue') + + if expected_queue and ti.queue != expected_queue: + error_msg = ( + f"WORKER PINNING FAILURE: Task is running on queue '{ti.queue}' but was expected on '{expected_queue}'. " + "This usually happens after manually clearing a task, which is not the recommended recovery method for this DAG. " + "To recover a failed URL, let the DAG run fail, use the 'ytdlp_mgmt_queues' DAG to requeue the URL, " + "and use the 'ytdlp_ops_orchestrator' to start a new worker loop if needed." + ) + logger.error(error_msg) + raise AirflowException(error_msg) + elif expected_queue: + logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.") + # --- End Verification --- + + # The job data is passed by the dispatcher DAG via 'job_data'. 
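# --- Editor's note (illustrative sketch, not part of this patch) ---
# For orientation: the job payload originates in the auth worker's
# `push_auth_success_to_redis` task and is relayed here by the DL dispatcher.
# Based on the keys referenced in this patch, its shape is roughly the
# following; all values below are placeholders, not real data:
#
#     {
#         "timestamp": 1731850000.0,
#         "dag_run_id": "<auth worker run_id>",
#         "info_json_path": "<job dir>/info_<video_id>_<account_id>_<timestamp>.json",
#         "socks_proxy": "socks5://<host>:<port>",
#         "ytdlp_command": "<command string returned by the token service>",
#         "successful_account_id": "<account_id>",
#         "original_url": "https://www.youtube.com/watch?v=<video_id>",
#         "clients": "mweb"
#     }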
+ job_data = params.get('job_data') + if not job_data: + raise AirflowException("No job_data provided in DAG run configuration.") + + # If job_data is a string, parse it as JSON + if isinstance(job_data, str): + try: + job_data = json.loads(job_data) + except json.JSONDecodeError: + raise AirflowException(f"Could not decode job_data JSON: {job_data}") + + url_to_process = job_data.get('original_url') + if not url_to_process: + raise AirflowException("'original_url' not found in job_data.") + + logger.info(f"Received job for URL '{url_to_process}'.") + + # Mark the URL as in-progress in Redis + try: + redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) + queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) + progress_queue = f"{queue_name}_progress" + client = _get_redis_client(redis_conn_id) + + progress_data = { + 'status': 'in_progress', + 'start_time': time.time(), + 'dag_run_id': context['dag_run'].run_id, + 'hostname': socket.gethostname(), + } + client.hset(progress_queue, url_to_process, json.dumps(progress_data)) + logger.info(f"Marked URL '{url_to_process}' as in-progress.") + except Exception as e: + logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True) + + return job_data + +@task +def list_available_formats(token_data: dict, **context): + """ + Lists available formats for the given video using the info.json. + This is for debugging and informational purposes. + """ + import subprocess + import shlex + + info_json_path = token_data.get('info_json_path') + if not (info_json_path and os.path.exists(info_json_path)): + logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).") + return [] + + try: + cmd = [ + 'yt-dlp', + '--verbose', + '--list-formats', + '--load-info-json', info_json_path, + ] + + copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) + logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}") + + process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) + + if process.stderr: + logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}") + + if process.returncode != 0: + logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}") + + available_formats = [] + if process.stdout: + logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---") + # Parse the output to get format IDs + lines = process.stdout.split('\n') + header_found = False + for line in lines: + if line.startswith('ID '): + header_found = True + continue + if header_found and line.strip() and line.strip()[0].isdigit(): + format_id = line.split()[0] + available_formats.append(format_id) + logger.info(f"Parsed available format IDs: {available_formats}") + + return available_formats + + except Exception as e: + logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True) + return [] + + +@task +def download_and_probe(token_data: dict, available_formats: list[str], **context): + """ + Uses retrieved token data to download and probe media files. + Supports parallel downloading of specific, comma-separated format IDs. + If probing fails, retries downloading only the failed files. 
+ """ + import subprocess + import shlex + import concurrent.futures + + params = context['params'] + info_json_path = token_data.get('info_json_path') + proxy = token_data.get('socks_proxy') + original_url = token_data.get('original_url') + + if not (info_json_path and os.path.exists(info_json_path)): + raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).") + + download_dir = os.path.dirname(info_json_path) + + format_preset = params.get('download_format_preset', 'best_audio') + if format_preset == 'custom': + download_format = params.get('download_format_custom') + if not download_format: + raise AirflowException("Format preset is 'custom' but no custom format string was provided.") + elif format_preset == 'best_audio': + download_format = 'ba[ext=m4a]/bestaudio/best' + elif format_preset == 'formats_0': + download_format = '18,140' + elif format_preset == 'formats_2': + download_format = '18,140,299/298/137/136/135/134/133' + elif format_preset == 'formats_3': + download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318' + else: + download_format = 'ba[ext=m4a]/bestaudio/best' + + output_template = params.get('output_path_template', "%(title)s [%(id)s].f%(format_id)s.%(ext)s") + full_output_path = os.path.join(download_dir, output_template) + retry_on_probe_failure = params.get('retry_on_probe_failure', False) + + def run_yt_dlp_command(format_selector: str): + """Constructs and runs a yt-dlp command, returning a list of final filenames.""" + cmd = [ + 'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path, + '-f', format_selector, '-o', full_output_path, + '--print', 'filename', '--continue', '--no-progress', '--no-simulate', + '--no-write-info-json', '--ignore-errors', '--no-playlist', + ] + + if params.get('fragment_retries'): + cmd.extend(['--fragment-retries', str(params['fragment_retries'])]) + if params.get('limit_rate'): + cmd.extend(['--limit-rate', params['limit_rate']]) + if params.get('socket_timeout'): + cmd.extend(['--socket-timeout', str(params['socket_timeout'])]) + if params.get('min_sleep_interval'): + cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])]) + if params.get('max_sleep_interval'): + cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])]) + if params.get('yt_dlp_test_mode'): + cmd.append('--test') + + downloader = params.get('downloader', 'default') + if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')): + cmd.extend(['--proxy', proxy]) + + gost_process = None + try: + if downloader == 'aria2c': + cmd.extend(['--downloader', 'aria2c']) + downloader_args = params.get('downloader_args_aria2c') + if proxy and proxy.startswith('socks5://'): + import socket + from contextlib import closing + def find_free_port(): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] + local_port = find_free_port() + http_proxy = f"http://127.0.0.1:{local_port}" + logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}") + gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy] + gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + time.sleep(1) + if gost_process.poll() is not None: + stdout, stderr = gost_process.communicate() + logger.error(f"gost failed to start. Exit: {gost_process.returncode}. 
Stdout: {stdout.decode()}. Stderr: {stderr.decode()}") + raise AirflowException("gost proxy tunnel failed to start.") + user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "") + final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}' + cmd.extend(['--downloader-args', final_args_str]) + elif downloader_args: + cmd.extend(['--downloader-args', downloader_args]) + + extra_args = params.get('yt_dlp_extra_args') + if extra_args: + cmd.extend(shlex.split(extra_args)) + if original_url: + cmd.append(original_url) + + copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) + logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}") + process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600) + + if process.stdout: + logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}") + if process.stderr: + # yt-dlp often prints progress and informational messages to stderr + logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}") + + if process.returncode != 0: + logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}") + # STDOUT and STDERR are already logged above. + raise AirflowException(f"yt-dlp command failed for format '{format_selector}'.") + + # In test mode, files are not created, so we only check that yt-dlp returned filenames. + # Otherwise, we verify that the files actually exist on disk. + output_files = [f for f in process.stdout.strip().split('\n') if f] + if not params.get('yt_dlp_test_mode'): + output_files = [f for f in output_files if os.path.exists(f)] + + if not output_files: + log_msg = (f"Test run for format '{format_selector}' did not produce any filenames." + if params.get('yt_dlp_test_mode') else + f"Download for format '{format_selector}' finished but no output files exist.") + exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames." + if params.get('yt_dlp_test_mode') else + f"Download for format '{format_selector}' did not produce a file.") + + logger.error(log_msg) + logger.error(f"Full STDOUT:\n{process.stdout}") + logger.error(f"Full STDERR:\n{process.stderr}") + raise AirflowException(exc_msg) + + log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:" + logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}") + return output_files + finally: + if gost_process: + logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.") + gost_process.terminate() + try: + gost_process.wait(timeout=5) + except subprocess.TimeoutExpired: + gost_process.kill() + gost_process.wait() + + def run_ffmpeg_probe(filename): + """Probes a file with ffmpeg to check for corruption.""" + logger.info(f"Probing downloaded file: {filename}") + try: + subprocess.run(['ffmpeg', '-v', 'error', '-i', filename, '-f', 'null', '-'], check=True, capture_output=True, text=True) + logger.info(f"SUCCESS: Probe confirmed valid media file: {filename}") + except subprocess.CalledProcessError as e: + logger.error(f"ffmpeg probe failed for '{filename}'. 
File may be corrupt.") + logger.error(f"ffmpeg STDERR: {e.stderr}") + raise AirflowException(f"ffmpeg probe failed for {filename}.") + + def _download_and_probe_formats(formats_to_process: list[str] | str): + """ + Helper to download a list of format IDs (or a single complex selector) and probe the results. + Returns a tuple of (successful_files, failed_probe_files). + """ + all_downloaded_files = [] + delay_between_formats = params.get('delay_between_formats_s', 0) + + if isinstance(formats_to_process, list) and formats_to_process: + logger.info(f"Downloading {len(formats_to_process)} format(s) sequentially: {formats_to_process}") + for i, fid in enumerate(formats_to_process): + all_downloaded_files.extend(run_yt_dlp_command(fid)) + if delay_between_formats > 0 and i < len(formats_to_process) - 1: + logger.info(f"Waiting {delay_between_formats}s before next format download...") + time.sleep(delay_between_formats) + + elif isinstance(formats_to_process, str): + logger.info(f"Using complex format selector '{formats_to_process}'. Running as a single command.") + all_downloaded_files = run_yt_dlp_command(formats_to_process) + + if not all_downloaded_files: + logger.warning("Download process completed but produced no files.") + return [], [] + + if params.get('yt_dlp_test_mode'): + logger.info("Test mode is enabled. Skipping probe of output files.") + return all_downloaded_files, [] + + if params.get('skip_probe'): + logger.info("Skipping probe of output files as per configuration.") + return all_downloaded_files, [] + + successful_probes, failed_probes = [], [] + logger.info(f"Probing {len(all_downloaded_files)} downloaded file(s) sequentially...") + for filename in all_downloaded_files: + try: + run_ffmpeg_probe(filename) + successful_probes.append(filename) + except Exception: + failed_probes.append(filename) + + return successful_probes, failed_probes + + # --- Main Execution Logic --- + with open(info_json_path, 'r', encoding='utf-8') as f: + info = json.load(f) + + # Split the format string by commas to get a list of individual format selectors. + # This enables parallel downloads of different formats or format groups. + # For example, '18,140,299/298' becomes ['18', '140', '299/298'], + # and each item will be downloaded in a separate yt-dlp process. + if download_format and isinstance(download_format, str): + formats_to_download_initial = [selector.strip() for selector in download_format.split(',') if selector.strip()] + else: + # Fallback for safety, though download_format should always be a string. + formats_to_download_initial = [] + + if not formats_to_download_initial: + raise AirflowException("No valid download format selectors were found after parsing.") + + # --- Filter requested formats against available formats --- + final_formats_to_download = [] + if not available_formats: + logger.warning("List of available formats is empty. Will attempt to download all requested formats without validation.") + final_formats_to_download = formats_to_download_initial + else: + for selector in formats_to_download_initial: + # A selector can be '140' or '299/298/137' + individual_ids = re.split(r'[/+]', selector) + if any(fid in available_formats for fid in individual_ids): + final_formats_to_download.append(selector) + else: + logger.warning(f"Requested format selector '{selector}' contains no available formats. 
Skipping.") + + if not final_formats_to_download: + raise AirflowException("None of the requested formats are available for this video.") + + # --- Initial Download and Probe --- + successful_files, failed_files = _download_and_probe_formats(final_formats_to_download) + + if params.get('yt_dlp_test_mode'): + logger.info(f"Test mode: yt-dlp returned {len(successful_files)} filenames. Skipping probe failure checks.") + if not successful_files: + raise AirflowException("Test run did not produce any filenames.") + return successful_files + + if not failed_files: + if not successful_files: + raise AirflowException("Download and probe process completed but produced no valid files.") + return successful_files + + # --- Handle Probe Failures and Retry --- + if not retry_on_probe_failure: + raise AirflowException(f"Probe failed for {len(failed_files)} file(s) and retry is disabled: {failed_files}") + + logger.warning(f"Probe failed for {len(failed_files)} file(s). Attempting one re-download for failed files...") + + format_ids_to_retry = [] + # Since each download is now for a specific selector and the output template + # includes the format_id, we can always attempt to extract the format_id + # from the failed filename for a targeted retry. + for f in failed_files: + match = re.search(r'\.f([\d]+)\.', f) + if match: + format_ids_to_retry.append(match.group(1)) + else: + logger.error(f"Could not extract format_id from failed file '{f}'. Cannot retry this specific file.") + formats_to_download_retry = format_ids_to_retry + + if not formats_to_download_retry: + raise AirflowException("Probe failed, but could not determine which formats to retry.") + + # Rename failed files to allow for a fresh download attempt + for f in failed_files: + try: + failed_path = f"{f}.probe_failed_{int(time.time())}" + os.rename(f, failed_path) + logger.info(f"Renamed corrupted file to {failed_path}") + except OSError as rename_err: + logger.error(f"Could not rename corrupted file '{f}': {rename_err}") + + # --- Retry Download and Probe --- + retried_successful_files, retried_failed_files = _download_and_probe_formats(formats_to_download_retry) + + if retried_failed_files: + logger.error(f"Probe failed again for {len(retried_failed_files)} file(s) after retry: {retried_failed_files}") + + final_success_list = successful_files + retried_successful_files + if not final_success_list: + raise AirflowException("All files failed to download or probe correctly, even after retry.") + + logger.info(f"Retry complete. Final success count: {len(final_success_list)} file(s).") + + if params.get('yt_dlp_cleanup_mode', True): + logger.info(f"Cleanup mode is enabled. Creating .empty files and deleting originals for {len(final_success_list)} files.") + for f in final_success_list: + try: + empty_file_path = f"{f}.empty" + with open(empty_file_path, 'w') as fp: + pass # create empty file + logger.info(f"Created empty file: {empty_file_path}") + os.remove(f) + logger.info(f"Deleted original file: {f}") + except Exception as e: + logger.error(f"Error during cleanup for file {f}: {e}", exc_info=True) + # Do not fail the task for a cleanup error, just log it. 
+ + return final_success_list + +@task +def mark_url_as_success(job_data: dict, downloaded_file_paths: list, **context): + """Records the successful download result in Redis.""" + params = context['params'] + url = job_data['original_url'] + result_data = { + 'status': 'success', 'end_time': time.time(), 'url': url, + 'downloaded_file_paths': downloaded_file_paths, **job_data, + 'dag_run_id': context['dag_run'].run_id, + } + client = _get_redis_client(params['redis_conn_id']) + + # Update activity counters + try: + proxy_url = job_data.get('socks_proxy') + account_id = job_data.get('successful_account_id') + now = time.time() + # Use a unique member to prevent collisions, e.g., dag_run_id + member = context['dag_run'].run_id + + if proxy_url: + proxy_key = f"activity:per_proxy:{proxy_url}" + client.zadd(proxy_key, {member: now}) + client.expire(proxy_key, 3600 * 2) # Expire after 2 hours + if account_id: + account_key = f"activity:per_account:{account_id}" + client.zadd(account_key, {member: now}) + client.expire(account_key, 3600 * 2) # Expire after 2 hours + except Exception as e: + logger.error(f"Could not update activity counters: {e}", exc_info=True) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params + _update_client_stats(client, clients_str, 'success', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on success: {e}", exc_info=True) + + progress_queue = f"{params['queue_name']}_progress" + result_queue = f"{params['queue_name']}_result" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored success result for URL '{url}' and removed from progress queue.") + +@task(trigger_rule='one_failed') +def report_failure_and_continue(**context): + """ + Handles a failed download attempt by recording an error report to Redis. + """ + params = context['params'] + ti = context['task_instance'] + + job_data = params.get('job_data', {}) + if isinstance(job_data, str): + try: + job_data = json.loads(job_data) + except json.JSONDecodeError: + job_data = {} + url = job_data.get('original_url', 'unknown') + + # No token errors to collect, just report a generic download failure. + error_details = {'error_message': 'Download or probe stage failed.'} + + logger.error(f"A failure occurred while processing URL '{url}'. 
Reporting to Redis.") + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error_details': error_details + } + + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params + _update_client_stats(client, clients_str, 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on failure: {e}", exc_info=True) + + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") + except Exception as e: + logger.error(f"Could not report failure to Redis: {e}", exc_info=True) + + +@task(trigger_rule='one_failed') +def handle_fatal_error(**context): + """ + Handles fatal, non-retryable errors (e.g., infrastructure issues). + This task reports the failure to Redis to ensure failed URLs are queued + for later reprocessing, but allows the processing loop to continue. + """ + params = context['params'] + ti = context['task_instance'] + + job_data = params.get('job_data', {}) + if isinstance(job_data, str): + try: + job_data = json.loads(job_data) + except json.JSONDecodeError: + job_data = {} + url = job_data.get('original_url', 'unknown') + + error_details = {'error_message': 'Fatal error during download stage.'} + + logger.error(f"A fatal, non-retryable error occurred for URL '{url}'. See previous task logs for details.") + + # Report failure to Redis so the URL can be reprocessed later + try: + client = _get_redis_client(params['redis_conn_id']) + + # Update client-specific stats + try: + machine_id = params.get('machine_id') or socket.gethostname() + clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params + _update_client_stats(client, clients_str, 'failure', url, machine_id, context['dag_run'].run_id) + except Exception as e: + logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True) + + result_data = { + 'status': 'failed', + 'end_time': time.time(), + 'url': url, + 'dag_run_id': context['dag_run'].run_id, + 'error': 'fatal_error', + 'error_message': 'Fatal non-retryable error occurred', + 'error_details': error_details + } + result_queue = f"{params['queue_name']}_result" + fail_queue = f"{params['queue_name']}_fail" + + progress_queue = f"{params['queue_name']}_progress" + + with client.pipeline() as pipe: + pipe.hset(result_queue, url, json.dumps(result_data)) + pipe.hset(fail_queue, url, json.dumps(result_data)) + pipe.hdel(progress_queue, url) + pipe.execute() + + logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.") + except Exception as e: + logger.error(f"Could not report fatal error to Redis: {e}", exc_info=True) + + # Do not fail the DAG run. Allow the processing loop to continue. 
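# --- Editor's note (illustrative sketch, not part of this patch) ---
# The loop continuation below depends on a run_id convention used across this
# DAG family: dispatcher-triggered runs start with 'worker_run_' and, when
# worker pinning is active, encode the target Celery queue after a '_q_'
# marker (parsed both in `get_download_job_from_conf` above and by the
# task_instance_mutation_hook in airflow/config/custom_task_hooks.py).
# With an assumed run_id:
#
#     run_id = "worker_run_20251117_171847_ab12cd34_q_queue-dl-host01"
#     run_id.startswith('worker_run_')   # True  -> loop continuation is allowed
#     run_id.split('_q_')[-1]            # 'queue-dl-host01' -> expected worker queue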
+ logger.warning("A fatal error was handled, but the DAG is configured to continue the processing loop.") + + +@task(trigger_rule='one_success') +def continue_processing_loop(**context): + """ + After a successful run, triggers a new dispatcher to continue the processing loop, + effectively asking for the next URL to be processed. + """ + params = context['params'] + dag_run = context['dag_run'] + + # Do not continue the loop for manual runs of the worker DAG. + # A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'. + if not dag_run.run_id.startswith('worker_run_'): + logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") + return + + # Create a new unique run_id for the dispatcher. + # Using a timestamp and UUID ensures the ID is unique and does not grow in length over time, + # preventing database errors. + new_dispatcher_run_id = f"retriggered_by_worker_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}" + + # Pass all original parameters from the orchestrator through to the new dispatcher run. + conf_to_pass = {k: v for k, v in params.items() if v is not None} + + # The new dispatcher will pull its own job data and determine its own queue, so we don't pass these. + conf_to_pass.pop('job_data', None) + conf_to_pass.pop('worker_queue', None) + + logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.") + trigger_dag( + dag_id='ytdlp_ops_v02_dispatcher_dl', + run_id=new_dispatcher_run_id, + conf=conf_to_pass, + replace_microseconds=False + ) + + +@task.branch(trigger_rule='one_failed') +def handle_download_failure_branch(**context): + """If download or probe fails, routes to the standard failure reporting.""" + logger.warning("Download or probe failed. Reporting failure and continuing loop.") + return 'report_failure_and_continue' + + + + +# ============================================================================= +# DAG Definition with TaskGroups +# ============================================================================= +with DAG( + dag_id='ytdlp_ops_v02_worker_per_url_dl', + default_args=DEFAULT_ARGS, + schedule=None, + start_date=days_ago(1), + catchup=False, + tags=['ytdlp', 'worker'], + doc_md=__doc__, + render_template_as_native_obj=True, + is_paused_upon_creation=True, + params={ + 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"), + 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"), + 'machine_id': Param(None, type=["string", "null"]), + 'clients': Param('mweb,web_camoufox,tv', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"), + 'output_path_template': Param("%(title)s [%(id)s].f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. 
It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."), + 'retry_on_probe_failure': Param(False, type="boolean"), + 'skip_probe': Param(False, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), + 'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), + 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), + 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), + 'fragment_retries': Param(10, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."), + 'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."), + 'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."), + 'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."), + 'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."), + 'download_format_preset': Param( + 'formats_2', + type="string", + enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], + title="Download Format Preset", + description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + ), + 'download_format_custom': Param( + 'ba[ext=m4a]/bestaudio/best', + type="string", + title="Custom Download Format", + description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')." + ), + 'downloader': Param( + 'default', + type="string", + enum=['default', 'aria2c'], + title="Downloader", + description="Choose the downloader for yt-dlp." + ), + 'downloader_args_aria2c': Param( + 'aria2c:-x 4 -k 2M --max-download-limit=3M', + type="string", + title="Aria2c Downloader Arguments", + description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'." + ), + 'yt_dlp_extra_args': Param( + '--no-part --restrict-filenames', + type=["string", "null"], + title="Extra yt-dlp arguments", + description="Extra command-line arguments for yt-dlp during download." 
+ ), + # --- Manual Run / Internal Parameters --- + 'job_data': Param(None, type=["object", "string", "null"], title="[Internal] Job Data from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), + 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), + } +) as dag: + job_data = get_download_job_from_conf() + + # --- Task Instantiation --- + + # Main success/failure handlers + fatal_error_task = handle_fatal_error() + report_failure_task = report_failure_and_continue() + continue_loop_task = continue_processing_loop() + + # --- Download and Processing Group --- + with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group: + list_formats_task = list_available_formats(token_data=job_data) + download_task = download_and_probe( + token_data=job_data, + available_formats=list_formats_task, + ) + download_branch_task = handle_download_failure_branch.override(trigger_rule='one_failed')() + success_task = mark_url_as_success( + job_data=job_data, + downloaded_file_paths=download_task, + ) + + list_formats_task >> download_task + download_task >> download_branch_task + download_branch_task >> report_failure_task + download_task >> success_task + success_task >> continue_loop_task + + # If the initial job setup succeeds, proceed to the download group. + # If it fails, trigger the fatal error handler. This prevents fatal_error_task + # from being an "island" task that gets triggered by any other failure in the DAG. + job_data.operator >> download_processing_group + job_data.operator >> fatal_error_task + + # Any failure path should continue the loop to process the next URL. + report_failure_task >> continue_loop_task + fatal_error_task >> continue_loop_task diff --git a/ansible/MIGRATION.md b/ansible/MIGRATION.md new file mode 100644 index 0000000..78f995b --- /dev/null +++ b/ansible/MIGRATION.md @@ -0,0 +1,9 @@ +# Migration Notes + +This document tracks the process of migrating the Ansible deployment. + +## Guiding Principles + +- No changes to business logic or core functionality are permitted during this phase. +- The focus is solely on resolving file path issues, dependency errors, and structural inconsistencies resulting from the migration of a subset of files. +- All changes should be aimed at making the existing playbooks runnable in the new environment. diff --git a/ansible/README-yt.md b/ansible/README-yt.md new file mode 100644 index 0000000..0695db7 --- /dev/null +++ b/ansible/README-yt.md @@ -0,0 +1,120 @@ +# Ansible-driven YT-DLP / Airflow Cluster – Quick-Start & Cheat-Sheet + +> One playbook = one command to **deploy**, **update**, **restart**, or **re-configure** the entire cluster. + +--- + +## 0. Prerequisites (run once on the **tower** server) + +``` + +--- + +## 1. Ansible Vault Setup (run once on your **local machine**) + +This project uses Ansible Vault to encrypt sensitive data like passwords and API keys. To run the playbooks, you need to provide the vault password. The recommended way is to create a file named `.vault_pass` in the root of the project directory. + +1. **Create the Vault Password File:** + From the project's root directory (e.g., `/opt/yt-ops-services`), create the file. The file should contain only your vault password on a single line. 
+
+   ```bash
+   # Replace 'your_secret_password_here' with your actual vault password
+   echo "your_secret_password_here" > .vault_pass
+   ```
+
+2. **Secure the File:**
+   It's good practice to restrict permissions on this file so only you can read it.
+
+   ```bash
+   chmod 600 .vault_pass
+   ```
+
+The `ansible.cfg` file is configured to automatically look for this `.vault_pass` file in the project root.
+
+---
+
+## 1.5. Cluster & Inventory Management
+
+The Ansible inventory (`ansible/inventory.ini`), host-specific variables (`ansible/host_vars/`), and the master `docker-compose.yaml` are dynamically generated from a central cluster definition file (e.g., `cluster.yml`).
+
+**Whenever you add, remove, or change the IP of a node in your `cluster.yml`, you must re-run the generator script.**
+
+1. **Install Script Dependencies (run once):**
+   The generator script requires `PyYAML` and `Jinja2`. Install them using pip:
+   ```bash
+   pip3 install PyYAML Jinja2
+   ```
+
+2. **Edit Your Cluster Definition:**
+   Modify your `cluster.yml` file (located in the project root) to define your master and worker nodes.
+
+3. **Run the Generator Script:**
+   From the project's root directory, run the following command to update all generated files:
+
+   ```bash
+   # Make sure the script is executable first: chmod +x tools/generate-inventory.py
+   ./tools/generate-inventory.py cluster.yml
+   ```
+
+This ensures that Ansible has the correct host information and that the master node's Docker Compose configuration includes the correct `extra_hosts` for log fetching from workers.
+
+---
+
+## 2. Setup and Basic Usage
+
+### Running Ansible Commands
+
+**IMPORTANT:** All `ansible-playbook` commands should be run from within the `ansible/` directory. This allows Ansible to automatically find the `ansible.cfg` and `inventory.ini` files.
+
+```bash
+cd ansible
+ansible-playbook <playbook-name>.yml
+```
+
+The `ansible.cfg` file is configured to automatically use the `.vault_pass` file located in the project root (one level above `ansible/`), so you **do not** need to specify `--vault-password-file ../.vault_pass` in your commands.
+
+If you run `ansible-playbook` from the project root instead of the `ansible/` directory, you will see warnings about the inventory not being parsed, because Ansible does not automatically find `ansible/ansible.cfg`.
+
+---
+
+## 3. Deployment Scenarios
+
+### Full Cluster Deployment
+
+To deploy or update the entire cluster (master and all workers), run the main playbook. This will build/pull images and restart all services.
+
+```bash
+# Run from inside the ansible/ directory
+ansible-playbook playbook-full.yml
+```
+
+### Targeted & Fast Deployments
+
+For faster development cycles, you can deploy changes to specific parts of the cluster without rebuilding or re-pulling Docker images.
+
+#### Updating Only the Master Node (Fast Deploy)
+
+To sync configuration, code, and restart services on the master node *without* rebuilding the Airflow image or pulling the `ytdlp-ops-server` image, use the `fast_deploy` flag with the master playbook. This is ideal for pushing changes to DAGs, Python code, or config files.
+
+```bash
+# Run from inside the ansible/ directory
+ansible-playbook playbook-master.yml --extra-vars "fast_deploy=true"
+```
+
+#### Updating Only a Specific Worker Node (Fast Deploy)
+
+Similarly, you can update a single worker node.
Replace `dl001` with the hostname of the worker you want to target from your `inventory.ini`. + +```bash +# Run from inside the ansible/ directory +ansible-playbook playbook-worker.yml --limit dl001 --extra-vars "fast_deploy=true" +``` + +#### Updating Only DAGs and Configs + +If you have only changed DAGs or configuration files and don't need to restart any services, you can run a much faster playbook that only syncs the `dags/` and `config/` directories. + +```bash +# Run from inside the ansible/ directory +ansible-playbook playbook-dags.yml +``` diff --git a/ansible/group_vars/all/vault.yml b/ansible/group_vars/all/vault.yml index 3893d1b..5de67e6 100644 --- a/ansible/group_vars/all/vault.yml +++ b/ansible/group_vars/all/vault.yml @@ -6,3 +6,5 @@ vault_vnc_password: "vnc_pwd_Z5xW8cV2bN4mP7lK" vault_ss_password_1: "UCUAR7vRO/u9Zo71nfA13c+/b1MCiJpfZJo+EmEBCfA=" vault_ss_password_2: "tgtQcfjJp/A3F01g4woO0bEQoxij3CAOK/iR1OTPuF4=" vault_dockerhub_password: "dckr_pat_DmFFqwFEdXFvZlgngGY9ooBaq6o" +vault_s3_access_key_id: "admin" +vault_s3_secret_access_key: "0153093693-0009" diff --git a/ansible/playbook-dags.yml b/ansible/playbook-dags.yml index 473c8a5..66b1621 100644 --- a/ansible/playbook-dags.yml +++ b/ansible/playbook-dags.yml @@ -60,3 +60,4 @@ loop: - "airflow.cfg" - "custom_task_hooks.py" + diff --git a/ansible/playbook-full.yml b/ansible/playbook-full.yml index d9ec167..ff5320e 100644 --- a/ansible/playbook-full.yml +++ b/ansible/playbook-full.yml @@ -111,6 +111,53 @@ name: airflow_proxynet driver: bridge + post_tasks: + - name: Sync custom_task_hooks.py to MASTER server + when: inventory_hostname in groups['airflow_master'] + synchronize: + src: "../airflow/config/custom_task_hooks.py" + dest: "{{ airflow_master_dir }}/config/" + archive: yes + rsync_path: "sudo rsync" + + - name: Sync airflow_local_settings.py to MASTER server + when: inventory_hostname in groups['airflow_master'] + synchronize: + src: "../airflow/config/airflow_local_settings.py" + dest: "{{ airflow_master_dir }}/config/" + archive: yes + rsync_path: "sudo rsync" + + - name: Sync custom_task_hooks.py to WORKER server + when: inventory_hostname in groups['airflow_workers'] + synchronize: + src: "../airflow/config/custom_task_hooks.py" + dest: "{{ airflow_worker_dir }}/config/" + archive: yes + rsync_path: "sudo rsync" + + - name: Sync airflow_local_settings.py to WORKER server + when: inventory_hostname in groups['airflow_workers'] + synchronize: + src: "../airflow/config/airflow_local_settings.py" + dest: "{{ airflow_worker_dir }}/config/" + archive: yes + rsync_path: "sudo rsync" + + - name: Restart Airflow services on MASTER to apply hook + when: inventory_hostname in groups['airflow_master'] + ansible.builtin.command: + cmd: "docker compose restart airflow-scheduler airflow-webserver airflow-master-worker airflow-triggerer" + chdir: "{{ airflow_master_dir }}" + become: yes + + - name: Restart Airflow worker on WORKER to apply hook + when: inventory_hostname in groups['airflow_workers'] + ansible.builtin.command: + cmd: "docker compose restart airflow-worker-dl airflow-worker-auth" + chdir: "{{ airflow_worker_dir }}" + become: yes + - name: Deploy master import_playbook: playbook-master.yml when: inventory_hostname in groups['airflow_master'] diff --git a/ansible/playbook-hook.yml b/ansible/playbook-hook.yml index 25629b7..f4f707f 100644 --- a/ansible/playbook-hook.yml +++ b/ansible/playbook-hook.yml @@ -48,6 +48,6 @@ - name: Restart Airflow worker on WORKER when: inventory_hostname in groups['airflow_workers'] 
ansible.builtin.command: - cmd: "docker compose restart airflow-worker" + cmd: "docker compose restart airflow-worker-dl airflow-worker-auth" chdir: "{{ airflow_worker_dir }}" become: yes diff --git a/ansible/playbook-master.yml b/ansible/playbook-master.yml index 0ed92f2..aff9806 100644 --- a/ansible/playbook-master.yml +++ b/ansible/playbook-master.yml @@ -144,6 +144,42 @@ deploy_group_gid: "0" when: deploy_group_gid is not defined or deploy_group_gid == "" + - name: Generate Docker Compose configurations + ansible.builtin.command: > + docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator + args: + chdir: "{{ airflow_master_dir }}" + become: yes + become_user: "{{ ansible_user }}" + register: config_generator_result + changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout" + + - name: Show config generator output + ansible.builtin.debug: + var: config_generator_result.stdout_lines + when: config_generator_result.changed + + - name: Ensure Airflow project directory is writable by the container user (UID 50000) + ansible.builtin.file: + path: "{{ airflow_master_dir }}" + owner: 50000 + group: 50000 + become: yes + + - name: Ensure Airflow subdirectories are writable by the container user (UID 50000) + ansible.builtin.file: + path: "{{ item }}" + owner: 50000 + group: 50000 + recurse: yes + state: directory + loop: + - "{{ airflow_master_dir }}/dags" + - "{{ airflow_master_dir }}/logs" + - "{{ airflow_master_dir }}/plugins" + - "{{ airflow_master_dir }}/config" + become: yes + tasks: - name: Install pipx ansible.builtin.apt: @@ -170,3 +206,23 @@ - name: Include camoufox verification tasks include_tasks: tasks/verify_camoufox.yml when: not fast_deploy | default(false) + + - name: Run regression test + command: > + docker exec -i airflow-regression-runner python3 /opt/airflow/dags/scripts/regression.py + --client "{{ regression_client | default('mweb') }}" + --workers {{ regression_workers | default(4) }} + --workers-per-bunch {{ regression_workers_per_bunch | default(4) }} + --run-time-min {{ regression_run_time_min | default(120) }} + --input-file "{{ regression_input_file | default('/opt/airflow/inputfiles/video_ids.csv') }}" + --progress-interval-min {{ regression_progress_interval_min | default(2) }} + --report-file "{{ regression_report_file | default('/opt/airflow/downloadfiles/regression_report.csv') }}" + {% if regression_cleanup | default(true) %}--cleanup{% endif %} + register: regression_test_result + changed_when: false + when: run_regression_test | default(false) + + - name: Display regression test output + debug: + var: regression_test_result.stdout_lines + when: run_regression_test | default(false) diff --git a/ansible/playbook-sync-local.yml b/ansible/playbook-sync-local.yml new file mode 100644 index 0000000..41ddfba --- /dev/null +++ b/ansible/playbook-sync-local.yml @@ -0,0 +1,108 @@ +--- +- name: Sync Local Development Files to Workers + hosts: airflow_workers + gather_facts: no + vars_files: + - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" + + pre_tasks: + - name: Announce local sync + debug: + msg: "Syncing local dev files to {{ inventory_hostname }} at {{ airflow_worker_dir }}" + + tasks: + - name: Check if yt-dlp is installed + ansible.builtin.command: which yt-dlp + register: ytdlp_check + changed_when: false + failed_when: false + become: yes + become_user: "{{ ansible_user }}" + + - name: Install yt-dlp if not found + ansible.builtin.command: 
python3 -m pip install -U "yt-dlp[default]" --break-system-packages + when: ytdlp_check.rc != 0 + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync thrift_model directory to workers + ansible.posix.synchronize: + src: ../thrift_model/ + dest: "{{ airflow_worker_dir }}/thrift_model/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync pangramia package to workers + ansible.posix.synchronize: + src: ../pangramia/ + dest: "{{ airflow_worker_dir }}/pangramia/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync ytops_client directory to workers + ansible.posix.synchronize: + src: ../ytops_client/ + dest: "{{ airflow_worker_dir }}/ytops_client/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync policies directory to workers + ansible.posix.synchronize: + src: ../policies/ + dest: "{{ airflow_worker_dir }}/policies/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + become: yes + become_user: "{{ ansible_user }}" + + - name: Ensure bin directory exists on workers for client utilities + ansible.builtin.file: + path: "{{ airflow_worker_dir }}/bin" + state: directory + mode: '0755' + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync client utility scripts to workers + ansible.posix.synchronize: + src: "../{{ item }}" + dest: "{{ airflow_worker_dir }}/{{ item }}" + perms: yes + loop: + - "README.client.md" + - "cli.config" + - "format_download.py" + - "get_info_json_client.py" + - "list_formats.py" + - "stress_test_formats.py" + - "stress_enhanced.py" + - "package_client.py" + - "bin/ytops-client" + become: yes + become_user: "{{ ansible_user }}" diff --git a/ansible/playbook-update-regression-script.yml b/ansible/playbook-update-regression-script.yml new file mode 100644 index 0000000..5fd5ead --- /dev/null +++ b/ansible/playbook-update-regression-script.yml @@ -0,0 +1,27 @@ +--- +- name: Update Regression Test Script + hosts: airflow_master + gather_facts: no + vars: + # This should be the root directory of your project on the master host. + # It's set as a variable so you can override it if needed, e.g., + # ansible-playbook ... -e "project_dir=/path/to/your/project" + project_dir: "/srv/airflow_master" + + tasks: + - name: Copy latest regression.py script to the master host + copy: + src: ../airflow/dags/scripts/regression.py + dest: "{{ project_dir }}/dags/scripts/regression.py" + owner: "{{ ansible_user }}" + group: "ytdl" # Assuming the same deploy group as the main playbook + mode: '0644' + become: yes + notify: + - Announce completion + + handlers: + - name: Announce completion + listen: "Announce completion" + debug: + msg: "Regression script has been updated on {{ inventory_hostname }}. You can now run it using 'docker exec'." 
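+
+# A minimal sketch (not the canonical invocation) of running the script manually with
+# 'docker exec', assuming the 'airflow-regression-runner' container name and the default
+# values used by the "Run regression test" task in playbook-master.yml; adjust paths and
+# limits to match your deployment:
+#
+#   docker exec -i airflow-regression-runner python3 /opt/airflow/dags/scripts/regression.py \
+#     --client mweb \
+#     --workers 4 \
+#     --workers-per-bunch 4 \
+#     --run-time-min 120 \
+#     --input-file /opt/airflow/inputfiles/video_ids.csv \
+#     --progress-interval-min 2 \
+#     --report-file /opt/airflow/downloadfiles/regression_report.csv \
+#     --cleanup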
diff --git a/ansible/playbook-worker.yml b/ansible/playbook-worker.yml index d8fe17b..e1fb454 100644 --- a/ansible/playbook-worker.yml +++ b/ansible/playbook-worker.yml @@ -8,7 +8,7 @@ pre_tasks: - name: Announce worker deployment debug: - msg: "Starting deployment for Airflow Worker: {{ inventory_hostname }} ({{ ansible_host }})" + msg: "Starting deployment for Airflow Worker: {{ inventory_hostname }} ({{ ansible_user }}@{{ ansible_host }})" - name: Configure system timezone # Ensures all services and logs on this node use a consistent timezone. @@ -129,6 +129,96 @@ become: yes when: limits_sysctl_config_copy.changed + - name: Create logs directory structure relative to deployment + file: + path: "./logs/yt-dlp-ops/communication_logs" + state: directory + mode: '0755' + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + become: yes + + - name: Build local Docker images (e.g., camoufox) + ansible.builtin.command: > + docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build + args: + chdir: "{{ airflow_worker_dir }}" + become: yes + become_user: "{{ ansible_user }}" + register: docker_build_result + changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout" + + - name: Pull pre-built Docker images for ytdlp-ops services + ansible.builtin.command: > + docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable + args: + chdir: "{{ airflow_worker_dir }}" + become: yes + become_user: "{{ ansible_user }}" + register: docker_pull_result + retries: 3 + delay: 10 + changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout" + + - name: Show docker pull output + ansible.builtin.debug: + var: docker_pull_result.stdout_lines + when: docker_pull_result.changed + + - name: Ensure Airflow project directory is writable by the container user (UID 50000) + ansible.builtin.file: + path: "{{ airflow_worker_dir }}" + owner: 50000 + group: 50000 + become: yes + + - name: Ensure Airflow subdirectories are writable by the container user (UID 50000) + ansible.builtin.file: + path: "{{ item }}" + owner: 50000 + group: 50000 + recurse: yes + state: directory + loop: + - "{{ airflow_worker_dir }}/dags" + - "{{ airflow_worker_dir }}/logs" + - "{{ airflow_worker_dir }}/plugins" + - "{{ airflow_worker_dir }}/config" + become: yes + + - name: Create .dockerignore on worker to exclude runtime data from build context + ansible.builtin.copy: + dest: "{{ airflow_worker_dir }}/.dockerignore" + content: | + # Exclude build artifacts and virtual environments + __pycache__/ + *.pyc + *.pyo + .venv/ + venv/ + + # Exclude sensitive information + .env + .vault_pass + + # Exclude local development and OS-specific files + .DS_Store + .idea/ + *.swp + + # Exclude large directories with runtime data that should not be in the image + logs/ + downloadfiles/ + addfiles/ + *downloads/ + postgres-data/ + redis-data/ + minio-data/ + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0644' + become: yes + tasks: - name: Install pipx ansible.builtin.apt: diff --git a/ansible/playbook-ytdlp-master-only.yml b/ansible/playbook-ytdlp-master-only.yml new file mode 100644 index 0000000..18e75ca --- /dev/null +++ b/ansible/playbook-ytdlp-master-only.yml @@ -0,0 +1,22 @@ +--- +- name: Deploy YTDLP Master Services (Management Role Only) + hosts: airflow_master + gather_facts: no + vars_files: + - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" + - "{{ inventory_dir 
}}/group_vars/all/vault.yml" + tasks: + - name: Announce ytdlp-master-only deployment + debug: + msg: "Starting deployment for YTDLP Master services on: {{ inventory_hostname }}" + + - name: Start/Redeploy ytdlp-ops services without camoufox + community.docker.docker_compose_v2: + project_src: "{{ airflow_master_dir }}" + files: + - configs/docker-compose-ytdlp-ops.yaml + state: present + remove_orphans: true + recreate: always + pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" + become: yes diff --git a/ansible/playbooks/playbook-bgutils-start.yml b/ansible/playbooks/playbook-bgutils-start.yml new file mode 100644 index 0000000..ca83531 --- /dev/null +++ b/ansible/playbooks/playbook-bgutils-start.yml @@ -0,0 +1,19 @@ +--- +- name: Start bgutil-provider service + hosts: all # Use --limit to target specific hosts, e.g., --limit management + become: true + gather_facts: false + vars: + container_name: "bgutil-provider" + + tasks: + - name: "Ensure {{ container_name }} container is started" + community.docker.docker_container: + name: "{{ container_name }}" + state: started + register: container_status + + - name: "Display container status" + ansible.builtin.debug: + msg: "{{ container_name }} was started." + when: container_status.changed diff --git a/ansible/playbooks/playbook-bgutils-stop.yml b/ansible/playbooks/playbook-bgutils-stop.yml new file mode 100644 index 0000000..156f107 --- /dev/null +++ b/ansible/playbooks/playbook-bgutils-stop.yml @@ -0,0 +1,19 @@ +--- +- name: Stop bgutil-provider service + hosts: all # Use --limit to target specific hosts, e.g., --limit management + become: true + gather_facts: false + vars: + container_name: "bgutil-provider" + + tasks: + - name: "Ensure {{ container_name }} container is stopped" + community.docker.docker_container: + name: "{{ container_name }}" + state: stopped + register: container_status + + - name: "Display container status" + ansible.builtin.debug: + msg: "{{ container_name }} was stopped." + when: container_status.changed diff --git a/ansible/playbooks/restart_worker.yml b/ansible/playbooks/restart_worker.yml new file mode 100644 index 0000000..5df5d65 --- /dev/null +++ b/ansible/playbooks/restart_worker.yml @@ -0,0 +1,53 @@ +--- +- name: Restart and Update ytdlp-ops Worker + hosts: all:!af-green + vars: + # This should be the root directory of your project on the target worker machine. + project_dir: "{{ '/srv/airflow_master' if inventory_hostname == 'af-green' else '/srv/airflow_dl_worker' }}" + # This is the path to your compose file, relative to the project_dir. + compose_file: "configs/docker-compose-ytdlp-ops.yaml" + # The specific image to pull for updates. 
+ service_image: "pangramia/ytdlp-ops-server:4.0.1" + + tasks: + - name: "Ensure project directory exists" + ansible.builtin.file: + path: "{{ project_dir }}" + state: directory + mode: '0755' + become: yes + + - name: "Copy get_info_json_client.py to worker" + ansible.builtin.copy: + src: ../../get_info_json_client.py + dest: "{{ project_dir }}/get_info_json_client.py" + mode: '0755' + become: yes + + - name: "Pull the latest image for the ytdlp-ops service" + community.docker.docker_image: + name: "{{ service_image }}" + source: pull + tags: + - pull + + - name: "Take down the ytdlp-ops services" + community.docker.docker_compose_v2: + project_src: "{{ project_dir }}" + files: + - "{{ compose_file }}" + state: absent + remove_volumes: true + tags: + - down + + - name: "Bring up the ytdlp-ops services" + community.docker.docker_compose_v2: + project_src: "{{ project_dir }}" + files: + - "{{ compose_file }}" + state: present + recreate: always # Corresponds to --force-recreate + build: never + tags: + - up diff --git a/ansible/roles/ytdlp-worker/defaults/main.yml b/ansible/roles/ytdlp-worker/defaults/main.yml new file mode 100644 index 0000000..1d467f3 --- /dev/null +++ b/ansible/roles/ytdlp-worker/defaults/main.yml @@ -0,0 +1,3 @@ +--- +# defaults file for ytdlp-worker +camoufox_base_port: 10000 diff --git a/ansible/roles/ytdlp-worker/tasks/main.yml b/ansible/roles/ytdlp-worker/tasks/main.yml index d628112..98a9ceb 100644 --- a/ansible/roles/ytdlp-worker/tasks/main.yml +++ b/ansible/roles/ytdlp-worker/tasks/main.yml @@ -101,6 +101,22 @@ - "envoy.yaml.j2" - "docker-compose.camoufox.yaml.j2" +- name: Sync Airflow build context to worker + synchronize: + src: "../{{ item }}" + dest: "{{ airflow_worker_dir }}/" + archive: yes + recursive: yes + rsync_path: "sudo rsync" + rsync_opts: "{{ rsync_default_opts }}" + loop: + - "airflow/Dockerfile" + - "setup.py" + - "VERSION" + - "yt_ops_services" + - "thrift_model" + - "pangramia" + - name: Create .env file for YT-DLP worker service template: src: "../../templates/.env.j2" @@ -179,6 +195,20 @@ group: "{{ deploy_group }}" become: yes +- name: "Log: Building Airflow image" + debug: + msg: "Building the Airflow image locally. This image contains all dependencies for running DAGs." + +- name: Build Airflow image from local Dockerfile + community.docker.docker_image: + name: "pangramia/ytdlp-ops-airflow:latest" + build: + path: "{{ airflow_worker_dir }}" + dockerfile: "Dockerfile" + source: build + force_source: true + when: not fast_deploy | default(false) + - name: "Log: Building Camoufox (remote browser) image" debug: msg: "Building the Camoufox image locally. This image provides remote-controlled Firefox browsers for token generation." @@ -206,6 +236,27 @@ path: "/srv/shadowsocks-rust/docker-compose.proxies.yaml" register: proxy_compose_file +- name: "Log: Stopping worker services before start" + debug: + msg: "Stopping all worker services to ensure a clean start." 
+ +- name: Stop all worker services + community.docker.docker_compose_v2: + project_src: "{{ airflow_worker_dir }}" + files: + - "configs/docker-compose-ytdlp-ops.yaml" + - "configs/docker-compose.camoufox.yaml" + - "configs/docker-compose.airflow.yml" + state: absent + remove_volumes: true # Corresponds to docker compose down -v + +- name: Forcefully remove project-specific Docker volumes to fix corruption issues + ansible.builtin.shell: "docker volume ls -q --filter 'label=com.docker.compose.project=ytdlp-ops-worker' | xargs -r docker volume rm --force" + become: yes + register: removed_volumes + changed_when: removed_volumes.stdout | length > 0 + failed_when: false + - name: "Log: Starting all worker services" debug: msg: "Starting all worker services: ytdlp-ops, camoufox, and airflow-worker." @@ -220,6 +271,7 @@ state: present remove_orphans: true pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" + recreate: always # Corresponds to --force-recreate - name: Include camoufox verification tasks include_tasks: ../../../tasks/verify_camoufox.yml diff --git a/ansible/templates/.env.j2 b/ansible/templates/.env.j2 index cd2cc13..ab5c478 100644 --- a/ansible/templates/.env.j2 +++ b/ansible/templates/.env.j2 @@ -29,6 +29,14 @@ FLOWER_PASSWORD="{{ vault_flower_password }}" AIRFLOW_UID={{ airflow_uid | default(1003) }} AIRFLOW_GID={{ deploy_group_gid | default(1001) }} +# --- S3 Logging Configuration (for Airflow integration) --- +# Optional: for appending service logs to Airflow's S3 logs. +# These should match the 'minio_default' connection configured in Airflow. +S3_ENDPOINT_URL="{{ s3_endpoint_url | default('') }}" +S3_ACCESS_KEY_ID="{{ vault_s3_access_key_id | default('') }}" +S3_SECRET_ACCESS_KEY="{{ vault_s3_secret_access_key | default('') }}" +S3_REGION_NAME="{{ s3_region_name | default('us-east-1') }}" + # --- Master-specific settings --- {% if 'master' in service_role or 'management' in service_role %} MASTER_HOST_IP={{ hostvars[groups['airflow_master'][0]].ansible_host }} diff --git a/bin/ytops-client b/bin/ytops-client new file mode 100755 index 0000000..46138aa --- /dev/null +++ b/bin/ytops-client @@ -0,0 +1,10 @@ +#!/bin/sh +set -e +# Find the directory where this script is located. +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# Go up one level to the project root. +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +# Set PYTHONPATH to include the project root, so we can import 'ytops_client' +export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}" +# Execute the Python CLI script as a module to handle relative imports +exec python3 -m ytops_client.cli "$@" diff --git a/cli.config b/cli.config new file mode 100644 index 0000000..a6a64fe --- /dev/null +++ b/cli.config @@ -0,0 +1,35 @@ +# yt-dlp configuration for format_download.py + +# Continue on broken downloads +#--continue + +# Do not simulate +--no-simulate + +# Do not write info.json file (we already have it) +--no-write-info-json + +# Continue on download errors +--ignore-errors + +# Do not download playlist +--no-playlist + +# Retry fragments 10 times +--fragment-retries 10 + +# Limit download rate to 5M +--limit-rate 5M + +# Socket timeout +--socket-timeout 15 + +# Sleep interval +--min-sleep-interval 5 +--max-sleep-interval 10 + +# Progress +--progress + + +--no-part diff --git a/get_info_json_client.py b/get_info_json_client.py deleted file mode 100644 index d364cee..0000000 --- a/get_info_json_client.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env python3 -""" -Client script to get info.json from the Thrift service. - -Usage: - python get_info_json_client.py [URL] --host [HOST] --port [PORT] [options] - -Options: - --host HOST Thrift server host - --port PORT Thrift server port - --account-id ID Account ID to use - --output FILE Output file path - --verbose Enable verbose output -""" - -import argparse -import json -import os -import sys -import logging -from typing import Dict, Any, Optional - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger('info_json_client') - -# Import Thrift modules -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from thrift.transport import TTransport -from pangramia.yt.common.ttypes import TokenUpdateMode -from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException -from yt_ops_services.client_utils import get_thrift_client - -def parse_args(): - """Parse command line arguments""" - parser = argparse.ArgumentParser(description='Get info.json from Thrift service') - parser.add_argument('url', help='YouTube URL or video ID') - parser.add_argument('--host', default='127.0.0.1', help="Thrift server host. Using 127.0.0.1 avoids harmless connection errors when the local Envoy proxy only listens on IPv4.") - parser.add_argument('--port', type=int, default=9080, help='Thrift server port') - parser.add_argument('--profile', default='default_profile', help='The profile name (accountId) to use for the request.') - parser.add_argument('--client', help='Specific client to use (e.g., web, ios). Overrides server default. Append "_camoufox" to any client name (e.g., "web_camoufox") to force the browser-based generation strategy.') - parser.add_argument('--output', help='Output file path for the info.json. If not provided, prints to stdout.') - parser.add_argument('--machine-id', help='Identifier for the client machine. 
Defaults to hostname.') - parser.add_argument('--verbose', action='store_true', help='Enable verbose output') - return parser.parse_args() - -def main(): - """Main entry point""" - args = parse_args() - - # Set log level - if args.verbose: - logger.setLevel(logging.DEBUG) - - transport = None - try: - # Create Thrift client - client, transport = get_thrift_client(args.host, args.port) - - # Get token data, which includes the info.json - logger.info(f"Requesting info.json for URL '{args.url}' using profile '{args.profile}'") - - # Prepare arguments for the Thrift call - machine_id = args.machine_id - if not machine_id: - import socket - machine_id = socket.gethostname() - logger.info(f"No machine ID provided, using hostname: {machine_id}") - - thrift_args = { - 'accountId': args.profile, - 'updateType': TokenUpdateMode.AUTO, - 'url': args.url, - 'clients': args.client, - 'machineId': machine_id - } - if args.client: - logger.info(f"Requesting to use specific client: {args.client}") - else: - logger.info("No specific client requested, server will use its default.") - - token_data = client.getOrRefreshToken(**thrift_args) - - if not token_data or not hasattr(token_data, 'infoJson') or not token_data.infoJson: - logger.error("Server did not return valid info.json data.") - print("Error: Server did not return valid info.json data.", file=sys.stderr) - return 1 - - info_json_str = token_data.infoJson - - # Check if the returned info.json is an error report - try: - info_data = json.loads(info_json_str) - if isinstance(info_data, dict) and 'error' in info_data: - error_code = info_data.get('errorCode', 'N/A') - error_message = info_data.get('message', info_data.get('error', 'Unknown error')) - logger.error(f"Server returned an error in info.json (Code: {error_code}): {error_message}") - print(f"Error from server (Code: {error_code}): {error_message}", file=sys.stderr) - # Optionally print the full error JSON - if args.verbose: - print(json.dumps(info_data, indent=2), file=sys.stderr) - return 1 - except json.JSONDecodeError: - logger.error(f"Failed to parse info.json from server: {info_json_str[:200]}...") - print("Error: Failed to parse the info.json response from the server.", file=sys.stderr) - return 1 - - logger.info(f"Successfully retrieved info.json ({len(info_json_str)} bytes)") - - # Write to output file if specified, otherwise print to stdout - if args.output: - try: - with open(args.output, 'w', encoding='utf-8') as f: - # Pretty-print the JSON to the file - json.dump(info_data, f, indent=2) - logger.info(f"Wrote info.json to {args.output}") - print(f"Successfully saved info.json to {args.output}") - except IOError as e: - logger.error(f"Failed to write to output file {args.output}: {e}") - print(f"Error: Failed to write to output file {args.output}: {e}", file=sys.stderr) - return 1 - else: - # Pretty-print the JSON to stdout - print(json.dumps(info_data, indent=2)) - - return 0 - except (PBServiceException, PBUserException) as e: - logger.error(f"A Thrift error occurred: {e.message}", exc_info=args.verbose) - print(f"Error: {e.message}", file=sys.stderr) - if hasattr(e, 'context') and e.context: - print(f"Context: {e.context}", file=sys.stderr) - return 1 - except TTransport.TTransportException as e: - logger.error(f"Connection to server failed: {e}", exc_info=args.verbose) - print(f"Error: Connection to server at {args.host}:{args.port} failed.", file=sys.stderr) - return 1 - except Exception as e: - logger.exception(f"An unexpected error occurred: {e}") - print(f"An unexpected 
error occurred: {e}", file=sys.stderr) - return 1 - finally: - if transport and transport.isOpen(): - transport.close() - logger.info("Thrift connection closed.") - -if __name__ == "__main__": - sys.exit(main()) diff --git a/package_client.py b/package_client.py new file mode 100755 index 0000000..6e17c6d --- /dev/null +++ b/package_client.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +""" +Packages the client-side scripts and their dependencies into a distributable .tar.gz archive. + +This script should be run from the root of the project repository. +""" + +import argparse +import os +import shutil +import sys +import tarfile +from pathlib import Path + +try: + # Assumes yt_ops_services/version.py exists and is importable + from yt_ops_services.version import get_version as get_api_version +except ImportError: + print("Error: Could not import get_version from yt_ops_services.version.", file=sys.stderr) + print("Please ensure yt_ops_services/version.py exists and run this script from the project root.", file=sys.stderr) + sys.exit(1) + +def get_client_version(): + """Reads the client version from the VERSION.client file.""" + try: + return Path('VERSION.client').read_text(encoding='utf-8').strip() + except FileNotFoundError: + print("Error: VERSION.client file not found in the project root.", file=sys.stderr) + sys.exit(1) + +# --- Configuration --- + +# Defines the content of the package. +# Keys are source paths relative to the project root. +# Values are destination paths inside the archive. +PACKAGE_CONTENT = { + 'get_info_json_client.py': 'get_info_json_client.py', + 'list_formats.py': 'list_formats.py', + 'format_download.py': 'format_download.py', + 'stress_test_formats.py': 'stress_test_formats.py', + 'cli.config': 'cli.config', + 'README.client.md': 'README.md', # Rename for convention + 'formats.md': 'formats.md', + 'VERSION.client': 'VERSION.client', + 'yt_ops_services': 'yt_ops_services', + 'thrift_model/gen_py': 'thrift_model/gen_py', +} + +# Client-side Python requirements +CLIENT_REQUIREMENTS = [ + 'thrift==0.16.0', +] + + +def main(): + """Main entry point""" + parser = argparse.ArgumentParser(description="Package the yt-ops-services client tools.") + parser.add_argument('--output-dir', default='dist', help='Directory to save the package file (default: dist).') + args = parser.parse_args() + + api_version = get_api_version() + client_version = get_client_version() + package_name = f"yt-ops-services-client-{api_version}-{client_version}" + archive_filename = f"{package_name}.tar.gz" + + os.makedirs(args.output_dir, exist_ok=True) + archive_path = os.path.join(args.output_dir, archive_filename) + + staging_dir = Path(args.output_dir) / f"{package_name}-staging" + + print(f"Creating client package: {archive_filename}") + + if staging_dir.exists(): + shutil.rmtree(staging_dir) + staging_dir.mkdir(parents=True) + + package_root = staging_dir / package_name + package_root.mkdir() + + try: + print("Staging files...") + for src, dest in PACKAGE_CONTENT.items(): + src_path = Path(src) + dest_path = package_root / dest + + if not src_path.exists(): + print(f"Warning: Source not found, skipping: {src_path}", file=sys.stderr) + continue + + dest_path.parent.mkdir(parents=True, exist_ok=True) + + if src_path.is_dir(): + shutil.copytree(src_path, dest_path) + else: + shutil.copy2(src_path, dest_path) + + # Create __init__.py to ensure thrift_model is a package + (package_root / 'thrift_model/__init__.py').touch() + + print("Creating requirements.txt...") + (package_root / 
'requirements.txt').write_text('\n'.join(CLIENT_REQUIREMENTS) + '\n', encoding='utf-8') + + print(f"Creating archive at {archive_path}...") + with tarfile.open(archive_path, "w:gz") as tar: + tar.add(package_root, arcname=package_name) + + print("\nPackage created successfully!") + print(f" -> {archive_path}") + + finally: + if staging_dir.exists(): + print("Cleaning up staging directory...") + shutil.rmtree(staging_dir) + + +if __name__ == "__main__": + main() diff --git a/pangramia/__init__.py b/pangramia/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pangramia/__pycache__/__init__.cpython-39.pyc b/pangramia/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5bdb0c4694e77bb27cb7abc4f8b94555a192c75 GIT binary patch literal 148 zcmYe~<>g`k0z12B86f&Gh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6vKKfje%S=u!)-OoROD{^y%}msfkI&4@EQycTE2zB1VUwGmQks)$ L2Qu 2: + from urllib.parse import urlparse +else: + from urlparse import urlparse +from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient +from thrift.protocol.TBinaryProtocol import TBinaryProtocol + +from pangramia.base_service import BaseService +from pangramia.base_service.ttypes import * + +if len(sys.argv) <= 1 or sys.argv[1] == '--help': + print('') + print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') + print('') + print('Functions:') + print(' bool ping()') + print(' bool reportError(string message, details)') + print(' void shutdown()') + print('') + sys.exit(0) + +pp = pprint.PrettyPrinter(indent=2) +host = 'localhost' +port = 9090 +uri = '' +framed = False +ssl = False +validate = True +ca_certs = None +keyfile = None +certfile = None +http = False +argi = 1 + +if sys.argv[argi] == '-h': + parts = sys.argv[argi + 1].split(':') + host = parts[0] + if len(parts) > 1: + port = int(parts[1]) + argi += 2 + +if sys.argv[argi] == '-u': + url = urlparse(sys.argv[argi + 1]) + parts = url[1].split(':') + host = parts[0] + if len(parts) > 1: + port = int(parts[1]) + else: + port = 80 + uri = url[2] + if url[4]: + uri += '?%s' % url[4] + http = True + argi += 2 + +if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': + framed = True + argi += 1 + +if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': + ssl = True + argi += 1 + +if sys.argv[argi] == '-novalidate': + validate = False + argi += 1 + +if sys.argv[argi] == '-ca_certs': + ca_certs = sys.argv[argi+1] + argi += 2 + +if sys.argv[argi] == '-keyfile': + keyfile = sys.argv[argi+1] + argi += 2 + +if sys.argv[argi] == '-certfile': + certfile = sys.argv[argi+1] + argi += 2 + +cmd = sys.argv[argi] +args = sys.argv[argi + 1:] + +if http: + transport = THttpClient.THttpClient(host, port, uri) +else: + if ssl: + socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) + else: + socket = TSocket.TSocket(host, port) + if framed: + transport = TTransport.TFramedTransport(socket) + else: + transport = TTransport.TBufferedTransport(socket) +protocol = TBinaryProtocol(transport) +client = BaseService.Client(protocol) +transport.open() + +if cmd == 'ping': + if len(args) != 0: + print('ping requires 0 args') + sys.exit(1) + pp.pprint(client.ping()) + +elif cmd == 'reportError': + if len(args) != 2: + print('reportError requires 2 args') + sys.exit(1) + pp.pprint(client.reportError(args[0], eval(args[1]),)) + +elif cmd == 'shutdown': + 
if len(args) != 0: + print('shutdown requires 0 args') + sys.exit(1) + pp.pprint(client.shutdown()) + +else: + print('Unrecognized method %s' % cmd) + sys.exit(1) + +transport.close() diff --git a/pangramia/base_service/BaseService.py b/pangramia/base_service/BaseService.py new file mode 100644 index 0000000..b6cf1f4 --- /dev/null +++ b/pangramia/base_service/BaseService.py @@ -0,0 +1,564 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +import logging +from .ttypes import * +from thrift.Thrift import TProcessor +from thrift.transport import TTransport +all_structs = [] + + +class Iface(object): + def ping(self): + pass + + def reportError(self, message, details): + """ + Parameters: + - message + - details + + """ + pass + + def shutdown(self): + pass + + +class Client(Iface): + def __init__(self, iprot, oprot=None): + self._iprot = self._oprot = iprot + if oprot is not None: + self._oprot = oprot + self._seqid = 0 + + def ping(self): + self.send_ping() + return self.recv_ping() + + def send_ping(self): + self._oprot.writeMessageBegin('ping', TMessageType.CALL, self._seqid) + args = ping_args() + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_ping(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = ping_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "ping failed: unknown result") + + def reportError(self, message, details): + """ + Parameters: + - message + - details + + """ + self.send_reportError(message, details) + return self.recv_reportError() + + def send_reportError(self, message, details): + self._oprot.writeMessageBegin('reportError', TMessageType.CALL, self._seqid) + args = reportError_args() + args.message = message + args.details = details + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_reportError(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = reportError_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "reportError failed: unknown result") + + def shutdown(self): + self.send_shutdown() + + def send_shutdown(self): + self._oprot.writeMessageBegin('shutdown', TMessageType.ONEWAY, self._seqid) + args = shutdown_args() + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + +class Processor(Iface, TProcessor): + def __init__(self, handler): + self._handler = handler + self._processMap = {} + self._processMap["ping"] = 
Processor.process_ping + self._processMap["reportError"] = Processor.process_reportError + self._processMap["shutdown"] = Processor.process_shutdown + self._on_message_begin = None + + def on_message_begin(self, func): + self._on_message_begin = func + + def process(self, iprot, oprot): + (name, type, seqid) = iprot.readMessageBegin() + if self._on_message_begin: + self._on_message_begin(name, type, seqid) + if name not in self._processMap: + iprot.skip(TType.STRUCT) + iprot.readMessageEnd() + x = TApplicationException(TApplicationException.UNKNOWN_METHOD, 'Unknown function %s' % (name)) + oprot.writeMessageBegin(name, TMessageType.EXCEPTION, seqid) + x.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + return + else: + self._processMap[name](self, seqid, iprot, oprot) + return True + + def process_ping(self, seqid, iprot, oprot): + args = ping_args() + args.read(iprot) + iprot.readMessageEnd() + result = ping_result() + try: + result.success = self._handler.ping() + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("ping", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_reportError(self, seqid, iprot, oprot): + args = reportError_args() + args.read(iprot) + iprot.readMessageEnd() + result = reportError_result() + try: + result.success = self._handler.reportError(args.message, args.details) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("reportError", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_shutdown(self, seqid, iprot, oprot): + args = shutdown_args() + args.read(iprot) + iprot.readMessageEnd() + try: + self._handler.shutdown() + except TTransport.TTransportException: + raise + except Exception: + logging.exception('Exception in oneway handler') + +# HELPER FUNCTIONS AND STRUCTURES + + +class ping_args(object): + + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == 
TType.STOP: + break + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('ping_args') + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(ping_args) +ping_args.thrift_spec = ( +) + + +class ping_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('ping_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(ping_result) +ping_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class reportError_args(object): + """ + Attributes: + - message + - details + + """ + + + def __init__(self, message=None, details=None,): + self.message = message + self.details = details + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, 
TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.message = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.MAP: + self.details = {} + (_ktype1, _vtype2, _size0) = iprot.readMapBegin() + for _i4 in range(_size0): + _key5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + _val6 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + self.details[_key5] = _val6 + iprot.readMapEnd() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('reportError_args') + if self.message is not None: + oprot.writeFieldBegin('message', TType.STRING, 1) + oprot.writeString(self.message.encode('utf-8') if sys.version_info[0] == 2 else self.message) + oprot.writeFieldEnd() + if self.details is not None: + oprot.writeFieldBegin('details', TType.MAP, 2) + oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.details)) + for kiter7, viter8 in self.details.items(): + oprot.writeString(kiter7.encode('utf-8') if sys.version_info[0] == 2 else kiter7) + oprot.writeString(viter8.encode('utf-8') if sys.version_info[0] == 2 else viter8) + oprot.writeMapEnd() + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(reportError_args) +reportError_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'message', 'UTF8', None, ), # 1 + (2, TType.MAP, 'details', (TType.STRING, 'UTF8', TType.STRING, 'UTF8', False), None, ), # 2 +) + + +class reportError_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + 
iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('reportError_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(reportError_result) +reportError_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class shutdown_args(object): + + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('shutdown_args') + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(shutdown_args) +shutdown_args.thrift_spec = ( +) +fix_spec(all_structs) +del all_structs diff --git a/pangramia/base_service/__init__.py b/pangramia/base_service/__init__.py new file mode 100644 index 0000000..f8be3f5 --- /dev/null +++ b/pangramia/base_service/__init__.py @@ -0,0 +1 @@ +__all__ = ['ttypes', 'constants', 'BaseService'] diff --git a/pangramia/base_service/constants.py b/pangramia/base_service/constants.py new file mode 100644 index 0000000..09a78b3 --- /dev/null +++ b/pangramia/base_service/constants.py @@ -0,0 +1,14 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +from .ttypes import * diff --git a/pangramia/base_service/ttypes.py b/pangramia/base_service/ttypes.py new file mode 
100644 index 0000000..3bfb47f --- /dev/null +++ b/pangramia/base_service/ttypes.py @@ -0,0 +1,20 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +import pangramia.yt.common.ttypes +import pangramia.yt.exceptions.ttypes + +from thrift.transport import TTransport +all_structs = [] +fix_spec(all_structs) +del all_structs diff --git a/pangramia/yt/__init__.py b/pangramia/yt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pangramia/yt/common/__init__.py b/pangramia/yt/common/__init__.py new file mode 100644 index 0000000..adefd8e --- /dev/null +++ b/pangramia/yt/common/__init__.py @@ -0,0 +1 @@ +__all__ = ['ttypes', 'constants'] diff --git a/pangramia/yt/common/constants.py b/pangramia/yt/common/constants.py new file mode 100644 index 0000000..09a78b3 --- /dev/null +++ b/pangramia/yt/common/constants.py @@ -0,0 +1,14 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +from .ttypes import * diff --git a/pangramia/yt/common/ttypes.py b/pangramia/yt/common/ttypes.py new file mode 100644 index 0000000..063c837 --- /dev/null +++ b/pangramia/yt/common/ttypes.py @@ -0,0 +1,1403 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys + +from thrift.transport import TTransport +all_structs = [] + + +class ErrorCode(object): + """ + Standard error codes for service exceptions. 
+ + """ + UNKNOWN = 0 + NOT_IMPLEMENTED = 1 + INTERNAL_ERROR = 2 + INVALID_REQUEST = 3 + PROXY_UNAVAILABLE = 4 + ACCOUNT_UNAVAILABLE = 5 + BOT_DETECTED = 6 + BOT_DETECTION_SIGN_IN_REQUIRED = 7 + SABR_STREAMING_DETECTED = 8 + + _VALUES_TO_NAMES = { + 0: "UNKNOWN", + 1: "NOT_IMPLEMENTED", + 2: "INTERNAL_ERROR", + 3: "INVALID_REQUEST", + 4: "PROXY_UNAVAILABLE", + 5: "ACCOUNT_UNAVAILABLE", + 6: "BOT_DETECTED", + 7: "BOT_DETECTION_SIGN_IN_REQUIRED", + 8: "SABR_STREAMING_DETECTED", + } + + _NAMES_TO_VALUES = { + "UNKNOWN": 0, + "NOT_IMPLEMENTED": 1, + "INTERNAL_ERROR": 2, + "INVALID_REQUEST": 3, + "PROXY_UNAVAILABLE": 4, + "ACCOUNT_UNAVAILABLE": 5, + "BOT_DETECTED": 6, + "BOT_DETECTION_SIGN_IN_REQUIRED": 7, + "SABR_STREAMING_DETECTED": 8, + } + + +class JobState(object): + SUCCESS = 0 + FAIL = 1 + BOT_FORBIDDEN_ON_URL_ACCESS = 2 + BOT_FORBIDDEN_ON_FILE_DOWNLOAD = 3 + BOT_CAPTCHA = 4 + BOT_AUTH_RELOGIN_REQUIRED = 5 + BOT_AUTH_SMS_REQUIRED = 6 + BOT_AUTH_DEVICE_QR_REQUIRED = 7 + BOT_ACCOUNT_BANNED = 8 + BOT_IP_BANNED = 9 + + _VALUES_TO_NAMES = { + 0: "SUCCESS", + 1: "FAIL", + 2: "BOT_FORBIDDEN_ON_URL_ACCESS", + 3: "BOT_FORBIDDEN_ON_FILE_DOWNLOAD", + 4: "BOT_CAPTCHA", + 5: "BOT_AUTH_RELOGIN_REQUIRED", + 6: "BOT_AUTH_SMS_REQUIRED", + 7: "BOT_AUTH_DEVICE_QR_REQUIRED", + 8: "BOT_ACCOUNT_BANNED", + 9: "BOT_IP_BANNED", + } + + _NAMES_TO_VALUES = { + "SUCCESS": 0, + "FAIL": 1, + "BOT_FORBIDDEN_ON_URL_ACCESS": 2, + "BOT_FORBIDDEN_ON_FILE_DOWNLOAD": 3, + "BOT_CAPTCHA": 4, + "BOT_AUTH_RELOGIN_REQUIRED": 5, + "BOT_AUTH_SMS_REQUIRED": 6, + "BOT_AUTH_DEVICE_QR_REQUIRED": 7, + "BOT_ACCOUNT_BANNED": 8, + "BOT_IP_BANNED": 9, + } + + +class TokenUpdateMode(object): + AUTOREFRESH_AND_REMAIN_ANONYMOUS = 0 + AUTOREFRESH_AND_ALLOW_AUTH = 1 + AUTOREFRESH_AND_ONLY_AUTH = 2 + CLEANUP_THEN_AUTOREFRESH_AND_ONLY_AUTH = 3 + CLEANUP_THEN_AUTOREFRESH_AND_REMAIN_ANONYMOUS = 4 + CLEANUP_THEN_AUTOREFRESH_AND_ALLOW_AUTH = 5 + AUTO = 6 + + _VALUES_TO_NAMES = { + 0: "AUTOREFRESH_AND_REMAIN_ANONYMOUS", + 1: "AUTOREFRESH_AND_ALLOW_AUTH", + 2: "AUTOREFRESH_AND_ONLY_AUTH", + 3: "CLEANUP_THEN_AUTOREFRESH_AND_ONLY_AUTH", + 4: "CLEANUP_THEN_AUTOREFRESH_AND_REMAIN_ANONYMOUS", + 5: "CLEANUP_THEN_AUTOREFRESH_AND_ALLOW_AUTH", + 6: "AUTO", + } + + _NAMES_TO_VALUES = { + "AUTOREFRESH_AND_REMAIN_ANONYMOUS": 0, + "AUTOREFRESH_AND_ALLOW_AUTH": 1, + "AUTOREFRESH_AND_ONLY_AUTH": 2, + "CLEANUP_THEN_AUTOREFRESH_AND_ONLY_AUTH": 3, + "CLEANUP_THEN_AUTOREFRESH_AND_REMAIN_ANONYMOUS": 4, + "CLEANUP_THEN_AUTOREFRESH_AND_ALLOW_AUTH": 5, + "AUTO": 6, + } + + +class AccountPairState(object): + ACTIVE = 0 + PAUSED = 1 + REMOVED = 2 + IN_PROGRESS = 3 + ALL = 4 + + _VALUES_TO_NAMES = { + 0: "ACTIVE", + 1: "PAUSED", + 2: "REMOVED", + 3: "IN_PROGRESS", + 4: "ALL", + } + + _NAMES_TO_VALUES = { + "ACTIVE": 0, + "PAUSED": 1, + "REMOVED": 2, + "IN_PROGRESS": 3, + "ALL": 4, + } + + +class JobTokenData(object): + """ + Attributes: + - infoJson + - ytdlpCommand + - socks + - jobId + - url + - cookiesBlob + - requestSummary + - communicationLogPaths + - serverVersionInfo + + """ + + + def __init__(self, infoJson=None, ytdlpCommand=None, socks=None, jobId=None, url=None, cookiesBlob=None, requestSummary=None, communicationLogPaths=None, serverVersionInfo=None,): + self.infoJson = infoJson + self.ytdlpCommand = ytdlpCommand + self.socks = socks + self.jobId = jobId + self.url = url + self.cookiesBlob = cookiesBlob + self.requestSummary = requestSummary + self.communicationLogPaths = communicationLogPaths + self.serverVersionInfo = serverVersionInfo + + def read(self, iprot): 
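+        # Deserialization entry point: use the accelerated C decoder when the protocol and transport support it,
+        # otherwise walk the wire fields one by one and skip any field ids this struct does not declare.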
+ if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.infoJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.ytdlpCommand = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.socks = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.jobId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRING: + self.cookiesBlob = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.requestSummary = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.LIST: + self.communicationLogPaths = [] + (_etype3, _size0) = iprot.readListBegin() + for _i4 in range(_size0): + _elem5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + self.communicationLogPaths.append(_elem5) + iprot.readListEnd() + else: + iprot.skip(ftype) + elif fid == 9: + if ftype == TType.STRING: + self.serverVersionInfo = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('JobTokenData') + if self.infoJson is not None: + oprot.writeFieldBegin('infoJson', TType.STRING, 1) + oprot.writeString(self.infoJson.encode('utf-8') if sys.version_info[0] == 2 else self.infoJson) + oprot.writeFieldEnd() + if self.ytdlpCommand is not None: + oprot.writeFieldBegin('ytdlpCommand', TType.STRING, 2) + oprot.writeString(self.ytdlpCommand.encode('utf-8') if sys.version_info[0] == 2 else self.ytdlpCommand) + oprot.writeFieldEnd() + if self.socks is not None: + oprot.writeFieldBegin('socks', TType.STRING, 3) + oprot.writeString(self.socks.encode('utf-8') if sys.version_info[0] == 2 else self.socks) + oprot.writeFieldEnd() + if self.jobId is not None: + oprot.writeFieldBegin('jobId', TType.STRING, 4) + oprot.writeString(self.jobId.encode('utf-8') if sys.version_info[0] == 2 else self.jobId) + oprot.writeFieldEnd() + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 5) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + 
oprot.writeFieldEnd() + if self.cookiesBlob is not None: + oprot.writeFieldBegin('cookiesBlob', TType.STRING, 6) + oprot.writeString(self.cookiesBlob.encode('utf-8') if sys.version_info[0] == 2 else self.cookiesBlob) + oprot.writeFieldEnd() + if self.requestSummary is not None: + oprot.writeFieldBegin('requestSummary', TType.STRING, 7) + oprot.writeString(self.requestSummary.encode('utf-8') if sys.version_info[0] == 2 else self.requestSummary) + oprot.writeFieldEnd() + if self.communicationLogPaths is not None: + oprot.writeFieldBegin('communicationLogPaths', TType.LIST, 8) + oprot.writeListBegin(TType.STRING, len(self.communicationLogPaths)) + for iter6 in self.communicationLogPaths: + oprot.writeString(iter6.encode('utf-8') if sys.version_info[0] == 2 else iter6) + oprot.writeListEnd() + oprot.writeFieldEnd() + if self.serverVersionInfo is not None: + oprot.writeFieldBegin('serverVersionInfo', TType.STRING, 9) + oprot.writeString(self.serverVersionInfo.encode('utf-8') if sys.version_info[0] == 2 else self.serverVersionInfo) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class AccountData(object): + """ + Attributes: + - username + - password + - countryCode + + """ + + + def __init__(self, username=None, password=None, countryCode=None,): + self.username = username + self.password = password + self.countryCode = countryCode + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.username = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.password = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.countryCode = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('AccountData') + if self.username is not None: + oprot.writeFieldBegin('username', TType.STRING, 1) + oprot.writeString(self.username.encode('utf-8') if sys.version_info[0] == 2 else self.username) + oprot.writeFieldEnd() + if self.password is not None: + oprot.writeFieldBegin('password', TType.STRING, 2) + oprot.writeString(self.password.encode('utf-8') if sys.version_info[0] == 2 else self.password) + oprot.writeFieldEnd() + if self.countryCode is not None: + oprot.writeFieldBegin('countryCode', TType.STRING, 3) + oprot.writeString(self.countryCode.encode('utf-8') if 
sys.version_info[0] == 2 else self.countryCode) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.username is None: + raise TProtocolException(message='Required field username is unset!') + if self.password is None: + raise TProtocolException(message='Required field password is unset!') + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class ProxyData(object): + """ + Attributes: + - proxyUrl + - countryCode + + """ + + + def __init__(self, proxyUrl=None, countryCode=None,): + self.proxyUrl = proxyUrl + self.countryCode = countryCode + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.proxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.countryCode = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('ProxyData') + if self.proxyUrl is not None: + oprot.writeFieldBegin('proxyUrl', TType.STRING, 1) + oprot.writeString(self.proxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.proxyUrl) + oprot.writeFieldEnd() + if self.countryCode is not None: + oprot.writeFieldBegin('countryCode', TType.STRING, 2) + oprot.writeString(self.countryCode.encode('utf-8') if sys.version_info[0] == 2 else self.countryCode) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.proxyUrl is None: + raise TProtocolException(message='Required field proxyUrl is unset!') + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class AccountPairWithState(object): + """ + Attributes: + - accountId + - proxyId + - accountPairState + - machineId + + """ + + + def __init__(self, accountId=None, proxyId=None, accountPairState=None, machineId=None,): + self.accountId = accountId + self.proxyId = proxyId + self.accountPairState = accountPairState + self.machineId = machineId + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype 
== TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.proxyId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I32: + self.accountPairState = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.machineId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('AccountPairWithState') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.proxyId is not None: + oprot.writeFieldBegin('proxyId', TType.STRING, 2) + oprot.writeString(self.proxyId.encode('utf-8') if sys.version_info[0] == 2 else self.proxyId) + oprot.writeFieldEnd() + if self.accountPairState is not None: + oprot.writeFieldBegin('accountPairState', TType.I32, 3) + oprot.writeI32(self.accountPairState) + oprot.writeFieldEnd() + if self.machineId is not None: + oprot.writeFieldBegin('machineId', TType.STRING, 4) + oprot.writeString(self.machineId.encode('utf-8') if sys.version_info[0] == 2 else self.machineId) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.accountId is None: + raise TProtocolException(message='Required field accountId is unset!') + if self.proxyId is None: + raise TProtocolException(message='Required field proxyId is unset!') + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class JobData(object): + """ + Attributes: + - jobId + - url + - cookiesBlob + - potoken + - visitorId + - ytdlpCommand + - createdTime + - telemetry + - state + - errorMessage + - socks5Id + + """ + + + def __init__(self, jobId=None, url=None, cookiesBlob=None, potoken=None, visitorId=None, ytdlpCommand=None, createdTime=None, telemetry=None, state=None, errorMessage=None, socks5Id=None,): + self.jobId = jobId + self.url = url + self.cookiesBlob = cookiesBlob + self.potoken = potoken + self.visitorId = visitorId + self.ytdlpCommand = ytdlpCommand + self.createdTime = createdTime + self.telemetry = telemetry + self.state = state + self.errorMessage = errorMessage + self.socks5Id = socks5Id + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.jobId = 
iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.cookiesBlob = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.potoken = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.visitorId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRING: + self.ytdlpCommand = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.createdTime = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.MAP: + self.telemetry = {} + (_ktype8, _vtype9, _size7) = iprot.readMapBegin() + for _i11 in range(_size7): + _key12 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + _val13 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + self.telemetry[_key12] = _val13 + iprot.readMapEnd() + else: + iprot.skip(ftype) + elif fid == 9: + if ftype == TType.I32: + self.state = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 10: + if ftype == TType.STRING: + self.errorMessage = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 11: + if ftype == TType.STRING: + self.socks5Id = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('JobData') + if self.jobId is not None: + oprot.writeFieldBegin('jobId', TType.STRING, 1) + oprot.writeString(self.jobId.encode('utf-8') if sys.version_info[0] == 2 else self.jobId) + oprot.writeFieldEnd() + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 2) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + if self.cookiesBlob is not None: + oprot.writeFieldBegin('cookiesBlob', TType.STRING, 3) + oprot.writeString(self.cookiesBlob.encode('utf-8') if sys.version_info[0] == 2 else self.cookiesBlob) + oprot.writeFieldEnd() + if self.potoken is not None: + oprot.writeFieldBegin('potoken', TType.STRING, 4) + oprot.writeString(self.potoken.encode('utf-8') if sys.version_info[0] == 2 else self.potoken) + oprot.writeFieldEnd() + if self.visitorId is not None: + oprot.writeFieldBegin('visitorId', TType.STRING, 5) + oprot.writeString(self.visitorId.encode('utf-8') if sys.version_info[0] == 2 else 
self.visitorId) + oprot.writeFieldEnd() + if self.ytdlpCommand is not None: + oprot.writeFieldBegin('ytdlpCommand', TType.STRING, 6) + oprot.writeString(self.ytdlpCommand.encode('utf-8') if sys.version_info[0] == 2 else self.ytdlpCommand) + oprot.writeFieldEnd() + if self.createdTime is not None: + oprot.writeFieldBegin('createdTime', TType.STRING, 7) + oprot.writeString(self.createdTime.encode('utf-8') if sys.version_info[0] == 2 else self.createdTime) + oprot.writeFieldEnd() + if self.telemetry is not None: + oprot.writeFieldBegin('telemetry', TType.MAP, 8) + oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.telemetry)) + for kiter14, viter15 in self.telemetry.items(): + oprot.writeString(kiter14.encode('utf-8') if sys.version_info[0] == 2 else kiter14) + oprot.writeString(viter15.encode('utf-8') if sys.version_info[0] == 2 else viter15) + oprot.writeMapEnd() + oprot.writeFieldEnd() + if self.state is not None: + oprot.writeFieldBegin('state', TType.I32, 9) + oprot.writeI32(self.state) + oprot.writeFieldEnd() + if self.errorMessage is not None: + oprot.writeFieldBegin('errorMessage', TType.STRING, 10) + oprot.writeString(self.errorMessage.encode('utf-8') if sys.version_info[0] == 2 else self.errorMessage) + oprot.writeFieldEnd() + if self.socks5Id is not None: + oprot.writeFieldBegin('socks5Id', TType.STRING, 11) + oprot.writeString(self.socks5Id.encode('utf-8') if sys.version_info[0] == 2 else self.socks5Id) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.jobId is None: + raise TProtocolException(message='Required field jobId is unset!') + if self.url is None: + raise TProtocolException(message='Required field url is unset!') + if self.cookiesBlob is None: + raise TProtocolException(message='Required field cookiesBlob is unset!') + if self.potoken is None: + raise TProtocolException(message='Required field potoken is unset!') + if self.visitorId is None: + raise TProtocolException(message='Required field visitorId is unset!') + if self.ytdlpCommand is None: + raise TProtocolException(message='Required field ytdlpCommand is unset!') + if self.createdTime is None: + raise TProtocolException(message='Required field createdTime is unset!') + if self.telemetry is None: + raise TProtocolException(message='Required field telemetry is unset!') + if self.state is None: + raise TProtocolException(message='Required field state is unset!') + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class RichCollectionPagination(object): + """ + Attributes: + - hasNext + - totalCount + - page + - pageSize + + """ + + + def __init__(self, hasNext=None, totalCount=None, page=None, pageSize=None,): + self.hasNext = hasNext + self.totalCount = totalCount + self.page = page + self.pageSize = pageSize + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.BOOL: + self.hasNext = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 
2: + if ftype == TType.I32: + self.totalCount = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I32: + self.page = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.I32: + self.pageSize = iprot.readI32() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('RichCollectionPagination') + if self.hasNext is not None: + oprot.writeFieldBegin('hasNext', TType.BOOL, 1) + oprot.writeBool(self.hasNext) + oprot.writeFieldEnd() + if self.totalCount is not None: + oprot.writeFieldBegin('totalCount', TType.I32, 2) + oprot.writeI32(self.totalCount) + oprot.writeFieldEnd() + if self.page is not None: + oprot.writeFieldBegin('page', TType.I32, 3) + oprot.writeI32(self.page) + oprot.writeFieldEnd() + if self.pageSize is not None: + oprot.writeFieldBegin('pageSize', TType.I32, 4) + oprot.writeI32(self.pageSize) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.hasNext is None: + raise TProtocolException(message='Required field hasNext is unset!') + if self.totalCount is None: + raise TProtocolException(message='Required field totalCount is unset!') + if self.page is None: + raise TProtocolException(message='Required field page is unset!') + if self.pageSize is None: + raise TProtocolException(message='Required field pageSize is unset!') + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class RichCollectionJobData(object): + """ + Attributes: + - items + - pagination + + """ + + + def __init__(self, items=None, pagination=None,): + self.items = items + self.pagination = pagination + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.LIST: + self.items = [] + (_etype19, _size16) = iprot.readListBegin() + for _i20 in range(_size16): + _elem21 = JobData() + _elem21.read(iprot) + self.items.append(_elem21) + iprot.readListEnd() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.pagination = RichCollectionPagination() + self.pagination.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('RichCollectionJobData') + if self.items is not None: + oprot.writeFieldBegin('items', TType.LIST, 1) + oprot.writeListBegin(TType.STRUCT, len(self.items)) + for iter22 in self.items: + iter22.write(oprot) + oprot.writeListEnd() + oprot.writeFieldEnd() + if self.pagination is not None: + oprot.writeFieldBegin('pagination', TType.STRUCT, 
2) + self.pagination.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.items is None: + raise TProtocolException(message='Required field items is unset!') + if self.pagination is None: + raise TProtocolException(message='Required field pagination is unset!') + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class ProxyStatus(object): + """ + Attributes: + - proxyUrl + - status + - successCount + - failureCount + - lastFailureTimestamp + - lastSuccessTimestamp + - serverIdentity + + """ + + + def __init__(self, proxyUrl=None, status=None, successCount=None, failureCount=None, lastFailureTimestamp=None, lastSuccessTimestamp=None, serverIdentity=None,): + self.proxyUrl = proxyUrl + self.status = status + self.successCount = successCount + self.failureCount = failureCount + self.lastFailureTimestamp = lastFailureTimestamp + self.lastSuccessTimestamp = lastSuccessTimestamp + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.proxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.status = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I64: + self.successCount = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.I64: + self.failureCount = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.lastFailureTimestamp = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRING: + self.lastSuccessTimestamp = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('ProxyStatus') + if self.proxyUrl is not None: + oprot.writeFieldBegin('proxyUrl', TType.STRING, 1) + oprot.writeString(self.proxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.proxyUrl) + oprot.writeFieldEnd() + if self.status is not None: + oprot.writeFieldBegin('status', TType.STRING, 2) + oprot.writeString(self.status.encode('utf-8') if sys.version_info[0] == 2 else self.status) + 
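+            # Each field is written only when set (is not None): writeFieldBegin, the value, then writeFieldEnd below; unset fields never reach the wire.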
oprot.writeFieldEnd() + if self.successCount is not None: + oprot.writeFieldBegin('successCount', TType.I64, 3) + oprot.writeI64(self.successCount) + oprot.writeFieldEnd() + if self.failureCount is not None: + oprot.writeFieldBegin('failureCount', TType.I64, 4) + oprot.writeI64(self.failureCount) + oprot.writeFieldEnd() + if self.lastFailureTimestamp is not None: + oprot.writeFieldBegin('lastFailureTimestamp', TType.STRING, 5) + oprot.writeString(self.lastFailureTimestamp.encode('utf-8') if sys.version_info[0] == 2 else self.lastFailureTimestamp) + oprot.writeFieldEnd() + if self.lastSuccessTimestamp is not None: + oprot.writeFieldBegin('lastSuccessTimestamp', TType.STRING, 6) + oprot.writeString(self.lastSuccessTimestamp.encode('utf-8') if sys.version_info[0] == 2 else self.lastSuccessTimestamp) + oprot.writeFieldEnd() + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 7) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class AccountStatus(object): + """ + Attributes: + - accountId + - status + - successCount + - failureCount + - lastFailureTimestamp + - lastSuccessTimestamp + - lastUsedProxy + - lastUsedMachine + + """ + + + def __init__(self, accountId=None, status=None, successCount=None, failureCount=None, lastFailureTimestamp=None, lastSuccessTimestamp=None, lastUsedProxy=None, lastUsedMachine=None,): + self.accountId = accountId + self.status = status + self.successCount = successCount + self.failureCount = failureCount + self.lastFailureTimestamp = lastFailureTimestamp + self.lastSuccessTimestamp = lastSuccessTimestamp + self.lastUsedProxy = lastUsedProxy + self.lastUsedMachine = lastUsedMachine + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.status = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I64: + self.successCount = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.I64: + self.failureCount = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.lastFailureTimestamp = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRING: + self.lastSuccessTimestamp = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif 
fid == 7: + if ftype == TType.STRING: + self.lastUsedProxy = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.STRING: + self.lastUsedMachine = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('AccountStatus') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.status is not None: + oprot.writeFieldBegin('status', TType.STRING, 2) + oprot.writeString(self.status.encode('utf-8') if sys.version_info[0] == 2 else self.status) + oprot.writeFieldEnd() + if self.successCount is not None: + oprot.writeFieldBegin('successCount', TType.I64, 3) + oprot.writeI64(self.successCount) + oprot.writeFieldEnd() + if self.failureCount is not None: + oprot.writeFieldBegin('failureCount', TType.I64, 4) + oprot.writeI64(self.failureCount) + oprot.writeFieldEnd() + if self.lastFailureTimestamp is not None: + oprot.writeFieldBegin('lastFailureTimestamp', TType.STRING, 5) + oprot.writeString(self.lastFailureTimestamp.encode('utf-8') if sys.version_info[0] == 2 else self.lastFailureTimestamp) + oprot.writeFieldEnd() + if self.lastSuccessTimestamp is not None: + oprot.writeFieldBegin('lastSuccessTimestamp', TType.STRING, 6) + oprot.writeString(self.lastSuccessTimestamp.encode('utf-8') if sys.version_info[0] == 2 else self.lastSuccessTimestamp) + oprot.writeFieldEnd() + if self.lastUsedProxy is not None: + oprot.writeFieldBegin('lastUsedProxy', TType.STRING, 7) + oprot.writeString(self.lastUsedProxy.encode('utf-8') if sys.version_info[0] == 2 else self.lastUsedProxy) + oprot.writeFieldEnd() + if self.lastUsedMachine is not None: + oprot.writeFieldBegin('lastUsedMachine', TType.STRING, 8) + oprot.writeString(self.lastUsedMachine.encode('utf-8') if sys.version_info[0] == 2 else self.lastUsedMachine) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + + +class AirflowLogContext(object): + """ + Attributes: + - logS3Path + - dagId + - runId + - taskId + - tryNumber + - workerHostname + - queue + + """ + + + def __init__(self, logS3Path=None, dagId=None, runId=None, taskId=None, tryNumber=None, workerHostname=None, queue=None,): + self.logS3Path = logS3Path + self.dagId = dagId + self.runId = runId + self.taskId = taskId + self.tryNumber = tryNumber + self.workerHostname = workerHostname + self.queue = queue + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) 
= iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.logS3Path = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.dagId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.runId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.taskId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.I32: + self.tryNumber = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRING: + self.workerHostname = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.queue = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('AirflowLogContext') + if self.logS3Path is not None: + oprot.writeFieldBegin('logS3Path', TType.STRING, 1) + oprot.writeString(self.logS3Path.encode('utf-8') if sys.version_info[0] == 2 else self.logS3Path) + oprot.writeFieldEnd() + if self.dagId is not None: + oprot.writeFieldBegin('dagId', TType.STRING, 2) + oprot.writeString(self.dagId.encode('utf-8') if sys.version_info[0] == 2 else self.dagId) + oprot.writeFieldEnd() + if self.runId is not None: + oprot.writeFieldBegin('runId', TType.STRING, 3) + oprot.writeString(self.runId.encode('utf-8') if sys.version_info[0] == 2 else self.runId) + oprot.writeFieldEnd() + if self.taskId is not None: + oprot.writeFieldBegin('taskId', TType.STRING, 4) + oprot.writeString(self.taskId.encode('utf-8') if sys.version_info[0] == 2 else self.taskId) + oprot.writeFieldEnd() + if self.tryNumber is not None: + oprot.writeFieldBegin('tryNumber', TType.I32, 5) + oprot.writeI32(self.tryNumber) + oprot.writeFieldEnd() + if self.workerHostname is not None: + oprot.writeFieldBegin('workerHostname', TType.STRING, 6) + oprot.writeString(self.workerHostname.encode('utf-8') if sys.version_info[0] == 2 else self.workerHostname) + oprot.writeFieldEnd() + if self.queue is not None: + oprot.writeFieldBegin('queue', TType.STRING, 7) + oprot.writeString(self.queue.encode('utf-8') if sys.version_info[0] == 2 else self.queue) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(JobTokenData) +JobTokenData.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'infoJson', 'UTF8', None, ), # 1 + (2, 
TType.STRING, 'ytdlpCommand', 'UTF8', None, ), # 2 + (3, TType.STRING, 'socks', 'UTF8', None, ), # 3 + (4, TType.STRING, 'jobId', 'UTF8', None, ), # 4 + (5, TType.STRING, 'url', 'UTF8', None, ), # 5 + (6, TType.STRING, 'cookiesBlob', 'UTF8', None, ), # 6 + (7, TType.STRING, 'requestSummary', 'UTF8', None, ), # 7 + (8, TType.LIST, 'communicationLogPaths', (TType.STRING, 'UTF8', False), None, ), # 8 + (9, TType.STRING, 'serverVersionInfo', 'UTF8', None, ), # 9 +) +all_structs.append(AccountData) +AccountData.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'username', 'UTF8', None, ), # 1 + (2, TType.STRING, 'password', 'UTF8', None, ), # 2 + (3, TType.STRING, 'countryCode', 'UTF8', None, ), # 3 +) +all_structs.append(ProxyData) +ProxyData.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'proxyUrl', 'UTF8', None, ), # 1 + (2, TType.STRING, 'countryCode', 'UTF8', None, ), # 2 +) +all_structs.append(AccountPairWithState) +AccountPairWithState.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'proxyId', 'UTF8', None, ), # 2 + (3, TType.I32, 'accountPairState', None, None, ), # 3 + (4, TType.STRING, 'machineId', 'UTF8', None, ), # 4 +) +all_structs.append(JobData) +JobData.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'jobId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'url', 'UTF8', None, ), # 2 + (3, TType.STRING, 'cookiesBlob', 'UTF8', None, ), # 3 + (4, TType.STRING, 'potoken', 'UTF8', None, ), # 4 + (5, TType.STRING, 'visitorId', 'UTF8', None, ), # 5 + (6, TType.STRING, 'ytdlpCommand', 'UTF8', None, ), # 6 + (7, TType.STRING, 'createdTime', 'UTF8', None, ), # 7 + (8, TType.MAP, 'telemetry', (TType.STRING, 'UTF8', TType.STRING, 'UTF8', False), None, ), # 8 + (9, TType.I32, 'state', None, None, ), # 9 + (10, TType.STRING, 'errorMessage', 'UTF8', None, ), # 10 + (11, TType.STRING, 'socks5Id', 'UTF8', None, ), # 11 +) +all_structs.append(RichCollectionPagination) +RichCollectionPagination.thrift_spec = ( + None, # 0 + (1, TType.BOOL, 'hasNext', None, None, ), # 1 + (2, TType.I32, 'totalCount', None, None, ), # 2 + (3, TType.I32, 'page', None, None, ), # 3 + (4, TType.I32, 'pageSize', None, None, ), # 4 +) +all_structs.append(RichCollectionJobData) +RichCollectionJobData.thrift_spec = ( + None, # 0 + (1, TType.LIST, 'items', (TType.STRUCT, [JobData, None], False), None, ), # 1 + (2, TType.STRUCT, 'pagination', [RichCollectionPagination, None], None, ), # 2 +) +all_structs.append(ProxyStatus) +ProxyStatus.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'proxyUrl', 'UTF8', None, ), # 1 + (2, TType.STRING, 'status', 'UTF8', None, ), # 2 + (3, TType.I64, 'successCount', None, None, ), # 3 + (4, TType.I64, 'failureCount', None, None, ), # 4 + (5, TType.STRING, 'lastFailureTimestamp', 'UTF8', None, ), # 5 + (6, TType.STRING, 'lastSuccessTimestamp', 'UTF8', None, ), # 6 + (7, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 7 +) +all_structs.append(AccountStatus) +AccountStatus.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'status', 'UTF8', None, ), # 2 + (3, TType.I64, 'successCount', None, None, ), # 3 + (4, TType.I64, 'failureCount', None, None, ), # 4 + (5, TType.STRING, 'lastFailureTimestamp', 'UTF8', None, ), # 5 + (6, TType.STRING, 'lastSuccessTimestamp', 'UTF8', None, ), # 6 + (7, TType.STRING, 'lastUsedProxy', 'UTF8', None, ), # 7 + (8, TType.STRING, 'lastUsedMachine', 'UTF8', None, ), # 8 +) +all_structs.append(AirflowLogContext) +AirflowLogContext.thrift_spec = ( + None, # 0 + (1, 
TType.STRING, 'logS3Path', 'UTF8', None, ), # 1 + (2, TType.STRING, 'dagId', 'UTF8', None, ), # 2 + (3, TType.STRING, 'runId', 'UTF8', None, ), # 3 + (4, TType.STRING, 'taskId', 'UTF8', None, ), # 4 + (5, TType.I32, 'tryNumber', None, None, ), # 5 + (6, TType.STRING, 'workerHostname', 'UTF8', None, ), # 6 + (7, TType.STRING, 'queue', 'UTF8', None, ), # 7 +) +fix_spec(all_structs) +del all_structs diff --git a/pangramia/yt/exceptions/__init__.py b/pangramia/yt/exceptions/__init__.py new file mode 100644 index 0000000..adefd8e --- /dev/null +++ b/pangramia/yt/exceptions/__init__.py @@ -0,0 +1 @@ +__all__ = ['ttypes', 'constants'] diff --git a/pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc b/pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32c1ca90e3b38d6abbde51201c4fe00c9606a491 GIT binary patch literal 195 zcmYj}O$x#=5QWn!{$VfSu8W!rNO9-RjXO6(XhIF7N$8AXj^vfP6}*5uSB`?X;hAkVH99rnC&5uIo2T_iO2~mm$hj|k`lJbQ6G8?7<$~l0W z=*Rr?jn__wNHS3sB39Z&7ZUyuO@FbCK^c{Ad(~OR7iq4IiDY?M0$6VeU>(yi+jK#n LovAb4ezL_IIN&wn literal 0 HcmV?d00001 diff --git a/pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc b/pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87cefa9ab242d40a1674f21ab3e7e2c0b46c774b GIT binary patch literal 7599 zcmeHM&2JmW72nw}k}G~mmSsD26V;8A$h9piahwl0ZfwU6+9r%^F*$7FF4ml-v=YBO zyHp~T>LD_kO9Lc8fD}DRz3I|Z&%O21L;rvQ3KT6GzyW&g!AOz*-YiLx6z#N$+oEWe zn78xs_PsZ6-kW)E=;v~dhUdbCPnOet>m@q`A45HE#3NZH=b_bIWk^ zXs3Aw?Tl&{g4}j7$nP*vFu4=3l}SFqv)rs&Jf~XuZmWRS6fdg2Yy$Ztuz*=LgS>Y@ zQ7U|8N(E5L1i39p^k$Bmi`v}O9n#dCC9QIKy%or8`PCqb{G~v(CdyYub0cWH6joyC zl&`!~30iU3Y{-M+2ItoYRGch507RlX*gakJ8_4fdw1;X7W`3MwU_IOVeN8&Rt% z;tDYze9UemT0cei-kWU&WeIaD3%YM#~R@81` zfP6(xcwQ95K4#;2yQVawpjH(*jEFpe0>Cbl8PBWK{3rsBcK6DBvlY*;$ETXD=oAlH zwdQ)%YetE-qWPBJSQ36c^f59Y^vorik7F2Dbh@=J)1DVL!r1e^2_BIRps^|ZHomc6 z>8E?R6*zAH(NUxFGD?&Mh~d&(>|NLj^q`kE9;x_^$K%<0lZVyy*|1)3$NtS)FdH_a z*l$z%C`!F`)D0A~YHYEKq&QaEwWPW1 zN6X?Mu=iPmQq!oQSWWuE^PV6sT`i4Y$po*WaDYq?*&11Z67hEsxWiUp&21*m!4KGq z9vds>mM+dgP%G*0ux(&;;3d?zr0*DuXl>g&8u`Jdj$)vg=u7QrZyB4`roEZ!*l~I# zv#N{g2kh3bJ1Ndk|CQ=>Q2S*(v6AKb5se#Y{T$!scC?i|Hf`)P@9Paa>Dkm94r+yt zO&f9JGteRYzxC;SIz;D>_oQPC(II(KtNI5_Y%5AdRsSeVW-+N|k>1Q~I`LG;*1aoeoJlFnf$o)A+6Lqjr#LSP{Kr zbWEOpA8WFkDcvCp6a?}-Bfvxtb=nn0FbH=E}&A3^w>PInL zp@RKXI$_lFU}?p4goq42lv9_l2R3@HCAViEC`G*#-d$`F9l0sL*}XD zY8ceGY6}uinv3Pvu1R~beEoZ+m!%P{M>4w>hzJK9cC2bsI$bhhV@cXwnapT4Y)RwQ zi`U#N4ZrHQ5*8NZTMAtD4LYwhxKs#=E46ZaLZLaAmqw))Nh@sO_#DxMw5koi9^gg0 zs5Gh}cT?UfH5O1=qso)6?M2~6aK<&f@T_ZjtHJuyc&+)hbE8gc66^0^&e2r>O+Ut_ zS(=&j)AP*1I}Lb3&-ULA%c3_AcmlsHbM&n404L4#vObR;`jlj`$75!kR!idEgwJ>6o$m@a$#WQ6VqKOC_2AabW)0rHI~kA@NHp$j*7_!=oE<#9I~Y- zr9o+uXS$T8h^{IJK&80FR6lCuN^tAzZ|R%*Ce3_Ec2Ik=qjz~y5I{5EkMl;y=;IiK z12@hBFLBP^OxgB~nlalxL~lAW=bVxX3KFZM<3>5Hc#LX@rw3vJaRSZV!#&3g8swN_ zo^Vb9Nb7A8#(_FNWKq=z+)mM#1=T&`eBvqkdZPCQJWQM=qEAz|ov@L)y)WT0dx460 z4p?q7+V?-B7Bt{ZaZr!kgl^Xh^Ltbl<7P`H?LFLo^kCpdOthj zN#Z4Negh>s382A^j>1jIF^cRljkE>VKm!rAV&Gt98*hxh(|?wT>bHL`hekclvy-S@CPp1yp)p-?fiNPaKct?&>FJ z5xpTAOWX5;Tb}o0AodL7 z$XI<5);_WN=1BPu4i-d!S*)%y$b?fsvh3&CfbM?sg**vs&M1{iyOY8LwsbrX2}rv} zn$LRPt+rq5t^+KbH!IK|IkZYK1cm3~+w|FlO1SDlyxipC5_QuaBYsGrOrSzwiNG>| z%ym<=N>HnTyB9x_dO{<{fz z;o7Cwkz@9=aY0u)|F&FOP*RC_w1}5z5}-U#Oc79ORa#cMr6v3s%3vakt$j$?FOkLG zKRs09d`Mi$sIfmH;Tq-mz;jY%qaWR$=>thb6OWt)r*Y?l@{*4S#2BOPuQSbl#`=Gd zX^Kb5R4AR9%ZnGNeu2R7WtR9Z^}R^oB7mFjrcY;3Q~A@vLN|eW&NaPo;VHCmelMKG z8%OxU(*rxJa(vpFe}VEKQ>f`tH~u<*`d`VP)G47%?u+v$aT#{g%`-%atTCmsjD>S< 
zN~IYK&x~3d#Q?u0Yx|e;jDw>#^%ar~@ft)B*9aUVuorA7m!KHs4=95f!@k}5OQZ|p z`n@<(K=dc#*gXev2eI!Qq+DVsvL{@M>}eIE|6r0JUI)WOXK(atdy#~TzE4ObinvrV zaB}}_?wtvUH;Lw31a1)U35+Zd1$Tcz84m9EEztvr>;)(G!5A4Ra{ZKK=q}mJ{z*Q# z@Lh$-f4x7qe&8;`VR82YXmaj%iOLrQ&}tJnPE=?okml>1|iV$3$jl^H{eMbz$mG_{bHIWaB 2: + from urllib.parse import urlparse +else: + from urlparse import urlparse +from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient +from thrift.protocol.TBinaryProtocol import TBinaryProtocol + +from pangramia.yt.management import YTManagementService +from pangramia.yt.management.ttypes import * + +if len(sys.argv) <= 1 or sys.argv[1] == '--help': + print('') + print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') + print('') + print('Functions:') + print(' getProxyStatus(string serverIdentity)') + print(' bool banProxy(string proxyUrl, string serverIdentity)') + print(' bool unbanProxy(string proxyUrl, string serverIdentity)') + print(' bool resetAllProxyStatuses(string serverIdentity)') + print(' bool banAllProxies(string serverIdentity)') + print(' bool deleteProxyFromRedis(string proxyUrl, string serverIdentity)') + print(' i32 deleteAllProxiesFromRedis(string serverIdentity)') + print(' getAccountStatus(string accountId, string accountPrefix)') + print(' bool banAccount(string accountId, string reason)') + print(' bool unbanAccount(string accountId, string reason)') + print(' bool deleteAccountFromRedis(string accountId)') + print(' i32 deleteAllAccountsFromRedis(string accountPrefix)') + print(' bool ping()') + print(' bool reportError(string message, details)') + print(' void shutdown()') + print('') + sys.exit(0) + +pp = pprint.PrettyPrinter(indent=2) +host = 'localhost' +port = 9090 +uri = '' +framed = False +ssl = False +validate = True +ca_certs = None +keyfile = None +certfile = None +http = False +argi = 1 + +if sys.argv[argi] == '-h': + parts = sys.argv[argi + 1].split(':') + host = parts[0] + if len(parts) > 1: + port = int(parts[1]) + argi += 2 + +if sys.argv[argi] == '-u': + url = urlparse(sys.argv[argi + 1]) + parts = url[1].split(':') + host = parts[0] + if len(parts) > 1: + port = int(parts[1]) + else: + port = 80 + uri = url[2] + if url[4]: + uri += '?%s' % url[4] + http = True + argi += 2 + +if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': + framed = True + argi += 1 + +if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': + ssl = True + argi += 1 + +if sys.argv[argi] == '-novalidate': + validate = False + argi += 1 + +if sys.argv[argi] == '-ca_certs': + ca_certs = sys.argv[argi+1] + argi += 2 + +if sys.argv[argi] == '-keyfile': + keyfile = sys.argv[argi+1] + argi += 2 + +if sys.argv[argi] == '-certfile': + certfile = sys.argv[argi+1] + argi += 2 + +cmd = sys.argv[argi] +args = sys.argv[argi + 1:] + +if http: + transport = THttpClient.THttpClient(host, port, uri) +else: + if ssl: + socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) + else: + socket = TSocket.TSocket(host, port) + if framed: + transport = TTransport.TFramedTransport(socket) + else: + transport = TTransport.TBufferedTransport(socket) +protocol = TBinaryProtocol(transport) +client = YTManagementService.Client(protocol) +transport.open() + +if cmd == 'getProxyStatus': + if len(args) != 1: + print('getProxyStatus requires 1 args') + sys.exit(1) + pp.pprint(client.getProxyStatus(args[0],)) + +elif cmd == 'banProxy': + 
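+    # Every verb handled below follows the same shape: validate the argument count, invoke the RPC, pretty-print the result.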
if len(args) != 2: + print('banProxy requires 2 args') + sys.exit(1) + pp.pprint(client.banProxy(args[0], args[1],)) + +elif cmd == 'unbanProxy': + if len(args) != 2: + print('unbanProxy requires 2 args') + sys.exit(1) + pp.pprint(client.unbanProxy(args[0], args[1],)) + +elif cmd == 'resetAllProxyStatuses': + if len(args) != 1: + print('resetAllProxyStatuses requires 1 args') + sys.exit(1) + pp.pprint(client.resetAllProxyStatuses(args[0],)) + +elif cmd == 'banAllProxies': + if len(args) != 1: + print('banAllProxies requires 1 args') + sys.exit(1) + pp.pprint(client.banAllProxies(args[0],)) + +elif cmd == 'deleteProxyFromRedis': + if len(args) != 2: + print('deleteProxyFromRedis requires 2 args') + sys.exit(1) + pp.pprint(client.deleteProxyFromRedis(args[0], args[1],)) + +elif cmd == 'deleteAllProxiesFromRedis': + if len(args) != 1: + print('deleteAllProxiesFromRedis requires 1 args') + sys.exit(1) + pp.pprint(client.deleteAllProxiesFromRedis(args[0],)) + +elif cmd == 'getAccountStatus': + if len(args) != 2: + print('getAccountStatus requires 2 args') + sys.exit(1) + pp.pprint(client.getAccountStatus(args[0], args[1],)) + +elif cmd == 'banAccount': + if len(args) != 2: + print('banAccount requires 2 args') + sys.exit(1) + pp.pprint(client.banAccount(args[0], args[1],)) + +elif cmd == 'unbanAccount': + if len(args) != 2: + print('unbanAccount requires 2 args') + sys.exit(1) + pp.pprint(client.unbanAccount(args[0], args[1],)) + +elif cmd == 'deleteAccountFromRedis': + if len(args) != 1: + print('deleteAccountFromRedis requires 1 args') + sys.exit(1) + pp.pprint(client.deleteAccountFromRedis(args[0],)) + +elif cmd == 'deleteAllAccountsFromRedis': + if len(args) != 1: + print('deleteAllAccountsFromRedis requires 1 args') + sys.exit(1) + pp.pprint(client.deleteAllAccountsFromRedis(args[0],)) + +elif cmd == 'ping': + if len(args) != 0: + print('ping requires 0 args') + sys.exit(1) + pp.pprint(client.ping()) + +elif cmd == 'reportError': + if len(args) != 2: + print('reportError requires 2 args') + sys.exit(1) + pp.pprint(client.reportError(args[0], eval(args[1]),)) + +elif cmd == 'shutdown': + if len(args) != 0: + print('shutdown requires 0 args') + sys.exit(1) + pp.pprint(client.shutdown()) + +else: + print('Unrecognized method %s' % cmd) + sys.exit(1) + +transport.close() diff --git a/pangramia/yt/management/YTManagementService.py b/pangramia/yt/management/YTManagementService.py new file mode 100644 index 0000000..e13e963 --- /dev/null +++ b/pangramia/yt/management/YTManagementService.py @@ -0,0 +1,2816 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +import pangramia.base_service.BaseService +import logging +from .ttypes import * +from thrift.Thrift import TProcessor +from thrift.transport import TTransport +all_structs = [] + + +class Iface(pangramia.base_service.BaseService.Iface): + def getProxyStatus(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + pass + + def banProxy(self, proxyUrl, serverIdentity): + """ + Parameters: + - proxyUrl + - serverIdentity + + """ + pass + + def unbanProxy(self, proxyUrl, serverIdentity): + """ + Parameters: + - proxyUrl + - serverIdentity + + """ + pass + + def resetAllProxyStatuses(self, serverIdentity): + """ + Parameters: + - 
serverIdentity + + """ + pass + + def banAllProxies(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + pass + + def deleteProxyFromRedis(self, proxyUrl, serverIdentity): + """ + Parameters: + - proxyUrl + - serverIdentity + + """ + pass + + def deleteAllProxiesFromRedis(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + pass + + def getAccountStatus(self, accountId, accountPrefix): + """ + Parameters: + - accountId + - accountPrefix + + """ + pass + + def banAccount(self, accountId, reason): + """ + Parameters: + - accountId + - reason + + """ + pass + + def unbanAccount(self, accountId, reason): + """ + Parameters: + - accountId + - reason + + """ + pass + + def deleteAccountFromRedis(self, accountId): + """ + Parameters: + - accountId + + """ + pass + + def deleteAllAccountsFromRedis(self, accountPrefix): + """ + Parameters: + - accountPrefix + + """ + pass + + +class Client(pangramia.base_service.BaseService.Client, Iface): + def __init__(self, iprot, oprot=None): + pangramia.base_service.BaseService.Client.__init__(self, iprot, oprot) + + def getProxyStatus(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + self.send_getProxyStatus(serverIdentity) + return self.recv_getProxyStatus() + + def send_getProxyStatus(self, serverIdentity): + self._oprot.writeMessageBegin('getProxyStatus', TMessageType.CALL, self._seqid) + args = getProxyStatus_args() + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getProxyStatus(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getProxyStatus_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getProxyStatus failed: unknown result") + + def banProxy(self, proxyUrl, serverIdentity): + """ + Parameters: + - proxyUrl + - serverIdentity + + """ + self.send_banProxy(proxyUrl, serverIdentity) + return self.recv_banProxy() + + def send_banProxy(self, proxyUrl, serverIdentity): + self._oprot.writeMessageBegin('banProxy', TMessageType.CALL, self._seqid) + args = banProxy_args() + args.proxyUrl = proxyUrl + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_banProxy(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = banProxy_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "banProxy failed: unknown result") + + def unbanProxy(self, proxyUrl, serverIdentity): + """ + Parameters: + - proxyUrl + - serverIdentity + + """ + self.send_unbanProxy(proxyUrl, serverIdentity) + return self.recv_unbanProxy() + + def send_unbanProxy(self, proxyUrl, serverIdentity): + self._oprot.writeMessageBegin('unbanProxy', TMessageType.CALL, self._seqid) + 
args = unbanProxy_args() + args.proxyUrl = proxyUrl + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_unbanProxy(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = unbanProxy_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "unbanProxy failed: unknown result") + + def resetAllProxyStatuses(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + self.send_resetAllProxyStatuses(serverIdentity) + return self.recv_resetAllProxyStatuses() + + def send_resetAllProxyStatuses(self, serverIdentity): + self._oprot.writeMessageBegin('resetAllProxyStatuses', TMessageType.CALL, self._seqid) + args = resetAllProxyStatuses_args() + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_resetAllProxyStatuses(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = resetAllProxyStatuses_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "resetAllProxyStatuses failed: unknown result") + + def banAllProxies(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + self.send_banAllProxies(serverIdentity) + return self.recv_banAllProxies() + + def send_banAllProxies(self, serverIdentity): + self._oprot.writeMessageBegin('banAllProxies', TMessageType.CALL, self._seqid) + args = banAllProxies_args() + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_banAllProxies(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = banAllProxies_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "banAllProxies failed: unknown result") + + def deleteProxyFromRedis(self, proxyUrl, serverIdentity): + """ + Parameters: + - proxyUrl + - serverIdentity + + """ + self.send_deleteProxyFromRedis(proxyUrl, serverIdentity) + return self.recv_deleteProxyFromRedis() + + def send_deleteProxyFromRedis(self, proxyUrl, serverIdentity): + self._oprot.writeMessageBegin('deleteProxyFromRedis', TMessageType.CALL, self._seqid) + args = deleteProxyFromRedis_args() + args.proxyUrl = proxyUrl + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_deleteProxyFromRedis(self): + iprot = self._iprot + 
(fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = deleteProxyFromRedis_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "deleteProxyFromRedis failed: unknown result") + + def deleteAllProxiesFromRedis(self, serverIdentity): + """ + Parameters: + - serverIdentity + + """ + self.send_deleteAllProxiesFromRedis(serverIdentity) + return self.recv_deleteAllProxiesFromRedis() + + def send_deleteAllProxiesFromRedis(self, serverIdentity): + self._oprot.writeMessageBegin('deleteAllProxiesFromRedis', TMessageType.CALL, self._seqid) + args = deleteAllProxiesFromRedis_args() + args.serverIdentity = serverIdentity + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_deleteAllProxiesFromRedis(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = deleteAllProxiesFromRedis_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "deleteAllProxiesFromRedis failed: unknown result") + + def getAccountStatus(self, accountId, accountPrefix): + """ + Parameters: + - accountId + - accountPrefix + + """ + self.send_getAccountStatus(accountId, accountPrefix) + return self.recv_getAccountStatus() + + def send_getAccountStatus(self, accountId, accountPrefix): + self._oprot.writeMessageBegin('getAccountStatus', TMessageType.CALL, self._seqid) + args = getAccountStatus_args() + args.accountId = accountId + args.accountPrefix = accountPrefix + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getAccountStatus(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getAccountStatus_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getAccountStatus failed: unknown result") + + def banAccount(self, accountId, reason): + """ + Parameters: + - accountId + - reason + + """ + self.send_banAccount(accountId, reason) + return self.recv_banAccount() + + def send_banAccount(self, accountId, reason): + self._oprot.writeMessageBegin('banAccount', TMessageType.CALL, self._seqid) + args = banAccount_args() + args.accountId = accountId + args.reason = reason + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_banAccount(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = 
banAccount_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "banAccount failed: unknown result") + + def unbanAccount(self, accountId, reason): + """ + Parameters: + - accountId + - reason + + """ + self.send_unbanAccount(accountId, reason) + return self.recv_unbanAccount() + + def send_unbanAccount(self, accountId, reason): + self._oprot.writeMessageBegin('unbanAccount', TMessageType.CALL, self._seqid) + args = unbanAccount_args() + args.accountId = accountId + args.reason = reason + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_unbanAccount(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = unbanAccount_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "unbanAccount failed: unknown result") + + def deleteAccountFromRedis(self, accountId): + """ + Parameters: + - accountId + + """ + self.send_deleteAccountFromRedis(accountId) + return self.recv_deleteAccountFromRedis() + + def send_deleteAccountFromRedis(self, accountId): + self._oprot.writeMessageBegin('deleteAccountFromRedis', TMessageType.CALL, self._seqid) + args = deleteAccountFromRedis_args() + args.accountId = accountId + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_deleteAccountFromRedis(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = deleteAccountFromRedis_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "deleteAccountFromRedis failed: unknown result") + + def deleteAllAccountsFromRedis(self, accountPrefix): + """ + Parameters: + - accountPrefix + + """ + self.send_deleteAllAccountsFromRedis(accountPrefix) + return self.recv_deleteAllAccountsFromRedis() + + def send_deleteAllAccountsFromRedis(self, accountPrefix): + self._oprot.writeMessageBegin('deleteAllAccountsFromRedis', TMessageType.CALL, self._seqid) + args = deleteAllAccountsFromRedis_args() + args.accountPrefix = accountPrefix + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_deleteAllAccountsFromRedis(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = deleteAllAccountsFromRedis_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise 
TApplicationException(TApplicationException.MISSING_RESULT, "deleteAllAccountsFromRedis failed: unknown result") + + +class Processor(pangramia.base_service.BaseService.Processor, Iface, TProcessor): + def __init__(self, handler): + pangramia.base_service.BaseService.Processor.__init__(self, handler) + self._processMap["getProxyStatus"] = Processor.process_getProxyStatus + self._processMap["banProxy"] = Processor.process_banProxy + self._processMap["unbanProxy"] = Processor.process_unbanProxy + self._processMap["resetAllProxyStatuses"] = Processor.process_resetAllProxyStatuses + self._processMap["banAllProxies"] = Processor.process_banAllProxies + self._processMap["deleteProxyFromRedis"] = Processor.process_deleteProxyFromRedis + self._processMap["deleteAllProxiesFromRedis"] = Processor.process_deleteAllProxiesFromRedis + self._processMap["getAccountStatus"] = Processor.process_getAccountStatus + self._processMap["banAccount"] = Processor.process_banAccount + self._processMap["unbanAccount"] = Processor.process_unbanAccount + self._processMap["deleteAccountFromRedis"] = Processor.process_deleteAccountFromRedis + self._processMap["deleteAllAccountsFromRedis"] = Processor.process_deleteAllAccountsFromRedis + self._on_message_begin = None + + def on_message_begin(self, func): + self._on_message_begin = func + + def process(self, iprot, oprot): + (name, type, seqid) = iprot.readMessageBegin() + if self._on_message_begin: + self._on_message_begin(name, type, seqid) + if name not in self._processMap: + iprot.skip(TType.STRUCT) + iprot.readMessageEnd() + x = TApplicationException(TApplicationException.UNKNOWN_METHOD, 'Unknown function %s' % (name)) + oprot.writeMessageBegin(name, TMessageType.EXCEPTION, seqid) + x.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + return + else: + self._processMap[name](self, seqid, iprot, oprot) + return True + + def process_getProxyStatus(self, seqid, iprot, oprot): + args = getProxyStatus_args() + args.read(iprot) + iprot.readMessageEnd() + result = getProxyStatus_result() + try: + result.success = self._handler.getProxyStatus(args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getProxyStatus", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_banProxy(self, seqid, iprot, oprot): + args = banProxy_args() + args.read(iprot) + iprot.readMessageEnd() + result = banProxy_result() + try: + result.success = self._handler.banProxy(args.proxyUrl, args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except 
TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("banProxy", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_unbanProxy(self, seqid, iprot, oprot): + args = unbanProxy_args() + args.read(iprot) + iprot.readMessageEnd() + result = unbanProxy_result() + try: + result.success = self._handler.unbanProxy(args.proxyUrl, args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("unbanProxy", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_resetAllProxyStatuses(self, seqid, iprot, oprot): + args = resetAllProxyStatuses_args() + args.read(iprot) + iprot.readMessageEnd() + result = resetAllProxyStatuses_result() + try: + result.success = self._handler.resetAllProxyStatuses(args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("resetAllProxyStatuses", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_banAllProxies(self, seqid, iprot, oprot): + args = banAllProxies_args() + args.read(iprot) + iprot.readMessageEnd() + result = banAllProxies_result() + try: + result.success = self._handler.banAllProxies(args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal 
error') + oprot.writeMessageBegin("banAllProxies", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_deleteProxyFromRedis(self, seqid, iprot, oprot): + args = deleteProxyFromRedis_args() + args.read(iprot) + iprot.readMessageEnd() + result = deleteProxyFromRedis_result() + try: + result.success = self._handler.deleteProxyFromRedis(args.proxyUrl, args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("deleteProxyFromRedis", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_deleteAllProxiesFromRedis(self, seqid, iprot, oprot): + args = deleteAllProxiesFromRedis_args() + args.read(iprot) + iprot.readMessageEnd() + result = deleteAllProxiesFromRedis_result() + try: + result.success = self._handler.deleteAllProxiesFromRedis(args.serverIdentity) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("deleteAllProxiesFromRedis", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_getAccountStatus(self, seqid, iprot, oprot): + args = getAccountStatus_args() + args.read(iprot) + iprot.readMessageEnd() + result = getAccountStatus_result() + try: + result.success = self._handler.getAccountStatus(args.accountId, args.accountPrefix) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getAccountStatus", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_banAccount(self, seqid, iprot, oprot): + args = banAccount_args() + 
args.read(iprot) + iprot.readMessageEnd() + result = banAccount_result() + try: + result.success = self._handler.banAccount(args.accountId, args.reason) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("banAccount", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_unbanAccount(self, seqid, iprot, oprot): + args = unbanAccount_args() + args.read(iprot) + iprot.readMessageEnd() + result = unbanAccount_result() + try: + result.success = self._handler.unbanAccount(args.accountId, args.reason) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("unbanAccount", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_deleteAccountFromRedis(self, seqid, iprot, oprot): + args = deleteAccountFromRedis_args() + args.read(iprot) + iprot.readMessageEnd() + result = deleteAccountFromRedis_result() + try: + result.success = self._handler.deleteAccountFromRedis(args.accountId) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("deleteAccountFromRedis", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_deleteAllAccountsFromRedis(self, seqid, iprot, oprot): + args = deleteAllAccountsFromRedis_args() + args.read(iprot) + iprot.readMessageEnd() + result = deleteAllAccountsFromRedis_result() + try: + result.success = self._handler.deleteAllAccountsFromRedis(args.accountPrefix) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except 
pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("deleteAllAccountsFromRedis", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + +# HELPER FUNCTIONS AND STRUCTURES + + +class getProxyStatus_args(object): + """ + Attributes: + - serverIdentity + + """ + + + def __init__(self, serverIdentity=None,): + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getProxyStatus_args') + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 1) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getProxyStatus_args) +getProxyStatus_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 1 +) + + +class getProxyStatus_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.LIST: + self.success = [] + (_etype3, _size0) = iprot.readListBegin() + for _i4 in range(_size0): + _elem5 = pangramia.yt.common.ttypes.ProxyStatus() + _elem5.read(iprot) + self.success.append(_elem5) + iprot.readListEnd() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == 
TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getProxyStatus_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.LIST, 0) + oprot.writeListBegin(TType.STRUCT, len(self.success)) + for iter6 in self.success: + iter6.write(oprot) + oprot.writeListEnd() + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getProxyStatus_result) +getProxyStatus_result.thrift_spec = ( + (0, TType.LIST, 'success', (TType.STRUCT, [pangramia.yt.common.ttypes.ProxyStatus, None], False), None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class banProxy_args(object): + """ + Attributes: + - proxyUrl + - serverIdentity + + """ + + + def __init__(self, proxyUrl=None, serverIdentity=None,): + self.proxyUrl = proxyUrl + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.proxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('banProxy_args') + if self.proxyUrl is not None: + oprot.writeFieldBegin('proxyUrl', TType.STRING, 1) + oprot.writeString(self.proxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.proxyUrl) + oprot.writeFieldEnd() + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 2) + oprot.writeString(self.serverIdentity.encode('utf-8') if 
sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(banProxy_args) +banProxy_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'proxyUrl', 'UTF8', None, ), # 1 + (2, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 2 +) + + +class banProxy_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('banProxy_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(banProxy_result) +banProxy_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class unbanProxy_args(object): + """ + Attributes: + - proxyUrl + - serverIdentity + + """ + + + def __init__(self, proxyUrl=None, serverIdentity=None,): + self.proxyUrl = proxyUrl + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + 
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.proxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('unbanProxy_args') + if self.proxyUrl is not None: + oprot.writeFieldBegin('proxyUrl', TType.STRING, 1) + oprot.writeString(self.proxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.proxyUrl) + oprot.writeFieldEnd() + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 2) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(unbanProxy_args) +unbanProxy_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'proxyUrl', 'UTF8', None, ), # 1 + (2, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 2 +) + + +class unbanProxy_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('unbanProxy_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() 
+ if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(unbanProxy_result) +unbanProxy_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class resetAllProxyStatuses_args(object): + """ + Attributes: + - serverIdentity + + """ + + + def __init__(self, serverIdentity=None,): + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('resetAllProxyStatuses_args') + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 1) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(resetAllProxyStatuses_args) +resetAllProxyStatuses_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 1 +) + + +class resetAllProxyStatuses_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = 
pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('resetAllProxyStatuses_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(resetAllProxyStatuses_result) +resetAllProxyStatuses_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class banAllProxies_args(object): + """ + Attributes: + - serverIdentity + + """ + + + def __init__(self, serverIdentity=None,): + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('banAllProxies_args') + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 1) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(banAllProxies_args) +banAllProxies_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'serverIdentity', 'UTF8', None, ), 
# 1 +) + + +class banAllProxies_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('banAllProxies_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(banAllProxies_result) +banAllProxies_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class deleteProxyFromRedis_args(object): + """ + Attributes: + - proxyUrl + - serverIdentity + + """ + + + def __init__(self, proxyUrl=None, serverIdentity=None,): + self.proxyUrl = proxyUrl + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.proxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + 
iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteProxyFromRedis_args') + if self.proxyUrl is not None: + oprot.writeFieldBegin('proxyUrl', TType.STRING, 1) + oprot.writeString(self.proxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.proxyUrl) + oprot.writeFieldEnd() + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 2) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteProxyFromRedis_args) +deleteProxyFromRedis_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'proxyUrl', 'UTF8', None, ), # 1 + (2, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 2 +) + + +class deleteProxyFromRedis_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteProxyFromRedis_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteProxyFromRedis_result) 
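Note on using the generated client above: outside of the bundled -remote helper, the Client class is normally driven by opening a buffered TSocket with a TBinaryProtocol, exactly as that helper does. A minimal sketch follows; the host, port, serverIdentity and proxy URL are illustrative assumptions, not values taken from this patch.

    # Minimal usage sketch for the generated YTManagementService client.
    # Endpoint and argument values below are placeholders, not part of this patch.
    from thrift.transport import TSocket, TTransport
    from thrift.protocol.TBinaryProtocol import TBinaryProtocol
    from pangramia.yt.management import YTManagementService

    socket = TSocket.TSocket('localhost', 9090)        # assumed host/port
    transport = TTransport.TBufferedTransport(socket)  # unframed, as in the -remote default
    client = YTManagementService.Client(TBinaryProtocol(transport))

    transport.open()
    try:
        statuses = client.getProxyStatus('worker-01')               # illustrative serverIdentity
        client.banProxy('http://203.0.113.7:3128', 'worker-01')     # illustrative proxyUrl
    finally:
        transport.close()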
+deleteProxyFromRedis_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class deleteAllProxiesFromRedis_args(object): + """ + Attributes: + - serverIdentity + + """ + + + def __init__(self, serverIdentity=None,): + self.serverIdentity = serverIdentity + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.serverIdentity = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteAllProxiesFromRedis_args') + if self.serverIdentity is not None: + oprot.writeFieldBegin('serverIdentity', TType.STRING, 1) + oprot.writeString(self.serverIdentity.encode('utf-8') if sys.version_info[0] == 2 else self.serverIdentity) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteAllProxiesFromRedis_args) +deleteAllProxiesFromRedis_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'serverIdentity', 'UTF8', None, ), # 1 +) + + +class deleteAllProxiesFromRedis_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.I32: + self.success = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteAllProxiesFromRedis_result') + if self.success 
is not None: + oprot.writeFieldBegin('success', TType.I32, 0) + oprot.writeI32(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteAllProxiesFromRedis_result) +deleteAllProxiesFromRedis_result.thrift_spec = ( + (0, TType.I32, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class getAccountStatus_args(object): + """ + Attributes: + - accountId + - accountPrefix + + """ + + + def __init__(self, accountId=None, accountPrefix=None,): + self.accountId = accountId + self.accountPrefix = accountPrefix + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.accountPrefix = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getAccountStatus_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.accountPrefix is not None: + oprot.writeFieldBegin('accountPrefix', TType.STRING, 2) + oprot.writeString(self.accountPrefix.encode('utf-8') if sys.version_info[0] == 2 else self.accountPrefix) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getAccountStatus_args) +getAccountStatus_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'accountPrefix', 'UTF8', None, ), # 2 +) + + +class 
getAccountStatus_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.LIST: + self.success = [] + (_etype10, _size7) = iprot.readListBegin() + for _i11 in range(_size7): + _elem12 = pangramia.yt.common.ttypes.AccountStatus() + _elem12.read(iprot) + self.success.append(_elem12) + iprot.readListEnd() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getAccountStatus_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.LIST, 0) + oprot.writeListBegin(TType.STRUCT, len(self.success)) + for iter13 in self.success: + iter13.write(oprot) + oprot.writeListEnd() + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getAccountStatus_result) +getAccountStatus_result.thrift_spec = ( + (0, TType.LIST, 'success', (TType.STRUCT, [pangramia.yt.common.ttypes.AccountStatus, None], False), None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class banAccount_args(object): + """ + Attributes: + - accountId + - reason + + """ + + + def __init__(self, accountId=None, reason=None,): + self.accountId = accountId + self.reason = reason + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else 
iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.reason = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('banAccount_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.reason is not None: + oprot.writeFieldBegin('reason', TType.STRING, 2) + oprot.writeString(self.reason.encode('utf-8') if sys.version_info[0] == 2 else self.reason) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(banAccount_args) +banAccount_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'reason', 'UTF8', None, ), # 2 +) + + +class banAccount_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('banAccount_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def 
__eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(banAccount_result) +banAccount_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class unbanAccount_args(object): + """ + Attributes: + - accountId + - reason + + """ + + + def __init__(self, accountId=None, reason=None,): + self.accountId = accountId + self.reason = reason + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.reason = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('unbanAccount_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.reason is not None: + oprot.writeFieldBegin('reason', TType.STRING, 2) + oprot.writeString(self.reason.encode('utf-8') if sys.version_info[0] == 2 else self.reason) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(unbanAccount_args) +unbanAccount_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'reason', 'UTF8', None, ), # 2 +) + + +class unbanAccount_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = 
pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('unbanAccount_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(unbanAccount_result) +unbanAccount_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class deleteAccountFromRedis_args(object): + """ + Attributes: + - accountId + + """ + + + def __init__(self, accountId=None,): + self.accountId = accountId + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteAccountFromRedis_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteAccountFromRedis_args) +deleteAccountFromRedis_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 +) + + +class 
deleteAccountFromRedis_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteAccountFromRedis_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteAccountFromRedis_result) +deleteAccountFromRedis_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class deleteAllAccountsFromRedis_args(object): + """ + Attributes: + - accountPrefix + + """ + + + def __init__(self, accountPrefix=None,): + self.accountPrefix = accountPrefix + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountPrefix = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + 
oprot.writeStructBegin('deleteAllAccountsFromRedis_args') + if self.accountPrefix is not None: + oprot.writeFieldBegin('accountPrefix', TType.STRING, 1) + oprot.writeString(self.accountPrefix.encode('utf-8') if sys.version_info[0] == 2 else self.accountPrefix) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteAllAccountsFromRedis_args) +deleteAllAccountsFromRedis_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountPrefix', 'UTF8', None, ), # 1 +) + + +class deleteAllAccountsFromRedis_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.I32: + self.success = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('deleteAllAccountsFromRedis_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.I32, 0) + oprot.writeI32(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(deleteAllAccountsFromRedis_result) +deleteAllAccountsFromRedis_result.thrift_spec = ( + (0, TType.I32, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) +fix_spec(all_structs) +del all_structs diff --git a/pangramia/yt/management/__init__.py b/pangramia/yt/management/__init__.py new file mode 
100644 index 0000000..813fdf8 --- /dev/null +++ b/pangramia/yt/management/__init__.py @@ -0,0 +1 @@ +__all__ = ['ttypes', 'constants', 'YTManagementService'] diff --git a/pangramia/yt/management/constants.py b/pangramia/yt/management/constants.py new file mode 100644 index 0000000..09a78b3 --- /dev/null +++ b/pangramia/yt/management/constants.py @@ -0,0 +1,14 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +from .ttypes import * diff --git a/pangramia/yt/management/ttypes.py b/pangramia/yt/management/ttypes.py new file mode 100644 index 0000000..de828aa --- /dev/null +++ b/pangramia/yt/management/ttypes.py @@ -0,0 +1,21 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +import pangramia.yt.common.ttypes +import pangramia.yt.exceptions.ttypes +import pangramia.base_service.ttypes + +from thrift.transport import TTransport +all_structs = [] +fix_spec(all_structs) +del all_structs diff --git a/pangramia/yt/tokens_ops/YTTokenOpService-remote b/pangramia/yt/tokens_ops/YTTokenOpService-remote new file mode 100755 index 0000000..8685be7 --- /dev/null +++ b/pangramia/yt/tokens_ops/YTTokenOpService-remote @@ -0,0 +1,257 @@ +#!/usr/bin/env python +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +import sys +import pprint +if sys.version_info[0] > 2: + from urllib.parse import urlparse +else: + from urlparse import urlparse +from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient +from thrift.protocol.TBinaryProtocol import TBinaryProtocol + +from pangramia.yt.tokens_ops import YTTokenOpService +from pangramia.yt.tokens_ops.ttypes import * + +if len(sys.argv) <= 1 or sys.argv[1] == '--help': + print('') + print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') + print('') + print('Functions:') + print(' JobTokenData getOrRefreshTokenWithReport(string accountId, string oldUrl, JobState status, string details, string jobId, TokenUpdateMode updateType, string url, string clients, AirflowLogContext airflowLogContext, string requestParamsJson)') + print(' JobTokenData getOrRefreshToken(string accountId, TokenUpdateMode updateType, string url, string clients, string machineId, AirflowLogContext airflowLogContext, string requestParamsJson, string assignedProxyUrl)') + print(' JobTokenData getLatestToken(string accountId)') + print(' JobTokenData refreshToken(string accountId, TokenUpdateMode updateType, string url)') + print(' bool reportState(string url, JobState status, string details, string jobId)') + print(' JobTokenData getInfoJsonDirect(string url, string clients)') + print(' getProxyStatus(string serverIdentity)') + print(' bool banProxy(string proxyUrl, string serverIdentity)') + print(' bool unbanProxy(string proxyUrl, string serverIdentity)') + 
print(' bool resetAllProxyStatuses(string serverIdentity)') + print(' bool banAllProxies(string serverIdentity)') + print(' bool deleteProxyFromRedis(string proxyUrl, string serverIdentity)') + print(' i32 deleteAllProxiesFromRedis(string serverIdentity)') + print(' getAccountStatus(string accountId, string accountPrefix)') + print(' bool banAccount(string accountId, string reason)') + print(' bool unbanAccount(string accountId, string reason)') + print(' bool deleteAccountFromRedis(string accountId)') + print(' i32 deleteAllAccountsFromRedis(string accountPrefix)') + print(' bool ping()') + print(' bool reportError(string message, details)') + print(' void shutdown()') + print('') + sys.exit(0) + +pp = pprint.PrettyPrinter(indent=2) +host = 'localhost' +port = 9090 +uri = '' +framed = False +ssl = False +validate = True +ca_certs = None +keyfile = None +certfile = None +http = False +argi = 1 + +if sys.argv[argi] == '-h': + parts = sys.argv[argi + 1].split(':') + host = parts[0] + if len(parts) > 1: + port = int(parts[1]) + argi += 2 + +if sys.argv[argi] == '-u': + url = urlparse(sys.argv[argi + 1]) + parts = url[1].split(':') + host = parts[0] + if len(parts) > 1: + port = int(parts[1]) + else: + port = 80 + uri = url[2] + if url[4]: + uri += '?%s' % url[4] + http = True + argi += 2 + +if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed': + framed = True + argi += 1 + +if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl': + ssl = True + argi += 1 + +if sys.argv[argi] == '-novalidate': + validate = False + argi += 1 + +if sys.argv[argi] == '-ca_certs': + ca_certs = sys.argv[argi+1] + argi += 2 + +if sys.argv[argi] == '-keyfile': + keyfile = sys.argv[argi+1] + argi += 2 + +if sys.argv[argi] == '-certfile': + certfile = sys.argv[argi+1] + argi += 2 + +cmd = sys.argv[argi] +args = sys.argv[argi + 1:] + +if http: + transport = THttpClient.THttpClient(host, port, uri) +else: + if ssl: + socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile) + else: + socket = TSocket.TSocket(host, port) + if framed: + transport = TTransport.TFramedTransport(socket) + else: + transport = TTransport.TBufferedTransport(socket) +protocol = TBinaryProtocol(transport) +client = YTTokenOpService.Client(protocol) +transport.open() + +if cmd == 'getOrRefreshTokenWithReport': + if len(args) != 10: + print('getOrRefreshTokenWithReport requires 10 args') + sys.exit(1) + pp.pprint(client.getOrRefreshTokenWithReport(args[0], args[1], eval(args[2]), args[3], args[4], eval(args[5]), args[6], args[7], eval(args[8]), args[9],)) + +elif cmd == 'getOrRefreshToken': + if len(args) != 8: + print('getOrRefreshToken requires 8 args') + sys.exit(1) + pp.pprint(client.getOrRefreshToken(args[0], eval(args[1]), args[2], args[3], args[4], eval(args[5]), args[6], args[7],)) + +elif cmd == 'getLatestToken': + if len(args) != 1: + print('getLatestToken requires 1 args') + sys.exit(1) + pp.pprint(client.getLatestToken(args[0],)) + +elif cmd == 'refreshToken': + if len(args) != 3: + print('refreshToken requires 3 args') + sys.exit(1) + pp.pprint(client.refreshToken(args[0], eval(args[1]), args[2],)) + +elif cmd == 'reportState': + if len(args) != 4: + print('reportState requires 4 args') + sys.exit(1) + pp.pprint(client.reportState(args[0], eval(args[1]), args[2], args[3],)) + +elif cmd == 'getInfoJsonDirect': + if len(args) != 2: + print('getInfoJsonDirect requires 2 args') + sys.exit(1) + pp.pprint(client.getInfoJsonDirect(args[0], args[1],)) + +elif cmd == 'getProxyStatus': 
+ if len(args) != 1: + print('getProxyStatus requires 1 args') + sys.exit(1) + pp.pprint(client.getProxyStatus(args[0],)) + +elif cmd == 'banProxy': + if len(args) != 2: + print('banProxy requires 2 args') + sys.exit(1) + pp.pprint(client.banProxy(args[0], args[1],)) + +elif cmd == 'unbanProxy': + if len(args) != 2: + print('unbanProxy requires 2 args') + sys.exit(1) + pp.pprint(client.unbanProxy(args[0], args[1],)) + +elif cmd == 'resetAllProxyStatuses': + if len(args) != 1: + print('resetAllProxyStatuses requires 1 args') + sys.exit(1) + pp.pprint(client.resetAllProxyStatuses(args[0],)) + +elif cmd == 'banAllProxies': + if len(args) != 1: + print('banAllProxies requires 1 args') + sys.exit(1) + pp.pprint(client.banAllProxies(args[0],)) + +elif cmd == 'deleteProxyFromRedis': + if len(args) != 2: + print('deleteProxyFromRedis requires 2 args') + sys.exit(1) + pp.pprint(client.deleteProxyFromRedis(args[0], args[1],)) + +elif cmd == 'deleteAllProxiesFromRedis': + if len(args) != 1: + print('deleteAllProxiesFromRedis requires 1 args') + sys.exit(1) + pp.pprint(client.deleteAllProxiesFromRedis(args[0],)) + +elif cmd == 'getAccountStatus': + if len(args) != 2: + print('getAccountStatus requires 2 args') + sys.exit(1) + pp.pprint(client.getAccountStatus(args[0], args[1],)) + +elif cmd == 'banAccount': + if len(args) != 2: + print('banAccount requires 2 args') + sys.exit(1) + pp.pprint(client.banAccount(args[0], args[1],)) + +elif cmd == 'unbanAccount': + if len(args) != 2: + print('unbanAccount requires 2 args') + sys.exit(1) + pp.pprint(client.unbanAccount(args[0], args[1],)) + +elif cmd == 'deleteAccountFromRedis': + if len(args) != 1: + print('deleteAccountFromRedis requires 1 args') + sys.exit(1) + pp.pprint(client.deleteAccountFromRedis(args[0],)) + +elif cmd == 'deleteAllAccountsFromRedis': + if len(args) != 1: + print('deleteAllAccountsFromRedis requires 1 args') + sys.exit(1) + pp.pprint(client.deleteAllAccountsFromRedis(args[0],)) + +elif cmd == 'ping': + if len(args) != 0: + print('ping requires 0 args') + sys.exit(1) + pp.pprint(client.ping()) + +elif cmd == 'reportError': + if len(args) != 2: + print('reportError requires 2 args') + sys.exit(1) + pp.pprint(client.reportError(args[0], eval(args[1]),)) + +elif cmd == 'shutdown': + if len(args) != 0: + print('shutdown requires 0 args') + sys.exit(1) + pp.pprint(client.shutdown()) + +else: + print('Unrecognized method %s' % cmd) + sys.exit(1) + +transport.close() diff --git a/pangramia/yt/tokens_ops/YTTokenOpService.py b/pangramia/yt/tokens_ops/YTTokenOpService.py new file mode 100644 index 0000000..e1772e4 --- /dev/null +++ b/pangramia/yt/tokens_ops/YTTokenOpService.py @@ -0,0 +1,1719 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +import pangramia.yt.management.YTManagementService +import logging +from .ttypes import * +from thrift.Thrift import TProcessor +from thrift.transport import TTransport +all_structs = [] + + +class Iface(pangramia.yt.management.YTManagementService.Iface): + def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson): + """ + Parameters: + - accountId + - oldUrl + - status + - details + - jobId + - updateType + - url + - 
clients + - airflowLogContext + - requestParamsJson + + """ + pass + + def getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl): + """ + Parameters: + - accountId + - updateType + - url + - clients + - machineId + - airflowLogContext + - requestParamsJson + - assignedProxyUrl + + """ + pass + + def getLatestToken(self, accountId): + """ + Parameters: + - accountId + + """ + pass + + def refreshToken(self, accountId, updateType, url): + """ + Parameters: + - accountId + - updateType + - url + + """ + pass + + def reportState(self, url, status, details, jobId): + """ + Parameters: + - url + - status + - details + - jobId + + """ + pass + + def getInfoJsonDirect(self, url, clients): + """ + Parameters: + - url + - clients + + """ + pass + + +class Client(pangramia.yt.management.YTManagementService.Client, Iface): + def __init__(self, iprot, oprot=None): + pangramia.yt.management.YTManagementService.Client.__init__(self, iprot, oprot) + + def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson): + """ + Parameters: + - accountId + - oldUrl + - status + - details + - jobId + - updateType + - url + - clients + - airflowLogContext + - requestParamsJson + + """ + self.send_getOrRefreshTokenWithReport(accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson) + return self.recv_getOrRefreshTokenWithReport() + + def send_getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson): + self._oprot.writeMessageBegin('getOrRefreshTokenWithReport', TMessageType.CALL, self._seqid) + args = getOrRefreshTokenWithReport_args() + args.accountId = accountId + args.oldUrl = oldUrl + args.status = status + args.details = details + args.jobId = jobId + args.updateType = updateType + args.url = url + args.clients = clients + args.airflowLogContext = airflowLogContext + args.requestParamsJson = requestParamsJson + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getOrRefreshTokenWithReport(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getOrRefreshTokenWithReport_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getOrRefreshTokenWithReport failed: unknown result") + + def getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl): + """ + Parameters: + - accountId + - updateType + - url + - clients + - machineId + - airflowLogContext + - requestParamsJson + - assignedProxyUrl + + """ + self.send_getOrRefreshToken(accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl) + return self.recv_getOrRefreshToken() + + def send_getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl): + self._oprot.writeMessageBegin('getOrRefreshToken', TMessageType.CALL, self._seqid) + args = getOrRefreshToken_args() 
+ args.accountId = accountId + args.updateType = updateType + args.url = url + args.clients = clients + args.machineId = machineId + args.airflowLogContext = airflowLogContext + args.requestParamsJson = requestParamsJson + args.assignedProxyUrl = assignedProxyUrl + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getOrRefreshToken(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getOrRefreshToken_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getOrRefreshToken failed: unknown result") + + def getLatestToken(self, accountId): + """ + Parameters: + - accountId + + """ + self.send_getLatestToken(accountId) + return self.recv_getLatestToken() + + def send_getLatestToken(self, accountId): + self._oprot.writeMessageBegin('getLatestToken', TMessageType.CALL, self._seqid) + args = getLatestToken_args() + args.accountId = accountId + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getLatestToken(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getLatestToken_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getLatestToken failed: unknown result") + + def refreshToken(self, accountId, updateType, url): + """ + Parameters: + - accountId + - updateType + - url + + """ + self.send_refreshToken(accountId, updateType, url) + return self.recv_refreshToken() + + def send_refreshToken(self, accountId, updateType, url): + self._oprot.writeMessageBegin('refreshToken', TMessageType.CALL, self._seqid) + args = refreshToken_args() + args.accountId = accountId + args.updateType = updateType + args.url = url + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_refreshToken(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = refreshToken_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "refreshToken failed: unknown result") + + def reportState(self, url, status, details, jobId): + """ + Parameters: + - url + - status + - details + - jobId + + """ + self.send_reportState(url, status, details, jobId) + return self.recv_reportState() + + def send_reportState(self, url, status, details, jobId): + self._oprot.writeMessageBegin('reportState', TMessageType.CALL, self._seqid) + args = reportState_args() + args.url = url + args.status = status + args.details = details + 
args.jobId = jobId + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_reportState(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = reportState_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "reportState failed: unknown result") + + def getInfoJsonDirect(self, url, clients): + """ + Parameters: + - url + - clients + + """ + self.send_getInfoJsonDirect(url, clients) + return self.recv_getInfoJsonDirect() + + def send_getInfoJsonDirect(self, url, clients): + self._oprot.writeMessageBegin('getInfoJsonDirect', TMessageType.CALL, self._seqid) + args = getInfoJsonDirect_args() + args.url = url + args.clients = clients + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getInfoJsonDirect(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getInfoJsonDirect_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getInfoJsonDirect failed: unknown result") + + +class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TProcessor): + def __init__(self, handler): + pangramia.yt.management.YTManagementService.Processor.__init__(self, handler) + self._processMap["getOrRefreshTokenWithReport"] = Processor.process_getOrRefreshTokenWithReport + self._processMap["getOrRefreshToken"] = Processor.process_getOrRefreshToken + self._processMap["getLatestToken"] = Processor.process_getLatestToken + self._processMap["refreshToken"] = Processor.process_refreshToken + self._processMap["reportState"] = Processor.process_reportState + self._processMap["getInfoJsonDirect"] = Processor.process_getInfoJsonDirect + self._on_message_begin = None + + def on_message_begin(self, func): + self._on_message_begin = func + + def process(self, iprot, oprot): + (name, type, seqid) = iprot.readMessageBegin() + if self._on_message_begin: + self._on_message_begin(name, type, seqid) + if name not in self._processMap: + iprot.skip(TType.STRUCT) + iprot.readMessageEnd() + x = TApplicationException(TApplicationException.UNKNOWN_METHOD, 'Unknown function %s' % (name)) + oprot.writeMessageBegin(name, TMessageType.EXCEPTION, seqid) + x.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + return + else: + self._processMap[name](self, seqid, iprot, oprot) + return True + + def process_getOrRefreshTokenWithReport(self, seqid, iprot, oprot): + args = getOrRefreshTokenWithReport_args() + args.read(iprot) + iprot.readMessageEnd() + result = getOrRefreshTokenWithReport_result() + try: + result.success = self._handler.getOrRefreshTokenWithReport(args.accountId, args.oldUrl, args.status, args.details, args.jobId, args.updateType, args.url, args.clients, args.airflowLogContext, args.requestParamsJson) + msg_type = 
TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getOrRefreshTokenWithReport", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_getOrRefreshToken(self, seqid, iprot, oprot): + args = getOrRefreshToken_args() + args.read(iprot) + iprot.readMessageEnd() + result = getOrRefreshToken_result() + try: + result.success = self._handler.getOrRefreshToken(args.accountId, args.updateType, args.url, args.clients, args.machineId, args.airflowLogContext, args.requestParamsJson, args.assignedProxyUrl) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getOrRefreshToken", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_getLatestToken(self, seqid, iprot, oprot): + args = getLatestToken_args() + args.read(iprot) + iprot.readMessageEnd() + result = getLatestToken_result() + try: + result.success = self._handler.getLatestToken(args.accountId) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getLatestToken", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_refreshToken(self, seqid, iprot, oprot): + args = refreshToken_args() + args.read(iprot) + iprot.readMessageEnd() + result = refreshToken_result() + try: + result.success = self._handler.refreshToken(args.accountId, args.updateType, args.url) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + 
result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("refreshToken", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_reportState(self, seqid, iprot, oprot): + args = reportState_args() + args.read(iprot) + iprot.readMessageEnd() + result = reportState_result() + try: + result.success = self._handler.reportState(args.url, args.status, args.details, args.jobId) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("reportState", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + + def process_getInfoJsonDirect(self, seqid, iprot, oprot): + args = getInfoJsonDirect_args() + args.read(iprot) + iprot.readMessageEnd() + result = getInfoJsonDirect_result() + try: + result.success = self._handler.getInfoJsonDirect(args.url, args.clients) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getInfoJsonDirect", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + +# HELPER FUNCTIONS AND STRUCTURES + + +class getOrRefreshTokenWithReport_args(object): + """ + Attributes: + - accountId + - oldUrl + - status + - details + - jobId + - updateType + - url + - clients + - airflowLogContext + - requestParamsJson + + """ + + + def __init__(self, accountId=None, oldUrl=None, status=None, details=None, jobId=None, updateType= 6, url=None, clients=None, airflowLogContext=None, requestParamsJson=None,): + self.accountId = accountId + self.oldUrl = oldUrl + self.status = status + self.details = details + self.jobId = jobId + self.updateType = updateType + self.url = url + self.clients = clients + self.airflowLogContext = airflowLogContext + self.requestParamsJson = requestParamsJson + + def read(self, iprot): + if 
iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.oldUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I32: + self.status = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.details = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.jobId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.I32: + self.updateType = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.STRING: + self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 9: + if ftype == TType.STRUCT: + self.airflowLogContext = pangramia.yt.common.ttypes.AirflowLogContext() + self.airflowLogContext.read(iprot) + else: + iprot.skip(ftype) + elif fid == 10: + if ftype == TType.STRING: + self.requestParamsJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getOrRefreshTokenWithReport_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.oldUrl is not None: + oprot.writeFieldBegin('oldUrl', TType.STRING, 2) + oprot.writeString(self.oldUrl.encode('utf-8') if sys.version_info[0] == 2 else self.oldUrl) + oprot.writeFieldEnd() + if self.status is not None: + oprot.writeFieldBegin('status', TType.I32, 3) + oprot.writeI32(self.status) + oprot.writeFieldEnd() + if self.details is not None: + oprot.writeFieldBegin('details', TType.STRING, 4) + oprot.writeString(self.details.encode('utf-8') if sys.version_info[0] == 2 else self.details) + oprot.writeFieldEnd() + if self.jobId is not None: + oprot.writeFieldBegin('jobId', TType.STRING, 5) + oprot.writeString(self.jobId.encode('utf-8') if sys.version_info[0] == 2 else self.jobId) + oprot.writeFieldEnd() + if self.updateType is not None: + oprot.writeFieldBegin('updateType', TType.I32, 6) + oprot.writeI32(self.updateType) + oprot.writeFieldEnd() + if self.url is not None: + oprot.writeFieldBegin('url', 
TType.STRING, 7) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + if self.clients is not None: + oprot.writeFieldBegin('clients', TType.STRING, 8) + oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients) + oprot.writeFieldEnd() + if self.airflowLogContext is not None: + oprot.writeFieldBegin('airflowLogContext', TType.STRUCT, 9) + self.airflowLogContext.write(oprot) + oprot.writeFieldEnd() + if self.requestParamsJson is not None: + oprot.writeFieldBegin('requestParamsJson', TType.STRING, 10) + oprot.writeString(self.requestParamsJson.encode('utf-8') if sys.version_info[0] == 2 else self.requestParamsJson) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getOrRefreshTokenWithReport_args) +getOrRefreshTokenWithReport_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.STRING, 'oldUrl', 'UTF8', None, ), # 2 + (3, TType.I32, 'status', None, None, ), # 3 + (4, TType.STRING, 'details', 'UTF8', None, ), # 4 + (5, TType.STRING, 'jobId', 'UTF8', None, ), # 5 + (6, TType.I32, 'updateType', None, 6, ), # 6 + (7, TType.STRING, 'url', 'UTF8', None, ), # 7 + (8, TType.STRING, 'clients', 'UTF8', None, ), # 8 + (9, TType.STRUCT, 'airflowLogContext', [pangramia.yt.common.ttypes.AirflowLogContext, None], None, ), # 9 + (10, TType.STRING, 'requestParamsJson', 'UTF8', None, ), # 10 +) + + +class getOrRefreshTokenWithReport_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.STRUCT: + self.success = pangramia.yt.common.ttypes.JobTokenData() + self.success.read(iprot) + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getOrRefreshTokenWithReport_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.STRUCT, 0) + self.success.write(oprot) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp 
is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getOrRefreshTokenWithReport_result) +getOrRefreshTokenWithReport_result.thrift_spec = ( + (0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class getOrRefreshToken_args(object): + """ + Attributes: + - accountId + - updateType + - url + - clients + - machineId + - airflowLogContext + - requestParamsJson + - assignedProxyUrl + + """ + + + def __init__(self, accountId=None, updateType= 6, url=None, clients=None, machineId=None, airflowLogContext=None, requestParamsJson=None, assignedProxyUrl=None,): + self.accountId = accountId + self.updateType = updateType + self.url = url + self.clients = clients + self.machineId = machineId + self.airflowLogContext = airflowLogContext + self.requestParamsJson = requestParamsJson + self.assignedProxyUrl = assignedProxyUrl + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.I32: + self.updateType = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.machineId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRUCT: + self.airflowLogContext = pangramia.yt.common.ttypes.AirflowLogContext() + self.airflowLogContext.read(iprot) + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.requestParamsJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.STRING: + self.assignedProxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and 
self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getOrRefreshToken_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.updateType is not None: + oprot.writeFieldBegin('updateType', TType.I32, 2) + oprot.writeI32(self.updateType) + oprot.writeFieldEnd() + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 3) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + if self.clients is not None: + oprot.writeFieldBegin('clients', TType.STRING, 4) + oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients) + oprot.writeFieldEnd() + if self.machineId is not None: + oprot.writeFieldBegin('machineId', TType.STRING, 5) + oprot.writeString(self.machineId.encode('utf-8') if sys.version_info[0] == 2 else self.machineId) + oprot.writeFieldEnd() + if self.airflowLogContext is not None: + oprot.writeFieldBegin('airflowLogContext', TType.STRUCT, 6) + self.airflowLogContext.write(oprot) + oprot.writeFieldEnd() + if self.requestParamsJson is not None: + oprot.writeFieldBegin('requestParamsJson', TType.STRING, 7) + oprot.writeString(self.requestParamsJson.encode('utf-8') if sys.version_info[0] == 2 else self.requestParamsJson) + oprot.writeFieldEnd() + if self.assignedProxyUrl is not None: + oprot.writeFieldBegin('assignedProxyUrl', TType.STRING, 8) + oprot.writeString(self.assignedProxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.assignedProxyUrl) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getOrRefreshToken_args) +getOrRefreshToken_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.I32, 'updateType', None, 6, ), # 2 + (3, TType.STRING, 'url', 'UTF8', None, ), # 3 + (4, TType.STRING, 'clients', 'UTF8', None, ), # 4 + (5, TType.STRING, 'machineId', 'UTF8', None, ), # 5 + (6, TType.STRUCT, 'airflowLogContext', [pangramia.yt.common.ttypes.AirflowLogContext, None], None, ), # 6 + (7, TType.STRING, 'requestParamsJson', 'UTF8', None, ), # 7 + (8, TType.STRING, 'assignedProxyUrl', 'UTF8', None, ), # 8 +) + + +class getOrRefreshToken_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.STRUCT: + self.success = pangramia.yt.common.ttypes.JobTokenData() + self.success.read(iprot) + else: + iprot.skip(ftype) + elif fid 
== 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getOrRefreshToken_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.STRUCT, 0) + self.success.write(oprot) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getOrRefreshToken_result) +getOrRefreshToken_result.thrift_spec = ( + (0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class getLatestToken_args(object): + """ + Attributes: + - accountId + + """ + + + def __init__(self, accountId=None,): + self.accountId = accountId + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getLatestToken_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getLatestToken_args) +getLatestToken_args.thrift_spec = ( + None, # 0 + (1, 
TType.STRING, 'accountId', 'UTF8', None, ), # 1 +) + + +class getLatestToken_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.STRUCT: + self.success = pangramia.yt.common.ttypes.JobTokenData() + self.success.read(iprot) + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getLatestToken_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.STRUCT, 0) + self.success.write(oprot) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getLatestToken_result) +getLatestToken_result.thrift_spec = ( + (0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class refreshToken_args(object): + """ + Attributes: + - accountId + - updateType + - url + + """ + + + def __init__(self, accountId=None, updateType= 6, url=None,): + self.accountId = accountId + self.updateType = updateType + self.url = url + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.accountId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.I32: + self.updateType = iprot.readI32() + else: + iprot.skip(ftype) + 
elif fid == 3: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('refreshToken_args') + if self.accountId is not None: + oprot.writeFieldBegin('accountId', TType.STRING, 1) + oprot.writeString(self.accountId.encode('utf-8') if sys.version_info[0] == 2 else self.accountId) + oprot.writeFieldEnd() + if self.updateType is not None: + oprot.writeFieldBegin('updateType', TType.I32, 2) + oprot.writeI32(self.updateType) + oprot.writeFieldEnd() + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 3) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(refreshToken_args) +refreshToken_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'accountId', 'UTF8', None, ), # 1 + (2, TType.I32, 'updateType', None, 6, ), # 2 + (3, TType.STRING, 'url', 'UTF8', None, ), # 3 +) + + +class refreshToken_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.STRUCT: + self.success = pangramia.yt.common.ttypes.JobTokenData() + self.success.read(iprot) + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('refreshToken_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.STRUCT, 0) + self.success.write(oprot) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def 
validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(refreshToken_result) +refreshToken_result.thrift_spec = ( + (0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class reportState_args(object): + """ + Attributes: + - url + - status + - details + - jobId + + """ + + + def __init__(self, url=None, status=None, details=None, jobId=None,): + self.url = url + self.status = status + self.details = details + self.jobId = jobId + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.I32: + self.status = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.details = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.jobId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('reportState_args') + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 1) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + if self.status is not None: + oprot.writeFieldBegin('status', TType.I32, 2) + oprot.writeI32(self.status) + oprot.writeFieldEnd() + if self.details is not None: + oprot.writeFieldBegin('details', TType.STRING, 3) + oprot.writeString(self.details.encode('utf-8') if sys.version_info[0] == 2 else self.details) + oprot.writeFieldEnd() + if self.jobId is not None: + oprot.writeFieldBegin('jobId', TType.STRING, 4) + oprot.writeString(self.jobId.encode('utf-8') if sys.version_info[0] == 2 else self.jobId) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(reportState_args) +reportState_args.thrift_spec = ( + None, # 0 + (1, 
TType.STRING, 'url', 'UTF8', None, ), # 1 + (2, TType.I32, 'status', None, None, ), # 2 + (3, TType.STRING, 'details', 'UTF8', None, ), # 3 + (4, TType.STRING, 'jobId', 'UTF8', None, ), # 4 +) + + +class reportState_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.BOOL: + self.success = iprot.readBool() + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('reportState_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.BOOL, 0) + oprot.writeBool(self.success) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(reportState_result) +reportState_result.thrift_spec = ( + (0, TType.BOOL, 'success', None, None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) + + +class getInfoJsonDirect_args(object): + """ + Attributes: + - url + - clients + + """ + + + def __init__(self, url=None, clients=None,): + self.url = url + self.clients = clients + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 
else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getInfoJsonDirect_args') + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 1) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + if self.clients is not None: + oprot.writeFieldBegin('clients', TType.STRING, 2) + oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getInfoJsonDirect_args) +getInfoJsonDirect_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'url', 'UTF8', None, ), # 1 + (2, TType.STRING, 'clients', 'UTF8', None, ), # 2 +) + + +class getInfoJsonDirect_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.STRUCT: + self.success = pangramia.yt.common.ttypes.JobTokenData() + self.success.read(iprot) + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getInfoJsonDirect_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.STRUCT, 0) + self.success.write(oprot) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + 
return not (self == other) +all_structs.append(getInfoJsonDirect_result) +getInfoJsonDirect_result.thrift_spec = ( + (0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) +fix_spec(all_structs) +del all_structs diff --git a/pangramia/yt/tokens_ops/__init__.py b/pangramia/yt/tokens_ops/__init__.py new file mode 100644 index 0000000..e97f47d --- /dev/null +++ b/pangramia/yt/tokens_ops/__init__.py @@ -0,0 +1 @@ +__all__ = ['ttypes', 'constants', 'YTTokenOpService'] diff --git a/pangramia/yt/tokens_ops/constants.py b/pangramia/yt/tokens_ops/constants.py new file mode 100644 index 0000000..09a78b3 --- /dev/null +++ b/pangramia/yt/tokens_ops/constants.py @@ -0,0 +1,14 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +from .ttypes import * diff --git a/pangramia/yt/tokens_ops/ttypes.py b/pangramia/yt/tokens_ops/ttypes.py new file mode 100644 index 0000000..2be2420 --- /dev/null +++ b/pangramia/yt/tokens_ops/ttypes.py @@ -0,0 +1,21 @@ +# +# Autogenerated by Thrift Compiler (0.20.0) +# +# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +# +# options string: py +# + +from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException +from thrift.protocol.TProtocol import TProtocolException +from thrift.TRecursive import fix_spec + +import sys +import pangramia.yt.common.ttypes +import pangramia.yt.exceptions.ttypes +import pangramia.yt.management.ttypes + +from thrift.transport import TTransport +all_structs = [] +fix_spec(all_structs) +del all_structs diff --git a/playbooks/playbook-bgutils-start.yml b/playbooks/playbook-bgutils-start.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/playbook-bgutils-stop.yml b/playbooks/playbook-bgutils-stop.yml new file mode 100644 index 0000000..e69de29 diff --git a/policies/1_fetch_only_policies.yaml b/policies/1_fetch_only_policies.yaml new file mode 100644 index 0000000..50d3ab8 --- /dev/null +++ b/policies/1_fetch_only_policies.yaml @@ -0,0 +1,155 @@ +# This file contains policies for testing only the info.json generation step. +# No downloads are performed. + +--- +# Policy: Basic fetch-only test for a TV client. +# This policy uses a single, static profile and has a rate limit to avoid being +# too aggressive. It saves the generated info.json files to a directory. +name: tv_downgraded_single_profile + +settings: + mode: fetch_only + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + save_info_json_dir: "fetched_info_jsons/tv_downgraded" + # Use a single, static profile for all requests. + profile_prefix: "tv_downgraded_user" + profile_mode: per_worker # With 1 worker, this is effectively a single profile. 
+ +execution_control: + run_until: { cycles: 1 } + workers: 1 + sleep_between_tasks: { min_seconds: 5, max_seconds: 10 } + +info_json_generation_policy: + client: tv_downgraded + # Safety rate limit: 450 requests per hour (7.5 req/min) + rate_limits: + per_ip: { max_requests: 450, per_minutes: 60 } + +--- +# Policy: Fetch-only test for an Android client using a cookie file. +# This demonstrates how to pass a cookie file for authenticated requests. +# It uses a single profile and stops if it encounters too many errors. +name: android_sdkless_with_cookies + +settings: + mode: fetch_only + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + save_info_json_dir: "fetched_info_jsons/android_sdkless" + profile_prefix: "android_user_with_cookies" + profile_mode: per_worker + +execution_control: + run_until: { cycles: 1 } # Run through the URL list once. + workers: 1 + sleep_between_tasks: { min_seconds: 2, max_seconds: 4 } + +info_json_generation_policy: + client: android_sdkless + # Pass per-request parameters. This is how you specify a cookie file. + request_params: + cookies_file_path: "/path/to/your/android_cookies.txt" + +stop_conditions: + # Stop if we get more than 5 errors in any 10-minute window. + on_error_rate: { max_errors: 5, per_minutes: 10 } + +--- +# Policy: TV Fetch with Profile Cooldown (Pipeline Stage 1) +# Fetches info.json files using the 'tv' client. Each profile is limited +# to a certain number of requests before it is put into a cooldown period. +# The output of this policy is intended to be used by a 'download_only' policy. +name: tv_fetch_with_cooldown + +settings: + mode: fetch_only + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + # Save the generated files to this directory for the download task to find. + save_info_json_dir: "live_jsons_tv" + profile_management: + prefix: "tv_user" + initial_pool_size: 10 + auto_expand_pool: true + max_requests_per_profile: 60 + sleep_minutes_on_exhaustion: 60 + +execution_control: + run_until: { cycles: 1 } + workers: 1 + sleep_between_tasks: { min_seconds: 2, max_seconds: 5 } + +info_json_generation_policy: + client: "tv" + request_params: + context_reuse_policy: { enabled: true, max_age_seconds: 86400 } + +--- +# Policy: MWeb with client rotation and rate limits. +# This demonstrates a more complex scenario with multiple clients and strict +# rate limiting, useful for simulating sophisticated user behavior. +name: mweb_client_rotation_and_rate_limits + +settings: + mode: fetch_only + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + # Use the dynamic profile pool management system. + profile_management: + prefix: "mweb_user" + initial_pool_size: 10 + max_requests_per_profile: 100 + sleep_minutes_on_exhaustion: 15 + +execution_control: + run_until: { cycles: 1 } + workers: 10 + sleep_between_tasks: { min_seconds: 2, max_seconds: 5 } + +info_json_generation_policy: + # Enforce strict rate limits for both the entire IP and each individual profile. + rate_limits: + per_ip: { max_requests: 120, per_minutes: 10 } + per_profile: { max_requests: 10, per_minutes: 10 } + + # Rotate between a primary client (mweb) and a refresh client (web_camoufox) + # to keep sessions fresh. + client_rotation_policy: + major_client: "mweb" + major_client_params: + context_reuse_policy: { enabled: true, max_age_seconds: 1800 } + refresh_client: "web_camoufox" + refresh_every: { requests: 20, minutes: 10 } + +--- +# Policy: TV Simply, fetch-only test with per-worker profile rotation. 
+# Fetches info.json using tv_simply with multiple workers. Each worker gets a +# unique profile that is retired and replaced with a new generation after a +# set number of requests. +name: tv_simply_fetch_rotation + +settings: + mode: fetch_only + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + save_info_json_dir: "fetched_info_jsons/tv_simply_rotation" + # Use the modern profile management system. + profile_mode: per_worker_with_rotation + profile_management: + prefix: "tv_simply_user" + # Rotate to a new profile generation after 250 requests. + max_requests_per_profile: 250 + +execution_control: + run_until: { cycles: 1 } # Run through the URL list once. + workers: 8 # Run with 8 parallel workers. + sleep_between_tasks: { min_seconds: 2, max_seconds: 5 } + # Optional: Override the assumed time for a fetch task to improve rate estimation. + # The default is 3 seconds for fetch_only mode. + # assumptions: + # fetch_task_duration: 2.5 + +info_json_generation_policy: + client: tv_simply diff --git a/policies/2_download_only_policies.yaml b/policies/2_download_only_policies.yaml new file mode 100644 index 0000000..a5feb54 --- /dev/null +++ b/policies/2_download_only_policies.yaml @@ -0,0 +1,58 @@ +# This file contains policies for testing only the download step from +# existing info.json files. No new info.json files are generated. + +--- +# Policy: Basic profile-aware download test. +# This policy reads info.json files from a directory, groups them by a profile +# name extracted from the filename, and downloads them using multiple workers. +# Each worker handles one or more profiles sequentially. +name: basic_profile_aware_download + +settings: + mode: download_only + info_json_dir: "prefetched_info_jsons" + # Regex to extract profile names from filenames like '...-VIDEOID-my_profile_name.json'. + profile_extraction_regex: ".*-[a-zA-Z0-9_-]{11}-(.+)\\.json" + +execution_control: + run_until: { cycles: 1 } + # 'auto' sets workers to the number of profiles, capped by auto_workers_max. + workers: auto + auto_workers_max: 8 + # This sleep applies between each file downloaded by a single profile. + sleep_between_tasks: { min_seconds: 1, max_seconds: 2 } + +download_policy: + formats: "18,140,299/298/137/136/135/134/133" + downloader: "aria2c" + downloader_args: "aria2c:-x 4 -k 1M" + extra_args: "--cleanup --output-dir /tmp/downloads" + # This sleep applies between formats of a single video. + sleep_between_formats: { min_seconds: 0, max_seconds: 0 } + +--- +# Policy: Continuous download from a folder (Pipeline Stage 2). +# This policy watches a directory for new info.json files and processes them +# as they appear. It is designed to work as the second stage of a pipeline, +# consuming files generated by a 'fetch_only' policy like 'tv_fetch_with_cooldown'. +name: continuous_watch_download + +settings: + mode: download_only + info_json_dir: "live_info_jsons" + directory_scan_mode: continuous + mark_processed_files: true # Rename files to *.processed to avoid re-downloading. + max_files_per_cycle: 50 # Process up to 50 new files each time it checks. + sleep_if_no_new_files_seconds: 15 + +execution_control: + # Note: For 'continuous' mode, a time-based run_until (e.g., {minutes: 120}) + # is more typical. {cycles: 1} will cause it to scan the directory once + # for new files, process them, and then exit. + run_until: { cycles: 1 } + workers: 4 # Use a few workers to process files in parallel. 
+ sleep_between_tasks: { min_seconds: 0, max_seconds: 0 } + +download_policy: + formats: "18,140" + extra_args: "--cleanup --output-dir /tmp/downloads" diff --git a/policies/3_full_stack_policies.yaml b/policies/3_full_stack_policies.yaml new file mode 100644 index 0000000..5ac43a0 --- /dev/null +++ b/policies/3_full_stack_policies.yaml @@ -0,0 +1,158 @@ +# This file contains policies for full-stack tests, which include both +# info.json generation and the subsequent download step. + +--- +# Policy: TV client with profile rotation. +# This test uses multiple parallel workers. Each worker gets its own profile +# that is automatically rotated (e.g., from tv_user_0_0 to tv_user_0_1) after +# a certain number of requests to simulate user churn. +name: tv_simply_profile_rotation + +settings: + mode: full_stack + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + save_info_json_dir: "fetched_info_jsons/tv_simply_rotation" + # Use the modern profile management system. + profile_mode: per_worker_with_rotation + profile_management: + prefix: "tv_simply" + # Rotate to a new profile generation after 250 requests. + max_requests_per_profile: 250 + +execution_control: + run_until: { cycles: 1 } + workers: 8 # Run with 8 parallel workers. + sleep_between_tasks: { min_seconds: 2, max_seconds: 5 } + # Optional: Override assumptions to improve rate estimation. + # assumptions: + # fetch_task_duration: 10 # Est. seconds to get info.json + # download_task_duration: 20 # Est. seconds to download all formats for one video + +info_json_generation_policy: + client: tv_simply + +download_policy: + formats: "18,140" + extra_args: "--cleanup --output-dir downloads/tv_simply_rotation" + proxy: "socks5://127.0.0.1:1087" + downloader: "aria2c" + downloader_args: "aria2c:-x 8 -k 1M" + sleep_between_formats: { min_seconds: 2, max_seconds: 2 } + +stop_conditions: + on_cumulative_403: { max_errors: 5, per_minutes: 2 } + +--- +# Policy: TV Simply, full-stack test with per-worker profile rotation. +# Generates info.json using tv_simply and immediately attempts to download. +# This combines the fetch and download steps into a single workflow. +name: tv_simply_full_stack_rotation + +settings: + mode: full_stack + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + profile_mode: per_worker_with_rotation + profile_management: + prefix: "tv_simply_worker" + max_requests_per_profile: 240 + +execution_control: + workers: 10 + run_until: { cycles: 1 } + sleep_between_tasks: { min_seconds: 5, max_seconds: 5 } + +info_json_generation_policy: + client: "tv_simply" + request_params: + context_reuse_policy: { enabled: false } + +download_policy: + formats: "18,140" + extra_args: "--output-dir downloads/tv_simply_downloads" + +--- +# Policy: MWeb client with multiple profiles, each with its own cookie file. +# This demonstrates how to run an authenticated test with a pool of accounts. +# The orchestrator will cycle through the cookie files, assigning one to each profile. +name: mweb_multi_profile_with_cookies + +settings: + mode: full_stack + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + # Use the dynamic profile pool management system. + profile_management: + prefix: "mweb_user" + initial_pool_size: 3 # Start with 3 profiles. + auto_expand_pool: true # Create new profiles if the initial 3 are all rate-limited. + max_requests_per_profile: 100 # Let each profile make 100 requests... + sleep_minutes_on_exhaustion: 15 # ...then put it to sleep for 15 minutes. 
+ # Assign a different cookie file to each profile in the pool. + # The tool will cycle through this list. + cookie_files: + - "/path/to/your/mweb_cookies_0.txt" + - "/path/to/your/mweb_cookies_1.txt" + - "/path/to/your/mweb_cookies_2.txt" + +execution_control: + run_until: { cycles: 1 } + workers: 3 # Match workers to the number of initial profiles. + sleep_between_tasks: { min_seconds: 1, max_seconds: 3 } + +info_json_generation_policy: + client: mweb + # This client uses youtubei.js, which generates PO tokens. + +download_policy: + formats: "18,140" + extra_args: "--cleanup --output-dir /tmp/downloads" + +--- +# Policy: TV client with profile rotation and aria2c RPC download. +# This test uses multiple parallel workers. Each worker gets its own profile +# that is automatically rotated. Downloads are submitted to an aria2c daemon +# via its RPC interface. +name: tv_simply_profile_rotation_aria2c_rpc + +settings: + mode: full_stack + urls_file: "urls.txt" + info_json_script: "bin/ytops-client get-info" + save_info_json_dir: "fetched_info_jsons/tv_simply_rotation_aria" + profile_mode: per_worker_with_rotation + profile_management: + prefix: "tv_simply_aria" + max_requests_per_profile: 250 + +execution_control: + run_until: { cycles: 1 } + workers: 8 + sleep_between_tasks: { min_seconds: 2, max_seconds: 5 } + +info_json_generation_policy: + client: tv_simply + +download_policy: + formats: "18,140" + # Use the aria2c RPC downloader + downloader: "aria2c_rpc" + # RPC server connection details + aria_host: "localhost" + aria_port: 6800 + # aria_secret: "your_secret" # Uncomment and set if needed + # Set to true to wait for each download and get a success/fail result. + # This is the default and recommended for monitoring success/failure. + # Set to false for maximum submission throughput ("fire-and-forget"), + # but you will lose per-download status reporting. + aria_wait: true + # The output directory is on the aria2c host machine + output_dir: "/downloads/tv_simply_rotation_aria" + # Pass custom arguments to aria2c in yt-dlp format for better performance. + # -x: max connections per server, -k: min split size. + downloader_args: "aria2c:[-x 8, -k 1M]" + sleep_between_formats: { min_seconds: 1, max_seconds: 2 } + +stop_conditions: + on_cumulative_403: { max_errors: 5, per_minutes: 2 } diff --git a/policies/README.md b/policies/README.md new file mode 100644 index 0000000..c590c79 --- /dev/null +++ b/policies/README.md @@ -0,0 +1,28 @@ +# Stress Test Policies + +This directory contains example policy files for the `stress_enhanced.py` orchestrator. Each file defines a specific testing strategy, organized by task type. + +## Authentication & Info.json Policies (`fetch_only` mode) + +These policies focus on testing the info.json generation service. + +- `info_json_rate_limit.yaml`: Tests the service with a focus on rate limits and client rotation. +- `auth_scenarios.yaml`: Contains specific scenarios for fetching info.json files, such as using a low-level command template for full control. + +## Download Policies (`download_only` mode) + +These policies focus on testing the download infrastructure using pre-existing info.json files. + +- `download_throughput.yaml`: Tests download/CDN infrastructure, focusing on throughput and error handling. +- `download_scenarios.yaml`: Contains specific scenarios for downloading, such as testing random formats from a directory of info.json files. 
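As a rough illustration of how these policy files are organized: each file is a multi-document YAML stream, and each document carries a unique `name` plus `settings`, `execution_control`, and per-mode policy blocks. The sketch below only demonstrates that layout by selecting one policy by name; `load_policy` is a hypothetical helper, not the orchestrator's actual loader.

    # Hypothetical helper: the real orchestrator (stress_enhanced.py) has its own
    # loader. This sketch only shows how to pick one document out of the
    # multi-document YAML files in this directory.
    import yaml

    def load_policy(path, name):
        """Return the policy document whose 'name' matches, or raise KeyError."""
        with open(path) as fh:
            for doc in yaml.safe_load_all(fh):
                if doc and doc.get("name") == name:
                    return doc
        raise KeyError(f"policy {name!r} not found in {path}")

    policy = load_policy("policies/1_fetch_only_policies.yaml", "tv_fetch_with_cooldown")
    print(policy["settings"]["mode"])                 # fetch_only
    print(policy["execution_control"]["workers"])     # 1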
+ +## Full-Stack Policies (`full_stack` mode) + +These policies test the entire workflow from info.json generation through to downloading. + +- `regular_testing_scenarios.yaml`: Contains a collection of common, end-to-end testing scenarios, including: + - `mweb_per_request_profile`: A high-volume test that uses a new profile for every request. + - `mixed_client_profile_pool`: A complex test that alternates clients and reuses profiles from a pool. +- `tv_pipeline_scenarios.yaml`: A two-stage pipeline for fetching with the TV client and then continuously downloading. + +These files can be used as templates for creating custom test scenarios. diff --git a/setup.py b/setup.py index 1639ab4..9c38fe6 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,6 @@ setup( 'psutil', 'flask', 'waitress', - 'yt_dlp>=2025.3.27', 'yt-dlp-get-pot==0.3.0', 'requests>=2.31.0', 'ffprobe3', diff --git a/thrift_model/.gitignore b/thrift_model/.gitignore index 2f7896d..6afe395 100644 --- a/thrift_model/.gitignore +++ b/thrift_model/.gitignore @@ -1 +1,2 @@ +__py_cache__ target/ diff --git a/thrift_model/data/common.thrift b/thrift_model/data/common.thrift new file mode 100644 index 0000000..0d73dc3 --- /dev/null +++ b/thrift_model/data/common.thrift @@ -0,0 +1,145 @@ +namespace py pangramia.yt.common +namespace java com.pangramia.yt.common + +typedef string JobID +typedef string Timestamp + + +/** + * Standard error codes for service exceptions. + */ +enum ErrorCode { + UNKNOWN = 0, + NOT_IMPLEMENTED = 1, + INTERNAL_ERROR = 2, + INVALID_REQUEST = 3, + PROXY_UNAVAILABLE = 4, + ACCOUNT_UNAVAILABLE = 5, + BOT_DETECTED = 6, + BOT_DETECTION_SIGN_IN_REQUIRED = 7, + SABR_STREAMING_DETECTED = 8 +} + + +enum JobState { + SUCCESS, + FAIL, + BOT_FORBIDDEN_ON_URL_ACCESS, + BOT_FORBIDDEN_ON_FILE_DOWNLOAD, + BOT_CAPTCHA, + BOT_AUTH_RELOGIN_REQUIRED, + BOT_AUTH_SMS_REQUIRED, + BOT_AUTH_DEVICE_QR_REQUIRED, + BOT_ACCOUNT_BANNED, + BOT_IP_BANNED +} + +struct JobTokenData { + 1: optional string infoJson, + 2: optional string ytdlpCommand, + 3: optional string socks, + 4: optional JobID jobId, + 5: optional string url, + 6: optional string cookiesBlob, + 7: optional string requestSummary, + 8: optional list communicationLogPaths, + 9: optional string serverVersionInfo, +} + + +enum TokenUpdateMode { + AUTOREFRESH_AND_REMAIN_ANONYMOUS, + AUTOREFRESH_AND_ALLOW_AUTH, + AUTOREFRESH_AND_ONLY_AUTH, + CLEANUP_THEN_AUTOREFRESH_AND_ONLY_AUTH, + CLEANUP_THEN_AUTOREFRESH_AND_REMAIN_ANONYMOUS, + CLEANUP_THEN_AUTOREFRESH_AND_ALLOW_AUTH, + AUTO,// AUTOREFRESH_AND_ONLY_AUTH, +} + + +struct AccountData { + 1: required string username, + 2: required string password, + 3: optional string countryCode +} + +struct ProxyData { + 1: required string proxyUrl, + 2: optional string countryCode +} + + +enum AccountPairState { + ACTIVE, + PAUSED, + REMOVED, + IN_PROGRESS, + ALL +} + + +struct AccountPairWithState { + 1: required string accountId, + 2: required string proxyId, + 3: optional AccountPairState accountPairState + 4: optional string machineId, +} + +struct JobData { + 1: required string jobId, + 2: required string url, + 3: required string cookiesBlob, + 4: required string potoken, + 5: required string visitorId, + 6: required string ytdlpCommand, + 7: required string createdTime, + 8: required map telemetry, + 9: required JobState state, + 10: optional string errorMessage, + 11: optional string socks5Id +} + +struct RichCollectionPagination { + 1: required bool hasNext, + 2: required i32 totalCount, + 3: required i32 page, + 4: required i32 pageSize +} 
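For context on how these structs are consumed, the sketch below builds an AirflowLogContext with the generated Python types and passes it through the extended getOrRefreshToken call, which now also accepts requestParamsJson and assignedProxyUrl. It is a minimal illustration only: the host, port, account id, machine id, and URL are placeholders, and error handling is omitted.

    # Minimal sketch of a token-ops call using the generated bindings.
    # Host/port and all identifier values below are invented placeholders.
    from thrift.transport import TSocket, TTransport
    from thrift.protocol import TBinaryProtocol
    from pangramia.yt.common.ttypes import AirflowLogContext, TokenUpdateMode
    from pangramia.yt.tokens_ops import YTTokenOpService

    log_ctx = AirflowLogContext(
        dagId="ytdlp_ops_v02_worker_per_url_auth",
        runId="manual__2025-11-17",   # placeholder run id
        taskId="get_token",
        tryNumber=1,
    )

    transport = TTransport.TBufferedTransport(TSocket.TSocket("localhost", 9090))
    client = YTTokenOpService.Client(TBinaryProtocol.TBinaryProtocol(transport))
    transport.open()
    token = client.getOrRefreshToken(
        "account-001",                 # accountId
        TokenUpdateMode.AUTO,          # updateType
        "https://example.com/watch?v=VIDEOID",
        "tv_simply",                   # clients
        "worker-01",                   # machineId
        log_ctx,                       # airflowLogContext (new)
        "{}",                          # requestParamsJson (new)
        None,                          # assignedProxyUrl (new)
    )
    transport.close()
    print(token.jobId, token.serverVersionInfo)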
+ +struct RichCollectionJobData { + 1: required list items, + 2: required RichCollectionPagination pagination +} + +struct ProxyStatus { + 1: string proxyUrl, + 2: string status, + 3: i64 successCount, + 4: i64 failureCount, + 5: optional string lastFailureTimestamp, + 6: optional string lastSuccessTimestamp, + 7: optional string serverIdentity +} + +struct AccountStatus { + 1: string accountId, + 2: string status, + 3: i64 successCount, + 4: i64 failureCount, + 5: optional string lastFailureTimestamp, + 6: optional string lastSuccessTimestamp, + 7: optional string lastUsedProxy, + 8: optional string lastUsedMachine +} + +struct AirflowLogContext { + 1: optional string logS3Path, + 2: optional string dagId, + 3: optional string runId, + 4: optional string taskId, + 5: optional i32 tryNumber, + 6: optional string workerHostname, + 7: optional string queue +} + diff --git a/thrift_model/data/exceptions.thrift b/thrift_model/data/exceptions.thrift new file mode 100644 index 0000000..2e0370e --- /dev/null +++ b/thrift_model/data/exceptions.thrift @@ -0,0 +1,14 @@ +namespace py pangramia.yt.exceptions +namespace java com.pangramia.yt.exceptions + +exception PBServiceException { + 1: required string message, + 2: optional string errorCode, + 3: optional map context +} + +exception PBUserException { + 1: required string message, + 2: optional string errorCode, + 3: optional map context +} diff --git a/thrift_model/gen_py/pangramia/yt/common/ttypes.py b/thrift_model/gen_py/pangramia/yt/common/ttypes.py index 145dee9..063c837 100644 --- a/thrift_model/gen_py/pangramia/yt/common/ttypes.py +++ b/thrift_model/gen_py/pangramia/yt/common/ttypes.py @@ -29,6 +29,7 @@ class ErrorCode(object): ACCOUNT_UNAVAILABLE = 5 BOT_DETECTED = 6 BOT_DETECTION_SIGN_IN_REQUIRED = 7 + SABR_STREAMING_DETECTED = 8 _VALUES_TO_NAMES = { 0: "UNKNOWN", @@ -39,6 +40,7 @@ class ErrorCode(object): 5: "ACCOUNT_UNAVAILABLE", 6: "BOT_DETECTED", 7: "BOT_DETECTION_SIGN_IN_REQUIRED", + 8: "SABR_STREAMING_DETECTED", } _NAMES_TO_VALUES = { @@ -50,6 +52,7 @@ class ErrorCode(object): "ACCOUNT_UNAVAILABLE": 5, "BOT_DETECTED": 6, "BOT_DETECTION_SIGN_IN_REQUIRED": 7, + "SABR_STREAMING_DETECTED": 8, } @@ -155,17 +158,23 @@ class JobTokenData(object): - jobId - url - cookiesBlob + - requestSummary + - communicationLogPaths + - serverVersionInfo """ - def __init__(self, infoJson=None, ytdlpCommand=None, socks=None, jobId=None, url=None, cookiesBlob=None,): + def __init__(self, infoJson=None, ytdlpCommand=None, socks=None, jobId=None, url=None, cookiesBlob=None, requestSummary=None, communicationLogPaths=None, serverVersionInfo=None,): self.infoJson = infoJson self.ytdlpCommand = ytdlpCommand self.socks = socks self.jobId = jobId self.url = url self.cookiesBlob = cookiesBlob + self.requestSummary = requestSummary + self.communicationLogPaths = communicationLogPaths + self.serverVersionInfo = serverVersionInfo def read(self, iprot): if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: @@ -206,6 +215,26 @@ class JobTokenData(object): self.cookiesBlob = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() else: iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.requestSummary = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.LIST: + self.communicationLogPaths = [] + (_etype3, _size0) = 
iprot.readListBegin() + for _i4 in range(_size0): + _elem5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + self.communicationLogPaths.append(_elem5) + iprot.readListEnd() + else: + iprot.skip(ftype) + elif fid == 9: + if ftype == TType.STRING: + self.serverVersionInfo = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -240,6 +269,21 @@ class JobTokenData(object): oprot.writeFieldBegin('cookiesBlob', TType.STRING, 6) oprot.writeString(self.cookiesBlob.encode('utf-8') if sys.version_info[0] == 2 else self.cookiesBlob) oprot.writeFieldEnd() + if self.requestSummary is not None: + oprot.writeFieldBegin('requestSummary', TType.STRING, 7) + oprot.writeString(self.requestSummary.encode('utf-8') if sys.version_info[0] == 2 else self.requestSummary) + oprot.writeFieldEnd() + if self.communicationLogPaths is not None: + oprot.writeFieldBegin('communicationLogPaths', TType.LIST, 8) + oprot.writeListBegin(TType.STRING, len(self.communicationLogPaths)) + for iter6 in self.communicationLogPaths: + oprot.writeString(iter6.encode('utf-8') if sys.version_info[0] == 2 else iter6) + oprot.writeListEnd() + oprot.writeFieldEnd() + if self.serverVersionInfo is not None: + oprot.writeFieldBegin('serverVersionInfo', TType.STRING, 9) + oprot.writeString(self.serverVersionInfo.encode('utf-8') if sys.version_info[0] == 2 else self.serverVersionInfo) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -583,11 +627,11 @@ class JobData(object): elif fid == 8: if ftype == TType.MAP: self.telemetry = {} - (_ktype1, _vtype2, _size0) = iprot.readMapBegin() - for _i4 in range(_size0): - _key5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() - _val6 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() - self.telemetry[_key5] = _val6 + (_ktype8, _vtype9, _size7) = iprot.readMapBegin() + for _i11 in range(_size7): + _key12 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + _val13 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + self.telemetry[_key12] = _val13 iprot.readMapEnd() else: iprot.skip(ftype) @@ -647,9 +691,9 @@ class JobData(object): if self.telemetry is not None: oprot.writeFieldBegin('telemetry', TType.MAP, 8) oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.telemetry)) - for kiter7, viter8 in self.telemetry.items(): - oprot.writeString(kiter7.encode('utf-8') if sys.version_info[0] == 2 else kiter7) - oprot.writeString(viter8.encode('utf-8') if sys.version_info[0] == 2 else viter8) + for kiter14, viter15 in self.telemetry.items(): + oprot.writeString(kiter14.encode('utf-8') if sys.version_info[0] == 2 else kiter14) + oprot.writeString(viter15.encode('utf-8') if sys.version_info[0] == 2 else viter15) oprot.writeMapEnd() oprot.writeFieldEnd() if self.state is not None: @@ -823,11 +867,11 @@ class RichCollectionJobData(object): if fid == 1: if ftype == TType.LIST: self.items = [] - (_etype12, _size9) = iprot.readListBegin() - for _i13 in range(_size9): - _elem14 = JobData() - _elem14.read(iprot) - self.items.append(_elem14) + (_etype19, _size16) = iprot.readListBegin() + for _i20 in range(_size16): + _elem21 = JobData() + _elem21.read(iprot) + self.items.append(_elem21) 
iprot.readListEnd() else: iprot.skip(ftype) @@ -850,8 +894,8 @@ class RichCollectionJobData(object): if self.items is not None: oprot.writeFieldBegin('items', TType.LIST, 1) oprot.writeListBegin(TType.STRUCT, len(self.items)) - for iter15 in self.items: - iter15.write(oprot) + for iter22 in self.items: + iter22.write(oprot) oprot.writeListEnd() oprot.writeFieldEnd() if self.pagination is not None: @@ -1135,6 +1179,129 @@ class AccountStatus(object): def __ne__(self, other): return not (self == other) + + +class AirflowLogContext(object): + """ + Attributes: + - logS3Path + - dagId + - runId + - taskId + - tryNumber + - workerHostname + - queue + + """ + + + def __init__(self, logS3Path=None, dagId=None, runId=None, taskId=None, tryNumber=None, workerHostname=None, queue=None,): + self.logS3Path = logS3Path + self.dagId = dagId + self.runId = runId + self.taskId = taskId + self.tryNumber = tryNumber + self.workerHostname = workerHostname + self.queue = queue + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.logS3Path = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.dagId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRING: + self.runId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.STRING: + self.taskId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.I32: + self.tryNumber = iprot.readI32() + else: + iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRING: + self.workerHostname = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.queue = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('AirflowLogContext') + if self.logS3Path is not None: + oprot.writeFieldBegin('logS3Path', TType.STRING, 1) + oprot.writeString(self.logS3Path.encode('utf-8') if sys.version_info[0] == 2 else self.logS3Path) + oprot.writeFieldEnd() + if self.dagId is not None: + oprot.writeFieldBegin('dagId', TType.STRING, 2) + oprot.writeString(self.dagId.encode('utf-8') if sys.version_info[0] == 2 else self.dagId) + oprot.writeFieldEnd() + if self.runId is not None: + oprot.writeFieldBegin('runId', TType.STRING, 3) + oprot.writeString(self.runId.encode('utf-8') if sys.version_info[0] == 2 else self.runId) + oprot.writeFieldEnd() + if self.taskId is not None: + 
oprot.writeFieldBegin('taskId', TType.STRING, 4) + oprot.writeString(self.taskId.encode('utf-8') if sys.version_info[0] == 2 else self.taskId) + oprot.writeFieldEnd() + if self.tryNumber is not None: + oprot.writeFieldBegin('tryNumber', TType.I32, 5) + oprot.writeI32(self.tryNumber) + oprot.writeFieldEnd() + if self.workerHostname is not None: + oprot.writeFieldBegin('workerHostname', TType.STRING, 6) + oprot.writeString(self.workerHostname.encode('utf-8') if sys.version_info[0] == 2 else self.workerHostname) + oprot.writeFieldEnd() + if self.queue is not None: + oprot.writeFieldBegin('queue', TType.STRING, 7) + oprot.writeString(self.queue.encode('utf-8') if sys.version_info[0] == 2 else self.queue) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) all_structs.append(JobTokenData) JobTokenData.thrift_spec = ( None, # 0 @@ -1144,6 +1311,9 @@ JobTokenData.thrift_spec = ( (4, TType.STRING, 'jobId', 'UTF8', None, ), # 4 (5, TType.STRING, 'url', 'UTF8', None, ), # 5 (6, TType.STRING, 'cookiesBlob', 'UTF8', None, ), # 6 + (7, TType.STRING, 'requestSummary', 'UTF8', None, ), # 7 + (8, TType.LIST, 'communicationLogPaths', (TType.STRING, 'UTF8', False), None, ), # 8 + (9, TType.STRING, 'serverVersionInfo', 'UTF8', None, ), # 9 ) all_structs.append(AccountData) AccountData.thrift_spec = ( @@ -1218,5 +1388,16 @@ AccountStatus.thrift_spec = ( (7, TType.STRING, 'lastUsedProxy', 'UTF8', None, ), # 7 (8, TType.STRING, 'lastUsedMachine', 'UTF8', None, ), # 8 ) +all_structs.append(AirflowLogContext) +AirflowLogContext.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'logS3Path', 'UTF8', None, ), # 1 + (2, TType.STRING, 'dagId', 'UTF8', None, ), # 2 + (3, TType.STRING, 'runId', 'UTF8', None, ), # 3 + (4, TType.STRING, 'taskId', 'UTF8', None, ), # 4 + (5, TType.I32, 'tryNumber', None, None, ), # 5 + (6, TType.STRING, 'workerHostname', 'UTF8', None, ), # 6 + (7, TType.STRING, 'queue', 'UTF8', None, ), # 7 +) fix_spec(all_structs) del all_structs diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote b/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote index 920278e..8685be7 100755 --- a/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote +++ b/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote @@ -24,11 +24,12 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help': print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]') print('') print('Functions:') - print(' JobTokenData getOrRefreshTokenWithReport(string accountId, string oldUrl, JobState status, string details, string jobId, TokenUpdateMode updateType, string url, string clients)') - print(' JobTokenData getOrRefreshToken(string accountId, TokenUpdateMode updateType, string url, string clients, string machineId)') + print(' JobTokenData getOrRefreshTokenWithReport(string accountId, string oldUrl, JobState status, string details, string jobId, TokenUpdateMode updateType, string url, string clients, AirflowLogContext airflowLogContext, string requestParamsJson)') + print(' JobTokenData 
getOrRefreshToken(string accountId, TokenUpdateMode updateType, string url, string clients, string machineId, AirflowLogContext airflowLogContext, string requestParamsJson, string assignedProxyUrl)') print(' JobTokenData getLatestToken(string accountId)') print(' JobTokenData refreshToken(string accountId, TokenUpdateMode updateType, string url)') print(' bool reportState(string url, JobState status, string details, string jobId)') + print(' JobTokenData getInfoJsonDirect(string url, string clients)') print(' getProxyStatus(string serverIdentity)') print(' bool banProxy(string proxyUrl, string serverIdentity)') print(' bool unbanProxy(string proxyUrl, string serverIdentity)') @@ -124,16 +125,16 @@ client = YTTokenOpService.Client(protocol) transport.open() if cmd == 'getOrRefreshTokenWithReport': - if len(args) != 8: - print('getOrRefreshTokenWithReport requires 8 args') + if len(args) != 10: + print('getOrRefreshTokenWithReport requires 10 args') sys.exit(1) - pp.pprint(client.getOrRefreshTokenWithReport(args[0], args[1], eval(args[2]), args[3], args[4], eval(args[5]), args[6], args[7],)) + pp.pprint(client.getOrRefreshTokenWithReport(args[0], args[1], eval(args[2]), args[3], args[4], eval(args[5]), args[6], args[7], eval(args[8]), args[9],)) elif cmd == 'getOrRefreshToken': - if len(args) != 5: - print('getOrRefreshToken requires 5 args') + if len(args) != 8: + print('getOrRefreshToken requires 8 args') sys.exit(1) - pp.pprint(client.getOrRefreshToken(args[0], eval(args[1]), args[2], args[3], args[4],)) + pp.pprint(client.getOrRefreshToken(args[0], eval(args[1]), args[2], args[3], args[4], eval(args[5]), args[6], args[7],)) elif cmd == 'getLatestToken': if len(args) != 1: @@ -153,6 +154,12 @@ elif cmd == 'reportState': sys.exit(1) pp.pprint(client.reportState(args[0], eval(args[1]), args[2], args[3],)) +elif cmd == 'getInfoJsonDirect': + if len(args) != 2: + print('getInfoJsonDirect requires 2 args') + sys.exit(1) + pp.pprint(client.getInfoJsonDirect(args[0], args[1],)) + elif cmd == 'getProxyStatus': if len(args) != 1: print('getProxyStatus requires 1 args') diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py b/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py index 8356d39..e1772e4 100644 --- a/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py +++ b/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py @@ -20,7 +20,7 @@ all_structs = [] class Iface(pangramia.yt.management.YTManagementService.Iface): - def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients): + def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson): """ Parameters: - accountId @@ -31,11 +31,13 @@ class Iface(pangramia.yt.management.YTManagementService.Iface): - updateType - url - clients + - airflowLogContext + - requestParamsJson """ pass - def getOrRefreshToken(self, accountId, updateType, url, clients, machineId): + def getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl): """ Parameters: - accountId @@ -43,6 +45,9 @@ class Iface(pangramia.yt.management.YTManagementService.Iface): - url - clients - machineId + - airflowLogContext + - requestParamsJson + - assignedProxyUrl """ pass @@ -76,12 +81,21 @@ class Iface(pangramia.yt.management.YTManagementService.Iface): """ pass + def getInfoJsonDirect(self, url, clients): + """ + Parameters: + - url 
+ - clients + + """ + pass + class Client(pangramia.yt.management.YTManagementService.Client, Iface): def __init__(self, iprot, oprot=None): pangramia.yt.management.YTManagementService.Client.__init__(self, iprot, oprot) - def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients): + def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson): """ Parameters: - accountId @@ -92,12 +106,14 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface): - updateType - url - clients + - airflowLogContext + - requestParamsJson """ - self.send_getOrRefreshTokenWithReport(accountId, oldUrl, status, details, jobId, updateType, url, clients) + self.send_getOrRefreshTokenWithReport(accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson) return self.recv_getOrRefreshTokenWithReport() - def send_getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients): + def send_getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson): self._oprot.writeMessageBegin('getOrRefreshTokenWithReport', TMessageType.CALL, self._seqid) args = getOrRefreshTokenWithReport_args() args.accountId = accountId @@ -108,6 +124,8 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface): args.updateType = updateType args.url = url args.clients = clients + args.airflowLogContext = airflowLogContext + args.requestParamsJson = requestParamsJson args.write(self._oprot) self._oprot.writeMessageEnd() self._oprot.trans.flush() @@ -131,7 +149,7 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface): raise result.userExp raise TApplicationException(TApplicationException.MISSING_RESULT, "getOrRefreshTokenWithReport failed: unknown result") - def getOrRefreshToken(self, accountId, updateType, url, clients, machineId): + def getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl): """ Parameters: - accountId @@ -139,12 +157,15 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface): - url - clients - machineId + - airflowLogContext + - requestParamsJson + - assignedProxyUrl """ - self.send_getOrRefreshToken(accountId, updateType, url, clients, machineId) + self.send_getOrRefreshToken(accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl) return self.recv_getOrRefreshToken() - def send_getOrRefreshToken(self, accountId, updateType, url, clients, machineId): + def send_getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl): self._oprot.writeMessageBegin('getOrRefreshToken', TMessageType.CALL, self._seqid) args = getOrRefreshToken_args() args.accountId = accountId @@ -152,6 +173,9 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface): args.url = url args.clients = clients args.machineId = machineId + args.airflowLogContext = airflowLogContext + args.requestParamsJson = requestParamsJson + args.assignedProxyUrl = assignedProxyUrl args.write(self._oprot) self._oprot.writeMessageEnd() self._oprot.trans.flush() @@ -293,6 +317,44 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface): raise result.userExp raise 
TApplicationException(TApplicationException.MISSING_RESULT, "reportState failed: unknown result") + def getInfoJsonDirect(self, url, clients): + """ + Parameters: + - url + - clients + + """ + self.send_getInfoJsonDirect(url, clients) + return self.recv_getInfoJsonDirect() + + def send_getInfoJsonDirect(self, url, clients): + self._oprot.writeMessageBegin('getInfoJsonDirect', TMessageType.CALL, self._seqid) + args = getInfoJsonDirect_args() + args.url = url + args.clients = clients + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_getInfoJsonDirect(self): + iprot = self._iprot + (fname, mtype, rseqid) = iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(iprot) + iprot.readMessageEnd() + raise x + result = getInfoJsonDirect_result() + result.read(iprot) + iprot.readMessageEnd() + if result.success is not None: + return result.success + if result.serviceExp is not None: + raise result.serviceExp + if result.userExp is not None: + raise result.userExp + raise TApplicationException(TApplicationException.MISSING_RESULT, "getInfoJsonDirect failed: unknown result") + class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TProcessor): def __init__(self, handler): @@ -302,6 +364,7 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP self._processMap["getLatestToken"] = Processor.process_getLatestToken self._processMap["refreshToken"] = Processor.process_refreshToken self._processMap["reportState"] = Processor.process_reportState + self._processMap["getInfoJsonDirect"] = Processor.process_getInfoJsonDirect self._on_message_begin = None def on_message_begin(self, func): @@ -330,7 +393,7 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP iprot.readMessageEnd() result = getOrRefreshTokenWithReport_result() try: - result.success = self._handler.getOrRefreshTokenWithReport(args.accountId, args.oldUrl, args.status, args.details, args.jobId, args.updateType, args.url, args.clients) + result.success = self._handler.getOrRefreshTokenWithReport(args.accountId, args.oldUrl, args.status, args.details, args.jobId, args.updateType, args.url, args.clients, args.airflowLogContext, args.requestParamsJson) msg_type = TMessageType.REPLY except TTransport.TTransportException: raise @@ -359,7 +422,7 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP iprot.readMessageEnd() result = getOrRefreshToken_result() try: - result.success = self._handler.getOrRefreshToken(args.accountId, args.updateType, args.url, args.clients, args.machineId) + result.success = self._handler.getOrRefreshToken(args.accountId, args.updateType, args.url, args.clients, args.machineId, args.airflowLogContext, args.requestParamsJson, args.assignedProxyUrl) msg_type = TMessageType.REPLY except TTransport.TTransportException: raise @@ -469,6 +532,35 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP oprot.writeMessageEnd() oprot.trans.flush() + def process_getInfoJsonDirect(self, seqid, iprot, oprot): + args = getInfoJsonDirect_args() + args.read(iprot) + iprot.readMessageEnd() + result = getInfoJsonDirect_result() + try: + result.success = self._handler.getInfoJsonDirect(args.url, args.clients) + msg_type = TMessageType.REPLY + except TTransport.TTransportException: + raise + except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp: + msg_type = TMessageType.REPLY + result.serviceExp = 
serviceExp + except pangramia.yt.exceptions.ttypes.PBUserException as userExp: + msg_type = TMessageType.REPLY + result.userExp = userExp + except TApplicationException as ex: + logging.exception('TApplication exception in handler') + msg_type = TMessageType.EXCEPTION + result = ex + except Exception: + logging.exception('Unexpected exception in handler') + msg_type = TMessageType.EXCEPTION + result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error') + oprot.writeMessageBegin("getInfoJsonDirect", msg_type, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + # HELPER FUNCTIONS AND STRUCTURES @@ -483,11 +575,13 @@ class getOrRefreshTokenWithReport_args(object): - updateType - url - clients + - airflowLogContext + - requestParamsJson """ - def __init__(self, accountId=None, oldUrl=None, status=None, details=None, jobId=None, updateType= 6, url=None, clients=None,): + def __init__(self, accountId=None, oldUrl=None, status=None, details=None, jobId=None, updateType= 6, url=None, clients=None, airflowLogContext=None, requestParamsJson=None,): self.accountId = accountId self.oldUrl = oldUrl self.status = status @@ -496,6 +590,8 @@ class getOrRefreshTokenWithReport_args(object): self.updateType = updateType self.url = url self.clients = clients + self.airflowLogContext = airflowLogContext + self.requestParamsJson = requestParamsJson def read(self, iprot): if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: @@ -546,6 +642,17 @@ class getOrRefreshTokenWithReport_args(object): self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() else: iprot.skip(ftype) + elif fid == 9: + if ftype == TType.STRUCT: + self.airflowLogContext = pangramia.yt.common.ttypes.AirflowLogContext() + self.airflowLogContext.read(iprot) + else: + iprot.skip(ftype) + elif fid == 10: + if ftype == TType.STRING: + self.requestParamsJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -588,6 +695,14 @@ class getOrRefreshTokenWithReport_args(object): oprot.writeFieldBegin('clients', TType.STRING, 8) oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients) oprot.writeFieldEnd() + if self.airflowLogContext is not None: + oprot.writeFieldBegin('airflowLogContext', TType.STRUCT, 9) + self.airflowLogContext.write(oprot) + oprot.writeFieldEnd() + if self.requestParamsJson is not None: + oprot.writeFieldBegin('requestParamsJson', TType.STRING, 10) + oprot.writeString(self.requestParamsJson.encode('utf-8') if sys.version_info[0] == 2 else self.requestParamsJson) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -615,6 +730,8 @@ getOrRefreshTokenWithReport_args.thrift_spec = ( (6, TType.I32, 'updateType', None, 6, ), # 6 (7, TType.STRING, 'url', 'UTF8', None, ), # 7 (8, TType.STRING, 'clients', 'UTF8', None, ), # 8 + (9, TType.STRUCT, 'airflowLogContext', [pangramia.yt.common.ttypes.AirflowLogContext, None], None, ), # 9 + (10, TType.STRING, 'requestParamsJson', 'UTF8', None, ), # 10 ) @@ -712,16 +829,22 @@ class getOrRefreshToken_args(object): - url - clients - machineId + - airflowLogContext + - requestParamsJson + - assignedProxyUrl """ - def __init__(self, accountId=None, updateType= 6, url=None, clients=None, machineId=None,): + def __init__(self, 
accountId=None, updateType= 6, url=None, clients=None, machineId=None, airflowLogContext=None, requestParamsJson=None, assignedProxyUrl=None,): self.accountId = accountId self.updateType = updateType self.url = url self.clients = clients self.machineId = machineId + self.airflowLogContext = airflowLogContext + self.requestParamsJson = requestParamsJson + self.assignedProxyUrl = assignedProxyUrl def read(self, iprot): if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: @@ -757,6 +880,22 @@ class getOrRefreshToken_args(object): self.machineId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() else: iprot.skip(ftype) + elif fid == 6: + if ftype == TType.STRUCT: + self.airflowLogContext = pangramia.yt.common.ttypes.AirflowLogContext() + self.airflowLogContext.read(iprot) + else: + iprot.skip(ftype) + elif fid == 7: + if ftype == TType.STRING: + self.requestParamsJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 8: + if ftype == TType.STRING: + self.assignedProxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -787,6 +926,18 @@ class getOrRefreshToken_args(object): oprot.writeFieldBegin('machineId', TType.STRING, 5) oprot.writeString(self.machineId.encode('utf-8') if sys.version_info[0] == 2 else self.machineId) oprot.writeFieldEnd() + if self.airflowLogContext is not None: + oprot.writeFieldBegin('airflowLogContext', TType.STRUCT, 6) + self.airflowLogContext.write(oprot) + oprot.writeFieldEnd() + if self.requestParamsJson is not None: + oprot.writeFieldBegin('requestParamsJson', TType.STRING, 7) + oprot.writeString(self.requestParamsJson.encode('utf-8') if sys.version_info[0] == 2 else self.requestParamsJson) + oprot.writeFieldEnd() + if self.assignedProxyUrl is not None: + oprot.writeFieldBegin('assignedProxyUrl', TType.STRING, 8) + oprot.writeString(self.assignedProxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.assignedProxyUrl) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -811,6 +962,9 @@ getOrRefreshToken_args.thrift_spec = ( (3, TType.STRING, 'url', 'UTF8', None, ), # 3 (4, TType.STRING, 'clients', 'UTF8', None, ), # 4 (5, TType.STRING, 'machineId', 'UTF8', None, ), # 5 + (6, TType.STRUCT, 'airflowLogContext', [pangramia.yt.common.ttypes.AirflowLogContext, None], None, ), # 6 + (7, TType.STRING, 'requestParamsJson', 'UTF8', None, ), # 7 + (8, TType.STRING, 'assignedProxyUrl', 'UTF8', None, ), # 8 ) @@ -1401,5 +1555,165 @@ reportState_result.thrift_spec = ( (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 ) + + +class getInfoJsonDirect_args(object): + """ + Attributes: + - url + - clients + + """ + + + def __init__(self, url=None, clients=None,): + self.url = url + self.clients = clients + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + 
break + if fid == 1: + if ftype == TType.STRING: + self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getInfoJsonDirect_args') + if self.url is not None: + oprot.writeFieldBegin('url', TType.STRING, 1) + oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url) + oprot.writeFieldEnd() + if self.clients is not None: + oprot.writeFieldBegin('clients', TType.STRING, 2) + oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getInfoJsonDirect_args) +getInfoJsonDirect_args.thrift_spec = ( + None, # 0 + (1, TType.STRING, 'url', 'UTF8', None, ), # 1 + (2, TType.STRING, 'clients', 'UTF8', None, ), # 2 +) + + +class getInfoJsonDirect_result(object): + """ + Attributes: + - success + - serviceExp + - userExp + + """ + + + def __init__(self, success=None, serviceExp=None, userExp=None,): + self.success = success + self.serviceExp = serviceExp + self.userExp = userExp + + def read(self, iprot): + if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None: + iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec]) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 0: + if ftype == TType.STRUCT: + self.success = pangramia.yt.common.ttypes.JobTokenData() + self.success.read(iprot) + else: + iprot.skip(ftype) + elif fid == 1: + if ftype == TType.STRUCT: + self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot._fast_encode is not None and self.thrift_spec is not None: + oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec])) + return + oprot.writeStructBegin('getInfoJsonDirect_result') + if self.success is not None: + oprot.writeFieldBegin('success', TType.STRUCT, 0) + self.success.write(oprot) + oprot.writeFieldEnd() + if self.serviceExp is not None: + oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1) + self.serviceExp.write(oprot) + oprot.writeFieldEnd() + if self.userExp is not None: + oprot.writeFieldBegin('userExp', TType.STRUCT, 2) + self.userExp.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + 
oprot.writeStructEnd() + + def validate(self): + return + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.items()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) +all_structs.append(getInfoJsonDirect_result) +getInfoJsonDirect_result.thrift_spec = ( + (0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0 + (1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1 + (2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2 +) fix_spec(all_structs) del all_structs diff --git a/thrift_model/pom.xml b/thrift_model/pom.xml index c113c99..8058556 100644 --- a/thrift_model/pom.xml +++ b/thrift_model/pom.xml @@ -7,7 +7,7 @@ com.pangramia.yt thrift-services - 3.6.0-SNAPSHOT + 5.5.0-SNAPSHOT 0.16.0 diff --git a/thrift_model/services/base_service.thrift b/thrift_model/services/base_service.thrift new file mode 100644 index 0000000..bce4461 --- /dev/null +++ b/thrift_model/services/base_service.thrift @@ -0,0 +1,19 @@ +namespace py pangramia.base_service +namespace java com.pangramia.base_service + +include "../data/common.thrift" +include "../data/exceptions.thrift" + +service BaseService { + // Common health check method + bool ping() throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + // Common error reporting + bool reportError(1: string message, + 2: map details) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp) + + // Add this to fix AsyncProcessor issues + oneway void shutdown() +} diff --git a/thrift_model/services/yt_admin_ops.thrift b/thrift_model/services/yt_admin_ops.thrift new file mode 100644 index 0000000..5b2b71a --- /dev/null +++ b/thrift_model/services/yt_admin_ops.thrift @@ -0,0 +1,63 @@ +namespace py pangramia.yt.admin_ops +namespace java com.pangramia.yt.admin_ops + +include "../data/common.thrift" +include "../data/exceptions.thrift" +include "base_service.thrift" + +// Proxy and Account management +service YTAccountsOpService extends base_service.BaseService { + + // AccountPairs + bool addAccountPair(1: string accountId, 2: string proxyId, 3: string machineId, 4: common.ProxyData proxyData, 5: optional common.AccountData accountData) + throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + common.AccountPairWithState getPair(1: string machineId) + throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool pair(1: string accountId, 2: string proxyId, 3:string machineId) + throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool unpair(1: string accountId, 2: string proxyId, 3:string machineId) + throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + list listAccountPairs(1: optional common.AccountPairState filter) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + // ManageAccounts + bool addAccount(1: string accountId, 2: optional common.AccountData accountData) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + + bool suspendAccount(1: string accountId) throws (1: 
exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool resumeAccount(1: string accountId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool removeAccount(1: string accountId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + list listActiveAccounts() throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + // ManageProxy + bool addProxy(1: string proxyId, 2: common.ProxyData proxyData) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool suspendProxy(1: string proxyId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool resumeProxy(1: string proxyId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + bool removeProxy(1: string proxyId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + + list listActiveProxies() throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), +} diff --git a/thrift_model/services/yt_management.thrift b/thrift_model/services/yt_management.thrift new file mode 100644 index 0000000..728c9db --- /dev/null +++ b/thrift_model/services/yt_management.thrift @@ -0,0 +1,27 @@ +namespace py pangramia.yt.management +namespace java com.pangramia.yt.management + +include "../data/common.thrift" +include "../data/exceptions.thrift" +include "base_service.thrift" + +// Service for managing the state of shared resources like proxies and accounts. +// This service is intended to be run as a single, authoritative instance. +service YTManagementService extends base_service.BaseService { + + // --- Proxy Management Methods --- + list getProxyStatus(1: optional string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool banProxy(1: string proxyUrl, 2: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool unbanProxy(1: string proxyUrl, 2: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool resetAllProxyStatuses(1: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool banAllProxies(1: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool deleteProxyFromRedis(1: string proxyUrl, 2: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + i32 deleteAllProxiesFromRedis(1: optional string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + + // --- Account Management Methods --- + list getAccountStatus(1: optional string accountId, 2: optional string accountPrefix) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool banAccount(1: string accountId, 2: optional string reason) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool unbanAccount(1: string accountId, 2: optional string reason) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + bool deleteAccountFromRedis(1: string accountId) throws 
(1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp), + i32 deleteAllAccountsFromRedis(1: optional string accountPrefix) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp) +} diff --git a/thrift_model/services/yt_tokens_ops.thrift b/thrift_model/services/yt_tokens_ops.thrift new file mode 100644 index 0000000..ad60712 --- /dev/null +++ b/thrift_model/services/yt_tokens_ops.thrift @@ -0,0 +1,50 @@ +namespace py pangramia.yt.tokens_ops +namespace java com.pangramia.yt.tokens_ops + +include "../data/common.thrift" +include "../data/exceptions.thrift" +include "yt_management.thrift" + +// The unified service that combines token operations and management functions. +// The server implementation will decide which functions are active based on its role. +service YTTokenOpService extends yt_management.YTManagementService { + + common.JobTokenData getOrRefreshTokenWithReport ( 1: string accountId, + 2: string oldUrl, + 3: common.JobState status, + 4: optional string details, + 5: optional string jobId, + 6: optional common.TokenUpdateMode updateType = common.TokenUpdateMode.AUTO, + 7: optional string url, + 8: optional string clients, + 9: optional common.AirflowLogContext airflowLogContext, + 10: optional string requestParamsJson) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp) + + common.JobTokenData getOrRefreshToken ( 1: string accountId, + 2: optional common.TokenUpdateMode updateType = common.TokenUpdateMode.AUTO, + 3: optional string url, + 4: optional string clients, + 5: optional string machineId, + 6: optional common.AirflowLogContext airflowLogContext, + 7: optional string requestParamsJson, + 8: optional string assignedProxyUrl) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp) + + common.JobTokenData getLatestToken (1: string accountId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp), + common.JobTokenData refreshToken ( 1: string accountId, + 2: optional common.TokenUpdateMode updateType = common.TokenUpdateMode.AUTO, + 3: optional string url ) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp) + bool reportState( 1: string url, + 2: common.JobState status, + 3: optional string details, + 4: optional string jobId) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp) + + // New method for direct info.json generation, bypassing Node.js token generation. 
+ common.JobTokenData getInfoJsonDirect(1: string url, + 2: optional string clients) throws (1: exceptions.PBServiceException serviceExp, + 2: exceptions.PBUserException userExp) +} diff --git a/tools/generate-inventory.py b/tools/generate-inventory.py index 5ebf313..6c05933 100755 --- a/tools/generate-inventory.py +++ b/tools/generate-inventory.py @@ -110,11 +110,15 @@ def generate_group_vars(cluster_config, group_vars_dir): # Get master IP for Redis configuration master_ip = list(cluster_config['master'].values())[0]['ip'] + # Combine master and worker nodes to create a hostvars-like structure + all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})} + # Prepare data for YAML dump generated_data = { 'master_host_ip': master_ip, 'redis_port': 52909, - 'external_access_ips': external_ips if external_ips else [] + 'external_access_ips': external_ips if external_ips else [], + 'hostvars': all_nodes } generated_data.update(global_vars) diff --git a/tools/sync-to-tower.sh b/tools/sync-to-jump.sh similarity index 76% rename from tools/sync-to-tower.sh rename to tools/sync-to-jump.sh index 06a8e83..82e355b 100755 --- a/tools/sync-to-tower.sh +++ b/tools/sync-to-jump.sh @@ -1,10 +1,11 @@ #!/bin/bash # -# Syncs the project directory to a remote "tower" host for deployment orchestration. +# Syncs the project directory to a remote "jump" host for deployment orchestration. # # This script is designed to be run from the root of the project directory. -# It excludes generated files, local data, logs, and other non-essential files -# to ensure a clean copy of the source code and configuration templates is synced. +# It syncs essential project files like source code, DAGs, and Ansible playbooks, +# while excluding generated files, local data, logs, and other non-essential files +# to ensure a clean copy is deployed. set -e # Exit immediately if a command exits with a non-zero status. set -u # Treat unset variables as an error. @@ -13,9 +14,9 @@ set -u # Treat unset variables as an error. # IMPORTANT: Update these variables to match your environment. # # The remote host to sync to (e.g., user@hostname) -REMOTE_HOST="user@your-tower-host.com" +REMOTE_HOST="alex_p@af-jump" # The destination path on the remote host -REMOTE_PATH="/path/to/your/project" +REMOTE_PATH="/home/alex_p/yt-ops-services" # The root directory of the project on the local machine. SOURCE_DIR="." 
@@ -46,8 +47,11 @@ EXCLUDE_OPTS=(
     "--exclude=airflow/configs/envoy.yaml"
     "--exclude=airflow/configs/docker-compose.camoufox.yaml"
     "--exclude=airflow/configs/camoufox_endpoints.json"
+    "--exclude=cluster*.yml"
     # Exclude local development notes
     "--exclude=TODO-*.md"
+    # Exclude user-specific tools
+    "--exclude=*aider*"
 )
 
 # The rsync command:
@@ -55,7 +59,9 @@ EXCLUDE_OPTS=(
 # -v: verbose
 # -z: compress file data during the transfer
 # --delete: delete extraneous files from the destination directory
-rsync -avz --delete \
+# --partial: keep partially transferred files
+# --progress: show progress during transfer
+rsync -avz --delete --partial --progress \
     "${EXCLUDE_OPTS[@]}" \
     "$SOURCE_DIR/" \
     "$REMOTE_HOST:$REMOTE_PATH/"
diff --git a/yt_ops_services/__pycache__/__init__.cpython-39.pyc b/yt_ops_services/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..441a91592d31a28d1d78abed195c0924db206d44
GIT binary patch
literal 224
[base85 binary payload omitted]

diff --git a/yt_ops_services/__pycache__/version.cpython-39.pyc b/yt_ops_services/__pycache__/version.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1abbdff5e5b143ecfc44e2b2ef643f4546bf35c6
GIT binary patch
literal 512
[base85 binary payload omitted]
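
Editor's note: a minimal client-side sketch of the new getInfoJsonDirect RPC added to YTTokenOpService in this patch. It assumes the generated pangramia package is importable and that the server listens unframed on localhost:9090 (host, port, transport framing, the URL, and the 'web' clients string are placeholders, not values taken from the patch).

# Sketch only: endpoint and transport choice are assumptions.
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.tokens_ops import YTTokenOpService

transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))  # assumed endpoint
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = YTTokenOpService.Client(protocol)

transport.open()
try:
    # getInfoJsonDirect(url, clients) returns a JobTokenData, like the token-based calls.
    token = client.getInfoJsonDirect('https://www.youtube.com/watch?v=VIDEO_ID', 'web')  # placeholder args
    # Fields newly added to JobTokenData by this patch:
    print(token.serverVersionInfo)
    print(token.requestSummary)
    print(token.communicationLogPaths)
finally:
    transport.close()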
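
A similarly hedged sketch of how a caller (for example an Airflow task) might populate the new AirflowLogContext struct and the extra getOrRefreshToken arguments (fields 6-8). All field values are illustrative placeholders; `client` is the connected handle from the previous sketch, and TokenUpdateMode.AUTO is assumed to be the enum member the Thrift IDL uses as the default.

import json
import socket

from pangramia.yt.common.ttypes import AirflowLogContext, TokenUpdateMode

# All values below are placeholders, not taken from the patch.
log_ctx = AirflowLogContext(
    logS3Path='s3://airflow-logs/ytdlp_ops_v02_worker_per_url_auth/manual__2025-01-01/get_token/1.log',
    dagId='ytdlp_ops_v02_worker_per_url_auth',
    runId='manual__2025-01-01T00:00:00+00:00',
    taskId='get_token',
    tryNumber=1,
    workerHostname=socket.gethostname(),
    queue='default',
)

token = client.getOrRefreshToken(
    'account-001',                               # accountId (placeholder)
    TokenUpdateMode.AUTO,                        # updateType
    'https://www.youtube.com/watch?v=VIDEO_ID',  # url (placeholder)
    'web',                                       # clients (placeholder)
    'worker-01',                                 # machineId (placeholder)
    log_ctx,                                     # airflowLogContext, new field 6
    json.dumps({'proxy_group': 'default'}),      # requestParamsJson, new field 7 (placeholder payload)
    'http://user:pass@proxy.example:3128',       # assignedProxyUrl, new field 8 (placeholder)
)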
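
Finally, a server-side sketch: because the regenerated Processor registers getInfoJsonDirect in its _processMap, a handler only needs to expose a method with that name. The handler body below is a stub, the port is assumed, and the actual info.json extraction (which the patch says bypasses Node.js token generation) is not shown here; a real handler must implement the full Iface.

from thrift.server import TServer
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.tokens_ops import YTTokenOpService
from pangramia.yt.common.ttypes import JobTokenData

class TokenOpHandler(YTTokenOpService.Iface):
    # Stub: only the new RPC is shown for illustration.
    def getInfoJsonDirect(self, url, clients):
        # Placeholder result; real extraction logic lives outside this patch.
        return JobTokenData(url=url, serverVersionInfo='dev-sketch')

processor = YTTokenOpService.Processor(TokenOpHandler())
server = TServer.TThreadedServer(
    processor,
    TSocket.TServerSocket(port=9090),            # assumed port
    TTransport.TBufferedTransportFactory(),
    TBinaryProtocol.TBinaryProtocolFactory(),
)
# server.serve()  # blocking call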