Commit: Updated current version of v1 and v2 dags, bin/ytops_client, ansible individual services
parent 52a2d6290d
commit f151ffee86

.gitignore (vendored) | 1 +
@@ -1 +1,2 @@
+**/__pycache__/*
 .aider*
@@ -18,54 +18,95 @@ RUN apt-get update && \
 iputils-ping \
 curl \
 traceroute \
-tcpdump && \
+tcpdump \
+unzip \
+git && \
 apt-get clean && \
 rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base
+
+# Ensure the airflow user and group exist with the correct UID/GID and permissions.
+# This is done early to allow `COPY --chown` to work correctly.
+RUN if ! getent group airflow > /dev/null 2>&1; then \
+groupadd -g 50000 airflow; \
+fi && \
+if ! id -u airflow > /dev/null 2>&1; then \
+useradd -u 50000 -g 50000 -m -s /bin/bash airflow; \
+else \
+usermod -g 50000 airflow; \
+fi && \
+chown -R airflow:airflow /app && \
+chmod -R g+w /app
+
 # Download and install mc (MinIO client)
 RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
 chmod +x /usr/local/bin/mc
 
-# Download and install custom FFmpeg build from yt-dlp's recommended source
+# Install FFmpeg
 RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \
-echo "Downloading FFmpeg from $FFMPEG_URL" && \
 wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \
 mkdir -p /opt/ffmpeg && \
 tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \
 ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \
 ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \
-rm -rf /tmp/ffmpeg.tar.xz && \
-ffmpeg -version
+rm -rf /tmp/ffmpeg.tar.xz
 
-# Check if airflow group exists, create it if it doesn't, then ensure proper setup
-RUN if ! getent group airflow > /dev/null 2>&1; then \
-groupadd -g 1001 airflow; \
-fi && \
-# Check if airflow user exists and is in the airflow group
-if id -u airflow > /dev/null 2>&1; then \
-usermod -a -G airflow airflow; \
-else \
-useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \
-fi && \
-chown -R airflow:airflow /app && \
-chmod g+w /app
-
-# Switch to airflow user for package installation
-USER airflow
-
-# Install base Airflow dependencies
+# Install yt-dlp from master
+# Temporarily rename pip to bypass the root check in the base image's pip wrapper,
+# ensuring a system-wide installation.
+RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
+python3 -m pip install --no-cache-dir -U pip hatchling wheel && \
+python3 -m pip install --no-cache-dir --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" && \
+chmod a+x "$(which yt-dlp)" && \
+mv /usr/local/bin/pip.orig /usr/local/bin/pip
+
+# Install Deno
+RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \
+unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip
+
+# Install aria2c and gost
+RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash
+
+# Install gost (direct download of binary)
+RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \
+tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \
+rm gost_2.12.0_linux_amd64.tar.gz
+
+# Verify installations
+RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V
+
+# Create version information files
+RUN ( \
+echo "--- yt-dlp ---" && \
+yt-dlp --version && \
+echo "" && \
+echo "--- deno ---" && \
+deno --version && \
+echo "" && \
+echo "--- ffmpeg ---" && \
+ffmpeg -version | head -n 1 \
+) > VERSION-airflow-latest.txt && \
+cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt
+
+# Install base Airflow dependencies as root (system-wide)
 # [FIX] Explicitly install a version of botocore compatible with Python 3.12
 # to fix a RecursionError when handling S3 remote logs.
-RUN pip install --no-cache-dir \
+# Temporarily rename pip to bypass the root check in the base image's pip wrapper.
+RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
+python3 -m pip install --no-cache-dir \
 "apache-airflow==${AIRFLOW_VERSION}" \
 apache-airflow-providers-docker \
 apache-airflow-providers-http \
 apache-airflow-providers-amazon \
+"apache-airflow-providers-celery>=3.3.0" \
+apache-airflow-providers-redis \
 "botocore>=1.34.118" \
 psycopg2-binary \
 "gunicorn==20.1.0" \
 "python-ffmpeg==2.0.12" \
-"ffprobe3"
+"ffprobe3" \
+"python-dotenv" && \
+mv /usr/local/bin/pip.orig /usr/local/bin/pip
 
 # --- Install the custom yt_ops_services package ---
 # Copy all the necessary source code for the package.
@@ -78,17 +119,24 @@ COPY --chown=airflow:airflow pangramia ./pangramia/
 
 # Install the package in editable mode. This runs setup.py and installs all dependencies
 # listed in `install_requires`, making the `yt_ops_services` module available everywhere.
-RUN pip install --no-cache-dir -e .
+# Bypass the pip root check again.
+RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
+python3 -m pip install --no-cache-dir -e . && \
+mv /usr/local/bin/pip.orig /usr/local/bin/pip
 
 # Copy token generator scripts and utils with correct permissions
 # COPY --chown=airflow:airflow generate_tokens_direct.mjs ./
 # COPY --chown=airflow:airflow utils ./utils/
 # COPY --chown=airflow:airflow token_generator ./token_generator/
 
-# --- Always update yt-dlp to latest nightly on container start ---
-# This is done in the entrypoint so every worker run uses the freshest build
-COPY --chown=airflow:airflow update-yt-dlp.sh /usr/local/bin/update-yt-dlp.sh
-RUN chmod +x /usr/local/bin/update-yt-dlp.sh
+# Ensure the home directory and all its contents are owned by the airflow user before switching to it.
+# This fixes permission issues that can occur if previous RUN commands created files in /home/airflow as root.
+# We also make it world-writable to accommodate running the container with a different user ID, which can
+# happen in some environments (e.g., OpenShift or with docker-compose user overrides).
+RUN chown -R airflow:airflow /home/airflow && chmod -R 777 /home/airflow
+
+# Switch to airflow user for all subsequent operations
+USER airflow
 
 # Expose bgutil plugin to worker path
 ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH
airflow/Dockerfile.old (new file, 125 lines)
@@ -0,0 +1,125 @@

FROM apache/airflow:2.10.3
ENV AIRFLOW_VERSION=2.10.3

WORKDIR /app

# Install system dependencies
USER root
RUN apt-get update && \
apt-get install -y --no-install-recommends \
vim \
mc \
jq \
build-essential \
python3-dev \
wget \
tar \
xz-utils \
iputils-ping \
curl \
traceroute \
tcpdump \
unzip \
git && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base

# Download and install mc (MinIO client)
RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
chmod +x /usr/local/bin/mc

# Install FFmpeg
RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \
wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \
mkdir -p /opt/ffmpeg && \
tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \
ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \
ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \
rm -rf /tmp/ffmpeg.tar.xz

# Install yt-dlp from master
RUN python3 -m pip install -U pip hatchling wheel && \
python3 -m pip install --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz"

# Install Deno
RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \
unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip

# Install aria2c and gost
RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash

# Install gost (direct download of binary)
RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \
tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \
rm gost_2.12.0_linux_amd64.tar.gz

# Verify installations
RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V

# Check if airflow group exists, create it if it doesn't, then ensure proper setup
RUN if ! getent group airflow > /dev/null 2>&1; then \
groupadd -g 1001 airflow; \
fi && \
# Check if airflow user exists and is in the airflow group
if id -u airflow > /dev/null 2>&1; then \
usermod -a -G airflow airflow; \
else \
useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \
fi && \
chown -R airflow:airflow /app && \
chmod g+w /app

# Install base Airflow dependencies
# [FIX] Explicitly install a version of botocore compatible with Python 3.12
# to fix a RecursionError when handling S3 remote logs.
RUN pip install --no-cache-dir \
"apache-airflow==${AIRFLOW_VERSION}" \
apache-airflow-providers-docker \
apache-airflow-providers-http \
apache-airflow-providers-amazon \
"botocore>=1.34.118" \
psycopg2-binary \
"gunicorn==20.1.0" \
"python-ffmpeg==2.0.12" \
"ffprobe3" \
"python-dotenv"

# Switch to airflow user for package installation
USER airflow

# --- Install the custom yt_ops_services package ---
# Copy all the necessary source code for the package.
# The deploy script ensures these files are in the build context.
COPY --chown=airflow:airflow setup.py ./
COPY --chown=airflow:airflow VERSION ./
COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
COPY --chown=airflow:airflow thrift_model ./thrift_model/
COPY --chown=airflow:airflow pangramia ./pangramia/

# Install the package in editable mode. This runs setup.py and installs all dependencies
# listed in `install_requires`, making the `yt_ops_services` module available everywhere.
RUN pip install --no-cache-dir -e .

# Copy token generator scripts and utils with correct permissions
# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./
# COPY --chown=airflow:airflow utils ./utils/
# COPY --chown=airflow:airflow token_generator ./token_generator/

# Create version information files
RUN ( \
echo "--- yt-dlp ---" && \
yt-dlp --version && \
echo "" && \
echo "--- deno ---" && \
deno --version && \
echo "" && \
echo "--- ffmpeg ---" && \
ffmpeg -version | head -n 1 \
) > VERSION-airflow-latest.txt && \
cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt

# Expose bgutil plugin to worker path
ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH
@@ -62,6 +62,9 @@ RUN conda run -n camo pip install --no-cache-dir -r requirements.txt
 # Install Playwright browsers for version 1.49
 RUN conda run -n camo playwright install --with-deps
 
+# Pre-download and cache Camoufox to speed up startup
+RUN conda run -n camo camoufox fetch
+
 # Copy the server script into the image
 COPY camoufox_server.py .
@@ -14,7 +14,8 @@ def task_instance_mutation_hook(ti):
     to be set by the dispatcher DAG. This avoids database race conditions.
     """
     logger.debug(f"MUTATION HOOK: Running for dag '{ti.dag_id}', task '{ti.task_id}'.")
-    if ti.dag_id == 'ytdlp_ops_worker_per_url':
+    # This hook targets all worker DAGs, which follow a naming convention.
+    if 'worker_per_url' in ti.dag_id:
         # If the run_id isn't populated yet, just return. The hook may be called again.
         if not ti.run_id:
             logger.debug(f"MUTATION HOOK: run_id not yet available for task '{ti.task_id}'. Skipping this invocation.")
@@ -26,7 +27,8 @@ def task_instance_mutation_hook(ti):
         if ti.run_id and '_q_' in ti.run_id:
             try:
                 parsed_queue = ti.run_id.split('_q_')[-1]
-                if parsed_queue.startswith('queue-dl-'):
+                # Check for valid v1 (dl) or v2 (auth/dl) queue prefixes.
+                if parsed_queue.startswith(('queue-dl-', 'queue-auth-')):
                     worker_queue = parsed_queue
             except Exception as e:
                 logger.error(f"MUTATION HOOK: CRITICAL: Error parsing queue from run_id '{ti.run_id}': {e}.", exc_info=True)
@@ -37,8 +39,9 @@ def task_instance_mutation_hook(ti):
         else:
             # If the queue is not found, it's a critical failure in the dispatching logic.
             # We fall back to the default queue but log it as a high-severity warning.
-            logger.warning(f"MUTATION HOOK: Could not find worker queue in run_id '{ti.run_id}'. Falling back to 'queue-dl'. Pinning will fail.")
-            ti.queue = 'queue-dl'
+            fallback_queue = 'queue-auth' if 'auth' in ti.dag_id else 'queue-dl'
+            logger.warning(f"MUTATION HOOK: Could not find worker queue in run_id '{ti.run_id}'. Falling back to '{fallback_queue}'. Pinning will fail.")
+            ti.queue = fallback_queue
 
 
 # --- Hook Registration ---
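For context, a minimal dispatcher-side sketch of the `_q_` run_id convention the hook above parses. It assumes Airflow 2.x's TriggerDagRunOperator; the helper name, timestamp format, and conf payload are illustrative, not the project's actual dispatcher code.

```python
# Hypothetical sketch: encode the dedicated Celery queue after '_q_' so
# task_instance_mutation_hook() can parse it and pin the worker tasks.
from datetime import datetime, timezone
from airflow.operators.trigger_dagrun import TriggerDagRunOperator

def build_pinned_run_id(flavor: str, worker_hostname: str) -> str:
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
    # e.g. queue-dl-dl001 (v1 download worker) or queue-auth-auth001 (v2 auth worker)
    return f"manual__{stamp}_q_queue-{flavor}-{worker_hostname}"

dispatch = TriggerDagRunOperator(
    task_id="dispatch_to_worker",
    trigger_dag_id="ytdlp_ops_worker_per_url",   # matched by "'worker_per_url' in ti.dag_id"
    trigger_run_id=build_pinned_run_id("dl", "dl001"),
    conf={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"},  # illustrative payload
)
```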
@@ -14,7 +14,8 @@ x-airflow-common:
 # If you built a custom image for master, you need to push it to a registry
 # and reference it here.
 image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest}
-build: .
 # Add extra hosts here to allow workers to resolve other hosts by name.
 # This section is auto-generated by Ansible from the inventory.
 extra_hosts:
@@ -30,7 +29,7 @@ x-airflow-common:
 
 AIRFLOW__CORE__PARALLELISM: 128
 AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64
-AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4
+AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8
 AIRFLOW__WEBSERVER__WORKERS: 5
 AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent"
 
@@ -75,21 +74,21 @@ x-airflow-common:
 - ${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles
 - ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles
 - ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles
+# Mount the generated pangramia package to ensure workers have the latest version
+- ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia
 # Use AIRFLOW_UID from .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image.
 user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0"
 
 services:
-airflow-worker:
+airflow-worker-dl:
 <<: *airflow-common
-container_name: airflow-dl-worker-1
+container_name: airflow-worker-dl-1
 hostname: ${HOSTNAME:-dl001}
-# The worker now listens on the generic queue AND its own dedicated queue.
-# The hostname is dynamically inserted into the queue name.
+# The DL worker listens on the generic dl queue AND its own dedicated queue.
 command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001}
 deploy:
 resources:
 limits:
-# Increased from 4G to 8G to support higher memory per child process.
 memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G}
 reservations:
 memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G}
@@ -103,26 +102,18 @@ services:
 start_period: 30s
 environment:
 <<: *airflow-common-env
-HOSTNAME: ${HOSTNAME:-dl001} # Explicitly set inside container
+HOSTNAME: ${HOSTNAME:-dl001}
 DUMB_INIT_SETSID: "0"
 AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}"
 AIRFLOW__CELERY__WORKER_TAGS: "dl"
 AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
-# Use autoscaling to adjust number of workers based on load.
-# Format is max_concurrency,min_concurrency.
-AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,4"
-# Use prefork pool for better compatibility with blocking libraries.
+AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8"
 AIRFLOW__CELERY__POOL: "prefork"
 AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
 AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
 AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h"
 AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
-# Increased from 256MB to 512MB for memory-intensive yt-dlp tasks.
-# This value is in KB. 512 * 1024 = 524288.
 AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512MB
-# The hostname is now managed by Docker Compose to ensure uniqueness when scaling.
-# It will be generated based on project, service, and replica number (e.g., airflow-airflow-dl-worker-1).
-# hostname: "dl-worker-${HOSTNAME_SUFFIX:-$$(hostname)}"
 ports:
 - "8793:8793"
 networks:
@@ -130,6 +121,46 @@ services:
 - proxynet
 restart: always
+
+airflow-worker-auth:
+<<: *airflow-common
+container_name: airflow-worker-auth-1
+hostname: ${HOSTNAME:-auth001}
+# The Auth worker listens on the generic auth queue AND its own dedicated queue.
+command: airflow celery worker -q queue-auth,queue-auth-${HOSTNAME:-auth001}
+deploy:
+resources:
+limits:
+memory: ${AIRFLOW_WORKER_AUTH_MEM_LIMIT:-4G}
+reservations:
+memory: ${AIRFLOW_WORKER_AUTH_MEM_RESERV:-1G}
+healthcheck:
+test:
+- "CMD-SHELL"
+- 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-auth@$$(hostname)"'
+interval: 30s
+timeout: 30s
+retries: 5
+start_period: 30s
+environment:
+<<: *airflow-common-env
+HOSTNAME: ${HOSTNAME:-auth001}
+DUMB_INIT_SETSID: "0"
+AIRFLOW__CELERY__WORKER_QUEUES: "queue-auth,queue-auth-${HOSTNAME:-auth001}"
+AIRFLOW__CELERY__WORKER_TAGS: "auth"
+AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
+# Auth tasks are less resource intensive but we want fewer of them to avoid service overload.
+AIRFLOW__CELERY__WORKER_AUTOSCALE: "2,1"
+AIRFLOW__CELERY__POOL: "prefork"
+AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
+AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
+AIRFLOW__CELERY__WORKER_NAME: "worker-auth@%h"
+AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
+AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB
+networks:
+- default
+- proxynet
+restart: always
 
 docker-socket-proxy:
 profiles:
 - disabled
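As a quick sanity check that the worker-dl / worker-auth split and queue pinning behave as intended, a minimal sketch that asks the same Celery app the compose healthchecks ping which queues each worker consumes. The expected output in the comments is an assumption based on the compose settings above.

```python
# Minimal sketch, run inside any Airflow container that shares the broker.
from airflow.providers.celery.executors.celery_executor import app

def active_queue_map(timeout: float = 5.0) -> dict[str, list[str]]:
    """Map each Celery worker name to the queues it currently consumes."""
    replies = app.control.inspect(timeout=timeout).active_queues() or {}
    return {worker: sorted(q["name"] for q in queues) for worker, queues in replies.items()}

if __name__ == "__main__":
    for worker, queues in sorted(active_queue_map().items()):
        # e.g. worker-dl@dl001: queue-dl, queue-dl-dl001
        #      worker-auth@auth001: queue-auth, queue-auth-auth001
        print(f"{worker}: {', '.join(queues)}")
```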
airflow/configs/docker-compose-dl.yaml.v1.j2 (new file, 151 lines)
@@ -0,0 +1,151 @@
# Airflow remote DL worker configuration.
# This file should be used on a remote machine to run a download worker.
# It requires a master Airflow instance running with services exposed.
#
# Before running, create a .env file in this directory with:
# MASTER_HOST_IP=... a.b.c.d ... # IP address of the machine running docker-compose-master.yaml
# POSTGRES_PASSWORD=... # The password for the PostgreSQL database from the master compose file
# REDIS_PASSWORD=... # The password for Redis from the master compose file
# AIRFLOW_UID=... # User ID for file permissions, should match master
---
x-airflow-common:
&airflow-common
# This should point to the same image used by the master.
# If you built a custom image for master, you need to push it to a registry
# and reference it here.
image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest}
# Add extra hosts here to allow workers to resolve other hosts by name.
# This section is auto-generated by Ansible from the inventory.
extra_hosts:
{% for host in groups['all'] %}
- "{{ hostvars[host]['inventory_hostname'] }}:{{ hostvars[host]['ansible_host'] | default(hostvars[host]['inventory_hostname']) }}"
{% endfor %}
env_file:
# The .env file is located in the project root (e.g., /srv/airflow_dl_worker),
# so we provide an absolute path to it.
- "{{ airflow_worker_dir }}/.env"
environment:
&airflow-common-env

AIRFLOW__CORE__PARALLELISM: 128
AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64
AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8
AIRFLOW__WEBSERVER__WORKERS: 5
AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent"

AIRFLOW__LOGGING__SECRET_MASK_EXCEPTION_ARGS: False

# Prevent slow webserver when low memory?
GUNICORN_CMD_ARGS: --max-requests 20 --max-requests-jitter 3 --worker-tmp-dir /dev/shm

# Airflow Core
AIRFLOW__CORE__EXECUTOR: CeleryExecutor
AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
AIRFLOW__CORE__FERNET_KEY: '' # Should be same as master, but worker does not need it.

# Backend connections - These should point to the master node
# Set MASTER_HOST_IP, POSTGRES_PASSWORD, and REDIS_PASSWORD in your .env file
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow
AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow
AIRFLOW__CELERY__BROKER_URL: redis://:${REDIS_PASSWORD}@${MASTER_HOST_IP}:{{ redis_port }}/0

# Remote Logging - connection is configured directly via environment variables
#_PIP_ADDITIONAL_REQUIREMENTS: ${{ '{' }}_PIP_ADDITIONAL_REQUIREMENTS:- apache-airflow-providers-docker apache-airflow-providers-http thrift>=0.16.0,<=0.20.0 backoff>=2.2.1 python-dotenv==1.0.1 psutil>=5.9.0 apache-airflow-providers-amazon{{ '}' }}
AIRFLOW__LOGGING__REMOTE_LOGGING: "True"
AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://airflow-logs"
AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: minio_default
AIRFLOW__LOGGING__ENCRYPT_S3_LOGS: "False"
#AIRFLOW__LOGGING__LOG_ID_TEMPLATE: "{dag_id}-{task_id}-{run_id}-{try_number}"
AIRFLOW__WEBSERVER__SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ=='
AIRFLOW__CORE__INTERNAL_API_SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ=='
AIRFLOW__CORE__LOCAL_SETTINGS_PATH: "/opt/airflow/config/custom_task_hooks.py"

volumes:
# Mount dags to get any utility scripts, but the worker will pull the DAG from the DB
- ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
# Mount logs locally in case remote logging fails
- ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
# Mount config for local settings and other configurations
- ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
- ${AIRFLOW_PROJ_DIR:-.}/config/airflow.cfg:/opt/airflow/airflow.cfg
# Mount download directories
- ${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles
- ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles
- ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles
# Mount the generated pangramia package to ensure workers have the latest version
- ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia
# Use AIRFLOW_UID from .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image.
user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0"

services:
airflow-worker:
<<: *airflow-common
container_name: airflow-dl-worker-1
hostname: ${HOSTNAME:-dl001}
# The worker now listens on the generic queue AND its own dedicated queue.
# The hostname is dynamically inserted into the queue name.
command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001}
deploy:
resources:
limits:
# Increased from 4G to 8G to support higher memory per child process.
memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G}
reservations:
memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G}
healthcheck:
test:
- "CMD-SHELL"
- 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-dl@$$(hostname)"'
interval: 30s
timeout: 30s
retries: 5
start_period: 30s
environment:
<<: *airflow-common-env
HOSTNAME: ${HOSTNAME:-dl001} # Explicitly set inside container
DUMB_INIT_SETSID: "0"
AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}"
AIRFLOW__CELERY__WORKER_TAGS: "dl"
AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
# Use autoscaling to adjust number of workers based on load.
# Format is max_concurrency,min_concurrency.
AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8"
# Use prefork pool for better compatibility with blocking libraries.
AIRFLOW__CELERY__POOL: "prefork"
AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h"
AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
# Increased from 256MB to 512MB for memory-intensive yt-dlp tasks.
# This value is in KB. 512 * 1024 = 524288.
AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512MB
# The hostname is now managed by Docker Compose to ensure uniqueness when scaling.
# It will be generated based on project, service, and replica number (e.g., airflow-airflow-dl-worker-1).
# hostname: "dl-worker-${HOSTNAME_SUFFIX:-$$(hostname)}"
ports:
- "8793:8793"
networks:
- default
- proxynet
restart: always

docker-socket-proxy:
profiles:
- disabled
image: tecnativa/docker-socket-proxy:0.1.1
environment:
CONTAINERS: 1
IMAGES: 1
AUTH: 1
POST: 1
privileged: true
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
restart: always

networks:
proxynet:
name: airflow_proxynet
external: true
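Since the v1 DL-worker compose file above documents the `.env` entries it expects, a small illustrative pre-flight check can catch a missing value before `docker compose up`. python-dotenv is already installed in the image; the variable names come from the file header, while the script itself is an assumption and not part of the repository.

```python
# Hypothetical pre-flight check for the remote DL worker's .env file.
from dotenv import dotenv_values

REQUIRED = ("MASTER_HOST_IP", "POSTGRES_PASSWORD", "REDIS_PASSWORD", "AIRFLOW_UID")

def missing_env_entries(path: str = ".env") -> list[str]:
    """Return the required variable names that are absent or empty."""
    values = dotenv_values(path)
    return [name for name in REQUIRED if not values.get(name)]

if __name__ == "__main__":
    missing = missing_env_entries()
    if missing:
        raise SystemExit(f"Missing required .env entries: {', '.join(missing)}")
    print(".env looks complete for the remote DL worker.")
```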
@@ -112,6 +112,8 @@ x-airflow-common:
 - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/downloadfiles:/opt/airflow/downloadfiles
 - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/addfiles:/opt/airflow/addfiles
 - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/inputfiles:/opt/airflow/inputfiles
+# Mount the generated pangramia package to ensure master services have the latest version
+- ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/pangramia:/app/pangramia
 user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0"
 depends_on:
 &airflow-common-depends-on
@@ -142,7 +144,7 @@ services:
 volumes:
 - ./postgres-data:/var/lib/postgresql/data
 ports:
-- "{{ postgres_port }}:5432"
+- "${{ '{' }}POSTGRES_PORT:-5432{{ '}' }}:5432"
 healthcheck:
 test: ["CMD", "pg_isready", "-U", "airflow"]
 interval: 10s
@@ -179,7 +181,7 @@ services:
 expose:
 - 6379
 ports:
-- "{{ redis_port }}:6379"
+- "${{ '{' }}REDIS_PORT:-6379{{ '}' }}:6379"
 healthcheck:
 test: ["CMD", "redis-cli", "-a", "${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }}", "ping"]
 interval: 10s
@@ -405,6 +407,20 @@ services:
 airflow-init:
 condition: service_completed_successfully
+
+airflow-regression-runner:
+<<: *airflow-common
+entrypoint: ""
+container_name: airflow-regression-runner
+command: ["tail", "-f", "/dev/null"]
+hostname: ${{ '{' }}HOSTNAME{{ '}' }}
+environment:
+<<: *airflow-common-env
+restart: always
+depends_on:
+<<: *airflow-common-depends-on
+airflow-init:
+condition: service_completed_successfully
 
 airflow-init:
 <<: *airflow-common
 depends_on:
@@ -8,6 +8,34 @@ include:
 {% endif %}
 
 services:
+bgutil-provider:
+image: brainicism/bgutil-ytdlp-pot-provider
+container_name: bgutil-provider
+init: true
+ports:
+- "4416:4416"
+restart: unless-stopped
+networks:
+- proxynet
+
+context-prepper:
+image: busybox:latest
+restart: "no"
+volumes:
+- ./context:/app/context
+networks:
+- proxynet
+command:
+- "/bin/sh"
+- "-c"
+- |
+set -e
+CONTEXT_BASE_DIR="/app/context"
+TIMESTAMP_DIR="$${CONTEXT_BASE_DIR}/context-data_$$(date +%Y%m%d_%H%M%S)"
+mkdir -p "$${TIMESTAMP_DIR}"
+ln -sfn "$${TIMESTAMP_DIR}" "$${CONTEXT_BASE_DIR}/context-data"
+echo "Context prepper finished. Data will be in: $${TIMESTAMP_DIR}"
+
 envoy:
 image: envoyproxy/envoy:v1.29-latest
 {% if service_role != 'management' %}
@@ -35,16 +63,30 @@ services:
 # container_name is omitted; Docker will use the service name for DNS.
 # This service depends on the camoufox-group service, which ensures all camoufox
 # instances are started before this service.
-{% if service_role is defined and service_role != 'management' %}
 depends_on:
-- camoufox-group
+context-prepper:
+condition: service_completed_successfully
+{% if service_role is defined and service_role != 'management' %}
+camoufox-group:
+condition: service_started
 {% endif %}
 # Ports are no longer exposed directly. Envoy will connect to them on the internal network.
+# entrypoint:
+# - /bin/sh
+# - -c
+# - |
+# set -e
+# echo "[$(date)] Updating yt-dlp to latest nightly master..."
+# python3 -m pip install -U --pre "yt-dlp[default]" --upgrade-strategy eager --force-reinstall --no-cache-dir
+# echo "[$(date)] yt-dlp updated to:"
+# yt-dlp --version
+# echo "[$(date)] Starting original entrypoint..."
+# exec /usr/local/bin/docker-entrypoint.sh "$$@"
 env_file:
 - ./.env # Path is relative to the project directory
 volumes:
-- context-data:/app/context-data
+- ./context:/app/context
-- ./logs/communication_logs:/app/communication_logs
+- ./logs/yt-dlp-ops/communication_logs:/app/logs/yt-dlp-ops/communication_logs
 {% if service_role != 'management' %}
 # Mount the generated endpoints file to make it available to the server
 - ./configs/camoufox_endpoints.json:/app/config/camoufox_endpoints.json:ro
@@ -72,19 +114,24 @@ services:
 - "${REDIS_PORT:-52909}"
 - "--redis-password"
 - "${REDIS_PASSWORD}"
-- "--account-active-duration-min"
-- "${ACCOUNT_ACTIVE_DURATION_MIN:-30}"
-- "--account-cooldown-duration-min"
-- "${ACCOUNT_COOLDOWN_DURATION_MIN:-60}"
 - "--service-role"
 - "{{ service_role }}"
+
+# --- S3 Logging Parameters ---
+- "--s3-endpoint-url"
+- "${S3_ENDPOINT_URL}"
+- "--s3-access-key-id"
+- "${S3_ACCESS_KEY_ID}"
+- "--s3-secret-access-key"
+- "${S3_SECRET_ACCESS_KEY}"
+- "--s3-region-name"
+- "${S3_REGION_NAME}"
 {% if service_role is defined and service_role != 'management' %}
 # --- Parameters for worker/all-in-one roles ONLY ---
 - "--script-dir"
 - "/app"
 - "--context-dir"
-- "/app/context-data"
+- "/app/context/context-data"
 - "--clean-context-dir"
 - "--clients"
 - "${YT_CLIENTS:-web,mweb,ios,android}"
@@ -94,13 +141,13 @@ services:
 - "/app/config/camoufox_endpoints.json"
 - "--print-tokens"
 - "--stop-if-no-proxy"
+- "--comms-log-root-dir"
+- "/app/logs/yt-dlp-ops/communication_logs"
+- "--bgutils-no-innertube"
 {% endif %}
 restart: unless-stopped
 pull_policy: always
 
-volumes:
-context-data:
-
 networks:
 proxynet:
 name: airflow_proxynet
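The new `--s3-*` flags passed to the service mirror the usual parameters of an S3-compatible client (for example MinIO, which the stack already uses for remote logs). A hedged sketch of how a consumer of those flags typically builds a client; the function is illustrative only and not the service's actual code.

```python
# Illustrative only: map --s3-endpoint-url / --s3-access-key-id /
# --s3-secret-access-key / --s3-region-name onto a boto3 S3 client.
import boto3

def make_s3_client(endpoint_url: str, access_key_id: str,
                   secret_access_key: str, region_name: str):
    return boto3.client(
        "s3",
        endpoint_url=endpoint_url,              # e.g. a MinIO endpoint
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        region_name=region_name,
    )
```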
636
airflow/dags/scripts/regression.py
Normal file
636
airflow/dags/scripts/regression.py
Normal file
@ -0,0 +1,636 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Regression testing script for the ytdlp-ops system.
|
||||||
|
|
||||||
|
This script orchestrates a regression test by:
|
||||||
|
1. Populating a Redis queue with video URLs from an input file.
|
||||||
|
2. Triggering the `ytdlp_ops_orchestrator` Airflow DAG to start processing.
|
||||||
|
3. Monitoring the progress of the processing for a specified duration.
|
||||||
|
4. Generating a report of any failures.
|
||||||
|
5. Optionally cleaning up the Redis queues after the test.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
import subprocess
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import redis
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
|
# It's safe to import these as the script runs in the same container as Airflow
|
||||||
|
# where the yt_ops_services package is installed.
|
||||||
|
try:
|
||||||
|
from yt_ops_services.client_utils import get_thrift_client, format_timestamp
|
||||||
|
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
||||||
|
except ImportError:
|
||||||
|
logging.error("Could not import Thrift modules. Ensure this script is run in the 'airflow-regression-runner' container.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# --- Configuration ---
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="[%(asctime)s] [%(levelname)s] %(message)s",
|
||||||
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
|
INTERRUPTED = False
|
||||||
|
|
||||||
|
def signal_handler(sig, frame):
|
||||||
|
"""Handles Ctrl+C interruption."""
|
||||||
|
global INTERRUPTED
|
||||||
|
if not INTERRUPTED:
|
||||||
|
logging.warning("Ctrl+C detected. Initiating graceful shutdown...")
|
||||||
|
INTERRUPTED = True
|
||||||
|
else:
|
||||||
|
logging.warning("Second Ctrl+C detected. Forcing exit.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Helper Functions ---
|
||||||
|
|
||||||
|
def _get_redis_client(redis_url: str):
|
||||||
|
"""Gets a Redis client from a URL."""
|
||||||
|
try:
|
||||||
|
# from_url is the modern way to connect and handles password auth
|
||||||
|
client = redis.from_url(redis_url, decode_responses=True)
|
||||||
|
client.ping()
|
||||||
|
logging.info(f"Successfully connected to Redis at {client.connection_pool.connection_kwargs.get('host')}:{client.connection_pool.connection_kwargs.get('port')}")
|
||||||
|
return client
|
||||||
|
except redis.exceptions.ConnectionError as e:
|
||||||
|
logging.error(f"Failed to connect to Redis: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"An unexpected error occurred while connecting to Redis: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_webserver_url():
|
||||||
|
"""
|
||||||
|
Determines the Airflow webserver URL, prioritizing MASTER_HOST_IP from .env.
|
||||||
|
"""
|
||||||
|
master_host_ip = os.getenv("MASTER_HOST_IP")
|
||||||
|
if master_host_ip:
|
||||||
|
url = f"http://{master_host_ip}:8080"
|
||||||
|
logging.info(f"Using MASTER_HOST_IP for webserver URL: {url}")
|
||||||
|
return url
|
||||||
|
|
||||||
|
# Fallback to AIRFLOW_WEBSERVER_URL or the default service name
|
||||||
|
url = os.getenv("AIRFLOW_WEBSERVER_URL", "http://airflow-webserver:8080")
|
||||||
|
logging.info(f"Using default webserver URL: {url}")
|
||||||
|
return url
|
||||||
|
|
||||||
|
def _normalize_to_url(item: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Validates if an item is a recognizable YouTube URL or video ID,
|
||||||
|
and normalizes it to a standard watch URL format.
|
||||||
|
"""
|
||||||
|
if not item:
|
||||||
|
return None
|
||||||
|
|
||||||
|
video_id_pattern = r"^[a-zA-Z0-9_-]{11}$"
|
||||||
|
if re.match(video_id_pattern, item):
|
||||||
|
return f"https://www.youtube.com/watch?v={item}"
|
||||||
|
|
||||||
|
url_patterns = [r"(?:v=|\/v\/|youtu\.be\/|embed\/|shorts\/)([a-zA-Z0-9_-]{11})"]
|
||||||
|
for pattern in url_patterns:
|
||||||
|
match = re.search(pattern, item)
|
||||||
|
if match:
|
||||||
|
return f"https://www.youtube.com/watch?v={match.group(1)}"
|
||||||
|
|
||||||
|
logging.warning(f"Could not recognize '{item}' as a valid YouTube URL or video ID.")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _read_input_file(file_path: str) -> list[str]:
|
||||||
|
"""Reads video IDs/URLs from a file (CSV or JSON list)."""
|
||||||
|
path = Path(file_path)
|
||||||
|
if not path.is_file():
|
||||||
|
logging.error(f"Input file not found: {file_path}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
content = path.read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
# Try parsing as JSON list first
|
||||||
|
if content.strip().startswith('['):
|
||||||
|
try:
|
||||||
|
data = json.loads(content)
|
||||||
|
if isinstance(data, list):
|
||||||
|
logging.info(f"Successfully parsed {file_path} as a JSON list.")
|
||||||
|
return [str(item) for item in data]
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logging.warning("File looks like JSON but failed to parse. Will try treating as CSV/text.")
|
||||||
|
|
||||||
|
# Fallback to CSV/text (one item per line)
|
||||||
|
items = []
|
||||||
|
# Use io.StringIO to handle the content as a file for the csv reader
|
||||||
|
from io import StringIO
|
||||||
|
# Sniff to see if it has a header
|
||||||
|
try:
|
||||||
|
has_header = csv.Sniffer().has_header(content)
|
||||||
|
except csv.Error:
|
||||||
|
has_header = False # Not a CSV, treat as plain text
|
||||||
|
|
||||||
|
reader = csv.reader(StringIO(content))
|
||||||
|
if has_header:
|
||||||
|
next(reader) # Skip header row
|
||||||
|
|
||||||
|
for row in reader:
|
||||||
|
if row:
|
||||||
|
items.append(row[0].strip()) # Assume the ID/URL is in the first column
|
||||||
|
|
||||||
|
logging.info(f"Successfully parsed {len(items)} items from {file_path} as CSV/text.")
|
||||||
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_auth():
|
||||||
|
"""Gets Airflow API credentials from environment variables."""
|
||||||
|
username = os.getenv("AIRFLOW_ADMIN_USERNAME", "admin")
|
||||||
|
password = os.getenv("AIRFLOW_ADMIN_PASSWORD")
|
||||||
|
if not password:
|
||||||
|
logging.error("AIRFLOW_ADMIN_PASSWORD not found in environment. Cannot interact with API.")
|
||||||
|
return None, None
|
||||||
|
return username, password
|
||||||
|
|
||||||
|
def _pause_dag(dag_id: str, is_paused: bool = True):
|
||||||
|
"""Pauses or unpauses an Airflow DAG via the REST API."""
|
||||||
|
logging.info(f"Attempting to {'pause' if is_paused else 'unpause'} DAG: {dag_id}...")
|
||||||
|
username, password = _get_api_auth()
|
||||||
|
if not username:
|
||||||
|
return
|
||||||
|
|
||||||
|
webserver_url = _get_webserver_url()
|
||||||
|
endpoint = f"{webserver_url}/api/v1/dags/{dag_id}"
|
||||||
|
payload = {"is_paused": is_paused}
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.patch(endpoint, auth=(username, password), json=payload, timeout=30)
|
||||||
|
response.raise_for_status()
|
||||||
|
logging.info(f"Successfully {'paused' if is_paused else 'unpaused'} DAG '{dag_id}'.")
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
logging.error(f"Failed to {'pause' if is_paused else 'unpause'} DAG '{dag_id}': {e}")
|
||||||
|
if e.response is not None:
|
||||||
|
logging.error(f"Response: {e.response.text}")
|
||||||
|
|
||||||
|
def _fail_running_dag_runs(dag_id: str):
|
||||||
|
"""Finds all running DAG runs for a given DAG and marks them as failed."""
|
||||||
|
logging.info(f"Attempting to fail all running instances of DAG '{dag_id}'...")
|
||||||
|
username, password = _get_api_auth()
|
||||||
|
if not username:
|
||||||
|
return
|
||||||
|
|
||||||
|
webserver_url = _get_webserver_url()
|
||||||
|
list_endpoint = f"{webserver_url}/api/v1/dags/{dag_id}/dagRuns?state=running"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get running DAGs
|
||||||
|
response = requests.get(list_endpoint, auth=(username, password), timeout=30)
|
||||||
|
response.raise_for_status()
|
||||||
|
running_runs = response.json().get("dag_runs", [])
|
||||||
|
|
||||||
|
if not running_runs:
|
||||||
|
logging.info(f"No running DAG runs found for '{dag_id}'.")
|
||||||
|
return
|
||||||
|
|
||||||
|
logging.info(f"Found {len(running_runs)} running DAG run(s) to fail.")
|
||||||
|
|
||||||
|
for run in running_runs:
|
||||||
|
dag_run_id = run["dag_run_id"]
|
||||||
|
update_endpoint = f"{webserver_url}/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}"
|
||||||
|
payload = {"state": "failed"}
|
||||||
|
try:
|
||||||
|
update_response = requests.patch(update_endpoint, auth=(username, password), json=payload, timeout=30)
|
||||||
|
update_response.raise_for_status()
|
||||||
|
logging.info(f" - Successfully marked DAG run '{dag_run_id}' as failed.")
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
logging.error(f" - Failed to mark DAG run '{dag_run_id}' as failed: {e}")
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
logging.error(f"Failed to list running DAG runs for '{dag_id}': {e}")
|
||||||
|
if e.response is not None:
|
||||||
|
logging.error(f"Response: {e.response.text}")
|
||||||
|
|
||||||
|
|
||||||
|
# --- Core Logic Functions ---
|
||||||
|
|
||||||
|
def step_0_populate_queue(redis_client, queue_name: str, input_file: str):
|
||||||
|
"""Reads URLs from a file and populates the Redis inbox queue."""
|
||||||
|
logging.info("--- Step 0: Populating Redis Queue ---")
|
||||||
|
raw_items = _read_input_file(input_file)
|
||||||
|
if not raw_items:
|
||||||
|
logging.error("No items found in the input file. Aborting.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
valid_urls = []
|
||||||
|
for item in raw_items:
|
||||||
|
url = _normalize_to_url(item)
|
||||||
|
if url and url not in valid_urls:
|
||||||
|
valid_urls.append(url)
|
||||||
|
|
||||||
|
if not valid_urls:
|
||||||
|
logging.error("No valid YouTube URLs or IDs were found in the input file. Aborting.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
inbox_queue = f"{queue_name}_inbox"
|
||||||
|
logging.info(f"Adding {len(valid_urls)} unique and valid URLs to Redis queue '{inbox_queue}'...")
|
||||||
|
|
||||||
|
with redis_client.pipeline() as pipe:
|
||||||
|
for url in valid_urls:
|
||||||
|
pipe.rpush(inbox_queue, url)
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logging.info(f"Successfully populated queue. Total items in '{inbox_queue}': {redis_client.llen(inbox_queue)}")
|
||||||
|
return len(valid_urls)
|
||||||
|
|
||||||
|
|
||||||
|
def step_1_trigger_orchestrator(args: argparse.Namespace):
|
||||||
|
"""Triggers the ytdlp_ops_orchestrator DAG using the Airflow REST API."""
|
||||||
|
logging.info("--- Step 1: Triggering Orchestrator DAG via REST API ---")
|
||||||
|
|
||||||
|
# Get API details from environment variables
|
||||||
|
webserver_url = _get_webserver_url()
|
||||||
|
api_endpoint = f"{webserver_url}/api/v1/dags/ytdlp_ops_orchestrator/dagRuns"
|
||||||
|
|
||||||
|
# Default admin user is 'admin'
|
||||||
|
username = os.getenv("AIRFLOW_ADMIN_USERNAME", "admin")
|
||||||
|
password = os.getenv("AIRFLOW_ADMIN_PASSWORD")
|
||||||
|
|
||||||
|
if not password:
|
||||||
|
logging.error("AIRFLOW_ADMIN_PASSWORD not found in environment. Please set it in your .env file.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Construct the configuration for the DAG run
|
||||||
|
conf = {
|
||||||
|
"total_workers": args.workers,
|
||||||
|
"workers_per_bunch": args.workers_per_bunch,
|
||||||
|
"clients": args.client,
|
||||||
|
}
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"conf": conf
|
||||||
|
}
|
||||||
|
|
||||||
|
logging.info(f"Triggering DAG at endpoint: {api_endpoint}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.post(
|
||||||
|
api_endpoint,
|
||||||
|
auth=(username, password),
|
||||||
|
json=payload,
|
||||||
|
timeout=30 # 30 second timeout
|
||||||
|
)
|
||||||
|
response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx)
|
||||||
|
|
||||||
|
logging.info("Successfully triggered the orchestrator DAG.")
|
||||||
|
logging.debug(f"Airflow API response:\n{response.json()}")
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
logging.error("Failed to trigger the orchestrator DAG via REST API.")
|
||||||
|
logging.error(f"Error: {e}")
|
||||||
|
if e.response is not None:
|
||||||
|
logging.error(f"Response status code: {e.response.status_code}")
|
||||||
|
logging.error(f"Response text: {e.response.text}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def step_2_monitor_progress(args: argparse.Namespace, redis_client, queue_name: str, total_urls: int, run_time_min: int, interval_min: int, show_status: bool):
    """Monitors the Redis queues for the duration of the test."""
    logging.info("--- Step 2: Monitoring Progress ---")

    end_time = datetime.now() + timedelta(minutes=run_time_min)
    inbox_q = f"{queue_name}_inbox"
    progress_q = f"{queue_name}_progress"
    result_q = f"{queue_name}_result"
    fail_q = f"{queue_name}_fail"

    while datetime.now() < end_time and not INTERRUPTED:
        try:
            inbox_len = redis_client.llen(inbox_q)
            progress_len = redis_client.hlen(progress_q)
            result_len = redis_client.hlen(result_q)
            fail_len = redis_client.hlen(fail_q)

            processed = result_len + fail_len
            success_len = 0
            if result_len > 0:
                # This is inefficient but gives a more accurate success count
                results = redis_client.hgetall(result_q)
                success_len = sum(1 for v in results.values() if '"status": "success"' in v)

            logging.info(
                f"Progress: {processed}/{total_urls} | "
                f"Success: {success_len} | Failed: {fail_len} | "
                f"In Progress: {progress_len} | Inbox: {inbox_len}"
            )
            if show_status:
                # This function connects directly to services to get status
                get_system_status(args, redis_client)
        except Exception as e:
            logging.error(f"Error while querying Redis for progress: {e}")

        # Wait for the interval, but check for interruption every second
        # for a more responsive shutdown.
        wait_until = time.time() + interval_min * 60
        while time.time() < wait_until and not INTERRUPTED:
            # Check if we are past the main end_time
            if datetime.now() >= end_time:
                break
            time.sleep(1)

    if INTERRUPTED:
        logging.info("Monitoring interrupted.")
    else:
        logging.info("Monitoring period has ended.")

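# Illustrative alternative (not part of the script): the success count above pulls the whole
# result hash with HGETALL on every poll. For large runs, redis-py's hscan_iter() streams the
# hash in chunks instead; same substring check, bounded memory. Assumes the client is created
# with decoded (str) responses, as the substring check above already implies.
def _count_successes(redis_client, result_q: str) -> int:
    return sum(
        1
        for _url, payload in redis_client.hscan_iter(result_q)
        if '"status": "success"' in payload
    )
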
# --- System Status Functions (Direct Connect) ---


def _list_proxy_statuses(client, server_identity=None):
    """Lists proxy statuses by connecting directly to the Thrift service."""
    logging.info(f"--- Proxy Statuses (Server: {server_identity or 'ALL'}) ---")
    try:
        statuses = client.getProxyStatus(server_identity)
        if not statuses:
            logging.info("No proxy statuses found.")
            return

        status_list = []
        headers = ["Server", "Proxy URL", "Status", "Success", "Failures", "Last Success", "Last Failure"]
        for s in statuses:
            status_list.append({
                "Server": s.serverIdentity, "Proxy URL": s.proxyUrl, "Status": s.status,
                "Success": s.successCount, "Failures": s.failureCount,
                "Last Success": format_timestamp(s.lastSuccessTimestamp),
                "Last Failure": format_timestamp(s.lastFailureTimestamp),
            })
        logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid'))
    except (PBServiceException, PBUserException) as e:
        logging.error(f"Failed to get proxy statuses: {e.message}")
    except Exception as e:
        logging.error(f"An unexpected error occurred while getting proxy statuses: {e}", exc_info=True)


def _list_account_statuses(client, redis_client, account_id=None):
    """Lists account statuses from Thrift, enriched with live Redis data."""
    logging.info(f"--- Account Statuses (Account: {account_id or 'ALL'}) ---")
    try:
        statuses = client.getAccountStatus(accountId=account_id, accountPrefix=None)
        if not statuses:
            logging.info("No account statuses found.")
            return

        status_list = []
        for s in statuses:
            status_str = s.status
            if 'RESTING' in status_str:
                try:
                    expiry_ts_bytes = redis_client.hget(f"account_status:{s.accountId}", "resting_until")
                    if expiry_ts_bytes:
                        expiry_ts = float(expiry_ts_bytes)
                        now = datetime.now().timestamp()
                        if now < expiry_ts:
                            remaining_seconds = int(expiry_ts - now)
                            status_str = f"RESTING ({remaining_seconds}s left)"
                except Exception:
                    pass  # Ignore if parsing fails

            last_success = float(s.lastSuccessTimestamp) if s.lastSuccessTimestamp else 0
            last_failure = float(s.lastFailureTimestamp) if s.lastFailureTimestamp else 0
            last_activity = max(last_success, last_failure)

            status_list.append({
                "Account ID": s.accountId, "Status": status_str, "Success": s.successCount,
                "Failures": s.failureCount, "Last Success": format_timestamp(s.lastSuccessTimestamp),
                "Last Failure": format_timestamp(s.lastFailureTimestamp), "Last Proxy": s.lastUsedProxy or "N/A",
                "_last_activity": last_activity,
            })

        status_list.sort(key=lambda item: item.get('_last_activity', 0), reverse=True)
        for item in status_list:
            del item['_last_activity']

        logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid'))
    except (PBServiceException, PBUserException) as e:
        logging.error(f"Failed to get account statuses: {e.message}")
    except Exception as e:
        logging.error(f"An unexpected error occurred while getting account statuses: {e}", exc_info=True)


def _list_client_statuses(redis_client):
    """Lists client statistics from Redis."""
    logging.info("--- Client Statuses ---")
    try:
        stats_key = "client_stats"
        all_stats_raw = redis_client.hgetall(stats_key)
        if not all_stats_raw:
            logging.info("No client stats found in Redis.")
            return

        status_list = []
        for client, stats_json in all_stats_raw.items():
            try:
                stats = json.loads(stats_json)

                def format_latest(data):
                    if not data: return "N/A"
                    ts = format_timestamp(data.get('timestamp'))
                    url = data.get('url', 'N/A')
                    video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url)
                    video_id = video_id_match.group(1) if video_id_match else 'N/A'
                    return f"{ts} ({video_id})"

                status_list.append({
                    "Client": client, "Success": stats.get('success_count', 0),
                    "Failures": stats.get('failure_count', 0),
                    "Last Success": format_latest(stats.get('latest_success')),
                    "Last Failure": format_latest(stats.get('latest_failure')),
                })
            except (json.JSONDecodeError, AttributeError):
                status_list.append({"Client": client, "Success": "ERROR", "Failures": "ERROR", "Last Success": "Parse Error", "Last Failure": "Parse Error"})

        status_list.sort(key=lambda item: item.get('Client', ''))
        logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid'))
    except Exception as e:
        logging.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True)


def get_system_status(args: argparse.Namespace, redis_client):
    """Connects to services and prints status tables."""
    logging.info("--- Getting System Status ---")
    client, transport = None, None
    try:
        client, transport = get_thrift_client(args.management_host, args.management_port)
        _list_proxy_statuses(client)
        _list_account_statuses(client, redis_client)
        _list_client_statuses(redis_client)
    except Exception as e:
        logging.error(f"Could not get system status: {e}")
    finally:
        if transport and transport.isOpen():
            transport.close()

def step_3_generate_report(redis_client, queue_name: str, report_file: str | None):
    """Generates a CSV report of failed items."""
    logging.info("--- Step 3: Generating Report ---")
    fail_q = f"{queue_name}_fail"

    failed_items = redis_client.hgetall(fail_q)
    if not failed_items:
        logging.info("No items found in the fail queue. No report will be generated.")
        return

    logging.info(f"Found {len(failed_items)} failed items. Writing to report...")

    report_data = []
    for url, data_json in failed_items.items():
        try:
            data = json.loads(data_json)
            error_details = data.get('error_details', {})
            report_data.append({
                'url': url,
                'video_id': _normalize_to_url(url).split('v=')[-1] if _normalize_to_url(url) else 'N/A',
                'error_message': error_details.get('error_message', 'N/A'),
                'error_code': error_details.get('error_code', 'N/A'),
                'proxy_url': error_details.get('proxy_url', 'N/A'),
                'timestamp': datetime.fromtimestamp(data.get('end_time', 0)).isoformat(),
            })
        except (json.JSONDecodeError, AttributeError):
            report_data.append({'url': url, 'video_id': 'N/A', 'error_message': 'Could not parse error data', 'error_code': 'PARSE_ERROR', 'proxy_url': 'N/A', 'timestamp': 'N/A'})

    if report_file:
        try:
            with open(report_file, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=report_data[0].keys())
                writer.writeheader()
                writer.writerows(report_data)
            logging.info(f"Successfully wrote report to {report_file}")
        except IOError as e:
            logging.error(f"Could not write report to file {report_file}: {e}")
    else:
        # Print to stdout if no file is specified
        logging.info("--- Failure Report (stdout) ---")
        for item in report_data:
            logging.info(f"URL: {item['url']}, Error: {item['error_code']} - {item['error_message']}")
        logging.info("--- End of Report ---")

def handle_interruption(redis_client, queue_name, report_file):
    """Graceful shutdown logic for when the script is interrupted."""
    logging.warning("--- Interruption Detected: Starting Shutdown Procedure ---")

    # 1. Pause DAGs
    _pause_dag("ytdlp_ops_orchestrator")
    _pause_dag("ytdlp_ops_dispatcher")

    # 2. Fail running per_url jobs
    _fail_running_dag_runs("ytdlp_ops_worker_per_url")

    # 3. Generate report
    logging.info("Generating final report due to interruption...")
    step_3_generate_report(redis_client, queue_name, report_file)
    # Also print to stdout if a file was specified, so the user sees it immediately
    if report_file:
        logging.info("Printing report to stdout as well...")
        step_3_generate_report(redis_client, queue_name, None)


def step_4_cleanup_queues(redis_client, queue_name: str):
    """Cleans up the Redis queues used by the test."""
    logging.info("--- Step 4: Cleaning Up Queues ---")
    queues_to_delete = [
        f"{queue_name}_inbox",
        f"{queue_name}_progress",
        f"{queue_name}_result",
        f"{queue_name}_fail",
    ]
    logging.warning(f"This will delete the following Redis keys: {queues_to_delete}")

    deleted_count = redis_client.delete(*queues_to_delete)
    logging.info(f"Cleanup complete. Deleted {deleted_count} key(s).")

def main():
    """Main function to parse arguments and run the regression test."""
    # Register the signal handler for Ctrl+C
    signal.signal(signal.SIGINT, signal_handler)

    parser = argparse.ArgumentParser(description="Run a regression test for the ytdlp-ops system.")

    # Environment
    parser.add_argument("--redis-host", type=str, default="redis", help="Hostname or IP address of the Redis server. Defaults to 'redis' for in-container execution.")
    parser.add_argument("--management-host", type=str, default=os.getenv("MANAGEMENT_SERVICE_HOST", "envoy-thrift-lb"), help="Hostname of the management Thrift service.")
    parser.add_argument("--management-port", type=int, default=int(os.getenv("MANAGEMENT_SERVICE_PORT", 9080)), help="Port of the management Thrift service.")

    # Test Configuration
    parser.add_argument("--client", type=str, required=True, help="Client persona to test (e.g., 'mweb').")
    parser.add_argument("--workers", type=int, required=True, help="Total number of worker loops to start.")
    parser.add_argument("--workers-per-bunch", type=int, default=1, help="Number of workers per bunch.")
    parser.add_argument("--run-time-min", type=int, required=True, help="How long to let the test run, in minutes.")
    parser.add_argument("--input-file", type=str, help="Path to a file containing video IDs/URLs. If not provided, the existing queue will be used.")

    # Monitoring & Reporting
    parser.add_argument("--progress-interval-min", type=int, default=2, help="How often to query and print progress, in minutes.")
    parser.add_argument("--report-file", type=str, help="Path to a CSV file to write the list of failed URLs to.")
    parser.add_argument("--show-status", action="store_true", help="If set, show proxy and account statuses during progress monitoring.")

    # Actions
    parser.add_argument("--cleanup", action="store_true", help="If set, clear the Redis queues after the test completes.")
    parser.add_argument("--skip-populate", action="store_true", help="If set, skip populating the queue (assumes it's already populated).")
    parser.add_argument("--skip-trigger", action="store_true", help="If set, skip triggering the orchestrator (assumes it's already running).")

    args = parser.parse_args()

    # --- Setup ---
    redis_password = os.getenv("REDIS_PASSWORD")
    if not redis_password:
        logging.error("REDIS_PASSWORD not found in environment. Please set it in your .env file.")
        sys.exit(1)

    # Use the provided redis-host, defaulting to 'redis' for in-container execution
    redis_url = f"redis://:{redis_password}@{args.redis_host}:6379/0"
    redis_client = _get_redis_client(redis_url)

    queue_name = "video_queue"  # Hardcoded for now, could be an arg
    total_urls = 0

    # --- Execution ---
    if not args.skip_populate:
        if args.input_file:
            total_urls = step_0_populate_queue(redis_client, queue_name, args.input_file)
        else:
            logging.info("No input file provided, using existing queue.")
            total_urls = redis_client.llen(f"{queue_name}_inbox")
            if total_urls == 0:
                logging.warning("Queue is empty and no input file was provided. The test may not have any work to do.")
    else:
        total_urls = redis_client.llen(f"{queue_name}_inbox")
        logging.info(f"Skipping population. Found {total_urls} URLs in the inbox.")

    if not args.skip_trigger:
        step_1_trigger_orchestrator(args)
    else:
        logging.info("Skipping orchestrator trigger.")

    step_2_monitor_progress(args, redis_client, queue_name, total_urls, args.run_time_min, args.progress_interval_min, args.show_status)

    if INTERRUPTED:
        handle_interruption(redis_client, queue_name, args.report_file)
    else:
        step_3_generate_report(redis_client, queue_name, args.report_file)

    if args.cleanup:
        step_4_cleanup_queues(redis_client, queue_name)

    if INTERRUPTED:
        logging.warning("Regression test script finished due to user interruption.")
        sys.exit(130)  # Standard exit code for Ctrl+C
    else:
        logging.info("Regression test script finished.")


if __name__ == "__main__":
    main()
@ -4,6 +4,9 @@ DAG to manage the state of proxies and accounts used by the ytdlp-ops-server.
from __future__ import annotations
from __future__ import annotations
import logging
import logging
import json
import re
import time
from datetime import datetime
from datetime import datetime
import socket
import socket
@ -208,6 +211,112 @@ def _list_account_statuses(client, account_id, redis_conn_id):
        print(f"\nERROR: An unexpected error occurred: {e}\n")
        print(f"\nERROR: An unexpected error occurred: {e}\n")


def _list_client_statuses(redis_conn_id):
    """Lists the status of different client types from Redis."""
    logger.info("Listing client statuses from Redis key 'client_stats'")

    try:
        redis_client = _get_redis_client(redis_conn_id)
        stats_key = "client_stats"
        all_stats_raw = redis_client.hgetall(stats_key)

        if not all_stats_raw:
            print("\n--- Client Statuses ---\nNo client stats found in Redis.\n-----------------------\n")
            return

        from tabulate import tabulate
        status_list = []

        for client_bytes, stats_json_bytes in all_stats_raw.items():
            client_name = client_bytes.decode('utf-8')
            try:
                stats = json.loads(stats_json_bytes.decode('utf-8'))

                def format_latest(data):
                    if not data: return "N/A"
                    ts = format_timestamp(data.get('timestamp'))
                    url = data.get('url') or 'N/A'
                    machine = data.get('machine_id', 'N/A')
                    video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url)
                    video_id = video_id_match.group(1) if video_id_match else 'N/A'
                    return f"{ts}\nMachine: {machine}\nVideo ID: {video_id}"

                status_item = {
                    "Client": client_name,
                    "Success": stats.get('success_count', 0),
                    "Failures": stats.get('failure_count', 0),
                    "Last Success": format_latest(stats.get('latest_success')),
                    "Last Failure": format_latest(stats.get('latest_failure')),
                }
                status_list.append(status_item)
            except (json.JSONDecodeError, AttributeError) as e:
                logger.error(f"Could not parse stats for client '{client_name}': {e}")
                status_list.append({
                    "Client": client_name, "Success": "ERROR", "Failures": "ERROR",
                    "Last Success": "Could not parse data", "Last Failure": "Could not parse data"
                })

        status_list.sort(key=lambda item: item.get('Client', ''))

        print("\n--- Client Statuses ---")
        print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}")
        print("-----------------------\n")

    except Exception as e:
        logger.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")


def _list_activity_counters(redis_conn_id: str):
    """Lists current activity rates for proxies and accounts from Redis."""
    logger.info("Listing activity counters from Redis keys 'activity:per_proxy:*' and 'activity:per_account:*'")

    try:
        redis_client = _get_redis_client(redis_conn_id)
        from tabulate import tabulate
        now = time.time()

        def process_keys(pattern, entity_name):
            keys = redis_client.scan_iter(pattern)
            status_list = []
            for key_bytes in keys:
                key = key_bytes.decode('utf-8')
                entity_id = key.split(':', 2)[-1]

                # Clean up old entries before counting
                redis_client.zremrangebyscore(key, '-inf', now - 3660)  # Clean up > 1hr old

                count_1m = redis_client.zcount(key, now - 60, now)
                count_5m = redis_client.zcount(key, now - 300, now)
                count_1h = redis_client.zcount(key, now - 3600, now)

                if count_1h == 0:  # Don't show entities with no recent activity
                    continue

                status_list.append({
                    entity_name: entity_id,
                    "Activity (Last 1m)": count_1m,
                    "Activity (Last 5m)": count_5m,
                    "Activity (Last 1h)": count_1h,
                })

            status_list.sort(key=lambda item: item.get(entity_name, ''))

            print(f"\n--- {entity_name} Activity Counters ---")
            if not status_list:
                print(f"No recent activity found for {entity_name.lower()}s.")
            else:
                print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}")
            print("-----------------------------------\n")

        process_keys("activity:per_proxy:*", "Proxy URL")
        process_keys("activity:per_account:*", "Account ID")

    except Exception as e:
        logger.error(f"An unexpected error occurred while getting activity counters: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")

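# Illustrative sketch of the write side these counters assume (the producer code is not part
# of this DAG): each request appends a timestamped member to a per-entity sorted set, e.g.
# activity:per_proxy:<proxy_url>, which zcount() above then buckets into 1m/5m/1h windows.
# The member value only needs to be unique; the score is what the time windows are computed from.
def _record_activity(redis_client, entity_key):
    now = time.time()
    redis_client.zadd(entity_key, {f"{now}": now})
    redis_client.expire(entity_key, 2 * 3600)  # keep roughly two hours of history around
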
def manage_system_callable(**context):
|
def manage_system_callable(**context):
|
||||||
"""Main callable to interact with the system management endpoints."""
|
"""Main callable to interact with the system management endpoints."""
|
||||||
# Log version for debugging
|
# Log version for debugging
|
||||||
@ -218,7 +327,7 @@ def manage_system_callable(**context):
|
|||||||
action = params["action"]
|
action = params["action"]
|
||||||
|
|
||||||
# For Thrift actions, use the new management host/port
|
# For Thrift actions, use the new management host/port
|
||||||
if entity not in ["airflow_meta"]:
|
if entity not in ["airflow_meta", "activity_counters"]:
|
||||||
host = params["management_host"]
|
host = params["management_host"]
|
||||||
port = params["management_port"]
|
port = params["management_port"]
|
||||||
else:
|
else:
|
||||||
@ -232,8 +341,10 @@ def manage_system_callable(**context):
|
|||||||
valid_actions = {
|
valid_actions = {
|
||||||
"proxy": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
|
"proxy": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
|
||||||
"account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"],
|
"account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"],
|
||||||
|
"client": ["list_with_status", "delete_from_redis"],
|
||||||
"accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
|
"accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
|
||||||
"airflow_meta": ["clear_dag_runs"],
|
"airflow_meta": ["clear_dag_runs"],
|
||||||
|
"activity_counters": ["list_with_status"],
|
||||||
}
|
}
|
||||||
|
|
||||||
if action not in valid_actions.get(entity, []):
|
if action not in valid_actions.get(entity, []):
|
||||||
@ -287,7 +398,15 @@ def manage_system_callable(**context):
|
|||||||
# The session is committed automatically by the `with create_session()` context manager.
|
# The session is committed automatically by the `with create_session()` context manager.
|
||||||
logger.info(f"Successfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.")
|
logger.info(f"Successfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.")
|
||||||
print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.\n")
|
print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.\n")
|
||||||
return # End execution
|
return # End execution
|
||||||
|
|
||||||
|
# --- Handle Activity Counter action ---
|
||||||
|
if entity == "activity_counters":
|
||||||
|
if action == "list_with_status":
|
||||||
|
_list_activity_counters(params["redis_conn_id"])
|
||||||
|
return # End execution
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Action '{action}' is not valid for entity 'activity_counters'. Only 'list_with_status' is supported.")
|
||||||
|
|
||||||
# Handle Thrift-based deletion actions
|
# Handle Thrift-based deletion actions
|
||||||
if action == "delete_from_redis":
|
if action == "delete_from_redis":
|
||||||
@ -355,6 +474,15 @@ def manage_system_callable(**context):
|
|||||||
print(f"\nSuccessfully deleted {proxy_result} proxy keys for server '{server_identity}' from Redis.\n")
|
print(f"\nSuccessfully deleted {proxy_result} proxy keys for server '{server_identity}' from Redis.\n")
|
||||||
else:
|
else:
|
||||||
print(f"\nSuccessfully deleted {proxy_result} proxy keys from Redis across ALL servers.\n")
|
print(f"\nSuccessfully deleted {proxy_result} proxy keys from Redis across ALL servers.\n")
|
||||||
|
|
||||||
|
elif entity == "client":
|
||||||
|
logger.info("Deleting all client stats from Redis...")
|
||||||
|
redis_client = _get_redis_client(params["redis_conn_id"])
|
||||||
|
result = redis_client.delete("client_stats")
|
||||||
|
if result > 0:
|
||||||
|
print(f"\nSuccessfully deleted 'client_stats' key from Redis.\n")
|
||||||
|
else:
|
||||||
|
print(f"\nKey 'client_stats' not found in Redis. Nothing to delete.\n")
|
||||||
|
|
||||||
except (PBServiceException, PBUserException) as e:
|
except (PBServiceException, PBUserException) as e:
|
||||||
logger.error(f"Thrift error performing delete action: {e.message}", exc_info=True)
|
logger.error(f"Thrift error performing delete action: {e.message}", exc_info=True)
|
||||||
@ -374,7 +502,10 @@ def manage_system_callable(**context):
|
|||||||
try:
|
try:
|
||||||
client, transport = get_thrift_client(host, port)
|
client, transport = get_thrift_client(host, port)
|
||||||
|
|
||||||
if entity == "proxy":
|
if entity == "client":
|
||||||
|
if action == "list_with_status":
|
||||||
|
_list_client_statuses(params["redis_conn_id"])
|
||||||
|
elif entity == "proxy":
|
||||||
if action == "list_with_status":
|
if action == "list_with_status":
|
||||||
_list_proxy_statuses(client, server_identity)
|
_list_proxy_statuses(client, server_identity)
|
||||||
elif action == "ban":
|
elif action == "ban":
|
||||||
@ -497,6 +628,13 @@ def manage_system_callable(**context):
|
|||||||
_list_account_statuses(client, account_prefix, params["redis_conn_id"])
|
_list_account_statuses(client, account_prefix, params["redis_conn_id"])
|
||||||
|
|
||||||
elif entity == "accounts_and_proxies":
|
elif entity == "accounts_and_proxies":
|
||||||
|
if action == "list_with_status":
|
||||||
|
print("\n--- Listing statuses for Proxies, Accounts, and Clients ---")
|
||||||
|
_list_proxy_statuses(client, server_identity)
|
||||||
|
_list_account_statuses(client, account_id, params["redis_conn_id"])
|
||||||
|
_list_client_statuses(params["redis_conn_id"])
|
||||||
|
return # End execution for list_with_status
|
||||||
|
|
||||||
print(f"\n--- Performing action '{action}' on BOTH Proxies and Accounts ---")
|
print(f"\n--- Performing action '{action}' on BOTH Proxies and Accounts ---")
|
||||||
|
|
||||||
# --- Proxy Action ---
|
# --- Proxy Action ---
|
||||||
@ -674,7 +812,7 @@ with DAG(
|
|||||||
"entity": Param(
|
"entity": Param(
|
||||||
"accounts_and_proxies",
|
"accounts_and_proxies",
|
||||||
type="string",
|
type="string",
|
||||||
enum=["account", "proxy", "accounts_and_proxies", "airflow_meta"],
|
enum=["account", "proxy", "client", "accounts_and_proxies", "activity_counters", "airflow_meta"],
|
||||||
description="The type of entity to manage.",
|
description="The type of entity to manage.",
|
||||||
),
|
),
|
||||||
"action": Param(
|
"action": Param(
|
||||||
@ -698,6 +836,13 @@ with DAG(
|
|||||||
- `unban_all`: Sets the status of all accounts (or those matching a prefix in `account_id`) to `ACTIVE`.
|
- `unban_all`: Sets the status of all accounts (or those matching a prefix in `account_id`) to `ACTIVE`.
|
||||||
- `delete_from_redis`: **(Destructive)** Deletes account status from Redis via Thrift service. This permanently removes the account from being tracked by the system. If `account_id` is provided, it deletes that specific account. If `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts.
|
- `delete_from_redis`: **(Destructive)** Deletes account status from Redis via Thrift service. This permanently removes the account from being tracked by the system. If `account_id` is provided, it deletes that specific account. If `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts.
|
||||||
|
|
||||||
|
#### Actions for `entity: client`
|
||||||
|
- `list_with_status`: View success/failure statistics for each client type.
|
||||||
|
- `delete_from_redis`: **(Destructive)** Deletes all client stats from Redis.
|
||||||
|
|
||||||
|
#### Actions for `entity: activity_counters`
|
||||||
|
- `list_with_status`: View current activity rates (ops/min, ops/hr) for proxies and accounts.
|
||||||
|
|
||||||
#### Actions for `entity: accounts_and_proxies`
|
#### Actions for `entity: accounts_and_proxies`
|
||||||
- This entity performs the selected action on **both** proxies and accounts where applicable.
|
- This entity performs the selected action on **both** proxies and accounts where applicable.
|
||||||
- `list_with_status`: View statuses for both proxies and accounts.
|
- `list_with_status`: View statuses for both proxies and accounts.
|
||||||
@ -735,9 +880,9 @@ with DAG(
|
|||||||
description="The Airflow connection ID for the Redis server (used for 'delete_from_redis' and for fetching detailed account status).",
|
description="The Airflow connection ID for the Redis server (used for 'delete_from_redis' and for fetching detailed account status).",
|
||||||
),
|
),
|
||||||
"dag_id_to_manage": Param(
|
"dag_id_to_manage": Param(
|
||||||
"ytdlp_ops_worker_per_url",
|
"ytdlp_ops_v01_worker_per_url",
|
||||||
type="string",
|
type="string",
|
||||||
enum=["ytdlp_ops_worker_per_url", "ytdlp_ops_orchestrator"],
|
enum=["ytdlp_ops_v01_orchestrator", "ytdlp_ops_v01_dispatcher", "ytdlp_ops_v01_worker_per_url", "ytdlp_ops_v02_orchestrator_auth", "ytdlp_ops_v02_dispatcher_auth", "ytdlp_ops_v02_worker_per_url_auth", "ytdlp_ops_v02_orchestrator_dl", "ytdlp_ops_v02_dispatcher_dl", "ytdlp_ops_v02_worker_per_url_dl"],
|
||||||
title="[Airflow Meta] DAG ID",
|
title="[Airflow Meta] DAG ID",
|
||||||
description="The DAG ID to perform the action on.",
|
description="The DAG ID to perform the action on.",
|
||||||
),
|
),
|
||||||
|
|||||||
@ -254,7 +254,18 @@ def clear_queue_callable(**context):
|
|||||||
ti = context['task_instance']
|
ti = context['task_instance']
|
||||||
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
||||||
redis_conn_id = params['redis_conn_id']
|
redis_conn_id = params['redis_conn_id']
|
||||||
queue_base_name = params['queue_base_name']
|
|
||||||
|
queue_system = params.get('queue_system', 'v1_monolithic')
|
||||||
|
if queue_system == 'v1_monolithic':
|
||||||
|
queue_base_name = params['queue_base_name']
|
||||||
|
elif queue_system == 'v2_separated_auth':
|
||||||
|
queue_base_name = 'queue2_auth'
|
||||||
|
elif queue_system == 'v2_separated_dl':
|
||||||
|
queue_base_name = 'queue2_dl'
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid queue_system: {queue_system}")
|
||||||
|
logger.info(f"Operating on queue system '{queue_system}' with base name '{queue_base_name}'.")
|
||||||
|
|
||||||
queues_to_clear_options = params.get('queues_to_clear_options', [])
|
queues_to_clear_options = params.get('queues_to_clear_options', [])
|
||||||
confirm_clear = params.get('confirm_clear', False)
|
confirm_clear = params.get('confirm_clear', False)
|
||||||
dump_queues = params['dump_queues']
|
dump_queues = params['dump_queues']
|
||||||
@ -386,50 +397,77 @@ def check_status_callable(**context):
|
|||||||
ti = context['task_instance']
|
ti = context['task_instance']
|
||||||
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
||||||
redis_conn_id = params['redis_conn_id']
|
redis_conn_id = params['redis_conn_id']
|
||||||
queue_name = params.get('queue_base_name', DEFAULT_QUEUE_NAME)
|
queue_system = params.get('queue_system', 'v1_monolithic')
|
||||||
queue_suffixes = ['_inbox', '_progress', '_result', '_fail']
|
|
||||||
|
queue_base_names_to_check = []
|
||||||
|
if queue_system == 'v1_monolithic':
|
||||||
|
queue_base_names_to_check.append(params.get('queue_base_name', DEFAULT_QUEUE_NAME))
|
||||||
|
elif queue_system.startswith('v2_'):
|
||||||
|
# For v2, always check both auth and dl queues for a complete picture.
|
||||||
|
queue_base_names_to_check.extend(['queue2_auth', 'queue2_dl'])
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid queue_system: {queue_system}")
|
||||||
|
|
||||||
logger.info(f"--- Checking Status for Queues with Base Name: '{queue_name}' ---")
|
queue_suffixes = ['_inbox', '_progress', '_result', '_fail']
|
||||||
|
|
||||||
|
logger.info(f"--- Checking Status for Queue System: '{queue_system}' ---")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
redis_client = _get_redis_client(redis_conn_id)
|
redis_client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
for suffix in queue_suffixes:
|
for queue_name in queue_base_names_to_check:
|
||||||
queue_to_check = f"{queue_name}{suffix}"
|
logger.info(f"--- Base Name: '{queue_name}' ---")
|
||||||
key_type = redis_client.type(queue_to_check).decode('utf-8')
|
for suffix in queue_suffixes:
|
||||||
size = 0
|
queue_to_check = f"{queue_name}{suffix}"
|
||||||
if key_type == 'list':
|
key_type = redis_client.type(queue_to_check).decode('utf-8')
|
||||||
size = redis_client.llen(queue_to_check)
|
size = 0
|
||||||
elif key_type == 'hash':
|
if key_type == 'list':
|
||||||
size = redis_client.hlen(queue_to_check)
|
size = redis_client.llen(queue_to_check)
|
||||||
|
elif key_type == 'hash':
|
||||||
|
size = redis_client.hlen(queue_to_check)
|
||||||
|
|
||||||
if key_type != 'none':
|
if key_type != 'none':
|
||||||
logger.info(f" - Queue '{queue_to_check}': Type='{key_type.upper()}', Size={size}")
|
logger.info(f" - Queue '{queue_to_check}': Type='{key_type.upper()}', Size={size}")
|
||||||
else:
|
else:
|
||||||
logger.info(f" - Queue '{queue_to_check}': Does not exist.")
|
logger.info(f" - Queue '{queue_to_check}': Does not exist.")
|
||||||
|
|
||||||
logger.info(f"--- End of Status Check ---")
|
logger.info(f"--- End of Status Check ---")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to check queue status for base name '{queue_name}': {e}", exc_info=True)
|
logger.error(f"Failed to check queue status for system '{queue_system}': {e}", exc_info=True)
|
||||||
raise AirflowException(f"Failed to check queue status: {e}")
|
raise AirflowException(f"Failed to check queue status: {e}")
|
||||||
|
|
||||||
|
|
||||||
def requeue_failed_callable(**context):
|
def requeue_failed_callable(**context):
|
||||||
"""
|
"""
|
||||||
Copies all URLs from the fail hash to the inbox list and optionally clears the fail hash.
|
Copies all URLs from the fail hash to the inbox list and optionally clears the fail hash.
|
||||||
|
Adapts behavior for v1 and v2 queue systems.
|
||||||
"""
|
"""
|
||||||
params = context['params']
|
params = context['params']
|
||||||
ti = context['task_instance']
|
ti = context['task_instance']
|
||||||
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
||||||
redis_conn_id = params['redis_conn_id']
|
redis_conn_id = params['redis_conn_id']
|
||||||
queue_name = params['queue_base_name']
|
|
||||||
clear_fail_queue = params['clear_fail_queue_after_requeue']
|
clear_fail_queue = params['clear_fail_queue_after_requeue']
|
||||||
|
queue_system = params.get('queue_system', 'v1_monolithic')
|
||||||
|
|
||||||
fail_queue_name = f"{queue_name}_fail"
|
fail_queue_name = ""
|
||||||
inbox_queue_name = f"{queue_name}_inbox"
|
inbox_queue_name = ""
|
||||||
|
|
||||||
logger.info(f"Requeuing failed URLs from '{fail_queue_name}' to '{inbox_queue_name}'.")
|
if queue_system == 'v1_monolithic':
|
||||||
|
queue_name = params['queue_base_name']
|
||||||
|
fail_queue_name = f"{queue_name}_fail"
|
||||||
|
inbox_queue_name = f"{queue_name}_inbox"
|
||||||
|
elif queue_system == 'v2_separated_auth':
|
||||||
|
fail_queue_name = "queue2_auth_fail"
|
||||||
|
inbox_queue_name = "queue2_auth_inbox"
|
||||||
|
elif queue_system == 'v2_separated_dl':
|
||||||
|
fail_queue_name = "queue2_dl_fail"
|
||||||
|
# DL failures must be re-authenticated, so they go back to the auth inbox.
|
||||||
|
inbox_queue_name = "queue2_auth_inbox"
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Invalid queue_system: {queue_system}")
|
||||||
|
|
||||||
|
logger.info(f"Requeuing failed URLs from '{fail_queue_name}' to '{inbox_queue_name}' (system: {queue_system}).")
|
||||||
|
|
||||||
redis_client = _get_redis_client(redis_conn_id)
|
redis_client = _get_redis_client(redis_conn_id)
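# Illustrative refactor sketch (not in the DAG): the v1/v2 branching above could be centralized
# in one helper so clear_queue, check_status and requeue_failed all resolve queue names the same
# way. The names mirror the branches shown; anything beyond them would be an assumption.
def _resolve_queues(queue_system, queue_base_name):
    """Return (fail_queue, inbox_queue) for the selected queue system."""
    if queue_system == 'v1_monolithic':
        return f"{queue_base_name}_fail", f"{queue_base_name}_inbox"
    if queue_system == 'v2_separated_auth':
        return "queue2_auth_fail", "queue2_auth_inbox"
    if queue_system == 'v2_separated_dl':
        # DL failures are requeued for re-authentication.
        return "queue2_dl_fail", "queue2_auth_inbox"
    raise ValueError(f"Invalid queue_system: {queue_system}")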
|
||||||
|
|
||||||
@ -478,7 +516,15 @@ def add_videos_to_queue_callable(**context):
|
|||||||
params = context["params"]
|
params = context["params"]
|
||||||
ti = context['task_instance']
|
ti = context['task_instance']
|
||||||
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.")
|
||||||
queue_name = params["queue_base_name"]
|
|
||||||
|
queue_system = params.get('queue_system', 'v1_monolithic')
|
||||||
|
if queue_system.startswith('v2_'):
|
||||||
|
# For v2 systems, raw URLs are always added to the auth queue.
|
||||||
|
queue_name = 'queue2_auth'
|
||||||
|
logger.info(f"Queue system is '{queue_system}'. Adding URLs to '{queue_name}_inbox'.")
|
||||||
|
else:
|
||||||
|
queue_name = params["queue_base_name"]
|
||||||
|
|
||||||
redis_conn_id = params["redis_conn_id"]
|
redis_conn_id = params["redis_conn_id"]
|
||||||
dry_run = params["dry_run"]
|
dry_run = params["dry_run"]
|
||||||
|
|
||||||
@ -565,11 +611,18 @@ with DAG(
|
|||||||
title="Action",
|
title="Action",
|
||||||
description="The management action to perform.",
|
description="The management action to perform.",
|
||||||
),
|
),
|
||||||
|
"queue_system": Param(
|
||||||
|
"v1_monolithic",
|
||||||
|
type="string",
|
||||||
|
enum=["v1_monolithic", "v2_separated_auth", "v2_separated_dl"],
|
||||||
|
title="Queue System",
|
||||||
|
description="Select the target queue system to manage. This choice affects which queues are targeted by actions.",
|
||||||
|
),
|
||||||
"queue_base_name": Param(
|
"queue_base_name": Param(
|
||||||
DEFAULT_QUEUE_NAME,
|
DEFAULT_QUEUE_NAME,
|
||||||
type="string",
|
type="string",
|
||||||
title="Queue Base Name",
|
title="Queue Base Name (v1 only)",
|
||||||
description="Base name for queues used in actions like 'add_videos', 'check_status', 'clear_queue', 'requeue_failed'.",
|
description="Base name for queues. Only used when 'Queue System' is 'v1_monolithic'.",
|
||||||
),
|
),
|
||||||
# --- Params for 'add_videos' ---
|
# --- Params for 'add_videos' ---
|
||||||
"input_source": Param(
|
"input_source": Param(
|
||||||
@ -644,7 +697,7 @@ with DAG(
|
|||||||
),
|
),
|
||||||
# --- Params for 'list_contents' ---
|
# --- Params for 'list_contents' ---
|
||||||
"queue_to_list": Param(
|
"queue_to_list": Param(
|
||||||
'video_queue_inbox,video_queue_result,video_queue_fail',
|
'video_queue_inbox,queue2_auth_inbox,queue2_dl_result',
|
||||||
type="string",
|
type="string",
|
||||||
title="[list_contents] Queues to List",
|
title="[list_contents] Queues to List",
|
||||||
description="Comma-separated list of exact Redis key names to list.",
|
description="Comma-separated list of exact Redis key names to list.",
|
||||||
|
|||||||
@ -22,6 +22,7 @@ from datetime import datetime
|
|||||||
from airflow.decorators import task
|
from airflow.decorators import task
|
||||||
from airflow.models import Variable
|
from airflow.models import Variable
|
||||||
from airflow.models.dag import DAG
|
from airflow.models.dag import DAG
|
||||||
|
from airflow.models.param import Param
|
||||||
from airflow.utils.dates import days_ago
|
from airflow.utils.dates import days_ago
|
||||||
|
|
||||||
# Import utility functions and Thrift modules
|
# Import utility functions and Thrift modules
|
||||||
@ -42,7 +43,7 @@ DEFAULT_ARGS = {
|
|||||||
'owner': 'airflow',
|
'owner': 'airflow',
|
||||||
'retries': 1,
|
'retries': 1,
|
||||||
'retry_delay': 30,
|
'retry_delay': 30,
|
||||||
'queue': 'maintenance',
|
'queue': 'default',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -61,38 +62,76 @@ def _get_thrift_client(host, port, timeout=60):
|
|||||||
|
|
||||||
|
|
||||||
@task
|
@task
|
||||||
def manage_account_states():
|
def manage_account_states(**context):
|
||||||
"""
|
"""
|
||||||
Fetches all account statuses and performs necessary state transitions.
|
Fetches all account statuses and performs necessary state transitions
|
||||||
|
based on time durations configured in the DAG parameters.
|
||||||
"""
|
"""
|
||||||
|
params = context['params']
|
||||||
|
requests_limit = params['account_requests_limit']
|
||||||
|
cooldown_duration_s = params['account_cooldown_duration_min'] * 60
|
||||||
|
ban_duration_s = params['account_ban_duration_hours'] * 3600
|
||||||
|
|
||||||
host = DEFAULT_YT_AUTH_SERVICE_IP
|
host = DEFAULT_YT_AUTH_SERVICE_IP
|
||||||
port = int(DEFAULT_YT_AUTH_SERVICE_PORT)
|
port = int(DEFAULT_YT_AUTH_SERVICE_PORT)
|
||||||
redis_conn_id = DEFAULT_REDIS_CONN_ID
|
redis_conn_id = DEFAULT_REDIS_CONN_ID
|
||||||
|
logger.info(f"Starting account maintenance. Service: {host}:{port}, Redis: {redis_conn_id}")
|
||||||
|
logger.info(f"Using limits: Requests={requests_limit}, Cooldown={params['account_cooldown_duration_min']}m, Ban={params['account_ban_duration_hours']}h")
|
||||||
|
|
||||||
client, transport = None, None
|
client, transport = None, None
|
||||||
try:
|
try:
|
||||||
client, transport = _get_thrift_client(host, port)
|
client, transport = _get_thrift_client(host, port)
|
||||||
redis_client = _get_redis_client(redis_conn_id)
|
redis_client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
logger.info("Fetching all account statuses from the service...")
|
logger.info(f"--- Step 1: Fetching all account statuses from the ytdlp-ops-server at {host}:{port}... ---")
|
||||||
all_accounts = client.getAccountStatus(accountPrefix=None)
|
all_accounts = client.getAccountStatus(accountId=None, accountPrefix=None)
|
||||||
logger.info(f"Found {len(all_accounts)} accounts to process.")
|
logger.info(f"Found {len(all_accounts)} total accounts to process.")
|
||||||
|
|
||||||
accounts_to_unban = []
|
accounts_to_unban = []
|
||||||
accounts_to_activate = []
|
accounts_to_activate = []
|
||||||
accounts_to_rest = []
|
accounts_to_rest = []
|
||||||
|
|
||||||
|
now_ts = int(time.time())
|
||||||
|
|
||||||
for acc in all_accounts:
|
for acc in all_accounts:
|
||||||
if acc.status == "BANNED (expired)":
|
# Thrift can return 0 for unset integer fields.
|
||||||
|
# The AccountStatus thrift object is missing status_changed_timestamp and active_since_timestamp.
|
||||||
|
# We use available timestamps as proxies.
|
||||||
|
last_failure_ts = int(acc.lastFailureTimestamp or 0)
|
||||||
|
last_success_ts = int(acc.lastSuccessTimestamp or 0)
|
||||||
|
last_usage_ts = max(last_failure_ts, last_success_ts)
|
||||||
|
|
||||||
|
if acc.status == "BANNED" and last_failure_ts > 0 and (now_ts - last_failure_ts) >= ban_duration_s:
|
||||||
accounts_to_unban.append(acc.accountId)
|
accounts_to_unban.append(acc.accountId)
|
||||||
elif acc.status == "RESTING (expired)":
|
elif acc.status == "RESTING" and last_usage_ts > 0 and (now_ts - last_usage_ts) >= cooldown_duration_s:
|
||||||
accounts_to_activate.append(acc.accountId)
|
accounts_to_activate.append(acc.accountId)
|
||||||
elif acc.status == "ACTIVE (should be resting)":
|
elif acc.status == "ACTIVE":
|
||||||
accounts_to_rest.append(acc.accountId)
|
# For ACTIVE -> RESTING, check how many requests have been made since activation.
|
||||||
|
count_at_activation_raw = redis_client.hget(f"account_status:{acc.accountId}", "success_count_at_activation")
|
||||||
|
|
||||||
|
if count_at_activation_raw is not None:
|
||||||
|
count_at_activation = int(count_at_activation_raw)
|
||||||
|
current_success_count = acc.successCount or 0
|
||||||
|
requests_made = current_success_count - count_at_activation
|
||||||
|
|
||||||
|
if requests_made >= requests_limit:
|
||||||
|
logger.info(f"Account {acc.accountId} reached request limit ({requests_made}/{requests_limit}). Moving to RESTING.")
|
||||||
|
accounts_to_rest.append(acc.accountId)
|
||||||
|
else:
|
||||||
|
# This is a fallback for accounts that were activated before this logic was deployed.
|
||||||
|
# We can activate them "fresh" by setting their baseline count now.
|
||||||
|
logger.info(f"Account {acc.accountId} is ACTIVE but has no 'success_count_at_activation'. Setting it now.")
|
||||||
|
redis_client.hset(f"account_status:{acc.accountId}", "success_count_at_activation", acc.successCount or 0)
|
||||||
|
|
||||||
|
logger.info("--- Step 2: Analyzing accounts for state transitions ---")
|
||||||
|
logger.info(f"Found {len(accounts_to_unban)} accounts with expired bans to un-ban.")
|
||||||
|
logger.info(f"Found {len(accounts_to_activate)} accounts with expired rest periods to activate.")
|
||||||
|
logger.info(f"Found {len(accounts_to_rest)} accounts with expired active periods to put to rest.")
|
||||||
|
|
||||||
# --- Perform State Transitions ---
|
# --- Perform State Transitions ---
|
||||||
|
|
||||||
# 1. Un-ban accounts via Thrift call
|
# 1. Un-ban accounts via Thrift call
|
||||||
|
logger.info("--- Step 3: Processing un-bans ---")
|
||||||
if accounts_to_unban:
|
if accounts_to_unban:
|
||||||
logger.info(f"Un-banning {len(accounts_to_unban)} accounts: {accounts_to_unban}")
|
logger.info(f"Un-banning {len(accounts_to_unban)} accounts: {accounts_to_unban}")
|
||||||
for acc_id in accounts_to_unban:
|
for acc_id in accounts_to_unban:
|
||||||
@ -101,21 +140,30 @@ def manage_account_states():
|
|||||||
logger.info(f"Successfully un-banned account '{acc_id}'.")
|
logger.info(f"Successfully un-banned account '{acc_id}'.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to un-ban account '{acc_id}': {e}")
|
logger.error(f"Failed to un-ban account '{acc_id}': {e}")
|
||||||
|
else:
|
||||||
|
logger.info("No accounts to un-ban.")
|
||||||
|
|
||||||
# 2. Activate resting accounts via direct Redis write
|
# 2. Activate resting accounts via direct Redis write
|
||||||
|
logger.info("--- Step 4: Processing activations ---")
|
||||||
if accounts_to_activate:
|
if accounts_to_activate:
|
||||||
logger.info(f"Activating {len(accounts_to_activate)} accounts: {accounts_to_activate}")
|
logger.info(f"Activating {len(accounts_to_activate)} accounts: {accounts_to_activate}")
|
||||||
now_ts = int(time.time())
|
now_ts = int(time.time())
|
||||||
|
account_map = {acc.accountId: acc for acc in all_accounts}
|
||||||
with redis_client.pipeline() as pipe:
|
with redis_client.pipeline() as pipe:
|
||||||
for acc_id in accounts_to_activate:
|
for acc_id in accounts_to_activate:
|
||||||
key = f"account_status:{acc_id}"
|
key = f"account_status:{acc_id}"
|
||||||
|
current_success_count = account_map[acc_id].successCount or 0
|
||||||
pipe.hset(key, "status", "ACTIVE")
|
pipe.hset(key, "status", "ACTIVE")
|
||||||
pipe.hset(key, "active_since_timestamp", now_ts)
|
pipe.hset(key, "active_since_timestamp", now_ts)
|
||||||
pipe.hset(key, "status_changed_timestamp", now_ts)
|
pipe.hset(key, "status_changed_timestamp", now_ts)
|
||||||
|
pipe.hset(key, "success_count_at_activation", current_success_count)
|
||||||
pipe.execute()
|
pipe.execute()
|
||||||
logger.info("Finished activating accounts.")
|
logger.info("Finished activating accounts.")
|
||||||
|
else:
|
||||||
|
logger.info("No accounts to activate.")
|
||||||
|
|
||||||
# 3. Rest active accounts via direct Redis write
|
# 3. Rest active accounts via direct Redis write
|
||||||
|
logger.info("--- Step 5: Processing rests ---")
|
||||||
if accounts_to_rest:
|
if accounts_to_rest:
|
||||||
logger.info(f"Putting {len(accounts_to_rest)} accounts to rest: {accounts_to_rest}")
|
logger.info(f"Putting {len(accounts_to_rest)} accounts to rest: {accounts_to_rest}")
|
||||||
now_ts = int(time.time())
|
now_ts = int(time.time())
|
||||||
@ -124,8 +172,13 @@ def manage_account_states():
|
|||||||
key = f"account_status:{acc_id}"
|
key = f"account_status:{acc_id}"
|
||||||
pipe.hset(key, "status", "RESTING")
|
pipe.hset(key, "status", "RESTING")
|
||||||
pipe.hset(key, "status_changed_timestamp", now_ts)
|
pipe.hset(key, "status_changed_timestamp", now_ts)
|
||||||
|
pipe.hdel(key, "success_count_at_activation")
|
||||||
pipe.execute()
|
pipe.execute()
|
||||||
logger.info("Finished putting accounts to rest.")
|
logger.info("Finished putting accounts to rest.")
|
||||||
|
else:
|
||||||
|
logger.info("No accounts to put to rest.")
|
||||||
|
|
||||||
|
logger.info("--- Account maintenance run complete. ---")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if transport and transport.isOpen():
|
if transport and transport.isOpen():
|
||||||
@ -139,6 +192,47 @@ with DAG(
|
|||||||
start_date=days_ago(1),
|
start_date=days_ago(1),
|
||||||
catchup=False,
|
catchup=False,
|
||||||
tags=['ytdlp', 'maintenance'],
|
tags=['ytdlp', 'maintenance'],
|
||||||
doc_md=__doc__,
|
doc_md="""
|
||||||
|
### YT-DLP Account Maintenance: Time-Based State Transitions
|
||||||
|
|
||||||
|
This DAG is the central authority for automated, **time-based** state management for ytdlp-ops accounts.
|
||||||
|
It runs periodically to fetch the status of all accounts and applies its own logic to determine if an account's state should change based on configurable time durations.
|
||||||
|
|
||||||
|
The thresholds are defined as DAG parameters and can be configured via the Airflow UI:
|
||||||
|
- **Requests Limit**: How many successful requests an account can perform before it needs to rest.
|
||||||
|
- **Cooldown Duration**: How long an account must rest before it can be used again.
|
||||||
|
- **Ban Duration**: How long a ban lasts before the account is automatically un-banned.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### Separation of Concerns: Time vs. Errors
|
||||||
|
|
||||||
|
It is critical to understand that this DAG primarily handles time-based state changes. Error-based banning may be handled by worker DAGs during URL processing. This separation ensures that maintenance is predictable and based on timers, while acute, error-driven actions are handled immediately by the workers that encounter them.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### State Transitions Performed by This DAG:
|
||||||
|
|
||||||
|
On each run, this DAG fetches the raw status and timestamps for all accounts and performs the following checks:
|
||||||
|
|
||||||
|
1. **Un-banning (`BANNED` -> `ACTIVE`)**:
|
||||||
|
- **Condition**: An account has been in the `BANNED` state for longer than the configured `account_ban_duration_hours`.
|
||||||
|
- **Action**: The DAG calls the `unbanAccount` service endpoint to lift the ban.
|
||||||
|
|
||||||
|
2. **Activation (`RESTING` -> `ACTIVE`)**:
|
||||||
|
- **Condition**: An account has been in the `RESTING` state for longer than the configured `account_cooldown_duration_min`.
|
||||||
|
- **Action**: The DAG updates the account's status to `ACTIVE` directly in Redis.
|
||||||
|
|
||||||
|
3. **Resting (`ACTIVE` -> `RESTING`)**:
|
||||||
|
- **Condition**: An account has performed more successful requests than the configured `account_requests_limit` since it was last activated.
|
||||||
|
- **Action**: The DAG updates the account's status to `RESTING` directly in Redis.
|
||||||
|
|
||||||
|
This process gives full control over time-based account lifecycle management to the Airflow orchestrator.
|
||||||
|
""",
|
||||||
|
params={
|
||||||
|
'account_requests_limit': Param(250, type="integer", description="Number of successful requests an account can make before it is rested."),
|
||||||
|
'account_cooldown_duration_min': Param(60, type="integer", description="Duration in minutes an account must rest before being activated again. Default is 1 hour."),
|
||||||
|
'account_ban_duration_hours': Param(24, type="integer", description="Duration in hours an account stays banned before it can be un-banned."),
|
||||||
|
}
|
||||||
) as dag:
|
) as dag:
|
||||||
manage_account_states()
|
manage_account_states()
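# Illustrative only: the shape of the per-account Redis hash this DAG manipulates. Field names
# are taken from the hset/hdel calls above; the values are made-up examples, not real data.
EXAMPLE_ACCOUNT_STATUS_HASH = {
    "status": "ACTIVE",                       # set to RESTING / ACTIVE by this DAG
    "status_changed_timestamp": 1718000000,   # updated on every transition
    "active_since_timestamp": 1718000000,     # set when an account is (re)activated
    "success_count_at_activation": 120,       # baseline for the ACTIVE -> RESTING check; removed on rest
}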
|
||||||
|
|||||||
@ -72,16 +72,16 @@ def dispatch_url_to_worker(**context):
|
|||||||
# The hook will parse the queue name from the run_id itself.
|
# The hook will parse the queue name from the run_id itself.
|
||||||
run_id = f"worker_run_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}"
|
run_id = f"worker_run_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}"
|
||||||
|
|
||||||
logger.info(f"Triggering 'ytdlp_ops_worker_per_url' with run_id '{run_id}'")
|
logger.info(f"Triggering 'ytdlp_ops_v01_worker_per_url' with run_id '{run_id}'")
|
||||||
trigger_dag(
|
trigger_dag(
|
||||||
dag_id='ytdlp_ops_worker_per_url',
|
dag_id='ytdlp_ops_v01_worker_per_url',
|
||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
conf=conf_to_pass,
|
conf=conf_to_pass,
|
||||||
replace_microseconds=False
|
replace_microseconds=False
|
||||||
)
|
)
|
||||||
|
|
||||||
with DAG(
|
with DAG(
|
||||||
dag_id='ytdlp_ops_dispatcher',
|
dag_id='ytdlp_ops_v01_dispatcher',
|
||||||
default_args={'owner': 'airflow', 'retries': 0},
|
default_args={'owner': 'airflow', 'retries': 0},
|
||||||
schedule=None, # This DAG is only triggered by the orchestrator.
|
schedule=None, # This DAG is only triggered by the orchestrator.
|
||||||
start_date=days_ago(1),
|
start_date=days_ago(1),
|
||||||
@ -94,10 +94,10 @@ with DAG(
|
|||||||
1. It pulls a single URL from the Redis `_inbox` queue.
|
1. It pulls a single URL from the Redis `_inbox` queue.
|
||||||
2. It runs on the generic `queue-dl` to find any available worker.
|
2. It runs on the generic `queue-dl` to find any available worker.
|
||||||
3. It determines the worker's hostname and constructs a dedicated queue name (e.g., `queue-dl-dl-worker-1`).
|
3. It determines the worker's hostname and constructs a dedicated queue name (e.g., `queue-dl-dl-worker-1`).
|
||||||
4. It triggers the `ytdlp_ops_worker_per_url` DAG, passing the URL and the dedicated queue name in the configuration.
|
4. It triggers the `ytdlp_ops_v01_worker_per_url` DAG, passing the URL and the dedicated queue name in the configuration.
|
||||||
|
|
||||||
This dispatcher-led affinity, combined with the `task_instance_mutation_hook` cluster policy, ensures that all subsequent processing for that URL happens on the same machine.
|
This dispatcher-led affinity, combined with the `task_instance_mutation_hook` cluster policy, ensures that all subsequent processing for that URL happens on the same machine.
|
||||||
The `ytdlp_ops_orchestrator` is used to trigger a batch of these dispatcher runs.
|
The `ytdlp_ops_v01_orchestrator` is used to trigger a batch of these dispatcher runs.
|
||||||
""",
|
""",
|
||||||
# All params are passed through from the orchestrator
|
# All params are passed through from the orchestrator
|
||||||
render_template_as_native_obj=True,
|
render_template_as_native_obj=True,
|
||||||
444
airflow/dags/ytdlp_ops_v01_orchestrator.py
Normal file
@ -0,0 +1,444 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vim:fenc=utf-8
|
||||||
|
#
|
||||||
|
# Copyright © 2024 rl <rl@rlmbp>
|
||||||
|
#
|
||||||
|
# Distributed under terms of the MIT license.
|
||||||
|
|
||||||
|
"""
|
||||||
|
DAG to orchestrate ytdlp_ops_dispatcher DAG runs based on a defined policy.
|
||||||
|
It fetches URLs from a Redis queue and launches dispatchers in controlled bunches,
|
||||||
|
which in turn trigger workers with affinity.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from airflow import DAG
|
||||||
|
from airflow.exceptions import AirflowException, AirflowSkipException
|
||||||
|
from airflow.operators.python import PythonOperator
|
||||||
|
from airflow.models.param import Param
|
||||||
|
from airflow.models.variable import Variable
|
||||||
|
from airflow.utils.dates import days_ago
|
||||||
|
from airflow.api.common.trigger_dag import trigger_dag
|
||||||
|
from airflow.models.dagrun import DagRun
|
||||||
|
from airflow.models.dag import DagModel
|
||||||
|
from datetime import timedelta
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Import utility functions
|
||||||
|
from utils.redis_utils import _get_redis_client
|
||||||
|
|
||||||
|
# Import Thrift modules for proxy status check
|
||||||
|
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||||
|
from thrift.protocol import TBinaryProtocol
|
||||||
|
from thrift.transport import TSocket, TTransport
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
DEFAULT_REQUEST_PARAMS_JSON = """{
|
||||||
|
"context_reuse_policy": {
|
||||||
|
"enabled": true,
|
||||||
|
"max_age_seconds": 86400,
|
||||||
|
"reuse_visitor_id": true,
|
||||||
|
"reuse_cookies": true
|
||||||
|
},
|
||||||
|
"token_generation_strategy": {
|
||||||
|
"youtubei_js": {
|
||||||
|
"generate_po_token": true,
|
||||||
|
"generate_gvs_token": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ytdlp_params": {
|
||||||
|
"use_curl_prefetch": false,
|
||||||
|
"token_supplement_strategy": {
|
||||||
|
"youtubepot_bgutilhttp_extractor": {
|
||||||
|
"enabled": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"visitor_id_override": {
|
||||||
|
"enabled": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"session_params": {
|
||||||
|
"lang": "en-US",
|
||||||
|
"location": "US",
|
||||||
|
"deviceCategory": "MOBILE",
|
||||||
|
"user_agents": {
|
||||||
|
"youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
|
||||||
|
"yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}"""
|
||||||
|
|
||||||
|
# Default settings
|
||||||
|
DEFAULT_QUEUE_NAME = 'video_queue'
|
||||||
|
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||||
|
DEFAULT_TOTAL_WORKERS = 3
|
||||||
|
DEFAULT_WORKERS_PER_BUNCH = 1
|
||||||
|
DEFAULT_WORKER_DELAY_S = 5
|
||||||
|
DEFAULT_BUNCH_DELAY_S = 20
|
||||||
|
|
||||||
|
DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
|
||||||
|
DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
|
||||||
|
|
||||||
|
# --- Helper Functions ---
|
||||||
|
|
||||||
|
def _check_application_queue(redis_client, queue_base_name: str) -> int:
|
||||||
|
"""Checks and logs the length of the application's inbox queue."""
|
||||||
|
inbox_queue_name = f"{queue_base_name}_inbox"
|
||||||
|
logger.info(f"--- Checking Application Work Queue ---")
|
||||||
|
try:
|
||||||
|
q_len = redis_client.llen(inbox_queue_name)
|
||||||
|
logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).")
|
||||||
|
return q_len
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True)
|
||||||
|
return -1 # Indicate an error
|
||||||
|
|
||||||
|
def _inspect_celery_queues(redis_client, queue_names: list):
|
||||||
|
"""Inspects Celery queues in Redis and logs their status."""
|
||||||
|
logger.info("--- Inspecting Celery Queues in Redis ---")
|
||||||
|
for queue_name in queue_names:
|
||||||
|
try:
|
||||||
|
q_len = redis_client.llen(queue_name)
|
||||||
|
logger.info(f"Queue '{queue_name}': Length = {q_len}")
|
||||||
|
|
||||||
|
if q_len > 0:
|
||||||
|
logger.info(f"Showing up to 10 tasks in '{queue_name}':")
|
||||||
|
# Fetch up to 10 items from the start of the list (queue)
|
||||||
|
items_bytes = redis_client.lrange(queue_name, 0, 9)
|
||||||
|
for i, item_bytes in enumerate(items_bytes):
|
||||||
|
try:
|
||||||
|
# Celery tasks are JSON-encoded strings
|
||||||
|
task_data = json.loads(item_bytes.decode('utf-8'))
|
||||||
|
# Pretty print for readability in logs
|
||||||
|
pretty_task_data = json.dumps(task_data, indent=2)
|
||||||
|
logger.info(f" Task {i+1}:\n{pretty_task_data}")
|
||||||
|
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
||||||
|
logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True)
|
||||||
|
logger.info("--- End of Queue Inspection ---")
|
||||||
|
|
||||||
|
|
||||||
|
# --- Main Orchestration Callable ---
|
||||||
|
|
||||||
|
def orchestrate_workers_ignition_callable(**context):
|
||||||
|
"""
|
||||||
|
Main orchestration logic. Triggers a specified number of dispatcher DAGs
|
||||||
|
to initiate self-sustaining processing loops.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
ti = context['task_instance']
|
||||||
|
logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.")
|
||||||
|
logger.info("Starting dispatcher ignition sequence.")
|
||||||
|
|
||||||
|
dispatcher_dag_id = 'ytdlp_ops_v01_dispatcher'
|
||||||
|
worker_queue = 'queue-dl'
|
||||||
|
app_queue_name = params['queue_name']
|
||||||
|
|
||||||
|
logger.info(f"Running in v1 (monolithic) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'")
|
||||||
|
|
||||||
|
dag_model = DagModel.get_dagmodel(dispatcher_dag_id)
|
||||||
|
if dag_model and dag_model.is_paused:
|
||||||
|
logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.")
|
||||||
|
raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.")
|
||||||
|
|
||||||
|
total_workers = int(params['total_workers'])
|
||||||
|
workers_per_bunch = int(params['workers_per_bunch'])
|
||||||
|
|
||||||
|
# --- Input Validation ---
|
||||||
|
if total_workers <= 0:
|
||||||
|
logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.")
|
||||||
|
raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).")
|
||||||
|
|
||||||
|
if workers_per_bunch <= 0:
|
||||||
|
logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. Aborting.")
|
||||||
|
raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.")
|
||||||
|
# --- End Input Validation ---
|
||||||
|
|
||||||
|
worker_delay = int(params['delay_between_workers_s'])
|
||||||
|
bunch_delay = int(params['delay_between_bunches_s'])
|
||||||
|
|
||||||
|
# Create a list of worker numbers to trigger
|
||||||
|
worker_indices = list(range(total_workers))
|
||||||
|
bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)]
|
||||||
|
|
||||||
|
# --- Inspect Queues before starting ---
|
||||||
|
try:
|
||||||
|
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||||
|
redis_client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
|
# First, check the application queue for work
|
||||||
|
app_queue_len = _check_application_queue(redis_client, app_queue_name)
|
||||||
|
|
||||||
|
if params.get('skip_if_queue_empty') and app_queue_len == 0:
|
||||||
|
logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.")
|
||||||
|
raise AirflowSkipException("Application work queue is empty.")
|
||||||
|
|
||||||
|
# Then, inspect the target Celery queue for debugging
|
||||||
|
_inspect_celery_queues(redis_client, [worker_queue])
|
||||||
|
except AirflowSkipException:
|
||||||
|
raise # Re-raise to let Airflow handle the skip
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not inspect queues due to an error: {e}. Continuing with ignition sequence.")
|
||||||
|
# --- End of Inspection ---
|
||||||
|
|
||||||
|
logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.")
|
||||||
|
|
||||||
|
dag_run_id = context['dag_run'].run_id
|
||||||
|
total_triggered = 0
|
||||||
|
|
||||||
|
for i, bunch in enumerate(bunches):
|
||||||
|
logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---")
|
||||||
|
for j, _ in enumerate(bunch):
|
||||||
|
# Create a unique run_id for each dispatcher run
|
||||||
|
run_id = f"dispatched_{dag_run_id}_{total_triggered}"
|
||||||
|
|
||||||
|
# Pass all orchestrator params to the dispatcher, which will then pass them to the worker.
|
||||||
|
conf_to_pass = {p: params[p] for p in params}
|
||||||
|
|
||||||
|
logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})")
|
||||||
|
logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}")
|
||||||
|
|
||||||
|
trigger_dag(
|
||||||
|
dag_id=dispatcher_dag_id,
|
||||||
|
run_id=run_id,
|
||||||
|
conf=conf_to_pass,
|
||||||
|
replace_microseconds=False
|
||||||
|
)
|
||||||
|
total_triggered += 1
|
||||||
|
|
||||||
|
# Delay between dispatches in a bunch
|
||||||
|
if j < len(bunch) - 1:
|
||||||
|
logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...")
|
||||||
|
time.sleep(worker_delay)
|
||||||
|
|
||||||
|
# Delay between bunches
|
||||||
|
if i < len(bunches) - 1:
|
||||||
|
logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---")
|
||||||
|
time.sleep(bunch_delay)
|
||||||
|
|
||||||
|
logger.info(f"--- Ignition sequence complete. Total dispatcher runs triggered: {total_triggered}. ---")
|
||||||
|
|
||||||
|
# --- Final Queue Inspection ---
|
||||||
|
final_check_delay = 30 # seconds
|
||||||
|
logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...")
|
||||||
|
time.sleep(final_check_delay)
|
||||||
|
|
||||||
|
try:
|
||||||
|
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||||
|
redis_client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
|
# Log connection details for debugging broker mismatch issues
|
||||||
|
conn_kwargs = redis_client.connection_pool.connection_kwargs
|
||||||
|
logger.info(f"Final check using Redis connection '{redis_conn_id}': "
|
||||||
|
f"host={conn_kwargs.get('host')}, "
|
||||||
|
f"port={conn_kwargs.get('port')}, "
|
||||||
|
f"db={conn_kwargs.get('db')}")
|
||||||
|
|
||||||
|
_inspect_celery_queues(redis_client, [worker_queue])
|
||||||
|
logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. "
|
||||||
|
"If queues are empty, workers have started processing.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DAG Definition
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
default_args = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'depends_on_past': False,
|
||||||
|
'email_on_failure': False,
|
||||||
|
'email_on_retry': False,
|
||||||
|
'retries': 1,
|
||||||
|
'retry_delay': timedelta(minutes=1),
|
||||||
|
'start_date': days_ago(1),
|
||||||
|
}
|
||||||
|
|
||||||
|
with DAG(
|
||||||
|
dag_id='ytdlp_ops_v01_orchestrator',
|
||||||
|
default_args=default_args,
|
||||||
|
schedule=None, # This DAG runs only when triggered.
|
||||||
|
max_active_runs=1, # Only one ignition process should run at a time.
|
||||||
|
catchup=False,
|
||||||
|
description='Ignition system for ytdlp_ops_v01_dispatcher DAGs. Starts self-sustaining worker loops via dispatchers.',
|
||||||
|
doc_md="""
|
||||||
|
### YT-DLP v1 (Monolithic) Worker Ignition System
|
||||||
|
|
||||||
|
This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v1 monolithic worker**.
|
||||||
|
It does **not** process URLs itself. Its only job is to trigger a specified number of `ytdlp_ops_v01_dispatcher` DAGs,
|
||||||
|
which in turn pull URLs and trigger `ytdlp_ops_v01_worker_per_url` with worker affinity.
|
||||||
|
|
||||||
|
#### How it Works:
|
||||||
|
|
||||||
|
1. **Manual Trigger:** You manually trigger this DAG with parameters defining how many dispatcher loops to start (`total_workers`), in what configuration (`workers_per_bunch`, delays).
|
||||||
|
2. **Ignition:** The orchestrator triggers the initial set of dispatcher DAGs in a "fire-and-forget" manner, passing all its configuration parameters to them.
|
||||||
|
3. **Completion:** Once all initial dispatchers have been triggered, the orchestrator's job is complete.
|
||||||
|
|
||||||
|
The dispatchers then take over, each pulling a URL, determining affinity, and triggering a worker DAG.
|
||||||
|
|
||||||
|
#### Client Selection (`clients` parameter):
|
||||||
|
The `clients` parameter determines which YouTube client persona is used for token generation. Different clients have different capabilities and requirements.
|
||||||
|
|
||||||
|
**Supported Clients:**
|
||||||
|
|
||||||
|
| Client | Visitor ID | Player poToken | GVS poToken | Cookies Support | Notes |
|
||||||
|
| ---------------- | ------------ | -------------- | ------------ | --------------- | ------------------------------------------------------------------ |
|
||||||
|
| `tv` | Required | Not Required | Not Required | Supported | All formats may have DRM if you request too much. |
|
||||||
|
| `web_safari` | Required | Required | Required* | Supported | *Provides HLS (m3u8) formats which may not require a GVS token. |
|
||||||
|
| `mweb` | Required | Required | Required | Supported | |
|
||||||
|
| `web_camoufox` | Required | Required | Required | Supported | Camoufox variant of `web`. |
|
||||||
|
|
||||||
|
**Untested / Not Recommended Clients:**
|
||||||
|
|
||||||
|
| Client | Visitor ID | Player poToken | GVS poToken | Cookies Support | Notes |
|
||||||
|
| ---------------- | ------------ | -------------- | ------------ | --------------- | ------------------------------------------------------------------ |
|
||||||
|
| `web` | Required | Required | Required | Supported | Only SABR formats available. |
|
||||||
|
| `tv_simply` | Required | Not Required | Not Required | Not Supported | |
|
||||||
|
| `tv_embedded` | Required | Not Required | Not Required | Supported | Requires account cookies for most videos. |
|
||||||
|
| `web_embedded` | Required | Not Required | Not Required | Supported | Only for embeddable videos. |
|
||||||
|
| `web_music` | Required | Required | Required | Supported | |
|
||||||
|
| `web_creator` | Required | Required | Required | Supported | Requires account cookies. |
|
||||||
|
| `android` | Required | Required | Required | Not Supported | |
|
||||||
|
| `android_vr` | Required | Not Required | Not Required | Not Supported | YouTube Kids videos are not available. |
|
||||||
|
| `ios` | Required | Required | Required | Not Supported | |
|
||||||
|
|
||||||
|
Other `_camoufox` variants are also available but untested.
|
||||||
|
""",
|
||||||
|
tags=['ytdlp', 'mgmt', 'master'],
|
||||||
|
params={
|
||||||
|
# --- Ignition Control Parameters ---
|
||||||
|
'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."),
|
||||||
|
'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."),
|
||||||
|
'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."),
|
||||||
|
'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."),
|
||||||
|
'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."),
|
||||||
|
|
||||||
|
# --- Worker Passthrough Parameters ---
|
||||||
|
'on_auth_failure': Param(
|
||||||
|
'retry_with_new_account',
|
||||||
|
type="string",
|
||||||
|
enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'proceed_loop_under_manual_inspection'],
|
||||||
|
title="[Worker Param] On Authentication Failure Policy",
|
||||||
|
description="Policy for a worker when a bannable authentication error occurs. "
|
||||||
|
"'stop_loop': Ban the account, mark URL as failed, and stop the worker's loop. "
|
||||||
|
"'retry_with_new_account': (Default) Ban the failed account, retry ONCE with a new account. If retry fails, ban the second account and stop."
|
||||||
|
"'retry_without_ban': If a connection error (e.g. SOCKS timeout) occurs, retry with a new account but do NOT ban the first account/proxy. If retry fails, stop the loop without banning."
|
||||||
|
"'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene."
|
||||||
|
),
|
||||||
|
'on_download_failure': Param(
|
||||||
|
'proceed_loop',
|
||||||
|
type="string",
|
||||||
|
enum=['stop_loop', 'proceed_loop', 'retry_with_new_token'],
|
||||||
|
title="[Worker Param] On Download Failure Policy",
|
||||||
|
description="Policy for a worker when a download or probe error occurs. "
|
||||||
|
"'stop_loop': Mark URL as failed and stop the worker's loop. "
|
||||||
|
"'proceed_loop': (Default) Mark URL as failed but continue the processing loop with a new URL. "
|
||||||
|
"'retry_with_new_token': Attempt to get a new token with a new account and retry the download once. If it fails again, proceed loop."
|
||||||
|
),
|
||||||
|
'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}),
|
||||||
|
'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."),
|
||||||
|
'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
|
||||||
|
'clients': Param(
|
||||||
|
'mweb,web_camoufox,tv',
|
||||||
|
type="string",
|
||||||
|
enum=[
|
||||||
|
'mweb,web_camoufox,tv',
|
||||||
|
'mweb',
|
||||||
|
'web_camoufox',
|
||||||
|
'tv',
|
||||||
|
'custom',
|
||||||
|
'tv,web_safari,mweb,web_camoufox',
|
||||||
|
'web_safari',
|
||||||
|
'web',
|
||||||
|
'web_embedded',
|
||||||
|
'web_music',
|
||||||
|
'web_creator',
|
||||||
|
'web_safari_camoufox',
|
||||||
|
'web_embedded_camoufox',
|
||||||
|
'web_music_camoufox',
|
||||||
|
'web_creator_camoufox',
|
||||||
|
'mweb_camoufox',
|
||||||
|
'android',
|
||||||
|
'android_music',
|
||||||
|
'android_creator',
|
||||||
|
'android_vr',
|
||||||
|
'ios',
|
||||||
|
'ios_music',
|
||||||
|
'ios_creator',
|
||||||
|
'tv_simply',
|
||||||
|
'tv_embedded',
|
||||||
|
],
|
||||||
|
title="[Worker Param] Clients",
|
||||||
|
description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
|
||||||
|
),
|
||||||
|
'account_pool': Param('ytdlp_account', type="string", description="[Worker Param] Account pool prefix or comma-separated list."),
|
||||||
|
'account_pool_size': Param(10, type=["integer", "null"], description="[Worker Param] If using a prefix for 'account_pool', this specifies the number of accounts to generate (e.g., 10 for 'prefix_01' through 'prefix_10'). Required when using a prefix."),
|
||||||
|
'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode. Format: prefix_YYYYMMDDHHMMSS_client_XX."),
|
||||||
|
'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string", description="[Worker Param] IP of the ytdlp-ops-server. Default is from Airflow variable YT_AUTH_SERVICE_IP or hardcoded."),
|
||||||
|
'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer", description="[Worker Param] Port of the Envoy load balancer. Default is from Airflow variable YT_AUTH_SERVICE_PORT or hardcoded."),
|
||||||
|
'machine_id': Param("ytdlp-ops-airflow-service", type="string", description="[Worker Param] Identifier for the client machine."),
|
||||||
|
'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="A specific proxy URL to use for the request, overriding the server's proxy pool logic."),
|
||||||
|
'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="[Worker Param] If True and all accounts in a prefix-based pool are exhausted, create a new one automatically."),
|
||||||
|
# --- Download Control Parameters ---
|
||||||
|
'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."),
|
||||||
|
'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."),
|
||||||
|
'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."),
|
||||||
|
'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
|
||||||
|
'fragment_retries': Param(2, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."),
|
||||||
|
'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."),
|
||||||
|
'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."),
|
||||||
|
'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."),
|
||||||
|
'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."),
|
||||||
|
'download_format_preset': Param(
|
||||||
|
'formats_2',
|
||||||
|
type="string",
|
||||||
|
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
|
||||||
|
title="[Worker Param] Download Format Preset",
|
||||||
|
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
|
||||||
|
),
|
||||||
|
'download_format_custom': Param(
|
||||||
|
'18,140,299/298/137/136/135/134/133',
|
||||||
|
type="string",
|
||||||
|
title="[Worker Param] Custom Download Format",
|
||||||
|
description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
|
||||||
|
),
|
||||||
|
'downloader': Param(
|
||||||
|
'default',
|
||||||
|
type="string",
|
||||||
|
enum=['default', 'aria2c'],
|
||||||
|
title="[Worker Param] Downloader",
|
||||||
|
description="Choose the downloader for yt-dlp."
|
||||||
|
),
|
||||||
|
'downloader_args_aria2c': Param(
|
||||||
|
'aria2c:-x 4 -k 2M --max-download-limit=3M',
|
||||||
|
type="string",
|
||||||
|
title="[Worker Param] Aria2c Downloader Arguments",
|
||||||
|
description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
|
||||||
|
),
|
||||||
|
'yt_dlp_extra_args': Param(
|
||||||
|
'--restrict-filenames',
|
||||||
|
type=["string", "null"],
|
||||||
|
title="[Worker Param] Extra yt-dlp arguments",
|
||||||
|
description="Extra command-line arguments for yt-dlp during download."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
) as dag:
|
||||||
|
|
||||||
|
orchestrate_task = PythonOperator(
|
||||||
|
task_id='start_worker_loops',
|
||||||
|
python_callable=orchestrate_workers_ignition_callable,
|
||||||
|
)
|
||||||
|
orchestrate_task.doc_md = """
|
||||||
|
### Start Worker Loops
|
||||||
|
This is the main task that executes the ignition policy.
|
||||||
|
- It triggers `ytdlp_ops_dispatcher` DAGs according to the batch settings.
|
||||||
|
- It passes all its parameters down to the dispatchers, which will use them to trigger workers.
|
||||||
|
"""
|
||||||
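The bunch planning in `ytdlp_ops_v01_orchestrator.py` above is a plain list slice. A standalone sketch, using the same variable names as the DAG code, shows how `total_workers` and `workers_per_bunch` map to trigger batches:

```python
total_workers = 5
workers_per_bunch = 2

worker_indices = list(range(total_workers))
bunches = [worker_indices[i:i + workers_per_bunch]
           for i in range(0, len(worker_indices), workers_per_bunch)]

print(bunches)  # [[0, 1], [2, 3], [4]] -> three bunches; the delays apply within and between them
```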
airflow/dags/ytdlp_ops_v01_worker_per_url.py (new file, 1794 lines added)
(File diff suppressed because it is too large.)
airflow/dags/ytdlp_ops_v02_dispatcher_auth.py (new file, 98 lines added)
@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
"""
DAG to dispatch work to ytdlp_ops_worker_per_url_auth DAGs.
It pulls a URL from Redis and triggers an auth worker with a pinned queue.
"""

from __future__ import annotations
import logging
import os
import socket
from datetime import timedelta

from airflow.decorators import task
from airflow.exceptions import AirflowSkipException
from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.api.common.trigger_dag import trigger_dag
from airflow.utils.dates import days_ago

from utils.redis_utils import _get_redis_client

logger = logging.getLogger(__name__)

DEFAULT_QUEUE_NAME = 'queue2_auth'
DEFAULT_REDIS_CONN_ID = 'redis_default'


@task(queue='queue-auth')
def dispatch_url_to_auth_worker(**context):
    """
    Pulls one URL from Redis, determines the current worker's dedicated queue,
    and triggers the auth worker DAG to process the URL on that specific queue.
    """
    ti = context['task_instance']
    logger.info(f"Auth Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.")

    # --- Check for worker pause lock file ---
    lock_file_path = '/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile'
    hostname = socket.gethostname()
    if os.path.exists(lock_file_path):
        logger.info(f"Worker '{hostname}' is paused. Lock file found at '{lock_file_path}'. Skipping URL pull.")
        raise AirflowSkipException(f"Worker '{hostname}' is paused.")
    else:
        logger.info(f"Worker '{hostname}' is active (no lock file found at '{lock_file_path}'). Proceeding to pull URL.")

    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_name = params['queue_name']
    inbox_queue = f"{queue_name}_inbox"

    logger.info(f"Attempting to pull one URL from Redis queue '{inbox_queue}'...")
    client = _get_redis_client(redis_conn_id)
    url_bytes = client.lpop(inbox_queue)

    if not url_bytes:
        logger.info("Redis auth inbox queue is empty. No work to dispatch. Skipping task.")
        raise AirflowSkipException("Redis auth inbox queue is empty. No work to dispatch.")

    url_to_process = url_bytes.decode('utf-8')
    logger.info(f"Pulled URL '{url_to_process}' from the queue.")

    # Determine the worker-specific queue for affinity
    hostname = socket.gethostname()
    worker_queue = f"queue-auth-{hostname}"
    logger.info(f"Running on worker '{hostname}'. Dispatching job to its dedicated queue '{worker_queue}'.")

    conf_to_pass = {**params, 'url_to_process': url_to_process, 'worker_queue': worker_queue}

    run_id = f"worker_run_auth_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}"

    logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_auth' with run_id '{run_id}'")
    trigger_dag(
        dag_id='ytdlp_ops_v02_worker_per_url_auth',
        run_id=run_id,
        conf=conf_to_pass,
        replace_microseconds=False
    )


with DAG(
    dag_id='ytdlp_ops_v02_dispatcher_auth',
    default_args={'owner': 'airflow', 'retries': 0},
    schedule=None,
    start_date=days_ago(1),
    catchup=False,
    tags=['ytdlp', 'worker', 'dispatcher', 'auth'],
    is_paused_upon_creation=True,
    doc_md="""
    ### YT-DLP Auth URL Dispatcher

    This DAG dispatches a single URL to an auth worker with a pinned queue.
    It pulls from the `queue2_auth_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_auth` DAG.
    """,
    render_template_as_native_obj=True,
    params={
        'queue_name': Param(DEFAULT_QUEUE_NAME, type='string', title='Queue Name', description='The base name of the Redis queue to pull URLs from.'),
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'),
    },
) as dag:
    dispatch_url_to_auth_worker()
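Not part of the commit: a minimal sketch of seeding this dispatcher's inbox with redis-py, assuming the `redis_default` Airflow connection points at a Redis instance on localhost:6379, db 0 (adjust to your deployment). The queue name comes from the DAG's default `queue_name` parameter.

```python
import redis

r = redis.Redis(host="localhost", port=6379, db=0)  # assumption: matches 'redis_default'
urls = [
    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # any video URL
]
for url in urls:
    r.rpush("queue2_auth_inbox", url)  # dispatcher LPOPs, so RPUSH keeps FIFO order

print(r.llen("queue2_auth_inbox"), "URL(s) queued")
```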
airflow/dags/ytdlp_ops_v02_dispatcher_dl.py (new file, 89 lines added)
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
"""
DAG to dispatch download jobs to ytdlp_ops_worker_per_url_dl DAGs.
It pulls a job payload from Redis and triggers a download worker.
"""

from __future__ import annotations
import logging
import os
import socket
from datetime import timedelta

from airflow.decorators import task
from airflow.exceptions import AirflowSkipException
from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.api.common.trigger_dag import trigger_dag
from airflow.utils.dates import days_ago

from utils.redis_utils import _get_redis_client

logger = logging.getLogger(__name__)

DEFAULT_QUEUE_NAME = 'queue2_dl'
DEFAULT_REDIS_CONN_ID = 'redis_default'


@task(queue='queue-dl')
def dispatch_job_to_dl_worker(**context):
    """
    Pulls one job payload from Redis, determines the current worker's dedicated queue,
    and triggers the download worker DAG to process the job on that specific queue.
    """
    ti = context['task_instance']
    logger.info(f"Download Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.")

    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_name = params['queue_name']
    inbox_queue = f"{queue_name}_inbox"

    logger.info(f"Attempting to pull one job from Redis queue '{inbox_queue}'...")
    client = _get_redis_client(redis_conn_id)
    job_bytes = client.lpop(inbox_queue)

    if not job_bytes:
        logger.info("Redis download inbox queue is empty. No work to dispatch. Skipping task.")
        raise AirflowSkipException("Redis download inbox queue is empty. No work to dispatch.")

    job_data_str = job_bytes.decode('utf-8')
    logger.info(f"Pulled job from the queue.")

    # Determine the worker-specific queue for affinity
    hostname = socket.gethostname()
    worker_queue = f"queue-dl-{hostname}"
    logger.info(f"Running on worker '{hostname}'. Dispatching job to its dedicated queue '{worker_queue}'.")

    conf_to_pass = {**params, 'job_data': job_data_str, 'worker_queue': worker_queue}

    run_id = f"worker_run_dl_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}"

    logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_dl' with run_id '{run_id}'")
    trigger_dag(
        dag_id='ytdlp_ops_v02_worker_per_url_dl',
        run_id=run_id,
        conf=conf_to_pass,
        replace_microseconds=False
    )


with DAG(
    dag_id='ytdlp_ops_v02_dispatcher_dl',
    default_args={'owner': 'airflow', 'retries': 0},
    schedule=None,
    start_date=days_ago(1),
    catchup=False,
    tags=['ytdlp', 'worker', 'dispatcher', 'download'],
    is_paused_upon_creation=True,
    doc_md="""
    ### YT-DLP Download Job Dispatcher

    This DAG dispatches a single download job to a download worker with a pinned queue.
    It pulls a JSON payload from the `queue2_dl_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_dl` DAG.
    """,
    render_template_as_native_obj=True,
    params={
        'queue_name': Param(DEFAULT_QUEUE_NAME, type='string', title='Queue Name', description='The base name of the Redis queue to pull job payloads from.'),
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'),
    },
) as dag:
    dispatch_job_to_dl_worker()
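Not part of the commit: a sketch of enqueuing a job for this download dispatcher. The actual payload schema is defined by `ytdlp_ops_v02_worker_per_url_dl` (its diff is not shown here), so the field names below are placeholders only; the dispatcher itself just passes the raw JSON string through as `job_data`.

```python
import json
import redis

r = redis.Redis(host="localhost", port=6379, db=0)  # assumption: matches 'redis_default'

# Placeholder payload: 'url' and 'token_data' are hypothetical field names,
# not taken from the worker DAG in this diff.
job = {
    "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    "token_data": {"example": "placeholder"},
}
r.rpush("queue2_dl_inbox", json.dumps(job))
```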
@@ -6,9 +6,7 @@
 # Distributed under terms of the MIT license.

 """
-DAG to orchestrate ytdlp_ops_dispatcher DAG runs based on a defined policy.
-It fetches URLs from a Redis queue and launches dispatchers in controlled bunches,
-which in turn trigger workers with affinity.
+DAG to orchestrate ytdlp_ops_dispatcher_v2_auth DAG runs based on a defined policy.
 """

 from airflow import DAG
@@ -37,8 +35,42 @@ from thrift.transport import TSocket, TTransport
 # Configure logging
 logger = logging.getLogger(__name__)

+DEFAULT_REQUEST_PARAMS_JSON = """{
+    "context_reuse_policy": {
+        "enabled": true,
+        "max_age_seconds": 86400,
+        "reuse_visitor_id": true,
+        "reuse_cookies": true
+    },
+    "token_generation_strategy": {
+        "youtubei_js": {
+            "generate_po_token": true,
+            "generate_gvs_token": true
+        }
+    },
+    "ytdlp_params": {
+        "use_curl_prefetch": false,
+        "token_supplement_strategy": {
+            "youtubepot_bgutilhttp_extractor": {
+                "enabled": true
+            }
+        },
+        "visitor_id_override": {
+            "enabled": true
+        }
+    },
+    "session_params": {
+        "lang": "en-US",
+        "location": "US",
+        "deviceCategory": "MOBILE",
+        "user_agents": {
+            "youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
+            "yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
+        }
+    }
+}"""
+
 # Default settings
-DEFAULT_QUEUE_NAME = 'video_queue'
 DEFAULT_REDIS_CONN_ID = 'redis_default'
 DEFAULT_TOTAL_WORKERS = 3
 DEFAULT_WORKERS_PER_BUNCH = 1
@@ -100,7 +132,12 @@ def orchestrate_workers_ignition_callable(**context):
     logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.")
     logger.info("Starting dispatcher ignition sequence.")

-    dispatcher_dag_id = 'ytdlp_ops_dispatcher'
+    dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_auth'
+    worker_queue = 'queue-auth'
+    app_queue_name = 'queue2_auth'
+
+    logger.info(f"Running in v2 (auth) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'")
+
     dag_model = DagModel.get_dagmodel(dispatcher_dag_id)
     if dag_model and dag_model.is_paused:
         logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.")
@@ -127,13 +164,12 @@ def orchestrate_workers_ignition_callable(**context):
     bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)]

     # --- Inspect Queues before starting ---
-    worker_queue = 'queue-dl'  # The static queue the worker DAG uses.
     try:
         redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
         redis_client = _get_redis_client(redis_conn_id)

         # First, check the application queue for work
-        app_queue_len = _check_application_queue(redis_client, params['queue_name'])
+        app_queue_len = _check_application_queue(redis_client, app_queue_name)

         if params.get('skip_if_queue_empty') and app_queue_len == 0:
             logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.")
@@ -224,26 +260,17 @@ default_args = {
 }

 with DAG(
-    dag_id='ytdlp_ops_orchestrator',
+    dag_id='ytdlp_ops_v02_orchestrator_auth',
     default_args=default_args,
-    schedule_interval=None,  # This DAG runs only when triggered.
+    schedule=None,  # This DAG runs only when triggered.
     max_active_runs=1,  # Only one ignition process should run at a time.
     catchup=False,
-    description='Ignition system for ytdlp_ops_dispatcher DAGs. Starts self-sustaining worker loops via dispatchers.',
+    description='Ignition system for ytdlp_ops_v02_dispatcher_auth DAGs.',
     doc_md="""
-    ### YT-DLP Worker Ignition System
+    ### YT-DLP v2 (Auth) Worker Ignition System

-    This DAG acts as an "ignition system" to start one or more self-sustaining worker loops.
-    It does **not** process URLs itself. Its only job is to trigger a specified number of `ytdlp_ops_dispatcher` DAGs,
-    which in turn pull URLs and trigger `ytdlp_ops_worker_per_url` with worker affinity.
-
-    #### How it Works:
-
-    1. **Manual Trigger:** You manually trigger this DAG with parameters defining how many dispatcher loops to start (`total_workers`), in what configuration (`workers_per_bunch`, delays).
-    2. **Ignition:** The orchestrator triggers the initial set of dispatcher DAGs in a "fire-and-forget" manner, passing all its configuration parameters to them.
-    3. **Completion:** Once all initial dispatchers have been triggered, the orchestrator's job is complete.
-
-    The dispatchers then take over, each pulling a URL, determining affinity, and triggering a worker DAG.
+    This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v2 authentication worker**.
+    It triggers `ytdlp_ops_v02_dispatcher_auth` DAGs, which pull raw URLs from `queue2_auth_inbox` and trigger `ytdlp_ops_v02_worker_per_url_auth` workers.
     """,
     tags=['ytdlp', 'mgmt', 'master'],
     params={
@@ -256,25 +283,60 @@ with DAG(

         # --- Worker Passthrough Parameters ---
         'on_bannable_failure': Param(
-            'stop_loop',
+            'stop_loop_on_auth_proceed_on_download_error',
             type="string",
-            enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error'],
+            enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error'],
             title="[Worker Param] On Bannable Failure Policy",
             description="Policy for a worker when a bannable error occurs. "
-                        "'stop_loop': Ban the account, mark URL as failed, and stop the worker's loop. "
+                        "'stop_loop': Ban the account, mark URL as failed, and stop the worker's loop on any failure (auth or download). "
                         "'retry_with_new_account': Ban the failed account, retry ONCE with a new account. If retry fails, ban the second account and proxy, then stop."
                         "'retry_on_connection_error': If a connection error (e.g. SOCKS timeout) occurs, retry with a new account but do NOT ban the first account/proxy. If retry fails, stop the loop without banning."
+                        "'proceed_loop_under_manual_inspection': **BEWARE: MANUAL SUPERVISION REQUIRED.** Marks the URL as failed but continues the processing loop. Use this only when you can manually intervene by pausing the dispatcher DAG or creating a lock file (`/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile`) to prevent a runaway failure loop."
+                        "'stop_loop_on_auth_proceed_on_download_error': **(Default)** Stops the loop on an authentication/token error (like 'stop_loop'), but continues the loop on a download/probe error (like 'proceed...')."
         ),
-        'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."),
+        'request_params_json': Param(DEFAULT_REQUEST_PARAMS_JSON, type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service.", render_kwargs={"rows": 20, "cols": 120}),
         'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
-        'clients': Param('tv_sample,mweb,web_camoufox', type="string", description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_sample, tv_embedded"),
+        'clients': Param(
+            'mweb,web_camoufox,tv',
+            type="string",
+            enum=[
+                'mweb,web_camoufox,tv',
+                'mweb',
+                'web_camoufox',
+                'tv',
+                'custom',
+                'tv,web_safari,mweb,web_camoufox',
+                'web_safari',
+                'web',
+                'web_embedded',
+                'web_music',
+                'web_creator',
+                'web_safari_camoufox',
+                'web_embedded_camoufox',
+                'web_music_camoufox',
+                'web_creator_camoufox',
+                'mweb_camoufox',
+                'android',
+                'android_music',
+                'android_creator',
+                'android_vr',
+                'ios',
+                'ios_music',
+                'ios_creator',
+                'tv_simply',
+                'tv_embedded',
+            ],
+            title="[Worker Param] Clients",
+            description="[Worker Param] Comma-separated list of clients for token generation. Full list: web, web_safari, web_embedded, web_music, web_creator, mweb, web_camoufox, web_safari_camoufox, web_embedded_camoufox, web_music_camoufox, web_creator_camoufox, mweb_camoufox, android, android_music, android_creator, android_vr, ios, ios_music, ios_creator, tv, tv_simply, tv_embedded. See DAG documentation for details."
+        ),
         'account_pool': Param('ytdlp_account', type="string", description="[Worker Param] Account pool prefix or comma-separated list."),
         'account_pool_size': Param(10, type=["integer", "null"], description="[Worker Param] If using a prefix for 'account_pool', this specifies the number of accounts to generate (e.g., 10 for 'prefix_01' through 'prefix_10'). Required when using a prefix."),
+        'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode. Format: prefix_YYYYMMDDHHMMSS_client_XX."),
         'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string", description="[Worker Param] IP of the ytdlp-ops-server. Default is from Airflow variable YT_AUTH_SERVICE_IP or hardcoded."),
         'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer", description="[Worker Param] Port of the Envoy load balancer. Default is from Airflow variable YT_AUTH_SERVICE_PORT or hardcoded."),
         'machine_id': Param("ytdlp-ops-airflow-service", type="string", description="[Worker Param] Identifier for the client machine."),
+        'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="If provided, forces the token service to use this specific proxy for the request."),
         'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="[Worker Param] If True and all accounts in a prefix-based pool are exhausted, create a new one automatically."),
-        'retrigger_delay_on_empty_s': Param(60, type="integer", description="[Worker Param] Delay in seconds before a worker re-triggers itself if the queue is empty. Set to -1 to stop the loop."),
     }
 ) as dag:

@@ -285,6 +347,6 @@ with DAG(
     orchestrate_task.doc_md = """
     ### Start Worker Loops
     This is the main task that executes the ignition policy.
-    - It triggers `ytdlp_ops_dispatcher` DAGs according to the batch settings.
+    - It triggers `ytdlp_ops_v02_dispatcher_auth` DAGs according to the batch settings.
     - It passes all its parameters down to the dispatchers, which will use them to trigger workers.
     """
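Not part of the commit: a sketch of igniting the v2 auth orchestrator from inside the Airflow environment, overriding a few of the ignition parameters defined above. It reuses the same `trigger_dag` helper the DAGs themselves import; the run_id and conf values are examples.

```python
from airflow.api.common.trigger_dag import trigger_dag

trigger_dag(
    dag_id="ytdlp_ops_v02_orchestrator_auth",
    run_id="manual_ignition_example",   # example run_id
    conf={
        "total_workers": 5,             # start five dispatcher loops
        "workers_per_bunch": 2,         # in bunches of two
        "skip_if_queue_empty": True,    # bail out if queue2_auth_inbox is empty
    },
    replace_microseconds=False,
)
```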
airflow/dags/ytdlp_ops_v02_orchestrator_dl.py (new file, 302 lines added)
@@ -0,0 +1,302 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vim:fenc=utf-8
|
||||||
|
#
|
||||||
|
# Copyright © 2024 rl <rl@rlmbp>
|
||||||
|
#
|
||||||
|
# Distributed under terms of the MIT license.
|
||||||
|
|
||||||
|
"""
|
||||||
|
DAG to orchestrate ytdlp_ops_dispatcher_v2_dl DAG runs based on a defined policy.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from airflow import DAG
|
||||||
|
from airflow.exceptions import AirflowException, AirflowSkipException
|
||||||
|
from airflow.operators.python import PythonOperator
|
||||||
|
from airflow.models.param import Param
|
||||||
|
from airflow.models.variable import Variable
|
||||||
|
from airflow.utils.dates import days_ago
|
||||||
|
from airflow.api.common.trigger_dag import trigger_dag
|
||||||
|
from airflow.models.dagrun import DagRun
|
||||||
|
from airflow.models.dag import DagModel
|
||||||
|
from datetime import timedelta
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Import utility functions
|
||||||
|
from utils.redis_utils import _get_redis_client
|
||||||
|
|
||||||
|
# Import Thrift modules for proxy status check
|
||||||
|
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||||
|
from thrift.protocol import TBinaryProtocol
|
||||||
|
from thrift.transport import TSocket, TTransport
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Default settings
|
||||||
|
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||||
|
DEFAULT_TOTAL_WORKERS = 3
|
||||||
|
DEFAULT_WORKERS_PER_BUNCH = 1
|
||||||
|
DEFAULT_WORKER_DELAY_S = 5
|
||||||
|
DEFAULT_BUNCH_DELAY_S = 20
|
||||||
|
|
||||||
|
# --- Helper Functions ---
|
||||||
|
|
||||||
|
def _check_application_queue(redis_client, queue_base_name: str) -> int:
|
||||||
|
"""Checks and logs the length of the application's inbox queue."""
|
||||||
|
inbox_queue_name = f"{queue_base_name}_inbox"
|
||||||
|
logger.info(f"--- Checking Application Work Queue ---")
|
||||||
|
try:
|
||||||
|
q_len = redis_client.llen(inbox_queue_name)
|
||||||
|
logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).")
|
||||||
|
return q_len
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True)
|
||||||
|
return -1 # Indicate an error
|
||||||
|
|
||||||
|
def _inspect_celery_queues(redis_client, queue_names: list):
|
||||||
|
"""Inspects Celery queues in Redis and logs their status."""
|
||||||
|
logger.info("--- Inspecting Celery Queues in Redis ---")
|
||||||
|
for queue_name in queue_names:
|
||||||
|
try:
|
||||||
|
q_len = redis_client.llen(queue_name)
|
||||||
|
logger.info(f"Queue '{queue_name}': Length = {q_len}")
|
||||||
|
|
||||||
|
if q_len > 0:
|
||||||
|
logger.info(f"Showing up to 10 tasks in '{queue_name}':")
|
||||||
|
# Fetch up to 10 items from the start of the list (queue)
|
||||||
|
items_bytes = redis_client.lrange(queue_name, 0, 9)
|
||||||
|
for i, item_bytes in enumerate(items_bytes):
|
||||||
|
try:
|
||||||
|
# Celery tasks are JSON-encoded strings
|
||||||
|
task_data = json.loads(item_bytes.decode('utf-8'))
|
||||||
|
# Pretty print for readability in logs
|
||||||
|
pretty_task_data = json.dumps(task_data, indent=2)
|
||||||
|
logger.info(f" Task {i+1}:\n{pretty_task_data}")
|
||||||
|
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
||||||
|
logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True)
|
||||||
|
logger.info("--- End of Queue Inspection ---")
|
||||||
|
|
||||||
|
|
||||||
|
# --- Main Orchestration Callable ---
|
||||||
|
|
||||||
|
def orchestrate_workers_ignition_callable(**context):
|
||||||
|
"""
|
||||||
|
Main orchestration logic. Triggers a specified number of dispatcher DAGs
|
||||||
|
to initiate self-sustaining processing loops.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
ti = context['task_instance']
|
||||||
|
logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.")
|
||||||
|
logger.info("Starting dispatcher ignition sequence.")
|
||||||
|
|
||||||
|
dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_dl'
|
||||||
|
worker_queue = 'queue-dl'
|
||||||
|
app_queue_name = 'queue2_dl'
|
||||||
|
|
||||||
|
logger.info(f"Running in v2 (download) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'")
|
||||||
|
|
||||||
|
dag_model = DagModel.get_dagmodel(dispatcher_dag_id)
|
||||||
|
if dag_model and dag_model.is_paused:
|
||||||
|
logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.")
|
||||||
|
raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.")
|
||||||
|
|
||||||
|
total_workers = int(params['total_workers'])
|
||||||
|
workers_per_bunch = int(params['workers_per_bunch'])
|
||||||
|
|
||||||
|
# --- Input Validation ---
|
||||||
|
if total_workers <= 0:
|
||||||
|
logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.")
|
||||||
|
raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).")
|
||||||
|
|
||||||
|
if workers_per_bunch <= 0:
|
||||||
|
logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. Aborting.")
|
||||||
|
raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.")
|
||||||
|
# --- End Input Validation ---
|
||||||
|
|
||||||
|
worker_delay = int(params['delay_between_workers_s'])
|
||||||
|
bunch_delay = int(params['delay_between_bunches_s'])
|
||||||
|
|
||||||
|
# Create a list of worker numbers to trigger
|
||||||
|
worker_indices = list(range(total_workers))
|
||||||
|
bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)]
|
||||||
|
|
||||||
|
# --- Inspect Queues before starting ---
|
||||||
|
try:
|
||||||
|
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||||
|
redis_client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
|
# First, check the application queue for work
|
||||||
|
app_queue_len = _check_application_queue(redis_client, app_queue_name)
|
||||||
|
|
||||||
|
if params.get('skip_if_queue_empty') and app_queue_len == 0:
|
||||||
|
logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.")
|
||||||
|
raise AirflowSkipException("Application work queue is empty.")
|
||||||
|
|
||||||
|
# Then, inspect the target Celery queue for debugging
|
||||||
|
_inspect_celery_queues(redis_client, [worker_queue])
|
||||||
|
except AirflowSkipException:
|
||||||
|
raise # Re-raise to let Airflow handle the skip
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not inspect queues due to an error: {e}. Continuing with ignition sequence.")
|
||||||
|
# --- End of Inspection ---
|
||||||
|
|
||||||
|
logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.")
|
||||||
|
|
||||||
|
dag_run_id = context['dag_run'].run_id
|
||||||
|
total_triggered = 0
|
||||||
|
|
||||||
|
for i, bunch in enumerate(bunches):
|
||||||
|
logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---")
|
||||||
|
for j, _ in enumerate(bunch):
|
||||||
|
# Create a unique run_id for each dispatcher run
|
||||||
|
run_id = f"dispatched_{dag_run_id}_{total_triggered}"
|
||||||
|
|
||||||
|
# Pass all orchestrator params to the dispatcher, which will then pass them to the worker.
|
||||||
|
conf_to_pass = {p: params[p] for p in params}
|
||||||
|
|
||||||
|
logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})")
|
||||||
|
logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}")
|
||||||
|
|
||||||
|
trigger_dag(
|
||||||
|
dag_id=dispatcher_dag_id,
|
||||||
|
run_id=run_id,
|
||||||
|
conf=conf_to_pass,
|
||||||
|
replace_microseconds=False
|
||||||
|
)
|
||||||
|
total_triggered += 1
|
||||||
|
|
||||||
|
# Delay between dispatches in a bunch
|
||||||
|
if j < len(bunch) - 1:
|
||||||
|
logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...")
|
||||||
|
time.sleep(worker_delay)
|
||||||
|
|
||||||
|
# Delay between bunches
|
||||||
|
if i < len(bunches) - 1:
|
||||||
|
logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---")
|
||||||
|
time.sleep(bunch_delay)
|
||||||
|
|
||||||
|
logger.info(f"--- Ignition sequence complete. Total dispatcher runs triggered: {total_triggered}. ---")
|
||||||
|
|
||||||
|
# --- Final Queue Inspection ---
|
||||||
|
final_check_delay = 30 # seconds
|
||||||
|
logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...")
|
||||||
|
time.sleep(final_check_delay)
|
||||||
|
|
||||||
|
try:
|
||||||
|
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||||
|
redis_client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
|
# Log connection details for debugging broker mismatch issues
|
||||||
|
conn_kwargs = redis_client.connection_pool.connection_kwargs
|
||||||
|
logger.info(f"Final check using Redis connection '{redis_conn_id}': "
|
||||||
|
f"host={conn_kwargs.get('host')}, "
|
||||||
|
f"port={conn_kwargs.get('port')}, "
|
||||||
|
f"db={conn_kwargs.get('db')}")
|
||||||
|
|
||||||
|
_inspect_celery_queues(redis_client, [worker_queue])
|
||||||
|
logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. "
|
||||||
|
"If queues are empty, workers have started processing.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DAG Definition
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
default_args = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'depends_on_past': False,
|
||||||
|
'email_on_failure': False,
|
||||||
|
'email_on_retry': False,
|
||||||
|
'retries': 1,
|
||||||
|
'retry_delay': timedelta(minutes=1),
|
||||||
|
'start_date': days_ago(1),
|
||||||
|
}
|
||||||
|
|
||||||
|
with DAG(
|
||||||
|
dag_id='ytdlp_ops_v02_orchestrator_dl',
|
||||||
|
default_args=default_args,
|
||||||
|
schedule=None, # This DAG runs only when triggered.
|
||||||
|
max_active_runs=1, # Only one ignition process should run at a time.
|
||||||
|
catchup=False,
|
||||||
|
description='Ignition system for ytdlp_ops_v02_dispatcher_dl DAGs.',
|
||||||
|
doc_md="""
|
||||||
|
### YT-DLP v2 (Download) Worker Ignition System
|
||||||
|
|
||||||
|
This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v2 download worker**.
|
||||||
|
It triggers `ytdlp_ops_v02_dispatcher_dl` DAGs, which pull job payloads from `queue2_dl_inbox` and trigger `ytdlp_ops_v02_worker_per_url_dl` workers.
|
||||||
|
""",
|
||||||
|
tags=['ytdlp', 'mgmt', 'master'],
|
||||||
|
params={
|
||||||
|
# --- Ignition Control Parameters ---
|
||||||
|
'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."),
|
||||||
|
'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."),
|
||||||
|
'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."),
|
||||||
|
'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."),
|
||||||
|
'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."),
|
||||||
|
'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
|
||||||
|
'clients': Param('mweb,web_camoufox,tv', type="string", title="[Worker Param] Clients", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
|
||||||
|
|
||||||
|
# --- Download Control Parameters ---
|
||||||
|
'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."),
|
||||||
|
'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."),
|
||||||
|
'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."),
|
||||||
|
'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
|
||||||
|
'fragment_retries': Param(2, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."),
|
||||||
|
'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."),
|
||||||
|
'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."),
|
||||||
|
'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."),
|
||||||
|
'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."),
|
||||||
|
'download_format_preset': Param(
|
||||||
|
'formats_2',
|
||||||
|
type="string",
|
||||||
|
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
|
||||||
|
title="[Worker Param] Download Format Preset",
|
||||||
|
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
|
||||||
|
),
|
||||||
|
'download_format_custom': Param(
|
||||||
|
'18,140,299/298/137/136/135/134/133',
|
||||||
|
type="string",
|
||||||
|
title="[Worker Param] Custom Download Format",
|
||||||
|
description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
|
||||||
|
),
|
||||||
|
'downloader': Param(
|
||||||
|
'default',
|
||||||
|
type="string",
|
||||||
|
enum=['default', 'aria2c'],
|
||||||
|
title="[Worker Param] Downloader",
|
||||||
|
description="Choose the downloader for yt-dlp."
|
||||||
|
),
|
||||||
|
'downloader_args_aria2c': Param(
|
||||||
|
'aria2c:-x 4 -k 2M --max-download-limit=3M',
|
||||||
|
type="string",
|
||||||
|
title="[Worker Param] Aria2c Downloader Arguments",
|
||||||
|
description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
|
||||||
|
),
|
||||||
|
'yt_dlp_extra_args': Param(
|
||||||
|
'--restrict-filenames',
|
||||||
|
type=["string", "null"],
|
||||||
|
title="[Worker Param] Extra yt-dlp arguments",
|
||||||
|
description="Extra command-line arguments for yt-dlp during download."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
) as dag:
|
||||||
|
|
||||||
|
orchestrate_task = PythonOperator(
|
||||||
|
task_id='start_worker_loops',
|
||||||
|
python_callable=orchestrate_workers_ignition_callable,
|
||||||
|
)
|
||||||
|
orchestrate_task.doc_md = """
|
||||||
|
### Start Worker Loops
|
||||||
|
This is the main task that executes the ignition policy.
|
||||||
|
- It triggers `ytdlp_ops_v02_dispatcher_dl` DAGs according to the batch settings.
|
||||||
|
- It passes all its parameters down to the dispatchers, which will use them to trigger workers.
|
||||||
|
"""
|
||||||
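A worked example of the bunching arithmetic described in the doc above can help when sizing an ignition run. This is an illustrative sketch only, not part of the committed DAG, and the parameter values are hypothetical.

# Illustrative sketch of the orchestrator's bunching behaviour (not part of the DAG).
# Assumed inputs: total_workers=5, workers_per_bunch=2, 10s/60s delays.
total_workers = 5
workers_per_bunch = 2
delay_between_workers_s = 10
delay_between_bunches_s = 60

worker_indices = list(range(total_workers))
bunches = [worker_indices[i:i + workers_per_bunch]
           for i in range(0, len(worker_indices), workers_per_bunch)]
print(bunches)  # [[0, 1], [2, 3], [4]] -> 5 dispatcher runs in 3 bunches

# Rough wall-clock estimate of the ignition sequence itself (trigger latency ignored):
# len(bunch)-1 worker delays inside each bunch, plus one bunch delay between bunches.
total_s = (sum((len(b) - 1) * delay_between_workers_s for b in bunches)
           + (len(bunches) - 1) * delay_between_bunches_s)
print(total_s)  # 140 seconds for this example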
@ -6,10 +6,10 @@
|
|||||||
# Distributed under terms of the MIT license.
|
# Distributed under terms of the MIT license.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
DAG for processing a single YouTube URL passed via DAG run configuration.
|
DAG for authenticating a single YouTube URL passed via DAG run configuration.
|
||||||
This is the "Worker" part of a Sensor/Worker pattern.
|
This is the "Auth Worker" part of a separated Auth/Download pattern.
|
||||||
This DAG has been refactored to use the TaskFlow API to implement worker affinity,
|
It acquires a token, saves the info.json, and pushes the token data to a
|
||||||
ensuring all tasks for a single URL run on the same machine.
|
Redis queue for the download worker.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@ -24,12 +24,15 @@ from airflow.operators.dummy import DummyOperator
|
|||||||
from airflow.utils.dates import days_ago
|
from airflow.utils.dates import days_ago
|
||||||
from airflow.utils.task_group import TaskGroup
|
from airflow.utils.task_group import TaskGroup
|
||||||
from airflow.api.common.trigger_dag import trigger_dag
|
from airflow.api.common.trigger_dag import trigger_dag
|
||||||
|
from copy import copy
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
import concurrent.futures
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import redis
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
@ -37,7 +40,7 @@ import uuid
|
|||||||
|
|
||||||
# Import utility functions and Thrift modules
|
# Import utility functions and Thrift modules
|
||||||
from utils.redis_utils import _get_redis_client
|
from utils.redis_utils import _get_redis_client
|
||||||
from pangramia.yt.common.ttypes import TokenUpdateMode
|
from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext
|
||||||
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
||||||
from pangramia.yt.tokens_ops import YTTokenOpService
|
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||||
from thrift.protocol import TBinaryProtocol
|
from thrift.protocol import TBinaryProtocol
|
||||||
@ -47,20 +50,114 @@ from thrift.transport.TTransport import TTransportException
|
|||||||
# Configure logging
|
# Configure logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Client Stats Helper ---
|
||||||
|
|
||||||
|
def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str):
|
||||||
|
"""Updates success/failure statistics for a client type in Redis."""
|
||||||
|
if not clients_str:
|
||||||
|
logger.warning("Cannot update client stats: 'clients' string is empty.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Assumption: The service tries clients in the order provided.
|
||||||
|
# We attribute the result to the first client in the list.
|
||||||
|
primary_client = clients_str.split(',')[0].strip()
|
||||||
|
if not primary_client:
|
||||||
|
logger.warning("Cannot update client stats: could not determine primary client.")
|
||||||
|
return
|
||||||
|
|
||||||
|
stats_key = "client_stats"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Using a pipeline with WATCH for safe concurrent updates.
|
||||||
|
with redis_client.pipeline() as pipe:
|
||||||
|
pipe.watch(stats_key)
|
||||||
|
|
||||||
|
current_stats_json = redis_client.hget(stats_key, primary_client)
|
||||||
|
stats = {}
|
||||||
|
if current_stats_json:
|
||||||
|
try:
|
||||||
|
stats = json.loads(current_stats_json)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning(f"Could not parse existing stats for client '{primary_client}'. Resetting stats.")
|
||||||
|
stats = {}
|
||||||
|
|
||||||
|
stats.setdefault('success_count', 0)
|
||||||
|
stats.setdefault('failure_count', 0)
|
||||||
|
|
||||||
|
details = {
|
||||||
|
'timestamp': time.time(), 'url': url,
|
||||||
|
'machine_id': machine_id, 'dag_run_id': dag_run_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
if status == 'success':
|
||||||
|
stats['success_count'] += 1
|
||||||
|
stats['latest_success'] = details
|
||||||
|
elif status == 'failure':
|
||||||
|
stats['failure_count'] += 1
|
||||||
|
stats['latest_failure'] = details
|
||||||
|
|
||||||
|
pipe.multi()
|
||||||
|
pipe.hset(stats_key, primary_client, json.dumps(stats))
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.")
|
||||||
|
|
||||||
|
except redis.exceptions.WatchError:
|
||||||
|
logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. Skipping this update.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True)
|
||||||
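To inspect the statistics written by _update_client_stats from outside Airflow, a minimal read-side sketch is shown below. Only the 'client_stats' hash name and the success_count/failure_count/latest_* fields come from the code above; the Redis connection details are illustrative placeholders.

# Sketch: dump the per-client stats hash maintained by _update_client_stats.
# host/port/db are placeholders; point them at the Redis instance used by the DAGs.
import json
import redis

r = redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)
for client_name, raw in r.hgetall("client_stats").items():
    stats = json.loads(raw)
    print(client_name,
          "success:", stats.get("success_count", 0),
          "failure:", stats.get("failure_count", 0),
          "last_failure_url:", stats.get("latest_failure", {}).get("url"))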
|
|
||||||
|
|
||||||
# Default settings from Airflow Variables or hardcoded fallbacks
|
# Default settings from Airflow Variables or hardcoded fallbacks
|
||||||
DEFAULT_QUEUE_NAME = 'video_queue'
|
DEFAULT_QUEUE_NAME = 'queue2_auth'
|
||||||
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||||
DEFAULT_TIMEOUT = 3600
|
DEFAULT_TIMEOUT = 3600
|
||||||
DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
|
DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
|
||||||
DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
|
DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
|
||||||
|
|
||||||
|
DEFAULT_REQUEST_PARAMS = {
|
||||||
|
"context_reuse_policy": {
|
||||||
|
"enabled": True,
|
||||||
|
"max_age_seconds": 86400,
|
||||||
|
"reuse_visitor_id": True,
|
||||||
|
"reuse_cookies": True
|
||||||
|
},
|
||||||
|
"token_generation_strategy": {
|
||||||
|
"youtubei_js": {
|
||||||
|
"generate_po_token": True,
|
||||||
|
"generate_gvs_token": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ytdlp_params": {
|
||||||
|
"use_curl_prefetch": False,
|
||||||
|
"token_supplement_strategy": {
|
||||||
|
"youtubepot_bgutilhttp_extractor": {
|
||||||
|
"enabled": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"visitor_id_override": {
|
||||||
|
"enabled": True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"session_params": {
|
||||||
|
"lang": "en-US",
|
||||||
|
"location": "US",
|
||||||
|
"deviceCategory": "MOBILE",
|
||||||
|
"user_agents": {
|
||||||
|
"youtubei_js": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)",
|
||||||
|
"yt_dlp": "Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
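Because the worker exposes these defaults through the 'request_params_json' DAG param as a JSON string, a caller can override individual fields without retyping the whole structure. The snippet below is a hedged sketch meant to run in the context of this DAG module; the overridden values are arbitrary examples.

# Sketch: build a custom request_params_json for a manual run of the auth worker.
# Assumes DEFAULT_REQUEST_PARAMS (defined above) is in scope; overrides are examples only.
import copy
import json

custom = copy.deepcopy(DEFAULT_REQUEST_PARAMS)
custom["session_params"]["lang"] = "en-GB"           # example override
custom["ytdlp_params"]["use_curl_prefetch"] = True   # example override

dag_run_conf = {"request_params_json": json.dumps(custom)}
# dag_run_conf can then be supplied as the conf payload when triggering the worker DAG.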
|
|
||||||
# The queue is set to a fallback here. The actual worker-specific queue is
|
# The queue is set to a fallback here. The actual worker-specific queue is
|
||||||
# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py),
|
# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py),
|
||||||
# which parses the target queue from the DAG run_id.
|
# which parses the target queue from the DAG run_id.
|
||||||
DEFAULT_ARGS = {
|
DEFAULT_ARGS = {
|
||||||
'owner': 'airflow',
|
'owner': 'airflow',
|
||||||
'retries': 0,
|
'retries': 0,
|
||||||
'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook.
|
'queue': 'queue-auth', # Fallback queue. Will be overridden by the policy hook.
|
||||||
}
|
}
|
||||||
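The comment above points at airflow/config/custom_task_hooks.py, which is not part of this diff. Purely to illustrate the mechanism, a cluster-policy hook of that kind could look like the sketch below; the run_id layout ('worker_run_<queue>_...') is an assumption made for the example, not something this commit defines.

# Hypothetical sketch of a task_instance_mutation_hook; the real implementation lives in
# airflow/config/custom_task_hooks.py and is not shown in this commit.
# ASSUMPTION: the dispatcher encodes the target Celery queue in the run_id,
# e.g. "worker_run_queue-auth_20240101T000000_abcd1234".
def task_instance_mutation_hook(task_instance):
    run_id = getattr(task_instance, "run_id", "") or ""
    if run_id.startswith("worker_run_"):
        parts = run_id.split("_")
        if len(parts) >= 3:
            task_instance.queue = parts[2]  # "queue-auth" for the example run_id above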
|
|
||||||
|
|
||||||
@ -105,7 +202,15 @@ def _get_account_pool(params: dict) -> list:
|
|||||||
if pool_size_param is not None:
|
if pool_size_param is not None:
|
||||||
is_prefix_mode = True
|
is_prefix_mode = True
|
||||||
pool_size = int(pool_size_param)
|
pool_size = int(pool_size_param)
|
||||||
accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
|
|
||||||
|
if params.get('prepend_client_to_account', True):
|
||||||
|
clients_str = params.get('clients', '')
|
||||||
|
primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown'
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
|
||||||
|
new_prefix = f"{prefix}_{timestamp}_{primary_client}"
|
||||||
|
accounts = [f"{new_prefix}_{i:02d}" for i in range(1, pool_size + 1)]
|
||||||
|
else:
|
||||||
|
accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
|
||||||
else:
|
else:
|
||||||
accounts = [prefix]
|
accounts = [prefix]
|
||||||
|
|
||||||
@ -140,6 +245,61 @@ def _get_account_pool(params: dict) -> list:
|
|||||||
logger.info(f"Final active account pool with {len(accounts)} accounts.")
|
logger.info(f"Final active account pool with {len(accounts)} accounts.")
|
||||||
return accounts
|
return accounts
|
||||||
|
|
||||||
|
@task
|
||||||
|
def list_available_formats(token_data: dict, **context):
|
||||||
|
"""
|
||||||
|
Lists available formats for the given video using the info.json.
|
||||||
|
This is for debugging and informational purposes.
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import shlex
|
||||||
|
|
||||||
|
info_json_path = token_data.get('info_json_path')
|
||||||
|
if not (info_json_path and os.path.exists(info_json_path)):
|
||||||
|
logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).")
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
cmd = [
|
||||||
|
'yt-dlp',
|
||||||
|
'--verbose',
|
||||||
|
'--list-formats',
|
||||||
|
'--load-info-json', info_json_path,
|
||||||
|
]
|
||||||
|
|
||||||
|
copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
|
||||||
|
logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}")
|
||||||
|
|
||||||
|
process = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||||
|
|
||||||
|
if process.stderr:
|
||||||
|
logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}")
|
||||||
|
|
||||||
|
if process.returncode != 0:
|
||||||
|
logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}")
|
||||||
|
|
||||||
|
available_formats = []
|
||||||
|
if process.stdout:
|
||||||
|
logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---")
|
||||||
|
# Parse the output to get format IDs
|
||||||
|
lines = process.stdout.split('\n')
|
||||||
|
header_found = False
|
||||||
|
for line in lines:
|
||||||
|
if line.startswith('ID '):
|
||||||
|
header_found = True
|
||||||
|
continue
|
||||||
|
if header_found and line.strip() and line.strip()[0].isdigit():
|
||||||
|
format_id = line.split()[0]
|
||||||
|
available_formats.append(format_id)
|
||||||
|
logger.info(f"Parsed available format IDs: {available_formats}")
|
||||||
|
|
||||||
|
return available_formats
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True)
|
||||||
|
return []
|
||||||
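Below is a small, self-contained illustration of the parsing loop in list_available_formats, run against a fabricated excerpt of yt-dlp's format table. Note that the leading-digit check means IDs that do not start with a digit (storyboard 'sb0' or HLS-style IDs, for instance) are not collected; the sample lines are made up for the example.

# Illustrative run of the parsing logic above on a fabricated --list-formats excerpt.
sample_stdout = """\
[info] Available formats for iPwdia3gAnk:
ID      EXT   RESOLUTION FPS | FILESIZE   TBR PROTO | VCODEC
sb0     mhtml 48x27        1 |                 mhtml | images
18      mp4   640x360     30 |   3.5MiB  500k https | avc1
140     m4a   audio only     |   3.1MiB  129k https | audio only
"""

available_formats = []
header_found = False
for line in sample_stdout.split('\n'):
    if line.startswith('ID '):
        header_found = True
        continue
    if header_found and line.strip() and line.strip()[0].isdigit():
        available_formats.append(line.split()[0])

print(available_formats)  # ['18', '140'] -- 'sb0' is skipped by the leading-digit check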
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# TASK DEFINITIONS (TaskFlow API)
|
# TASK DEFINITIONS (TaskFlow API)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@ -178,12 +338,36 @@ def get_url_and_assign_account(**context):
|
|||||||
logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.")
|
logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.")
|
||||||
# --- End Verification ---
|
# --- End Verification ---
|
||||||
|
|
||||||
# The URL is passed by the dispatcher DAG.
|
# The URL is passed by the dispatcher DAG via 'url_to_process'.
|
||||||
|
# For manual runs, we fall back to 'manual_url_to_process'.
|
||||||
url_to_process = params.get('url_to_process')
|
url_to_process = params.get('url_to_process')
|
||||||
if not url_to_process:
|
if not url_to_process:
|
||||||
raise AirflowException("'url_to_process' was not found in the DAG run configuration.")
|
url_to_process = params.get('manual_url_to_process')
|
||||||
|
if url_to_process:
|
||||||
|
logger.info(f"Using URL from manual run parameter: '{url_to_process}'")
|
||||||
|
|
||||||
|
if not url_to_process:
|
||||||
|
raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter.")
|
||||||
logger.info(f"Received URL '{url_to_process}' to process.")
|
logger.info(f"Received URL '{url_to_process}' to process.")
|
||||||
|
|
||||||
|
# Mark the URL as in-progress in Redis
|
||||||
|
try:
|
||||||
|
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||||
|
queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME)
|
||||||
|
progress_queue = f"{queue_name}_progress"
|
||||||
|
client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
|
progress_data = {
|
||||||
|
'status': 'in_progress',
|
||||||
|
'start_time': time.time(),
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
'hostname': socket.gethostname(),
|
||||||
|
}
|
||||||
|
client.hset(progress_queue, url_to_process, json.dumps(progress_data))
|
||||||
|
logger.info(f"Marked URL '{url_to_process}' as in-progress.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True)
|
||||||
|
|
||||||
# Account assignment logic is the same as before.
|
# Account assignment logic is the same as before.
|
||||||
account_id = random.choice(_get_account_pool(params))
|
account_id = random.choice(_get_account_pool(params))
|
||||||
logger.info(f"Selected account '{account_id}' for this run.")
|
logger.info(f"Selected account '{account_id}' for this run.")
|
||||||
@ -206,22 +390,100 @@ def get_token(initial_data: dict, **context):
|
|||||||
|
|
||||||
host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
|
host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
|
||||||
machine_id = params.get('machine_id') or socket.gethostname()
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
clients = params.get('clients')
|
||||||
|
request_params_json = params.get('request_params_json', '{}')
|
||||||
|
assigned_proxy_url = params.get('assigned_proxy_url')
|
||||||
|
|
||||||
|
# Pretty-print the request parameters for debugging
|
||||||
|
try:
|
||||||
|
pretty_request_params = json.dumps(json.loads(request_params_json), indent=2)
|
||||||
|
logger.info(f"\n--- Request Parameters ---\n{pretty_request_params}\n--- End of Request Parameters ---")
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
logger.warning("Could not parse request_params_json. Using raw content.")
|
||||||
|
logger.info(f"\n--- Raw Request Parameters ---\n{request_params_json}\n--- End of Raw Request Parameters ---")
|
||||||
|
|
||||||
|
# Construct Airflow log context to pass to the service
|
||||||
|
try:
|
||||||
|
from airflow.configuration import conf
|
||||||
|
remote_base = conf.get('logging', 'remote_base_log_folder')
|
||||||
|
log_path = (
|
||||||
|
f"{remote_base}/dag_id={ti.dag_id}/run_id={ti.run_id}/"
|
||||||
|
f"task_id={ti.task_id}/attempt={ti.try_number}.log"
|
||||||
|
)
|
||||||
|
airflow_log_context = AirflowLogContext(
|
||||||
|
logS3Path=log_path,
|
||||||
|
dagId=ti.dag_id,
|
||||||
|
runId=ti.run_id,
|
||||||
|
taskId=ti.task_id,
|
||||||
|
tryNumber=ti.try_number,
|
||||||
|
workerHostname=socket.gethostname(),
|
||||||
|
queue=ti.queue
|
||||||
|
)
|
||||||
|
logger.info(f"Constructed Airflow log context for yt-ops service: {airflow_log_context}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not construct full Airflow log context: {e}. Creating a basic one.")
|
||||||
|
airflow_log_context = AirflowLogContext(
|
||||||
|
dagId=ti.dag_id,
|
||||||
|
runId=ti.run_id,
|
||||||
|
taskId=ti.task_id,
|
||||||
|
tryNumber=ti.try_number,
|
||||||
|
workerHostname=socket.gethostname(),
|
||||||
|
queue=ti.queue
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' ---")
|
logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}) ---")
|
||||||
client, transport = None, None
|
client, transport = None, None
|
||||||
try:
|
try:
|
||||||
client, transport = _get_thrift_client(host, port, timeout)
|
client, transport = _get_thrift_client(host, port, timeout)
|
||||||
token_data = client.getOrRefreshToken(accountId=account_id, updateType=TokenUpdateMode.AUTO, url=url, clients=params.get('clients'), machineId=machine_id)
|
token_data = client.getOrRefreshToken(
|
||||||
|
accountId=account_id,
|
||||||
|
updateType=TokenUpdateMode.AUTO,
|
||||||
|
url=url,
|
||||||
|
clients=clients,
|
||||||
|
machineId=machine_id,
|
||||||
|
airflowLogContext=airflow_log_context,
|
||||||
|
requestParamsJson=request_params_json,
|
||||||
|
assignedProxyUrl=assigned_proxy_url
|
||||||
|
)
|
||||||
|
|
||||||
|
# Log a compact summary of the Thrift response, omitting large/detailed fields.
|
||||||
|
summary_token_data = copy(token_data)
|
||||||
|
if hasattr(summary_token_data, 'infoJson') and summary_token_data.infoJson:
|
||||||
|
summary_token_data.infoJson = f"... ({len(summary_token_data.infoJson)} bytes) ..."
|
||||||
|
if hasattr(summary_token_data, 'cookiesBlob') and summary_token_data.cookiesBlob:
|
||||||
|
summary_token_data.cookiesBlob = f"... ({len(summary_token_data.cookiesBlob)} bytes) ..."
|
||||||
|
# These will be logged separately below.
|
||||||
|
if hasattr(summary_token_data, 'requestSummary'):
|
||||||
|
summary_token_data.requestSummary = "..."
|
||||||
|
if hasattr(summary_token_data, 'communicationLogPaths'):
|
||||||
|
summary_token_data.communicationLogPaths = "..."
|
||||||
|
logger.info(f"Thrift service response summary: {summary_token_data}")
|
||||||
|
|
||||||
|
request_summary = getattr(token_data, 'requestSummary', None)
|
||||||
|
if request_summary:
|
||||||
|
# Prepending a newline for better separation in logs.
|
||||||
|
logger.info(f"\n--- Request Summary ---\n{request_summary}")
|
||||||
|
|
||||||
|
communication_log_paths = getattr(token_data, 'communicationLogPaths', None)
|
||||||
|
if communication_log_paths:
|
||||||
|
logger.info("--- Communication Log Paths ---")
|
||||||
|
for path in communication_log_paths:
|
||||||
|
logger.info(f" - {path}")
|
||||||
|
|
||||||
info_json = getattr(token_data, 'infoJson', None)
|
info_json = getattr(token_data, 'infoJson', None)
|
||||||
if not (info_json and json.loads(info_json)):
|
if not (info_json and json.loads(info_json)):
|
||||||
raise AirflowException("Service returned success but info.json was empty or invalid.")
|
raise AirflowException("Service returned success but info.json was empty or invalid.")
|
||||||
|
|
||||||
video_id = _extract_video_id(url)
|
video_id = _extract_video_id(url)
|
||||||
os.makedirs(info_json_dir, exist_ok=True)
|
|
||||||
# Use a readable timestamp for a unique filename on each attempt.
|
|
||||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json")
|
|
||||||
|
# Create a unique directory for this job's artifacts
|
||||||
|
job_dir_name = f"{timestamp}-{video_id or 'unknown'}"
|
||||||
|
job_dir_path = os.path.join(info_json_dir, job_dir_name)
|
||||||
|
os.makedirs(job_dir_path, exist_ok=True)
|
||||||
|
|
||||||
|
info_json_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json"
|
||||||
|
info_json_path = os.path.join(job_dir_path, info_json_filename)
|
||||||
with open(info_json_path, 'w', encoding='utf-8') as f:
|
with open(info_json_path, 'w', encoding='utf-8') as f:
|
||||||
f.write(info_json)
|
f.write(info_json)
|
||||||
|
|
||||||
@ -232,6 +494,7 @@ def get_token(initial_data: dict, **context):
|
|||||||
'ytdlp_command': getattr(token_data, 'ytdlpCommand', None),
|
'ytdlp_command': getattr(token_data, 'ytdlpCommand', None),
|
||||||
'successful_account_id': account_id,
|
'successful_account_id': account_id,
|
||||||
'original_url': url, # Include original URL for fallback
|
'original_url': url, # Include original URL for fallback
|
||||||
|
'clients': clients, # Pass clients string for accurate stats
|
||||||
}
|
}
|
||||||
except (PBServiceException, PBUserException, TTransportException) as e:
|
except (PBServiceException, PBUserException, TTransportException) as e:
|
||||||
error_context = getattr(e, 'context', None)
|
error_context = getattr(e, 'context', None)
|
||||||
@ -297,8 +560,11 @@ def handle_bannable_error_branch(task_id_to_check: str, **context):
|
|||||||
return 'ban_account_and_prepare_for_retry'
|
return 'ban_account_and_prepare_for_retry'
|
||||||
if policy in ['retry_on_connection_error', 'retry_without_ban']:
|
if policy in ['retry_on_connection_error', 'retry_without_ban']:
|
||||||
return 'assign_new_account_for_direct_retry'
|
return 'assign_new_account_for_direct_retry'
|
||||||
if policy == 'stop_loop':
|
if policy in ['stop_loop', 'stop_loop_on_auth_proceed_on_download_error']:
|
||||||
return 'ban_and_report_immediately'
|
return 'ban_and_report_immediately'
|
||||||
|
if policy == 'proceed_loop_under_manual_inspection':
|
||||||
|
logger.warning(f"Bannable error with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.")
|
||||||
|
return 'report_bannable_and_continue'
|
||||||
|
|
||||||
# Any other error is considered fatal for this run.
|
# Any other error is considered fatal for this run.
|
||||||
logger.error(f"Unhandled or non-retryable error '{error_code}' from '{task_id_to_check}'. Marking as fatal.")
|
logger.error(f"Unhandled or non-retryable error '{error_code}' from '{task_id_to_check}'. Marking as fatal.")
|
||||||
@ -447,121 +713,43 @@ def ban_and_report_immediately(initial_data: dict, reason: str, **context):
|
|||||||
return initial_data # Pass data along if needed by reporting
|
return initial_data # Pass data along if needed by reporting
|
||||||
|
|
||||||
@task
|
@task
|
||||||
def download_and_probe(token_data: dict, **context):
|
def push_auth_success_to_redis(initial_data: dict, token_data: dict, **context):
|
||||||
"""
|
"""
|
||||||
Uses the retrieved token data to download and probe the media file.
|
On successful token acquisition, pushes the complete token data to the
|
||||||
This version uses subprocess directly with an argument list for better security and clarity.
|
Redis queue for the download worker and records the auth success.
|
||||||
"""
|
"""
|
||||||
import subprocess
|
|
||||||
import shlex
|
|
||||||
|
|
||||||
params = context['params']
|
|
||||||
info_json_path = token_data.get('info_json_path')
|
|
||||||
proxy = token_data.get('socks_proxy')
|
|
||||||
original_url = token_data.get('original_url')
|
|
||||||
download_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles/video')
|
|
||||||
|
|
||||||
download_format = params.get('download_format', 'ba[ext=m4a]/bestaudio/best')
|
|
||||||
output_template = params.get('output_path_template', "%(title)s [%(id)s].%(ext)s")
|
|
||||||
full_output_path = os.path.join(download_dir, output_template)
|
|
||||||
retry_on_probe_failure = params.get('retry_on_probe_failure', False)
|
|
||||||
|
|
||||||
if not (info_json_path and os.path.exists(info_json_path)):
|
|
||||||
raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).")
|
|
||||||
|
|
||||||
def run_yt_dlp():
|
|
||||||
"""Constructs and runs the yt-dlp command, returning the final filename."""
|
|
||||||
cmd = [
|
|
||||||
'yt-dlp',
|
|
||||||
'--verbose',
|
|
||||||
'--load-info-json', info_json_path,
|
|
||||||
'-f', download_format,
|
|
||||||
'-o', full_output_path,
|
|
||||||
'--print', 'filename',
|
|
||||||
'--continue',
|
|
||||||
'--no-progress',
|
|
||||||
'--no-simulate',
|
|
||||||
'--no-write-info-json',
|
|
||||||
'--ignore-errors',
|
|
||||||
'--no-playlist',
|
|
||||||
]
|
|
||||||
if proxy:
|
|
||||||
cmd.extend(['--proxy', proxy])
|
|
||||||
|
|
||||||
# Crucially, add the original URL to allow yt-dlp to refresh expired download links,
|
|
||||||
# which is the most common cause of HTTP 403 errors.
|
|
||||||
if original_url:
|
|
||||||
cmd.append(original_url)
|
|
||||||
|
|
||||||
copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
|
|
||||||
logger.info(f"Executing yt-dlp command: {copy_paste_cmd}")
|
|
||||||
|
|
||||||
process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
|
|
||||||
|
|
||||||
if process.returncode != 0:
|
|
||||||
logger.error(f"yt-dlp failed with exit code {process.returncode}")
|
|
||||||
logger.error(f"STDOUT: {process.stdout}")
|
|
||||||
logger.error(f"STDERR: {process.stderr}")
|
|
||||||
raise AirflowException("yt-dlp command failed.")
|
|
||||||
|
|
||||||
# Get the last line of stdout, which should be the filename
|
|
||||||
final_filename = process.stdout.strip().split('\n')[-1]
|
|
||||||
if not (final_filename and os.path.exists(final_filename)):
|
|
||||||
logger.error(f"Download command finished but the output file does not exist: '{final_filename}'")
|
|
||||||
logger.error(f"Full STDOUT:\n{process.stdout}")
|
|
||||||
logger.error(f"Full STDERR:\n{process.stderr}")
|
|
||||||
raise AirflowException(f"Download failed or did not produce a file: {final_filename}")
|
|
||||||
|
|
||||||
logger.info(f"SUCCESS: Download complete. Final file at: {final_filename}")
|
|
||||||
return final_filename
|
|
||||||
|
|
||||||
def run_ffmpeg_probe(filename):
|
|
||||||
"""Probes the given file with ffmpeg to check for corruption."""
|
|
||||||
logger.info(f"Probing downloaded file: {filename}")
|
|
||||||
try:
|
|
||||||
subprocess.run(['ffmpeg', '-v', 'error', '-i', filename, '-f', 'null', '-'], check=True, capture_output=True, text=True)
|
|
||||||
logger.info("SUCCESS: Probe confirmed valid media file.")
|
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
logger.error(f"ffmpeg probe check failed for '{filename}'. The file might be corrupt.")
|
|
||||||
logger.error(f"ffmpeg STDERR: {e.stderr}")
|
|
||||||
raise AirflowException("ffmpeg probe failed.")
|
|
||||||
|
|
||||||
# --- Main Execution Logic ---
|
|
||||||
final_filename = run_yt_dlp()
|
|
||||||
try:
|
|
||||||
run_ffmpeg_probe(final_filename)
|
|
||||||
return final_filename
|
|
||||||
except AirflowException as e:
|
|
||||||
if "probe failed" in str(e) and retry_on_probe_failure:
|
|
||||||
logger.warning("Probe failed. Attempting one re-download...")
|
|
||||||
try:
|
|
||||||
# Rename the failed file to allow for a fresh download attempt
|
|
||||||
part_file = f"{final_filename}.part"
|
|
||||||
os.rename(final_filename, part_file)
|
|
||||||
logger.info(f"Renamed corrupted file to {part_file}")
|
|
||||||
except OSError as rename_err:
|
|
||||||
logger.error(f"Could not rename corrupted file: {rename_err}")
|
|
||||||
|
|
||||||
final_filename_retry = run_yt_dlp()
|
|
||||||
run_ffmpeg_probe(final_filename_retry)
|
|
||||||
return final_filename_retry
|
|
||||||
else:
|
|
||||||
# Re-raise the original exception if no retry is attempted
|
|
||||||
raise
|
|
||||||
|
|
||||||
@task
|
|
||||||
def mark_url_as_success(initial_data: dict, downloaded_file_path: str, token_data: dict, **context):
|
|
||||||
"""Records the successful result in Redis."""
|
|
||||||
params = context['params']
|
params = context['params']
|
||||||
url = initial_data['url_to_process']
|
url = initial_data['url_to_process']
|
||||||
result_data = {
|
|
||||||
'status': 'success', 'end_time': time.time(), 'url': url,
|
# The download inbox queue is derived from the auth queue name.
|
||||||
'downloaded_file_path': downloaded_file_path, **token_data,
|
dl_inbox_queue = f"{params['queue_name'].replace('_auth', '_dl')}_inbox"
|
||||||
'dag_run_id': context['dag_run'].run_id,
|
auth_result_queue = f"{params['queue_name']}_result"
|
||||||
}
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
|
||||||
client = _get_redis_client(params['redis_conn_id'])
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
client.hset(f"{params['queue_name']}_result", url, json.dumps(result_data))
|
|
||||||
logger.info(f"Stored success result for URL '{url}'.")
|
payload = {
|
||||||
|
'timestamp': time.time(),
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
**token_data
|
||||||
|
}
|
||||||
|
|
||||||
|
result_data = {
|
||||||
|
'status': 'success',
|
||||||
|
'end_time': time.time(),
|
||||||
|
'url': url,
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
'token_data': token_data
|
||||||
|
}
|
||||||
|
|
||||||
|
with client.pipeline() as pipe:
|
||||||
|
pipe.lpush(dl_inbox_queue, json.dumps(payload))
|
||||||
|
pipe.hset(auth_result_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Pushed successful auth data for URL '{url}' to '{dl_inbox_queue}'.")
|
||||||
|
logger.info(f"Stored success result for auth on URL '{url}' in '{auth_result_queue}'.")
|
||||||
|
|
||||||
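To make the queue-name derivation concrete: with the default queue_name of 'queue2_auth', push_auth_success_to_redis touches the keys sketched below. The snippet only restates the string manipulation from the task above.

# Worked example of the Redis key names used by push_auth_success_to_redis
# for the default queue_name 'queue2_auth'.
queue_name = "queue2_auth"

dl_inbox_queue = f"{queue_name.replace('_auth', '_dl')}_inbox"   # -> "queue2_dl_inbox"
auth_result_queue = f"{queue_name}_result"                       # -> "queue2_auth_result"
progress_queue = f"{queue_name}_progress"                        # -> "queue2_auth_progress"

print(dl_inbox_queue, auth_result_queue, progress_queue)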
@task(trigger_rule='one_failed')
|
@task(trigger_rule='one_failed')
|
||||||
def report_failure_and_continue(**context):
|
def report_failure_and_continue(**context):
|
||||||
@ -606,15 +794,26 @@ def report_failure_and_continue(**context):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
client = _get_redis_client(params['redis_conn_id'])
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
_update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on failure: {e}", exc_info=True)
|
||||||
|
|
||||||
result_queue = f"{params['queue_name']}_result"
|
result_queue = f"{params['queue_name']}_result"
|
||||||
fail_queue = f"{params['queue_name']}_fail"
|
fail_queue = f"{params['queue_name']}_fail"
|
||||||
|
|
||||||
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
|
||||||
with client.pipeline() as pipe:
|
with client.pipeline() as pipe:
|
||||||
pipe.hset(result_queue, url, json.dumps(result_data))
|
pipe.hset(result_queue, url, json.dumps(result_data))
|
||||||
pipe.hset(fail_queue, url, json.dumps(result_data))
|
pipe.hset(fail_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
pipe.execute()
|
pipe.execute()
|
||||||
|
|
||||||
logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}'.")
|
logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Could not report failure to Redis: {e}", exc_info=True)
|
logger.error(f"Could not report failure to Redis: {e}", exc_info=True)
|
||||||
|
|
||||||
@ -648,6 +847,15 @@ def handle_fatal_error(**context):
|
|||||||
|
|
||||||
# Report failure to Redis so the URL can be reprocessed later
|
# Report failure to Redis so the URL can be reprocessed later
|
||||||
try:
|
try:
|
||||||
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
_update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True)
|
||||||
|
|
||||||
result_data = {
|
result_data = {
|
||||||
'status': 'failed',
|
'status': 'failed',
|
||||||
'end_time': time.time(),
|
'end_time': time.time(),
|
||||||
@ -657,13 +865,15 @@ def handle_fatal_error(**context):
|
|||||||
'error_message': 'Fatal non-retryable error occurred',
|
'error_message': 'Fatal non-retryable error occurred',
|
||||||
'error_details': error_details
|
'error_details': error_details
|
||||||
}
|
}
|
||||||
client = _get_redis_client(params['redis_conn_id'])
|
|
||||||
result_queue = f"{params['queue_name']}_result"
|
result_queue = f"{params['queue_name']}_result"
|
||||||
fail_queue = f"{params['queue_name']}_fail"
|
fail_queue = f"{params['queue_name']}_fail"
|
||||||
|
|
||||||
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
|
||||||
with client.pipeline() as pipe:
|
with client.pipeline() as pipe:
|
||||||
pipe.hset(result_queue, url, json.dumps(result_data))
|
pipe.hset(result_queue, url, json.dumps(result_data))
|
||||||
pipe.hset(fail_queue, url, json.dumps(result_data))
|
pipe.hset(fail_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
pipe.execute()
|
pipe.execute()
|
||||||
|
|
||||||
logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.")
|
logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.")
|
||||||
@ -683,6 +893,12 @@ def continue_processing_loop(**context):
|
|||||||
params = context['params']
|
params = context['params']
|
||||||
dag_run = context['dag_run']
|
dag_run = context['dag_run']
|
||||||
|
|
||||||
|
# Do not continue the loop for manual runs of the worker DAG.
|
||||||
|
# A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'.
|
||||||
|
if not dag_run.run_id.startswith('worker_run_'):
|
||||||
|
logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.")
|
||||||
|
return
|
||||||
|
|
||||||
# Create a new unique run_id for the dispatcher.
|
# Create a new unique run_id for the dispatcher.
|
||||||
# Using a timestamp and UUID ensures the ID is unique and does not grow in length over time,
|
# Using a timestamp and UUID ensures the ID is unique and does not grow in length over time,
|
||||||
# preventing database errors.
|
# preventing database errors.
|
||||||
@ -697,7 +913,7 @@ def continue_processing_loop(**context):
|
|||||||
|
|
||||||
logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.")
|
logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.")
|
||||||
trigger_dag(
|
trigger_dag(
|
||||||
dag_id='ytdlp_ops_dispatcher',
|
dag_id='ytdlp_ops_v02_dispatcher_auth',
|
||||||
run_id=new_dispatcher_run_id,
|
run_id=new_dispatcher_run_id,
|
||||||
conf=conf_to_pass,
|
conf=conf_to_pass,
|
||||||
replace_microseconds=False
|
replace_microseconds=False
|
||||||
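As a quick illustration of the run_id guard added to continue_processing_loop above (the first run_id is a made-up example of a dispatcher-triggered run; the other two use Airflow's standard manual/scheduled prefixes):

# Which run_ids keep the self-sustaining loop going? (illustrative values only)
for run_id in (
    "worker_run_queue-auth_20240101T000000_abcd1234",   # dispatcher-triggered -> loop continues
    "manual__2024-01-01T00:00:00+00:00",                 # manual run -> loop stops
    "scheduled__2024-01-01T00:00:00+00:00",              # scheduled run -> loop stops
):
    print(run_id, "continues loop:", run_id.startswith("worker_run_"))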
@ -711,6 +927,7 @@ def handle_retry_failure_branch(task_id_to_check: str, **context):
|
|||||||
On retry, most errors are considered fatal for the URL, but not for the system.
|
On retry, most errors are considered fatal for the URL, but not for the system.
|
||||||
"""
|
"""
|
||||||
ti = context['task_instance']
|
ti = context['task_instance']
|
||||||
|
params = context['params']
|
||||||
error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details')
|
error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details')
|
||||||
if not error_details:
|
if not error_details:
|
||||||
return 'handle_fatal_error'
|
return 'handle_fatal_error'
|
||||||
@ -720,8 +937,8 @@ def handle_retry_failure_branch(task_id_to_check: str, **context):
|
|||||||
|
|
||||||
# Check if this is an age confirmation error - should not stop the loop
|
# Check if this is an age confirmation error - should not stop the loop
|
||||||
if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower():
|
if "Sign in to confirm your age" in error_message or "confirm your age" in error_message.lower():
|
||||||
logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. Reporting failure and continuing loop.")
|
logger.info(f"Age confirmation error detected on retry from '{task_id_to_check}'. This is a content restriction, not a bot detection issue.")
|
||||||
return 'report_failure_and_continue'
|
return 'handle_age_restriction_error'
|
||||||
|
|
||||||
if error_code == 'TRANSPORT_ERROR':
|
if error_code == 'TRANSPORT_ERROR':
|
||||||
logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.")
|
logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.")
|
||||||
@ -729,6 +946,11 @@ def handle_retry_failure_branch(task_id_to_check: str, **context):
|
|||||||
|
|
||||||
is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"]
|
is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"]
|
||||||
if is_bannable:
|
if is_bannable:
|
||||||
|
policy = params.get('on_bannable_failure', 'retry_with_new_account')
|
||||||
|
if policy == 'proceed_loop_under_manual_inspection':
|
||||||
|
logger.warning(f"Bannable error '{error_code}' on retry with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.")
|
||||||
|
return 'report_bannable_and_continue'
|
||||||
|
|
||||||
logger.warning(f"Bannable error '{error_code}' on retry. Banning account and reporting failure.")
|
logger.warning(f"Bannable error '{error_code}' on retry. Banning account and reporting failure.")
|
||||||
return 'ban_and_report_after_retry'
|
return 'ban_and_report_after_retry'
|
||||||
|
|
||||||
@ -745,11 +967,6 @@ def ban_and_report_after_retry(retry_data: dict, reason: str, **context):
|
|||||||
return retry_data
|
return retry_data
|
||||||
|
|
||||||
|
|
||||||
@task.branch(trigger_rule='one_failed')
|
|
||||||
def handle_download_failure_branch(**context):
|
|
||||||
"""If download or probe fails, routes to the standard failure reporting."""
|
|
||||||
logger.warning("Download or probe failed. Reporting failure and continuing loop.")
|
|
||||||
return 'report_failure_and_continue'
|
|
||||||
|
|
||||||
|
|
||||||
@task(trigger_rule='one_success')
|
@task(trigger_rule='one_success')
|
||||||
@ -768,7 +985,69 @@ def coalesce_token_data(get_token_result=None, retry_get_token_result=None):
|
|||||||
raise AirflowException("Could not find a successful token result from any attempt.")
|
raise AirflowException("Could not find a successful token result from any attempt.")
|
||||||
|
|
||||||
|
|
||||||
@task(trigger_rule='one_failed')
|
@task
|
||||||
|
def report_bannable_and_continue(**context):
|
||||||
|
"""
|
||||||
|
Handles a bannable error by reporting it, but continues the loop
|
||||||
|
as per the 'proceed_loop_under_manual_inspection' policy.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
ti = context['task_instance']
|
||||||
|
url = params.get('url_to_process', 'unknown')
|
||||||
|
|
||||||
|
# Collect error details
|
||||||
|
error_details = {}
|
||||||
|
first_token_task_id = 'get_token'
|
||||||
|
retry_token_task_id = 'retry_get_token'
|
||||||
|
|
||||||
|
first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details')
|
||||||
|
retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details')
|
||||||
|
|
||||||
|
# Use the most recent error details
|
||||||
|
if retry_token_error:
|
||||||
|
error_details = retry_token_error
|
||||||
|
elif first_token_error:
|
||||||
|
error_details = first_token_error
|
||||||
|
|
||||||
|
logger.error(f"Bannable error for URL '{url}'. Policy is to continue loop under manual supervision.")
|
||||||
|
|
||||||
|
# Report failure to Redis
|
||||||
|
try:
|
||||||
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
_update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on bannable error: {e}", exc_info=True)
|
||||||
|
|
||||||
|
result_data = {
|
||||||
|
'status': 'failed',
|
||||||
|
'end_time': time.time(),
|
||||||
|
'url': url,
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
'error': 'bannable_error_manual_override',
|
||||||
|
'error_message': 'Bannable error occurred, but policy is set to continue loop under manual supervision.',
|
||||||
|
'error_details': error_details
|
||||||
|
}
|
||||||
|
result_queue = f"{params['queue_name']}_result"
|
||||||
|
fail_queue = f"{params['queue_name']}_fail"
|
||||||
|
|
||||||
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
|
||||||
|
with client.pipeline() as pipe:
|
||||||
|
pipe.hset(result_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hset(fail_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Stored bannable error for URL '{url}' in '{result_queue}' and '{fail_queue}'.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
@task
|
||||||
def handle_age_restriction_error(**context):
|
def handle_age_restriction_error(**context):
|
||||||
"""
|
"""
|
||||||
Handles age restriction errors specifically. These are content restrictions
|
Handles age restriction errors specifically. These are content restrictions
|
||||||
@ -797,6 +1076,15 @@ def handle_age_restriction_error(**context):
|
|||||||
|
|
||||||
# Report failure to Redis so the URL can be marked as failed
|
# Report failure to Redis so the URL can be marked as failed
|
||||||
try:
|
try:
|
||||||
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
_update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on age restriction error: {e}", exc_info=True)
|
||||||
|
|
||||||
result_data = {
|
result_data = {
|
||||||
'status': 'failed',
|
'status': 'failed',
|
||||||
'end_time': time.time(),
|
'end_time': time.time(),
|
||||||
@ -806,13 +1094,15 @@ def handle_age_restriction_error(**context):
|
|||||||
'error_message': 'Content requires age confirmation',
|
'error_message': 'Content requires age confirmation',
|
||||||
'error_details': error_details
|
'error_details': error_details
|
||||||
}
|
}
|
||||||
client = _get_redis_client(params['redis_conn_id'])
|
|
||||||
result_queue = f"{params['queue_name']}_result"
|
result_queue = f"{params['queue_name']}_result"
|
||||||
fail_queue = f"{params['queue_name']}_fail"
|
fail_queue = f"{params['queue_name']}_fail"
|
||||||
|
|
||||||
|
        progress_queue = f"{params['queue_name']}_progress"

        with client.pipeline() as pipe:
            pipe.hset(result_queue, url, json.dumps(result_data))
            pipe.hset(fail_queue, url, json.dumps(result_data))
+           pipe.hdel(progress_queue, url)
            pipe.execute()

        logger.info(f"Stored age restriction error for URL '{url}' in '{result_queue}' and '{fail_queue}'.")

@@ -826,7 +1116,7 @@ def handle_age_restriction_error(**context):
# DAG Definition with TaskGroups
# =============================================================================
with DAG(
-   dag_id='ytdlp_ops_worker_per_url',
+   dag_id='ytdlp_ops_v02_worker_per_url_auth',
    default_args=DEFAULT_ARGS,
    schedule=None,
    start_date=days_ago(1),
@@ -834,6 +1124,7 @@ with DAG(
    tags=['ytdlp', 'worker'],
    doc_md=__doc__,
    render_template_as_native_obj=True,
+   is_paused_upon_creation=True,
    params={
        'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"),
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"),
@@ -841,17 +1132,18 @@ with DAG(
        'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer"),
        'account_pool': Param('default_account', type="string"),
        'account_pool_size': Param(None, type=["integer", "null"]),
+       'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."),
        'machine_id': Param(None, type=["string", "null"]),
-       'clients': Param('web', type="string"),
+       'assigned_proxy_url': Param(None, type=["string", "null"], title="[Worker Param] Assigned Proxy URL", description="If provided, forces the token service to use this specific proxy for the request."),
+       'clients': Param('mweb', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
        'timeout': Param(DEFAULT_TIMEOUT, type="integer"),
-       'download_format': Param('ba[ext=m4a]/bestaudio/best', type="string"),
-       'output_path_template': Param("%(title)s [%(id)s].%(ext)s", type="string"),
-       'on_bannable_failure': Param('retry_with_new_account', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error']),
-       'retry_on_probe_failure': Param(False, type="boolean"),
+       'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']),
+       'request_params_json': Param(json.dumps(DEFAULT_REQUEST_PARAMS), type="string", title="[Worker Param] Request Params JSON", description="JSON string with request parameters for the token service."),
        'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"),
-       # Internal params passed from dispatcher
-       'url_to_process': Param(None, type=["string", "null"]),
-       'worker_queue': Param(None, type=["string", "null"]),
+       # --- Manual Run / Internal Parameters ---
+       'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL to process. This is ignored if triggered by the dispatcher."),
+       'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
+       'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
    }
) as dag:
    initial_data = get_url_and_assign_account()
@@ -863,6 +1155,7 @@ with DAG(
    report_failure_task = report_failure_and_continue()
    continue_loop_task = continue_processing_loop()
    age_restriction_task = handle_age_restriction_error()
+   report_bannable_and_continue_task = report_bannable_and_continue()

    # --- Task Group 1: Initial Attempt ---
    with TaskGroup("initial_attempt", tooltip="Initial token acquisition attempt") as initial_attempt_group:
@@ -878,7 +1171,7 @@ with DAG(
        )

        first_token_attempt >> initial_branch_task
-       initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task]
+       initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task]

    # --- Task Group 2: Retry Logic ---
    with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group:
@@ -928,42 +1221,40 @@ with DAG(
        direct_retry_account_task >> coalesced_retry_data
        coalesced_retry_data >> retry_token_task
        retry_token_task >> retry_branch_task
-       retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task]
+       retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, age_restriction_task, report_bannable_and_continue_task]
        ban_after_retry_report_task >> report_failure_task

-   # --- Task Group 3: Download and Processing ---
-   with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group:
-       # Coalesce, download, and success tasks
+   # --- Task Group 3: Success/Continuation Logic ---
+   with TaskGroup("success_and_continuation", tooltip="Push to DL queue and continue loop") as success_group:
        token_data = coalesce_token_data(
            get_token_result=first_token_attempt,
            retry_get_token_result=retry_token_task
        )
-       download_task = download_and_probe(token_data=token_data)
-       download_branch_task = handle_download_failure_branch.override(trigger_rule='one_failed')()
-       success_task = mark_url_as_success(
+       list_formats_task = list_available_formats(token_data=token_data)
+       success_task = push_auth_success_to_redis(
            initial_data=initial_data,
-           downloaded_file_path=download_task,
            token_data=token_data
        )

-       # Internal dependencies within download group
        first_token_attempt >> token_data
        retry_token_task >> token_data
-       token_data >> download_task
-       download_task >> download_branch_task
-       download_branch_task >> report_failure_task
-       download_task >> success_task
+       token_data >> list_formats_task >> success_task
        success_task >> continue_loop_task

    # --- DAG Dependencies between TaskGroups ---
    # Initial attempt can lead to retry logic or direct failure
-   initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task]
+   initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, age_restriction_task, report_bannable_and_continue_task]

-   # Retry logic leads to download processing on success or failure reporting on failure
-   retry_branch_task >> [download_processing_group, report_failure_task]
+   # A successful initial attempt bypasses retry and goes straight to the success group
+   initial_attempt_group >> success_group

+   # Retry logic leads to success/continuation on success or failure reporting on failure
+   retry_branch_task >> [report_failure_task] # Handled within the group
+   retry_logic_group >> success_group

    # Ban and report immediately leads to failure reporting
    ban_and_report_immediately_task >> report_failure_task

    # Age restriction error leads to failure reporting and continues the loop
    age_restriction_task >> continue_loop_task
+   report_bannable_and_continue_task >> continue_loop_task
895  airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py  Normal file
@@ -0,0 +1,895 @@
# -*- coding: utf-8 -*-
|
||||||
|
# vim:fenc=utf-8
|
||||||
|
#
|
||||||
|
# Copyright © 2024 rl <rl@rlmbp>
|
||||||
|
#
|
||||||
|
# Distributed under terms of the MIT license.
|
||||||
|
|
||||||
|
"""
|
||||||
|
DAG for downloading a single YouTube URL based on pre-fetched token data.
|
||||||
|
This is the "Download Worker" part of a separated Auth/Download pattern.
|
||||||
|
It receives a job payload with all necessary token info and handles only the
|
||||||
|
downloading and probing of media files.
|
||||||
|
"""
|
||||||
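# Illustrative shape of the job payload this DAG expects in its run conf
# (a sketch only; field names are taken from how job_data/token_data are used
# below, and the dispatcher may include additional keys):
#
#   {
#       "original_url": "https://www.youtube.com/watch?v=...",
#       "info_json_path": "/path/to/<video>.info.json",
#       "socks_proxy": "socks5://...",
#       "successful_account_id": "...",
#       "clients": "mweb,web_camoufox,tv"
#   }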
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from airflow.decorators import task, task_group
|
||||||
|
from airflow.exceptions import AirflowException, AirflowSkipException
|
||||||
|
from airflow.models import Variable
|
||||||
|
from airflow.models.dag import DAG
|
||||||
|
from airflow.models.param import Param
|
||||||
|
from airflow.models.xcom_arg import XComArg
|
||||||
|
from airflow.operators.dummy import DummyOperator
|
||||||
|
from airflow.utils.dates import days_ago
|
||||||
|
from airflow.utils.task_group import TaskGroup
|
||||||
|
from airflow.api.common.trigger_dag import trigger_dag
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import concurrent.futures
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import redis
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
# Import utility functions and Thrift modules
|
||||||
|
from utils.redis_utils import _get_redis_client
|
||||||
|
from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext
|
||||||
|
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
||||||
|
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||||
|
from thrift.protocol import TBinaryProtocol
|
||||||
|
from thrift.transport import TSocket, TTransport
|
||||||
|
from thrift.transport.TTransport import TTransportException
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Client Stats Helper ---
|
||||||
|
|
||||||
|
def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str):
|
||||||
|
"""Updates success/failure statistics for a client type in Redis."""
|
||||||
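    # Resulting Redis layout (sketch derived from the writes below): the hash
    # "client_stats" is keyed by the primary client (e.g. "mweb"), and each value
    # is a JSON blob like:
    #   {"success_count": 12, "failure_count": 3,
    #    "latest_success": {"timestamp": ..., "url": ..., "machine_id": ..., "dag_run_id": ...},
    #    "latest_failure": {...}}
    # (counts shown are illustrative)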
|
if not clients_str:
|
||||||
|
logger.warning("Cannot update client stats: 'clients' string is empty.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Assumption: The service tries clients in the order provided.
|
||||||
|
# We attribute the result to the first client in the list.
|
||||||
|
primary_client = clients_str.split(',')[0].strip()
|
||||||
|
if not primary_client:
|
||||||
|
logger.warning("Cannot update client stats: could not determine primary client.")
|
||||||
|
return
|
||||||
|
|
||||||
|
stats_key = "client_stats"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Using a pipeline with WATCH for safe concurrent updates.
|
||||||
|
with redis_client.pipeline() as pipe:
|
||||||
|
pipe.watch(stats_key)
|
||||||
|
|
||||||
|
current_stats_json = redis_client.hget(stats_key, primary_client)
|
||||||
|
stats = {}
|
||||||
|
if current_stats_json:
|
||||||
|
try:
|
||||||
|
stats = json.loads(current_stats_json)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning(f"Could not parse existing stats for client '{primary_client}'. Resetting stats.")
|
||||||
|
stats = {}
|
||||||
|
|
||||||
|
stats.setdefault('success_count', 0)
|
||||||
|
stats.setdefault('failure_count', 0)
|
||||||
|
|
||||||
|
details = {
|
||||||
|
'timestamp': time.time(), 'url': url,
|
||||||
|
'machine_id': machine_id, 'dag_run_id': dag_run_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
if status == 'success':
|
||||||
|
stats['success_count'] += 1
|
||||||
|
stats['latest_success'] = details
|
||||||
|
elif status == 'failure':
|
||||||
|
stats['failure_count'] += 1
|
||||||
|
stats['latest_failure'] = details
|
||||||
|
|
||||||
|
pipe.multi()
|
||||||
|
pipe.hset(stats_key, primary_client, json.dumps(stats))
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.")
|
||||||
|
|
||||||
|
except redis.exceptions.WatchError:
|
||||||
|
logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. Skipping this update.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Default settings from Airflow Variables or hardcoded fallbacks
|
||||||
|
DEFAULT_QUEUE_NAME = 'queue2_dl'
|
||||||
|
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||||
|
DEFAULT_TIMEOUT = 3600
|
||||||
|
DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
|
||||||
|
DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
|
||||||
|
|
||||||
|
# The queue is set to a fallback here. The actual worker-specific queue is
|
||||||
|
# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py),
|
||||||
|
# which parses the target queue from the DAG run_id.
|
||||||
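# Example (illustrative run_id; the real value is generated by the dispatcher):
#   run_id "worker_run_20240101T000000_q_queue-dl-dl001" -> queue "queue-dl-dl001",
# i.e. everything after the last "_q_" marker, mirroring the check in
# get_download_job_from_conf() below.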
|
DEFAULT_ARGS = {
|
||||||
|
'owner': 'airflow',
|
||||||
|
'retries': 0,
|
||||||
|
'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Helper Functions ---
|
||||||
|
|
||||||
|
def _extract_video_id(url):
|
||||||
|
"""Extracts YouTube video ID from URL."""
|
||||||
|
if not url or not isinstance(url, str):
|
||||||
|
return None
|
||||||
|
patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})']
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, url)
|
||||||
|
if match:
|
||||||
|
return match.group(1)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# TASK DEFINITIONS (TaskFlow API)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@task
|
||||||
|
def get_download_job_from_conf(**context):
|
||||||
|
"""
|
||||||
|
Gets the download job details (which includes token data) from the DAG run conf.
|
||||||
|
This is the first task in the download worker DAG.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
ti = context['task_instance']
|
||||||
|
|
||||||
|
# --- Worker Pinning Verification ---
|
||||||
|
# This is a safeguard against a known Airflow issue where clearing a task
|
||||||
|
# can cause the task_instance_mutation_hook to be skipped, breaking pinning.
|
||||||
|
# See: https://github.com/apache/airflow/issues/20143
|
||||||
|
expected_queue = None
|
||||||
|
if ti.run_id and '_q_' in ti.run_id:
|
||||||
|
expected_queue = ti.run_id.split('_q_')[-1]
|
||||||
|
|
||||||
|
if not expected_queue:
|
||||||
|
# Fallback to conf if run_id parsing fails for some reason
|
||||||
|
expected_queue = params.get('worker_queue')
|
||||||
|
|
||||||
|
if expected_queue and ti.queue != expected_queue:
|
||||||
|
error_msg = (
|
||||||
|
f"WORKER PINNING FAILURE: Task is running on queue '{ti.queue}' but was expected on '{expected_queue}'. "
|
||||||
|
"This usually happens after manually clearing a task, which is not the recommended recovery method for this DAG. "
|
||||||
|
"To recover a failed URL, let the DAG run fail, use the 'ytdlp_mgmt_queues' DAG to requeue the URL, "
|
||||||
|
"and use the 'ytdlp_ops_orchestrator' to start a new worker loop if needed."
|
||||||
|
)
|
||||||
|
logger.error(error_msg)
|
||||||
|
raise AirflowException(error_msg)
|
||||||
|
elif expected_queue:
|
||||||
|
logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.")
|
||||||
|
# --- End Verification ---
|
||||||
|
|
||||||
|
# The job data is passed by the dispatcher DAG via 'job_data'.
|
||||||
|
job_data = params.get('job_data')
|
||||||
|
if not job_data:
|
||||||
|
raise AirflowException("No job_data provided in DAG run configuration.")
|
||||||
|
|
||||||
|
# If job_data is a string, parse it as JSON
|
||||||
|
if isinstance(job_data, str):
|
||||||
|
try:
|
||||||
|
job_data = json.loads(job_data)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
raise AirflowException(f"Could not decode job_data JSON: {job_data}")
|
||||||
|
|
||||||
|
url_to_process = job_data.get('original_url')
|
||||||
|
if not url_to_process:
|
||||||
|
raise AirflowException("'original_url' not found in job_data.")
|
||||||
|
|
||||||
|
logger.info(f"Received job for URL '{url_to_process}'.")
|
||||||
|
|
||||||
|
# Mark the URL as in-progress in Redis
|
||||||
|
try:
|
||||||
|
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||||
|
queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME)
|
||||||
|
progress_queue = f"{queue_name}_progress"
|
||||||
|
client = _get_redis_client(redis_conn_id)
|
||||||
|
|
||||||
|
progress_data = {
|
||||||
|
'status': 'in_progress',
|
||||||
|
'start_time': time.time(),
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
'hostname': socket.gethostname(),
|
||||||
|
}
|
||||||
|
client.hset(progress_queue, url_to_process, json.dumps(progress_data))
|
||||||
|
logger.info(f"Marked URL '{url_to_process}' as in-progress.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True)
|
||||||
|
|
||||||
|
return job_data
|
||||||
|
|
||||||
|
@task
|
||||||
|
def list_available_formats(token_data: dict, **context):
|
||||||
|
"""
|
||||||
|
Lists available formats for the given video using the info.json.
|
||||||
|
This is for debugging and informational purposes.
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import shlex
|
||||||
|
|
||||||
|
info_json_path = token_data.get('info_json_path')
|
||||||
|
if not (info_json_path and os.path.exists(info_json_path)):
|
||||||
|
logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).")
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
cmd = [
|
||||||
|
'yt-dlp',
|
||||||
|
'--verbose',
|
||||||
|
'--list-formats',
|
||||||
|
'--load-info-json', info_json_path,
|
||||||
|
]
|
||||||
|
|
||||||
|
copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
|
||||||
|
logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}")
|
||||||
|
|
||||||
|
process = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||||
|
|
||||||
|
if process.stderr:
|
||||||
|
logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}")
|
||||||
|
|
||||||
|
if process.returncode != 0:
|
||||||
|
logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}")
|
||||||
|
|
||||||
|
available_formats = []
|
||||||
|
if process.stdout:
|
||||||
|
logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---")
|
||||||
|
# Parse the output to get format IDs
|
||||||
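        # The format table printed by yt-dlp looks roughly like (illustrative):
        #   ID  EXT  RESOLUTION ...
        #   140 m4a  audio only ...
        # The loop below skips ahead to the "ID" header row and then collects the
        # leading format ID of each subsequent row that starts with a digit.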
|
lines = process.stdout.split('\n')
|
||||||
|
header_found = False
|
||||||
|
for line in lines:
|
||||||
|
if line.startswith('ID '):
|
||||||
|
header_found = True
|
||||||
|
continue
|
||||||
|
if header_found and line.strip() and line.strip()[0].isdigit():
|
||||||
|
format_id = line.split()[0]
|
||||||
|
available_formats.append(format_id)
|
||||||
|
logger.info(f"Parsed available format IDs: {available_formats}")
|
||||||
|
|
||||||
|
return available_formats
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True)
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
@task
|
||||||
|
def download_and_probe(token_data: dict, available_formats: list[str], **context):
|
||||||
|
"""
|
||||||
|
Uses retrieved token data to download and probe media files.
|
||||||
|
Supports parallel downloading of specific, comma-separated format IDs.
|
||||||
|
If probing fails, retries downloading only the failed files.
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import shlex
|
||||||
|
import concurrent.futures
|
||||||
|
|
||||||
|
params = context['params']
|
||||||
|
info_json_path = token_data.get('info_json_path')
|
||||||
|
proxy = token_data.get('socks_proxy')
|
||||||
|
original_url = token_data.get('original_url')
|
||||||
|
|
||||||
|
if not (info_json_path and os.path.exists(info_json_path)):
|
||||||
|
raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).")
|
||||||
|
|
||||||
|
download_dir = os.path.dirname(info_json_path)
|
||||||
|
|
||||||
|
format_preset = params.get('download_format_preset', 'best_audio')
|
||||||
|
if format_preset == 'custom':
|
||||||
|
download_format = params.get('download_format_custom')
|
||||||
|
if not download_format:
|
||||||
|
raise AirflowException("Format preset is 'custom' but no custom format string was provided.")
|
||||||
|
elif format_preset == 'best_audio':
|
||||||
|
download_format = 'ba[ext=m4a]/bestaudio/best'
|
||||||
|
elif format_preset == 'formats_0':
|
||||||
|
download_format = '18,140'
|
||||||
|
elif format_preset == 'formats_2':
|
||||||
|
download_format = '18,140,299/298/137/136/135/134/133'
|
||||||
|
elif format_preset == 'formats_3':
|
||||||
|
download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
|
||||||
|
else:
|
||||||
|
download_format = 'ba[ext=m4a]/bestaudio/best'
|
||||||
|
|
||||||
|
output_template = params.get('output_path_template', "%(title)s [%(id)s].f%(format_id)s.%(ext)s")
|
||||||
|
full_output_path = os.path.join(download_dir, output_template)
|
||||||
|
retry_on_probe_failure = params.get('retry_on_probe_failure', False)
|
||||||
|
|
||||||
|
def run_yt_dlp_command(format_selector: str):
|
||||||
|
"""Constructs and runs a yt-dlp command, returning a list of final filenames."""
|
||||||
|
cmd = [
|
||||||
|
'yt-dlp', '--verbose', '--print-traffic', '--load-info-json', info_json_path,
|
||||||
|
'-f', format_selector, '-o', full_output_path,
|
||||||
|
'--print', 'filename', '--continue', '--no-progress', '--no-simulate',
|
||||||
|
'--no-write-info-json', '--ignore-errors', '--no-playlist',
|
||||||
|
]
|
||||||
|
|
||||||
|
if params.get('fragment_retries'):
|
||||||
|
cmd.extend(['--fragment-retries', str(params['fragment_retries'])])
|
||||||
|
if params.get('limit_rate'):
|
||||||
|
cmd.extend(['--limit-rate', params['limit_rate']])
|
||||||
|
if params.get('socket_timeout'):
|
||||||
|
cmd.extend(['--socket-timeout', str(params['socket_timeout'])])
|
||||||
|
if params.get('min_sleep_interval'):
|
||||||
|
cmd.extend(['--min-sleep-interval', str(params['min_sleep_interval'])])
|
||||||
|
if params.get('max_sleep_interval'):
|
||||||
|
cmd.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
|
||||||
|
if params.get('yt_dlp_test_mode'):
|
||||||
|
cmd.append('--test')
|
||||||
|
|
||||||
|
downloader = params.get('downloader', 'default')
|
||||||
|
if proxy and not (downloader == 'aria2c' and proxy.startswith('socks5://')):
|
||||||
|
cmd.extend(['--proxy', proxy])
|
||||||
|
|
||||||
|
gost_process = None
|
||||||
|
try:
|
||||||
|
if downloader == 'aria2c':
|
||||||
|
cmd.extend(['--downloader', 'aria2c'])
|
||||||
|
downloader_args = params.get('downloader_args_aria2c')
|
||||||
|
if proxy and proxy.startswith('socks5://'):
|
||||||
|
import socket
|
||||||
|
from contextlib import closing
|
||||||
|
def find_free_port():
|
||||||
|
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
|
||||||
|
s.bind(('', 0))
|
||||||
|
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
|
return s.getsockname()[1]
|
||||||
|
local_port = find_free_port()
|
||||||
|
http_proxy = f"http://127.0.0.1:{local_port}"
|
||||||
|
logger.info(f"Starting gost for format '{format_selector}' to forward {proxy} to {http_proxy}")
|
||||||
|
gost_cmd = ['gost', '-L', f'http://127.0.0.1:{local_port}', '-F', proxy]
|
||||||
|
gost_process = subprocess.Popen(gost_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
time.sleep(1)
|
||||||
|
if gost_process.poll() is not None:
|
||||||
|
stdout, stderr = gost_process.communicate()
|
||||||
|
logger.error(f"gost failed to start. Exit: {gost_process.returncode}. Stdout: {stdout.decode()}. Stderr: {stderr.decode()}")
|
||||||
|
raise AirflowException("gost proxy tunnel failed to start.")
|
||||||
|
user_args = downloader_args[len('aria2c:'):] if downloader_args and downloader_args.startswith('aria2c:') else (downloader_args or "")
|
||||||
|
final_args_str = f'aria2c:{user_args.strip()} --http-proxy={http_proxy}'
|
||||||
|
cmd.extend(['--downloader-args', final_args_str])
|
||||||
|
elif downloader_args:
|
||||||
|
cmd.extend(['--downloader-args', downloader_args])
|
||||||
|
|
||||||
|
extra_args = params.get('yt_dlp_extra_args')
|
||||||
|
if extra_args:
|
||||||
|
cmd.extend(shlex.split(extra_args))
|
||||||
|
if original_url:
|
||||||
|
cmd.append(original_url)
|
||||||
|
|
||||||
|
copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd)
|
||||||
|
logger.info(f"Executing yt-dlp command for format '{format_selector}': {copy_paste_cmd}")
|
||||||
|
process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
|
||||||
|
|
||||||
|
if process.stdout:
|
||||||
|
logger.info(f"yt-dlp STDOUT for format '{format_selector}':\n{process.stdout}")
|
||||||
|
if process.stderr:
|
||||||
|
# yt-dlp often prints progress and informational messages to stderr
|
||||||
|
logger.info(f"yt-dlp STDERR for format '{format_selector}':\n{process.stderr}")
|
||||||
|
|
||||||
|
if process.returncode != 0:
|
||||||
|
logger.error(f"yt-dlp failed for format '{format_selector}' with exit code {process.returncode}")
|
||||||
|
# STDOUT and STDERR are already logged above.
|
||||||
|
raise AirflowException(f"yt-dlp command failed for format '{format_selector}'.")
|
||||||
|
|
||||||
|
# In test mode, files are not created, so we only check that yt-dlp returned filenames.
|
||||||
|
# Otherwise, we verify that the files actually exist on disk.
|
||||||
|
output_files = [f for f in process.stdout.strip().split('\n') if f]
|
||||||
|
if not params.get('yt_dlp_test_mode'):
|
||||||
|
output_files = [f for f in output_files if os.path.exists(f)]
|
||||||
|
|
||||||
|
if not output_files:
|
||||||
|
log_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
|
||||||
|
if params.get('yt_dlp_test_mode') else
|
||||||
|
f"Download for format '{format_selector}' finished but no output files exist.")
|
||||||
|
exc_msg = (f"Test run for format '{format_selector}' did not produce any filenames."
|
||||||
|
if params.get('yt_dlp_test_mode') else
|
||||||
|
f"Download for format '{format_selector}' did not produce a file.")
|
||||||
|
|
||||||
|
logger.error(log_msg)
|
||||||
|
logger.error(f"Full STDOUT:\n{process.stdout}")
|
||||||
|
logger.error(f"Full STDERR:\n{process.stderr}")
|
||||||
|
raise AirflowException(exc_msg)
|
||||||
|
|
||||||
|
log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:"
|
||||||
|
logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}")
|
||||||
|
return output_files
|
||||||
|
finally:
|
||||||
|
if gost_process:
|
||||||
|
logger.info(f"Terminating gost process (PID: {gost_process.pid}) for format '{format_selector}'.")
|
||||||
|
gost_process.terminate()
|
||||||
|
try:
|
||||||
|
gost_process.wait(timeout=5)
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
gost_process.kill()
|
||||||
|
gost_process.wait()
|
||||||
|
|
||||||
|
def run_ffmpeg_probe(filename):
|
||||||
|
"""Probes a file with ffmpeg to check for corruption."""
|
||||||
|
logger.info(f"Probing downloaded file: {filename}")
|
||||||
|
try:
|
||||||
|
subprocess.run(['ffmpeg', '-v', 'error', '-i', filename, '-f', 'null', '-'], check=True, capture_output=True, text=True)
|
||||||
|
logger.info(f"SUCCESS: Probe confirmed valid media file: {filename}")
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
logger.error(f"ffmpeg probe failed for '{filename}'. File may be corrupt.")
|
||||||
|
logger.error(f"ffmpeg STDERR: {e.stderr}")
|
||||||
|
raise AirflowException(f"ffmpeg probe failed for {filename}.")
|
||||||
|
|
||||||
|
def _download_and_probe_formats(formats_to_process: list[str] | str):
|
||||||
|
"""
|
||||||
|
Helper to download a list of format IDs (or a single complex selector) and probe the results.
|
||||||
|
Returns a tuple of (successful_files, failed_probe_files).
|
||||||
|
"""
|
||||||
|
all_downloaded_files = []
|
||||||
|
delay_between_formats = params.get('delay_between_formats_s', 0)
|
||||||
|
|
||||||
|
if isinstance(formats_to_process, list) and formats_to_process:
|
||||||
|
logger.info(f"Downloading {len(formats_to_process)} format(s) sequentially: {formats_to_process}")
|
||||||
|
for i, fid in enumerate(formats_to_process):
|
||||||
|
all_downloaded_files.extend(run_yt_dlp_command(fid))
|
||||||
|
if delay_between_formats > 0 and i < len(formats_to_process) - 1:
|
||||||
|
logger.info(f"Waiting {delay_between_formats}s before next format download...")
|
||||||
|
time.sleep(delay_between_formats)
|
||||||
|
|
||||||
|
elif isinstance(formats_to_process, str):
|
||||||
|
logger.info(f"Using complex format selector '{formats_to_process}'. Running as a single command.")
|
||||||
|
all_downloaded_files = run_yt_dlp_command(formats_to_process)
|
||||||
|
|
||||||
|
if not all_downloaded_files:
|
||||||
|
logger.warning("Download process completed but produced no files.")
|
||||||
|
return [], []
|
||||||
|
|
||||||
|
if params.get('yt_dlp_test_mode'):
|
||||||
|
logger.info("Test mode is enabled. Skipping probe of output files.")
|
||||||
|
return all_downloaded_files, []
|
||||||
|
|
||||||
|
if params.get('skip_probe'):
|
||||||
|
logger.info("Skipping probe of output files as per configuration.")
|
||||||
|
return all_downloaded_files, []
|
||||||
|
|
||||||
|
successful_probes, failed_probes = [], []
|
||||||
|
logger.info(f"Probing {len(all_downloaded_files)} downloaded file(s) sequentially...")
|
||||||
|
for filename in all_downloaded_files:
|
||||||
|
try:
|
||||||
|
run_ffmpeg_probe(filename)
|
||||||
|
successful_probes.append(filename)
|
||||||
|
except Exception:
|
||||||
|
failed_probes.append(filename)
|
||||||
|
|
||||||
|
return successful_probes, failed_probes
|
||||||
|
|
||||||
|
# --- Main Execution Logic ---
|
||||||
|
with open(info_json_path, 'r', encoding='utf-8') as f:
|
||||||
|
info = json.load(f)
|
||||||
|
|
||||||
|
# Split the format string by commas to get a list of individual format selectors.
|
||||||
|
# This enables parallel downloads of different formats or format groups.
|
||||||
|
# For example, '18,140,299/298' becomes ['18', '140', '299/298'],
|
||||||
|
# and each item will be downloaded in a separate yt-dlp process.
|
||||||
|
if download_format and isinstance(download_format, str):
|
||||||
|
formats_to_download_initial = [selector.strip() for selector in download_format.split(',') if selector.strip()]
|
||||||
|
else:
|
||||||
|
# Fallback for safety, though download_format should always be a string.
|
||||||
|
formats_to_download_initial = []
|
||||||
|
|
||||||
|
if not formats_to_download_initial:
|
||||||
|
raise AirflowException("No valid download format selectors were found after parsing.")
|
||||||
|
|
||||||
|
# --- Filter requested formats against available formats ---
|
||||||
|
final_formats_to_download = []
|
||||||
|
if not available_formats:
|
||||||
|
logger.warning("List of available formats is empty. Will attempt to download all requested formats without validation.")
|
||||||
|
final_formats_to_download = formats_to_download_initial
|
||||||
|
else:
|
||||||
|
for selector in formats_to_download_initial:
|
||||||
|
# A selector can be '140' or '299/298/137'
|
||||||
|
individual_ids = re.split(r'[/+]', selector)
|
||||||
|
if any(fid in available_formats for fid in individual_ids):
|
||||||
|
final_formats_to_download.append(selector)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Requested format selector '{selector}' contains no available formats. Skipping.")
|
||||||
|
|
||||||
|
if not final_formats_to_download:
|
||||||
|
raise AirflowException("None of the requested formats are available for this video.")
|
||||||
|
|
||||||
|
# --- Initial Download and Probe ---
|
||||||
|
successful_files, failed_files = _download_and_probe_formats(final_formats_to_download)
|
||||||
|
|
||||||
|
if params.get('yt_dlp_test_mode'):
|
||||||
|
logger.info(f"Test mode: yt-dlp returned {len(successful_files)} filenames. Skipping probe failure checks.")
|
||||||
|
if not successful_files:
|
||||||
|
raise AirflowException("Test run did not produce any filenames.")
|
||||||
|
return successful_files
|
||||||
|
|
||||||
|
if not failed_files:
|
||||||
|
if not successful_files:
|
||||||
|
raise AirflowException("Download and probe process completed but produced no valid files.")
|
||||||
|
return successful_files
|
||||||
|
|
||||||
|
# --- Handle Probe Failures and Retry ---
|
||||||
|
if not retry_on_probe_failure:
|
||||||
|
raise AirflowException(f"Probe failed for {len(failed_files)} file(s) and retry is disabled: {failed_files}")
|
||||||
|
|
||||||
|
logger.warning(f"Probe failed for {len(failed_files)} file(s). Attempting one re-download for failed files...")
|
||||||
|
|
||||||
|
format_ids_to_retry = []
|
||||||
|
# Since each download is now for a specific selector and the output template
|
||||||
|
# includes the format_id, we can always attempt to extract the format_id
|
||||||
|
# from the failed filename for a targeted retry.
|
||||||
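    # Example with the default output template "%(title)s [%(id)s].f%(format_id)s.%(ext)s":
    # a corrupt "Some Title [abc123].f140.m4a" yields format_id "140" for the retry
    # (filename is illustrative; only the ".f<id>." marker is significant here).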
|
for f in failed_files:
|
||||||
|
match = re.search(r'\.f([\d]+)\.', f)
|
||||||
|
if match:
|
||||||
|
format_ids_to_retry.append(match.group(1))
|
||||||
|
else:
|
||||||
|
logger.error(f"Could not extract format_id from failed file '{f}'. Cannot retry this specific file.")
|
||||||
|
formats_to_download_retry = format_ids_to_retry
|
||||||
|
|
||||||
|
if not formats_to_download_retry:
|
||||||
|
raise AirflowException("Probe failed, but could not determine which formats to retry.")
|
||||||
|
|
||||||
|
# Rename failed files to allow for a fresh download attempt
|
||||||
|
for f in failed_files:
|
||||||
|
try:
|
||||||
|
failed_path = f"{f}.probe_failed_{int(time.time())}"
|
||||||
|
os.rename(f, failed_path)
|
||||||
|
logger.info(f"Renamed corrupted file to {failed_path}")
|
||||||
|
except OSError as rename_err:
|
||||||
|
logger.error(f"Could not rename corrupted file '{f}': {rename_err}")
|
||||||
|
|
||||||
|
# --- Retry Download and Probe ---
|
||||||
|
retried_successful_files, retried_failed_files = _download_and_probe_formats(formats_to_download_retry)
|
||||||
|
|
||||||
|
if retried_failed_files:
|
||||||
|
logger.error(f"Probe failed again for {len(retried_failed_files)} file(s) after retry: {retried_failed_files}")
|
||||||
|
|
||||||
|
final_success_list = successful_files + retried_successful_files
|
||||||
|
if not final_success_list:
|
||||||
|
raise AirflowException("All files failed to download or probe correctly, even after retry.")
|
||||||
|
|
||||||
|
logger.info(f"Retry complete. Final success count: {len(final_success_list)} file(s).")
|
||||||
|
|
||||||
|
if params.get('yt_dlp_cleanup_mode', True):
|
||||||
|
logger.info(f"Cleanup mode is enabled. Creating .empty files and deleting originals for {len(final_success_list)} files.")
|
||||||
|
for f in final_success_list:
|
||||||
|
try:
|
||||||
|
empty_file_path = f"{f}.empty"
|
||||||
|
with open(empty_file_path, 'w') as fp:
|
||||||
|
pass # create empty file
|
||||||
|
logger.info(f"Created empty file: {empty_file_path}")
|
||||||
|
os.remove(f)
|
||||||
|
logger.info(f"Deleted original file: {f}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during cleanup for file {f}: {e}", exc_info=True)
|
||||||
|
# Do not fail the task for a cleanup error, just log it.
|
||||||
|
|
||||||
|
return final_success_list
|
||||||
|
|
||||||
|
@task
|
||||||
|
def mark_url_as_success(job_data: dict, downloaded_file_paths: list, **context):
|
||||||
|
"""Records the successful download result in Redis."""
|
||||||
|
params = context['params']
|
||||||
|
url = job_data['original_url']
|
||||||
|
result_data = {
|
||||||
|
'status': 'success', 'end_time': time.time(), 'url': url,
|
||||||
|
'downloaded_file_paths': downloaded_file_paths, **job_data,
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
}
|
||||||
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update activity counters
|
||||||
|
try:
|
||||||
|
proxy_url = job_data.get('socks_proxy')
|
||||||
|
account_id = job_data.get('successful_account_id')
|
||||||
|
now = time.time()
|
||||||
|
# Use a unique member to prevent collisions, e.g., dag_run_id
|
||||||
|
member = context['dag_run'].run_id
|
||||||
|
|
||||||
|
if proxy_url:
|
||||||
|
proxy_key = f"activity:per_proxy:{proxy_url}"
|
||||||
|
client.zadd(proxy_key, {member: now})
|
||||||
|
client.expire(proxy_key, 3600 * 2) # Expire after 2 hours
|
||||||
|
if account_id:
|
||||||
|
account_key = f"activity:per_account:{account_id}"
|
||||||
|
client.zadd(account_key, {member: now})
|
||||||
|
client.expire(account_key, 3600 * 2) # Expire after 2 hours
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update activity counters: {e}", exc_info=True)
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params
|
||||||
|
_update_client_stats(client, clients_str, 'success', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on success: {e}", exc_info=True)
|
||||||
|
|
||||||
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
result_queue = f"{params['queue_name']}_result"
|
||||||
|
|
||||||
|
with client.pipeline() as pipe:
|
||||||
|
pipe.hset(result_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Stored success result for URL '{url}' and removed from progress queue.")
|
||||||
|
|
||||||
|
@task(trigger_rule='one_failed')
|
||||||
|
def report_failure_and_continue(**context):
|
||||||
|
"""
|
||||||
|
Handles a failed download attempt by recording an error report to Redis.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
ti = context['task_instance']
|
||||||
|
|
||||||
|
job_data = params.get('job_data', {})
|
||||||
|
if isinstance(job_data, str):
|
||||||
|
try:
|
||||||
|
job_data = json.loads(job_data)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
job_data = {}
|
||||||
|
url = job_data.get('original_url', 'unknown')
|
||||||
|
|
||||||
|
# No token errors to collect, just report a generic download failure.
|
||||||
|
error_details = {'error_message': 'Download or probe stage failed.'}
|
||||||
|
|
||||||
|
logger.error(f"A failure occurred while processing URL '{url}'. Reporting to Redis.")
|
||||||
|
|
||||||
|
result_data = {
|
||||||
|
'status': 'failed',
|
||||||
|
'end_time': time.time(),
|
||||||
|
'url': url,
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
'error_details': error_details
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params
|
||||||
|
_update_client_stats(client, clients_str, 'failure', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on failure: {e}", exc_info=True)
|
||||||
|
|
||||||
|
result_queue = f"{params['queue_name']}_result"
|
||||||
|
fail_queue = f"{params['queue_name']}_fail"
|
||||||
|
|
||||||
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
|
||||||
|
with client.pipeline() as pipe:
|
||||||
|
pipe.hset(result_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hset(fail_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not report failure to Redis: {e}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
@task(trigger_rule='one_failed')
|
||||||
|
def handle_fatal_error(**context):
|
||||||
|
"""
|
||||||
|
Handles fatal, non-retryable errors (e.g., infrastructure issues).
|
||||||
|
This task reports the failure to Redis to ensure failed URLs are queued
|
||||||
|
for later reprocessing, but allows the processing loop to continue.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
ti = context['task_instance']
|
||||||
|
|
||||||
|
job_data = params.get('job_data', {})
|
||||||
|
if isinstance(job_data, str):
|
||||||
|
try:
|
||||||
|
job_data = json.loads(job_data)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
job_data = {}
|
||||||
|
url = job_data.get('original_url', 'unknown')
|
||||||
|
|
||||||
|
error_details = {'error_message': 'Fatal error during download stage.'}
|
||||||
|
|
||||||
|
logger.error(f"A fatal, non-retryable error occurred for URL '{url}'. See previous task logs for details.")
|
||||||
|
|
||||||
|
# Report failure to Redis so the URL can be reprocessed later
|
||||||
|
try:
|
||||||
|
client = _get_redis_client(params['redis_conn_id'])
|
||||||
|
|
||||||
|
# Update client-specific stats
|
||||||
|
try:
|
||||||
|
machine_id = params.get('machine_id') or socket.gethostname()
|
||||||
|
clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params
|
||||||
|
_update_client_stats(client, clients_str, 'failure', url, machine_id, context['dag_run'].run_id)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True)
|
||||||
|
|
||||||
|
result_data = {
|
||||||
|
'status': 'failed',
|
||||||
|
'end_time': time.time(),
|
||||||
|
'url': url,
|
||||||
|
'dag_run_id': context['dag_run'].run_id,
|
||||||
|
'error': 'fatal_error',
|
||||||
|
'error_message': 'Fatal non-retryable error occurred',
|
||||||
|
'error_details': error_details
|
||||||
|
}
|
||||||
|
result_queue = f"{params['queue_name']}_result"
|
||||||
|
fail_queue = f"{params['queue_name']}_fail"
|
||||||
|
|
||||||
|
progress_queue = f"{params['queue_name']}_progress"
|
||||||
|
|
||||||
|
with client.pipeline() as pipe:
|
||||||
|
pipe.hset(result_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hset(fail_queue, url, json.dumps(result_data))
|
||||||
|
pipe.hdel(progress_queue, url)
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Could not report fatal error to Redis: {e}", exc_info=True)
|
||||||
|
|
||||||
|
# Do not fail the DAG run. Allow the processing loop to continue.
|
||||||
|
logger.warning("A fatal error was handled, but the DAG is configured to continue the processing loop.")
|
||||||
|
|
||||||
|
|
||||||
|
@task(trigger_rule='one_success')
|
||||||
|
def continue_processing_loop(**context):
|
||||||
|
"""
|
||||||
|
After a successful run, triggers a new dispatcher to continue the processing loop,
|
||||||
|
effectively asking for the next URL to be processed.
|
||||||
|
"""
|
||||||
|
params = context['params']
|
||||||
|
dag_run = context['dag_run']
|
||||||
|
|
||||||
|
# Do not continue the loop for manual runs of the worker DAG.
|
||||||
|
# A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'.
|
||||||
|
if not dag_run.run_id.startswith('worker_run_'):
|
||||||
|
logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create a new unique run_id for the dispatcher.
|
||||||
|
# Using a timestamp and UUID ensures the ID is unique and does not grow in length over time,
|
||||||
|
# preventing database errors.
|
||||||
|
new_dispatcher_run_id = f"retriggered_by_worker_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}"
|
||||||
|
|
||||||
|
# Pass all original parameters from the orchestrator through to the new dispatcher run.
|
||||||
|
conf_to_pass = {k: v for k, v in params.items() if v is not None}
|
||||||
|
|
||||||
|
# The new dispatcher will pull its own job data and determine its own queue, so we don't pass these.
|
||||||
|
conf_to_pass.pop('job_data', None)
|
||||||
|
conf_to_pass.pop('worker_queue', None)
|
||||||
|
|
||||||
|
logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.")
|
||||||
|
trigger_dag(
|
||||||
|
dag_id='ytdlp_ops_v02_dispatcher_dl',
|
||||||
|
run_id=new_dispatcher_run_id,
|
||||||
|
conf=conf_to_pass,
|
||||||
|
replace_microseconds=False
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@task.branch(trigger_rule='one_failed')
|
||||||
|
def handle_download_failure_branch(**context):
|
||||||
|
"""If download or probe fails, routes to the standard failure reporting."""
|
||||||
|
logger.warning("Download or probe failed. Reporting failure and continuing loop.")
|
||||||
|
return 'report_failure_and_continue'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DAG Definition with TaskGroups
|
||||||
|
# =============================================================================
|
||||||
|
with DAG(
|
||||||
|
dag_id='ytdlp_ops_v02_worker_per_url_dl',
|
||||||
|
default_args=DEFAULT_ARGS,
|
||||||
|
schedule=None,
|
||||||
|
start_date=days_ago(1),
|
||||||
|
catchup=False,
|
||||||
|
tags=['ytdlp', 'worker'],
|
||||||
|
doc_md=__doc__,
|
||||||
|
render_template_as_native_obj=True,
|
||||||
|
is_paused_upon_creation=True,
|
||||||
|
params={
|
||||||
|
'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"),
|
||||||
|
'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"),
|
||||||
|
'machine_id': Param(None, type=["string", "null"]),
|
||||||
|
'clients': Param('mweb,web_camoufox,tv', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"),
|
||||||
|
'output_path_template': Param("%(title)s [%(id)s].f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."),
|
||||||
|
'retry_on_probe_failure': Param(False, type="boolean"),
|
||||||
|
'skip_probe': Param(False, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."),
|
||||||
|
'yt_dlp_cleanup_mode': Param(True, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."),
|
||||||
|
'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."),
|
||||||
|
'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."),
|
||||||
|
'fragment_retries': Param(10, type="integer", title="[Worker Param] Fragment Retries", description="Number of retries for a fragment before giving up."),
|
||||||
|
'limit_rate': Param('5M', type=["string", "null"], title="[Worker Param] Limit Rate", description="Download speed limit (e.g., 50K, 4.2M)."),
|
||||||
|
'socket_timeout': Param(15, type="integer", title="[Worker Param] Socket Timeout", description="Timeout in seconds for socket operations."),
|
||||||
|
'min_sleep_interval': Param(5, type="integer", title="[Worker Param] Min Sleep Interval", description="Minimum time to sleep between downloads (seconds)."),
|
||||||
|
'max_sleep_interval': Param(10, type="integer", title="[Worker Param] Max Sleep Interval", description="Maximum time to sleep between downloads (seconds)."),
|
||||||
|
'download_format_preset': Param(
|
||||||
|
'formats_2',
|
||||||
|
type="string",
|
||||||
|
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
|
||||||
|
title="Download Format Preset",
|
||||||
|
description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140,299/298/137/136/135/134/133\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
|
||||||
|
),
|
||||||
|
'download_format_custom': Param(
|
||||||
|
'ba[ext=m4a]/bestaudio/best',
|
||||||
|
type="string",
|
||||||
|
title="Custom Download Format",
|
||||||
|
description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
|
||||||
|
),
|
||||||
|
'downloader': Param(
|
||||||
|
'default',
|
||||||
|
type="string",
|
||||||
|
enum=['default', 'aria2c'],
|
||||||
|
title="Downloader",
|
||||||
|
description="Choose the downloader for yt-dlp."
|
||||||
|
),
|
||||||
|
'downloader_args_aria2c': Param(
|
||||||
|
'aria2c:-x 4 -k 2M --max-download-limit=3M',
|
||||||
|
type="string",
|
||||||
|
title="Aria2c Downloader Arguments",
|
||||||
|
description="Arguments to pass to yt-dlp's --downloader-args. Used when downloader is 'aria2c'."
|
||||||
|
),
|
||||||
|
'yt_dlp_extra_args': Param(
|
||||||
|
'--no-part --restrict-filenames',
|
||||||
|
type=["string", "null"],
|
||||||
|
title="Extra yt-dlp arguments",
|
||||||
|
description="Extra command-line arguments for yt-dlp during download."
|
||||||
|
),
|
||||||
|
# --- Manual Run / Internal Parameters ---
|
||||||
|
'job_data': Param(None, type=["object", "string", "null"], title="[Internal] Job Data from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
|
||||||
|
'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."),
|
||||||
|
}
|
||||||
|
) as dag:
|
||||||
|
job_data = get_download_job_from_conf()
|
||||||
|
|
||||||
|
# --- Task Instantiation ---
|
||||||
|
|
||||||
|
# Main success/failure handlers
|
||||||
|
fatal_error_task = handle_fatal_error()
|
||||||
|
report_failure_task = report_failure_and_continue()
|
||||||
|
continue_loop_task = continue_processing_loop()
|
||||||
|
|
||||||
|
# --- Download and Processing Group ---
|
||||||
|
with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group:
|
||||||
|
list_formats_task = list_available_formats(token_data=job_data)
|
||||||
|
download_task = download_and_probe(
|
||||||
|
token_data=job_data,
|
||||||
|
available_formats=list_formats_task,
|
||||||
|
)
|
||||||
|
download_branch_task = handle_download_failure_branch.override(trigger_rule='one_failed')()
|
||||||
|
success_task = mark_url_as_success(
|
||||||
|
job_data=job_data,
|
||||||
|
downloaded_file_paths=download_task,
|
||||||
|
)
|
||||||
|
|
||||||
|
list_formats_task >> download_task
|
||||||
|
download_task >> download_branch_task
|
||||||
|
download_branch_task >> report_failure_task
|
||||||
|
download_task >> success_task
|
||||||
|
success_task >> continue_loop_task
|
||||||
|
|
||||||
|
# If the initial job setup succeeds, proceed to the download group.
|
||||||
|
# If it fails, trigger the fatal error handler. This prevents fatal_error_task
|
||||||
|
# from being an "island" task that gets triggered by any other failure in the DAG.
|
||||||
|
job_data.operator >> download_processing_group
|
||||||
|
job_data.operator >> fatal_error_task
|
||||||
|
|
||||||
|
# Any failure path should continue the loop to process the next URL.
|
||||||
|
report_failure_task >> continue_loop_task
|
||||||
|
fatal_error_task >> continue_loop_task
|
||||||
9  ansible/MIGRATION.md  Normal file
@@ -0,0 +1,9 @@
# Migration Notes

This document tracks the process of migrating the Ansible deployment.

## Guiding Principles

- No changes to business logic or core functionality are permitted during this phase.
- The focus is solely on resolving file path issues, dependency errors, and structural inconsistencies resulting from the migration of a subset of files.
- All changes should be aimed at making the existing playbooks runnable in the new environment.
120  ansible/README-yt.md  Normal file
@@ -0,0 +1,120 @@
# Ansible-driven YT-DLP / Airflow Cluster – Quick-Start & Cheat-Sheet

> One playbook = one command to **deploy**, **update**, **restart**, or **re-configure** the entire cluster.

---

## 0. Prerequisites (run once on the **tower** server)

```
```

---

## 1. Ansible Vault Setup (run once on your **local machine**)

This project uses Ansible Vault to encrypt sensitive data like passwords and API keys. To run the playbooks, you need to provide the vault password. The recommended way is to create a file named `.vault_pass` in the root of the project directory.

1. **Create the Vault Password File:**
   From the project's root directory (e.g., `/opt/yt-ops-services`), create the file. The file should contain only your vault password on a single line.

   ```bash
   # Replace 'your_secret_password_here' with your actual vault password
   echo "your_secret_password_here" > .vault_pass
   ```

2. **Secure the File:**
   It's good practice to restrict permissions on this file so only you can read it.

   ```bash
   chmod 600 .vault_pass
   ```

The `ansible.cfg` file is configured to automatically look for this `.vault_pass` file in the project root.
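
For reference, the relevant `ansible.cfg` setting is assumed to be a `vault_password_file` entry pointing one level up (the file itself is not shown in this commit, so verify the exact value in your checkout):

```bash
# Run from inside ansible/ to confirm the assumed setting, e.g.
#   vault_password_file = ../.vault_pass
grep -n "vault_password_file" ansible.cfg
```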

---

## 1.5. Cluster & Inventory Management

The Ansible inventory (`ansible/inventory.ini`), host-specific variables (`ansible/host_vars/`), and the master `docker-compose.yaml` are dynamically generated from a central cluster definition file (e.g., `cluster.yml`).

**Whenever you add, remove, or change the IP of a node in your `cluster.yml`, you must re-run the generator script.**

1. **Install Script Dependencies (run once):**
   The generator script requires `PyYAML` and `Jinja2`. Install them using pip:

   ```bash
   pip3 install PyYAML Jinja2
   ```

2. **Edit Your Cluster Definition:**
   Modify your `cluster.yml` file (located in the project root) to define your master and worker nodes (see the hypothetical sketch at the end of this section).

3. **Run the Generator Script:**
   From the project's root directory, run the following command to update all generated files:

   ```bash
   # Make sure the script is executable first: chmod +x tools/generate-inventory.py
   ./tools/generate-inventory.py cluster.yml
   ```

This ensures that Ansible has the correct host information and that the master node's Docker Compose configuration includes the correct `extra_hosts` for log fetching from workers.
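
The exact `cluster.yml` schema is defined by `tools/generate-inventory.py` and is not shown in this commit; the sketch below is a hypothetical minimal layout meant only to illustrate the workflow, so check the generator script for the real field names.

```bash
# Hypothetical cluster.yml (field names are assumptions, not taken from this repo)
cat > cluster.yml <<'EOF'
master:
  host: 10.0.0.10
workers:
  - name: dl001
    host: 10.0.0.11
EOF
./tools/generate-inventory.py cluster.yml   # regenerates inventory.ini, host_vars/, docker-compose.yaml
```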
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Setup and Basic Usage

### Running Ansible Commands

**IMPORTANT:** All `ansible-playbook` commands should be run from within the `ansible/` directory. This allows Ansible to automatically find the `ansible.cfg` and `inventory.ini` files.

```bash
cd ansible
ansible-playbook <playbook_name>.yml
```

The `ansible.cfg` file is configured to automatically use the `.vault_pass` file located in the project root (one level above `ansible/`), so you **do not** need to pass `--vault-password-file ../.vault_pass` on the command line. Just make sure the `.vault_pass` file exists in the project root.

If you run `ansible-playbook` from the project root instead of the `ansible/` directory, you will see warnings about the inventory not being parsed, because Ansible does not automatically find `ansible/ansible.cfg`.
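
The behaviour described above corresponds to Ansible's standard `vault_password_file` setting. The exact line in `ansible/ansible.cfg` is assumed here rather than quoted from this diff, so verify it in your checkout:

```bash
# Confirm where ansible.cfg points for the vault password (run from the project root)
grep -n "vault_password_file" ansible/ansible.cfg
# Expected to print something along the lines of:
#   vault_password_file = ../.vault_pass
```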

---

## 3. Deployment Scenarios

### Full Cluster Deployment

To deploy or update the entire cluster (master and all workers), run the main playbook. This will build/pull images and restart all services.

```bash
# Run from inside the ansible/ directory
ansible-playbook playbook-full.yml
```

### Targeted & Fast Deployments

For faster development cycles, you can deploy changes to specific parts of the cluster without rebuilding or re-pulling Docker images.

#### Updating Only the Master Node (Fast Deploy)

To sync configuration, code, and restart services on the master node *without* rebuilding the Airflow image or pulling the `ytdlp-ops-server` image, use the `fast_deploy` flag with the master playbook. This is ideal for pushing changes to DAGs, Python code, or config files.

```bash
# Run from inside the ansible/ directory
ansible-playbook playbook-master.yml --extra-vars "fast_deploy=true"
```

#### Updating Only a Specific Worker Node (Fast Deploy)

Similarly, you can update a single worker node. Replace `dl001` with the hostname of the worker you want to target from your `inventory.ini`.

```bash
# Run from inside the ansible/ directory
ansible-playbook playbook-worker.yml --limit dl001 --extra-vars "fast_deploy=true"
```

#### Updating Only DAGs and Configs

If you have only changed DAGs or configuration files and don't need to restart any services, you can run a much faster playbook that only syncs the `dags/` and `config/` directories.

```bash
# Run from inside the ansible/ directory
ansible-playbook playbook-dags.yml
```

@@ -6,3 +6,5 @@ vault_vnc_password: "vnc_pwd_Z5xW8cV2bN4mP7lK"
 vault_ss_password_1: "UCUAR7vRO/u9Zo71nfA13c+/b1MCiJpfZJo+EmEBCfA="
 vault_ss_password_2: "tgtQcfjJp/A3F01g4woO0bEQoxij3CAOK/iR1OTPuF4="
 vault_dockerhub_password: "dckr_pat_DmFFqwFEdXFvZlgngGY9ooBaq6o"
+vault_s3_access_key_id: "admin"
+vault_s3_secret_access_key: "0153093693-0009"
@ -60,3 +60,4 @@
|
|||||||
loop:
|
loop:
|
||||||
- "airflow.cfg"
|
- "airflow.cfg"
|
||||||
- "custom_task_hooks.py"
|
- "custom_task_hooks.py"
|
||||||
|
|
||||||
|
|||||||
@ -111,6 +111,53 @@
|
|||||||
name: airflow_proxynet
|
name: airflow_proxynet
|
||||||
driver: bridge
|
driver: bridge
|
||||||
|
|
||||||
|
post_tasks:
|
||||||
|
- name: Sync custom_task_hooks.py to MASTER server
|
||||||
|
when: inventory_hostname in groups['airflow_master']
|
||||||
|
synchronize:
|
||||||
|
src: "../airflow/config/custom_task_hooks.py"
|
||||||
|
dest: "{{ airflow_master_dir }}/config/"
|
||||||
|
archive: yes
|
||||||
|
rsync_path: "sudo rsync"
|
||||||
|
|
||||||
|
- name: Sync airflow_local_settings.py to MASTER server
|
||||||
|
when: inventory_hostname in groups['airflow_master']
|
||||||
|
synchronize:
|
||||||
|
src: "../airflow/config/airflow_local_settings.py"
|
||||||
|
dest: "{{ airflow_master_dir }}/config/"
|
||||||
|
archive: yes
|
||||||
|
rsync_path: "sudo rsync"
|
||||||
|
|
||||||
|
- name: Sync custom_task_hooks.py to WORKER server
|
||||||
|
when: inventory_hostname in groups['airflow_workers']
|
||||||
|
synchronize:
|
||||||
|
src: "../airflow/config/custom_task_hooks.py"
|
||||||
|
dest: "{{ airflow_worker_dir }}/config/"
|
||||||
|
archive: yes
|
||||||
|
rsync_path: "sudo rsync"
|
||||||
|
|
||||||
|
- name: Sync airflow_local_settings.py to WORKER server
|
||||||
|
when: inventory_hostname in groups['airflow_workers']
|
||||||
|
synchronize:
|
||||||
|
src: "../airflow/config/airflow_local_settings.py"
|
||||||
|
dest: "{{ airflow_worker_dir }}/config/"
|
||||||
|
archive: yes
|
||||||
|
rsync_path: "sudo rsync"
|
||||||
|
|
||||||
|
- name: Restart Airflow services on MASTER to apply hook
|
||||||
|
when: inventory_hostname in groups['airflow_master']
|
||||||
|
ansible.builtin.command:
|
||||||
|
cmd: "docker compose restart airflow-scheduler airflow-webserver airflow-master-worker airflow-triggerer"
|
||||||
|
chdir: "{{ airflow_master_dir }}"
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Restart Airflow worker on WORKER to apply hook
|
||||||
|
when: inventory_hostname in groups['airflow_workers']
|
||||||
|
ansible.builtin.command:
|
||||||
|
cmd: "docker compose restart airflow-worker-dl airflow-worker-auth"
|
||||||
|
chdir: "{{ airflow_worker_dir }}"
|
||||||
|
become: yes
|
||||||
|
|
||||||
- name: Deploy master
|
- name: Deploy master
|
||||||
import_playbook: playbook-master.yml
|
import_playbook: playbook-master.yml
|
||||||
when: inventory_hostname in groups['airflow_master']
|
when: inventory_hostname in groups['airflow_master']
|
||||||
|
|||||||
@ -48,6 +48,6 @@
|
|||||||
- name: Restart Airflow worker on WORKER
|
- name: Restart Airflow worker on WORKER
|
||||||
when: inventory_hostname in groups['airflow_workers']
|
when: inventory_hostname in groups['airflow_workers']
|
||||||
ansible.builtin.command:
|
ansible.builtin.command:
|
||||||
cmd: "docker compose restart airflow-worker"
|
cmd: "docker compose restart airflow-worker-dl airflow-worker-auth"
|
||||||
chdir: "{{ airflow_worker_dir }}"
|
chdir: "{{ airflow_worker_dir }}"
|
||||||
become: yes
|
become: yes
|
||||||
|
|||||||
@ -144,6 +144,42 @@
|
|||||||
deploy_group_gid: "0"
|
deploy_group_gid: "0"
|
||||||
when: deploy_group_gid is not defined or deploy_group_gid == ""
|
when: deploy_group_gid is not defined or deploy_group_gid == ""
|
||||||
|
|
||||||
|
- name: Generate Docker Compose configurations
|
||||||
|
ansible.builtin.command: >
|
||||||
|
docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator
|
||||||
|
args:
|
||||||
|
chdir: "{{ airflow_master_dir }}"
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
register: config_generator_result
|
||||||
|
changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout"
|
||||||
|
|
||||||
|
- name: Show config generator output
|
||||||
|
ansible.builtin.debug:
|
||||||
|
var: config_generator_result.stdout_lines
|
||||||
|
when: config_generator_result.changed
|
||||||
|
|
||||||
|
- name: Ensure Airflow project directory is writable by the container user (UID 50000)
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ airflow_master_dir }}"
|
||||||
|
owner: 50000
|
||||||
|
group: 50000
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Ensure Airflow subdirectories are writable by the container user (UID 50000)
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ item }}"
|
||||||
|
owner: 50000
|
||||||
|
group: 50000
|
||||||
|
recurse: yes
|
||||||
|
state: directory
|
||||||
|
loop:
|
||||||
|
- "{{ airflow_master_dir }}/dags"
|
||||||
|
- "{{ airflow_master_dir }}/logs"
|
||||||
|
- "{{ airflow_master_dir }}/plugins"
|
||||||
|
- "{{ airflow_master_dir }}/config"
|
||||||
|
become: yes
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
- name: Install pipx
|
- name: Install pipx
|
||||||
ansible.builtin.apt:
|
ansible.builtin.apt:
|
||||||
@ -170,3 +206,23 @@
|
|||||||
- name: Include camoufox verification tasks
|
- name: Include camoufox verification tasks
|
||||||
include_tasks: tasks/verify_camoufox.yml
|
include_tasks: tasks/verify_camoufox.yml
|
||||||
when: not fast_deploy | default(false)
|
when: not fast_deploy | default(false)
|
||||||
|
|
||||||
|
- name: Run regression test
|
||||||
|
command: >
|
||||||
|
docker exec -i airflow-regression-runner python3 /opt/airflow/dags/scripts/regression.py
|
||||||
|
--client "{{ regression_client | default('mweb') }}"
|
||||||
|
--workers {{ regression_workers | default(4) }}
|
||||||
|
--workers-per-bunch {{ regression_workers_per_bunch | default(4) }}
|
||||||
|
--run-time-min {{ regression_run_time_min | default(120) }}
|
||||||
|
--input-file "{{ regression_input_file | default('/opt/airflow/inputfiles/video_ids.csv') }}"
|
||||||
|
--progress-interval-min {{ regression_progress_interval_min | default(2) }}
|
||||||
|
--report-file "{{ regression_report_file | default('/opt/airflow/downloadfiles/regression_report.csv') }}"
|
||||||
|
{% if regression_cleanup | default(true) %}--cleanup{% endif %}
|
||||||
|
register: regression_test_result
|
||||||
|
changed_when: false
|
||||||
|
when: run_regression_test | default(false)
|
||||||
|
|
||||||
|
- name: Display regression test output
|
||||||
|
debug:
|
||||||
|
var: regression_test_result.stdout_lines
|
||||||
|
when: run_regression_test | default(false)
|
||||||
|
|||||||
108
ansible/playbook-sync-local.yml
Normal file
108
ansible/playbook-sync-local.yml
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
---
|
||||||
|
- name: Sync Local Development Files to Workers
|
||||||
|
hosts: airflow_workers
|
||||||
|
gather_facts: no
|
||||||
|
vars_files:
|
||||||
|
- "{{ inventory_dir }}/group_vars/all/generated_vars.yml"
|
||||||
|
|
||||||
|
pre_tasks:
|
||||||
|
- name: Announce local sync
|
||||||
|
debug:
|
||||||
|
msg: "Syncing local dev files to {{ inventory_hostname }} at {{ airflow_worker_dir }}"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Check if yt-dlp is installed
|
||||||
|
ansible.builtin.command: which yt-dlp
|
||||||
|
register: ytdlp_check
|
||||||
|
changed_when: false
|
||||||
|
failed_when: false
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Install yt-dlp if not found
|
||||||
|
ansible.builtin.command: python3 -m pip install -U "yt-dlp[default]" --break-system-packages
|
||||||
|
when: ytdlp_check.rc != 0
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Sync thrift_model directory to workers
|
||||||
|
ansible.posix.synchronize:
|
||||||
|
src: ../thrift_model/
|
||||||
|
dest: "{{ airflow_worker_dir }}/thrift_model/"
|
||||||
|
rsync_opts:
|
||||||
|
- "--delete"
|
||||||
|
- "--exclude=.DS_Store"
|
||||||
|
- "--exclude=__pycache__"
|
||||||
|
- "--exclude='*.pyc'"
|
||||||
|
recursive: yes
|
||||||
|
perms: yes
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Sync pangramia package to workers
|
||||||
|
ansible.posix.synchronize:
|
||||||
|
src: ../pangramia/
|
||||||
|
dest: "{{ airflow_worker_dir }}/pangramia/"
|
||||||
|
rsync_opts:
|
||||||
|
- "--delete"
|
||||||
|
- "--exclude=.DS_Store"
|
||||||
|
- "--exclude=__pycache__"
|
||||||
|
- "--exclude='*.pyc'"
|
||||||
|
recursive: yes
|
||||||
|
perms: yes
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Sync ytops_client directory to workers
|
||||||
|
ansible.posix.synchronize:
|
||||||
|
src: ../ytops_client/
|
||||||
|
dest: "{{ airflow_worker_dir }}/ytops_client/"
|
||||||
|
rsync_opts:
|
||||||
|
- "--delete"
|
||||||
|
- "--exclude=.DS_Store"
|
||||||
|
- "--exclude=__pycache__"
|
||||||
|
- "--exclude='*.pyc'"
|
||||||
|
recursive: yes
|
||||||
|
perms: yes
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Sync policies directory to workers
|
||||||
|
ansible.posix.synchronize:
|
||||||
|
src: ../policies/
|
||||||
|
dest: "{{ airflow_worker_dir }}/policies/"
|
||||||
|
rsync_opts:
|
||||||
|
- "--delete"
|
||||||
|
- "--exclude=.DS_Store"
|
||||||
|
- "--exclude=__pycache__"
|
||||||
|
- "--exclude='*.pyc'"
|
||||||
|
recursive: yes
|
||||||
|
perms: yes
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Ensure bin directory exists on workers for client utilities
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ airflow_worker_dir }}/bin"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
|
||||||
|
- name: Sync client utility scripts to workers
|
||||||
|
ansible.posix.synchronize:
|
||||||
|
src: "../{{ item }}"
|
||||||
|
dest: "{{ airflow_worker_dir }}/{{ item }}"
|
||||||
|
perms: yes
|
||||||
|
loop:
|
||||||
|
- "README.client.md"
|
||||||
|
- "cli.config"
|
||||||
|
- "format_download.py"
|
||||||
|
- "get_info_json_client.py"
|
||||||
|
- "list_formats.py"
|
||||||
|
- "stress_test_formats.py"
|
||||||
|
- "stress_enhanced.py"
|
||||||
|
- "package_client.py"
|
||||||
|
- "bin/ytops-client"
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
27
ansible/playbook-update-regression-script.yml
Normal file
27
ansible/playbook-update-regression-script.yml
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
---
|
||||||
|
- name: Update Regression Test Script
|
||||||
|
hosts: airflow_master
|
||||||
|
gather_facts: no
|
||||||
|
vars:
|
||||||
|
# This should be the root directory of your project on the master host.
|
||||||
|
# It's set as a variable so you can override it if needed, e.g.,
|
||||||
|
# ansible-playbook ... -e "project_dir=/path/to/your/project"
|
||||||
|
project_dir: "/srv/airflow_master"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Copy latest regression.py script to the master host
|
||||||
|
copy:
|
||||||
|
src: ../airflow/dags/scripts/regression.py
|
||||||
|
dest: "{{ project_dir }}/dags/scripts/regression.py"
|
||||||
|
owner: "{{ ansible_user }}"
|
||||||
|
group: "ytdl" # Assuming the same deploy group as the main playbook
|
||||||
|
mode: '0644'
|
||||||
|
become: yes
|
||||||
|
notify:
|
||||||
|
- Announce completion
|
||||||
|
|
||||||
|
handlers:
|
||||||
|
- name: Announce completion
|
||||||
|
listen: "Announce completion"
|
||||||
|
debug:
|
||||||
|
msg: "Regression script has been updated on {{ inventory_hostname }}. You can now run it using 'docker exec'."
|
||||||
@ -8,7 +8,7 @@
|
|||||||
pre_tasks:
|
pre_tasks:
|
||||||
- name: Announce worker deployment
|
- name: Announce worker deployment
|
||||||
debug:
|
debug:
|
||||||
msg: "Starting deployment for Airflow Worker: {{ inventory_hostname }} ({{ ansible_host }})"
|
msg: "Starting deployment for Airflow Worker: {{ inventory_hostname }} ({{ ansible_user }}@{{ ansible_host }})"
|
||||||
|
|
||||||
- name: Configure system timezone
|
- name: Configure system timezone
|
||||||
# Ensures all services and logs on this node use a consistent timezone.
|
# Ensures all services and logs on this node use a consistent timezone.
|
||||||
@ -129,6 +129,96 @@
|
|||||||
become: yes
|
become: yes
|
||||||
when: limits_sysctl_config_copy.changed
|
when: limits_sysctl_config_copy.changed
|
||||||
|
|
||||||
|
- name: Create logs directory structure relative to deployment
|
||||||
|
file:
|
||||||
|
path: "./logs/yt-dlp-ops/communication_logs"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
owner: "{{ ansible_user }}"
|
||||||
|
group: "{{ deploy_group }}"
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Build local Docker images (e.g., camoufox)
|
||||||
|
ansible.builtin.command: >
|
||||||
|
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build
|
||||||
|
args:
|
||||||
|
chdir: "{{ airflow_worker_dir }}"
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
register: docker_build_result
|
||||||
|
changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout"
|
||||||
|
|
||||||
|
- name: Pull pre-built Docker images for ytdlp-ops services
|
||||||
|
ansible.builtin.command: >
|
||||||
|
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable
|
||||||
|
args:
|
||||||
|
chdir: "{{ airflow_worker_dir }}"
|
||||||
|
become: yes
|
||||||
|
become_user: "{{ ansible_user }}"
|
||||||
|
register: docker_pull_result
|
||||||
|
retries: 3
|
||||||
|
delay: 10
|
||||||
|
changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout"
|
||||||
|
|
||||||
|
- name: Show docker pull output
|
||||||
|
ansible.builtin.debug:
|
||||||
|
var: docker_pull_result.stdout_lines
|
||||||
|
when: docker_pull_result.changed
|
||||||
|
|
||||||
|
- name: Ensure Airflow project directory is writable by the container user (UID 50000)
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ airflow_worker_dir }}"
|
||||||
|
owner: 50000
|
||||||
|
group: 50000
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Ensure Airflow subdirectories are writable by the container user (UID 50000)
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ item }}"
|
||||||
|
owner: 50000
|
||||||
|
group: 50000
|
||||||
|
recurse: yes
|
||||||
|
state: directory
|
||||||
|
loop:
|
||||||
|
- "{{ airflow_worker_dir }}/dags"
|
||||||
|
- "{{ airflow_worker_dir }}/logs"
|
||||||
|
- "{{ airflow_worker_dir }}/plugins"
|
||||||
|
- "{{ airflow_worker_dir }}/config"
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: Create .dockerignore on worker to exclude runtime data from build context
|
||||||
|
ansible.builtin.copy:
|
||||||
|
dest: "{{ airflow_worker_dir }}/.dockerignore"
|
||||||
|
content: |
|
||||||
|
# Exclude build artifacts and virtual environments
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
|
||||||
|
# Exclude sensitive information
|
||||||
|
.env
|
||||||
|
.vault_pass
|
||||||
|
|
||||||
|
# Exclude local development and OS-specific files
|
||||||
|
.DS_Store
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
|
||||||
|
# Exclude large directories with runtime data that should not be in the image
|
||||||
|
logs/
|
||||||
|
downloadfiles/
|
||||||
|
addfiles/
|
||||||
|
*downloads/
|
||||||
|
postgres-data/
|
||||||
|
redis-data/
|
||||||
|
minio-data/
|
||||||
|
owner: "{{ ansible_user }}"
|
||||||
|
group: "{{ deploy_group }}"
|
||||||
|
mode: '0644'
|
||||||
|
become: yes
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
- name: Install pipx
|
- name: Install pipx
|
||||||
ansible.builtin.apt:
|
ansible.builtin.apt:
|
||||||
|
|||||||
22
ansible/playbook-ytdlp-master-only.yml
Normal file
22
ansible/playbook-ytdlp-master-only.yml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
---
|
||||||
|
- name: Deploy YTDLP Master Services (Management Role Only)
|
||||||
|
hosts: airflow_master
|
||||||
|
gather_facts: no
|
||||||
|
vars_files:
|
||||||
|
- "{{ inventory_dir }}/group_vars/all/generated_vars.yml"
|
||||||
|
- "{{ inventory_dir }}/group_vars/all/vault.yml"
|
||||||
|
tasks:
|
||||||
|
- name: Announce ytdlp-master-only deployment
|
||||||
|
debug:
|
||||||
|
msg: "Starting deployment for YTDLP Master services on: {{ inventory_hostname }}"
|
||||||
|
|
||||||
|
- name: Start/Redeploy ytdlp-ops services without camoufox
|
||||||
|
community.docker.docker_compose_v2:
|
||||||
|
project_src: "{{ airflow_master_dir }}"
|
||||||
|
files:
|
||||||
|
- configs/docker-compose-ytdlp-ops.yaml
|
||||||
|
state: present
|
||||||
|
remove_orphans: true
|
||||||
|
recreate: always
|
||||||
|
pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"
|
||||||
|
become: yes
|
||||||
19
ansible/playbooks/playbook-bgutils-start.yml
Normal file
19
ansible/playbooks/playbook-bgutils-start.yml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
- name: Start bgutil-provider service
|
||||||
|
hosts: all # Use --limit to target specific hosts, e.g., --limit management
|
||||||
|
become: true
|
||||||
|
gather_facts: false
|
||||||
|
vars:
|
||||||
|
container_name: "bgutil-provider"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: "Ensure {{ container_name }} container is started"
|
||||||
|
community.docker.docker_container:
|
||||||
|
name: "{{ container_name }}"
|
||||||
|
state: started
|
||||||
|
register: container_status
|
||||||
|
|
||||||
|
- name: "Display container status"
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: "{{ container_name }} was started."
|
||||||
|
when: container_status.changed
|
||||||
19
ansible/playbooks/playbook-bgutils-stop.yml
Normal file
19
ansible/playbooks/playbook-bgutils-stop.yml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
- name: Stop bgutil-provider service
|
||||||
|
hosts: all # Use --limit to target specific hosts, e.g., --limit management
|
||||||
|
become: true
|
||||||
|
gather_facts: false
|
||||||
|
vars:
|
||||||
|
container_name: "bgutil-provider"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: "Ensure {{ container_name }} container is stopped"
|
||||||
|
community.docker.docker_container:
|
||||||
|
name: "{{ container_name }}"
|
||||||
|
state: stopped
|
||||||
|
register: container_status
|
||||||
|
|
||||||
|
- name: "Display container status"
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: "{{ container_name }} was stopped."
|
||||||
|
when: container_status.changed
|
||||||
53
ansible/playbooks/restart_worker.yml
Normal file
53
ansible/playbooks/restart_worker.yml
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
---
|
||||||
|
- name: Restart and Update ytdlp-ops Worker
|
||||||
|
hosts: all:!af-green
|
||||||
|
vars:
|
||||||
|
# This should be the root directory of your project on the target worker machine.
|
||||||
|
project_dir: "{{ '/srv/airflow_master' if inventory_hostname == 'af-green' else '/srv/airflow_dl_worker' }}"
|
||||||
|
# This is the path to your compose file, relative to the project_dir.
|
||||||
|
compose_file: "configs/docker-compose-ytdlp-ops.yaml"
|
||||||
|
# The specific image to pull for updates.
|
||||||
|
service_image: "pangramia/ytdlp-ops-server:4.0.1"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: "Ensure project directory exists"
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ project_dir }}"
|
||||||
|
state: directory
|
||||||
|
mode: '0755'
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: "Copy get_info_json_client.py to worker"
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: ../../get_info_json_client.py
|
||||||
|
dest: "{{ project_dir }}/get_info_json_client.py"
|
||||||
|
mode: '0755'
|
||||||
|
become: yes
|
||||||
|
|
||||||
|
- name: "Pull the latest image for the ytdlp-ops service"
|
||||||
|
community.docker.docker_image:
|
||||||
|
name: "{{ service_image }}"
|
||||||
|
source: pull
|
||||||
|
tags:
|
||||||
|
- pull
|
||||||
|
|
||||||
|
- name: "Take down the ytdlp-ops services"
|
||||||
|
community.docker.docker_compose_v2:
|
||||||
|
project_src: "{{ project_dir }}"
|
||||||
|
files:
|
||||||
|
- "{{ compose_file }}"
|
||||||
|
state: absent
|
||||||
|
remove_volumes: true
|
||||||
|
tags:
|
||||||
|
- down
|
||||||
|
|
||||||
|
- name: "Bring up the ytdlp-ops services"
|
||||||
|
community.docker.docker_compose_v2:
|
||||||
|
project_src: "{{ project_dir }}"
|
||||||
|
files:
|
||||||
|
- "{{ compose_file }}"
|
||||||
|
state: present
|
||||||
|
recreate: always # Corresponds to --force-recreate
|
||||||
|
build: never
|
||||||
|
tags:
|
||||||
|
- up
|
||||||
3
ansible/roles/ytdlp-worker/defaults/main.yml
Normal file
3
ansible/roles/ytdlp-worker/defaults/main.yml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
# defaults file for ytdlp-worker
|
||||||
|
camoufox_base_port: 10000
|
||||||
@ -101,6 +101,22 @@
|
|||||||
- "envoy.yaml.j2"
|
- "envoy.yaml.j2"
|
||||||
- "docker-compose.camoufox.yaml.j2"
|
- "docker-compose.camoufox.yaml.j2"
|
||||||
|
|
||||||
|
- name: Sync Airflow build context to worker
|
||||||
|
synchronize:
|
||||||
|
src: "../{{ item }}"
|
||||||
|
dest: "{{ airflow_worker_dir }}/"
|
||||||
|
archive: yes
|
||||||
|
recursive: yes
|
||||||
|
rsync_path: "sudo rsync"
|
||||||
|
rsync_opts: "{{ rsync_default_opts }}"
|
||||||
|
loop:
|
||||||
|
- "airflow/Dockerfile"
|
||||||
|
- "setup.py"
|
||||||
|
- "VERSION"
|
||||||
|
- "yt_ops_services"
|
||||||
|
- "thrift_model"
|
||||||
|
- "pangramia"
|
||||||
|
|
||||||
- name: Create .env file for YT-DLP worker service
|
- name: Create .env file for YT-DLP worker service
|
||||||
template:
|
template:
|
||||||
src: "../../templates/.env.j2"
|
src: "../../templates/.env.j2"
|
||||||
@ -179,6 +195,20 @@
|
|||||||
group: "{{ deploy_group }}"
|
group: "{{ deploy_group }}"
|
||||||
become: yes
|
become: yes
|
||||||
|
|
||||||
|
- name: "Log: Building Airflow image"
|
||||||
|
debug:
|
||||||
|
msg: "Building the Airflow image locally. This image contains all dependencies for running DAGs."
|
||||||
|
|
||||||
|
- name: Build Airflow image from local Dockerfile
|
||||||
|
community.docker.docker_image:
|
||||||
|
name: "pangramia/ytdlp-ops-airflow:latest"
|
||||||
|
build:
|
||||||
|
path: "{{ airflow_worker_dir }}"
|
||||||
|
dockerfile: "Dockerfile"
|
||||||
|
source: build
|
||||||
|
force_source: true
|
||||||
|
when: not fast_deploy | default(false)
|
||||||
|
|
||||||
- name: "Log: Building Camoufox (remote browser) image"
|
- name: "Log: Building Camoufox (remote browser) image"
|
||||||
debug:
|
debug:
|
||||||
msg: "Building the Camoufox image locally. This image provides remote-controlled Firefox browsers for token generation."
|
msg: "Building the Camoufox image locally. This image provides remote-controlled Firefox browsers for token generation."
|
||||||
@ -206,6 +236,27 @@
|
|||||||
path: "/srv/shadowsocks-rust/docker-compose.proxies.yaml"
|
path: "/srv/shadowsocks-rust/docker-compose.proxies.yaml"
|
||||||
register: proxy_compose_file
|
register: proxy_compose_file
|
||||||
|
|
||||||
|
- name: "Log: Stopping worker services before start"
|
||||||
|
debug:
|
||||||
|
msg: "Stopping all worker services to ensure a clean start."
|
||||||
|
|
||||||
|
- name: Stop all worker services
|
||||||
|
community.docker.docker_compose_v2:
|
||||||
|
project_src: "{{ airflow_worker_dir }}"
|
||||||
|
files:
|
||||||
|
- "configs/docker-compose-ytdlp-ops.yaml"
|
||||||
|
- "configs/docker-compose.camoufox.yaml"
|
||||||
|
- "configs/docker-compose.airflow.yml"
|
||||||
|
state: absent
|
||||||
|
remove_volumes: true # Corresponds to docker compose down -v
|
||||||
|
|
||||||
|
- name: Forcefully remove project-specific Docker volumes to fix corruption issues
|
||||||
|
ansible.builtin.shell: "docker volume ls -q --filter 'label=com.docker.compose.project=ytdlp-ops-worker' | xargs -r docker volume rm --force"
|
||||||
|
become: yes
|
||||||
|
register: removed_volumes
|
||||||
|
changed_when: removed_volumes.stdout | length > 0
|
||||||
|
failed_when: false
|
||||||
|
|
||||||
- name: "Log: Starting all worker services"
|
- name: "Log: Starting all worker services"
|
||||||
debug:
|
debug:
|
||||||
msg: "Starting all worker services: ytdlp-ops, camoufox, and airflow-worker."
|
msg: "Starting all worker services: ytdlp-ops, camoufox, and airflow-worker."
|
||||||
@ -220,6 +271,7 @@
|
|||||||
state: present
|
state: present
|
||||||
remove_orphans: true
|
remove_orphans: true
|
||||||
pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"
|
pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"
|
||||||
|
recreate: always # Corresponds to --force-recreate
|
||||||
|
|
||||||
- name: Include camoufox verification tasks
|
- name: Include camoufox verification tasks
|
||||||
include_tasks: ../../../tasks/verify_camoufox.yml
|
include_tasks: ../../../tasks/verify_camoufox.yml
|
||||||
|
|||||||
@ -29,6 +29,14 @@ FLOWER_PASSWORD="{{ vault_flower_password }}"
|
|||||||
AIRFLOW_UID={{ airflow_uid | default(1003) }}
|
AIRFLOW_UID={{ airflow_uid | default(1003) }}
|
||||||
AIRFLOW_GID={{ deploy_group_gid | default(1001) }}
|
AIRFLOW_GID={{ deploy_group_gid | default(1001) }}
|
||||||
|
|
||||||
|
# --- S3 Logging Configuration (for Airflow integration) ---
|
||||||
|
# Optional: for appending service logs to Airflow's S3 logs.
|
||||||
|
# These should match the 'minio_default' connection configured in Airflow.
|
||||||
|
S3_ENDPOINT_URL="{{ s3_endpoint_url | default('') }}"
|
||||||
|
S3_ACCESS_KEY_ID="{{ vault_s3_access_key_id | default('') }}"
|
||||||
|
S3_SECRET_ACCESS_KEY="{{ vault_s3_secret_access_key | default('') }}"
|
||||||
|
S3_REGION_NAME="{{ s3_region_name | default('us-east-1') }}"
|
||||||
|
|
||||||
# --- Master-specific settings ---
|
# --- Master-specific settings ---
|
||||||
{% if 'master' in service_role or 'management' in service_role %}
|
{% if 'master' in service_role or 'management' in service_role %}
|
||||||
MASTER_HOST_IP={{ hostvars[groups['airflow_master'][0]].ansible_host }}
|
MASTER_HOST_IP={{ hostvars[groups['airflow_master'][0]].ansible_host }}
|
||||||
|
|||||||
10	bin/ytops-client	Executable file
@@ -0,0 +1,10 @@
#!/bin/sh
set -e
# Find the directory where this script is located.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Go up one level to the project root.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Set PYTHONPATH to include the project root, so we can import 'ytops_client'
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
# Execute the Python CLI script as a module to handle relative imports
exec python3 -m ytops_client.cli "$@"
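
The wrapper above simply re-execs `python3 -m ytops_client.cli` with the repository root on `PYTHONPATH`. The subcommands that CLI accepts are defined in `ytops_client/cli.py`, which is not part of this excerpt, so only a generic invocation is sketched here:

```bash
# Run from a checkout of the repository; the wrapper sets PYTHONPATH itself
./bin/ytops-client --help
```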
35	cli.config	Normal file
@@ -0,0 +1,35 @@
# yt-dlp configuration for format_download.py

# Continue on broken downloads
#--continue

# Do not simulate
--no-simulate

# Do not write info.json file (we already have it)
--no-write-info-json

# Continue on download errors
--ignore-errors

# Do not download playlist
--no-playlist

# Retry fragments 10 times
--fragment-retries 10

# Limit download rate to 5M
--limit-rate 5M

# Socket timeout
--socket-timeout 15

# Sleep interval
--min-sleep-interval 5
--max-sleep-interval 10

# Progress
--progress


--no-part
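
According to its header comment, `cli.config` holds yt-dlp options for `format_download.py`. How that script feeds the file to yt-dlp is not shown in this diff; as a rough illustration, the same option file can be reused with a stand-alone yt-dlp run via the standard `--config-locations` flag (the `info.json` path and format selector are placeholders):

```bash
# Illustrative only: reuse the packaged option file with plain yt-dlp
yt-dlp --config-locations ./cli.config --load-info-json ./info.json -f "bv*+ba/b"
```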
@ -1,150 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Client script to get info.json from the Thrift service.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python get_info_json_client.py [URL] --host [HOST] --port [PORT] [options]
|
|
||||||
|
|
||||||
Options:
|
|
||||||
--host HOST Thrift server host
|
|
||||||
--port PORT Thrift server port
|
|
||||||
--account-id ID Account ID to use
|
|
||||||
--output FILE Output file path
|
|
||||||
--verbose Enable verbose output
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, Optional
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
||||||
)
|
|
||||||
logger = logging.getLogger('info_json_client')
|
|
||||||
|
|
||||||
# Import Thrift modules
|
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
from thrift.transport import TTransport
|
|
||||||
from pangramia.yt.common.ttypes import TokenUpdateMode
|
|
||||||
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
|
||||||
from yt_ops_services.client_utils import get_thrift_client
|
|
||||||
|
|
||||||
def parse_args():
|
|
||||||
"""Parse command line arguments"""
|
|
||||||
parser = argparse.ArgumentParser(description='Get info.json from Thrift service')
|
|
||||||
parser.add_argument('url', help='YouTube URL or video ID')
|
|
||||||
parser.add_argument('--host', default='127.0.0.1', help="Thrift server host. Using 127.0.0.1 avoids harmless connection errors when the local Envoy proxy only listens on IPv4.")
|
|
||||||
parser.add_argument('--port', type=int, default=9080, help='Thrift server port')
|
|
||||||
parser.add_argument('--profile', default='default_profile', help='The profile name (accountId) to use for the request.')
|
|
||||||
parser.add_argument('--client', help='Specific client to use (e.g., web, ios). Overrides server default. Append "_camoufox" to any client name (e.g., "web_camoufox") to force the browser-based generation strategy.')
|
|
||||||
parser.add_argument('--output', help='Output file path for the info.json. If not provided, prints to stdout.')
|
|
||||||
parser.add_argument('--machine-id', help='Identifier for the client machine. Defaults to hostname.')
|
|
||||||
parser.add_argument('--verbose', action='store_true', help='Enable verbose output')
|
|
||||||
return parser.parse_args()
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main entry point"""
|
|
||||||
args = parse_args()
|
|
||||||
|
|
||||||
# Set log level
|
|
||||||
if args.verbose:
|
|
||||||
logger.setLevel(logging.DEBUG)
|
|
||||||
|
|
||||||
transport = None
|
|
||||||
try:
|
|
||||||
# Create Thrift client
|
|
||||||
client, transport = get_thrift_client(args.host, args.port)
|
|
||||||
|
|
||||||
# Get token data, which includes the info.json
|
|
||||||
logger.info(f"Requesting info.json for URL '{args.url}' using profile '{args.profile}'")
|
|
||||||
|
|
||||||
# Prepare arguments for the Thrift call
|
|
||||||
machine_id = args.machine_id
|
|
||||||
if not machine_id:
|
|
||||||
import socket
|
|
||||||
machine_id = socket.gethostname()
|
|
||||||
logger.info(f"No machine ID provided, using hostname: {machine_id}")
|
|
||||||
|
|
||||||
thrift_args = {
|
|
||||||
'accountId': args.profile,
|
|
||||||
'updateType': TokenUpdateMode.AUTO,
|
|
||||||
'url': args.url,
|
|
||||||
'clients': args.client,
|
|
||||||
'machineId': machine_id
|
|
||||||
}
|
|
||||||
if args.client:
|
|
||||||
logger.info(f"Requesting to use specific client: {args.client}")
|
|
||||||
else:
|
|
||||||
logger.info("No specific client requested, server will use its default.")
|
|
||||||
|
|
||||||
token_data = client.getOrRefreshToken(**thrift_args)
|
|
||||||
|
|
||||||
if not token_data or not hasattr(token_data, 'infoJson') or not token_data.infoJson:
|
|
||||||
logger.error("Server did not return valid info.json data.")
|
|
||||||
print("Error: Server did not return valid info.json data.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
info_json_str = token_data.infoJson
|
|
||||||
|
|
||||||
# Check if the returned info.json is an error report
|
|
||||||
try:
|
|
||||||
info_data = json.loads(info_json_str)
|
|
||||||
if isinstance(info_data, dict) and 'error' in info_data:
|
|
||||||
error_code = info_data.get('errorCode', 'N/A')
|
|
||||||
error_message = info_data.get('message', info_data.get('error', 'Unknown error'))
|
|
||||||
logger.error(f"Server returned an error in info.json (Code: {error_code}): {error_message}")
|
|
||||||
print(f"Error from server (Code: {error_code}): {error_message}", file=sys.stderr)
|
|
||||||
# Optionally print the full error JSON
|
|
||||||
if args.verbose:
|
|
||||||
print(json.dumps(info_data, indent=2), file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.error(f"Failed to parse info.json from server: {info_json_str[:200]}...")
|
|
||||||
print("Error: Failed to parse the info.json response from the server.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
logger.info(f"Successfully retrieved info.json ({len(info_json_str)} bytes)")
|
|
||||||
|
|
||||||
# Write to output file if specified, otherwise print to stdout
|
|
||||||
if args.output:
|
|
||||||
try:
|
|
||||||
with open(args.output, 'w', encoding='utf-8') as f:
|
|
||||||
# Pretty-print the JSON to the file
|
|
||||||
json.dump(info_data, f, indent=2)
|
|
||||||
logger.info(f"Wrote info.json to {args.output}")
|
|
||||||
print(f"Successfully saved info.json to {args.output}")
|
|
||||||
except IOError as e:
|
|
||||||
logger.error(f"Failed to write to output file {args.output}: {e}")
|
|
||||||
print(f"Error: Failed to write to output file {args.output}: {e}", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
# Pretty-print the JSON to stdout
|
|
||||||
print(json.dumps(info_data, indent=2))
|
|
||||||
|
|
||||||
return 0
|
|
||||||
except (PBServiceException, PBUserException) as e:
|
|
||||||
logger.error(f"A Thrift error occurred: {e.message}", exc_info=args.verbose)
|
|
||||||
print(f"Error: {e.message}", file=sys.stderr)
|
|
||||||
if hasattr(e, 'context') and e.context:
|
|
||||||
print(f"Context: {e.context}", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
except TTransport.TTransportException as e:
|
|
||||||
logger.error(f"Connection to server failed: {e}", exc_info=args.verbose)
|
|
||||||
print(f"Error: Connection to server at {args.host}:{args.port} failed.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
except Exception as e:
|
|
||||||
logger.exception(f"An unexpected error occurred: {e}")
|
|
||||||
print(f"An unexpected error occurred: {e}", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
finally:
|
|
||||||
if transport and transport.isOpen():
|
|
||||||
transport.close()
|
|
||||||
logger.info("Thrift connection closed.")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
sys.exit(main())
|
|
||||||
117
package_client.py
Executable file
117
package_client.py
Executable file
@ -0,0 +1,117 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Packages the client-side scripts and their dependencies into a distributable .tar.gz archive.
|
||||||
|
|
||||||
|
This script should be run from the root of the project repository.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
import tarfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Assumes yt_ops_services/version.py exists and is importable
|
||||||
|
from yt_ops_services.version import get_version as get_api_version
|
||||||
|
except ImportError:
|
||||||
|
print("Error: Could not import get_version from yt_ops_services.version.", file=sys.stderr)
|
||||||
|
print("Please ensure yt_ops_services/version.py exists and run this script from the project root.", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def get_client_version():
|
||||||
|
"""Reads the client version from the VERSION.client file."""
|
||||||
|
try:
|
||||||
|
return Path('VERSION.client').read_text(encoding='utf-8').strip()
|
||||||
|
except FileNotFoundError:
|
||||||
|
print("Error: VERSION.client file not found in the project root.", file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# --- Configuration ---
|
||||||
|
|
||||||
|
# Defines the content of the package.
|
||||||
|
# Keys are source paths relative to the project root.
|
||||||
|
# Values are destination paths inside the archive.
|
||||||
|
PACKAGE_CONTENT = {
|
||||||
|
'get_info_json_client.py': 'get_info_json_client.py',
|
||||||
|
'list_formats.py': 'list_formats.py',
|
||||||
|
'format_download.py': 'format_download.py',
|
||||||
|
'stress_test_formats.py': 'stress_test_formats.py',
|
||||||
|
'cli.config': 'cli.config',
|
||||||
|
'README.client.md': 'README.md', # Rename for convention
|
||||||
|
'formats.md': 'formats.md',
|
||||||
|
'VERSION.client': 'VERSION.client',
|
||||||
|
'yt_ops_services': 'yt_ops_services',
|
||||||
|
'thrift_model/gen_py': 'thrift_model/gen_py',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Client-side Python requirements
|
||||||
|
CLIENT_REQUIREMENTS = [
|
||||||
|
'thrift==0.16.0',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""Main entry point"""
|
||||||
|
parser = argparse.ArgumentParser(description="Package the yt-ops-services client tools.")
|
||||||
|
parser.add_argument('--output-dir', default='dist', help='Directory to save the package file (default: dist).')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
api_version = get_api_version()
|
||||||
|
client_version = get_client_version()
|
||||||
|
package_name = f"yt-ops-services-client-{api_version}-{client_version}"
|
||||||
|
archive_filename = f"{package_name}.tar.gz"
|
||||||
|
|
||||||
|
os.makedirs(args.output_dir, exist_ok=True)
|
||||||
|
archive_path = os.path.join(args.output_dir, archive_filename)
|
||||||
|
|
||||||
|
staging_dir = Path(args.output_dir) / f"{package_name}-staging"
|
||||||
|
|
||||||
|
print(f"Creating client package: {archive_filename}")
|
||||||
|
|
||||||
|
if staging_dir.exists():
|
||||||
|
shutil.rmtree(staging_dir)
|
||||||
|
staging_dir.mkdir(parents=True)
|
||||||
|
|
||||||
|
package_root = staging_dir / package_name
|
||||||
|
package_root.mkdir()
|
||||||
|
|
||||||
|
try:
|
||||||
|
print("Staging files...")
|
||||||
|
for src, dest in PACKAGE_CONTENT.items():
|
||||||
|
src_path = Path(src)
|
||||||
|
dest_path = package_root / dest
|
||||||
|
|
||||||
|
if not src_path.exists():
|
||||||
|
print(f"Warning: Source not found, skipping: {src_path}", file=sys.stderr)
|
||||||
|
continue
|
||||||
|
|
||||||
|
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
if src_path.is_dir():
|
||||||
|
shutil.copytree(src_path, dest_path)
|
||||||
|
else:
|
||||||
|
shutil.copy2(src_path, dest_path)
|
||||||
|
|
||||||
|
# Create __init__.py to ensure thrift_model is a package
|
||||||
|
(package_root / 'thrift_model/__init__.py').touch()
|
||||||
|
|
||||||
|
print("Creating requirements.txt...")
|
||||||
|
(package_root / 'requirements.txt').write_text('\n'.join(CLIENT_REQUIREMENTS) + '\n', encoding='utf-8')
|
||||||
|
|
||||||
|
print(f"Creating archive at {archive_path}...")
|
||||||
|
with tarfile.open(archive_path, "w:gz") as tar:
|
||||||
|
tar.add(package_root, arcname=package_name)
|
||||||
|
|
||||||
|
print("\nPackage created successfully!")
|
||||||
|
print(f" -> {archive_path}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
if staging_dir.exists():
|
||||||
|
print("Cleaning up staging directory...")
|
||||||
|
shutil.rmtree(staging_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
0	pangramia/__init__.py	Normal file
BIN	pangramia/__pycache__/__init__.cpython-39.pyc	Normal file
Binary file not shown.
131
pangramia/base_service/BaseService-remote
Executable file
131
pangramia/base_service/BaseService-remote
Executable file
@ -0,0 +1,131 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Autogenerated by Thrift Compiler (0.20.0)
|
||||||
|
#
|
||||||
|
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||||
|
#
|
||||||
|
# options string: py
|
||||||
|
#
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import pprint
|
||||||
|
if sys.version_info[0] > 2:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
else:
|
||||||
|
from urlparse import urlparse
|
||||||
|
from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient
|
||||||
|
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
|
||||||
|
|
||||||
|
from pangramia.base_service import BaseService
|
||||||
|
from pangramia.base_service.ttypes import *
|
||||||
|
|
||||||
|
if len(sys.argv) <= 1 or sys.argv[1] == '--help':
|
||||||
|
print('')
|
||||||
|
print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]')
|
||||||
|
print('')
|
||||||
|
print('Functions:')
|
||||||
|
print(' bool ping()')
|
||||||
|
print(' bool reportError(string message, details)')
|
||||||
|
print(' void shutdown()')
|
||||||
|
print('')
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
pp = pprint.PrettyPrinter(indent=2)
|
||||||
|
host = 'localhost'
|
||||||
|
port = 9090
|
||||||
|
uri = ''
|
||||||
|
framed = False
|
||||||
|
ssl = False
|
||||||
|
validate = True
|
||||||
|
ca_certs = None
|
||||||
|
keyfile = None
|
||||||
|
certfile = None
|
||||||
|
http = False
|
||||||
|
argi = 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-h':
|
||||||
|
parts = sys.argv[argi + 1].split(':')
|
||||||
|
host = parts[0]
|
||||||
|
if len(parts) > 1:
|
||||||
|
port = int(parts[1])
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-u':
|
||||||
|
url = urlparse(sys.argv[argi + 1])
|
||||||
|
parts = url[1].split(':')
|
||||||
|
host = parts[0]
|
||||||
|
if len(parts) > 1:
|
||||||
|
port = int(parts[1])
|
||||||
|
else:
|
||||||
|
port = 80
|
||||||
|
uri = url[2]
|
||||||
|
if url[4]:
|
||||||
|
uri += '?%s' % url[4]
|
||||||
|
http = True
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
|
||||||
|
framed = True
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl':
|
||||||
|
ssl = True
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-novalidate':
|
||||||
|
validate = False
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-ca_certs':
|
||||||
|
ca_certs = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-keyfile':
|
||||||
|
keyfile = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-certfile':
|
||||||
|
certfile = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
cmd = sys.argv[argi]
|
||||||
|
args = sys.argv[argi + 1:]
|
||||||
|
|
||||||
|
if http:
|
||||||
|
transport = THttpClient.THttpClient(host, port, uri)
|
||||||
|
else:
|
||||||
|
if ssl:
|
||||||
|
socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile)
|
||||||
|
else:
|
||||||
|
socket = TSocket.TSocket(host, port)
|
||||||
|
if framed:
|
||||||
|
transport = TTransport.TFramedTransport(socket)
|
||||||
|
else:
|
||||||
|
transport = TTransport.TBufferedTransport(socket)
|
||||||
|
protocol = TBinaryProtocol(transport)
|
||||||
|
client = BaseService.Client(protocol)
|
||||||
|
transport.open()
|
||||||
|
|
||||||
|
if cmd == 'ping':
|
||||||
|
if len(args) != 0:
|
||||||
|
print('ping requires 0 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.ping())
|
||||||
|
|
||||||
|
elif cmd == 'reportError':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('reportError requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.reportError(args[0], eval(args[1]),))
|
||||||
|
|
||||||
|
elif cmd == 'shutdown':
|
||||||
|
if len(args) != 0:
|
||||||
|
print('shutdown requires 0 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.shutdown())
|
||||||
|
|
||||||
|
else:
|
||||||
|
print('Unrecognized method %s' % cmd)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
transport.close()
|
||||||
564
pangramia/base_service/BaseService.py
Normal file
564
pangramia/base_service/BaseService.py
Normal file
@ -0,0 +1,564 @@
|
|||||||
|
#
|
||||||
|
# Autogenerated by Thrift Compiler (0.20.0)
|
||||||
|
#
|
||||||
|
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||||
|
#
|
||||||
|
# options string: py
|
||||||
|
#
|
||||||
|
|
||||||
|
from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
|
||||||
|
from thrift.protocol.TProtocol import TProtocolException
|
||||||
|
from thrift.TRecursive import fix_spec
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
from .ttypes import *
|
||||||
|
from thrift.Thrift import TProcessor
|
||||||
|
from thrift.transport import TTransport
|
||||||
|
all_structs = []
|
||||||
|
|
||||||
|
|
||||||
|
class Iface(object):
|
||||||
|
def ping(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def reportError(self, message, details):
|
||||||
|
"""
|
||||||
|
Parameters:
|
||||||
|
- message
|
||||||
|
- details
|
||||||
|
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def shutdown(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class Client(Iface):
|
||||||
|
def __init__(self, iprot, oprot=None):
|
||||||
|
self._iprot = self._oprot = iprot
|
||||||
|
if oprot is not None:
|
||||||
|
self._oprot = oprot
|
||||||
|
self._seqid = 0
|
||||||
|
|
||||||
|
def ping(self):
|
||||||
|
self.send_ping()
|
||||||
|
return self.recv_ping()
|
||||||
|
|
||||||
|
def send_ping(self):
|
||||||
|
self._oprot.writeMessageBegin('ping', TMessageType.CALL, self._seqid)
|
||||||
|
args = ping_args()
|
||||||
|
args.write(self._oprot)
|
||||||
|
self._oprot.writeMessageEnd()
|
||||||
|
self._oprot.trans.flush()
|
||||||
|
|
||||||
|
def recv_ping(self):
|
||||||
|
iprot = self._iprot
|
||||||
|
(fname, mtype, rseqid) = iprot.readMessageBegin()
|
||||||
|
if mtype == TMessageType.EXCEPTION:
|
||||||
|
x = TApplicationException()
|
||||||
|
x.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
raise x
|
||||||
|
result = ping_result()
|
||||||
|
result.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
if result.success is not None:
|
||||||
|
return result.success
|
||||||
|
if result.serviceExp is not None:
|
||||||
|
raise result.serviceExp
|
||||||
|
if result.userExp is not None:
|
||||||
|
raise result.userExp
|
||||||
|
raise TApplicationException(TApplicationException.MISSING_RESULT, "ping failed: unknown result")
|
||||||
|
|
||||||
|
def reportError(self, message, details):
|
||||||
|
"""
|
||||||
|
Parameters:
|
||||||
|
- message
|
||||||
|
- details
|
||||||
|
|
||||||
|
"""
|
||||||
|
self.send_reportError(message, details)
|
||||||
|
return self.recv_reportError()
|
||||||
|
|
||||||
|
def send_reportError(self, message, details):
|
||||||
|
self._oprot.writeMessageBegin('reportError', TMessageType.CALL, self._seqid)
|
||||||
|
args = reportError_args()
|
||||||
|
args.message = message
|
||||||
|
args.details = details
|
||||||
|
args.write(self._oprot)
|
||||||
|
self._oprot.writeMessageEnd()
|
||||||
|
self._oprot.trans.flush()
|
||||||
|
|
||||||
|
def recv_reportError(self):
|
||||||
|
iprot = self._iprot
|
||||||
|
(fname, mtype, rseqid) = iprot.readMessageBegin()
|
||||||
|
if mtype == TMessageType.EXCEPTION:
|
||||||
|
x = TApplicationException()
|
||||||
|
x.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
raise x
|
||||||
|
result = reportError_result()
|
||||||
|
result.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
if result.success is not None:
|
||||||
|
return result.success
|
||||||
|
if result.serviceExp is not None:
|
||||||
|
raise result.serviceExp
|
||||||
|
if result.userExp is not None:
|
||||||
|
raise result.userExp
|
||||||
|
raise TApplicationException(TApplicationException.MISSING_RESULT, "reportError failed: unknown result")
|
||||||
|
|
||||||
|
def shutdown(self):
|
||||||
|
self.send_shutdown()
|
||||||
|
|
||||||
|
def send_shutdown(self):
|
||||||
|
self._oprot.writeMessageBegin('shutdown', TMessageType.ONEWAY, self._seqid)
|
||||||
|
args = shutdown_args()
|
||||||
|
args.write(self._oprot)
|
||||||
|
self._oprot.writeMessageEnd()
|
||||||
|
self._oprot.trans.flush()
|
||||||
|
|
||||||
|
|
||||||
|
class Processor(Iface, TProcessor):
|
||||||
|
def __init__(self, handler):
|
||||||
|
self._handler = handler
|
||||||
|
self._processMap = {}
|
||||||
|
self._processMap["ping"] = Processor.process_ping
|
||||||
|
self._processMap["reportError"] = Processor.process_reportError
|
||||||
|
self._processMap["shutdown"] = Processor.process_shutdown
|
||||||
|
self._on_message_begin = None
|
||||||
|
|
||||||
|
def on_message_begin(self, func):
|
||||||
|
self._on_message_begin = func
|
||||||
|
|
||||||
|
def process(self, iprot, oprot):
|
||||||
|
(name, type, seqid) = iprot.readMessageBegin()
|
||||||
|
if self._on_message_begin:
|
||||||
|
self._on_message_begin(name, type, seqid)
|
||||||
|
if name not in self._processMap:
|
||||||
|
iprot.skip(TType.STRUCT)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
x = TApplicationException(TApplicationException.UNKNOWN_METHOD, 'Unknown function %s' % (name))
|
||||||
|
oprot.writeMessageBegin(name, TMessageType.EXCEPTION, seqid)
|
||||||
|
x.write(oprot)
|
||||||
|
oprot.writeMessageEnd()
|
||||||
|
oprot.trans.flush()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
self._processMap[name](self, seqid, iprot, oprot)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def process_ping(self, seqid, iprot, oprot):
|
||||||
|
args = ping_args()
|
||||||
|
args.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
result = ping_result()
|
||||||
|
try:
|
||||||
|
result.success = self._handler.ping()
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
except TTransport.TTransportException:
|
||||||
|
raise
|
||||||
|
except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp:
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
result.serviceExp = serviceExp
|
||||||
|
except pangramia.yt.exceptions.ttypes.PBUserException as userExp:
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
result.userExp = userExp
|
||||||
|
except TApplicationException as ex:
|
||||||
|
logging.exception('TApplication exception in handler')
|
||||||
|
msg_type = TMessageType.EXCEPTION
|
||||||
|
result = ex
|
||||||
|
except Exception:
|
||||||
|
logging.exception('Unexpected exception in handler')
|
||||||
|
msg_type = TMessageType.EXCEPTION
|
||||||
|
result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error')
|
||||||
|
oprot.writeMessageBegin("ping", msg_type, seqid)
|
||||||
|
result.write(oprot)
|
||||||
|
oprot.writeMessageEnd()
|
||||||
|
oprot.trans.flush()
|
||||||
|
|
||||||
|
def process_reportError(self, seqid, iprot, oprot):
|
||||||
|
args = reportError_args()
|
||||||
|
args.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
result = reportError_result()
|
||||||
|
try:
|
||||||
|
result.success = self._handler.reportError(args.message, args.details)
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
except TTransport.TTransportException:
|
||||||
|
raise
|
||||||
|
except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp:
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
result.serviceExp = serviceExp
|
||||||
|
except pangramia.yt.exceptions.ttypes.PBUserException as userExp:
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
result.userExp = userExp
|
||||||
|
except TApplicationException as ex:
|
||||||
|
logging.exception('TApplication exception in handler')
|
||||||
|
msg_type = TMessageType.EXCEPTION
|
||||||
|
result = ex
|
||||||
|
except Exception:
|
||||||
|
logging.exception('Unexpected exception in handler')
|
||||||
|
msg_type = TMessageType.EXCEPTION
|
||||||
|
result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error')
|
||||||
|
oprot.writeMessageBegin("reportError", msg_type, seqid)
|
||||||
|
result.write(oprot)
|
||||||
|
oprot.writeMessageEnd()
|
||||||
|
oprot.trans.flush()
|
||||||
|
|
||||||
|
def process_shutdown(self, seqid, iprot, oprot):
|
||||||
|
args = shutdown_args()
|
||||||
|
args.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
try:
|
||||||
|
self._handler.shutdown()
|
||||||
|
except TTransport.TTransportException:
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
logging.exception('Exception in oneway handler')
|
||||||
|
|
||||||
|
# HELPER FUNCTIONS AND STRUCTURES
|
||||||
|
|
||||||
|
|
||||||
|
class ping_args(object):
|
||||||
|
|
||||||
|
|
||||||
|
def read(self, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
|
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
|
||||||
|
return
|
||||||
|
iprot.readStructBegin()
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('ping_args')
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
|
all_structs.append(ping_args)
|
||||||
|
ping_args.thrift_spec = (
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ping_result(object):
|
||||||
|
"""
|
||||||
|
Attributes:
|
||||||
|
- success
|
||||||
|
- serviceExp
|
||||||
|
- userExp
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, success=None, serviceExp=None, userExp=None,):
|
||||||
|
self.success = success
|
||||||
|
self.serviceExp = serviceExp
|
||||||
|
self.userExp = userExp
|
||||||
|
|
||||||
|
def read(self, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
|
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
|
||||||
|
return
|
||||||
|
iprot.readStructBegin()
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
if fid == 0:
|
||||||
|
if ftype == TType.BOOL:
|
||||||
|
self.success = iprot.readBool()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 1:
|
||||||
|
if ftype == TType.STRUCT:
|
||||||
|
self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 2:
|
||||||
|
if ftype == TType.STRUCT:
|
||||||
|
self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('ping_result')
|
||||||
|
if self.success is not None:
|
||||||
|
oprot.writeFieldBegin('success', TType.BOOL, 0)
|
||||||
|
oprot.writeBool(self.success)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.serviceExp is not None:
|
||||||
|
oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1)
|
||||||
|
self.serviceExp.write(oprot)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.userExp is not None:
|
||||||
|
oprot.writeFieldBegin('userExp', TType.STRUCT, 2)
|
||||||
|
self.userExp.write(oprot)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
|
all_structs.append(ping_result)
|
||||||
|
ping_result.thrift_spec = (
|
||||||
|
(0, TType.BOOL, 'success', None, None, ), # 0
|
||||||
|
(1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1
|
||||||
|
(2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class reportError_args(object):
|
||||||
|
"""
|
||||||
|
Attributes:
|
||||||
|
- message
|
||||||
|
- details
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, message=None, details=None,):
|
||||||
|
self.message = message
|
||||||
|
self.details = details
|
||||||
|
|
||||||
|
def read(self, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
|
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
|
||||||
|
return
|
||||||
|
iprot.readStructBegin()
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
if fid == 1:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.message = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 2:
|
||||||
|
if ftype == TType.MAP:
|
||||||
|
self.details = {}
|
||||||
|
(_ktype1, _vtype2, _size0) = iprot.readMapBegin()
|
||||||
|
for _i4 in range(_size0):
|
||||||
|
_key5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
_val6 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
self.details[_key5] = _val6
|
||||||
|
iprot.readMapEnd()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('reportError_args')
|
||||||
|
if self.message is not None:
|
||||||
|
oprot.writeFieldBegin('message', TType.STRING, 1)
|
||||||
|
oprot.writeString(self.message.encode('utf-8') if sys.version_info[0] == 2 else self.message)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.details is not None:
|
||||||
|
oprot.writeFieldBegin('details', TType.MAP, 2)
|
||||||
|
oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.details))
|
||||||
|
for kiter7, viter8 in self.details.items():
|
||||||
|
oprot.writeString(kiter7.encode('utf-8') if sys.version_info[0] == 2 else kiter7)
|
||||||
|
oprot.writeString(viter8.encode('utf-8') if sys.version_info[0] == 2 else viter8)
|
||||||
|
oprot.writeMapEnd()
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
|
all_structs.append(reportError_args)
|
||||||
|
reportError_args.thrift_spec = (
|
||||||
|
None, # 0
|
||||||
|
(1, TType.STRING, 'message', 'UTF8', None, ), # 1
|
||||||
|
(2, TType.MAP, 'details', (TType.STRING, 'UTF8', TType.STRING, 'UTF8', False), None, ), # 2
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class reportError_result(object):
|
||||||
|
"""
|
||||||
|
Attributes:
|
||||||
|
- success
|
||||||
|
- serviceExp
|
||||||
|
- userExp
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, success=None, serviceExp=None, userExp=None,):
|
||||||
|
self.success = success
|
||||||
|
self.serviceExp = serviceExp
|
||||||
|
self.userExp = userExp
|
||||||
|
|
||||||
|
def read(self, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
|
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
|
||||||
|
return
|
||||||
|
iprot.readStructBegin()
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
if fid == 0:
|
||||||
|
if ftype == TType.BOOL:
|
||||||
|
self.success = iprot.readBool()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 1:
|
||||||
|
if ftype == TType.STRUCT:
|
||||||
|
self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 2:
|
||||||
|
if ftype == TType.STRUCT:
|
||||||
|
self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('reportError_result')
|
||||||
|
if self.success is not None:
|
||||||
|
oprot.writeFieldBegin('success', TType.BOOL, 0)
|
||||||
|
oprot.writeBool(self.success)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.serviceExp is not None:
|
||||||
|
oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1)
|
||||||
|
self.serviceExp.write(oprot)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.userExp is not None:
|
||||||
|
oprot.writeFieldBegin('userExp', TType.STRUCT, 2)
|
||||||
|
self.userExp.write(oprot)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
|
all_structs.append(reportError_result)
|
||||||
|
reportError_result.thrift_spec = (
|
||||||
|
(0, TType.BOOL, 'success', None, None, ), # 0
|
||||||
|
(1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1
|
||||||
|
(2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class shutdown_args(object):


    def read(self, iprot):
        if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
            iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
            return
        iprot.readStructBegin()
        while True:
            (fname, ftype, fid) = iprot.readFieldBegin()
            if ftype == TType.STOP:
                break
            else:
                iprot.skip(ftype)
            iprot.readFieldEnd()
        iprot.readStructEnd()

    def write(self, oprot):
        if oprot._fast_encode is not None and self.thrift_spec is not None:
            oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
            return
        oprot.writeStructBegin('shutdown_args')
        oprot.writeFieldStop()
        oprot.writeStructEnd()

    def validate(self):
        return

    def __repr__(self):
        L = ['%s=%r' % (key, value)
             for key, value in self.__dict__.items()]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    def __ne__(self, other):
        return not (self == other)
all_structs.append(shutdown_args)
shutdown_args.thrift_spec = (
)
fix_spec(all_structs)
del all_structs
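For reference, the generated Processor above only dispatches to a plain Python object that implements the service methods it names (ping, reportError, shutdown). The sketch below is illustrative and not part of the generated code; the class name and return values are assumptions.

# Minimal handler sketch for the generated Processor above (names assumed).
import logging

class ExampleHandler:
    def ping(self):
        # ping_result.success is a bool, so return one.
        return True

    def reportError(self, message, details):
        # `details` arrives as a map<string, string>, per reportError_args.
        logging.error("client reported error: %s %s", message, details)
        return True

    def shutdown(self):
        # Oneway call: process_shutdown never writes a reply.
        logging.info("shutdown requested")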
1
pangramia/base_service/__init__.py
Normal file
@ -0,0 +1 @@
__all__ = ['ttypes', 'constants', 'BaseService']
14
pangramia/base_service/constants.py
Normal file
@ -0,0 +1,14 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
from .ttypes import *
20
pangramia/base_service/ttypes.py
Normal file
@ -0,0 +1,20 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
import pangramia.yt.common.ttypes
import pangramia.yt.exceptions.ttypes

from thrift.transport import TTransport
all_structs = []
fix_spec(all_structs)
del all_structs
0
pangramia/yt/__init__.py
Normal file
1
pangramia/yt/common/__init__.py
Normal file
@ -0,0 +1 @@
__all__ = ['ttypes', 'constants']
14
pangramia/yt/common/constants.py
Normal file
@ -0,0 +1,14 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
from .ttypes import *
1403
pangramia/yt/common/ttypes.py
Normal file
File diff suppressed because it is too large
1
pangramia/yt/exceptions/__init__.py
Normal file
@ -0,0 +1 @@
__all__ = ['ttypes', 'constants']
BIN
pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc
Normal file
Binary file not shown.
14
pangramia/yt/exceptions/constants.py
Normal file
@ -0,0 +1,14 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
from .ttypes import *
254
pangramia/yt/exceptions/ttypes.py
Normal file
@ -0,0 +1,254 @@
|
|||||||
|
#
|
||||||
|
# Autogenerated by Thrift Compiler (0.20.0)
|
||||||
|
#
|
||||||
|
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||||
|
#
|
||||||
|
# options string: py
|
||||||
|
#
|
||||||
|
|
||||||
|
from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
|
||||||
|
from thrift.protocol.TProtocol import TProtocolException
|
||||||
|
from thrift.TRecursive import fix_spec
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from thrift.transport import TTransport
|
||||||
|
all_structs = []
|
||||||
|
|
||||||
|
|
||||||
|
class PBServiceException(TException):
|
||||||
|
"""
|
||||||
|
Attributes:
|
||||||
|
- message
|
||||||
|
- errorCode
|
||||||
|
- context
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, message=None, errorCode=None, context=None,):
|
||||||
|
super(PBServiceException, self).__setattr__('message', message)
|
||||||
|
super(PBServiceException, self).__setattr__('errorCode', errorCode)
|
||||||
|
super(PBServiceException, self).__setattr__('context', context)
|
||||||
|
|
||||||
|
def __setattr__(self, *args):
|
||||||
|
raise TypeError("can't modify immutable instance")
|
||||||
|
|
||||||
|
def __delattr__(self, *args):
|
||||||
|
raise TypeError("can't modify immutable instance")
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.__class__) ^ hash((self.message, self.errorCode, self.context, ))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def read(cls, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and cls.thrift_spec is not None:
|
||||||
|
return iprot._fast_decode(None, iprot, [cls, cls.thrift_spec])
|
||||||
|
iprot.readStructBegin()
|
||||||
|
message = None
|
||||||
|
errorCode = None
|
||||||
|
context = None
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
if fid == 1:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
message = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 2:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
errorCode = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 3:
|
||||||
|
if ftype == TType.MAP:
|
||||||
|
context = {}
|
||||||
|
(_ktype1, _vtype2, _size0) = iprot.readMapBegin()
|
||||||
|
for _i4 in range(_size0):
|
||||||
|
_key5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
_val6 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
context[_key5] = _val6
|
||||||
|
iprot.readMapEnd()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
return cls(
|
||||||
|
message=message,
|
||||||
|
errorCode=errorCode,
|
||||||
|
context=context,
|
||||||
|
)
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('PBServiceException')
|
||||||
|
if self.message is not None:
|
||||||
|
oprot.writeFieldBegin('message', TType.STRING, 1)
|
||||||
|
oprot.writeString(self.message.encode('utf-8') if sys.version_info[0] == 2 else self.message)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.errorCode is not None:
|
||||||
|
oprot.writeFieldBegin('errorCode', TType.STRING, 2)
|
||||||
|
oprot.writeString(self.errorCode.encode('utf-8') if sys.version_info[0] == 2 else self.errorCode)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.context is not None:
|
||||||
|
oprot.writeFieldBegin('context', TType.MAP, 3)
|
||||||
|
oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.context))
|
||||||
|
for kiter7, viter8 in self.context.items():
|
||||||
|
oprot.writeString(kiter7.encode('utf-8') if sys.version_info[0] == 2 else kiter7)
|
||||||
|
oprot.writeString(viter8.encode('utf-8') if sys.version_info[0] == 2 else viter8)
|
||||||
|
oprot.writeMapEnd()
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
if self.message is None:
|
||||||
|
raise TProtocolException(message='Required field message is unset!')
|
||||||
|
return
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return repr(self)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
|
|
||||||
|
|
||||||
|
class PBUserException(TException):
|
||||||
|
"""
|
||||||
|
Attributes:
|
||||||
|
- message
|
||||||
|
- errorCode
|
||||||
|
- context
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, message=None, errorCode=None, context=None,):
|
||||||
|
super(PBUserException, self).__setattr__('message', message)
|
||||||
|
super(PBUserException, self).__setattr__('errorCode', errorCode)
|
||||||
|
super(PBUserException, self).__setattr__('context', context)
|
||||||
|
|
||||||
|
def __setattr__(self, *args):
|
||||||
|
raise TypeError("can't modify immutable instance")
|
||||||
|
|
||||||
|
def __delattr__(self, *args):
|
||||||
|
raise TypeError("can't modify immutable instance")
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.__class__) ^ hash((self.message, self.errorCode, self.context, ))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def read(cls, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and cls.thrift_spec is not None:
|
||||||
|
return iprot._fast_decode(None, iprot, [cls, cls.thrift_spec])
|
||||||
|
iprot.readStructBegin()
|
||||||
|
message = None
|
||||||
|
errorCode = None
|
||||||
|
context = None
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
if fid == 1:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
message = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 2:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
errorCode = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 3:
|
||||||
|
if ftype == TType.MAP:
|
||||||
|
context = {}
|
||||||
|
(_ktype10, _vtype11, _size9) = iprot.readMapBegin()
|
||||||
|
for _i13 in range(_size9):
|
||||||
|
_key14 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
_val15 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
context[_key14] = _val15
|
||||||
|
iprot.readMapEnd()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
return cls(
|
||||||
|
message=message,
|
||||||
|
errorCode=errorCode,
|
||||||
|
context=context,
|
||||||
|
)
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('PBUserException')
|
||||||
|
if self.message is not None:
|
||||||
|
oprot.writeFieldBegin('message', TType.STRING, 1)
|
||||||
|
oprot.writeString(self.message.encode('utf-8') if sys.version_info[0] == 2 else self.message)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.errorCode is not None:
|
||||||
|
oprot.writeFieldBegin('errorCode', TType.STRING, 2)
|
||||||
|
oprot.writeString(self.errorCode.encode('utf-8') if sys.version_info[0] == 2 else self.errorCode)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.context is not None:
|
||||||
|
oprot.writeFieldBegin('context', TType.MAP, 3)
|
||||||
|
oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.context))
|
||||||
|
for kiter16, viter17 in self.context.items():
|
||||||
|
oprot.writeString(kiter16.encode('utf-8') if sys.version_info[0] == 2 else kiter16)
|
||||||
|
oprot.writeString(viter17.encode('utf-8') if sys.version_info[0] == 2 else viter17)
|
||||||
|
oprot.writeMapEnd()
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
if self.message is None:
|
||||||
|
raise TProtocolException(message='Required field message is unset!')
|
||||||
|
return
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return repr(self)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
|
all_structs.append(PBServiceException)
|
||||||
|
PBServiceException.thrift_spec = (
|
||||||
|
None, # 0
|
||||||
|
(1, TType.STRING, 'message', 'UTF8', None, ), # 1
|
||||||
|
(2, TType.STRING, 'errorCode', 'UTF8', None, ), # 2
|
||||||
|
(3, TType.MAP, 'context', (TType.STRING, 'UTF8', TType.STRING, 'UTF8', False), None, ), # 3
|
||||||
|
)
|
||||||
|
all_structs.append(PBUserException)
|
||||||
|
PBUserException.thrift_spec = (
|
||||||
|
None, # 0
|
||||||
|
(1, TType.STRING, 'message', 'UTF8', None, ), # 1
|
||||||
|
(2, TType.STRING, 'errorCode', 'UTF8', None, ), # 2
|
||||||
|
(3, TType.MAP, 'context', (TType.STRING, 'UTF8', TType.STRING, 'UTF8', False), None, ), # 3
|
||||||
|
)
|
||||||
|
fix_spec(all_structs)
|
||||||
|
del all_structs
|
||||||
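The two exception structs above are immutable (their __setattr__ raises), so every field must be supplied at construction time. A hedged usage sketch with illustrative field values:

# Illustrative only: field names come from the generated struct above.
from pangramia.yt.exceptions.ttypes import PBServiceException

exc = PBServiceException(
    message="token refresh failed",        # required; validate() enforces it
    errorCode="TOKEN_REFRESH",             # optional string code (value assumed)
    context={"accountId": "acct-001"},     # optional map<string, string>
)
try:
    raise exc
except PBServiceException as caught:
    print(caught.message, caught.errorCode, caught.context)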
215
pangramia/yt/management/YTManagementService-remote
Executable file
@ -0,0 +1,215 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Autogenerated by Thrift Compiler (0.20.0)
|
||||||
|
#
|
||||||
|
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||||
|
#
|
||||||
|
# options string: py
|
||||||
|
#
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import pprint
|
||||||
|
if sys.version_info[0] > 2:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
else:
|
||||||
|
from urlparse import urlparse
|
||||||
|
from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient
|
||||||
|
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
|
||||||
|
|
||||||
|
from pangramia.yt.management import YTManagementService
|
||||||
|
from pangramia.yt.management.ttypes import *
|
||||||
|
|
||||||
|
if len(sys.argv) <= 1 or sys.argv[1] == '--help':
|
||||||
|
print('')
|
||||||
|
print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]')
|
||||||
|
print('')
|
||||||
|
print('Functions:')
|
||||||
|
print(' getProxyStatus(string serverIdentity)')
|
||||||
|
print(' bool banProxy(string proxyUrl, string serverIdentity)')
|
||||||
|
print(' bool unbanProxy(string proxyUrl, string serverIdentity)')
|
||||||
|
print(' bool resetAllProxyStatuses(string serverIdentity)')
|
||||||
|
print(' bool banAllProxies(string serverIdentity)')
|
||||||
|
print(' bool deleteProxyFromRedis(string proxyUrl, string serverIdentity)')
|
||||||
|
print(' i32 deleteAllProxiesFromRedis(string serverIdentity)')
|
||||||
|
print(' getAccountStatus(string accountId, string accountPrefix)')
|
||||||
|
print(' bool banAccount(string accountId, string reason)')
|
||||||
|
print(' bool unbanAccount(string accountId, string reason)')
|
||||||
|
print(' bool deleteAccountFromRedis(string accountId)')
|
||||||
|
print(' i32 deleteAllAccountsFromRedis(string accountPrefix)')
|
||||||
|
print(' bool ping()')
|
||||||
|
print(' bool reportError(string message, details)')
|
||||||
|
print(' void shutdown()')
|
||||||
|
print('')
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
pp = pprint.PrettyPrinter(indent=2)
|
||||||
|
host = 'localhost'
|
||||||
|
port = 9090
|
||||||
|
uri = ''
|
||||||
|
framed = False
|
||||||
|
ssl = False
|
||||||
|
validate = True
|
||||||
|
ca_certs = None
|
||||||
|
keyfile = None
|
||||||
|
certfile = None
|
||||||
|
http = False
|
||||||
|
argi = 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-h':
|
||||||
|
parts = sys.argv[argi + 1].split(':')
|
||||||
|
host = parts[0]
|
||||||
|
if len(parts) > 1:
|
||||||
|
port = int(parts[1])
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-u':
|
||||||
|
url = urlparse(sys.argv[argi + 1])
|
||||||
|
parts = url[1].split(':')
|
||||||
|
host = parts[0]
|
||||||
|
if len(parts) > 1:
|
||||||
|
port = int(parts[1])
|
||||||
|
else:
|
||||||
|
port = 80
|
||||||
|
uri = url[2]
|
||||||
|
if url[4]:
|
||||||
|
uri += '?%s' % url[4]
|
||||||
|
http = True
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
|
||||||
|
framed = True
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl':
|
||||||
|
ssl = True
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-novalidate':
|
||||||
|
validate = False
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-ca_certs':
|
||||||
|
ca_certs = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-keyfile':
|
||||||
|
keyfile = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-certfile':
|
||||||
|
certfile = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
cmd = sys.argv[argi]
|
||||||
|
args = sys.argv[argi + 1:]
|
||||||
|
|
||||||
|
if http:
|
||||||
|
transport = THttpClient.THttpClient(host, port, uri)
|
||||||
|
else:
|
||||||
|
if ssl:
|
||||||
|
socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile)
|
||||||
|
else:
|
||||||
|
socket = TSocket.TSocket(host, port)
|
||||||
|
if framed:
|
||||||
|
transport = TTransport.TFramedTransport(socket)
|
||||||
|
else:
|
||||||
|
transport = TTransport.TBufferedTransport(socket)
|
||||||
|
protocol = TBinaryProtocol(transport)
|
||||||
|
client = YTManagementService.Client(protocol)
|
||||||
|
transport.open()
|
||||||
|
|
||||||
|
if cmd == 'getProxyStatus':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('getProxyStatus requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getProxyStatus(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'banProxy':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('banProxy requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.banProxy(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'unbanProxy':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('unbanProxy requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.unbanProxy(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'resetAllProxyStatuses':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('resetAllProxyStatuses requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.resetAllProxyStatuses(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'banAllProxies':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('banAllProxies requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.banAllProxies(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteProxyFromRedis':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('deleteProxyFromRedis requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteProxyFromRedis(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteAllProxiesFromRedis':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('deleteAllProxiesFromRedis requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteAllProxiesFromRedis(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'getAccountStatus':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('getAccountStatus requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getAccountStatus(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'banAccount':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('banAccount requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.banAccount(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'unbanAccount':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('unbanAccount requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.unbanAccount(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteAccountFromRedis':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('deleteAccountFromRedis requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteAccountFromRedis(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteAllAccountsFromRedis':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('deleteAllAccountsFromRedis requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteAllAccountsFromRedis(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'ping':
|
||||||
|
if len(args) != 0:
|
||||||
|
print('ping requires 0 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.ping())
|
||||||
|
|
||||||
|
elif cmd == 'reportError':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('reportError requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.reportError(args[0], eval(args[1]),))
|
||||||
|
|
||||||
|
elif cmd == 'shutdown':
|
||||||
|
if len(args) != 0:
|
||||||
|
print('shutdown requires 0 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.shutdown())
|
||||||
|
|
||||||
|
else:
|
||||||
|
print('Unrecognized method %s' % cmd)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
transport.close()
|
||||||
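The -remote helper above also documents the minimal client wiring: socket, buffered transport, binary protocol, then YTManagementService.Client. A condensed sketch of the same setup (host, port, and the serverIdentity argument are illustrative):

from thrift.transport import TSocket, TTransport
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from pangramia.yt.management import YTManagementService

socket = TSocket.TSocket('localhost', 9090)
transport = TTransport.TBufferedTransport(socket)
client = YTManagementService.Client(TBinaryProtocol(transport))
transport.open()
print(client.ping())                              # -> bool
print(client.getProxyStatus('server-identity'))   # serverIdentity value is illustrative
transport.close()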
2816
pangramia/yt/management/YTManagementService.py
Normal file
File diff suppressed because it is too large
1
pangramia/yt/management/__init__.py
Normal file
@ -0,0 +1 @@
__all__ = ['ttypes', 'constants', 'YTManagementService']
14
pangramia/yt/management/constants.py
Normal file
@ -0,0 +1,14 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
from .ttypes import *
21
pangramia/yt/management/ttypes.py
Normal file
@ -0,0 +1,21 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
import pangramia.yt.common.ttypes
import pangramia.yt.exceptions.ttypes
import pangramia.base_service.ttypes

from thrift.transport import TTransport
all_structs = []
fix_spec(all_structs)
del all_structs
257
pangramia/yt/tokens_ops/YTTokenOpService-remote
Executable file
@ -0,0 +1,257 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Autogenerated by Thrift Compiler (0.20.0)
|
||||||
|
#
|
||||||
|
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||||
|
#
|
||||||
|
# options string: py
|
||||||
|
#
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import pprint
|
||||||
|
if sys.version_info[0] > 2:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
else:
|
||||||
|
from urlparse import urlparse
|
||||||
|
from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient
|
||||||
|
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
|
||||||
|
|
||||||
|
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||||
|
from pangramia.yt.tokens_ops.ttypes import *
|
||||||
|
|
||||||
|
if len(sys.argv) <= 1 or sys.argv[1] == '--help':
|
||||||
|
print('')
|
||||||
|
print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]')
|
||||||
|
print('')
|
||||||
|
print('Functions:')
|
||||||
|
print(' JobTokenData getOrRefreshTokenWithReport(string accountId, string oldUrl, JobState status, string details, string jobId, TokenUpdateMode updateType, string url, string clients, AirflowLogContext airflowLogContext, string requestParamsJson)')
|
||||||
|
print(' JobTokenData getOrRefreshToken(string accountId, TokenUpdateMode updateType, string url, string clients, string machineId, AirflowLogContext airflowLogContext, string requestParamsJson, string assignedProxyUrl)')
|
||||||
|
print(' JobTokenData getLatestToken(string accountId)')
|
||||||
|
print(' JobTokenData refreshToken(string accountId, TokenUpdateMode updateType, string url)')
|
||||||
|
print(' bool reportState(string url, JobState status, string details, string jobId)')
|
||||||
|
print(' JobTokenData getInfoJsonDirect(string url, string clients)')
|
||||||
|
print(' getProxyStatus(string serverIdentity)')
|
||||||
|
print(' bool banProxy(string proxyUrl, string serverIdentity)')
|
||||||
|
print(' bool unbanProxy(string proxyUrl, string serverIdentity)')
|
||||||
|
print(' bool resetAllProxyStatuses(string serverIdentity)')
|
||||||
|
print(' bool banAllProxies(string serverIdentity)')
|
||||||
|
print(' bool deleteProxyFromRedis(string proxyUrl, string serverIdentity)')
|
||||||
|
print(' i32 deleteAllProxiesFromRedis(string serverIdentity)')
|
||||||
|
print(' getAccountStatus(string accountId, string accountPrefix)')
|
||||||
|
print(' bool banAccount(string accountId, string reason)')
|
||||||
|
print(' bool unbanAccount(string accountId, string reason)')
|
||||||
|
print(' bool deleteAccountFromRedis(string accountId)')
|
||||||
|
print(' i32 deleteAllAccountsFromRedis(string accountPrefix)')
|
||||||
|
print(' bool ping()')
|
||||||
|
print(' bool reportError(string message, details)')
|
||||||
|
print(' void shutdown()')
|
||||||
|
print('')
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
pp = pprint.PrettyPrinter(indent=2)
|
||||||
|
host = 'localhost'
|
||||||
|
port = 9090
|
||||||
|
uri = ''
|
||||||
|
framed = False
|
||||||
|
ssl = False
|
||||||
|
validate = True
|
||||||
|
ca_certs = None
|
||||||
|
keyfile = None
|
||||||
|
certfile = None
|
||||||
|
http = False
|
||||||
|
argi = 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-h':
|
||||||
|
parts = sys.argv[argi + 1].split(':')
|
||||||
|
host = parts[0]
|
||||||
|
if len(parts) > 1:
|
||||||
|
port = int(parts[1])
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-u':
|
||||||
|
url = urlparse(sys.argv[argi + 1])
|
||||||
|
parts = url[1].split(':')
|
||||||
|
host = parts[0]
|
||||||
|
if len(parts) > 1:
|
||||||
|
port = int(parts[1])
|
||||||
|
else:
|
||||||
|
port = 80
|
||||||
|
uri = url[2]
|
||||||
|
if url[4]:
|
||||||
|
uri += '?%s' % url[4]
|
||||||
|
http = True
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
|
||||||
|
framed = True
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-s' or sys.argv[argi] == '-ssl':
|
||||||
|
ssl = True
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-novalidate':
|
||||||
|
validate = False
|
||||||
|
argi += 1
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-ca_certs':
|
||||||
|
ca_certs = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-keyfile':
|
||||||
|
keyfile = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
if sys.argv[argi] == '-certfile':
|
||||||
|
certfile = sys.argv[argi+1]
|
||||||
|
argi += 2
|
||||||
|
|
||||||
|
cmd = sys.argv[argi]
|
||||||
|
args = sys.argv[argi + 1:]
|
||||||
|
|
||||||
|
if http:
|
||||||
|
transport = THttpClient.THttpClient(host, port, uri)
|
||||||
|
else:
|
||||||
|
if ssl:
|
||||||
|
socket = TSSLSocket.TSSLSocket(host, port, validate=validate, ca_certs=ca_certs, keyfile=keyfile, certfile=certfile)
|
||||||
|
else:
|
||||||
|
socket = TSocket.TSocket(host, port)
|
||||||
|
if framed:
|
||||||
|
transport = TTransport.TFramedTransport(socket)
|
||||||
|
else:
|
||||||
|
transport = TTransport.TBufferedTransport(socket)
|
||||||
|
protocol = TBinaryProtocol(transport)
|
||||||
|
client = YTTokenOpService.Client(protocol)
|
||||||
|
transport.open()
|
||||||
|
|
||||||
|
if cmd == 'getOrRefreshTokenWithReport':
|
||||||
|
if len(args) != 10:
|
||||||
|
print('getOrRefreshTokenWithReport requires 10 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getOrRefreshTokenWithReport(args[0], args[1], eval(args[2]), args[3], args[4], eval(args[5]), args[6], args[7], eval(args[8]), args[9],))
|
||||||
|
|
||||||
|
elif cmd == 'getOrRefreshToken':
|
||||||
|
if len(args) != 8:
|
||||||
|
print('getOrRefreshToken requires 8 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getOrRefreshToken(args[0], eval(args[1]), args[2], args[3], args[4], eval(args[5]), args[6], args[7],))
|
||||||
|
|
||||||
|
elif cmd == 'getLatestToken':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('getLatestToken requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getLatestToken(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'refreshToken':
|
||||||
|
if len(args) != 3:
|
||||||
|
print('refreshToken requires 3 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.refreshToken(args[0], eval(args[1]), args[2],))
|
||||||
|
|
||||||
|
elif cmd == 'reportState':
|
||||||
|
if len(args) != 4:
|
||||||
|
print('reportState requires 4 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.reportState(args[0], eval(args[1]), args[2], args[3],))
|
||||||
|
|
||||||
|
elif cmd == 'getInfoJsonDirect':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('getInfoJsonDirect requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getInfoJsonDirect(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'getProxyStatus':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('getProxyStatus requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getProxyStatus(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'banProxy':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('banProxy requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.banProxy(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'unbanProxy':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('unbanProxy requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.unbanProxy(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'resetAllProxyStatuses':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('resetAllProxyStatuses requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.resetAllProxyStatuses(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'banAllProxies':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('banAllProxies requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.banAllProxies(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteProxyFromRedis':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('deleteProxyFromRedis requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteProxyFromRedis(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteAllProxiesFromRedis':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('deleteAllProxiesFromRedis requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteAllProxiesFromRedis(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'getAccountStatus':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('getAccountStatus requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.getAccountStatus(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'banAccount':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('banAccount requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.banAccount(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'unbanAccount':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('unbanAccount requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.unbanAccount(args[0], args[1],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteAccountFromRedis':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('deleteAccountFromRedis requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteAccountFromRedis(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'deleteAllAccountsFromRedis':
|
||||||
|
if len(args) != 1:
|
||||||
|
print('deleteAllAccountsFromRedis requires 1 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.deleteAllAccountsFromRedis(args[0],))
|
||||||
|
|
||||||
|
elif cmd == 'ping':
|
||||||
|
if len(args) != 0:
|
||||||
|
print('ping requires 0 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.ping())
|
||||||
|
|
||||||
|
elif cmd == 'reportError':
|
||||||
|
if len(args) != 2:
|
||||||
|
print('reportError requires 2 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.reportError(args[0], eval(args[1]),))
|
||||||
|
|
||||||
|
elif cmd == 'shutdown':
|
||||||
|
if len(args) != 0:
|
||||||
|
print('shutdown requires 0 args')
|
||||||
|
sys.exit(1)
|
||||||
|
pp.pprint(client.shutdown())
|
||||||
|
|
||||||
|
else:
|
||||||
|
print('Unrecognized method %s' % cmd)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
transport.close()
|
||||||
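The same wiring works for YTTokenOpService; the framed transport shown here corresponds to the -f/-framed option handled above, and the account id is illustrative:

from thrift.transport import TSocket, TTransport
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from pangramia.yt.tokens_ops import YTTokenOpService

transport = TTransport.TFramedTransport(TSocket.TSocket('localhost', 9090))
client = YTTokenOpService.Client(TBinaryProtocol(transport))
transport.open()
print(client.getLatestToken('acct-001'))   # returns a JobTokenData struct
transport.close()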
1719
pangramia/yt/tokens_ops/YTTokenOpService.py
Normal file
File diff suppressed because it is too large
1
pangramia/yt/tokens_ops/__init__.py
Normal file
@ -0,0 +1 @@
__all__ = ['ttypes', 'constants', 'YTTokenOpService']
14
pangramia/yt/tokens_ops/constants.py
Normal file
@ -0,0 +1,14 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
from .ttypes import *
21
pangramia/yt/tokens_ops/ttypes.py
Normal file
@ -0,0 +1,21 @@
#
# Autogenerated by Thrift Compiler (0.20.0)
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#
# options string: py
#

from thrift.Thrift import TType, TMessageType, TFrozenDict, TException, TApplicationException
from thrift.protocol.TProtocol import TProtocolException
from thrift.TRecursive import fix_spec

import sys
import pangramia.yt.common.ttypes
import pangramia.yt.exceptions.ttypes
import pangramia.yt.management.ttypes

from thrift.transport import TTransport
all_structs = []
fix_spec(all_structs)
del all_structs
0
playbooks/playbook-bgutils-start.yml
Normal file
0
playbooks/playbook-bgutils-stop.yml
Normal file
155
policies/1_fetch_only_policies.yaml
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
# This file contains policies for testing only the info.json generation step.
|
||||||
|
# No downloads are performed.
|
||||||
|
|
||||||
|
---
|
||||||
|
# Policy: Basic fetch-only test for a TV client.
|
||||||
|
# This policy uses a single, static profile and has a rate limit to avoid being
|
||||||
|
# too aggressive. It saves the generated info.json files to a directory.
|
||||||
|
name: tv_downgraded_single_profile
|
||||||
|
|
||||||
|
settings:
|
||||||
|
mode: fetch_only
|
||||||
|
urls_file: "urls.txt"
|
||||||
|
info_json_script: "bin/ytops-client get-info"
|
||||||
|
save_info_json_dir: "fetched_info_jsons/tv_downgraded"
|
||||||
|
# Use a single, static profile for all requests.
|
||||||
|
profile_prefix: "tv_downgraded_user"
|
||||||
|
profile_mode: per_worker # With 1 worker, this is effectively a single profile.
|
||||||
|
|
||||||
|
execution_control:
|
||||||
|
run_until: { cycles: 1 }
|
||||||
|
workers: 1
|
||||||
|
sleep_between_tasks: { min_seconds: 5, max_seconds: 10 }
|
||||||
|
|
||||||
|
info_json_generation_policy:
|
||||||
|
client: tv_downgraded
|
||||||
|
# Safety rate limit: 450 requests per hour (7.5 req/min)
|
||||||
|
rate_limits:
|
||||||
|
per_ip: { max_requests: 450, per_minutes: 60 }
|
||||||
|
|
||||||
|
---
|
||||||
|
# Policy: Fetch-only test for an Android client using a cookie file.
|
||||||
|
# This demonstrates how to pass a cookie file for authenticated requests.
|
||||||
|
# It uses a single profile and stops if it encounters too many errors.
|
||||||
|
name: android_sdkless_with_cookies
|
||||||
|
|
||||||
|
settings:
|
||||||
|
mode: fetch_only
|
||||||
|
urls_file: "urls.txt"
|
||||||
|
info_json_script: "bin/ytops-client get-info"
|
||||||
|
save_info_json_dir: "fetched_info_jsons/android_sdkless"
|
||||||
|
profile_prefix: "android_user_with_cookies"
|
||||||
|
profile_mode: per_worker
|
||||||
|
|
||||||
|
execution_control:
|
||||||
|
run_until: { cycles: 1 } # Run through the URL list once.
|
||||||
|
workers: 1
|
||||||
|
sleep_between_tasks: { min_seconds: 2, max_seconds: 4 }
|
||||||
|
|
||||||
|
info_json_generation_policy:
|
||||||
|
client: android_sdkless
|
||||||
|
# Pass per-request parameters. This is how you specify a cookie file.
|
||||||
|
request_params:
|
||||||
|
cookies_file_path: "/path/to/your/android_cookies.txt"
|
||||||
|
|
||||||
|
stop_conditions:
|
||||||
|
# Stop if we get more than 5 errors in any 10-minute window.
|
||||||
|
on_error_rate: { max_errors: 5, per_minutes: 10 }
|
||||||
|
|
||||||
|
---
|
||||||
|
# Policy: TV Fetch with Profile Cooldown (Pipeline Stage 1)
|
||||||
|
# Fetches info.json files using the 'tv' client. Each profile is limited
|
||||||
|
# to a certain number of requests before it is put into a cooldown period.
|
||||||
|
# The output of this policy is intended to be used by a 'download_only' policy.
|
||||||
|
name: tv_fetch_with_cooldown
|
||||||
|
|
||||||
|
settings:
|
||||||
|
mode: fetch_only
|
||||||
|
urls_file: "urls.txt"
|
||||||
|
info_json_script: "bin/ytops-client get-info"
|
||||||
|
# Save the generated files to this directory for the download task to find.
|
||||||
|
save_info_json_dir: "live_jsons_tv"
|
||||||
|
profile_management:
|
||||||
|
prefix: "tv_user"
|
||||||
|
initial_pool_size: 10
|
||||||
|
auto_expand_pool: true
|
||||||
|
max_requests_per_profile: 60
|
||||||
|
sleep_minutes_on_exhaustion: 60
|
||||||
|
|
||||||
|
execution_control:
|
||||||
|
run_until: { cycles: 1 }
|
||||||
|
workers: 1
|
||||||
|
sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }
|
||||||
|
|
||||||
|
info_json_generation_policy:
|
||||||
|
client: "tv"
|
||||||
|
request_params:
|
||||||
|
context_reuse_policy: { enabled: true, max_age_seconds: 86400 }
|
||||||
|
|
||||||
|
---
|
||||||
|
# Policy: MWeb with client rotation and rate limits.
|
||||||
|
# This demonstrates a more complex scenario with multiple clients and strict
|
||||||
|
# rate limiting, useful for simulating sophisticated user behavior.
|
||||||
|
name: mweb_client_rotation_and_rate_limits
|
||||||
|
|
||||||
|
settings:
|
||||||
|
mode: fetch_only
|
||||||
|
urls_file: "urls.txt"
|
||||||
|
info_json_script: "bin/ytops-client get-info"
|
||||||
|
# Use the dynamic profile pool management system.
|
||||||
|
profile_management:
|
||||||
|
prefix: "mweb_user"
|
||||||
|
initial_pool_size: 10
|
||||||
|
max_requests_per_profile: 100
|
||||||
|
sleep_minutes_on_exhaustion: 15
|
||||||
|
|
||||||
|
execution_control:
|
||||||
|
run_until: { cycles: 1 }
|
||||||
|
workers: 10
|
||||||
|
sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }
|
||||||
|
|
||||||
|
info_json_generation_policy:
|
||||||
|
# Enforce strict rate limits for both the entire IP and each individual profile.
|
||||||
|
rate_limits:
|
||||||
|
per_ip: { max_requests: 120, per_minutes: 10 }
|
||||||
|
per_profile: { max_requests: 10, per_minutes: 10 }
|
||||||
|
|
||||||
|
# Rotate between a primary client (mweb) and a refresh client (web_camoufox)
|
||||||
|
# to keep sessions fresh.
|
||||||
|
client_rotation_policy:
|
||||||
|
major_client: "mweb"
|
||||||
|
major_client_params:
|
||||||
|
context_reuse_policy: { enabled: true, max_age_seconds: 1800 }
|
||||||
|
refresh_client: "web_camoufox"
|
||||||
|
refresh_every: { requests: 20, minutes: 10 }
|
||||||
|
|
||||||
|
---
|
||||||
|
# Policy: TV Simply, fetch-only test with per-worker profile rotation.
|
||||||
|
# Fetches info.json using tv_simply with multiple workers. Each worker gets a
|
||||||
|
# unique profile that is retired and replaced with a new generation after a
|
||||||
|
# set number of requests.
|
||||||
|
name: tv_simply_fetch_rotation
|
||||||
|
|
||||||
|
settings:
|
||||||
|
mode: fetch_only
|
||||||
|
urls_file: "urls.txt"
|
||||||
|
info_json_script: "bin/ytops-client get-info"
|
||||||
|
save_info_json_dir: "fetched_info_jsons/tv_simply_rotation"
|
||||||
|
# Use the modern profile management system.
|
||||||
|
profile_mode: per_worker_with_rotation
|
||||||
|
profile_management:
|
||||||
|
prefix: "tv_simply_user"
|
||||||
|
# Rotate to a new profile generation after 250 requests.
|
||||||
|
max_requests_per_profile: 250
|
||||||
|
|
||||||
|
execution_control:
|
||||||
|
run_until: { cycles: 1 } # Run through the URL list once.
|
||||||
|
workers: 8 # Run with 8 parallel workers.
|
||||||
|
sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }
|
||||||
|
# Optional: Override the assumed time for a fetch task to improve rate estimation.
|
||||||
|
# The default is 3 seconds for fetch_only mode.
|
||||||
|
# assumptions:
|
||||||
|
# fetch_task_duration: 2.5
|
||||||
|
|
||||||
|
info_json_generation_policy:
|
||||||
|
client: tv_simply
|
||||||
58
policies/2_download_only_policies.yaml
Normal file
58
policies/2_download_only_policies.yaml
Normal file
@ -0,0 +1,58 @@
# This file contains policies for testing only the download step from
# existing info.json files. No new info.json files are generated.

---
# Policy: Basic profile-aware download test.
# This policy reads info.json files from a directory, groups them by a profile
# name extracted from the filename, and downloads them using multiple workers.
# Each worker handles one or more profiles sequentially.
name: basic_profile_aware_download

settings:
  mode: download_only
  info_json_dir: "prefetched_info_jsons"
  # Regex to extract profile names from filenames like '...-VIDEOID-my_profile_name.json'.
  profile_extraction_regex: ".*-[a-zA-Z0-9_-]{11}-(.+)\\.json"

execution_control:
  run_until: { cycles: 1 }
  # 'auto' sets workers to the number of profiles, capped by auto_workers_max.
  workers: auto
  auto_workers_max: 8
  # This sleep applies between each file downloaded by a single profile.
  sleep_between_tasks: { min_seconds: 1, max_seconds: 2 }

download_policy:
  formats: "18,140,299/298/137/136/135/134/133"
  downloader: "aria2c"
  downloader_args: "aria2c:-x 4 -k 1M"
  extra_args: "--cleanup --output-dir /tmp/downloads"
  # This sleep applies between formats of a single video.
  sleep_between_formats: { min_seconds: 0, max_seconds: 0 }
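The `profile_extraction_regex` in `basic_profile_aware_download` groups info.json files by the profile encoded in their filenames. A small sketch of that grouping step; the directory name is just the example value from the policy:

```python
import re
from collections import defaultdict
from pathlib import Path

PROFILE_RE = re.compile(r".*-[a-zA-Z0-9_-]{11}-(.+)\.json")

def group_by_profile(info_json_dir):
    groups = defaultdict(list)
    for path in Path(info_json_dir).glob("*.json"):
        m = PROFILE_RE.match(path.name)
        if m:
            groups[m.group(1)].append(path)  # profile name -> its info.json files
    return groups

# Each worker can then be handed one or more profile groups to download sequentially.
profiles = group_by_profile("prefetched_info_jsons")
```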
---
# Policy: Continuous download from a folder (Pipeline Stage 2).
# This policy watches a directory for new info.json files and processes them
# as they appear. It is designed to work as the second stage of a pipeline,
# consuming files generated by a 'fetch_only' policy like 'tv_fetch_with_cooldown'.
name: continuous_watch_download

settings:
  mode: download_only
  info_json_dir: "live_info_jsons"
  directory_scan_mode: continuous
  mark_processed_files: true       # Rename files to *.processed to avoid re-downloading.
  max_files_per_cycle: 50          # Process up to 50 new files each time it checks.
  sleep_if_no_new_files_seconds: 15

execution_control:
  # Note: For 'continuous' mode, a time-based run_until (e.g., {minutes: 120})
  # is more typical. {cycles: 1} will cause it to scan the directory once
  # for new files, process them, and then exit.
  run_until: { cycles: 1 }
  workers: 4                       # Use a few workers to process files in parallel.
  sleep_between_tasks: { min_seconds: 0, max_seconds: 0 }

download_policy:
  formats: "18,140"
  extra_args: "--cleanup --output-dir /tmp/downloads"
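`continuous_watch_download` keeps scanning `info_json_dir` and renames handled files to `*.processed`. A simplified loop showing the intended behaviour; the `download` callable is a placeholder for the real download step, not an API of the tool:

```python
import time
from pathlib import Path

def watch(info_json_dir, download, max_files_per_cycle=50, idle_sleep=15):
    """Poll a directory for new info.json files; `download` is any callable."""
    while True:
        # Handled files are renamed to *.json.processed, so *.json only matches new work.
        new_files = sorted(Path(info_json_dir).glob("*.json"))[:max_files_per_cycle]
        if not new_files:
            time.sleep(idle_sleep)   # sleep_if_no_new_files_seconds: 15
            continue
        for path in new_files:
            download(path)           # hand off to the download workers
            path.rename(path.with_name(path.name + ".processed"))  # mark_processed_files: true
```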
policies/3_full_stack_policies.yaml (new file, +158)
@@ -0,0 +1,158 @@
# This file contains policies for full-stack tests, which include both
# info.json generation and the subsequent download step.

---
# Policy: TV client with profile rotation.
# This test uses multiple parallel workers. Each worker gets its own profile
# that is automatically rotated (e.g., from tv_user_0_0 to tv_user_0_1) after
# a certain number of requests to simulate user churn.
name: tv_simply_profile_rotation

settings:
  mode: full_stack
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/tv_simply_rotation"
  # Use the modern profile management system.
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "tv_simply"
    # Rotate to a new profile generation after 250 requests.
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }
  workers: 8   # Run with 8 parallel workers.
  sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }
  # Optional: Override assumptions to improve rate estimation.
  # assumptions:
  #   fetch_task_duration: 10     # Est. seconds to get info.json
  #   download_task_duration: 20  # Est. seconds to download all formats for one video

info_json_generation_policy:
  client: tv_simply

download_policy:
  formats: "18,140"
  extra_args: "--cleanup --output-dir downloads/tv_simply_rotation"
  proxy: "socks5://127.0.0.1:1087"
  downloader: "aria2c"
  downloader_args: "aria2c:-x 8 -k 1M"
  sleep_between_formats: { min_seconds: 2, max_seconds: 2 }

stop_conditions:
  on_cumulative_403: { max_errors: 5, per_minutes: 2 }
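The `on_cumulative_403` stop condition aborts the run once too many HTTP 403 responses accumulate within a short window. A minimal sketch of that guard; the class name is illustrative, not the orchestrator's actual implementation:

```python
import time
from collections import deque

class CumulativeErrorGuard:
    """Signal a stop when more than max_errors 403s occur within per_minutes."""

    def __init__(self, max_errors=5, per_minutes=2):
        self.max_errors = max_errors
        self.window = per_minutes * 60
        self.errors = deque()

    def record_403(self):
        now = time.time()
        self.errors.append(now)
        while self.errors and now - self.errors[0] > self.window:
            self.errors.popleft()
        return len(self.errors) > self.max_errors   # True -> stop the test run
```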
---
# Policy: TV Simply, full-stack test with per-worker profile rotation.
# Generates info.json using tv_simply and immediately attempts to download.
# This combines the fetch and download steps into a single workflow.
name: tv_simply_full_stack_rotation

settings:
  mode: full_stack
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "tv_simply_worker"
    max_requests_per_profile: 240

execution_control:
  workers: 10
  run_until: { cycles: 1 }
  sleep_between_tasks: { min_seconds: 5, max_seconds: 5 }

info_json_generation_policy:
  client: "tv_simply"
  request_params:
    context_reuse_policy: { enabled: false }

download_policy:
  formats: "18,140"
  extra_args: "--output-dir downloads/tv_simply_downloads"

---
# Policy: MWeb client with multiple profiles, each with its own cookie file.
# This demonstrates how to run an authenticated test with a pool of accounts.
# The orchestrator will cycle through the cookie files, assigning one to each profile.
name: mweb_multi_profile_with_cookies

settings:
  mode: full_stack
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  # Use the dynamic profile pool management system.
  profile_management:
    prefix: "mweb_user"
    initial_pool_size: 3             # Start with 3 profiles.
    auto_expand_pool: true           # Create new profiles if the initial 3 are all rate-limited.
    max_requests_per_profile: 100    # Let each profile make 100 requests...
    sleep_minutes_on_exhaustion: 15  # ...then put it to sleep for 15 minutes.
    # Assign a different cookie file to each profile in the pool.
    # The tool will cycle through this list.
    cookie_files:
      - "/path/to/your/mweb_cookies_0.txt"
      - "/path/to/your/mweb_cookies_1.txt"
      - "/path/to/your/mweb_cookies_2.txt"

execution_control:
  run_until: { cycles: 1 }
  workers: 3   # Match workers to the number of initial profiles.
  sleep_between_tasks: { min_seconds: 1, max_seconds: 3 }

info_json_generation_policy:
  client: mweb
  # This client uses youtubei.js, which generates PO tokens.

download_policy:
  formats: "18,140"
  extra_args: "--cleanup --output-dir /tmp/downloads"
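`mweb_multi_profile_with_cookies` pairs each profile in the pool with one entry from `cookie_files`, cycling when there are more profiles than cookies. A sketch of that assignment; the `mweb_user_<n>` naming is an assumption based on the `prefix` above:

```python
from itertools import cycle

cookie_files = [
    "/path/to/your/mweb_cookies_0.txt",
    "/path/to/your/mweb_cookies_1.txt",
    "/path/to/your/mweb_cookies_2.txt",
]

# initial_pool_size: 3 -> mweb_user_0 .. mweb_user_2, each paired with one cookie file.
assignments = {
    f"mweb_user_{i}": cookie_file
    for i, cookie_file in zip(range(3), cycle(cookie_files))
}
```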
---
# Policy: TV client with profile rotation and aria2c RPC download.
# This test uses multiple parallel workers. Each worker gets its own profile
# that is automatically rotated. Downloads are submitted to an aria2c daemon
# via its RPC interface.
name: tv_simply_profile_rotation_aria2c_rpc

settings:
  mode: full_stack
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/tv_simply_rotation_aria"
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "tv_simply_aria"
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }
  workers: 8
  sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }

info_json_generation_policy:
  client: tv_simply

download_policy:
  formats: "18,140"
  # Use the aria2c RPC downloader
  downloader: "aria2c_rpc"
  # RPC server connection details
  aria_host: "localhost"
  aria_port: 6800
  # aria_secret: "your_secret"   # Uncomment and set if needed
  # Set to true to wait for each download and get a success/fail result.
  # This is the default and recommended for monitoring success/failure.
  # Set to false for maximum submission throughput ("fire-and-forget"),
  # but you will lose per-download status reporting.
  aria_wait: true
  # The output directory is on the aria2c host machine
  output_dir: "/downloads/tv_simply_rotation_aria"
  # Pass custom arguments to aria2c in yt-dlp format for better performance.
  # -x: max connections per server, -k: min split size.
  downloader_args: "aria2c:[-x 8, -k 1M]"
  sleep_between_formats: { min_seconds: 1, max_seconds: 2 }

stop_conditions:
  on_cumulative_403: { max_errors: 5, per_minutes: 2 }
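With `downloader: "aria2c_rpc"`, each format URL is handed to a running aria2c daemon over its JSON-RPC interface rather than downloaded in-process. The sketch below uses aria2's standard `aria2.addUri` and `aria2.tellStatus` methods; the `token:` prefix is only needed when `aria_secret` is set, and the polling loop corresponds to `aria_wait: true`:

```python
import time
import requests

def submit_to_aria2(url, out_name, host="localhost", port=6800, secret=None,
                    output_dir="/downloads/tv_simply_rotation_aria", wait=True):
    endpoint = f"http://{host}:{port}/jsonrpc"
    params = [[url], {"dir": output_dir, "out": out_name}]
    if secret:
        params.insert(0, f"token:{secret}")
    gid = requests.post(endpoint, json={
        "jsonrpc": "2.0", "id": "ytops", "method": "aria2.addUri", "params": params,
    }).json()["result"]
    if not wait:
        return gid                      # fire-and-forget: no per-download status
    while True:                         # aria_wait: true -> poll until the transfer finishes
        status_params = ([f"token:{secret}"] if secret else []) + [gid, ["status"]]
        status = requests.post(endpoint, json={
            "jsonrpc": "2.0", "id": "ytops", "method": "aria2.tellStatus", "params": status_params,
        }).json()["result"]["status"]
        if status in ("complete", "error", "removed"):
            return status
        time.sleep(2)
```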
policies/README.md (new file, +28)
@@ -0,0 +1,28 @@
# Stress Test Policies

This directory contains example policy files for the `stress_enhanced.py` orchestrator. Each file defines a specific testing strategy, organized by task type.

## Authentication & Info.json Policies (`fetch_only` mode)

These policies focus on testing the info.json generation service.

- `info_json_rate_limit.yaml`: Tests the service with a focus on rate limits and client rotation.
- `auth_scenarios.yaml`: Contains specific scenarios for fetching info.json files, such as using a low-level command template for full control.

## Download Policies (`download_only` mode)

These policies focus on testing the download infrastructure using pre-existing info.json files.

- `download_throughput.yaml`: Tests download/CDN infrastructure, focusing on throughput and error handling.
- `download_scenarios.yaml`: Contains specific scenarios for downloading, such as testing random formats from a directory of info.json files.

## Full-Stack Policies (`full_stack` mode)

These policies test the entire workflow from info.json generation through to downloading.

- `regular_testing_scenarios.yaml`: Contains a collection of common, end-to-end testing scenarios, including:
  - `mweb_per_request_profile`: A high-volume test that uses a new profile for every request.
  - `mixed_client_profile_pool`: A complex test that alternates clients and reuses profiles from a pool.
- `tv_pipeline_scenarios.yaml`: A two-stage pipeline for fetching with the TV client and then continuously downloading.

These files can be used as templates for creating custom test scenarios.
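Each policy file holds several YAML documents separated by `---`, and every document carries a `name` key. A small sketch of selecting one policy out of a file; the file and policy names are just the examples used in this directory:

```python
import yaml

def load_policy(path, policy_name):
    with open(path) as fh:
        for doc in yaml.safe_load_all(fh):
            if doc and doc.get("name") == policy_name:
                return doc
    raise KeyError(f"policy {policy_name!r} not found in {path}")

policy = load_policy("policies/3_full_stack_policies.yaml", "tv_simply_profile_rotation")
print(policy["execution_control"]["workers"])
```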
setup.py (1 changed line)
@@ -32,7 +32,6 @@ setup(
         'psutil',
         'flask',
         'waitress',
-        'yt_dlp>=2025.3.27',
         'yt-dlp-get-pot==0.3.0',
         'requests>=2.31.0',
         'ffprobe3',
thrift_model/.gitignore (vendored, +1)
@@ -1 +1,2 @@
+__py_cache__
 target/
thrift_model/data/common.thrift (new file, +145)
@@ -0,0 +1,145 @@
namespace py pangramia.yt.common
namespace java com.pangramia.yt.common

typedef string JobID
typedef string Timestamp

/**
 * Standard error codes for service exceptions.
 */
enum ErrorCode {
  UNKNOWN = 0,
  NOT_IMPLEMENTED = 1,
  INTERNAL_ERROR = 2,
  INVALID_REQUEST = 3,
  PROXY_UNAVAILABLE = 4,
  ACCOUNT_UNAVAILABLE = 5,
  BOT_DETECTED = 6,
  BOT_DETECTION_SIGN_IN_REQUIRED = 7,
  SABR_STREAMING_DETECTED = 8
}

enum JobState {
  SUCCESS,
  FAIL,
  BOT_FORBIDDEN_ON_URL_ACCESS,
  BOT_FORBIDDEN_ON_FILE_DOWNLOAD,
  BOT_CAPTCHA,
  BOT_AUTH_RELOGIN_REQUIRED,
  BOT_AUTH_SMS_REQUIRED,
  BOT_AUTH_DEVICE_QR_REQUIRED,
  BOT_ACCOUNT_BANNED,
  BOT_IP_BANNED
}

struct JobTokenData {
  1: optional string infoJson,
  2: optional string ytdlpCommand,
  3: optional string socks,
  4: optional JobID jobId,
  5: optional string url,
  6: optional string cookiesBlob,
  7: optional string requestSummary,
  8: optional list<string> communicationLogPaths,
  9: optional string serverVersionInfo,
}

enum TokenUpdateMode {
  AUTOREFRESH_AND_REMAIN_ANONYMOUS,
  AUTOREFRESH_AND_ALLOW_AUTH,
  AUTOREFRESH_AND_ONLY_AUTH,
  CLEANUP_THEN_AUTOREFRESH_AND_ONLY_AUTH,
  CLEANUP_THEN_AUTOREFRESH_AND_REMAIN_ANONYMOUS,
  CLEANUP_THEN_AUTOREFRESH_AND_ALLOW_AUTH,
  AUTO, // AUTOREFRESH_AND_ONLY_AUTH,
}

struct AccountData {
  1: required string username,
  2: required string password,
  3: optional string countryCode
}

struct ProxyData {
  1: required string proxyUrl,
  2: optional string countryCode
}

enum AccountPairState {
  ACTIVE,
  PAUSED,
  REMOVED,
  IN_PROGRESS,
  ALL
}

struct AccountPairWithState {
  1: required string accountId,
  2: required string proxyId,
  3: optional AccountPairState accountPairState
  4: optional string machineId,
}

struct JobData {
  1: required string jobId,
  2: required string url,
  3: required string cookiesBlob,
  4: required string potoken,
  5: required string visitorId,
  6: required string ytdlpCommand,
  7: required string createdTime,
  8: required map<string,string> telemetry,
  9: required JobState state,
  10: optional string errorMessage,
  11: optional string socks5Id
}

struct RichCollectionPagination {
  1: required bool hasNext,
  2: required i32 totalCount,
  3: required i32 page,
  4: required i32 pageSize
}

struct RichCollectionJobData {
  1: required list<JobData> items,
  2: required RichCollectionPagination pagination
}

struct ProxyStatus {
  1: string proxyUrl,
  2: string status,
  3: i64 successCount,
  4: i64 failureCount,
  5: optional string lastFailureTimestamp,
  6: optional string lastSuccessTimestamp,
  7: optional string serverIdentity
}

struct AccountStatus {
  1: string accountId,
  2: string status,
  3: i64 successCount,
  4: i64 failureCount,
  5: optional string lastFailureTimestamp,
  6: optional string lastSuccessTimestamp,
  7: optional string lastUsedProxy,
  8: optional string lastUsedMachine
}

struct AirflowLogContext {
  1: optional string logS3Path,
  2: optional string dagId,
  3: optional string runId,
  4: optional string taskId,
  5: optional i32 tryNumber,
  6: optional string workerHostname,
  7: optional string queue
}
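On the Python side these structs are exposed through the generated `pangramia.yt.common.ttypes` module (regenerated later in this commit). A brief usage sketch; the module path follows standard Thrift code generation, and all field values are placeholders:

```python
from pangramia.yt.common.ttypes import AirflowLogContext, JobState, JobTokenData

log_ctx = AirflowLogContext(
    dagId="example_dag",                 # placeholder values, not real run metadata
    runId="manual__2024-01-01T00:00:00",
    taskId="fetch_info_json",
    tryNumber=1,
    workerHostname="worker-01",
    queue="default",
)

token = JobTokenData(
    url="https://www.youtube.com/watch?v=VIDEO_ID",
    requestSummary="tv_simply fetch",
    serverVersionInfo="token-op-service/dev",
)
print(JobState.SUCCESS, log_ctx, token.url)   # enum members mirror the .thrift definition
```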
thrift_model/data/exceptions.thrift (new file, +14)
@@ -0,0 +1,14 @@
namespace py pangramia.yt.exceptions
namespace java com.pangramia.yt.exceptions

exception PBServiceException {
  1: required string message,
  2: optional string errorCode,
  3: optional map<string, string> context
}

exception PBUserException {
  1: required string message,
  2: optional string errorCode,
  3: optional map<string, string> context
}
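Service methods raise these two exception types, as the generated client code later in this commit shows. A typical client-side pattern, sketched with placeholder logging:

```python
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException

def safe_call(client, url, clients="tv_simply"):
    try:
        return client.getInfoJsonDirect(url, clients)
    except PBUserException as exc:       # caller error: bad URL, bad client list, ...
        print(f"rejected ({exc.errorCode}): {exc.message}")
    except PBServiceException as exc:    # server-side failure: proxy, account, bot detection, ...
        print(f"service error ({exc.errorCode}): {exc.message}")
    return None
```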
@ -29,6 +29,7 @@ class ErrorCode(object):
|
|||||||
ACCOUNT_UNAVAILABLE = 5
|
ACCOUNT_UNAVAILABLE = 5
|
||||||
BOT_DETECTED = 6
|
BOT_DETECTED = 6
|
||||||
BOT_DETECTION_SIGN_IN_REQUIRED = 7
|
BOT_DETECTION_SIGN_IN_REQUIRED = 7
|
||||||
|
SABR_STREAMING_DETECTED = 8
|
||||||
|
|
||||||
_VALUES_TO_NAMES = {
|
_VALUES_TO_NAMES = {
|
||||||
0: "UNKNOWN",
|
0: "UNKNOWN",
|
||||||
@ -39,6 +40,7 @@ class ErrorCode(object):
|
|||||||
5: "ACCOUNT_UNAVAILABLE",
|
5: "ACCOUNT_UNAVAILABLE",
|
||||||
6: "BOT_DETECTED",
|
6: "BOT_DETECTED",
|
||||||
7: "BOT_DETECTION_SIGN_IN_REQUIRED",
|
7: "BOT_DETECTION_SIGN_IN_REQUIRED",
|
||||||
|
8: "SABR_STREAMING_DETECTED",
|
||||||
}
|
}
|
||||||
|
|
||||||
_NAMES_TO_VALUES = {
|
_NAMES_TO_VALUES = {
|
||||||
@ -50,6 +52,7 @@ class ErrorCode(object):
|
|||||||
"ACCOUNT_UNAVAILABLE": 5,
|
"ACCOUNT_UNAVAILABLE": 5,
|
||||||
"BOT_DETECTED": 6,
|
"BOT_DETECTED": 6,
|
||||||
"BOT_DETECTION_SIGN_IN_REQUIRED": 7,
|
"BOT_DETECTION_SIGN_IN_REQUIRED": 7,
|
||||||
|
"SABR_STREAMING_DETECTED": 8,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -155,17 +158,23 @@ class JobTokenData(object):
|
|||||||
- jobId
|
- jobId
|
||||||
- url
|
- url
|
||||||
- cookiesBlob
|
- cookiesBlob
|
||||||
|
- requestSummary
|
||||||
|
- communicationLogPaths
|
||||||
|
- serverVersionInfo
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, infoJson=None, ytdlpCommand=None, socks=None, jobId=None, url=None, cookiesBlob=None,):
|
def __init__(self, infoJson=None, ytdlpCommand=None, socks=None, jobId=None, url=None, cookiesBlob=None, requestSummary=None, communicationLogPaths=None, serverVersionInfo=None,):
|
||||||
self.infoJson = infoJson
|
self.infoJson = infoJson
|
||||||
self.ytdlpCommand = ytdlpCommand
|
self.ytdlpCommand = ytdlpCommand
|
||||||
self.socks = socks
|
self.socks = socks
|
||||||
self.jobId = jobId
|
self.jobId = jobId
|
||||||
self.url = url
|
self.url = url
|
||||||
self.cookiesBlob = cookiesBlob
|
self.cookiesBlob = cookiesBlob
|
||||||
|
self.requestSummary = requestSummary
|
||||||
|
self.communicationLogPaths = communicationLogPaths
|
||||||
|
self.serverVersionInfo = serverVersionInfo
|
||||||
|
|
||||||
def read(self, iprot):
|
def read(self, iprot):
|
||||||
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
@ -206,6 +215,26 @@ class JobTokenData(object):
|
|||||||
self.cookiesBlob = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
self.cookiesBlob = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
else:
|
else:
|
||||||
iprot.skip(ftype)
|
iprot.skip(ftype)
|
||||||
|
elif fid == 7:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.requestSummary = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 8:
|
||||||
|
if ftype == TType.LIST:
|
||||||
|
self.communicationLogPaths = []
|
||||||
|
(_etype3, _size0) = iprot.readListBegin()
|
||||||
|
for _i4 in range(_size0):
|
||||||
|
_elem5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
self.communicationLogPaths.append(_elem5)
|
||||||
|
iprot.readListEnd()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 9:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.serverVersionInfo = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
else:
|
else:
|
||||||
iprot.skip(ftype)
|
iprot.skip(ftype)
|
||||||
iprot.readFieldEnd()
|
iprot.readFieldEnd()
|
||||||
@ -240,6 +269,21 @@ class JobTokenData(object):
|
|||||||
oprot.writeFieldBegin('cookiesBlob', TType.STRING, 6)
|
oprot.writeFieldBegin('cookiesBlob', TType.STRING, 6)
|
||||||
oprot.writeString(self.cookiesBlob.encode('utf-8') if sys.version_info[0] == 2 else self.cookiesBlob)
|
oprot.writeString(self.cookiesBlob.encode('utf-8') if sys.version_info[0] == 2 else self.cookiesBlob)
|
||||||
oprot.writeFieldEnd()
|
oprot.writeFieldEnd()
|
||||||
|
if self.requestSummary is not None:
|
||||||
|
oprot.writeFieldBegin('requestSummary', TType.STRING, 7)
|
||||||
|
oprot.writeString(self.requestSummary.encode('utf-8') if sys.version_info[0] == 2 else self.requestSummary)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.communicationLogPaths is not None:
|
||||||
|
oprot.writeFieldBegin('communicationLogPaths', TType.LIST, 8)
|
||||||
|
oprot.writeListBegin(TType.STRING, len(self.communicationLogPaths))
|
||||||
|
for iter6 in self.communicationLogPaths:
|
||||||
|
oprot.writeString(iter6.encode('utf-8') if sys.version_info[0] == 2 else iter6)
|
||||||
|
oprot.writeListEnd()
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.serverVersionInfo is not None:
|
||||||
|
oprot.writeFieldBegin('serverVersionInfo', TType.STRING, 9)
|
||||||
|
oprot.writeString(self.serverVersionInfo.encode('utf-8') if sys.version_info[0] == 2 else self.serverVersionInfo)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
oprot.writeFieldStop()
|
oprot.writeFieldStop()
|
||||||
oprot.writeStructEnd()
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
@ -583,11 +627,11 @@ class JobData(object):
|
|||||||
elif fid == 8:
|
elif fid == 8:
|
||||||
if ftype == TType.MAP:
|
if ftype == TType.MAP:
|
||||||
self.telemetry = {}
|
self.telemetry = {}
|
||||||
(_ktype1, _vtype2, _size0) = iprot.readMapBegin()
|
(_ktype8, _vtype9, _size7) = iprot.readMapBegin()
|
||||||
for _i4 in range(_size0):
|
for _i11 in range(_size7):
|
||||||
_key5 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
_key12 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
_val6 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
_val13 = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
self.telemetry[_key5] = _val6
|
self.telemetry[_key12] = _val13
|
||||||
iprot.readMapEnd()
|
iprot.readMapEnd()
|
||||||
else:
|
else:
|
||||||
iprot.skip(ftype)
|
iprot.skip(ftype)
|
||||||
@ -647,9 +691,9 @@ class JobData(object):
|
|||||||
if self.telemetry is not None:
|
if self.telemetry is not None:
|
||||||
oprot.writeFieldBegin('telemetry', TType.MAP, 8)
|
oprot.writeFieldBegin('telemetry', TType.MAP, 8)
|
||||||
oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.telemetry))
|
oprot.writeMapBegin(TType.STRING, TType.STRING, len(self.telemetry))
|
||||||
for kiter7, viter8 in self.telemetry.items():
|
for kiter14, viter15 in self.telemetry.items():
|
||||||
oprot.writeString(kiter7.encode('utf-8') if sys.version_info[0] == 2 else kiter7)
|
oprot.writeString(kiter14.encode('utf-8') if sys.version_info[0] == 2 else kiter14)
|
||||||
oprot.writeString(viter8.encode('utf-8') if sys.version_info[0] == 2 else viter8)
|
oprot.writeString(viter15.encode('utf-8') if sys.version_info[0] == 2 else viter15)
|
||||||
oprot.writeMapEnd()
|
oprot.writeMapEnd()
|
||||||
oprot.writeFieldEnd()
|
oprot.writeFieldEnd()
|
||||||
if self.state is not None:
|
if self.state is not None:
|
||||||
@ -823,11 +867,11 @@ class RichCollectionJobData(object):
|
|||||||
if fid == 1:
|
if fid == 1:
|
||||||
if ftype == TType.LIST:
|
if ftype == TType.LIST:
|
||||||
self.items = []
|
self.items = []
|
||||||
(_etype12, _size9) = iprot.readListBegin()
|
(_etype19, _size16) = iprot.readListBegin()
|
||||||
for _i13 in range(_size9):
|
for _i20 in range(_size16):
|
||||||
_elem14 = JobData()
|
_elem21 = JobData()
|
||||||
_elem14.read(iprot)
|
_elem21.read(iprot)
|
||||||
self.items.append(_elem14)
|
self.items.append(_elem21)
|
||||||
iprot.readListEnd()
|
iprot.readListEnd()
|
||||||
else:
|
else:
|
||||||
iprot.skip(ftype)
|
iprot.skip(ftype)
|
||||||
@ -850,8 +894,8 @@ class RichCollectionJobData(object):
|
|||||||
if self.items is not None:
|
if self.items is not None:
|
||||||
oprot.writeFieldBegin('items', TType.LIST, 1)
|
oprot.writeFieldBegin('items', TType.LIST, 1)
|
||||||
oprot.writeListBegin(TType.STRUCT, len(self.items))
|
oprot.writeListBegin(TType.STRUCT, len(self.items))
|
||||||
for iter15 in self.items:
|
for iter22 in self.items:
|
||||||
iter15.write(oprot)
|
iter22.write(oprot)
|
||||||
oprot.writeListEnd()
|
oprot.writeListEnd()
|
||||||
oprot.writeFieldEnd()
|
oprot.writeFieldEnd()
|
||||||
if self.pagination is not None:
|
if self.pagination is not None:
|
||||||
@ -1135,6 +1179,129 @@ class AccountStatus(object):
|
|||||||
|
|
||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
return not (self == other)
|
return not (self == other)
|
||||||
|
|
||||||
|
|
||||||
|
class AirflowLogContext(object):
|
||||||
|
"""
|
||||||
|
Attributes:
|
||||||
|
- logS3Path
|
||||||
|
- dagId
|
||||||
|
- runId
|
||||||
|
- taskId
|
||||||
|
- tryNumber
|
||||||
|
- workerHostname
|
||||||
|
- queue
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, logS3Path=None, dagId=None, runId=None, taskId=None, tryNumber=None, workerHostname=None, queue=None,):
|
||||||
|
self.logS3Path = logS3Path
|
||||||
|
self.dagId = dagId
|
||||||
|
self.runId = runId
|
||||||
|
self.taskId = taskId
|
||||||
|
self.tryNumber = tryNumber
|
||||||
|
self.workerHostname = workerHostname
|
||||||
|
self.queue = queue
|
||||||
|
|
||||||
|
def read(self, iprot):
|
||||||
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
|
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
|
||||||
|
return
|
||||||
|
iprot.readStructBegin()
|
||||||
|
while True:
|
||||||
|
(fname, ftype, fid) = iprot.readFieldBegin()
|
||||||
|
if ftype == TType.STOP:
|
||||||
|
break
|
||||||
|
if fid == 1:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.logS3Path = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 2:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.dagId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 3:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.runId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 4:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.taskId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 5:
|
||||||
|
if ftype == TType.I32:
|
||||||
|
self.tryNumber = iprot.readI32()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 6:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.workerHostname = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 7:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.queue = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
iprot.readFieldEnd()
|
||||||
|
iprot.readStructEnd()
|
||||||
|
|
||||||
|
def write(self, oprot):
|
||||||
|
if oprot._fast_encode is not None and self.thrift_spec is not None:
|
||||||
|
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
|
||||||
|
return
|
||||||
|
oprot.writeStructBegin('AirflowLogContext')
|
||||||
|
if self.logS3Path is not None:
|
||||||
|
oprot.writeFieldBegin('logS3Path', TType.STRING, 1)
|
||||||
|
oprot.writeString(self.logS3Path.encode('utf-8') if sys.version_info[0] == 2 else self.logS3Path)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.dagId is not None:
|
||||||
|
oprot.writeFieldBegin('dagId', TType.STRING, 2)
|
||||||
|
oprot.writeString(self.dagId.encode('utf-8') if sys.version_info[0] == 2 else self.dagId)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.runId is not None:
|
||||||
|
oprot.writeFieldBegin('runId', TType.STRING, 3)
|
||||||
|
oprot.writeString(self.runId.encode('utf-8') if sys.version_info[0] == 2 else self.runId)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.taskId is not None:
|
||||||
|
oprot.writeFieldBegin('taskId', TType.STRING, 4)
|
||||||
|
oprot.writeString(self.taskId.encode('utf-8') if sys.version_info[0] == 2 else self.taskId)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.tryNumber is not None:
|
||||||
|
oprot.writeFieldBegin('tryNumber', TType.I32, 5)
|
||||||
|
oprot.writeI32(self.tryNumber)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.workerHostname is not None:
|
||||||
|
oprot.writeFieldBegin('workerHostname', TType.STRING, 6)
|
||||||
|
oprot.writeString(self.workerHostname.encode('utf-8') if sys.version_info[0] == 2 else self.workerHostname)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.queue is not None:
|
||||||
|
oprot.writeFieldBegin('queue', TType.STRING, 7)
|
||||||
|
oprot.writeString(self.queue.encode('utf-8') if sys.version_info[0] == 2 else self.queue)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
oprot.writeFieldStop()
|
||||||
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
|
def validate(self):
|
||||||
|
return
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
L = ['%s=%r' % (key, value)
|
||||||
|
for key, value in self.__dict__.items()]
|
||||||
|
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not (self == other)
|
||||||
all_structs.append(JobTokenData)
|
all_structs.append(JobTokenData)
|
||||||
JobTokenData.thrift_spec = (
|
JobTokenData.thrift_spec = (
|
||||||
None, # 0
|
None, # 0
|
||||||
@ -1144,6 +1311,9 @@ JobTokenData.thrift_spec = (
|
|||||||
(4, TType.STRING, 'jobId', 'UTF8', None, ), # 4
|
(4, TType.STRING, 'jobId', 'UTF8', None, ), # 4
|
||||||
(5, TType.STRING, 'url', 'UTF8', None, ), # 5
|
(5, TType.STRING, 'url', 'UTF8', None, ), # 5
|
||||||
(6, TType.STRING, 'cookiesBlob', 'UTF8', None, ), # 6
|
(6, TType.STRING, 'cookiesBlob', 'UTF8', None, ), # 6
|
||||||
|
(7, TType.STRING, 'requestSummary', 'UTF8', None, ), # 7
|
||||||
|
(8, TType.LIST, 'communicationLogPaths', (TType.STRING, 'UTF8', False), None, ), # 8
|
||||||
|
(9, TType.STRING, 'serverVersionInfo', 'UTF8', None, ), # 9
|
||||||
)
|
)
|
||||||
all_structs.append(AccountData)
|
all_structs.append(AccountData)
|
||||||
AccountData.thrift_spec = (
|
AccountData.thrift_spec = (
|
||||||
@ -1218,5 +1388,16 @@ AccountStatus.thrift_spec = (
|
|||||||
(7, TType.STRING, 'lastUsedProxy', 'UTF8', None, ), # 7
|
(7, TType.STRING, 'lastUsedProxy', 'UTF8', None, ), # 7
|
||||||
(8, TType.STRING, 'lastUsedMachine', 'UTF8', None, ), # 8
|
(8, TType.STRING, 'lastUsedMachine', 'UTF8', None, ), # 8
|
||||||
)
|
)
|
||||||
|
all_structs.append(AirflowLogContext)
|
||||||
|
AirflowLogContext.thrift_spec = (
|
||||||
|
None, # 0
|
||||||
|
(1, TType.STRING, 'logS3Path', 'UTF8', None, ), # 1
|
||||||
|
(2, TType.STRING, 'dagId', 'UTF8', None, ), # 2
|
||||||
|
(3, TType.STRING, 'runId', 'UTF8', None, ), # 3
|
||||||
|
(4, TType.STRING, 'taskId', 'UTF8', None, ), # 4
|
||||||
|
(5, TType.I32, 'tryNumber', None, None, ), # 5
|
||||||
|
(6, TType.STRING, 'workerHostname', 'UTF8', None, ), # 6
|
||||||
|
(7, TType.STRING, 'queue', 'UTF8', None, ), # 7
|
||||||
|
)
|
||||||
fix_spec(all_structs)
|
fix_spec(all_structs)
|
||||||
del all_structs
|
del all_structs
|
||||||
|
|||||||
@@ -24,11 +24,12 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help':
     print('Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] [-s[sl]] [-novalidate] [-ca_certs certs] [-keyfile keyfile] [-certfile certfile] function [arg1 [arg2...]]')
     print('')
     print('Functions:')
-    print('  JobTokenData getOrRefreshTokenWithReport(string accountId, string oldUrl, JobState status, string details, string jobId, TokenUpdateMode updateType, string url, string clients)')
+    print('  JobTokenData getOrRefreshTokenWithReport(string accountId, string oldUrl, JobState status, string details, string jobId, TokenUpdateMode updateType, string url, string clients, AirflowLogContext airflowLogContext, string requestParamsJson)')
-    print('  JobTokenData getOrRefreshToken(string accountId, TokenUpdateMode updateType, string url, string clients, string machineId)')
+    print('  JobTokenData getOrRefreshToken(string accountId, TokenUpdateMode updateType, string url, string clients, string machineId, AirflowLogContext airflowLogContext, string requestParamsJson, string assignedProxyUrl)')
     print('  JobTokenData getLatestToken(string accountId)')
     print('  JobTokenData refreshToken(string accountId, TokenUpdateMode updateType, string url)')
     print('  bool reportState(string url, JobState status, string details, string jobId)')
+    print('  JobTokenData getInfoJsonDirect(string url, string clients)')
     print('  getProxyStatus(string serverIdentity)')
     print('  bool banProxy(string proxyUrl, string serverIdentity)')
     print('  bool unbanProxy(string proxyUrl, string serverIdentity)')
@@ -124,16 +125,16 @@ client = YTTokenOpService.Client(protocol)
 transport.open()

 if cmd == 'getOrRefreshTokenWithReport':
-    if len(args) != 8:
-        print('getOrRefreshTokenWithReport requires 8 args')
+    if len(args) != 10:
+        print('getOrRefreshTokenWithReport requires 10 args')
         sys.exit(1)
-    pp.pprint(client.getOrRefreshTokenWithReport(args[0], args[1], eval(args[2]), args[3], args[4], eval(args[5]), args[6], args[7],))
+    pp.pprint(client.getOrRefreshTokenWithReport(args[0], args[1], eval(args[2]), args[3], args[4], eval(args[5]), args[6], args[7], eval(args[8]), args[9],))

 elif cmd == 'getOrRefreshToken':
-    if len(args) != 5:
-        print('getOrRefreshToken requires 5 args')
+    if len(args) != 8:
+        print('getOrRefreshToken requires 8 args')
         sys.exit(1)
-    pp.pprint(client.getOrRefreshToken(args[0], eval(args[1]), args[2], args[3], args[4],))
+    pp.pprint(client.getOrRefreshToken(args[0], eval(args[1]), args[2], args[3], args[4], eval(args[5]), args[6], args[7],))

 elif cmd == 'getLatestToken':
     if len(args) != 1:
@@ -153,6 +154,12 @@ elif cmd == 'reportState':
         sys.exit(1)
     pp.pprint(client.reportState(args[0], eval(args[1]), args[2], args[3],))

+elif cmd == 'getInfoJsonDirect':
+    if len(args) != 2:
+        print('getInfoJsonDirect requires 2 args')
+        sys.exit(1)
+    pp.pprint(client.getInfoJsonDirect(args[0], args[1],))
+
 elif cmd == 'getProxyStatus':
     if len(args) != 1:
         print('getProxyStatus requires 1 args')
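The new `getInfoJsonDirect` call can also be made programmatically with the standard Thrift Python client classes; a minimal sketch in which the host, port, and the generated service's module path are assumptions, not values taken from this repository:

```python
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.tokens_ops import YTTokenOpService   # assumed generated package path

transport = TTransport.TBufferedTransport(TSocket.TSocket("localhost", 9090))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = YTTokenOpService.Client(protocol)

transport.open()
try:
    token = client.getInfoJsonDirect("https://www.youtube.com/watch?v=VIDEO_ID", "tv_simply")
    print(token.requestSummary, token.serverVersionInfo)
finally:
    transport.close()
```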
@ -20,7 +20,7 @@ all_structs = []
|
|||||||
|
|
||||||
|
|
||||||
class Iface(pangramia.yt.management.YTManagementService.Iface):
|
class Iface(pangramia.yt.management.YTManagementService.Iface):
|
||||||
def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients):
|
def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson):
|
||||||
"""
|
"""
|
||||||
Parameters:
|
Parameters:
|
||||||
- accountId
|
- accountId
|
||||||
@ -31,11 +31,13 @@ class Iface(pangramia.yt.management.YTManagementService.Iface):
|
|||||||
- updateType
|
- updateType
|
||||||
- url
|
- url
|
||||||
- clients
|
- clients
|
||||||
|
- airflowLogContext
|
||||||
|
- requestParamsJson
|
||||||
|
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def getOrRefreshToken(self, accountId, updateType, url, clients, machineId):
|
def getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl):
|
||||||
"""
|
"""
|
||||||
Parameters:
|
Parameters:
|
||||||
- accountId
|
- accountId
|
||||||
@ -43,6 +45,9 @@ class Iface(pangramia.yt.management.YTManagementService.Iface):
|
|||||||
- url
|
- url
|
||||||
- clients
|
- clients
|
||||||
- machineId
|
- machineId
|
||||||
|
- airflowLogContext
|
||||||
|
- requestParamsJson
|
||||||
|
- assignedProxyUrl
|
||||||
|
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
@ -76,12 +81,21 @@ class Iface(pangramia.yt.management.YTManagementService.Iface):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def getInfoJsonDirect(self, url, clients):
|
||||||
|
"""
|
||||||
|
Parameters:
|
||||||
|
- url
|
||||||
|
- clients
|
||||||
|
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
||||||
def __init__(self, iprot, oprot=None):
|
def __init__(self, iprot, oprot=None):
|
||||||
pangramia.yt.management.YTManagementService.Client.__init__(self, iprot, oprot)
|
pangramia.yt.management.YTManagementService.Client.__init__(self, iprot, oprot)
|
||||||
|
|
||||||
def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients):
|
def getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson):
|
||||||
"""
|
"""
|
||||||
Parameters:
|
Parameters:
|
||||||
- accountId
|
- accountId
|
||||||
@ -92,12 +106,14 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
|||||||
- updateType
|
- updateType
|
||||||
- url
|
- url
|
||||||
- clients
|
- clients
|
||||||
|
- airflowLogContext
|
||||||
|
- requestParamsJson
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.send_getOrRefreshTokenWithReport(accountId, oldUrl, status, details, jobId, updateType, url, clients)
|
self.send_getOrRefreshTokenWithReport(accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson)
|
||||||
return self.recv_getOrRefreshTokenWithReport()
|
return self.recv_getOrRefreshTokenWithReport()
|
||||||
|
|
||||||
def send_getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients):
|
def send_getOrRefreshTokenWithReport(self, accountId, oldUrl, status, details, jobId, updateType, url, clients, airflowLogContext, requestParamsJson):
|
||||||
self._oprot.writeMessageBegin('getOrRefreshTokenWithReport', TMessageType.CALL, self._seqid)
|
self._oprot.writeMessageBegin('getOrRefreshTokenWithReport', TMessageType.CALL, self._seqid)
|
||||||
args = getOrRefreshTokenWithReport_args()
|
args = getOrRefreshTokenWithReport_args()
|
||||||
args.accountId = accountId
|
args.accountId = accountId
|
||||||
@ -108,6 +124,8 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
|||||||
args.updateType = updateType
|
args.updateType = updateType
|
||||||
args.url = url
|
args.url = url
|
||||||
args.clients = clients
|
args.clients = clients
|
||||||
|
args.airflowLogContext = airflowLogContext
|
||||||
|
args.requestParamsJson = requestParamsJson
|
||||||
args.write(self._oprot)
|
args.write(self._oprot)
|
||||||
self._oprot.writeMessageEnd()
|
self._oprot.writeMessageEnd()
|
||||||
self._oprot.trans.flush()
|
self._oprot.trans.flush()
|
||||||
@ -131,7 +149,7 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
|||||||
raise result.userExp
|
raise result.userExp
|
||||||
raise TApplicationException(TApplicationException.MISSING_RESULT, "getOrRefreshTokenWithReport failed: unknown result")
|
raise TApplicationException(TApplicationException.MISSING_RESULT, "getOrRefreshTokenWithReport failed: unknown result")
|
||||||
|
|
||||||
def getOrRefreshToken(self, accountId, updateType, url, clients, machineId):
|
def getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl):
|
||||||
"""
|
"""
|
||||||
Parameters:
|
Parameters:
|
||||||
- accountId
|
- accountId
|
||||||
@ -139,12 +157,15 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
|||||||
- url
|
- url
|
||||||
- clients
|
- clients
|
||||||
- machineId
|
- machineId
|
||||||
|
- airflowLogContext
|
||||||
|
- requestParamsJson
|
||||||
|
- assignedProxyUrl
|
||||||
|
|
||||||
"""
|
"""
|
||||||
self.send_getOrRefreshToken(accountId, updateType, url, clients, machineId)
|
self.send_getOrRefreshToken(accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl)
|
||||||
return self.recv_getOrRefreshToken()
|
return self.recv_getOrRefreshToken()
|
||||||
|
|
||||||
def send_getOrRefreshToken(self, accountId, updateType, url, clients, machineId):
|
def send_getOrRefreshToken(self, accountId, updateType, url, clients, machineId, airflowLogContext, requestParamsJson, assignedProxyUrl):
|
||||||
self._oprot.writeMessageBegin('getOrRefreshToken', TMessageType.CALL, self._seqid)
|
self._oprot.writeMessageBegin('getOrRefreshToken', TMessageType.CALL, self._seqid)
|
||||||
args = getOrRefreshToken_args()
|
args = getOrRefreshToken_args()
|
||||||
args.accountId = accountId
|
args.accountId = accountId
|
||||||
@ -152,6 +173,9 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
|||||||
args.url = url
|
args.url = url
|
||||||
args.clients = clients
|
args.clients = clients
|
||||||
args.machineId = machineId
|
args.machineId = machineId
|
||||||
|
args.airflowLogContext = airflowLogContext
|
||||||
|
args.requestParamsJson = requestParamsJson
|
||||||
|
args.assignedProxyUrl = assignedProxyUrl
|
||||||
args.write(self._oprot)
|
args.write(self._oprot)
|
||||||
self._oprot.writeMessageEnd()
|
self._oprot.writeMessageEnd()
|
||||||
self._oprot.trans.flush()
|
self._oprot.trans.flush()
|
||||||
@ -293,6 +317,44 @@ class Client(pangramia.yt.management.YTManagementService.Client, Iface):
|
|||||||
raise result.userExp
|
raise result.userExp
|
||||||
raise TApplicationException(TApplicationException.MISSING_RESULT, "reportState failed: unknown result")
|
raise TApplicationException(TApplicationException.MISSING_RESULT, "reportState failed: unknown result")
|
||||||
|
|
||||||
|
def getInfoJsonDirect(self, url, clients):
|
||||||
|
"""
|
||||||
|
Parameters:
|
||||||
|
- url
|
||||||
|
- clients
|
||||||
|
|
||||||
|
"""
|
||||||
|
self.send_getInfoJsonDirect(url, clients)
|
||||||
|
return self.recv_getInfoJsonDirect()
|
||||||
|
|
||||||
|
def send_getInfoJsonDirect(self, url, clients):
|
||||||
|
self._oprot.writeMessageBegin('getInfoJsonDirect', TMessageType.CALL, self._seqid)
|
||||||
|
args = getInfoJsonDirect_args()
|
||||||
|
args.url = url
|
||||||
|
args.clients = clients
|
||||||
|
args.write(self._oprot)
|
||||||
|
self._oprot.writeMessageEnd()
|
||||||
|
self._oprot.trans.flush()
|
||||||
|
|
||||||
|
def recv_getInfoJsonDirect(self):
|
||||||
|
iprot = self._iprot
|
||||||
|
(fname, mtype, rseqid) = iprot.readMessageBegin()
|
||||||
|
if mtype == TMessageType.EXCEPTION:
|
||||||
|
x = TApplicationException()
|
||||||
|
x.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
raise x
|
||||||
|
result = getInfoJsonDirect_result()
|
||||||
|
result.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
if result.success is not None:
|
||||||
|
return result.success
|
||||||
|
if result.serviceExp is not None:
|
||||||
|
raise result.serviceExp
|
||||||
|
if result.userExp is not None:
|
||||||
|
raise result.userExp
|
||||||
|
raise TApplicationException(TApplicationException.MISSING_RESULT, "getInfoJsonDirect failed: unknown result")
|
||||||
|
|
||||||
|
|
||||||
class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TProcessor):
|
class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TProcessor):
|
||||||
def __init__(self, handler):
|
def __init__(self, handler):
|
||||||
@ -302,6 +364,7 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP
|
|||||||
self._processMap["getLatestToken"] = Processor.process_getLatestToken
|
self._processMap["getLatestToken"] = Processor.process_getLatestToken
|
||||||
self._processMap["refreshToken"] = Processor.process_refreshToken
|
self._processMap["refreshToken"] = Processor.process_refreshToken
|
||||||
self._processMap["reportState"] = Processor.process_reportState
|
self._processMap["reportState"] = Processor.process_reportState
|
||||||
|
self._processMap["getInfoJsonDirect"] = Processor.process_getInfoJsonDirect
|
||||||
self._on_message_begin = None
|
self._on_message_begin = None
|
||||||
|
|
||||||
def on_message_begin(self, func):
|
def on_message_begin(self, func):
|
||||||
@ -330,7 +393,7 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP
|
|||||||
iprot.readMessageEnd()
|
iprot.readMessageEnd()
|
||||||
result = getOrRefreshTokenWithReport_result()
|
result = getOrRefreshTokenWithReport_result()
|
||||||
try:
|
try:
|
||||||
result.success = self._handler.getOrRefreshTokenWithReport(args.accountId, args.oldUrl, args.status, args.details, args.jobId, args.updateType, args.url, args.clients)
|
result.success = self._handler.getOrRefreshTokenWithReport(args.accountId, args.oldUrl, args.status, args.details, args.jobId, args.updateType, args.url, args.clients, args.airflowLogContext, args.requestParamsJson)
|
||||||
msg_type = TMessageType.REPLY
|
msg_type = TMessageType.REPLY
|
||||||
except TTransport.TTransportException:
|
except TTransport.TTransportException:
|
||||||
raise
|
raise
|
||||||
@ -359,7 +422,7 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP
|
|||||||
iprot.readMessageEnd()
|
iprot.readMessageEnd()
|
||||||
result = getOrRefreshToken_result()
|
result = getOrRefreshToken_result()
|
||||||
try:
|
try:
|
||||||
result.success = self._handler.getOrRefreshToken(args.accountId, args.updateType, args.url, args.clients, args.machineId)
|
result.success = self._handler.getOrRefreshToken(args.accountId, args.updateType, args.url, args.clients, args.machineId, args.airflowLogContext, args.requestParamsJson, args.assignedProxyUrl)
|
||||||
msg_type = TMessageType.REPLY
|
msg_type = TMessageType.REPLY
|
||||||
except TTransport.TTransportException:
|
except TTransport.TTransportException:
|
||||||
raise
|
raise
|
||||||
@ -469,6 +532,35 @@ class Processor(pangramia.yt.management.YTManagementService.Processor, Iface, TP
|
|||||||
oprot.writeMessageEnd()
|
oprot.writeMessageEnd()
|
||||||
oprot.trans.flush()
|
oprot.trans.flush()
|
||||||
|
|
||||||
|
def process_getInfoJsonDirect(self, seqid, iprot, oprot):
|
||||||
|
args = getInfoJsonDirect_args()
|
||||||
|
args.read(iprot)
|
||||||
|
iprot.readMessageEnd()
|
||||||
|
result = getInfoJsonDirect_result()
|
||||||
|
try:
|
||||||
|
result.success = self._handler.getInfoJsonDirect(args.url, args.clients)
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
except TTransport.TTransportException:
|
||||||
|
raise
|
||||||
|
except pangramia.yt.exceptions.ttypes.PBServiceException as serviceExp:
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
result.serviceExp = serviceExp
|
||||||
|
except pangramia.yt.exceptions.ttypes.PBUserException as userExp:
|
||||||
|
msg_type = TMessageType.REPLY
|
||||||
|
result.userExp = userExp
|
||||||
|
except TApplicationException as ex:
|
||||||
|
logging.exception('TApplication exception in handler')
|
||||||
|
msg_type = TMessageType.EXCEPTION
|
||||||
|
result = ex
|
||||||
|
except Exception:
|
||||||
|
logging.exception('Unexpected exception in handler')
|
||||||
|
msg_type = TMessageType.EXCEPTION
|
||||||
|
result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error')
|
||||||
|
oprot.writeMessageBegin("getInfoJsonDirect", msg_type, seqid)
|
||||||
|
result.write(oprot)
|
||||||
|
oprot.writeMessageEnd()
|
||||||
|
oprot.trans.flush()
|
||||||
|
|
||||||
# HELPER FUNCTIONS AND STRUCTURES
|
# HELPER FUNCTIONS AND STRUCTURES
|
||||||
|
|
||||||
|
|
||||||
@ -483,11 +575,13 @@ class getOrRefreshTokenWithReport_args(object):
|
|||||||
- updateType
|
- updateType
|
||||||
- url
|
- url
|
||||||
- clients
|
- clients
|
||||||
|
- airflowLogContext
|
||||||
|
- requestParamsJson
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, accountId=None, oldUrl=None, status=None, details=None, jobId=None, updateType= 6, url=None, clients=None,):
|
def __init__(self, accountId=None, oldUrl=None, status=None, details=None, jobId=None, updateType= 6, url=None, clients=None, airflowLogContext=None, requestParamsJson=None,):
|
||||||
self.accountId = accountId
|
self.accountId = accountId
|
||||||
self.oldUrl = oldUrl
|
self.oldUrl = oldUrl
|
||||||
self.status = status
|
self.status = status
|
||||||
@ -496,6 +590,8 @@ class getOrRefreshTokenWithReport_args(object):
|
|||||||
self.updateType = updateType
|
self.updateType = updateType
|
||||||
self.url = url
|
self.url = url
|
||||||
self.clients = clients
|
self.clients = clients
|
||||||
|
self.airflowLogContext = airflowLogContext
|
||||||
|
self.requestParamsJson = requestParamsJson
|
||||||
|
|
||||||
def read(self, iprot):
|
def read(self, iprot):
|
||||||
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
@ -546,6 +642,17 @@ class getOrRefreshTokenWithReport_args(object):
|
|||||||
self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
else:
|
else:
|
||||||
iprot.skip(ftype)
|
iprot.skip(ftype)
|
||||||
|
elif fid == 9:
|
||||||
|
if ftype == TType.STRUCT:
|
||||||
|
self.airflowLogContext = pangramia.yt.common.ttypes.AirflowLogContext()
|
||||||
|
self.airflowLogContext.read(iprot)
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
|
elif fid == 10:
|
||||||
|
if ftype == TType.STRING:
|
||||||
|
self.requestParamsJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
|
||||||
|
else:
|
||||||
|
iprot.skip(ftype)
|
||||||
else:
|
else:
|
||||||
iprot.skip(ftype)
|
iprot.skip(ftype)
|
||||||
iprot.readFieldEnd()
|
iprot.readFieldEnd()
|
||||||
@ -588,6 +695,14 @@ class getOrRefreshTokenWithReport_args(object):
|
|||||||
oprot.writeFieldBegin('clients', TType.STRING, 8)
|
oprot.writeFieldBegin('clients', TType.STRING, 8)
|
||||||
oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients)
|
oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients)
|
||||||
oprot.writeFieldEnd()
|
oprot.writeFieldEnd()
|
||||||
|
if self.airflowLogContext is not None:
|
||||||
|
oprot.writeFieldBegin('airflowLogContext', TType.STRUCT, 9)
|
||||||
|
self.airflowLogContext.write(oprot)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
|
if self.requestParamsJson is not None:
|
||||||
|
oprot.writeFieldBegin('requestParamsJson', TType.STRING, 10)
|
||||||
|
oprot.writeString(self.requestParamsJson.encode('utf-8') if sys.version_info[0] == 2 else self.requestParamsJson)
|
||||||
|
oprot.writeFieldEnd()
|
||||||
oprot.writeFieldStop()
|
oprot.writeFieldStop()
|
||||||
oprot.writeStructEnd()
|
oprot.writeStructEnd()
|
||||||
|
|
||||||
@ -615,6 +730,8 @@ getOrRefreshTokenWithReport_args.thrift_spec = (
|
|||||||
(6, TType.I32, 'updateType', None, 6, ), # 6
|
(6, TType.I32, 'updateType', None, 6, ), # 6
|
||||||
(7, TType.STRING, 'url', 'UTF8', None, ), # 7
|
(7, TType.STRING, 'url', 'UTF8', None, ), # 7
|
||||||
(8, TType.STRING, 'clients', 'UTF8', None, ), # 8
|
(8, TType.STRING, 'clients', 'UTF8', None, ), # 8
|
||||||
|
(9, TType.STRUCT, 'airflowLogContext', [pangramia.yt.common.ttypes.AirflowLogContext, None], None, ), # 9
|
||||||
|
(10, TType.STRING, 'requestParamsJson', 'UTF8', None, ), # 10
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -712,16 +829,22 @@ class getOrRefreshToken_args(object):
|
|||||||
- url
|
- url
|
||||||
- clients
|
- clients
|
||||||
- machineId
|
- machineId
|
||||||
|
- airflowLogContext
|
||||||
|
- requestParamsJson
|
||||||
|
- assignedProxyUrl
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, accountId=None, updateType= 6, url=None, clients=None, machineId=None,):
|
def __init__(self, accountId=None, updateType= 6, url=None, clients=None, machineId=None, airflowLogContext=None, requestParamsJson=None, assignedProxyUrl=None,):
|
||||||
self.accountId = accountId
|
self.accountId = accountId
|
||||||
self.updateType = updateType
|
self.updateType = updateType
|
||||||
self.url = url
|
self.url = url
|
||||||
self.clients = clients
|
self.clients = clients
|
||||||
self.machineId = machineId
|
self.machineId = machineId
|
||||||
|
self.airflowLogContext = airflowLogContext
|
||||||
|
self.requestParamsJson = requestParamsJson
|
||||||
|
self.assignedProxyUrl = assignedProxyUrl
|
||||||
|
|
||||||
def read(self, iprot):
|
def read(self, iprot):
|
||||||
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
|
||||||
@ -757,6 +880,22 @@ class getOrRefreshToken_args(object):
self.machineId = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
else:
iprot.skip(ftype)
elif fid == 6:
if ftype == TType.STRUCT:
self.airflowLogContext = pangramia.yt.common.ttypes.AirflowLogContext()
self.airflowLogContext.read(iprot)
else:
iprot.skip(ftype)
elif fid == 7:
if ftype == TType.STRING:
self.requestParamsJson = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
else:
iprot.skip(ftype)
elif fid == 8:
if ftype == TType.STRING:
self.assignedProxyUrl = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
else:
iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@ -787,6 +926,18 @@ class getOrRefreshToken_args(object):
oprot.writeFieldBegin('machineId', TType.STRING, 5)
oprot.writeString(self.machineId.encode('utf-8') if sys.version_info[0] == 2 else self.machineId)
oprot.writeFieldEnd()
if self.airflowLogContext is not None:
oprot.writeFieldBegin('airflowLogContext', TType.STRUCT, 6)
self.airflowLogContext.write(oprot)
oprot.writeFieldEnd()
if self.requestParamsJson is not None:
oprot.writeFieldBegin('requestParamsJson', TType.STRING, 7)
oprot.writeString(self.requestParamsJson.encode('utf-8') if sys.version_info[0] == 2 else self.requestParamsJson)
oprot.writeFieldEnd()
if self.assignedProxyUrl is not None:
oprot.writeFieldBegin('assignedProxyUrl', TType.STRING, 8)
oprot.writeString(self.assignedProxyUrl.encode('utf-8') if sys.version_info[0] == 2 else self.assignedProxyUrl)
oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()

@ -811,6 +962,9 @@ getOrRefreshToken_args.thrift_spec = (
(3, TType.STRING, 'url', 'UTF8', None, ), # 3
(4, TType.STRING, 'clients', 'UTF8', None, ), # 4
(5, TType.STRING, 'machineId', 'UTF8', None, ), # 5
(6, TType.STRUCT, 'airflowLogContext', [pangramia.yt.common.ttypes.AirflowLogContext, None], None, ), # 6
(7, TType.STRING, 'requestParamsJson', 'UTF8', None, ), # 7
(8, TType.STRING, 'assignedProxyUrl', 'UTF8', None, ), # 8
)

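For reference, a minimal client-side sketch of how the three new optional arguments could be passed once this generated module is importable. The package path (pangramia.yt.tokens_ops.YTTokenOpService), the endpoint, and all values are assumptions for illustration, not part of this diff.

# Hypothetical usage sketch (not part of this commit); module paths and endpoint are assumed.
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.tokens_ops import YTTokenOpService          # assumed generated package
from pangramia.yt.common.ttypes import AirflowLogContext, TokenUpdateMode

transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))  # assumed endpoint
client = YTTokenOpService.Client(TBinaryProtocol.TBinaryProtocol(transport))
transport.open()
token = client.getOrRefreshToken(
    accountId='acct-001',                        # illustrative values
    updateType=TokenUpdateMode.AUTO,
    url='https://www.youtube.com/watch?v=example',
    clients='web',
    machineId='worker-01',
    airflowLogContext=AirflowLogContext(),       # new optional field 6 (struct fields omitted here)
    requestParamsJson='{"source": "dag_v2"}',    # new optional field 7
    assignedProxyUrl='socks5://127.0.0.1:1080',  # new optional field 8
)
transport.close()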
@ -1401,5 +1555,165 @@ reportState_result.thrift_spec = (
(1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1
(2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2
)


class getInfoJsonDirect_args(object):
"""
Attributes:
- url
- clients

"""


def __init__(self, url=None, clients=None,):
self.url = url
self.clients = clients

def read(self, iprot):
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
return
iprot.readStructBegin()
while True:
(fname, ftype, fid) = iprot.readFieldBegin()
if ftype == TType.STOP:
break
if fid == 1:
if ftype == TType.STRING:
self.url = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
else:
iprot.skip(ftype)
elif fid == 2:
if ftype == TType.STRING:
self.clients = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
else:
iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
iprot.readStructEnd()

def write(self, oprot):
if oprot._fast_encode is not None and self.thrift_spec is not None:
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
return
oprot.writeStructBegin('getInfoJsonDirect_args')
if self.url is not None:
oprot.writeFieldBegin('url', TType.STRING, 1)
oprot.writeString(self.url.encode('utf-8') if sys.version_info[0] == 2 else self.url)
oprot.writeFieldEnd()
if self.clients is not None:
oprot.writeFieldBegin('clients', TType.STRING, 2)
oprot.writeString(self.clients.encode('utf-8') if sys.version_info[0] == 2 else self.clients)
oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()

def validate(self):
return

def __repr__(self):
L = ['%s=%r' % (key, value)
for key, value in self.__dict__.items()]
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

def __eq__(self, other):
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

def __ne__(self, other):
return not (self == other)
all_structs.append(getInfoJsonDirect_args)
getInfoJsonDirect_args.thrift_spec = (
None, # 0
(1, TType.STRING, 'url', 'UTF8', None, ), # 1
(2, TType.STRING, 'clients', 'UTF8', None, ), # 2
)


class getInfoJsonDirect_result(object):
"""
Attributes:
- success
- serviceExp
- userExp

"""


def __init__(self, success=None, serviceExp=None, userExp=None,):
self.success = success
self.serviceExp = serviceExp
self.userExp = userExp

def read(self, iprot):
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
return
iprot.readStructBegin()
while True:
(fname, ftype, fid) = iprot.readFieldBegin()
if ftype == TType.STOP:
break
if fid == 0:
if ftype == TType.STRUCT:
self.success = pangramia.yt.common.ttypes.JobTokenData()
self.success.read(iprot)
else:
iprot.skip(ftype)
elif fid == 1:
if ftype == TType.STRUCT:
self.serviceExp = pangramia.yt.exceptions.ttypes.PBServiceException.read(iprot)
else:
iprot.skip(ftype)
elif fid == 2:
if ftype == TType.STRUCT:
self.userExp = pangramia.yt.exceptions.ttypes.PBUserException.read(iprot)
else:
iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
iprot.readStructEnd()

def write(self, oprot):
if oprot._fast_encode is not None and self.thrift_spec is not None:
oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
return
oprot.writeStructBegin('getInfoJsonDirect_result')
if self.success is not None:
oprot.writeFieldBegin('success', TType.STRUCT, 0)
self.success.write(oprot)
oprot.writeFieldEnd()
if self.serviceExp is not None:
oprot.writeFieldBegin('serviceExp', TType.STRUCT, 1)
self.serviceExp.write(oprot)
oprot.writeFieldEnd()
if self.userExp is not None:
oprot.writeFieldBegin('userExp', TType.STRUCT, 2)
self.userExp.write(oprot)
oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()

def validate(self):
return

def __repr__(self):
L = ['%s=%r' % (key, value)
for key, value in self.__dict__.items()]
return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

def __eq__(self, other):
return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

def __ne__(self, other):
return not (self == other)
all_structs.append(getInfoJsonDirect_result)
getInfoJsonDirect_result.thrift_spec = (
(0, TType.STRUCT, 'success', [pangramia.yt.common.ttypes.JobTokenData, None], None, ), # 0
(1, TType.STRUCT, 'serviceExp', [pangramia.yt.exceptions.ttypes.PBServiceException, None], None, ), # 1
(2, TType.STRUCT, 'userExp', [pangramia.yt.exceptions.ttypes.PBUserException, None], None, ), # 2
)
fix_spec(all_structs)
del all_structs
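As a quick sanity check of the generated read/write pair above, a round trip through an in-memory binary protocol can be sketched as follows. The import path of the generated module is an assumption based on the IDL namespaces; only the class itself comes from this diff.

# Hedged sketch: serialize and deserialize getInfoJsonDirect_args in memory.
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.tokens_ops.YTTokenOpService import getInfoJsonDirect_args  # assumed path

buf = TTransport.TMemoryBuffer()
args = getInfoJsonDirect_args(url='https://www.youtube.com/watch?v=example', clients='web,android')
args.write(TBinaryProtocol.TBinaryProtocol(buf))

decoded = getInfoJsonDirect_args()
decoded.read(TBinaryProtocol.TBinaryProtocol(TTransport.TMemoryBuffer(buf.getvalue())))
assert decoded == args  # __eq__ compares __dict__, as defined above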
@ -7,7 +7,7 @@
<groupId>com.pangramia.yt</groupId>
<artifactId>thrift-services</artifactId>
<!-- This version is for the Thrift API definition. The installable package will use this version. -->
<version>3.6.0-SNAPSHOT</version>
<version>5.5.0-SNAPSHOT</version>

<properties>
<thrift.version>0.16.0</thrift.version>
19
thrift_model/services/base_service.thrift
Normal file
@ -0,0 +1,19 @@
namespace py pangramia.base_service
namespace java com.pangramia.base_service

include "../data/common.thrift"
include "../data/exceptions.thrift"

service BaseService {
// Common health check method
bool ping() throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

// Common error reporting
bool reportError(1: string message,
2: map<string, string> details) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp)

// Add this to fix AsyncProcessor issues
oneway void shutdown()
}
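A minimal server-side sketch of a handler for this new BaseService, assuming the IDL is compiled with the Apache Thrift Python generator into the pangramia.base_service package declared above; the handler name and port are illustrative only.

# Hedged sketch of a BaseService handler; names outside the IDL are assumptions.
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from thrift.server import TServer
from pangramia.base_service import BaseService   # assumed generated package

class BaseServiceHandler(object):
    def ping(self):
        return True

    def reportError(self, message, details):
        # details is a map<string, string> per the IDL
        print('reportError: %s %r' % (message, details))
        return True

    def shutdown(self):
        # oneway: the client does not wait for a reply
        pass

server = TServer.TSimpleServer(
    BaseService.Processor(BaseServiceHandler()),
    TSocket.TServerSocket(port=9090),             # illustrative port
    TTransport.TBufferedTransportFactory(),
    TBinaryProtocol.TBinaryProtocolFactory(),
)
# server.serve()  # blocking call; left commented out in this sketch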
63
thrift_model/services/yt_admin_ops.thrift
Normal file
@ -0,0 +1,63 @@
namespace py pangramia.yt.admin_ops
namespace java com.pangramia.yt.admin_ops

include "../data/common.thrift"
include "../data/exceptions.thrift"
include "base_service.thrift"

// Proxy and Account management
service YTAccountsOpService extends base_service.BaseService {

// AccountPairs
bool addAccountPair(1: string accountId, 2: string proxyId, 3: string machineId, 4: common.ProxyData proxyData, 5: optional common.AccountData accountData)
throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

common.AccountPairWithState getPair(1: string machineId)
throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool pair(1: string accountId, 2: string proxyId, 3:string machineId)
throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool unpair(1: string accountId, 2: string proxyId, 3:string machineId)
throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

list<common.AccountPairWithState> listAccountPairs(1: optional common.AccountPairState filter) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

// ManageAccounts
bool addAccount(1: string accountId, 2: optional common.AccountData accountData) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),


bool suspendAccount(1: string accountId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool resumeAccount(1: string accountId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool removeAccount(1: string accountId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

list<string> listActiveAccounts() throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

// ManageProxy
bool addProxy(1: string proxyId, 2: common.ProxyData proxyData) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool suspendProxy(1: string proxyId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool resumeProxy(1: string proxyId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

bool removeProxy(1: string proxyId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),

list<string> listActiveProxies() throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),
}
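Assuming generated Python code for the service above, the pairing workflow could be exercised roughly as sketched below. The package path, endpoint, and IDs are assumptions made for illustration, not part of this commit.

# Hedged sketch against YTAccountsOpService; only string-argument methods are used here.
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.admin_ops import YTAccountsOpService   # assumed generated package

transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))  # assumed endpoint
admin = YTAccountsOpService.Client(TBinaryProtocol.TBinaryProtocol(transport))
transport.open()
admin.addAccount('acct-001')                      # accountData is optional in the IDL
admin.pair('acct-001', 'proxy-01', 'worker-01')
for p in admin.listAccountPairs():                # optional filter omitted
    print(p)
admin.unpair('acct-001', 'proxy-01', 'worker-01')
transport.close()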
27
thrift_model/services/yt_management.thrift
Normal file
@ -0,0 +1,27 @@
namespace py pangramia.yt.management
namespace java com.pangramia.yt.management

include "../data/common.thrift"
include "../data/exceptions.thrift"
include "base_service.thrift"

// Service for managing the state of shared resources like proxies and accounts.
// This service is intended to be run as a single, authoritative instance.
service YTManagementService extends base_service.BaseService {

// --- Proxy Management Methods ---
list<common.ProxyStatus> getProxyStatus(1: optional string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool banProxy(1: string proxyUrl, 2: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool unbanProxy(1: string proxyUrl, 2: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool resetAllProxyStatuses(1: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool banAllProxies(1: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool deleteProxyFromRedis(1: string proxyUrl, 2: string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
i32 deleteAllProxiesFromRedis(1: optional string serverIdentity) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),

// --- Account Management Methods ---
list<common.AccountStatus> getAccountStatus(1: optional string accountId, 2: optional string accountPrefix) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool banAccount(1: string accountId, 2: optional string reason) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool unbanAccount(1: string accountId, 2: optional string reason) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
bool deleteAccountFromRedis(1: string accountId) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp),
i32 deleteAllAccountsFromRedis(1: optional string accountPrefix) throws (1: exceptions.PBServiceException serviceExp, 2: exceptions.PBUserException userExp)
}
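In the same vein, a hedged sketch of the proxy/account management calls above; the package path, endpoint, proxy URL, and server identity are invented for the example.

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.management import YTManagementService   # assumed generated package

transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))  # assumed endpoint
mgmt = YTManagementService.Client(TBinaryProtocol.TBinaryProtocol(transport))
transport.open()
for status in mgmt.getProxyStatus('dl-server-01'):        # optional serverIdentity
    print(status)
mgmt.banProxy('socks5://127.0.0.1:1080', 'dl-server-01')
mgmt.unbanAccount('acct-001', 'manual unban after review')
transport.close()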
50
thrift_model/services/yt_tokens_ops.thrift
Normal file
@ -0,0 +1,50 @@
namespace py pangramia.yt.tokens_ops
namespace java com.pangramia.yt.tokens_ops

include "../data/common.thrift"
include "../data/exceptions.thrift"
include "yt_management.thrift"

// The unified service that combines token operations and management functions.
// The server implementation will decide which functions are active based on its role.
service YTTokenOpService extends yt_management.YTManagementService {

common.JobTokenData getOrRefreshTokenWithReport ( 1: string accountId,
2: string oldUrl,
3: common.JobState status,
4: optional string details,
5: optional string jobId,
6: optional common.TokenUpdateMode updateType = common.TokenUpdateMode.AUTO,
7: optional string url,
8: optional string clients,
9: optional common.AirflowLogContext airflowLogContext,
10: optional string requestParamsJson) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp)

common.JobTokenData getOrRefreshToken ( 1: string accountId,
2: optional common.TokenUpdateMode updateType = common.TokenUpdateMode.AUTO,
3: optional string url,
4: optional string clients,
5: optional string machineId,
6: optional common.AirflowLogContext airflowLogContext,
7: optional string requestParamsJson,
8: optional string assignedProxyUrl) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp)

common.JobTokenData getLatestToken (1: string accountId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp),
common.JobTokenData refreshToken ( 1: string accountId,
2: optional common.TokenUpdateMode updateType = common.TokenUpdateMode.AUTO,
3: optional string url ) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp)
bool reportState( 1: string url,
2: common.JobState status,
3: optional string details,
4: optional string jobId) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp)

// New method for direct info.json generation, bypassing Node.js token generation.
common.JobTokenData getInfoJsonDirect(1: string url,
2: optional string clients) throws (1: exceptions.PBServiceException serviceExp,
2: exceptions.PBUserException userExp)
}
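The new getInfoJsonDirect call can be sketched the same way; per the comment above it returns a JobTokenData without going through the Node.js token generator. Client setup, endpoint, and URL are assumptions.

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from pangramia.yt.tokens_ops import YTTokenOpService   # assumed generated package

transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))  # assumed endpoint
tokens = YTTokenOpService.Client(TBinaryProtocol.TBinaryProtocol(transport))
transport.open()
job = tokens.getInfoJsonDirect('https://www.youtube.com/watch?v=example', 'web')
print(job)   # common.JobTokenData; its field layout is defined in common.thrift
transport.close()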
@ -110,11 +110,15 @@ def generate_group_vars(cluster_config, group_vars_dir):
# Get master IP for Redis configuration
master_ip = list(cluster_config['master'].values())[0]['ip']

# Combine master and worker nodes to create a hostvars-like structure
all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})}

# Prepare data for YAML dump
generated_data = {
'master_host_ip': master_ip,
'redis_port': 52909,
'external_access_ips': external_ips if external_ips else []
'external_access_ips': external_ips if external_ips else [],
'hostvars': all_nodes
}
generated_data.update(global_vars)

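To make the new hostvars merge above concrete, a small illustrative input/output; the host names and IPs here are invented for the example, and only the dictionary merge itself comes from the diff.

# Illustrative only: shows what the {**master, **workers} merge produces.
cluster_config = {
    'master':  {'af-master':   {'ip': '10.0.0.10'}},
    'workers': {'af-worker-1': {'ip': '10.0.0.11'},
                'af-worker-2': {'ip': '10.0.0.12'}},
}
all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})}
# all_nodes now maps every node name to its vars and is written out as 'hostvars'
# in the generated group_vars, alongside master_host_ip and redis_port.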
@ -1,10 +1,11 @@
#!/bin/bash
#
# Syncs the project directory to a remote "tower" host for deployment orchestration.
# Syncs the project directory to a remote "jump" host for deployment orchestration.
#
# This script is designed to be run from the root of the project directory.
# It excludes generated files, local data, logs, and other non-essential files
# It syncs essential project files like source code, DAGs, and Ansible playbooks,
# to ensure a clean copy of the source code and configuration templates is synced.
# while excluding generated files, local data, logs, and other non-essential files
# to ensure a clean copy is deployed.

set -e # Exit immediately if a command exits with a non-zero status.
set -u # Treat unset variables as an error.
@ -13,9 +14,9 @@ set -u # Treat unset variables as an error.
# IMPORTANT: Update these variables to match your environment.
#
# The remote host to sync to (e.g., user@hostname)
REMOTE_HOST="user@your-tower-host.com"
REMOTE_HOST="alex_p@af-jump"
# The destination path on the remote host
REMOTE_PATH="/path/to/your/project"
REMOTE_PATH="/home/alex_p/yt-ops-services"
# The root directory of the project on the local machine.
SOURCE_DIR="."

@ -46,8 +47,11 @@ EXCLUDE_OPTS=(
"--exclude=airflow/configs/envoy.yaml"
"--exclude=airflow/configs/docker-compose.camoufox.yaml"
"--exclude=airflow/configs/camoufox_endpoints.json"
"--exclude=cluster*.yml"
# Exclude local development notes
"--exclude=TODO-*.md"
# Exclude user-specific tools
"--exclude=*aider*"
)

# The rsync command:
@ -55,7 +59,9 @@ EXCLUDE_OPTS=(
# -v: verbose
# -z: compress file data during the transfer
# --delete: delete extraneous files from the destination directory
rsync -avz --delete \
# --partial: keep partially transferred files
# --progress: show progress during transfer
rsync -avz --delete --partial --progress \
"${EXCLUDE_OPTS[@]}" \
"$SOURCE_DIR/" \
"$REMOTE_HOST:$REMOTE_PATH/"
BIN
yt_ops_services/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
yt_ops_services/__pycache__/client_utils.cpython-39.pyc
Normal file
Binary file not shown.
BIN
yt_ops_services/__pycache__/version.cpython-39.pyc
Normal file
Binary file not shown.