diff --git a/.vault_pass b/.vault_pass deleted file mode 100644 index 89a0cd2..0000000 --- a/.vault_pass +++ /dev/null @@ -1 +0,0 @@ -ytdlp-ops diff --git a/README.md b/README.md deleted file mode 100644 index e69f731..0000000 --- a/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# Airflow/YT-DLP Cluster Deployment - -This repository contains Ansible playbooks and configuration files for deploying a distributed Airflow cluster with YT-DLP services. - -## Prerequisites - -1. Install Ansible on your control machine -2. Ensure SSH access to all target nodes -3. Set up your vault password in `.vault_pass` file - -## Initial Setup - -Generate the inventory and configuration files from your cluster definition: - -```bash -./tools/generate-inventory.py cluster.test.yml -``` - -**Note:** All Ansible commands should be run from the project root directory. - -## Full Deployment - -### Deploy entire cluster with proxies (recommended for new setups): - -```bash -ansible-playbook ansible/playbook-full-with-proxies.yml -``` - -### Deploy cluster without proxies: - -```bash -ansible-playbook ansible/playbook-full.yml -``` - -## Targeted Deployments - -### Deploy only to master node: - -```bash -ansible-playbook ansible/playbook-master.yml --limit="af-test" -``` - -### Deploy only to worker nodes: - -```bash -ansible-playbook ansible/playbook-worker.yml -``` - -## Deploy Specific Steps - -To start at a specific task (useful for debugging or partial deployments): - -```bash -ansible-playbook ansible/playbook-master.yml --limit="af-test" --start-at-task="Prepare Caddy asset extraction directory" -``` - -## Debug Deployments - -Run with dry-run and verbose output for debugging: - -```bash -ansible-playbook ansible/playbook-full.yml --check --diff -vv -``` - -## DAGs Only Deployment - -To update only DAG files and configurations: - -```bash -ansible-playbook ansible/playbook-dags.yml -``` - -## Vault Management - -All sensitive data is encrypted with Ansible Vault. The vault password should be stored in `.vault_pass` file in the project root. - -To edit vault files: -```bash -ansible-vault edit group_vars/all/vault.yml -``` diff --git a/VERSION b/VERSION deleted file mode 100644 index 12f67a4..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -3.11.3-exp diff --git a/airflow/.dockerignore b/airflow/.dockerignore deleted file mode 100644 index 6037927..0000000 --- a/airflow/.dockerignore +++ /dev/null @@ -1,7 +0,0 @@ -redis-data -minio-data -postgres-data -logs -downloadfiles -addfiles -inputfiles diff --git a/airflow/Dockerfile b/airflow/Dockerfile deleted file mode 100644 index 87a13bd..0000000 --- a/airflow/Dockerfile +++ /dev/null @@ -1,163 +0,0 @@ -FROM apache/airflow:2.10.3 -ENV AIRFLOW_VERSION=2.10.3 - -WORKDIR /app - -# Install system dependencies -USER root -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - vim \ - mc \ - jq \ - build-essential \ - python3-dev \ - wget \ - tar \ - xz-utils \ - iputils-ping \ - curl \ - traceroute \ - tcpdump \ - unzip \ - git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base - -# Ensure the airflow user and group exist with the correct UID/GID and permissions. -# This is done early to allow `COPY --chown` to work correctly. -RUN if ! getent group airflow > /dev/null 2>&1; then \ - groupadd -g 50000 airflow; \ - fi && \ - if ! 
id -u airflow > /dev/null 2>&1; then \ - useradd -u 50000 -g 50000 -m -s /bin/bash airflow; \ - else \ - usermod -g 50000 airflow; \ - fi && \ - chown -R airflow:airflow /app && \ - chmod -R g+w /app - -# Download and install mc (MinIO client) -RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \ - chmod +x /usr/local/bin/mc - -# Install FFmpeg -RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \ - wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \ - mkdir -p /opt/ffmpeg && \ - tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \ - ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \ - ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \ - rm -rf /tmp/ffmpeg.tar.xz - -# Install s5cmd -RUN S5CMD_URL="https://github.com/peak/s5cmd/releases/download/v2.3.0/s5cmd_2.3.0_linux_amd64.deb" && \ - wget -qO /tmp/s5cmd.deb "$S5CMD_URL" && \ - dpkg -i /tmp/s5cmd.deb && \ - rm /tmp/s5cmd.deb - -# Install yt-dlp from master -# Temporarily rename pip to bypass the root check in the base image's pip wrapper, -# ensuring a system-wide installation. -RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \ - python3 -m pip install --no-cache-dir -U pip hatchling wheel && \ - python3 -m pip install --no-cache-dir --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" && \ - chmod a+x "$(which yt-dlp)" && \ - mv /usr/local/bin/pip.orig /usr/local/bin/pip - -# Install Deno -RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \ - unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip - -# Install aria2c and gost -RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash - -# Install gost (direct download of binary) -RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \ - tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \ - rm gost_2.12.0_linux_amd64.tar.gz - -# Verify installations -RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V && s5cmd version - -# Create version information files -RUN ( \ - echo "--- yt-dlp ---" && \ - yt-dlp --version && \ - echo "" && \ - echo "--- deno ---" && \ - deno --version && \ - echo "" && \ - echo "--- ffmpeg ---" && \ - ffmpeg -version | head -n 1 \ -) > VERSION-airflow-latest.txt && \ -cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt - - -# Install base Airflow dependencies as root (system-wide) -# [FIX] Explicitly install a version of botocore compatible with Python 3.12 -# to fix a RecursionError when handling S3 remote logs. -# Temporarily rename pip to bypass the root check in the base image's pip wrapper. -RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \ - python3 -m pip install --no-cache-dir \ - "apache-airflow==${AIRFLOW_VERSION}" \ - apache-airflow-providers-docker \ - apache-airflow-providers-http \ - apache-airflow-providers-amazon \ - "apache-airflow-providers-celery>=3.3.0" \ - apache-airflow-providers-redis \ - "botocore>=1.34.118" \ - psycopg2-binary \ - "gunicorn==20.1.0" \ - "python-ffmpeg==2.0.12" \ - "ffprobe3" \ - "python-dotenv" \ - "PyYAML" \ - "aria2p" \ - "s5cmdpy" && \ - mv /usr/local/bin/pip.orig /usr/local/bin/pip - -# --- Install the custom yt_ops_services package --- -# Copy all the necessary source code for the package. 
-# The deploy script ensures these files are in the build context. -COPY --chown=airflow:airflow setup.py ./ -COPY --chown=airflow:airflow VERSION ./ -COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/ -COPY --chown=airflow:airflow thrift_model ./thrift_model/ -COPY --chown=airflow:airflow pangramia ./pangramia/ - -# Copy the ytops-client tool and its executable -COPY --chown=airflow:airflow ytops_client ./ytops_client/ -COPY --chown=airflow:airflow bin/ytops-client /app/bin/ytops-client -RUN chmod +x /app/bin/ytops-client -ENV PATH="/app/bin:${PATH}" - -# Install dependencies for the ytops_client package, then install the package itself -# in editable mode. This makes the `yt_ops_services` and `ytops_client` modules -# available everywhere. -# Bypass the pip root check again. -RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \ - python3 -m pip install --no-cache-dir -r ytops_client/requirements.txt && \ - python3 -m pip install --no-cache-dir -e . && \ - mv /usr/local/bin/pip.orig /usr/local/bin/pip - -# Ensure all files in /app, including the generated .egg-info directory, are owned by the airflow user. -RUN chown -R airflow:airflow /app - -# Copy token generator scripts and utils with correct permissions -# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./ -# COPY --chown=airflow:airflow utils ./utils/ -# COPY --chown=airflow:airflow token_generator ./token_generator/ - -# Ensure the home directory and all its contents are owned by the airflow user before switching to it. -# This fixes permission issues that can occur if previous RUN commands created files in /home/airflow as root. -# We also make it world-writable to accommodate running the container with a different user ID, which can -# happen in some environments (e.g., OpenShift or with docker-compose user overrides). -RUN mkdir -p /home/airflow/.aws && chown -R airflow:airflow /home/airflow/.aws -RUN chown -R airflow:airflow /home/airflow && chmod -R 777 /home/airflow - -# Switch to airflow user for all subsequent operations -USER airflow - -# Expose bgutil plugin to worker path -ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH diff --git a/airflow/Dockerfile.caddy b/airflow/Dockerfile.caddy deleted file mode 100644 index 5d37c33..0000000 --- a/airflow/Dockerfile.caddy +++ /dev/null @@ -1,14 +0,0 @@ -# Build the final Caddy image -FROM caddy:2-alpine - -# Copy the pre-compressed static assets from the build context. -# These assets are extracted from the main Airflow image by the Ansible playbook. -COPY caddy_build_assets/appbuilder /usr/share/caddy/static/appbuilder -COPY caddy_build_assets/dist /usr/share/caddy/static/dist - -# Copy the Caddyfile configuration. The build context is the project root, -# so the path is relative to that. 
-COPY configs/Caddyfile /etc/caddy/Caddyfile - -# Expose the port Caddy listens on -EXPOSE 8080 diff --git a/airflow/Dockerfile.old b/airflow/Dockerfile.old deleted file mode 100644 index 5b56f60..0000000 --- a/airflow/Dockerfile.old +++ /dev/null @@ -1,125 +0,0 @@ - - - -FROM apache/airflow:2.10.3 -ENV AIRFLOW_VERSION=2.10.3 - -WORKDIR /app - -# Install system dependencies -USER root -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - vim \ - mc \ - jq \ - build-essential \ - python3-dev \ - wget \ - tar \ - xz-utils \ - iputils-ping \ - curl \ - traceroute \ - tcpdump \ - unzip \ - git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base - -# Download and install mc (MinIO client) -RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \ - chmod +x /usr/local/bin/mc - -# Install FFmpeg -RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \ - wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \ - mkdir -p /opt/ffmpeg && \ - tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \ - ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \ - ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \ - rm -rf /tmp/ffmpeg.tar.xz - -# Install yt-dlp from master -RUN python3 -m pip install -U pip hatchling wheel && \ - python3 -m pip install --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" - -# Install Deno -RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \ - unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip - -# Install aria2c and gost -RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash - -# Install gost (direct download of binary) -RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \ - tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \ - rm gost_2.12.0_linux_amd64.tar.gz - -# Verify installations -RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V - -# Check if airflow group exists, create it if it doesn't, then ensure proper setup -RUN if ! getent group airflow > /dev/null 2>&1; then \ - groupadd -g 1001 airflow; \ - fi && \ - # Check if airflow user exists and is in the airflow group - if id -u airflow > /dev/null 2>&1; then \ - usermod -a -G airflow airflow; \ - else \ - useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \ - fi && \ - chown -R airflow:airflow /app && \ - chmod g+w /app - -# Install base Airflow dependencies -# [FIX] Explicitly install a version of botocore compatible with Python 3.12 -# to fix a RecursionError when handling S3 remote logs. -RUN pip install --no-cache-dir \ - "apache-airflow==${AIRFLOW_VERSION}" \ - apache-airflow-providers-docker \ - apache-airflow-providers-http \ - apache-airflow-providers-amazon \ - "botocore>=1.34.118" \ - psycopg2-binary \ - "gunicorn==20.1.0" \ - "python-ffmpeg==2.0.12" \ - "ffprobe3" \ - "python-dotenv" - -# Switch to airflow user for package installation -USER airflow - -# --- Install the custom yt_ops_services package --- -# Copy all the necessary source code for the package. -# The deploy script ensures these files are in the build context. 
-COPY --chown=airflow:airflow setup.py ./ -COPY --chown=airflow:airflow VERSION ./ -COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/ -COPY --chown=airflow:airflow thrift_model ./thrift_model/ -COPY --chown=airflow:airflow pangramia ./pangramia/ - -# Install the package in editable mode. This runs setup.py and installs all dependencies -# listed in `install_requires`, making the `yt_ops_services` module available everywhere. -RUN pip install --no-cache-dir -e . - -# Copy token generator scripts and utils with correct permissions -# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./ -# COPY --chown=airflow:airflow utils ./utils/ -# COPY --chown=airflow:airflow token_generator ./token_generator/ - -# Create version information files -RUN ( \ - echo "--- yt-dlp ---" && \ - yt-dlp --version && \ - echo "" && \ - echo "--- deno ---" && \ - deno --version && \ - echo "" && \ - echo "--- ffmpeg ---" && \ - ffmpeg -version | head -n 1 \ -) > VERSION-airflow-latest.txt && \ -cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt - -# Expose bgutil plugin to worker path -ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH diff --git a/airflow/README.md b/airflow/README.md deleted file mode 100644 index 97a9f5d..0000000 --- a/airflow/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# Airflow Cluster for YT-DLP Operations - -This directory contains the configuration and deployment files for an Apache Airflow cluster designed to manage distributed YouTube video downloading tasks using the `ytdlp-ops` service. - -## Overview - -The cluster consists of: -- **Master Node:** Runs the Airflow webserver, scheduler, and Flower (Celery monitoring). It also hosts shared services like Redis (broker/backend) and MinIO (artifact storage). -- **Worker Nodes:** Run Celery workers that execute download tasks. Each worker node also runs the `ytdlp-ops-service` (Thrift API server), Envoy proxy (load balancer for Thrift traffic), and Camoufox (remote browser instances for token generation). - -## Key Components - -### Airflow DAGs - -- `ytdlp_ops_dispatcher.py`: The "Sensor" part of a Sensor/Worker pattern. It monitors a Redis queue for URLs to process and triggers a `ytdlp_ops_worker_per_url` DAG run for each URL. -- `ytdlp_ops_worker_per_url.py`: The "Worker" DAG. It processes a single URL passed via DAG run configuration. It implements worker affinity (all tasks for a URL run on the same machine) and handles account management (retrying with different accounts, banning failed accounts based on sliding window checks). - -### Configuration Files - -- `airflow.cfg`: Main Airflow configuration file. -- `config/airflow_local_settings.py`: Contains the `task_instance_mutation_hook` which implements worker affinity by dynamically assigning tasks to queues based on the worker node's hostname. -- `config/custom_task_hooks.py`: Contains the `task_instance_mutation_hook` (duplicated here, but `airflow_local_settings.py` is the active one). -- `config/redis_default_conn.json.j2`: Jinja2 template for the Airflow Redis connection configuration. -- `config/minio_default_conn.json.j2`: Jinja2 template for the Airflow MinIO connection configuration. - -### Docker & Compose - -- `Dockerfile`: Defines the Airflow image, including necessary dependencies like `yt-dlp`, `ffmpeg`, and Python packages. -- `Dockerfile.caddy`: Defines a Caddy image used as a reverse proxy for serving Airflow static assets. 
-- `configs/docker-compose-master.yaml.j2`: Jinja2 template for the Docker Compose configuration on the Airflow master node. -- `configs/docker-compose-dl.yaml.j2`: Jinja2 template for the Docker Compose configuration on the Airflow worker nodes. -- `configs/docker-compose-ytdlp-ops.yaml.j2`: Jinja2 template for the Docker Compose configuration for the `ytdlp-ops` services (Thrift API, Envoy, Camoufox) on both master (management role) and worker nodes. -- `configs/docker-compose.camoufox.yaml.j2`: Jinja2 template (auto-generated by `generate_envoy_config.py`) for the Camoufox browser service definitions. -- `configs/docker-compose.config-generate.yaml`: Docker Compose file used to run the `generate_envoy_config.py` script in a container to create the final service configuration files. -- `generate_envoy_config.py`: Script that generates `envoy.yaml`, `docker-compose.camoufox.yaml`, and `camoufox_endpoints.json` based on environment variables. -- `configs/envoy.yaml.j2`: Jinja2 template (used by `generate_envoy_config.py`) for the Envoy proxy configuration. - -### Camoufox (Remote Browsers) - -- `camoufox/`: Directory containing the Camoufox browser setup. - - `Dockerfile`: Defines the Camoufox image. - - `requirements.txt`: Python dependencies for the Camoufox server. - - `camoufox_server.py`: The core server logic for managing remote browser instances. - - `start_camoufox.sh`: Wrapper script to start the Camoufox server with Xvfb and VNC. - - `*.xpi`: Browser extensions used by Camoufox. - -## Deployment Process - -Deployment is managed by Ansible playbooks located in the `ansible/` directory. - -1. **Inventory Generation:** The `tools/generate-inventory.py` script reads `cluster.yml` and generates `ansible/inventory.ini`, `ansible/host_vars/`, and `ansible/group_vars/all/generated_vars.yml`. -2. **Full Deployment:** `ansible-playbook playbook-full.yml` is the main command. - - Installs prerequisites (Docker, pipx, Glances). - - Ensures the `airflow_proxynet` Docker network exists. - - Imports and runs `playbook-master.yml` for the master node. - - Imports and runs `playbook-worker.yml` for worker nodes. -3. **Master Deployment (`playbook-master.yml`):** - - Sets system configurations (timezone, NTP, swap, sysctl). - - Calls `airflow-master` role: - - Syncs files to `/srv/airflow_master/`. - - Templates `configs/docker-compose-master.yaml`. - - Builds the Airflow image. - - Extracts static assets and builds the Caddy image. - - Starts services using `docker compose`. - - Calls `ytdlp-master` role: - - Syncs `generate_envoy_config.py` and templates. - - Creates `.env` file. - - Runs `generate_envoy_config.py` to create service configs. - - Creates a dummy `docker-compose.camoufox.yaml`. - - Starts `ytdlp-ops` management services using `docker compose`. -4. **Worker Deployment (`playbook-worker.yml`):** - - Sets system configurations (timezone, NTP, swap, sysctl, system limits). - - Calls `ytdlp-worker` role: - - Syncs files (including `camoufox/` directory) to `/srv/airflow_dl_worker/`. - - Creates `.env` file. - - Runs `generate_envoy_config.py` to create service configs (including `docker-compose.camoufox.yaml`). - - Builds the Camoufox image. - - Starts `ytdlp-ops` worker services using `docker compose`. - - Calls `airflow-worker` role: - - Syncs files to `/srv/airflow_dl_worker/`. - - Templates `configs/docker-compose-dl.yaml`. - - Builds the Airflow image. - - Starts services using `docker compose`. - - Verifies Camoufox services are running. 
- -## Service Interaction Flow (Worker Node) - -1. **Airflow Worker:** Pulls tasks from the Redis queue. -2. **`ytdlp_ops_worker_per_url` DAG:** Executes tasks on the local worker node. -3. **Thrift Client (in DAG task):** Connects to `localhost:9080` (Envoy's public port). -4. **Envoy Proxy:** Listens on `:9080`, load balances Thrift requests across internal ports (`9090`, `9091`, `9092` - based on `YTDLP_WORKERS`) of the local `ytdlp-ops-service`. -5. **`ytdlp-ops-service`:** Receives the Thrift request. -6. **Token Generation:** If needed, `ytdlp-ops-service` connects to a local Camoufox instance via WebSocket (using `camoufox_endpoints.json` for the address) to generate tokens. -7. **Camoufox:** Runs a headless Firefox browser, potentially using a SOCKS5 proxy, to interact with YouTube and generate the required tokens. -8. **Download:** The DAG task uses the token (via `info.json`) and potentially the SOCKS5 proxy to run `yt-dlp` for the actual download. - -## Environment Variables - -Key environment variables used in `.env` files (generated by Ansible templates) control service behavior: -- `HOSTNAME`: The Ansible inventory hostname. -- `SERVICE_ROLE`: `management` (master) or `worker`. -- `SERVER_IDENTITY`: Unique identifier for the `ytdlp-ops-service` instance. -- `YTDLP_WORKERS`: Number of internal Thrift worker endpoints and Camoufox browser instances. -- `CAMOUFOX_PROXIES`: Comma-separated list of SOCKS5 proxy URLs for Camoufox. -- `MASTER_HOST_IP`: IP address of the Airflow master node (for connecting back to Redis). -- Various passwords and ports. - -This setup allows for a scalable and robust system for managing YouTube downloads with account rotation and proxy usage. diff --git a/airflow/airflow.cfg b/airflow/airflow.cfg deleted file mode 100644 index 96f558d..0000000 --- a/airflow/airflow.cfg +++ /dev/null @@ -1,3167 +0,0 @@ -[core] -# The folder where your airflow pipelines live, most likely a -# subfolder in a code repository. This path must be absolute. -# -# Variable: AIRFLOW__CORE__DAGS_FOLDER -# -dags_folder = /opt/airflow/dags - -# Hostname by providing a path to a callable, which will resolve the hostname. -# The format is "package.function". -# -# For example, default value ``airflow.utils.net.getfqdn`` means that result from patched -# version of `socket.getfqdn() `__, -# see related `CPython Issue `__. -# -# No argument should be required in the function specified. -# If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address`` -# -# Variable: AIRFLOW__CORE__HOSTNAME_CALLABLE -# -hostname_callable = airflow.utils.net.getfqdn - -# A callable to check if a python file has airflow dags defined or not and should -# return ``True`` if it has dags otherwise ``False``. -# If this is not provided, Airflow uses its own heuristic rules. -# -# The function should have the following signature -# -# .. code-block:: python -# -# def func_name(file_path: str, zip_file: zipfile.ZipFile | None = None) -> bool: ... -# -# Variable: AIRFLOW__CORE__MIGHT_CONTAIN_DAG_CALLABLE -# -might_contain_dag_callable = airflow.utils.file.might_contain_dag_via_default_heuristic - -# Default timezone in case supplied date times are naive -# can be `UTC` (default), `system`, or any `IANA ` -# timezone string (e.g. Europe/Amsterdam) -# -# Variable: AIRFLOW__CORE__DEFAULT_TIMEZONE -# -default_timezone = utc - -# The executor class that airflow should use. 
Choices include -# ``SequentialExecutor``, ``LocalExecutor``, ``CeleryExecutor``, -# ``KubernetesExecutor``, ``CeleryKubernetesExecutor``, ``LocalKubernetesExecutor`` or the -# full import path to the class when using a custom executor. -# -# Variable: AIRFLOW__CORE__EXECUTOR -# -executor = CeleryExecutor - -# The auth manager class that airflow should use. Full import path to the auth manager class. -# -# Variable: AIRFLOW__CORE__AUTH_MANAGER -# -auth_manager = airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager - -# This defines the maximum number of task instances that can run concurrently per scheduler in -# Airflow, regardless of the worker count. Generally this value, multiplied by the number of -# schedulers in your cluster, is the maximum number of task instances with the running -# state in the metadata database. Setting this value to zero allows unlimited parallelism. -# -# Variable: AIRFLOW__CORE__PARALLELISM -# -parallelism = 32 - -# The maximum number of task instances allowed to run concurrently in each DAG. To calculate -# the number of tasks that is running concurrently for a DAG, add up the number of running -# tasks for all DAG runs of the DAG. This is configurable at the DAG level with ``max_active_tasks``, -# which is defaulted as ``[core] max_active_tasks_per_dag``. -# -# An example scenario when this would be useful is when you want to stop a new dag with an early -# start date from stealing all the executor slots in a cluster. -# -# Variable: AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG -# -max_active_tasks_per_dag = 16 - -# Are DAGs paused by default at creation -# -# Variable: AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION -# -dags_are_paused_at_creation = True - -# The maximum number of active DAG runs per DAG. The scheduler will not create more DAG runs -# if it reaches the limit. This is configurable at the DAG level with ``max_active_runs``, -# which is defaulted as ``[core] max_active_runs_per_dag``. -# -# Variable: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG -# -max_active_runs_per_dag = 16 - -# (experimental) The maximum number of consecutive DAG failures before DAG is automatically paused. -# This is also configurable per DAG level with ``max_consecutive_failed_dag_runs``, -# which is defaulted as ``[core] max_consecutive_failed_dag_runs_per_dag``. -# If not specified, then the value is considered as 0, -# meaning that the dags are never paused out by default. -# -# Variable: AIRFLOW__CORE__MAX_CONSECUTIVE_FAILED_DAG_RUNS_PER_DAG -# -max_consecutive_failed_dag_runs_per_dag = 0 - -# The name of the method used in order to start Python processes via the multiprocessing module. -# This corresponds directly with the options available in the Python docs: -# `multiprocessing.set_start_method -# `__ -# must be one of the values returned by `multiprocessing.get_all_start_methods() -# `__. -# -# Example: mp_start_method = fork -# -# Variable: AIRFLOW__CORE__MP_START_METHOD -# -# mp_start_method = - -# Whether to load the DAG examples that ship with Airflow. 
It's good to -# get started, but you probably want to set this to ``False`` in a production -# environment -# -# Variable: AIRFLOW__CORE__LOAD_EXAMPLES -# -load_examples = False - -# Path to the folder containing Airflow plugins -# -# Variable: AIRFLOW__CORE__PLUGINS_FOLDER -# -plugins_folder = /opt/airflow/plugins - -# Should tasks be executed via forking of the parent process -# -# * ``False``: Execute via forking of the parent process -# * ``True``: Spawning a new python process, slower than fork, but means plugin changes picked -# up by tasks straight away -# -# Variable: AIRFLOW__CORE__EXECUTE_TASKS_NEW_PYTHON_INTERPRETER -# -execute_tasks_new_python_interpreter = False - -# Secret key to save connection passwords in the db -# -# Variable: AIRFLOW__CORE__FERNET_KEY -# -fernet_key = - -# Whether to disable pickling dags -# -# Variable: AIRFLOW__CORE__DONOT_PICKLE -# -donot_pickle = True - -# How long before timing out a python file import -# -# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_TIMEOUT -# -dagbag_import_timeout = 30.0 - -# Should a traceback be shown in the UI for dagbag import errors, -# instead of just the exception message -# -# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_ERROR_TRACEBACKS -# -dagbag_import_error_tracebacks = True - -# If tracebacks are shown, how many entries from the traceback should be shown -# -# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_ERROR_TRACEBACK_DEPTH -# -dagbag_import_error_traceback_depth = 2 - -# How long before timing out a DagFileProcessor, which processes a dag file -# -# Variable: AIRFLOW__CORE__DAG_FILE_PROCESSOR_TIMEOUT -# -dag_file_processor_timeout = 50 - -# The class to use for running task instances in a subprocess. -# Choices include StandardTaskRunner, CgroupTaskRunner or the full import path to the class -# when using a custom task runner. -# -# Variable: AIRFLOW__CORE__TASK_RUNNER -# -task_runner = StandardTaskRunner - -# If set, tasks without a ``run_as_user`` argument will be run with this user -# Can be used to de-elevate a sudo user running Airflow when executing tasks -# -# Variable: AIRFLOW__CORE__DEFAULT_IMPERSONATION -# -default_impersonation = - -# What security module to use (for example kerberos) -# -# Variable: AIRFLOW__CORE__SECURITY -# -security = - -# Turn unit test mode on (overwrites many configuration options with test -# values at runtime) -# -# Variable: AIRFLOW__CORE__UNIT_TEST_MODE -# -unit_test_mode = False - -# Whether to enable pickling for xcom (note that this is insecure and allows for -# RCE exploits). -# -# Variable: AIRFLOW__CORE__ENABLE_XCOM_PICKLING -# -enable_xcom_pickling = False - -# What classes can be imported during deserialization. This is a multi line value. -# The individual items will be parsed as a pattern to a glob function. -# Python built-in classes (like dict) are always allowed. -# -# Variable: AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES -# -allowed_deserialization_classes = airflow.* - -# What classes can be imported during deserialization. This is a multi line value. -# The individual items will be parsed as regexp patterns. -# This is a secondary option to ``[core] allowed_deserialization_classes``. 
-# -# Variable: AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES_REGEXP -# -allowed_deserialization_classes_regexp = - -# When a task is killed forcefully, this is the amount of time in seconds that -# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED -# -# Variable: AIRFLOW__CORE__KILLED_TASK_CLEANUP_TIME -# -killed_task_cleanup_time = 60 - -# Whether to override params with dag_run.conf. If you pass some key-value pairs -# through ``airflow dags backfill -c`` or -# ``airflow dags trigger -c``, the key-value pairs will override the existing ones in params. -# -# Variable: AIRFLOW__CORE__DAG_RUN_CONF_OVERRIDES_PARAMS -# -dag_run_conf_overrides_params = True - -# If enabled, Airflow will only scan files containing both ``DAG`` and ``airflow`` (case-insensitive). -# -# Variable: AIRFLOW__CORE__DAG_DISCOVERY_SAFE_MODE -# -dag_discovery_safe_mode = True - -# The pattern syntax used in the -# `.airflowignore -# `__ -# files in the DAG directories. Valid values are ``regexp`` or ``glob``. -# -# Variable: AIRFLOW__CORE__DAG_IGNORE_FILE_SYNTAX -# -dag_ignore_file_syntax = regexp - -# The number of retries each task is going to have by default. Can be overridden at dag or task level. -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_RETRIES -# -default_task_retries = 3 - -# The number of seconds each task is going to wait by default between retries. Can be overridden at -# dag or task level. -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_RETRY_DELAY -# -default_task_retry_delay = 300 - -# The maximum delay (in seconds) each task is going to wait by default between retries. -# This is a global setting and cannot be overridden at task or DAG level. -# -# Variable: AIRFLOW__CORE__MAX_TASK_RETRY_DELAY -# -max_task_retry_delay = 86400 - -# The weighting method used for the effective total priority weight of the task -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_WEIGHT_RULE -# -default_task_weight_rule = downstream - -# Maximum possible time (in seconds) that task will have for execution of auxiliary processes -# (like listeners, mini scheduler...) after task is marked as success.. -# -# Variable: AIRFLOW__CORE__TASK_SUCCESS_OVERTIME -# -task_success_overtime = 20 - -# The default task execution_timeout value for the operators. Expected an integer value to -# be passed into timedelta as seconds. If not specified, then the value is considered as None, -# meaning that the operators are never timed out by default. -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_EXECUTION_TIMEOUT -# -default_task_execution_timeout = 3600 - -# Updating serialized DAG can not be faster than a minimum interval to reduce database write rate. -# -# Variable: AIRFLOW__CORE__MIN_SERIALIZED_DAG_UPDATE_INTERVAL -# -min_serialized_dag_update_interval = 30 - -# If ``True``, serialized DAGs are compressed before writing to DB. -# -# .. note:: -# -# This will disable the DAG dependencies view -# -# Variable: AIRFLOW__CORE__COMPRESS_SERIALIZED_DAGS -# -compress_serialized_dags = False - -# Fetching serialized DAG can not be faster than a minimum interval to reduce database -# read rate. This config controls when your DAGs are updated in the Webserver -# -# Variable: AIRFLOW__CORE__MIN_SERIALIZED_DAG_FETCH_INTERVAL -# -min_serialized_dag_fetch_interval = 10 - -# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store -# in the Database. -# All the template_fields for each of Task Instance are stored in the Database. 
-# Keeping this number small may cause an error when you try to view ``Rendered`` tab in -# TaskInstance view for older tasks. -# -# Variable: AIRFLOW__CORE__MAX_NUM_RENDERED_TI_FIELDS_PER_TASK -# -max_num_rendered_ti_fields_per_task = 30 - -# On each dagrun check against defined SLAs -# -# Variable: AIRFLOW__CORE__CHECK_SLAS -# -check_slas = True - -# Path to custom XCom class that will be used to store and resolve operators results -# -# Example: xcom_backend = path.to.CustomXCom -# -# Variable: AIRFLOW__CORE__XCOM_BACKEND -# -xcom_backend = airflow.models.xcom.BaseXCom - -# By default Airflow plugins are lazily-loaded (only loaded when required). Set it to ``False``, -# if you want to load plugins whenever 'airflow' is invoked via cli or loaded from module. -# -# Variable: AIRFLOW__CORE__LAZY_LOAD_PLUGINS -# -lazy_load_plugins = True - -# By default Airflow providers are lazily-discovered (discovery and imports happen only when required). -# Set it to ``False``, if you want to discover providers whenever 'airflow' is invoked via cli or -# loaded from module. -# -# Variable: AIRFLOW__CORE__LAZY_DISCOVER_PROVIDERS -# -lazy_discover_providers = True - -# Hide sensitive **Variables** or **Connection extra json keys** from UI -# and task logs when set to ``True`` -# -# .. note:: -# -# Connection passwords are always hidden in logs -# -# Variable: AIRFLOW__CORE__HIDE_SENSITIVE_VAR_CONN_FIELDS -# -hide_sensitive_var_conn_fields = False - -# A comma-separated list of extra sensitive keywords to look for in variables names or connection's -# extra JSON. -# -# Variable: AIRFLOW__CORE__SENSITIVE_VAR_CONN_NAMES -# -sensitive_var_conn_names = - -# Task Slot counts for ``default_pool``. This setting would not have any effect in an existing -# deployment where the ``default_pool`` is already created. For existing deployments, users can -# change the number of slots using Webserver, API or the CLI -# -# Variable: AIRFLOW__CORE__DEFAULT_POOL_TASK_SLOT_COUNT -# -default_pool_task_slot_count = 128 - -# The maximum list/dict length an XCom can push to trigger task mapping. If the pushed list/dict has a -# length exceeding this value, the task pushing the XCom will be failed automatically to prevent the -# mapped tasks from clogging the scheduler. -# -# Variable: AIRFLOW__CORE__MAX_MAP_LENGTH -# -max_map_length = 1024 - -# The default umask to use for process when run in daemon mode (scheduler, worker, etc.) -# -# This controls the file-creation mode mask which determines the initial value of file permission bits -# for newly created files. -# -# This value is treated as an octal-integer. -# -# Variable: AIRFLOW__CORE__DAEMON_UMASK -# -daemon_umask = 0o002 - -# Class to use as dataset manager. -# -# Example: dataset_manager_class = airflow.datasets.manager.DatasetManager -# -# Variable: AIRFLOW__CORE__DATASET_MANAGER_CLASS -# -# dataset_manager_class = - -# Kwargs to supply to dataset manager. -# -# Example: dataset_manager_kwargs = {"some_param": "some_value"} -# -# Variable: AIRFLOW__CORE__DATASET_MANAGER_KWARGS -# -# dataset_manager_kwargs = - -# Dataset URI validation should raise an exception if it is not compliant with AIP-60. -# By default this configuration is false, meaning that Airflow 2.x only warns the user. -# In Airflow 3, this configuration will be removed, unconditionally enabling strict validation. -# -# Variable: AIRFLOW__CORE__STRICT_DATASET_URI_VALIDATION -# -strict_dataset_uri_validation = False - -# (experimental) Whether components should use Airflow Internal API for DB connectivity. 
-# -# Variable: AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION -# -database_access_isolation = False - -# (experimental) Airflow Internal API url. -# Only used if ``[core] database_access_isolation`` is ``True``. -# -# Example: internal_api_url = http://localhost:8080 -# -# Variable: AIRFLOW__CORE__INTERNAL_API_URL -# -# internal_api_url = - -# Secret key used to authenticate internal API clients to core. It should be as random as possible. -# However, when running more than 1 instances of webserver / internal API services, make sure all -# of them use the same ``secret_key`` otherwise calls will fail on authentication. -# The authentication token generated using the secret key has a short expiry time though - make -# sure that time on ALL the machines that you run airflow components on is synchronized -# (for example using ntpd) otherwise you might get "forbidden" errors when the logs are accessed. -# -# Variable: AIRFLOW__CORE__INTERNAL_API_SECRET_KEY -# -internal_api_secret_key = tCnTbEabdFBDLHWoT/LxLw== - -# The ability to allow testing connections across Airflow UI, API and CLI. -# Supported options: ``Disabled``, ``Enabled``, ``Hidden``. Default: Disabled -# Disabled - Disables the test connection functionality and disables the Test Connection button in UI. -# Enabled - Enables the test connection functionality and shows the Test Connection button in UI. -# Hidden - Disables the test connection functionality and hides the Test Connection button in UI. -# Before setting this to Enabled, make sure that you review the users who are able to add/edit -# connections and ensure they are trusted. Connection testing can be done maliciously leading to -# undesired and insecure outcomes. -# See `Airflow Security Model: Capabilities of authenticated UI users -# `__ -# for more details. -# -# Variable: AIRFLOW__CORE__TEST_CONNECTION -# -test_connection = Disabled - -# The maximum length of the rendered template field. If the value to be stored in the -# rendered template field exceeds this size, it's redacted. -# -# Variable: AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH -# -max_templated_field_length = 4096 - -host_docker_socket = /var/run/docker.sock - -[database] -# Path to the ``alembic.ini`` file. You can either provide the file path relative -# to the Airflow home directory or the absolute path if it is located elsewhere. -# -# Variable: AIRFLOW__DATABASE__ALEMBIC_INI_FILE_PATH -# -alembic_ini_file_path = alembic.ini - -# The SQLAlchemy connection string to the metadata database. -# SQLAlchemy supports many different database engines. -# See: `Set up a Database Backend: Database URI -# `__ -# for more details. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN -# -# This is configured via the AIRFLOW__DATABASE__SQL_ALCHEMY_CONN environment variable -# in the docker-compose files, as it differs between master and workers. -# A dummy value is set here to ensure the env var override is picked up. -sql_alchemy_conn = postgresql://dummy:dummy@dummy/dummy - -# Extra engine specific keyword args passed to SQLAlchemy's create_engine, as a JSON-encoded value -# -# Example: sql_alchemy_engine_args = {"arg1": true} -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_ENGINE_ARGS -# -# sql_alchemy_engine_args = - -# The encoding for the databases -# -# Variable: AIRFLOW__DATABASE__SQL_ENGINE_ENCODING -# -sql_engine_encoding = utf-8 - -# Collation for ``dag_id``, ``task_id``, ``key``, ``external_executor_id`` columns -# in case they have different encoding. 
-# By default this collation is the same as the database collation, however for ``mysql`` and ``mariadb`` -# the default is ``utf8mb3_bin`` so that the index sizes of our index keys will not exceed -# the maximum size of allowed index when collation is set to ``utf8mb4`` variant, see -# `GitHub Issue Comment `__ -# for more details. -# -# Variable: AIRFLOW__DATABASE__SQL_ENGINE_COLLATION_FOR_IDS -# -# sql_engine_collation_for_ids = - -# If SQLAlchemy should pool database connections. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_ENABLED -# -sql_alchemy_pool_enabled = True - -# The SQLAlchemy pool size is the maximum number of database connections -# in the pool. 0 indicates no limit. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_SIZE -# -sql_alchemy_pool_size = 20 - -# The maximum overflow size of the pool. -# When the number of checked-out connections reaches the size set in pool_size, -# additional connections will be returned up to this limit. -# When those additional connections are returned to the pool, they are disconnected and discarded. -# It follows then that the total number of simultaneous connections the pool will allow -# is **pool_size** + **max_overflow**, -# and the total number of "sleeping" connections the pool will allow is pool_size. -# max_overflow can be set to ``-1`` to indicate no overflow limit; -# no limit will be placed on the total number of concurrent connections. Defaults to ``10``. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_MAX_OVERFLOW -# -sql_alchemy_max_overflow = 30 - -# The SQLAlchemy pool recycle is the number of seconds a connection -# can be idle in the pool before it is invalidated. This config does -# not apply to sqlite. If the number of DB connections is ever exceeded, -# a lower config value will allow the system to recover faster. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_RECYCLE -# -sql_alchemy_pool_recycle = 1800 - -# Check connection at the start of each connection pool checkout. -# Typically, this is a simple statement like "SELECT 1". -# See `SQLAlchemy Pooling: Disconnect Handling - Pessimistic -# `__ -# for more details. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_PRE_PING -# -sql_alchemy_pool_pre_ping = True - -# The schema to use for the metadata database. -# SQLAlchemy supports databases with the concept of multiple schemas. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_SCHEMA -# -sql_alchemy_schema = - -# Import path for connect args in SQLAlchemy. Defaults to an empty dict. -# This is useful when you want to configure db engine args that SQLAlchemy won't parse -# in connection string. This can be set by passing a dictionary containing the create engine parameters. -# For more details about passing create engine parameters (keepalives variables, timeout etc) -# in Postgres DB Backend see `Setting up a PostgreSQL Database -# `__ -# e.g ``connect_args={"timeout":30}`` can be defined in ``airflow_local_settings.py`` and -# can be imported as shown below -# -# Example: sql_alchemy_connect_args = airflow_local_settings.connect_args -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_CONNECT_ARGS -# -# sql_alchemy_connect_args = - -# Important Warning: Use of sql_alchemy_session_maker Highly Discouraged -# Import path for function which returns 'sqlalchemy.orm.sessionmaker'. -# Improper configuration of sql_alchemy_session_maker can lead to serious issues, -# including data corruption, unrecoverable application crashes. 
Please review the SQLAlchemy -# documentation for detailed guidance on proper configuration and best practices. -# -# Example: sql_alchemy_session_maker = airflow_local_settings._sessionmaker -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_SESSION_MAKER -# -# sql_alchemy_session_maker = - -# Whether to load the default connections that ship with Airflow when ``airflow db init`` is called. -# It's good to get started, but you probably want to set this to ``False`` in a production environment. -# -# Variable: AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS -# -load_default_connections = True - -# Number of times the code should be retried in case of DB Operational Errors. -# Not all transactions will be retried as it can cause undesired state. -# Currently it is only used in ``DagFileProcessor.process_file`` to retry ``dagbag.sync_to_db``. -# -# Variable: AIRFLOW__DATABASE__MAX_DB_RETRIES -# -max_db_retries = 3 - -# Whether to run alembic migrations during Airflow start up. Sometimes this operation can be expensive, -# and the users can assert the correct version through other means (e.g. through a Helm chart). -# Accepts ``True`` or ``False``. -# -# Variable: AIRFLOW__DATABASE__CHECK_MIGRATIONS -# -check_migrations = True - -[logging] -# The folder where airflow should store its log files. -# This path must be absolute. -# There are a few existing configurations that assume this is set to the default. -# If you choose to override this you may need to update the -# ``[logging] dag_processor_manager_log_location`` and -# ``[logging] child_process_log_directory settings`` as well. -# -# Variable: AIRFLOW__LOGGING__BASE_LOG_FOLDER -# -base_log_folder = /opt/airflow/logs - -# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search. -# Set this to ``True`` if you want to enable remote logging. -# -# Variable: AIRFLOW__LOGGING__REMOTE_LOGGING -# -remote_logging = True - -# Users must supply an Airflow connection id that provides access to the storage -# location. Depending on your remote logging service, this may only be used for -# reading logs, not writing them. -# -# Variable: AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID -# -remote_log_conn_id = minio_default - -# Whether the local log files for GCS, S3, WASB and OSS remote logging should be deleted after -# they are uploaded to the remote location. -# -# Variable: AIRFLOW__LOGGING__DELETE_LOCAL_LOGS -# -delete_local_logs = False - -# Path to Google Credential JSON file. If omitted, authorization based on `the Application Default -# Credentials -# `__ will -# be used. -# -# Variable: AIRFLOW__LOGGING__GOOGLE_KEY_PATH -# -google_key_path = - -# Storage bucket URL for remote logging -# S3 buckets should start with **s3://** -# Cloudwatch log groups should start with **cloudwatch://** -# GCS buckets should start with **gs://** -# WASB buckets should start with **wasb** just to help Airflow select correct handler -# Stackdriver logs should start with **stackdriver://** -# -# Variable: AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER -# -remote_base_log_folder = s3://airflow-logs/ - -# The remote_task_handler_kwargs param is loaded into a dictionary and passed to the ``__init__`` -# of remote task handler and it overrides the values provided by Airflow config. For example if you set -# ``delete_local_logs=False`` and you provide ``{"delete_local_copy": true}``, then the local -# log files will be deleted after they are uploaded to remote location. 
-# -# Example: remote_task_handler_kwargs = {"delete_local_copy": true} -# -# Variable: AIRFLOW__LOGGING__REMOTE_TASK_HANDLER_KWARGS -# -remote_task_handler_kwargs = - -# Use server-side encryption for logs stored in S3 -# -# Variable: AIRFLOW__LOGGING__ENCRYPT_S3_LOGS -# -encrypt_s3_logs = False - -# Logging level. -# -# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. -# -# Variable: AIRFLOW__LOGGING__LOGGING_LEVEL -# -logging_level = INFO - -# Logging level for celery. If not set, it uses the value of logging_level -# -# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. -# -# Variable: AIRFLOW__LOGGING__CELERY_LOGGING_LEVEL -# -celery_logging_level = - -# Logging level for Flask-appbuilder UI. -# -# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. -# -# Variable: AIRFLOW__LOGGING__FAB_LOGGING_LEVEL -# -fab_logging_level = WARNING - -# Logging class -# Specify the class that will specify the logging configuration -# This class has to be on the python classpath -# -# Example: logging_config_class = my.path.default_local_settings.LOGGING_CONFIG -# -# Variable: AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS -# -logging_config_class = airflow_local_settings.LOGGING_CONFIG - -# Flag to enable/disable Colored logs in Console -# Colour the logs when the controlling terminal is a TTY. -# -# Variable: AIRFLOW__LOGGING__COLORED_CONSOLE_LOG -# -colored_console_log = True - -# Log format for when Colored logs is enabled -# -# Variable: AIRFLOW__LOGGING__COLORED_LOG_FORMAT -# -colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s - -# Specifies the class utilized by Airflow to implement colored logging -# -# Variable: AIRFLOW__LOGGING__COLORED_FORMATTER_CLASS -# -colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter - -# Format of Log line -# -# Variable: AIRFLOW__LOGGING__LOG_FORMAT -# -log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s - -# Defines the format of log messages for simple logging configuration -# -# Variable: AIRFLOW__LOGGING__SIMPLE_LOG_FORMAT -# -simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s - -# Where to send dag parser logs. If "file", logs are sent to log files defined by child_process_log_directory. -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_TARGET -# -dag_processor_log_target = file - -# Format of Dag Processor Log line -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_FORMAT -# -dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s - -# Determines the formatter class used by Airflow for structuring its log messages -# The default formatter class is timezone-aware, which means that timestamps attached to log entries -# will be adjusted to reflect the local timezone of the Airflow instance -# -# Variable: AIRFLOW__LOGGING__LOG_FORMATTER_CLASS -# -log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware - -# An import path to a function to add adaptations of each secret added with -# ``airflow.utils.log.secrets_masker.mask_secret`` to be masked in log messages. The given function -# is expected to require a single parameter: the secret to be adapted. It may return a -# single adaptation of the secret or an iterable of adaptations to each be masked as secrets. 
-# The original secret will be masked as well as any adaptations returned. -# -# Example: secret_mask_adapter = urllib.parse.quote -# -# Variable: AIRFLOW__LOGGING__SECRET_MASK_ADAPTER -# -secret_mask_adapter = - -secret_mask_exception_args = False - -# Specify prefix pattern like mentioned below with stream handler ``TaskHandlerWithCustomFormatter`` -# -# Example: task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{ti.try_number}} -# -# Variable: AIRFLOW__LOGGING__TASK_LOG_PREFIX_TEMPLATE -# -task_log_prefix_template = {{ ti.dag_id }}-{{ ti.task_id }}-{{ ti.run_id }} - -# Formatting for how airflow generates file names/paths for each task run. -# -# Variable: AIRFLOW__LOGGING__LOG_FILENAME_TEMPLATE -# -log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log - -# Formatting for how airflow generates file names for log -# -# Variable: AIRFLOW__LOGGING__LOG_PROCESSOR_FILENAME_TEMPLATE -# -log_processor_filename_template = {{ filename }}.log - -# Full path of dag_processor_manager logfile. -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_LOCATION -# -dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log - -# Whether DAG processor manager will write logs to stdout -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_STDOUT -# -dag_processor_manager_log_stdout = False - -# Name of handler to read task instance logs. -# Defaults to use ``task`` handler. -# -# Variable: AIRFLOW__LOGGING__TASK_LOG_READER -# -task_log_reader = task - -# A comma\-separated list of third-party logger names that will be configured to print messages to -# consoles\. -# -# Example: extra_logger_names = connexion,sqlalchemy -# -# Variable: AIRFLOW__LOGGING__EXTRA_LOGGER_NAMES -# -extra_logger_names = - -# When you start an Airflow worker, Airflow starts a tiny web server -# subprocess to serve the workers local log files to the airflow main -# web server, who then builds pages and sends them to users. This defines -# the port on which the logs are served. It needs to be unused, and open -# visible from the main web server to connect into the workers. -# -# Variable: AIRFLOW__LOGGING__WORKER_LOG_SERVER_PORT -# -worker_log_server_port = 8793 - -# Port to serve logs from for triggerer. -# See ``[logging] worker_log_server_port`` description for more info. -# -# Variable: AIRFLOW__LOGGING__TRIGGER_LOG_SERVER_PORT -# -trigger_log_server_port = 8794 - -# We must parse timestamps to interleave logs between trigger and task. To do so, -# we need to parse timestamps in log files. In case your log format is non-standard, -# you may provide import path to callable which takes a string log line and returns -# the timestamp (datetime.datetime compatible). -# -# Example: interleave_timestamp_parser = path.to.my_func -# -# Variable: AIRFLOW__LOGGING__INTERLEAVE_TIMESTAMP_PARSER -# -# interleave_timestamp_parser = - -# Permissions in the form or of octal string as understood by chmod. The permissions are important -# when you use impersonation, when logs are written by a different user than airflow. The most secure -# way of configuring it in this case is to add both users to the same group and make it the default -# group of both users. Group-writeable logs are default in airflow, but you might decide that you are -# OK with having the logs other-writeable, in which case you should set it to ``0o777``. 
You might -# decide to add more security if you do not use impersonation and change it to ``0o755`` to make it -# only owner-writeable. You can also make it just readable only for owner by changing it to ``0o700`` -# if all the access (read/write) for your logs happens from the same user. -# -# Example: file_task_handler_new_folder_permissions = 0o775 -# -# Variable: AIRFLOW__LOGGING__FILE_TASK_HANDLER_NEW_FOLDER_PERMISSIONS -# -file_task_handler_new_folder_permissions = 0o775 - -# Permissions in the form or of octal string as understood by chmod. The permissions are important -# when you use impersonation, when logs are written by a different user than airflow. The most secure -# way of configuring it in this case is to add both users to the same group and make it the default -# group of both users. Group-writeable logs are default in airflow, but you might decide that you are -# OK with having the logs other-writeable, in which case you should set it to ``0o666``. You might -# decide to add more security if you do not use impersonation and change it to ``0o644`` to make it -# only owner-writeable. You can also make it just readable only for owner by changing it to ``0o600`` -# if all the access (read/write) for your logs happens from the same user. -# -# Example: file_task_handler_new_file_permissions = 0o664 -# -# Variable: AIRFLOW__LOGGING__FILE_TASK_HANDLER_NEW_FILE_PERMISSIONS -# -file_task_handler_new_file_permissions = 0o664 - -# By default Celery sends all logs into stderr. -# If enabled any previous logging handlers will get *removed*. -# With this option AirFlow will create new handlers -# and send low level logs like INFO and WARNING to stdout, -# while sending higher severity logs to stderr. -# -# Variable: AIRFLOW__LOGGING__CELERY_STDOUT_STDERR_SEPARATION -# -celery_stdout_stderr_separation = False - -# If enabled, Airflow may ship messages to task logs from outside the task run context, e.g. from -# the scheduler, executor, or callback execution context. This can help in circumstances such as -# when there's something blocking the execution of the task and ordinarily there may be no task -# logs at all. -# This is set to ``True`` by default. If you encounter issues with this feature -# (e.g. scheduler performance issues) it can be disabled. -# -# Variable: AIRFLOW__LOGGING__ENABLE_TASK_CONTEXT_LOGGER -# -enable_task_context_logger = True - -# A comma separated list of keywords related to errors whose presence should display the line in red -# color in UI -# -# Variable: AIRFLOW__LOGGING__COLOR_LOG_ERROR_KEYWORDS -# -color_log_error_keywords = error,exception - -# A comma separated list of keywords related to warning whose presence should display the line in yellow -# color in UI -# -# Variable: AIRFLOW__LOGGING__COLOR_LOG_WARNING_KEYWORDS -# -color_log_warning_keywords = warn - -[metrics] -# `StatsD `__ integration settings. - -# If true, ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` will use -# regex pattern matching anywhere within the metric name instead of only prefix matching -# at the start of the name. -# -# Variable: AIRFLOW__METRICS__METRICS_USE_PATTERN_MATCH -# -metrics_use_pattern_match = False - -# Configure an allow list (comma separated string) to send only certain metrics. -# If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. -# If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. 
-# -# Example: metrics_allow_list = "scheduler,executor,dagrun,pool,triggerer,celery" or "^scheduler,^executor,heartbeat|timeout" -# -# Variable: AIRFLOW__METRICS__METRICS_ALLOW_LIST -# -metrics_allow_list = - -# Configure a block list (comma separated string) to block certain metrics from being emitted. -# If ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` are both configured, -# ``[metrics] metrics_block_list`` is ignored. -# -# If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. -# -# If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. -# -# Example: metrics_block_list = "scheduler,executor,dagrun,pool,triggerer,celery" or "^scheduler,^executor,heartbeat|timeout" -# -# Variable: AIRFLOW__METRICS__METRICS_BLOCK_LIST -# -metrics_block_list = - -# Enables sending metrics to StatsD. -# -# Variable: AIRFLOW__METRICS__STATSD_ON -# -statsd_on = False - -# Specifies the host address where the StatsD daemon (or server) is running -# -# Variable: AIRFLOW__METRICS__STATSD_HOST -# -statsd_host = localhost - -# Specifies the port on which the StatsD daemon (or server) is listening to -# -# Variable: AIRFLOW__METRICS__STATSD_PORT -# -statsd_port = 8125 - -# Defines the namespace for all metrics sent from Airflow to StatsD -# -# Variable: AIRFLOW__METRICS__STATSD_PREFIX -# -statsd_prefix = airflow - -# A function that validate the StatsD stat name, apply changes to the stat name if necessary and return -# the transformed stat name. -# -# The function should have the following signature -# -# .. code-block:: python -# -# def func_name(stat_name: str) -> str: ... -# -# Variable: AIRFLOW__METRICS__STAT_NAME_HANDLER -# -stat_name_handler = - -# To enable datadog integration to send airflow metrics. -# -# Variable: AIRFLOW__METRICS__STATSD_DATADOG_ENABLED -# -statsd_datadog_enabled = False - -# List of datadog tags attached to all metrics(e.g: ``key1:value1,key2:value2``) -# -# Variable: AIRFLOW__METRICS__STATSD_DATADOG_TAGS -# -statsd_datadog_tags = - -# Set to ``False`` to disable metadata tags for some of the emitted metrics -# -# Variable: AIRFLOW__METRICS__STATSD_DATADOG_METRICS_TAGS -# -statsd_datadog_metrics_tags = True - -# If you want to utilise your own custom StatsD client set the relevant -# module path below. -# Note: The module path must exist on your -# `PYTHONPATH ` -# for Airflow to pick it up -# -# Variable: AIRFLOW__METRICS__STATSD_CUSTOM_CLIENT_PATH -# -# statsd_custom_client_path = - -# If you want to avoid sending all the available metrics tags to StatsD, -# you can configure a block list of prefixes (comma separated) to filter out metric tags -# that start with the elements of the list (e.g: ``job_id,run_id``) -# -# Example: statsd_disabled_tags = job_id,run_id,dag_id,task_id -# -# Variable: AIRFLOW__METRICS__STATSD_DISABLED_TAGS -# -statsd_disabled_tags = job_id,run_id - -# To enable sending Airflow metrics with StatsD-Influxdb tagging convention. -# -# Variable: AIRFLOW__METRICS__STATSD_INFLUXDB_ENABLED -# -statsd_influxdb_enabled = False - -# Enables sending metrics to OpenTelemetry. -# -# Variable: AIRFLOW__METRICS__OTEL_ON -# -otel_on = False - -# Specifies the hostname or IP address of the OpenTelemetry Collector to which Airflow sends -# metrics and traces. -# -# Variable: AIRFLOW__METRICS__OTEL_HOST -# -otel_host = localhost - -# Specifies the port of the OpenTelemetry Collector that is listening to. 
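The StatsD keys above (statsd_on, statsd_host, statsd_port, statsd_prefix) can also be supplied through the AIRFLOW__METRICS__* environment variables listed with each option. A minimal sketch, with a placeholder StatsD hostname that is not part of this deployment:

```bash
# Sketch: enable StatsD metrics without editing airflow.cfg.
# statsd.example.internal is a placeholder hostname, not a real host in this cluster.
export AIRFLOW__METRICS__STATSD_ON=True
export AIRFLOW__METRICS__STATSD_HOST=statsd.example.internal
export AIRFLOW__METRICS__STATSD_PORT=8125
export AIRFLOW__METRICS__STATSD_PREFIX=airflow
```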
-# -# Variable: AIRFLOW__METRICS__OTEL_PORT -# -otel_port = 8889 - -# The prefix for the Airflow metrics. -# -# Variable: AIRFLOW__METRICS__OTEL_PREFIX -# -otel_prefix = airflow - -# Defines the interval, in milliseconds, at which Airflow sends batches of metrics and traces -# to the configured OpenTelemetry Collector. -# -# Variable: AIRFLOW__METRICS__OTEL_INTERVAL_MILLISECONDS -# -otel_interval_milliseconds = 60000 - -# If ``True``, all metrics are also emitted to the console. Defaults to ``False``. -# -# Variable: AIRFLOW__METRICS__OTEL_DEBUGGING_ON -# -otel_debugging_on = False - -# The default service name of traces. -# -# Variable: AIRFLOW__METRICS__OTEL_SERVICE -# -otel_service = Airflow - -# If ``True``, SSL will be enabled. Defaults to ``False``. -# To establish an HTTPS connection to the OpenTelemetry collector, -# you need to configure the SSL certificate and key within the OpenTelemetry collector's -# ``config.yml`` file. -# -# Variable: AIRFLOW__METRICS__OTEL_SSL_ACTIVE -# -otel_ssl_active = False - -[traces] -# Distributed traces integration settings. - -# Enables sending traces to OpenTelemetry. -# -# Variable: AIRFLOW__TRACES__OTEL_ON -# -otel_on = False - -# Specifies the hostname or IP address of the OpenTelemetry Collector to which Airflow sends -# traces. -# -# Variable: AIRFLOW__TRACES__OTEL_HOST -# -otel_host = localhost - -# Specifies the port of the OpenTelemetry Collector that is listening to. -# -# Variable: AIRFLOW__TRACES__OTEL_PORT -# -otel_port = 8889 - -# The default service name of traces. -# -# Variable: AIRFLOW__TRACES__OTEL_SERVICE -# -otel_service = Airflow - -# If True, all traces are also emitted to the console. Defaults to False. -# -# Variable: AIRFLOW__TRACES__OTEL_DEBUGGING_ON -# -otel_debugging_on = False - -# If True, SSL will be enabled. Defaults to False. -# To establish an HTTPS connection to the OpenTelemetry collector, -# you need to configure the SSL certificate and key within the OpenTelemetry collector's -# config.yml file. -# -# Variable: AIRFLOW__TRACES__OTEL_SSL_ACTIVE -# -otel_ssl_active = False - -# If True, after the task is complete, the full task log messages will be added as the -# span events, chunked by 64k size. defaults to False. -# -# Variable: AIRFLOW__TRACES__OTEL_TASK_LOG_EVENT -# -otel_task_log_event = False - -[secrets] -# Full class name of secrets backend to enable (will precede env vars and metastore in search path) -# -# Example: backend = airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend -# -# Variable: AIRFLOW__SECRETS__BACKEND -# -backend = - -# The backend_kwargs param is loaded into a dictionary and passed to ``__init__`` -# of secrets backend class. See documentation for the secrets backend you are using. -# JSON is expected. -# -# Example for AWS Systems Manager ParameterStore: -# ``{"connections_prefix": "/airflow/connections", "profile_name": "default"}`` -# -# Variable: AIRFLOW__SECRETS__BACKEND_KWARGS -# -backend_kwargs = - -# .. note:: |experimental| -# -# Enables local caching of Variables, when parsing DAGs only. -# Using this option can make dag parsing faster if Variables are used in top level code, at the expense -# of longer propagation time for changes. -# Please note that this cache concerns only the DAG parsing step. There is no caching in place when DAG -# tasks are run. -# -# Variable: AIRFLOW__SECRETS__USE_CACHE -# -use_cache = False - -# .. 
note:: |experimental| -# -# When the cache is enabled, this is the duration for which we consider an entry in the cache to be -# valid. Entries are refreshed if they are older than this many seconds. -# It means that when the cache is enabled, this is the maximum amount of time you need to wait to see a -# Variable change take effect. -# -# Variable: AIRFLOW__SECRETS__CACHE_TTL_SECONDS -# -cache_ttl_seconds = 900 - -[cli] -# In what way should the cli access the API. The LocalClient will use the -# database directly, while the json_client will use the api running on the -# webserver -# -# Variable: AIRFLOW__CLI__API_CLIENT -# -api_client = airflow.api.client.local_client - -# If you set web_server_url_prefix, do NOT forget to append it here, ex: -# ``endpoint_url = http://localhost:8080/myroot`` -# So api will look like: ``http://localhost:8080/myroot/api/experimental/...`` -# -# Variable: AIRFLOW__CLI__ENDPOINT_URL -# -endpoint_url = http://localhost:8080 - -[debug] -# Used only with ``DebugExecutor``. If set to ``True`` DAG will fail with first -# failed task. Helpful for debugging purposes. -# -# Variable: AIRFLOW__DEBUG__FAIL_FAST -# -fail_fast = False - -[api] -# Enables the deprecated experimental API. Please note that these API endpoints do not have -# access control. An authenticated user has full access. -# -# .. warning:: -# -# This `Experimental REST API -# `__ is -# deprecated since version 2.0. Please consider using -# `the Stable REST API -# `__. -# For more information on migration, see -# `RELEASE_NOTES.rst `_ -# -# Variable: AIRFLOW__API__ENABLE_EXPERIMENTAL_API -# -enable_experimental_api = False - -# Comma separated list of auth backends to authenticate users of the API. See -# `Security: API -# `__ for possible values. -# ("airflow.api.auth.backend.default" allows all requests for historic reasons) -# -# Variable: AIRFLOW__API__AUTH_BACKENDS -# -auth_backends = airflow.api.auth.backend.session - -# Used to set the maximum page limit for API requests. If limit passed as param -# is greater than maximum page limit, it will be ignored and maximum page limit value -# will be set as the limit -# -# Variable: AIRFLOW__API__MAXIMUM_PAGE_LIMIT -# -maximum_page_limit = 100 - -# Used to set the default page limit when limit param is zero or not provided in API -# requests. Otherwise if positive integer is passed in the API requests as limit, the -# smallest number of user given limit or maximum page limit is taken as limit. -# -# Variable: AIRFLOW__API__FALLBACK_PAGE_LIMIT -# -fallback_page_limit = 100 - -# The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. -# -# Example: google_oauth2_audience = project-id-random-value.apps.googleusercontent.com -# -# Variable: AIRFLOW__API__GOOGLE_OAUTH2_AUDIENCE -# -google_oauth2_audience = - -# Path to Google Cloud Service Account key file (JSON). If omitted, authorization based on -# `the Application Default Credentials -# `__ will -# be used. -# -# Example: google_key_path = /files/service-account-json -# -# Variable: AIRFLOW__API__GOOGLE_KEY_PATH -# -google_key_path = - -# Used in response to a preflight request to indicate which HTTP -# headers can be used when making the actual request. This header is -# the server side response to the browser's -# Access-Control-Request-Headers header. 
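The [secrets] backend options a little further up can likewise be set through the environment. This sketch reuses the AWS Systems Manager example values from those comments; they are illustrative, not this cluster's actual configuration:

```bash
# Sketch: point Airflow at the AWS SSM Parameter Store secrets backend described above.
# The prefix and profile values are the documentation's examples, not real settings here.
export AIRFLOW__SECRETS__BACKEND=airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend
export AIRFLOW__SECRETS__BACKEND_KWARGS='{"connections_prefix": "/airflow/connections", "profile_name": "default"}'
```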
-# -# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_HEADERS -# -access_control_allow_headers = - -# Specifies the method or methods allowed when accessing the resource. -# -# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_METHODS -# -access_control_allow_methods = - -# Indicates whether the response can be shared with requesting code from the given origins. -# Separate URLs with space. -# -# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_ORIGINS -# -access_control_allow_origins = - -# Indicates whether the **xcomEntries** endpoint supports the **deserialize** -# flag. If set to ``False``, setting this flag in a request would result in a -# 400 Bad Request error. -# -# Variable: AIRFLOW__API__ENABLE_XCOM_DESERIALIZE_SUPPORT -# -enable_xcom_deserialize_support = False - -[lineage] -# what lineage backend to use -# -# Variable: AIRFLOW__LINEAGE__BACKEND -# -backend = - -[operators] -# The default owner assigned to each new operator, unless -# provided explicitly or passed via ``default_args`` -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_OWNER -# -default_owner = airflow - -# The default value of attribute "deferrable" in operators and sensors. -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE -# -default_deferrable = false - -# Indicates the default number of CPU units allocated to each operator when no specific CPU request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_CPUS -# -default_cpus = 1 - -# Indicates the default number of RAM allocated to each operator when no specific RAM request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_RAM -# -default_ram = 512 - -# Indicates the default number of disk storage allocated to each operator when no specific disk request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_DISK -# -default_disk = 512 - -# Indicates the default number of GPUs allocated to each operator when no specific GPUs request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_GPUS -# -default_gpus = 0 - -# Default queue that tasks get assigned to and that worker listen on. -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_QUEUE -# -default_queue = default - -# Is allowed to pass additional/unused arguments (args, kwargs) to the BaseOperator operator. -# If set to ``False``, an exception will be thrown, -# otherwise only the console message will be displayed. -# -# Variable: AIRFLOW__OPERATORS__ALLOW_ILLEGAL_ARGUMENTS -# -allow_illegal_arguments = False - -[webserver] -# The message displayed when a user attempts to execute actions beyond their authorised privileges. -# -# Variable: AIRFLOW__WEBSERVER__ACCESS_DENIED_MESSAGE -# -access_denied_message = Access is Denied - -# Path of webserver config file used for configuring the webserver parameters -# -# Variable: AIRFLOW__WEBSERVER__CONFIG_FILE -# -config_file = /opt/airflow/webserver_config.py - -# The base url of your website: Airflow cannot guess what domain or CNAME you are using. -# This is used to create links in the Log Url column in the Browse - Task Instances menu, -# as well as in any automated emails sent by Airflow that contain links to your webserver. -# -# Variable: AIRFLOW__WEBSERVER__BASE_URL -# -base_url = http://localhost:8080 - -# Default timezone to display all dates in the UI, can be UTC, system, or -# any IANA timezone string (e.g. **Europe/Amsterdam**). 
If left empty the -# default value of core/default_timezone will be used -# -# Example: default_ui_timezone = America/New_York -# -# Variable: AIRFLOW__WEBSERVER__DEFAULT_UI_TIMEZONE -# -default_ui_timezone = UTC - -# The ip specified when starting the web server -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_HOST -# -web_server_host = 0.0.0.0 - -# The port on which to run the web server -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_PORT -# -web_server_port = 8080 - -# Paths to the SSL certificate and key for the web server. When both are -# provided SSL will be enabled. This does not change the web server port. -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT -# -web_server_ssl_cert = - -# Paths to the SSL certificate and key for the web server. When both are -# provided SSL will be enabled. This does not change the web server port. -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY -# -web_server_ssl_key = - -# The type of backend used to store web session data, can be ``database`` or ``securecookie``. For the -# ``database`` backend, sessions are store in the database and they can be -# managed there (for example when you reset password of the user, all sessions for that user are -# deleted). For the ``securecookie`` backend, sessions are stored in encrypted cookies on the client -# side. The ``securecookie`` mechanism is 'lighter' than database backend, but sessions are not deleted -# when you reset password of the user, which means that other than waiting for expiry time, the only -# way to invalidate all sessions for a user is to change secret_key and restart webserver (which -# also invalidates and logs out all other user's sessions). -# -# When you are using ``database`` backend, make sure to keep your database session table small -# by periodically running ``airflow db clean --table session`` command, especially if you have -# automated API calls that will create a new session for each call rather than reuse the sessions -# stored in browser cookies. -# -# Example: session_backend = securecookie -# -# Variable: AIRFLOW__WEBSERVER__SESSION_BACKEND -# -session_backend = database - -# Number of seconds the webserver waits before killing gunicorn master that doesn't respond -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_MASTER_TIMEOUT -# -web_server_master_timeout = 120 - -# Number of seconds the gunicorn webserver waits before timing out on a worker -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_WORKER_TIMEOUT -# -web_server_worker_timeout = 120 - -# Number of workers to refresh at a time. When set to 0, worker refresh is -# disabled. When nonzero, airflow periodically refreshes webserver workers by -# bringing up new ones and killing old ones. -# -# Variable: AIRFLOW__WEBSERVER__WORKER_REFRESH_BATCH_SIZE -# -worker_refresh_batch_size = 1 - -# Number of seconds to wait before refreshing a batch of workers. -# -# Variable: AIRFLOW__WEBSERVER__WORKER_REFRESH_INTERVAL -# -worker_refresh_interval = 6000 - -# If set to ``True``, Airflow will track files in plugins_folder directory. When it detects changes, -# then reload the gunicorn. If set to ``True``, gunicorn starts without preloading, which is slower, -# uses more memory, and may cause race conditions. Avoid setting this to ``True`` in production. -# -# Variable: AIRFLOW__WEBSERVER__RELOAD_ON_PLUGIN_CHANGE -# -reload_on_plugin_change = False - -# Secret key used to run your flask app. It should be as random as possible. 
However, when running -# more than 1 instances of webserver, make sure all of them use the same ``secret_key`` otherwise -# one of them will error with "CSRF session token is missing". -# The webserver key is also used to authorize requests to Celery workers when logs are retrieved. -# The token generated using the secret key has a short expiry time though - make sure that time on -# ALL the machines that you run airflow components on is synchronized (for example using ntpd) -# otherwise you might get "forbidden" errors when the logs are accessed. -# -# Variable: AIRFLOW__WEBSERVER__SECRET_KEY -# -secret_key = tCnTbEabdFBDLHWoT/LxLw== - -# Number of workers to run the Gunicorn web server -# -# Variable: AIRFLOW__WEBSERVER__WORKERS -# -workers = 1 - -# The worker class gunicorn should use. Choices include -# ``sync`` (default), ``eventlet``, ``gevent``. -# -# .. warning:: -# -# When using ``gevent`` you might also want to set the ``_AIRFLOW_PATCH_GEVENT`` -# environment variable to ``"1"`` to make sure gevent patching is done as early as possible. -# -# Be careful to set ``_AIRFLOW_PATCH_GEVENT`` only on the web server as gevent patching may -# affect the scheduler behavior via the ``multiprocessing`` sockets module and cause crash. -# -# See related Issues / PRs for more details: -# -# * https://github.com/benoitc/gunicorn/issues/2796 -# * https://github.com/apache/airflow/issues/8212 -# * https://github.com/apache/airflow/pull/28283 -# -# Variable: AIRFLOW__WEBSERVER__WORKER_CLASS -# -worker_class = gevent - -# Log files for the gunicorn webserver. '-' means log to stderr. -# -# Variable: AIRFLOW__WEBSERVER__ACCESS_LOGFILE -# -access_logfile = - - -# Log files for the gunicorn webserver. '-' means log to stderr. -# -# Variable: AIRFLOW__WEBSERVER__ERROR_LOGFILE -# -error_logfile = - - -# Access log format for gunicorn webserver. -# default format is ``%%(h)s %%(l)s %%(u)s %%(t)s "%%(r)s" %%(s)s %%(b)s "%%(f)s" "%%(a)s"`` -# See `Gunicorn Settings: 'access_log_format' Reference -# `__ for more details -# -# Variable: AIRFLOW__WEBSERVER__ACCESS_LOGFORMAT -# -access_logformat = - -# Expose the configuration file in the web server. Set to ``non-sensitive-only`` to show all values -# except those that have security implications. ``True`` shows all values. ``False`` hides the -# configuration completely. -# -# Variable: AIRFLOW__WEBSERVER__EXPOSE_CONFIG -# -expose_config = False - -# Expose hostname in the web server -# -# Variable: AIRFLOW__WEBSERVER__EXPOSE_HOSTNAME -# -expose_hostname = False - -# Expose stacktrace in the web server -# -# Variable: AIRFLOW__WEBSERVER__EXPOSE_STACKTRACE -# -expose_stacktrace = False - -# Default DAG view. Valid values are: ``grid``, ``graph``, ``duration``, ``gantt``, ``landing_times`` -# -# Variable: AIRFLOW__WEBSERVER__DAG_DEFAULT_VIEW -# -dag_default_view = grid - -# Default DAG orientation. Valid values are: -# ``LR`` (Left->Right), ``TB`` (Top->Bottom), ``RL`` (Right->Left), ``BT`` (Bottom->Top) -# -# Variable: AIRFLOW__WEBSERVER__DAG_ORIENTATION -# -dag_orientation = LR - -# Sorting order in grid view. Valid values are: ``topological``, ``hierarchical_alphabetical`` -# -# Variable: AIRFLOW__WEBSERVER__GRID_VIEW_SORTING_ORDER -# -grid_view_sorting_order = topological - -# The amount of time (in secs) webserver will wait for initial handshake -# while fetching logs from other worker machine -# -# Variable: AIRFLOW__WEBSERVER__LOG_FETCH_TIMEOUT_SEC -# -log_fetch_timeout_sec = 10 - -# Time interval (in secs) to wait before next log fetching. 
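Two operational notes from the [webserver] comments above translate into one-liners: generating a sufficiently random shared secret_key, and pruning the session table when the database session backend is in use. Both are sketches; confirm the exact flags with `airflow db clean --help` on your version.

```bash
# Generate a random value suitable for [webserver] secret_key
# (must be identical on every webserver instance).
python3 -c "import secrets; print(secrets.token_urlsafe(16))"

# Prune old web sessions when session_backend = database.
# Flag names per `airflow db clean --help`; --clean-before-timestamp is required.
airflow db clean --tables session --clean-before-timestamp "$(date -d '30 days ago' '+%Y-%m-%d')" --yes
```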
-# -# Variable: AIRFLOW__WEBSERVER__LOG_FETCH_DELAY_SEC -# -log_fetch_delay_sec = 5 - -# Distance away from page bottom to enable auto tailing. -# -# Variable: AIRFLOW__WEBSERVER__LOG_AUTO_TAILING_OFFSET -# -log_auto_tailing_offset = 30 - -# Animation speed for auto tailing log display. -# -# Variable: AIRFLOW__WEBSERVER__LOG_ANIMATION_SPEED -# -log_animation_speed = 1000 - -# By default, the webserver shows paused DAGs. Flip this to hide paused -# DAGs by default -# -# Variable: AIRFLOW__WEBSERVER__HIDE_PAUSED_DAGS_BY_DEFAULT -# -hide_paused_dags_by_default = False - -# Consistent page size across all listing views in the UI -# -# Variable: AIRFLOW__WEBSERVER__PAGE_SIZE -# -page_size = 100 - -# Define the color of navigation bar -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_COLOR -# -navbar_color = #fff - -# Define the color of text in the navigation bar -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_TEXT_COLOR -# -navbar_text_color = #51504f - -# Define the color of navigation bar links when hovered -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_HOVER_COLOR -# -navbar_hover_color = #eee - -# Define the color of text in the navigation bar when hovered -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_TEXT_HOVER_COLOR -# -navbar_text_hover_color = #51504f - -# Define the color of the logo text -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_LOGO_TEXT_COLOR -# -navbar_logo_text_color = #51504f - -# Default dagrun to show in UI -# -# Variable: AIRFLOW__WEBSERVER__DEFAULT_DAG_RUN_DISPLAY_NUMBER -# -default_dag_run_display_number = 25 - -# Enable werkzeug ``ProxyFix`` middleware for reverse proxy -# -# Variable: AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX -# -enable_proxy_fix = True - -# Number of values to trust for ``X-Forwarded-For``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_FOR -# -proxy_fix_x_for = 1 - -# Number of values to trust for ``X-Forwarded-Proto``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PROTO -# -proxy_fix_x_proto = 1 - -# Number of values to trust for ``X-Forwarded-Host``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_HOST -# -proxy_fix_x_host = 1 - -# Number of values to trust for ``X-Forwarded-Port``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PORT -# -proxy_fix_x_port = 1 - -# Number of values to trust for ``X-Forwarded-Prefix``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PREFIX -# -proxy_fix_x_prefix = 1 - -# Set secure flag on session cookie -# -# Variable: AIRFLOW__WEBSERVER__COOKIE_SECURE -# -cookie_secure = False - -# Set samesite policy on session cookie -# -# Variable: AIRFLOW__WEBSERVER__COOKIE_SAMESITE -# -cookie_samesite = Lax - -# Default setting for wrap toggle on DAG code and TI log views. 
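The ProxyFix settings above are typically paired with secure session cookies when the webserver sits behind a TLS-terminating reverse proxy. A sketch of the equivalent environment overrides, assuming exactly one trusted proxy hop:

```bash
# Sketch: trust one reverse-proxy hop and mark session cookies as secure.
export AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX=True
export AIRFLOW__WEBSERVER__PROXY_FIX_X_FOR=1
export AIRFLOW__WEBSERVER__PROXY_FIX_X_PROTO=1
export AIRFLOW__WEBSERVER__PROXY_FIX_X_HOST=1
export AIRFLOW__WEBSERVER__COOKIE_SECURE=True
```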
-# -# Variable: AIRFLOW__WEBSERVER__DEFAULT_WRAP -# -default_wrap = False - -# Allow the UI to be rendered in a frame -# -# Variable: AIRFLOW__WEBSERVER__X_FRAME_ENABLED -# -x_frame_enabled = True - -# Send anonymous user activity to your analytics tool -# choose from ``google_analytics``, ``segment``, ``metarouter``, or ``matomo`` -# -# Variable: AIRFLOW__WEBSERVER__ANALYTICS_TOOL -# -# analytics_tool = - -# Unique ID of your account in the analytics tool -# -# Variable: AIRFLOW__WEBSERVER__ANALYTICS_ID -# -# analytics_id = - -# Your instances url, only applicable to Matomo. -# -# Example: analytics_url = https://your.matomo.instance.com/ -# -# Variable: AIRFLOW__WEBSERVER__ANALYTICS_URL -# -# analytics_url = - -# 'Recent Tasks' stats will show for old DagRuns if set -# -# Variable: AIRFLOW__WEBSERVER__SHOW_RECENT_STATS_FOR_COMPLETED_RUNS -# -show_recent_stats_for_completed_runs = True - -# The UI cookie lifetime in minutes. User will be logged out from UI after -# ``[webserver] session_lifetime_minutes`` of non-activity -# -# Variable: AIRFLOW__WEBSERVER__SESSION_LIFETIME_MINUTES -# -session_lifetime_minutes = 43200 - -# Sets a custom page title for the DAGs overview page and site title for all pages -# -# Variable: AIRFLOW__WEBSERVER__INSTANCE_NAME -# -# instance_name = - -# Whether the custom page title for the DAGs overview page contains any Markup language -# -# Variable: AIRFLOW__WEBSERVER__INSTANCE_NAME_HAS_MARKUP -# -instance_name_has_markup = False - -# How frequently, in seconds, the DAG data will auto-refresh in graph or grid view -# when auto-refresh is turned on -# -# Variable: AIRFLOW__WEBSERVER__AUTO_REFRESH_INTERVAL -# -auto_refresh_interval = 3 - -# Boolean for displaying warning for publicly viewable deployment -# -# Variable: AIRFLOW__WEBSERVER__WARN_DEPLOYMENT_EXPOSURE -# -warn_deployment_exposure = True - -# Comma separated string of view events to exclude from dag audit view. -# All other events will be added minus the ones passed here. -# The audit logs in the db will not be affected by this parameter. -# -# Example: audit_view_excluded_events = cli_task_run,running,success -# -# Variable: AIRFLOW__WEBSERVER__AUDIT_VIEW_EXCLUDED_EVENTS -# -# audit_view_excluded_events = - -# Comma separated string of view events to include in dag audit view. -# If passed, only these events will populate the dag audit view. -# The audit logs in the db will not be affected by this parameter. -# -# Example: audit_view_included_events = dagrun_cleared,failed -# -# Variable: AIRFLOW__WEBSERVER__AUDIT_VIEW_INCLUDED_EVENTS -# -# audit_view_included_events = - -# Boolean for running SwaggerUI in the webserver. -# -# Variable: AIRFLOW__WEBSERVER__ENABLE_SWAGGER_UI -# -enable_swagger_ui = True - -# Boolean for running Internal API in the webserver. -# -# Variable: AIRFLOW__WEBSERVER__RUN_INTERNAL_API -# -run_internal_api = False - -# The caching algorithm used by the webserver. Must be a valid hashlib function name. -# -# Example: caching_hash_method = sha256 -# -# Variable: AIRFLOW__WEBSERVER__CACHING_HASH_METHOD -# -caching_hash_method = md5 - -# Behavior of the trigger DAG run button for DAGs without params. ``False`` to skip and trigger -# without displaying a form to add a **dag_run.conf**, ``True`` to always display the form. -# The form is displayed always if parameters are defined. -# -# Variable: AIRFLOW__WEBSERVER__SHOW_TRIGGER_FORM_IF_NO_PARAMS -# -show_trigger_form_if_no_params = False - -# Number of recent DAG run configurations in the selector on the trigger web form. 
-# -# Example: num_recent_configurations_for_trigger = 10 -# -# Variable: AIRFLOW__WEBSERVER__NUM_RECENT_CONFIGURATIONS_FOR_TRIGGER -# -num_recent_configurations_for_trigger = 5 - -# A DAG author is able to provide any raw HTML into ``doc_md`` or params description in -# ``description_md`` for text formatting. This is including potentially unsafe javascript. -# Displaying the DAG or trigger form in web UI provides the DAG author the potential to -# inject malicious code into clients browsers. To ensure the web UI is safe by default, -# raw HTML is disabled by default. If you trust your DAG authors, you can enable HTML -# support in markdown by setting this option to ``True``. -# -# This parameter also enables the deprecated fields ``description_html`` and -# ``custom_html_form`` in DAG params until the feature is removed in a future version. -# -# Example: allow_raw_html_descriptions = False -# -# Variable: AIRFLOW__WEBSERVER__ALLOW_RAW_HTML_DESCRIPTIONS -# -allow_raw_html_descriptions = False - -# The maximum size of the request payload (in MB) that can be sent. -# -# Variable: AIRFLOW__WEBSERVER__ALLOWED_PAYLOAD_SIZE -# -allowed_payload_size = 1.0 - -# Require confirmation when changing a DAG in the web UI. This is to prevent accidental changes -# to a DAG that may be running on sensitive environments like production. -# When set to ``True``, confirmation dialog will be shown when a user tries to Pause/Unpause, -# Trigger a DAG -# -# Variable: AIRFLOW__WEBSERVER__REQUIRE_CONFIRMATION_DAG_CHANGE -# -require_confirmation_dag_change = False - -# The maximum size in bytes any non-file form field may be in a multipart/form-data body. -# If this limit is exceeded, a 413 RequestEntityTooLarge error is raised by webserver. -# -# Variable: AIRFLOW__WEBSERVER__MAX_FORM_MEMORY_SIZE -# -max_form_memory_size = 500000 - -# The maximum number of fields that may be present in a multipart/form-data body. -# If this limit is exceeded, a 413 RequestEntityTooLarge error is raised by webserver. -# -# Variable: AIRFLOW__WEBSERVER__MAX_FORM_PARTS -# -max_form_parts = 1000 - -[email] -# Configuration email backend and whether to -# send email alerts on retry or failure - -# Email backend to use -# -# Variable: AIRFLOW__EMAIL__EMAIL_BACKEND -# -email_backend = airflow.utils.email.send_email_smtp - -# Email connection to use -# -# Variable: AIRFLOW__EMAIL__EMAIL_CONN_ID -# -email_conn_id = smtp_default - -# Whether email alerts should be sent when a task is retried -# -# Variable: AIRFLOW__EMAIL__DEFAULT_EMAIL_ON_RETRY -# -default_email_on_retry = True - -# Whether email alerts should be sent when a task failed -# -# Variable: AIRFLOW__EMAIL__DEFAULT_EMAIL_ON_FAILURE -# -default_email_on_failure = True - -# File that will be used as the template for Email subject (which will be rendered using Jinja2). -# If not set, Airflow uses a base template. -# -# Example: subject_template = /path/to/my_subject_template_file -# -# Variable: AIRFLOW__EMAIL__SUBJECT_TEMPLATE -# -# subject_template = - -# File that will be used as the template for Email content (which will be rendered using Jinja2). -# If not set, Airflow uses a base template. -# -# Example: html_content_template = /path/to/my_html_content_template_file -# -# Variable: AIRFLOW__EMAIL__HTML_CONTENT_TEMPLATE -# -# html_content_template = - -# Email address that will be used as sender address. 
-# It can either be raw email or the complete address in a format ``Sender Name `` -# -# Example: from_email = Airflow -# -# Variable: AIRFLOW__EMAIL__FROM_EMAIL -# -# from_email = - -# ssl context to use when using SMTP and IMAP SSL connections. By default, the context is "default" -# which sets it to ``ssl.create_default_context()`` which provides the right balance between -# compatibility and security, it however requires that certificates in your operating system are -# updated and that SMTP/IMAP servers of yours have valid certificates that have corresponding public -# keys installed on your machines. You can switch it to "none" if you want to disable checking -# of the certificates, but it is not recommended as it allows MITM (man-in-the-middle) attacks -# if your infrastructure is not sufficiently secured. It should only be set temporarily while you -# are fixing your certificate configuration. This can be typically done by upgrading to newer -# version of the operating system you run Airflow components on,by upgrading/refreshing proper -# certificates in the OS or by updating certificates for your mail servers. -# -# Example: ssl_context = default -# -# Variable: AIRFLOW__EMAIL__SSL_CONTEXT -# -ssl_context = default - -[smtp] -# If you want airflow to send emails on retries, failure, and you want to use -# the airflow.utils.email.send_email_smtp function, you have to configure an -# smtp server here - -# Specifies the host server address used by Airflow when sending out email notifications via SMTP. -# -# Variable: AIRFLOW__SMTP__SMTP_HOST -# -smtp_host = localhost - -# Determines whether to use the STARTTLS command when connecting to the SMTP server. -# -# Variable: AIRFLOW__SMTP__SMTP_STARTTLS -# -smtp_starttls = True - -# Determines whether to use an SSL connection when talking to the SMTP server. -# -# Variable: AIRFLOW__SMTP__SMTP_SSL -# -smtp_ssl = False - -# Username to authenticate when connecting to smtp server. -# -# Example: smtp_user = airflow -# -# Variable: AIRFLOW__SMTP__SMTP_USER -# -# smtp_user = - -# Password to authenticate when connecting to smtp server. -# -# Example: smtp_password = airflow -# -# Variable: AIRFLOW__SMTP__SMTP_PASSWORD -# -# smtp_password = - -# Defines the port number on which Airflow connects to the SMTP server to send email notifications. -# -# Variable: AIRFLOW__SMTP__SMTP_PORT -# -smtp_port = 25 - -# Specifies the default **from** email address used when Airflow sends email notifications. -# -# Variable: AIRFLOW__SMTP__SMTP_MAIL_FROM -# -smtp_mail_from = airflow@example.com - -# Determines the maximum time (in seconds) the Apache Airflow system will wait for a -# connection to the SMTP server to be established. -# -# Variable: AIRFLOW__SMTP__SMTP_TIMEOUT -# -smtp_timeout = 30 - -# Defines the maximum number of times Airflow will attempt to connect to the SMTP server. -# -# Variable: AIRFLOW__SMTP__SMTP_RETRY_LIMIT -# -smtp_retry_limit = 5 - -[sentry] -# `Sentry `__ integration. Here you can supply -# additional configuration options based on the Python platform. -# See `Python / Configuration / Basic Options -# `__ for more details. -# Unsupported options: ``integrations``, ``in_app_include``, ``in_app_exclude``, -# ``ignore_errors``, ``before_breadcrumb``, ``transport``. - -# Enable error reporting to Sentry -# -# Variable: AIRFLOW__SENTRY__SENTRY_ON -# -sentry_on = false - -# -# Variable: AIRFLOW__SENTRY__SENTRY_DSN -# -sentry_dsn = - -# Dotted path to a before_send function that the sentry SDK should be configured to use. 
-# -# Variable: AIRFLOW__SENTRY__BEFORE_SEND -# -# before_send = - -[scheduler] -# Task instances listen for external kill signal (when you clear tasks -# from the CLI or the UI), this defines the frequency at which they should -# listen (in seconds). -# -# Variable: AIRFLOW__SCHEDULER__JOB_HEARTBEAT_SEC -# -job_heartbeat_sec = 5 - -# The scheduler constantly tries to trigger new tasks (look at the -# scheduler section in the docs for more information). This defines -# how often the scheduler should run (in seconds). -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC -# -scheduler_heartbeat_sec = 5 - -# The frequency (in seconds) at which the LocalTaskJob should send heartbeat signals to the -# scheduler to notify it's still alive. If this value is set to 0, the heartbeat interval will default -# to the value of ``[scheduler] scheduler_zombie_task_threshold``. -# -# Variable: AIRFLOW__SCHEDULER__LOCAL_TASK_JOB_HEARTBEAT_SEC -# -local_task_job_heartbeat_sec = 0 - -# The number of times to try to schedule each DAG file -# -1 indicates unlimited number -# -# Variable: AIRFLOW__SCHEDULER__NUM_RUNS -# -num_runs = -1 - -# Controls how long the scheduler will sleep between loops, but if there was nothing to do -# in the loop. i.e. if it scheduled something then it will start the next loop -# iteration straight away. -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_IDLE_SLEEP_TIME -# -scheduler_idle_sleep_time = 1 - -# Number of seconds after which a DAG file is parsed. The DAG file is parsed every -# ``[scheduler] min_file_process_interval`` number of seconds. Updates to DAGs are reflected after -# this interval. Keeping this number low will increase CPU usage. -# -# Variable: AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL -# -min_file_process_interval = 60 - -# How often (in seconds) to check for stale DAGs (DAGs which are no longer present in -# the expected files) which should be deactivated, as well as datasets that are no longer -# referenced and should be marked as orphaned. -# -# Variable: AIRFLOW__SCHEDULER__PARSING_CLEANUP_INTERVAL -# -parsing_cleanup_interval = 60 - -# How long (in seconds) to wait after we have re-parsed a DAG file before deactivating stale -# DAGs (DAGs which are no longer present in the expected files). The reason why we need -# this threshold is to account for the time between when the file is parsed and when the -# DAG is loaded. The absolute maximum that this could take is ``[core] dag_file_processor_timeout``, -# but when you have a long timeout configured, it results in a significant delay in the -# deactivation of stale dags. -# -# Variable: AIRFLOW__SCHEDULER__STALE_DAG_THRESHOLD -# -stale_dag_threshold = 50 - -# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes. -# -# Variable: AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL -# -dag_dir_list_interval = 600 - -# How often should stats be printed to the logs. Setting to 0 will disable printing stats -# -# Variable: AIRFLOW__SCHEDULER__PRINT_STATS_INTERVAL -# -print_stats_interval = 30 - -# How often (in seconds) should pool usage stats be sent to StatsD (if statsd_on is enabled) -# -# Variable: AIRFLOW__SCHEDULER__POOL_METRICS_INTERVAL -# -pool_metrics_interval = 5.0 - -# If the last scheduler heartbeat happened more than ``[scheduler] scheduler_health_check_threshold`` -# ago (in seconds), scheduler is considered unhealthy. -# This is used by the health check in the **/health** endpoint and in ``airflow jobs check`` CLI -# for SchedulerJob. 
-# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_THRESHOLD -# -scheduler_health_check_threshold = 30 - -# When you start a scheduler, airflow starts a tiny web server -# subprocess to serve a health check if this is set to ``True`` -# -# Variable: AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK -# -enable_health_check = True - -# When you start a scheduler, airflow starts a tiny web server -# subprocess to serve a health check on this host -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_SERVER_HOST -# -scheduler_health_check_server_host = 0.0.0.0 - -# When you start a scheduler, airflow starts a tiny web server -# subprocess to serve a health check on this port -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_SERVER_PORT -# -scheduler_health_check_server_port = 8974 - -# How often (in seconds) should the scheduler check for orphaned tasks and SchedulerJobs -# -# Variable: AIRFLOW__SCHEDULER__ORPHANED_TASKS_CHECK_INTERVAL -# -orphaned_tasks_check_interval = 300.0 - -# Determines the directory where logs for the child processes of the scheduler will be stored -# -# Variable: AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY -# -child_process_log_directory = /opt/airflow/logs/scheduler - -# Local task jobs periodically heartbeat to the DB. If the job has -# not heartbeat in this many seconds, the scheduler will mark the -# associated task instance as failed and will re-schedule the task. -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_ZOMBIE_TASK_THRESHOLD -# -scheduler_zombie_task_threshold = 300 - -# How often (in seconds) should the scheduler check for zombie tasks. -# -# Variable: AIRFLOW__SCHEDULER__ZOMBIE_DETECTION_INTERVAL -# -zombie_detection_interval = 10.0 - -# Turn off scheduler catchup by setting this to ``False``. -# Default behavior is unchanged and -# Command Line Backfills still work, but the scheduler -# will not do scheduler catchup if this is ``False``, -# however it can be set on a per DAG basis in the -# DAG definition (catchup) -# -# Variable: AIRFLOW__SCHEDULER__CATCHUP_BY_DEFAULT -# -catchup_by_default = True - -# Setting this to ``True`` will make first task instance of a task -# ignore depends_on_past setting. A task instance will be considered -# as the first task instance of a task when there is no task instance -# in the DB with an execution_date earlier than it., i.e. no manual marking -# success will be needed for a newly added task to be scheduled. -# -# Variable: AIRFLOW__SCHEDULER__IGNORE_FIRST_DEPENDS_ON_PAST_BY_DEFAULT -# -ignore_first_depends_on_past_by_default = True - -# This changes the batch size of queries in the scheduling main loop. -# This should not be greater than ``[core] parallelism``. -# If this is too high, SQL query performance may be impacted by -# complexity of query predicate, and/or excessive locking. -# Additionally, you may hit the maximum allowable query length for your db. -# Set this to 0 to use the value of ``[core] parallelism`` -# -# Variable: AIRFLOW__SCHEDULER__MAX_TIS_PER_QUERY -# -max_tis_per_query = 16 - -# Should the scheduler issue ``SELECT ... FOR UPDATE`` in relevant queries. -# If this is set to ``False`` then you should not run more than a single -# scheduler at once -# -# Variable: AIRFLOW__SCHEDULER__USE_ROW_LEVEL_LOCKING -# -use_row_level_locking = True - -# Max number of DAGs to create DagRuns for per scheduler loop. 
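With enable_health_check turned on above, the scheduler exposes a small HTTP health endpoint on port 8974 in addition to the database-backed check mentioned in the comments. Both can be probed as follows, assuming you are on the scheduler host:

```bash
# Probe the standalone health-check server (enable_health_check = True, port 8974 above).
curl -s http://localhost:8974/health

# Database-backed check: is there a recent SchedulerJob heartbeat?
airflow jobs check --job-type SchedulerJob
```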
-# -# Variable: AIRFLOW__SCHEDULER__MAX_DAGRUNS_TO_CREATE_PER_LOOP -# -max_dagruns_to_create_per_loop = 10 - -# How many DagRuns should a scheduler examine (and lock) when scheduling -# and queuing tasks. -# -# Variable: AIRFLOW__SCHEDULER__MAX_DAGRUNS_PER_LOOP_TO_SCHEDULE -# -max_dagruns_per_loop_to_schedule = 20 - -# Should the Task supervisor process perform a "mini scheduler" to attempt to schedule more tasks of the -# same DAG. Leaving this on will mean tasks in the same DAG execute quicker, but might starve out other -# dags in some circumstances -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULE_AFTER_TASK_EXECUTION -# -schedule_after_task_execution = True - -# The scheduler reads dag files to extract the airflow modules that are going to be used, -# and imports them ahead of time to avoid having to re-do it for each parsing process. -# This flag can be set to ``False`` to disable this behavior in case an airflow module needs -# to be freshly imported each time (at the cost of increased DAG parsing time). -# -# Variable: AIRFLOW__SCHEDULER__PARSING_PRE_IMPORT_MODULES -# -parsing_pre_import_modules = True - -# The scheduler can run multiple processes in parallel to parse dags. -# This defines how many processes will run. -# -# Variable: AIRFLOW__SCHEDULER__PARSING_PROCESSES -# -parsing_processes = 2 - -# One of ``modified_time``, ``random_seeded_by_host`` and ``alphabetical``. -# The scheduler will list and sort the dag files to decide the parsing order. -# -# * ``modified_time``: Sort by modified time of the files. This is useful on large scale to parse the -# recently modified DAGs first. -# * ``random_seeded_by_host``: Sort randomly across multiple Schedulers but with same order on the -# same host. This is useful when running with Scheduler in HA mode where each scheduler can -# parse different DAG files. -# * ``alphabetical``: Sort by filename -# -# Variable: AIRFLOW__SCHEDULER__FILE_PARSING_SORT_MODE -# -file_parsing_sort_mode = modified_time - -# Whether the dag processor is running as a standalone process or it is a subprocess of a scheduler -# job. -# -# Variable: AIRFLOW__SCHEDULER__STANDALONE_DAG_PROCESSOR -# -standalone_dag_processor = False - -# Only applicable if ``[scheduler] standalone_dag_processor`` is true and callbacks are stored -# in database. Contains maximum number of callbacks that are fetched during a single loop. -# -# Variable: AIRFLOW__SCHEDULER__MAX_CALLBACKS_PER_LOOP -# -max_callbacks_per_loop = 20 - -# Only applicable if ``[scheduler] standalone_dag_processor`` is true. -# Time in seconds after which dags, which were not updated by Dag Processor are deactivated. -# -# Variable: AIRFLOW__SCHEDULER__DAG_STALE_NOT_SEEN_DURATION -# -dag_stale_not_seen_duration = 600 - -# Turn off scheduler use of cron intervals by setting this to ``False``. -# DAGs submitted manually in the web UI or with trigger_dag will still run. -# -# Variable: AIRFLOW__SCHEDULER__USE_JOB_SCHEDULE -# -use_job_schedule = True - -# Allow externally triggered DagRuns for Execution Dates in the future -# Only has effect if schedule_interval is set to None in DAG -# -# Variable: AIRFLOW__SCHEDULER__ALLOW_TRIGGER_IN_FUTURE -# -allow_trigger_in_future = False - -# How often to check for expired trigger requests that have not run yet. -# -# Variable: AIRFLOW__SCHEDULER__TRIGGER_TIMEOUT_CHECK_INTERVAL -# -trigger_timeout_check_interval = 15 - -# Amount of time a task can be in the queued state before being retried or set to failed. 
-# -# Variable: AIRFLOW__SCHEDULER__TASK_QUEUED_TIMEOUT -# -task_queued_timeout = 300.0 - -# How often to check for tasks that have been in the queued state for -# longer than ``[scheduler] task_queued_timeout``. -# -# Variable: AIRFLOW__SCHEDULER__TASK_QUEUED_TIMEOUT_CHECK_INTERVAL -# -task_queued_timeout_check_interval = 120.0 - -# The run_id pattern used to verify the validity of user input to the run_id parameter when -# triggering a DAG. This pattern cannot change the pattern used by scheduler to generate run_id -# for scheduled DAG runs or DAG runs triggered without changing the run_id parameter. -# -# Variable: AIRFLOW__SCHEDULER__ALLOWED_RUN_ID_PATTERN -# -allowed_run_id_pattern = ^[A-Za-z0-9_.~:+-]+$ - -# Whether to create DAG runs that span an interval or one single point in time for cron schedules, when -# a cron string is provided to ``schedule`` argument of a DAG. -# -# * ``True``: **CronDataIntervalTimetable** is used, which is suitable -# for DAGs with well-defined data interval. You get contiguous intervals from the end of the previous -# interval up to the scheduled datetime. -# * ``False``: **CronTriggerTimetable** is used, which is closer to the behavior of cron itself. -# -# Notably, for **CronTriggerTimetable**, the logical date is the same as the time the DAG Run will -# try to schedule, while for **CronDataIntervalTimetable**, the logical date is the beginning of -# the data interval, but the DAG Run will try to schedule at the end of the data interval. -# -# Variable: AIRFLOW__SCHEDULER__CREATE_CRON_DATA_INTERVALS -# -create_cron_data_intervals = True - -[triggerer] -# How many triggers a single Triggerer will run at once, by default. -# -# Variable: AIRFLOW__TRIGGERER__DEFAULT_CAPACITY -# -default_capacity = 1000 - -# How often to heartbeat the Triggerer job to ensure it hasn't been killed. -# -# Variable: AIRFLOW__TRIGGERER__JOB_HEARTBEAT_SEC -# -job_heartbeat_sec = 5 - -# If the last triggerer heartbeat happened more than ``[triggerer] triggerer_health_check_threshold`` -# ago (in seconds), triggerer is considered unhealthy. -# This is used by the health check in the **/health** endpoint and in ``airflow jobs check`` CLI -# for TriggererJob. -# -# Variable: AIRFLOW__TRIGGERER__TRIGGERER_HEALTH_CHECK_THRESHOLD -# -triggerer_health_check_threshold = 30 - -[kerberos] -# Location of your ccache file once kinit has been performed. -# -# Variable: AIRFLOW__KERBEROS__CCACHE -# -ccache = /tmp/airflow_krb5_ccache - -# gets augmented with fqdn -# -# Variable: AIRFLOW__KERBEROS__PRINCIPAL -# -principal = airflow - -# Determines the frequency at which initialization or re-initialization processes occur. -# -# Variable: AIRFLOW__KERBEROS__REINIT_FREQUENCY -# -reinit_frequency = 3600 - -# Path to the kinit executable -# -# Variable: AIRFLOW__KERBEROS__KINIT_PATH -# -kinit_path = kinit - -# Designates the path to the Kerberos keytab file for the Airflow user -# -# Variable: AIRFLOW__KERBEROS__KEYTAB -# -keytab = airflow.keytab - -# Allow to disable ticket forwardability. -# -# Variable: AIRFLOW__KERBEROS__FORWARDABLE -# -forwardable = True - -# Allow to remove source IP from token, useful when using token behind NATted Docker host. -# -# Variable: AIRFLOW__KERBEROS__INCLUDE_IP -# -include_ip = True - -[sensors] -# Sensor default timeout, 7 days by default (7 * 24 * 60 * 60). -# -# Variable: AIRFLOW__SENSORS__DEFAULT_TIMEOUT -# -default_timeout = 604800 - -[aws] -# This section contains settings for Amazon Web Services (AWS) integration. 
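The triggerer health threshold above feeds the same CLI check as the scheduler's; a quick probe looks like this:

```bash
# Verify a recent TriggererJob heartbeat (see triggerer_health_check_threshold above).
airflow jobs check --job-type TriggererJob
```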
- -# session_factory = -cloudwatch_task_handler_json_serializer = airflow.providers.amazon.aws.log.cloudwatch_task_handler.json_serialize_legacy - -[aws_batch_executor] -# This section only applies if you are using the AwsBatchExecutor in -# Airflow's ``[core]`` configuration. -# For more information on any of these execution parameters, see the link below: -# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/batch.html#Batch.Client.submit_job -# For boto3 credential management, see -# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html - -conn_id = aws_default -# region_name = -max_submit_job_attempts = 3 -check_health_on_startup = True -# job_name = -# job_queue = -# job_definition = -# submit_job_kwargs = - -[aws_ecs_executor] -# This section only applies if you are using the AwsEcsExecutor in -# Airflow's ``[core]`` configuration. -# For more information on any of these execution parameters, see the link below: -# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ecs/client/run_task.html -# For boto3 credential management, see -# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html - -conn_id = aws_default -# region_name = -assign_public_ip = False -# cluster = -# capacity_provider_strategy = -# container_name = -# launch_type = -platform_version = LATEST -# security_groups = -# subnets = -# task_definition = -max_run_task_attempts = 3 -# run_task_kwargs = -check_health_on_startup = True - -[aws_auth_manager] -# This section only applies if you are using the AwsAuthManager. In other words, if you set -# ``[core] auth_manager = airflow.providers.amazon.aws.auth_manager.aws_auth_manager.AwsAuthManager`` in -# Airflow's configuration. - -enable = False -conn_id = aws_default -# region_name = -# saml_metadata_url = -# avp_policy_store_id = - -[celery_kubernetes_executor] -# This section only applies if you are using the ``CeleryKubernetesExecutor`` in -# ``[core]`` section above - -# Define when to send a task to ``KubernetesExecutor`` when using ``CeleryKubernetesExecutor``. -# When the queue of a task is the value of ``kubernetes_queue`` (default ``kubernetes``), -# the task is executed via ``KubernetesExecutor``, -# otherwise via ``CeleryExecutor`` -# -# Variable: AIRFLOW__CELERY_KUBERNETES_EXECUTOR__KUBERNETES_QUEUE -# -kubernetes_queue = kubernetes - -[celery] -# This section only applies if you are using the CeleryExecutor in -# ``[core]`` section above - -# The app name that will be used by celery -# -# Variable: AIRFLOW__CELERY__CELERY_APP_NAME -# -celery_app_name = airflow.providers.celery.executors.celery_executor - -# The concurrency that will be used when starting workers with the -# ``airflow celery worker`` command. This defines the number of task instances that -# a worker will take, so size up your workers based on the resources on -# your worker box and the nature of your tasks -# -# Variable: AIRFLOW__CELERY__WORKER_CONCURRENCY -# -worker_concurrency = 32 - -# The maximum and minimum number of pool processes that will be used to dynamically resize -# the pool based on load.Enable autoscaling by providing max_concurrency,min_concurrency -# with the ``airflow celery worker`` command (always keep minimum processes, -# but grow to maximum if necessary). -# Pick these numbers based on resources on worker box and the nature of the task. -# If autoscale option is available, worker_concurrency will be ignored. 
-# https://docs.celeryq.dev/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale -# -# Example: worker_autoscale = 16,12 -# -# Variable: AIRFLOW__CELERY__WORKER_AUTOSCALE -# -# worker_autoscale = - -# Used to increase the number of tasks that a worker prefetches which can improve performance. -# The number of processes multiplied by worker_prefetch_multiplier is the number of tasks -# that are prefetched by a worker. A value greater than 1 can result in tasks being unnecessarily -# blocked if there are multiple workers and one worker prefetches tasks that sit behind long -# running tasks while another worker has unutilized processes that are unable to process the already -# claimed blocked tasks. -# https://docs.celeryq.dev/en/stable/userguide/optimizing.html#prefetch-limits -# -# Variable: AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER -# -worker_prefetch_multiplier = 2 - -# Specify if remote control of the workers is enabled. -# In some cases when the broker does not support remote control, Celery creates lots of -# ``.*reply-celery-pidbox`` queues. You can prevent this by setting this to false. -# However, with this disabled Flower won't work. -# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/index.html#broker-overview -# -# Variable: AIRFLOW__CELERY__WORKER_ENABLE_REMOTE_CONTROL -# -worker_enable_remote_control = true - -# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally -# a sqlalchemy database. Refer to the Celery documentation for more information. -# -# Variable: AIRFLOW__CELERY__BROKER_URL -# -# This will be configured via environment variables, as it differs between master and workers. -# broker_url = - -# The Celery result_backend. When a job finishes, it needs to update the -# metadata of the job. Therefore it will post a message on a message bus, -# or insert it into a database (depending of the backend) -# This status is used by the scheduler to update the state of the task -# The use of a database is highly recommended -# When not specified, sql_alchemy_conn with a db+ scheme prefix will be used -# https://docs.celeryq.dev/en/latest/userguide/configuration.html#task-result-backend-settings -# -# Example: result_backend = db+postgresql://postgres:airflow@postgres/airflow -# -# Variable: AIRFLOW__CELERY__RESULT_BACKEND -# -# The result_backend is intentionally left blank. -# When blank, Airflow's CeleryExecutor defaults to using the value from -# `sql_alchemy_conn` as the result backend, which is the recommended setup. -result_backend = - -# Optional configuration dictionary to pass to the Celery result backend SQLAlchemy engine. -# -# Example: result_backend_sqlalchemy_engine_options = {"pool_recycle": 1800} -# -# Variable: AIRFLOW__CELERY__RESULT_BACKEND_SQLALCHEMY_ENGINE_OPTIONS -# -result_backend_sqlalchemy_engine_options = - -# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start -# it ``airflow celery flower``. 
This defines the IP that Celery Flower runs on -# -# Variable: AIRFLOW__CELERY__FLOWER_HOST -# -flower_host = 0.0.0.0 - -# The root URL for Flower -# -# Example: flower_url_prefix = /flower -# -# Variable: AIRFLOW__CELERY__FLOWER_URL_PREFIX -# -flower_url_prefix = - -# This defines the port that Celery Flower runs on -# -# Variable: AIRFLOW__CELERY__FLOWER_PORT -# -flower_port = 5555 - -# Securing Flower with Basic Authentication -# Accepts user:password pairs separated by a comma -# -# Example: flower_basic_auth = user1:password1,user2:password2 -# -# Variable: AIRFLOW__CELERY__FLOWER_BASIC_AUTH -# -flower_basic_auth = - -# How many processes CeleryExecutor uses to sync task state. -# 0 means to use max(1, number of cores - 1) processes. -# -# Variable: AIRFLOW__CELERY__SYNC_PARALLELISM -# -sync_parallelism = 0 - -# Import path for celery configuration options -# -# Variable: AIRFLOW__CELERY__CELERY_CONFIG_OPTIONS -# -celery_config_options = airflow.providers.celery.executors.default_celery.DEFAULT_CELERY_CONFIG - -# -# Variable: AIRFLOW__CELERY__SSL_ACTIVE -# -ssl_active = False - -# Path to the client key. -# -# Variable: AIRFLOW__CELERY__SSL_KEY -# -ssl_key = - -# Path to the client certificate. -# -# Variable: AIRFLOW__CELERY__SSL_CERT -# -ssl_cert = - -# Path to the CA certificate. -# -# Variable: AIRFLOW__CELERY__SSL_CACERT -# -ssl_cacert = - -# Celery Pool implementation. -# Choices include: ``prefork`` (default), ``eventlet``, ``gevent`` or ``solo``. -# See: -# https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency -# https://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html -# -# Variable: AIRFLOW__CELERY__POOL -# -pool = prefork - -# The number of seconds to wait before timing out ``send_task_to_executor`` or -# ``fetch_celery_task_state`` operations. -# -# Variable: AIRFLOW__CELERY__OPERATION_TIMEOUT -# -operation_timeout = 1.0 - -task_acks_late = True -# Celery task will report its status as 'started' when the task is executed by a worker. -# This is used in Airflow to keep track of the running tasks and if a Scheduler is restarted -# or run in HA mode, it can adopt the orphan tasks launched by previous SchedulerJob. -# -# Variable: AIRFLOW__CELERY__TASK_TRACK_STARTED -# -task_track_started = True - -# The Maximum number of retries for publishing task messages to the broker when failing -# due to ``AirflowTaskTimeout`` error before giving up and marking Task as failed. -# -# Variable: AIRFLOW__CELERY__TASK_PUBLISH_MAX_RETRIES -# -task_publish_max_retries = 3 - -# Worker initialisation check to validate Metadata Database connection -# -# Variable: AIRFLOW__CELERY__WORKER_PRECHECK -# -worker_precheck = False - -# Extra celery configs to include in the celery worker. -# Any of the celery config can be added to this config and it -# will be applied while starting the celery worker. e.g. {"worker_max_tasks_per_child": 10} -# See also: -# https://docs.celeryq.dev/en/stable/userguide/configuration.html#configuration-and-defaults -# -# Variable: AIRFLOW__CELERY__EXTRA_CELERY_CONFIG -# -extra_celery_config = {} - -[celery_broker_transport_options] -# This section is for specifying options which can be passed to the -# underlying celery broker transport. See: -# https://docs.celeryq.dev/en/latest/userguide/configuration.html#std:setting-broker_transport_options - -# The visibility timeout defines the number of seconds to wait for the worker -# to acknowledge the task before the message is redelivered to another worker. 
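The [celery] options above all feed the two commands this section revolves around. A minimal sketch, assuming broker_url and result_backend are injected through the environment as the comments note:

```bash
# Start a Celery worker on the default queue; the concurrency and prefetch settings above apply.
airflow celery worker --queues default

# Optional monitoring UI, served on flower_port = 5555 by default.
airflow celery flower
```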
-# Make sure to increase the visibility timeout to match the time of the longest -# ETA you're planning to use. -# visibility_timeout is only supported for Redis and SQS celery brokers. -# See: -# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#visibility-timeout -# -# Example: visibility_timeout = 21600 -# -# Variable: AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__VISIBILITY_TIMEOUT -# -# visibility_timeout = - -# The sentinel_kwargs parameter allows passing additional options to the Sentinel client. -# In a typical scenario where Redis Sentinel is used as the broker and Redis servers are -# password-protected, the password needs to be passed through this parameter. Although its -# type is string, it is required to pass a string that conforms to the dictionary format. -# See: -# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#configuration -# -# Example: sentinel_kwargs = {"password": "password_for_redis_server"} -# -# Variable: AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__SENTINEL_KWARGS -# -# sentinel_kwargs = - -[local_kubernetes_executor] -# This section only applies if you are using the ``LocalKubernetesExecutor`` in -# ``[core]`` section above - -# Define when to send a task to ``KubernetesExecutor`` when using ``LocalKubernetesExecutor``. -# When the queue of a task is the value of ``kubernetes_queue`` (default ``kubernetes``), -# the task is executed via ``KubernetesExecutor``, -# otherwise via ``LocalExecutor`` -# -# Variable: AIRFLOW__LOCAL_KUBERNETES_EXECUTOR__KUBERNETES_QUEUE -# -kubernetes_queue = kubernetes - -[kubernetes_executor] -# Kwargs to override the default urllib3 Retry used in the kubernetes API client -# -# Example: api_client_retry_configuration = { "total": 3, "backoff_factor": 0.5 } -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__API_CLIENT_RETRY_CONFIGURATION -# -api_client_retry_configuration = - -# Flag to control the information added to kubernetes executor logs for better traceability -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__LOGS_TASK_METADATA -# -logs_task_metadata = False - -# Path to the YAML pod file that forms the basis for KubernetesExecutor workers. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__POD_TEMPLATE_FILE -# -pod_template_file = - -# The repository of the Kubernetes Image for the Worker to Run -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_CONTAINER_REPOSITORY -# -worker_container_repository = - -# The tag of the Kubernetes Image for the Worker to Run -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_CONTAINER_TAG -# -worker_container_tag = - -# The Kubernetes namespace where airflow workers should be created. Defaults to ``default`` -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__NAMESPACE -# -namespace = default - -# If True, all worker pods will be deleted upon termination -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__DELETE_WORKER_PODS -# -delete_worker_pods = True - -# If False (and delete_worker_pods is True), -# failed worker pods will not be deleted so users can investigate them. -# This only prevents removal of worker pods where the worker itself failed, -# not when the task it ran failed. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__DELETE_WORKER_PODS_ON_FAILURE -# -delete_worker_pods_on_failure = False - -worker_pod_pending_fatal_container_state_reasons = CreateContainerConfigError,ErrImagePull,CreateContainerError,ImageInspectError, InvalidImageName -# Number of Kubernetes Worker Pod creation calls per scheduler loop. 
-# Note that the current default of "1" will only launch a single pod -# per-heartbeat. It is HIGHLY recommended that users increase this -# number to match the tolerance of their kubernetes cluster for -# better performance. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_PODS_CREATION_BATCH_SIZE -# -worker_pods_creation_batch_size = 1 - -# Allows users to launch pods in multiple namespaces. -# Will require creating a cluster-role for the scheduler, -# or use multi_namespace_mode_namespace_list configuration. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__MULTI_NAMESPACE_MODE -# -multi_namespace_mode = False - -# If multi_namespace_mode is True while scheduler does not have a cluster-role, -# give the list of namespaces where the scheduler will schedule jobs -# Scheduler needs to have the necessary permissions in these namespaces. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__MULTI_NAMESPACE_MODE_NAMESPACE_LIST -# -multi_namespace_mode_namespace_list = - -# Use the service account kubernetes gives to pods to connect to kubernetes cluster. -# It's intended for clients that expect to be running inside a pod running on kubernetes. -# It will raise an exception if called from a process not running in a kubernetes environment. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__IN_CLUSTER -# -in_cluster = True - -# When running with in_cluster=False change the default cluster_context or config_file -# options to Kubernetes client. Leave blank these to use default behaviour like ``kubectl`` has. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__CLUSTER_CONTEXT -# -# cluster_context = - -# Path to the kubernetes configfile to be used when ``in_cluster`` is set to False -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__CONFIG_FILE -# -# config_file = - -# Keyword parameters to pass while calling a kubernetes client core_v1_api methods -# from Kubernetes Executor provided as a single line formatted JSON dictionary string. -# List of supported params are similar for all core_v1_apis, hence a single config -# variable for all apis. See: -# https://raw.githubusercontent.com/kubernetes-client/python/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/api/core_v1_api.py -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__KUBE_CLIENT_REQUEST_ARGS -# -kube_client_request_args = - -# Optional keyword arguments to pass to the ``delete_namespaced_pod`` kubernetes client -# ``core_v1_api`` method when using the Kubernetes Executor. -# This should be an object and can contain any of the options listed in the ``v1DeleteOptions`` -# class defined here: -# https://github.com/kubernetes-client/python/blob/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/models/v1_delete_options.py#L19 -# -# Example: delete_option_kwargs = {"grace_period_seconds": 10} -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__DELETE_OPTION_KWARGS -# -delete_option_kwargs = - -# Enables TCP keepalive mechanism. This prevents Kubernetes API requests to hang indefinitely -# when idle connection is time-outed on services like cloud load balancers or firewalls. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__ENABLE_TCP_KEEPALIVE -# -enable_tcp_keepalive = True - -# When the `enable_tcp_keepalive` option is enabled, TCP probes a connection that has -# been idle for `tcp_keep_idle` seconds. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TCP_KEEP_IDLE -# -tcp_keep_idle = 120 - -# When the `enable_tcp_keepalive` option is enabled, if Kubernetes API does not respond -# to a keepalive probe, TCP retransmits the probe after `tcp_keep_intvl` seconds. 
-# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TCP_KEEP_INTVL -# -tcp_keep_intvl = 30 - -# When the `enable_tcp_keepalive` option is enabled, if Kubernetes API does not respond -# to a keepalive probe, TCP retransmits the probe `tcp_keep_cnt number` of times before -# a connection is considered to be broken. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TCP_KEEP_CNT -# -tcp_keep_cnt = 6 - -# Set this to false to skip verifying SSL certificate of Kubernetes python client. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__VERIFY_SSL -# -verify_ssl = True - -# How often in seconds to check for task instances stuck in "queued" status without a pod -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_PODS_QUEUED_CHECK_INTERVAL -# -worker_pods_queued_check_interval = 60 - -# Path to a CA certificate to be used by the Kubernetes client to verify the server's SSL certificate. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__SSL_CA_CERT -# -ssl_ca_cert = - -# The Maximum number of retries for queuing the task to the kubernetes scheduler when -# failing due to Kube API exceeded quota errors before giving up and marking task as failed. -# -1 for unlimited times. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TASK_PUBLISH_MAX_RETRIES -# -task_publish_max_retries = 0 - -[common.io] -# Common IO configuration section - -# Path to a location on object storage where XComs can be stored in url format. -# -# Example: xcom_objectstorage_path = s3://conn_id@bucket/path -# -# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_PATH -# -xcom_objectstorage_path = - -# Threshold in bytes for storing XComs in object storage. -1 means always store in the -# database. 0 means always store in object storage. Any positive number means -# it will be stored in object storage if the size of the value is greater than the threshold. -# -# Example: xcom_objectstorage_threshold = 1000000 -# -# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_THRESHOLD -# -xcom_objectstorage_threshold = -1 - -# Compression algorithm to use when storing XComs in object storage. Supported algorithms -# are a.o.: snappy, zip, gzip, bz2, and lzma. If not specified, no compression will be used. -# Note that the compression algorithm must be available in the Python installation (e.g. -# python-snappy for snappy). Zip, gz, bz2 are available by default. -# -# Example: xcom_objectstorage_compression = gz -# -# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_COMPRESSION -# -xcom_objectstorage_compression = - - - -[fab] -# This section contains configs specific to FAB provider. - -# Boolean for enabling rate limiting on authentication endpoints. -# -# Variable: AIRFLOW__FAB__AUTH_RATE_LIMITED -# -auth_rate_limited = True - -# Rate limit for authentication endpoints. -# -# Variable: AIRFLOW__FAB__AUTH_RATE_LIMIT -# -auth_rate_limit = 5 per 40 second - -# Update FAB permissions and sync security manager roles -# on webserver startup -# -# Variable: AIRFLOW__FAB__UPDATE_FAB_PERMS -# -update_fab_perms = True - -[imap] -# Options for IMAP provider. - -# ssl_context = - -[azure_remote_logging] -# Configuration that needs to be set for enable remote logging in Azure Blob Storage - -remote_wasb_log_container = airflow-logs - -[openlineage] -# This section applies settings for OpenLineage integration. -# More about configuration and it's precedence can be found at -# https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html#transport-setup - -# Disable sending events without uninstalling the OpenLineage Provider by setting this to true. 
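The `xcom_objectstorage_threshold` semantics described earlier in this section reduce to a simple size check. A sketch of that documented rule, illustrative only and not the common.io provider's actual implementation:

```python
# Illustrative restatement of the xcom_objectstorage_threshold rule:
# -1 keeps every XCom in the metadata database, 0 always uses object storage,
# and a positive value offloads only XComs larger than the threshold (bytes).
def store_xcom_in_object_storage(serialized_value: bytes, threshold: int) -> bool:
    """Return True when the XCom should go to object storage instead of the DB."""
    if threshold < 0:
        return False
    if threshold == 0:
        return True
    return len(serialized_value) > threshold


assert store_xcom_in_object_storage(b"x" * 10, -1) is False
assert store_xcom_in_object_storage(b"x" * 10, 0) is True
assert store_xcom_in_object_storage(b"x" * 1_000_001, 1_000_000) is True
```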
-# -# Variable: AIRFLOW__OPENLINEAGE__DISABLED -# -disabled = False - -# Exclude some Operators from emitting OpenLineage events by passing a string of semicolon separated -# full import paths of Operators to disable. -# -# Example: disabled_for_operators = airflow.providers.standard.operators.bash.BashOperator; airflow.providers.standard.operators.python.PythonOperator -# -# Variable: AIRFLOW__OPENLINEAGE__DISABLED_FOR_OPERATORS -# -disabled_for_operators = - -# If this setting is enabled, OpenLineage integration won't collect and emit metadata, -# unless you explicitly enable it per `DAG` or `Task` using `enable_lineage` method. -# -# Variable: AIRFLOW__OPENLINEAGE__SELECTIVE_ENABLE -# -selective_enable = False - -# Set namespace that the lineage data belongs to, so that if you use multiple OpenLineage producers, -# events coming from them will be logically separated. -# -# Example: namespace = my_airflow_instance_1 -# -# Variable: AIRFLOW__OPENLINEAGE__NAMESPACE -# -# namespace = - -# Register custom OpenLineage Extractors by passing a string of semicolon separated full import paths. -# -# Example: extractors = full.path.to.ExtractorClass;full.path.to.AnotherExtractorClass -# -# Variable: AIRFLOW__OPENLINEAGE__EXTRACTORS -# -# extractors = - -# Register custom run facet functions by passing a string of semicolon separated full import paths. -# -# Example: custom_run_facets = full.path.to.custom_facet_function;full.path.to.another_custom_facet_function -# -# Variable: AIRFLOW__OPENLINEAGE__CUSTOM_RUN_FACETS -# -custom_run_facets = - -# Specify the path to the YAML configuration file. -# This ensures backwards compatibility with passing config through the `openlineage.yml` file. -# -# Example: config_path = full/path/to/openlineage.yml -# -# Variable: AIRFLOW__OPENLINEAGE__CONFIG_PATH -# -config_path = - -# Pass OpenLineage Client transport configuration as JSON string. It should contain type of the -# transport and additional options (different for each transport type). For more details see: -# https://openlineage.io/docs/client/python/#built-in-transport-types -# -# Currently supported types are: -# -# * HTTP -# * Kafka -# * Console -# * File -# -# Example: transport = {"type": "http", "url": "http://localhost:5000", "endpoint": "api/v1/lineage"} -# -# Variable: AIRFLOW__OPENLINEAGE__TRANSPORT -# -transport = - -# Disable the inclusion of source code in OpenLineage events by setting this to `true`. -# By default, several Operators (e.g. Python, Bash) will include their source code in the events -# unless disabled. -# -# Variable: AIRFLOW__OPENLINEAGE__DISABLE_SOURCE_CODE -# -disable_source_code = False - -# Number of processes to utilize for processing DAG state changes -# in an asynchronous manner within the scheduler process. -# -# Variable: AIRFLOW__OPENLINEAGE__DAG_STATE_CHANGE_PROCESS_POOL_SIZE -# -dag_state_change_process_pool_size = 1 - -# Maximum amount of time (in seconds) that OpenLineage can spend executing metadata extraction. -# -# Variable: AIRFLOW__OPENLINEAGE__EXECUTION_TIMEOUT -# -execution_timeout = 10 - -# If true, OpenLineage event will include full task info - potentially containing large fields. -# -# Variable: AIRFLOW__OPENLINEAGE__INCLUDE_FULL_TASK_INFO -# -include_full_task_info = False - -# If true, OpenLineage events will include information useful for debugging - potentially -# containing large fields e.g. all installed packages and their versions. 
-# -# Variable: AIRFLOW__OPENLINEAGE__DEBUG_MODE -# -debug_mode = False - -# Automatically inject OpenLineage's parent job (namespace, job name, run id) information into Spark -# application properties for supported Operators. -# -# Variable: AIRFLOW__OPENLINEAGE__SPARK_INJECT_PARENT_JOB_INFO -# -spark_inject_parent_job_info = False - -[smtp_provider] -# Options for SMTP provider. - -# ssl context to use when using SMTP and IMAP SSL connections. By default, the context is "default" -# which sets it to ``ssl.create_default_context()`` which provides the right balance between -# compatibility and security, it however requires that certificates in your operating system are -# updated and that SMTP/IMAP servers of yours have valid certificates that have corresponding public -# keys installed on your machines. You can switch it to "none" if you want to disable checking -# of the certificates, but it is not recommended as it allows MITM (man-in-the-middle) attacks -# if your infrastructure is not sufficiently secured. It should only be set temporarily while you -# are fixing your certificate configuration. This can be typically done by upgrading to newer -# version of the operating system you run Airflow components on,by upgrading/refreshing proper -# certificates in the OS or by updating certificates for your mail servers. -# -# If you do not set this option explicitly, it will use Airflow "email.ssl_context" configuration, -# but if this configuration is not present, it will use "default" value. -# -# Example: ssl_context = default -# -# Variable: AIRFLOW__SMTP_PROVIDER__SSL_CONTEXT -# -# ssl_context = - -# Allows overriding of the standard templated email subject line when the SmtpNotifier is used. -# Must provide a path to the template. -# -# Example: templated_email_subject_path = path/to/override/email_subject.html -# -# Variable: AIRFLOW__SMTP_PROVIDER__TEMPLATED_EMAIL_SUBJECT_PATH -# -# templated_email_subject_path = - -# Allows overriding of the standard templated email path when the SmtpNotifier is used. Must provide -# a path to the template. -# -# Example: templated_html_content_path = path/to/override/email.html -# -# Variable: AIRFLOW__SMTP_PROVIDER__TEMPLATED_HTML_CONTENT_PATH -# -# templated_html_content_path = - -[docker] -docker_url = unix://var/run/docker.sock diff --git a/airflow/aria2-pro-docker/Dockerfile b/airflow/aria2-pro-docker/Dockerfile deleted file mode 100644 index 27fc41a..0000000 --- a/airflow/aria2-pro-docker/Dockerfile +++ /dev/null @@ -1,126 +0,0 @@ -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. 
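The `[docker]` block that closes the config above points the Docker provider at the host's local socket. A minimal task using that socket might look like the following; the DAG id, task id, and image are placeholder names, not ones taken from this repository:

```python
# Hypothetical example: a DAG task talking to the docker_url configured above.
from datetime import datetime

from airflow import DAG
from airflow.providers.docker.operators.docker import DockerOperator

with DAG(
    dag_id="example_docker_socket",
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
) as dag:
    DockerOperator(
        task_id="hello_from_container",
        image="alpine:3.19",
        command="echo hello",
        docker_url="unix://var/run/docker.sock",  # same socket as [docker] above
    )
```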
- -# Using Debian Bullseye as a more stable base than EOL Alpine -FROM debian:bullseye-slim - -# Install s6-overlay and build aria2 in a single layer to reduce image size -# renovate: datasource=github-releases depName=just-containers/s6-overlay -ARG S6_OVERLAY_VERSION=v3.1.6.2 -RUN BUILD_DEPS=" \ - build-essential \ - autoconf \ - automake \ - autotools-dev \ - libtool \ - pkg-config \ - git \ - gettext \ - autopoint \ - gettext-base \ - libssl-dev \ - libssh2-1-dev \ - libc-ares-dev \ - libexpat1-dev \ - libc-ares-dev \ - vim \ - libexpat1 \ - zlib1g-dev \ - libsqlite3-dev \ - " && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - jq \ - findutils \ - ca-certificates \ - curl \ - xz-utils \ - dos2unix \ - $BUILD_DEPS && \ - curl -sSL https://github.com/just-containers/s6-overlay/releases/download/${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz -o /tmp/s6-overlay-noarch.tar.xz && \ - curl -sSL https://github.com/just-containers/s6-overlay/releases/download/${S6_OVERLAY_VERSION}/s6-overlay-x86_64.tar.xz -o /tmp/s6-overlay-x86_64.tar.xz && \ - tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz && \ - tar -C / -Jxpf /tmp/s6-overlay-x86_64.tar.xz && \ - git clone https://github.com/aria2/aria2.git /tmp/aria2 && \ - cd /tmp/aria2 && \ - git checkout 8985d66e71f980e7d2765753800078f47761f1ba && \ - sed -i "s/\"1\", 1, 16, 'x'));/\"1\", 1, 128, 'x'));/" src/OptionHandlerFactory.cc && \ - autoreconf -i && \ - ./configure \ - --disable-dependency-tracking \ - --enable-static \ - --disable-shared \ - --with-ca-bundle=/etc/ssl/certs/ca-certificates.crt \ - --without-libxml2 \ - --with-libexpat \ - --without-libgcrypt \ - --with-openssl \ - --with-libcares \ - --with-libsqlite3 \ - --with-libssh2 \ - --with-zlib && \ - make -j$(nproc) && \ - make install && \ - cd / && \ -# No purge runtime dev apt-get purge -y --auto-remove $BUILD_DEPS && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* - -COPY rootfs / - -RUN find /etc/cont-init.d /etc/services.d -type f -exec dos2unix {} + && \ - find /etc/cont-init.d /etc/services.d -type f -exec chmod +x {} + - -ENV S6_BEHAVIOUR_IF_STAGE2_FAILS=1 \ - RCLONE_CONFIG=/config/rclone.conf \ - UPDATE_TRACKERS=true \ - CUSTOM_TRACKER_URL= \ - LISTEN_PORT=6888 \ - RPC_PORT=6800 \ - RPC_SECRET= \ - PUID= PGID= \ - DISK_CACHE= \ - IPV6_MODE= \ - UMASK_SET= \ - SPECIAL_MODE= - -EXPOSE \ - 6800 \ - 6888 \ - 6888/udp - -VOLUME \ - /config \ - /downloads - -#ENTRYPOINT ["/init"] -CMD ["aria2c", \ - "--enable-rpc=true", \ - "--rpc-listen-all=true", \ - "--rpc-listen-port=6800", \ - "--listen-port=6888", \ - "--disable-ipv6=true", \ - "--max-concurrent-downloads=128", \ - "--max-connection-per-server=32", \ - "--split=6", \ - "--min-split-size=2M", \ - "--file-allocation=falloc", \ - "--continue=false", \ - "--check-integrity=false", \ - "--log-level=info", \ - "--console-log-level=info", \ - "--save-session-interval=5", \ - "--dir=/downloads", \ - "--disk-cache=64M", \ - "--input-file=/config/aria2.session", \ - "--save-session=/config/aria2.session"] diff --git a/airflow/aria2-pro-docker/rootfs/Aria2-Pro b/airflow/aria2-pro-docker/rootfs/Aria2-Pro deleted file mode 100644 index 4680af2..0000000 --- a/airflow/aria2-pro-docker/rootfs/Aria2-Pro +++ /dev/null @@ -1,17 +0,0 @@ - ----------------------------------------------------------------- - - █████╗ ██████╗ ██╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ -██╔══██╗██╔══██╗██║██╔══██╗╚════██╗ ██╔══██╗██╔══██╗██╔═══██╗ -███████║██████╔╝██║███████║ █████╔╝ ██████╔╝██████╔╝██║ ██║ 
-██╔══██║██╔══██╗██║██╔══██║██╔═══╝ ██╔═══╝ ██╔══██╗██║ ██║ -██║ ██║██║ ██║██║██║ ██║███████╗ ██║ ██║ ██║╚██████╔╝ -╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═╝ ╚═╝ ╚═════╝ - -https://github.com/P3TERX/Aria2-Pro-Docker - -Copyright (c) 2020-2021 P3TERX - -Version: COMMIT_HASH | Build Time: DATE_TIME ----------------------------------------------------------------- - diff --git a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/08-config b/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/08-config deleted file mode 100644 index 2336abe..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/08-config +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/with-contenv bash -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. - -. /etc/init-base - -mkdir -p ${ARIA2_CONF_DIR} ${SCRIPT_DIR} ${DOWNLOAD_DIR} - -PROFILES=" -aria2.conf -" - -DOWNLOAD_PROFILE - -[[ ! -f "${ARIA2_CONF_DIR}/aria2.session" ]] && { - rm -rf "${ARIA2_CONF_DIR}/aria2.session" - touch "${ARIA2_CONF_DIR}/aria2.session" -} - -if ! [[ "${UPDATE_TRACKERS}" = "false" || "${UPDATE_TRACKERS}" = "disable" ]]; then - rm -f /etc/services.d/crond/down - PROFILES="tracker.sh" - DOWNLOAD_PROFILE - bash ${SCRIPT_DIR}/tracker.sh ${ARIA2_CONF} -else - touch /etc/services.d/crond/down -fi - -exit 0 diff --git a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/18-mode b/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/18-mode deleted file mode 100644 index 2a21920..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/18-mode +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/with-contenv bash -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. - -. /etc/init-base - -INSTALL_RCLONE() { - if [[ ! -f /usr/local/bin/rclone ]]; then - echo - echo -e "${INFO} Installing RCLONE ..." - [[ -L /usr/bin/unzip ]] && rm -f /usr/bin/unzip - curl -fsSL https://rclone.org/install.sh | bash - fi -} - -if [[ "${SPECIAL_MODE}" = "rclone" ]]; then - INSTALL_RCLONE - PROFILES="upload.sh rclone.env" - DOWNLOAD_PROFILE -elif [[ "${SPECIAL_MODE}" = "move" ]]; then - PROFILES="move.sh" - DOWNLOAD_PROFILE -fi - -exit 0 diff --git a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/28-fix b/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/28-fix deleted file mode 100644 index 4d4963b..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/28-fix +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/with-contenv bash -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. - -. 
/etc/init-base - -[[ -e ${ARIA2_CONF_DIR}/delete.sh ]] && { - rm -f ${ARIA2_CONF_DIR}/*.sh - sed -i "s@^\(on-download-stop=\).*@\1${SCRIPT_DIR}/delete.sh@" ${ARIA2_CONF} - sed -i "s@^\(on-download-complete=\).*@\1${SCRIPT_DIR}/clean.sh@" ${ARIA2_CONF} -} - -sed -i "s@^\(dir=\).*@\1/downloads@" ${ARIA2_CONF} -sed -i "s@^\(input-file=\).*@\1${ARIA2_CONF_DIR}/aria2.session@" ${ARIA2_CONF} -sed -i "s@^\(save-session=\).*@\1${ARIA2_CONF_DIR}/aria2.session@" ${ARIA2_CONF} -sed -i "s@^\(dht-file-path=\).*@\1${ARIA2_CONF_DIR}/dht.dat@" ${ARIA2_CONF} -sed -i "s@^\(dht-file-path6=\).*@\1${ARIA2_CONF_DIR}/dht6.dat@" ${ARIA2_CONF} - -[[ -e ${ARIA2_CONF_DIR}/HelloWorld ]] && exit 0 - -[[ ${RPC_PORT} ]] && - sed -i "s@^\(rpc-listen-port=\).*@\1${RPC_PORT}@" ${ARIA2_CONF} - -[[ ${LISTEN_PORT} ]] && { - sed -i "s@^\(listen-port=\).*@\1${LISTEN_PORT}@" ${ARIA2_CONF} - sed -i "s@^\(dht-listen-port=\).*@\1${LISTEN_PORT}@" ${ARIA2_CONF} -} - -[[ ${RPC_SECRET} ]] && - sed -i "s@^\(rpc-secret=\).*@\1${RPC_SECRET}@" ${ARIA2_CONF} - -[[ ${DISK_CACHE} ]] && - sed -i "s@^\(disk-cache=\).*@\1${DISK_CACHE}@" ${ARIA2_CONF} - -[[ "${IPV6_MODE}" = "true" || "${IPV6_MODE}" = "enable" ]] && { - sed -i "s@^\(disable-ipv6=\).*@\1false@" ${ARIA2_CONF} - sed -i "s@^\(enable-dht6=\).*@\1true@" ${ARIA2_CONF} -} - -[[ "${IPV6_MODE}" = "false" || "${IPV6_MODE}" = "disable" ]] && { - sed -i "s@^\(disable-ipv6=\).*@\1true@" ${ARIA2_CONF} - sed -i "s@^\(enable-dht6=\).*@\1false@" ${ARIA2_CONF} -} - -[[ "${SPECIAL_MODE}" = "rclone" ]] && - sed -i "s@^\(on-download-complete=\).*@\1${SCRIPT_DIR}/upload.sh@" ${ARIA2_CONF} - -[[ "${SPECIAL_MODE}" = "move" ]] && - sed -i "s@^\(on-download-complete=\).*@\1${SCRIPT_DIR}/move.sh@" ${ARIA2_CONF} - -exit 0 diff --git a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/58-permissions b/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/58-permissions deleted file mode 100644 index b3adde8..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/58-permissions +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/with-contenv bash -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. - -. /etc/init-base -if [ -w ${DOWNLOAD_DIR} ]; then echo "Download DIR writeable, not changing owner."; else chown -R p3terx:p3terx ${DOWNLOAD_DIR}; fi -chown -R p3terx:p3terx ${ARIA2_CONF_DIR} -if [[ -z ${PUID} && -z ${PGID} ]] || [[ ${PUID} = 65534 && ${PGID} = 65534 ]]; then - echo -e "${WARN} Ignore permission settings." 
- chmod -v 777 ${DOWNLOAD_DIR} - chmod -vR 777 ${ARIA2_CONF_DIR} -else - if [ -w ${DOWNLOAD_DIR} ]; then echo "Download DIR writeable, not modifying permission."; else chmod -v u=rwx ${DOWNLOAD_DIR}; fi - chmod -v 600 ${ARIA2_CONF_DIR}/* - chmod -v 755 ${SCRIPT_DIR} - chmod -v 700 ${SCRIPT_DIR}/* -fi diff --git a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/88-done b/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/88-done deleted file mode 100644 index de5f287..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/cont-init.d/88-done +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -cat /Aria2-Pro diff --git a/airflow/aria2-pro-docker/rootfs/etc/crontabs/p3terx b/airflow/aria2-pro-docker/rootfs/etc/crontabs/p3terx deleted file mode 100644 index 3f47c87..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/crontabs/p3terx +++ /dev/null @@ -1 +0,0 @@ -# BT tracker updates disabled. diff --git a/airflow/aria2-pro-docker/rootfs/etc/init-base b/airflow/aria2-pro-docker/rootfs/etc/init-base deleted file mode 100644 index 1043831..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/init-base +++ /dev/null @@ -1,118 +0,0 @@ -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Docker-Aria2-Pro -# -# Copyright (c) 2020 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. - -Green_font_prefix="\033[32m" -Red_font_prefix="\033[31m" -Green_background_prefix="\033[42;37m" -Red_background_prefix="\033[41;37m" -Font_color_suffix="\033[0m" -INFO="[${Green_font_prefix}INFO${Font_color_suffix}]" -ERROR="[${Red_font_prefix}ERROR${Font_color_suffix}]" -WARN="[${Yellow_font_prefix}WARN${Font_color_suffix}]" -DOWNLOAD_DIR="/downloads" -ARIA2_CONF_DIR="/config" -ARIA2_CONF="${ARIA2_CONF_DIR}/aria2.conf" -SCRIPT_CONF="${ARIA2_CONF_DIR}/script.conf" -SCRIPT_DIR="${ARIA2_CONF_DIR}/script" -CURL_OPTIONS="-fsSL --connect-timeout 3 --max-time 3" -PROFILE_URL1="https://p3terx.github.io/aria2.conf" -PROFILE_URL2="https://aria2c.now.sh" -PROFILE_URL3="https://cdn.jsdelivr.net/gh/P3TERX/aria2.conf" - -FILE_ALLOCATION_SET() { - TMP_FILE="/downloads/P3TERX.COM" - if fallocate -l 5G ${TMP_FILE}; then - FILE_ALLOCATION=falloc - else - FILE_ALLOCATION=none - fi - rm -f ${TMP_FILE} - sed -i "s@^\(file-allocation=\).*@\1${FILE_ALLOCATION}@" "${ARIA2_CONF}" -} - -CONVERSION_ARIA2_CONF() { - sed -i "s@^\(rpc-listen-port=\).*@\1${RPC_PORT:-6800}@" "${ARIA2_CONF}" - sed -i "s@^\(listen-port=\).*@\1${LISTEN_PORT:-6888}@" "${ARIA2_CONF}" - sed -i "s@^\(dht-listen-port=\).*@\1${LISTEN_PORT:-6888}@" "${ARIA2_CONF}" - sed -i "s@^\(dir=\).*@\1/downloads@" "${ARIA2_CONF}" - sed -i "s@/root/.aria2@${ARIA2_CONF_DIR}@" "${ARIA2_CONF}" - sed -i "s@^#\(retry-on-.*=\).*@\1true@" "${ARIA2_CONF}" - sed -i "s@^\(max-connection-per-server=\).*@\1128@" "${ARIA2_CONF}" - sed -i "/^on-download-stop=/d" "${ARIA2_CONF}" - sed -i "/^on-download-complete=/d" "${ARIA2_CONF}" - - # Custom settings from user - sed -i "s@^\(continue=\).*@\1false@" "${ARIA2_CONF}" - sed -i "s@^\(always-resume=\).*@\1false@" "${ARIA2_CONF}" - sed -i "s@^\(max-concurrent-downloads=\).*@\1500@" "${ARIA2_CONF}" - sed -i "s@^\(enable-dht=\).*@\1false@" "${ARIA2_CONF}" - sed -i "s@^\(enable-dht6=\).*@\1false@" "${ARIA2_CONF}" - sed -i "s@^\(bt-enable-lpd=\).*@\1true@" "${ARIA2_CONF}" - sed -i "s@^\(enable-peer-exchange=\).*@\1false@" "${ARIA2_CONF}" - sed -i 
"s@^\(max-overall-upload-limit=\).*@\12M@" "${ARIA2_CONF}" - sed -i "s@^\(seed-time=\).*@\11@" "${ARIA2_CONF}" - sed -i "s@^\(user-agent=\).*@\1Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version@" "${ARIA2_CONF}" - sed -i "s@^\(peer-id-prefix=\).*@\1-DE13F0-@" "${ARIA2_CONF}" - sed -i "s@^\(summary-interval=\).*@\11@" "${ARIA2_CONF}" - sed -i "s@^\(show-console-readout=\).*@\1false@" "${ARIA2_CONF}" - sed -i "s@^\(console-log-level=\).*@\1notice@" "${ARIA2_CONF}" - - # Add settings not present in default config - echo "" >>"${ARIA2_CONF}" - echo "# Custom settings added" >>"${ARIA2_CONF}" - echo "disable-metalink=true" >>"${ARIA2_CONF}" - echo "follow-torrent=false" >>"${ARIA2_CONF}" - echo "retry-on-400=false" >>"${ARIA2_CONF}" - echo "retry-on-403=false" >>"${ARIA2_CONF}" - echo "retry-on-406=false" >>"${ARIA2_CONF}" - echo "retry-on-unknown=true" >>"${ARIA2_CONF}" - echo "rpc-listen-all=true" >>"${ARIA2_CONF}" - - [[ $TZ != "Asia/Shanghai" ]] && sed -i '11,$s/#.*//;/^$/d' "${ARIA2_CONF}" - FILE_ALLOCATION_SET -} - -CONVERSION_SCRIPT_CONF() { - sed -i "s@\(upload-log=\).*@\1${ARIA2_CONF_DIR}/upload.log@" "${SCRIPT_CONF}" - sed -i "s@\(move-log=\).*@\1${ARIA2_CONF_DIR}/move.log@" "${SCRIPT_CONF}" - sed -i "s@^\(dest-dir=\).*@\1${DOWNLOAD_DIR}/completed@" "${SCRIPT_CONF}" -} - -CONVERSION_CORE() { - sed -i "s@\(ARIA2_CONF_DIR=\"\).*@\1${ARIA2_CONF_DIR}\"@" "${SCRIPT_DIR}/core" -} - -DOWNLOAD_PROFILE() { - for PROFILE in ${PROFILES}; do - [[ ${PROFILE} = *.sh || ${PROFILE} = core ]] && cd "${SCRIPT_DIR}" || cd "${ARIA2_CONF_DIR}" - while [[ ! -f ${PROFILE} ]]; do - rm -rf ${PROFILE} - echo - echo -e "${INFO} Downloading '${PROFILE}' ..." - curl -O ${CURL_OPTIONS} ${PROFILE_URL1}/${PROFILE} || - curl -O ${CURL_OPTIONS} ${PROFILE_URL2}/${PROFILE} || - curl -O ${CURL_OPTIONS} ${PROFILE_URL3}/${PROFILE} - [[ -s ${PROFILE} ]] && { - [[ "${PROFILE}" = "aria2.conf" ]] && CONVERSION_ARIA2_CONF - [[ "${PROFILE}" = "script.conf" ]] && CONVERSION_SCRIPT_CONF - [[ "${PROFILE}" = "core" ]] && CONVERSION_CORE - echo - echo -e "${INFO} '${PROFILE}' download completed !" - } || { - echo - echo -e "${ERROR} '${PROFILE}' download error, retry ..." - sleep 3 - } - done - done -} diff --git a/airflow/aria2-pro-docker/rootfs/etc/services.d/aria2/finish b/airflow/aria2-pro-docker/rootfs/etc/services.d/aria2/finish deleted file mode 100644 index 6d0cce0..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/services.d/aria2/finish +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/execlineb -S0 -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. 
- -s6-svscanctl -t /var/run/s6/services diff --git a/airflow/aria2-pro-docker/rootfs/etc/services.d/aria2/run b/airflow/aria2-pro-docker/rootfs/etc/services.d/aria2/run deleted file mode 100644 index a7f7a02..0000000 --- a/airflow/aria2-pro-docker/rootfs/etc/services.d/aria2/run +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/with-contenv bash -# _ _ ____ ____ -# / \ _ __(_) __ _|___ \ | _ \ _ __ ___ -# / _ \ | '__| |/ _` | __) | | |_) | '__/ _ \ -# / ___ \| | | | (_| |/ __/ | __/| | | (_) | -# /_/ \_\_| |_|\__,_|_____| |_| |_| \___/ -# -# https://github.com/P3TERX/Aria2-Pro-Docker -# -# Copyright (c) 2020-2021 P3TERX -# -# This is free software, licensed under the MIT License. -# See /LICENSE for more information. - -umask ${UMASK_SET:-022} - -exec s6-setuidgid p3terx aria2c \ - --conf-path=/config/aria2.conf diff --git a/airflow/bgutil-ytdlp-pot-provider b/airflow/bgutil-ytdlp-pot-provider deleted file mode 160000 index c79e8dc..0000000 --- a/airflow/bgutil-ytdlp-pot-provider +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c79e8dc48151c8dd7c0349b85ada2ccfcdfeb75b diff --git a/airflow/camoufox/Dockerfile b/airflow/camoufox/Dockerfile deleted file mode 100644 index fef9cd1..0000000 --- a/airflow/camoufox/Dockerfile +++ /dev/null @@ -1,114 +0,0 @@ -# Use ubuntu:22.04 as the base image -FROM ubuntu:22.04 - -# Set working directory -WORKDIR /app - -# Set timezone and non-interactive frontend for apt -ARG DEBIAN_FRONTEND=noninteractive -ARG TZ=Europe/Minsk -ENV TZ=${TZ} LANG=C.UTF-8 LC_ALL=C.UTF-8 - -# Install necessary system packages for Playwright, GeoIP, Xvfb, and VNC -RUN apt-get update && apt-get install -y --no-install-recommends \ - # From user example - vim lsof unzip wget ca-certificates \ - # From existing Dockerfile, kept for completeness - libgeoip1 \ - dbus-x11 \ - xvfb \ - xserver-common \ - xauth \ - x11-xkb-utils \ - xfonts-base \ - procps \ - libgl1-mesa-dri \ - x11vnc \ - fluxbox \ - libnss3 libnspr4 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1 libpango-1.0-0 libcairo2 libasound2 \ - libgtk-3-0 libx11-xcb1 fonts-liberation tzdata \ - xauth util-linux x11-xserver-utils \ - curl \ - && \ - # Configure timezone - ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime && \ - dpkg-reconfigure -f noninteractive tzdata && \ - rm -rf /var/lib/apt/lists/* - -# Add build-time argument for VNC password -ARG VNC_PASSWORD="vncpassword" - -# Set up VNC password from build argument -RUN mkdir -p /root/.vnc && \ - x11vnc -storepasswd "${VNC_PASSWORD}" /root/.vnc/passwd - -# Install Miniconda -RUN wget --no-check-certificate https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ - bash /tmp/miniconda.sh -b -p /opt/conda && \ - rm /tmp/miniconda.sh - -ENV PATH="/opt/conda/bin:$PATH" - -# Create conda environment and configure it -RUN conda init bash && \ - conda config --set always_yes yes && \ - conda tos accept --override-channels --channel defaults && \ - conda create -n camo python=3.11 -y - -# Install Python dependencies in conda environment -COPY requirements.txt . -RUN conda run -n camo pip install --no-cache-dir -r requirements.txt - -# Install Playwright browsers for version 1.49 -RUN conda run -n camo playwright install --with-deps - -# Pre-download and cache Camoufox to speed up startup -RUN conda run -n camo camoufox fetch - -# Copy the server script into the image -COPY camoufox_server.py . 
- -# Create directory for extensions and copy them -RUN mkdir /app/extensions -COPY google_sign_in_popup_blocker-1.0.2.xpi /app/extensions/ -COPY spoof_timezone-0.3.4.xpi /app/extensions/ -COPY youtube_ad_auto_skipper-0.6.0.xpi /app/extensions/ - -# Expose the default port Camoufox might use (adjust if needed) -# This is informational; the actual port mapping is in docker-compose. -EXPOSE 12345 -# Expose VNC port -EXPOSE 5900 - -# Copy the wrapper script and make it executable -COPY start_camoufox.sh /app/ -RUN chmod +x /app/start_camoufox.sh && \ - sed -i 's/\r$//' /app/start_camoufox.sh - -# Configure Xvfb resolution via build arguments -ARG RESOLUTION="1920x1080x24" -ENV XVFB_RES="${RESOLUTION}" \ - DISPLAY=":99" \ - XAUTHORITY="/tmp/.Xauth" - -# Create Xauth setup (mcookie installed in previous apt-get) -RUN touch /tmp/.Xauth && \ - chmod 644 /tmp/.Xauth && \ - echo "#!/bin/bash" > /init_x11.sh && \ - echo "xauth add \$DISPLAY . \$(mcookie)" >> /init_x11.sh && \ - echo "xhost +local:" >> /init_x11.sh && \ - chmod +x /init_x11.sh - -# Proper ENTRYPOINT using shell form -#ENTRYPOINT ["/bin/bash", "-c", "source /init_x11.sh && exec xvfb-run --auto-servernum --server-args \"-screen 0 ${XVFB_RES} ${XVFB_ARGS}\" /app/start_camoufox.sh"] - -ENTRYPOINT ["/bin/bash", "-c", "\ - rm -f /tmp/.X99-lock && \ - Xvfb :99 -screen 0 ${XVFB_RES} -ac & \ - export DISPLAY=:99 && \ - sleep 1 && \ - touch /tmp/.Xauth && \ - xauth add :99 . $(mcookie) && \ - xhost +local: && \ - source /init_x11.sh && \ - exec /app/start_camoufox.sh \"$@\"", "camoufox-entrypoint"] diff --git a/airflow/camoufox/camoufox_server.py b/airflow/camoufox/camoufox_server.py deleted file mode 100644 index c4c9b4d..0000000 --- a/airflow/camoufox/camoufox_server.py +++ /dev/null @@ -1,452 +0,0 @@ -#!/usr/bin/env python3 -import re -import argparse -import atexit -import shutil -import logging -import sys -import os -import psutil -import time -import threading -import signal -import asyncio -import websockets -from collections import deque, defaultdict -from datetime import datetime, timedelta -from camoufox.server import launch_server - -# Global variables for resource tracking -active_connections = defaultdict(int) # Track connections per endpoint -max_connections = defaultdict(int) -resource_stats = {} -server_instances = {} # Track multiple server instances -shutdown_requested = False -endpoint_locks = defaultdict(threading.Lock) # Locks for each endpoint -memory_restart_threshold = 1800 # MB - warn when exceeded -restart_in_progress = False - -# Enhanced monitoring metrics -connection_pool_metrics = { - 'total_acquired': 0, - 'total_released': 0, - 'total_reused': 0, - 'pool_size': 0, - 'active_contexts': 0 -} - -def parse_proxy_url(url): - """Parse proxy URL in format proto://user:pass@host:port""" - pattern = r'([^:]+)://(?:([^:]+):([^@]+)@)?([^:]+):(\d+)' - match = re.match(pattern, url) - if not match: - raise ValueError('Invalid proxy URL format. 
Expected proto://[user:pass@]host:port') - - proto, username, password, host, port = match.groups() - - # Ensure username and password are strings, not None - proxy_config = { - 'server': f'{proto}://{host}:{port}', - 'username': username or '', - 'password': password or '' - } - - # Remove empty credentials - if not proxy_config['username']: - del proxy_config['username'] - if not proxy_config['password']: - del proxy_config['password'] - - return proxy_config - -def monitor_resources(server_ports, proxy_url): - """Monitor system resources and log warnings when thresholds are exceeded""" - global active_connections, max_connections, resource_stats, shutdown_requested, restart_in_progress - global connection_pool_metrics - - logging.info(f"Resource monitor started for proxy '{proxy_url}' on ports {server_ports}") - log_counter = 0 - while not shutdown_requested: - log_counter += 1 - try: - # Get system resource usage - cpu_percent = psutil.cpu_percent(interval=1) - memory = psutil.virtual_memory() - memory_percent = memory.percent - - # Get current process info - current_process = psutil.Process() - process_memory = current_process.memory_info() - process_cpu = current_process.cpu_percent() - - # Update active connections using psutil - all_connections = psutil.net_connections(kind='inet') - new_active_connections = defaultdict(int) - for conn in all_connections: - if conn.status == psutil.CONN_ESTABLISHED and conn.laddr.port in server_ports: - new_active_connections[conn.laddr.port] += 1 - - active_connections.clear() - active_connections.update(new_active_connections) - - for port, count in active_connections.items(): - max_connections[port] = max(max_connections.get(port, 0), count) - - connection_pool_metrics['active_contexts'] = sum(active_connections.values()) - - # Update resource stats - resource_stats = { - 'cpu_percent': cpu_percent, - 'memory_percent': memory_percent, - 'process_memory_mb': process_memory.rss / 1024 / 1024, - 'process_cpu_percent': process_cpu, - 'total_active_connections': sum(active_connections.values()), - 'active_connections_per_endpoint': dict(active_connections), - 'max_connections': dict(max_connections), - 'connection_pool_metrics': dict(connection_pool_metrics) - } - - # Log resource usage periodically - if cpu_percent > 80 or memory_percent > 80: - logging.info(f"RESOURCE STATS - CPU: {cpu_percent}%, Memory: {memory_percent}%, " - f"Process Memory: {resource_stats['process_memory_mb']:.1f}MB, " - f"Total Active Connections: {resource_stats['total_active_connections']}") - - # Log connection pool metrics - pool_metrics = resource_stats['connection_pool_metrics'] - logging.info(f"POOL METRICS - Acquired: {pool_metrics['total_acquired']}, " - f"Released: {pool_metrics['total_released']}, " - f"Reused: {pool_metrics['total_reused']}, " - f"Pool Size: {pool_metrics['pool_size']}, " - f"Active Contexts: {pool_metrics['active_contexts']}") - - # Warning thresholds - if cpu_percent > 85: - logging.warning(f"HIGH CPU USAGE: {cpu_percent}%") - - if memory_percent > 85: - logging.warning(f"HIGH MEMORY USAGE: {memory_percent}%") - - if resource_stats['total_active_connections'] > 100: - logging.warning(f"HIGH TOTAL CONNECTION COUNT: {resource_stats['total_active_connections']} active connections") - - if process_memory.rss > 2 * 1024 * 1024 * 1024: # 2GB - logging.warning(f"HIGH PROCESS MEMORY: {process_memory.rss / 1024 / 1024:.1f}MB") - - # Safety net: Warn instead of restart if memory exceeds threshold - if resource_stats['process_memory_mb'] > 
memory_restart_threshold: - logging.warning(f"MEMORY THRESHOLD EXCEEDED: {resource_stats['process_memory_mb']}MB > {memory_restart_threshold}MB") - logging.warning("Manual intervention required - memory usage critical but restart disabled") - logging.warning("Consider adding new camoufox instances or reducing concurrent workers") - - # Add metric for monitoring instead of restart - logging.info(f"MEMORY_ALERT: {resource_stats['process_memory_mb']}MB used on {sum(active_connections.values())} active connections") - - # Add a heartbeat log every minute (30s * 2) - if log_counter % 2 == 0: - logging.info( - f"HEARTBEAT - Proxy: {proxy_url} | Ports: {server_ports} | " - f"Memory: {resource_stats.get('process_memory_mb', 0):.1f}MB | " - f"CPU: {resource_stats.get('cpu_percent', 0)}% | " - f"Active Connections: {resource_stats.get('total_active_connections', 0)}" - ) - - except Exception as e: - logging.error(f"Error in resource monitoring: {e}") - - time.sleep(30) # Check every 30 seconds - -def graceful_shutdown(signum, frame): - """Handle graceful shutdown""" - global shutdown_requested, server_instances, restart_in_progress - logging.info("Graceful shutdown requested") - shutdown_requested = True - - # Log final resource stats - if resource_stats: - logging.info(f"Final resource stats: {resource_stats}") - - # Log final connection pool metrics - logging.info(f"Final connection pool metrics: {connection_pool_metrics}") - - # The server instances are running in daemon threads and will be terminated - # when the main process exits. No explicit shutdown call is needed. - logging.info("Shutting down all Camoufox server instances...") - - # If restart was requested, exit with special code - if restart_in_progress: - logging.info("Restarting Camoufox server...") - os.execv(sys.executable, [sys.executable] + sys.argv) - - sys.exit(0) - -def create_server_instance(port, base_config): - """ - Creates and runs a new Camoufox server instance on the specified port. - NOTE: The `launch_server` function is a blocking call that runs an event loop - and does not return. Therefore, any code after it in this function is unreachable. - """ - config = base_config.copy() - config['port'] = port - - try: - # This function blocks and runs the server indefinitely. - launch_server(**config) - except Exception as e: - # If an error occurs, log it. The daemon thread will then terminate. - logging.error(f'Error launching server on port {port}: {str(e)}', exc_info=True) - -def check_listening_ports(expected_ports, log_results=True): - """Checks which of the expected ports are actively listening.""" - successful_ports = [] - failed_ports = [] - try: - # Check all system-wide connections, not just for the current process, - # as the server may run in a child process. 
- listening_ports = { - conn.laddr.port for conn in psutil.net_connections(kind='inet') - if conn.status == psutil.CONN_LISTEN - } - - for port in expected_ports: - if port in listening_ports: - successful_ports.append(port) - else: - failed_ports.append(port) - - if log_results: - logging.info("--- Verifying Listening Ports ---") - if successful_ports: - logging.info(f"Successfully listening on ports: {sorted(successful_ports)}") - if failed_ports: - logging.error(f"FAILED to listen on ports: {sorted(failed_ports)}") - logging.info("---------------------------------") - - except Exception as e: - if log_results: - logging.error(f"Could not verify listening ports: {e}") - - return successful_ports, failed_ports - -def main(): - parser = argparse.ArgumentParser(description='Launch Camoufox server with optional proxy support') - parser.add_argument('--proxy-url', help='Optional proxy URL in format proto://user:pass@host:port (supports http, https, socks5)') - parser.add_argument('--ws-host', default='0.0.0.0', help='WebSocket server host address (e.g., localhost, 0.0.0.0)') - parser.add_argument('--port', type=int, default=12345, help='Base WebSocket server port') - parser.add_argument('--num-instances', type=int, default=4, help='Number of server instances to create') - parser.add_argument('--port-range', type=str, help='Port range in format start-end (e.g., 12345-12349)') - parser.add_argument('--base-proxy-port', type=int, default=1080, help='Base proxy port for mapping to camoufox instances') - parser.add_argument('--ws-path', default='camoufox', help='Base WebSocket server path') - parser.add_argument('--headless', action='store_true', help='Run browser in headless mode') - parser.add_argument('--geoip', nargs='?', const=True, default=False, - help='Enable geo IP protection. Can specify IP address or use True for automatic detection') - parser.add_argument('--locale', help='Locale(s) to use (e.g. "en-US" or "en-US,fr-FR")') - parser.add_argument('--block-images', action='store_true', help='Block image requests to save bandwidth') - parser.add_argument('--block-webrtc', action='store_true', help='Block WebRTC entirely') - parser.add_argument('--humanize', nargs='?', const=True, type=float, - help='Humanize cursor movements. Can specify max duration in seconds') - parser.add_argument('--extensions', type=str, - help='Comma-separated list of extension paths to enable (XPI files or extracted directories). Use quotes if paths contain spaces.') - parser.add_argument('--persistent-context', action='store_true', help='Enable persistent browser context.') - parser.add_argument('--user-data-dir', type=str, help='Directory to store persistent browser data.') - parser.add_argument('--preferences', type=str, help='Comma-separated list of Firefox preferences (e.g. 
"key1=value1,key2=value2")') - # Add resource monitoring arguments - parser.add_argument('--monitor-resources', action='store_true', help='Enable resource monitoring') - parser.add_argument('--max-connections-per-instance', type=int, default=50, help='Maximum concurrent connections per instance') - parser.add_argument('--connection-timeout', type=int, default=300, help='Connection timeout in seconds') - parser.add_argument('--memory-restart-threshold', type=int, default=1800, help='Memory threshold (MB) to trigger warning') - - args = parser.parse_args() - - # Set memory restart threshold - global memory_restart_threshold - memory_restart_threshold = args.memory_restart_threshold - - # Set up signal handlers for graceful shutdown - signal.signal(signal.SIGTERM, graceful_shutdown) - signal.signal(signal.SIGINT, graceful_shutdown) - - proxy_config = None - if args.proxy_url: - try: - proxy_config = parse_proxy_url(args.proxy_url) - print(f"Using proxy configuration: {args.proxy_url}") - except ValueError as e: - print(f'Error parsing proxy URL: {e}') - return - else: - print("No proxy URL provided. Running without proxy.") - - # --- Basic Logging Configuration --- - log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - log_handler = logging.StreamHandler(sys.stdout) - log_handler.setFormatter(log_formatter) - - root_logger = logging.getLogger() - for handler in root_logger.handlers[:]: - root_logger.removeHandler(handler) - root_logger.addHandler(log_handler) - root_logger.setLevel(logging.DEBUG) - - logging.debug("DEBUG logging enabled. Starting Camoufox server setup...") - - # --- End Logging Configuration --- - - try: - # --- Check DISPLAY environment variable --- - display_var = os.environ.get('DISPLAY') - logging.info(f"Value of DISPLAY environment variable: {display_var}") - # --- End Check --- - - # Build base config dictionary - base_config = { - 'headless': False, # Force non-headless mode for VNC - 'geoip': True, # Always enable GeoIP when a proxy is used - 'host': args.ws_host, - 'ws_path': args.ws_path, - 'env': {'DISPLAY': os.environ.get('DISPLAY')} - } - - # Add proxy to config only if it was successfully parsed - if proxy_config: - base_config['proxy'] = proxy_config - - # Add optional parameters - if args.locale: - base_config['locale'] = args.locale - if args.block_images: - base_config['block_images'] = True - if args.block_webrtc: - base_config['block_webrtc'] = True - if args.humanize: - base_config['humanize'] = args.humanize if isinstance(args.humanize, float) else True - - # Add persistent context options - if args.persistent_context: - base_config['persistent_context'] = True - if args.user_data_dir: - base_config['user_data_dir'] = args.user_data_dir - - # Add Firefox preferences - if args.preferences: - base_config['preferences'] = {} - prefs_list = args.preferences.split(',') - for pref in prefs_list: - if '=' in pref: - key, value = pref.split('=', 1) - if value.lower() in ('true', 'false'): - base_config['preferences'][key.strip()] = value.lower() == 'true' - elif value.isdigit(): - base_config['preferences'][key.strip()] = int(value) - else: - base_config['preferences'][key.strip()] = value.strip() - print(f"Applied Firefox preferences: {base_config['preferences']}") - - # Exclude default addons including uBlock Origin - base_config['exclude_addons'] = ['ublock_origin', 'default_addons'] - print('Excluded default addons including uBlock Origin') - - # Add custom extensions if specified - if args.extensions: - from pathlib 
import Path - valid_extensions = [] - extensions_list = [ext.strip() for ext in args.extensions.split(',')] - temp_dirs_to_cleanup = [] - - def cleanup_temp_dirs(): - for temp_dir in temp_dirs_to_cleanup: - try: - shutil.rmtree(temp_dir) - print(f"Cleaned up temporary extension directory: {temp_dir}") - except Exception as e: - print(f"Warning: Failed to clean up temp dir {temp_dir}: {e}") - atexit.register(cleanup_temp_dirs) - - for ext_path in extensions_list: - ext_path = Path(ext_path).absolute() - - if not ext_path.exists(): - print(f"Warning: Extension path does not exist: {ext_path}") - continue - - if ext_path.is_file() and ext_path.suffix == '.xpi': - import tempfile - import zipfile - - try: - temp_dir = tempfile.mkdtemp(prefix=f"camoufox_ext_{ext_path.stem}_") - temp_dirs_to_cleanup.append(temp_dir) - with zipfile.ZipFile(ext_path, 'r') as zip_ref: - zip_ref.extractall(temp_dir) - valid_extensions.append(temp_dir) - print(f"Successfully loaded extension: {ext_path.name} (extracted to {temp_dir})") - except Exception as e: - print(f"Error loading extension {ext_path}: {str(e)}") - if temp_dir in temp_dirs_to_cleanup: - temp_dirs_to_cleanup.remove(temp_dir) - continue - elif ext_path.is_dir(): - if (ext_path / 'manifest.json').exists(): - valid_extensions.append(str(ext_path)) - print(f"Successfully loaded extension: {ext_path.name}") - else: - print(f"Warning: Directory is not a valid Firefox extension: {ext_path}") - else: - print(f"Warning: Invalid extension path: {ext_path}") - - if valid_extensions: - base_config['addons'] = valid_extensions - print(f"Loaded {len(valid_extensions)} extensions") - else: - print("Warning: No valid extensions were loaded") - - # Create multiple server instances - ports_to_create = [] - if args.port_range: - start_port, end_port = map(int, args.port_range.split('-')) - ports_to_create = list(range(start_port, end_port + 1)) - else: - # Create instances starting from base port - ports_to_create = [args.port + i for i in range(args.num_instances)] - - # Start resource monitoring thread if enabled, passing it the ports to watch. - if args.monitor_resources: - # Pass the proxy URL to the monitor for more descriptive logging - monitor_thread = threading.Thread(target=monitor_resources, args=(ports_to_create, args.proxy_url), daemon=True) - monitor_thread.start() - - print(f"Attempting to launch {len(ports_to_create)} Camoufox server instances on ports: {ports_to_create}") - - for port in ports_to_create: - # launch_server is blocking, so we run each instance in its own thread. - thread = threading.Thread(target=create_server_instance, args=(port, base_config), daemon=True) - thread.start() - # Add a small delay between launching instances to avoid race conditions - # in the underlying Playwright/Camoufox library. - time.sleep(1) - - # The script's main purpose is now to launch the daemon threads and then wait. - # The actual readiness is determined by the start_camoufox.sh script. - print("Server threads launched. Main process will now wait for shutdown signal.") - - # Log startup resource usage - process = psutil.Process() - memory_info = process.memory_info() - logging.info(f"Server started. 
Initial memory usage: {memory_info.rss / 1024 / 1024:.1f}MB") - - # Keep the main thread alive to host the daemon threads and handle shutdown signals - try: - while not shutdown_requested: - time.sleep(1) - except KeyboardInterrupt: - logging.info("Received KeyboardInterrupt, shutting down...") - - except Exception as e: - print(f'Error launching server: {str(e)}') - logging.error(f'Error launching server: {str(e)}', exc_info=True) - if 'Browser.setBrowserProxy' in str(e): - print('Note: The browser may not support SOCKS5 proxy authentication') - return - -if __name__ == '__main__': - main() diff --git a/airflow/camoufox/google_sign_in_popup_blocker-1.0.2.xpi b/airflow/camoufox/google_sign_in_popup_blocker-1.0.2.xpi deleted file mode 100644 index 8f12e5c..0000000 Binary files a/airflow/camoufox/google_sign_in_popup_blocker-1.0.2.xpi and /dev/null differ diff --git a/airflow/camoufox/requirements.txt b/airflow/camoufox/requirements.txt deleted file mode 100644 index 91b84b4..0000000 --- a/airflow/camoufox/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -camoufox[geoip] -playwright==1.49 -psutil -websockets diff --git a/airflow/camoufox/spoof_timezone-0.3.4.xpi b/airflow/camoufox/spoof_timezone-0.3.4.xpi deleted file mode 100644 index 26effcb..0000000 Binary files a/airflow/camoufox/spoof_timezone-0.3.4.xpi and /dev/null differ diff --git a/airflow/camoufox/start_camoufox.sh b/airflow/camoufox/start_camoufox.sh deleted file mode 100755 index c064141..0000000 --- a/airflow/camoufox/start_camoufox.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -set -e - -# Global PIDs for cleanup -VNC_PID="" -FLUXBOX_PID="" - -# Cleanup function to terminate background processes on script exit -cleanup() { - echo "Cleaning up background processes..." - # Kill processes in reverse order of startup. The '|| true' prevents errors if a process is already dead. - if [ -n "$FLUXBOX_PID" ]; then kill -TERM $FLUXBOX_PID 2>/dev/null || true; fi - if [ -n "$VNC_PID" ]; then kill -TERM $VNC_PID 2>/dev/null || true; fi - echo "Cleanup complete." -} -trap cleanup EXIT - -# Xvfb is now started by xvfb-run in the Dockerfile ENTRYPOINT. -# The DISPLAY variable will be set automatically by xvfb-run. - - -# It's safer to source conda.sh directly -source /opt/conda/etc/profile.d/conda.sh -conda activate camo - -# Ensure the persistent data directory exists before we try to use it for the lock file. -mkdir -p /app/persistent-data - -# --- One-time Initialization --- -# On first launch, multiple instances starting at once can cause a race condition -# during the download/extraction of the Camoufox distribution. To prevent this, -# we run a single dummy instance first, wait for it to become healthy (which -# indicates setup is complete), and then kill it. A lock file ensures this -# only happens on the very first start of the container. -INIT_LOCK_FILE="/app/persistent-data/camoufox.initialized" -if [ ! -f "$INIT_LOCK_FILE" ]; then - echo "First start detected. Performing one-time Camoufox initialization..." - - # Start a single dummy instance in the background, logging to a file. - # It will perform the necessary downloads and setup. - INIT_LOG="/tmp/camoufox_init.log" - rm -f "$INIT_LOG" # Ensure log file is clean before starting - python3 -u camoufox_server.py --port 9999 --num-instances 1 > "$INIT_LOG" 2>&1 & - INIT_PID=$! - - # Wait for the server to log that it's started, which is a reliable signal - # that all one-time downloads and setup tasks are complete. - echo "Waiting for initialization to complete (max 120s)..." 
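Once an instance reports its websocket endpoint, remote Playwright clients attach to it instead of launching a browser themselves. A minimal sketch of such a client, assuming an instance is reachable at `ws://camoufox-host:12345/camoufox` (host is a placeholder; port and path follow the `--port`/`--ws-path` defaults above, and the exact URL including any suffix is whatever the server prints after "Websocket endpoint:"):

```python
# Sketch of a remote client for one of the Camoufox instances launched above.
# The endpoint below is a placeholder; use the URL from the server's startup log.
from playwright.sync_api import sync_playwright

WS_ENDPOINT = "ws://camoufox-host:12345/camoufox"

with sync_playwright() as p:
    browser = p.firefox.connect(WS_ENDPOINT)  # attach to the remote Firefox
    page = browser.new_page()
    page.goto("https://example.com")
    print(page.title())
    browser.close()
```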
- - end_time=$((SECONDS + 120)) - INIT_SUCCESS=false - while [ $SECONDS -lt $end_time ]; do - # The camoufox library logs "Websocket endpoint:" when it's ready. - # This is a more reliable signal than a custom log message from our script. - if grep -q "Websocket endpoint: ws://0.0.0.0:9999" "$INIT_LOG"; then - INIT_SUCCESS=true - break - fi - # Also check if the initialization process died unexpectedly - if ! ps -p $INIT_PID > /dev/null; then - echo "Initialization process died unexpectedly." - break - fi - sleep 2 - done - - if [ "$INIT_SUCCESS" = true ]; then - echo "Initialization successful." - else - echo "Initialization timed out or failed. The main server might fail to start." - echo "--- Initialization Log ---" - cat "$INIT_LOG" - echo "--------------------------" - fi - - # Cleanly terminate the dummy server. - echo "Shutting down initialization server..." - kill -TERM $INIT_PID - wait $INIT_PID 2>/dev/null || true # Wait for it to exit, ignore error code - - # Create the lock file to prevent this from running again. - touch "$INIT_LOCK_FILE" - echo "Initialization complete. Proceeding with normal startup." -else - echo "Initialization already complete. Skipping." -fi -# --- End Initialization --- - -# Start supporting services (VNC, window manager) -echo "Starting VNC server on port 5900..." -# The -noxdamage flag is added to improve compatibility with VNC clients like the one on macOS. -# The '-localhost no' part was likely a typo and has been removed as the default is to allow non-localhost connections. -x11vnc -forever -usepw -display $DISPLAY -rfbport 5900 -o /var/log/x11vnc.log -shared -noxdamage & -VNC_PID=$! - -echo "Starting Fluxbox window manager..." -fluxbox > /var/log/fluxbox.log 2>&1 & -FLUXBOX_PID=$! - -# Start main application -echo "Starting Camoufox server with arguments: $@" -exec python3 -u camoufox_server.py "$@" diff --git a/airflow/camoufox/youtube_ad_auto_skipper-0.6.0.xpi b/airflow/camoufox/youtube_ad_auto_skipper-0.6.0.xpi deleted file mode 100644 index 51173f0..0000000 Binary files a/airflow/camoufox/youtube_ad_auto_skipper-0.6.0.xpi and /dev/null differ diff --git a/airflow/config/.DS_Store b/airflow/config/.DS_Store deleted file mode 100644 index 5008ddf..0000000 Binary files a/airflow/config/.DS_Store and /dev/null differ diff --git a/airflow/config/airflow.cfg b/airflow/config/airflow.cfg deleted file mode 100644 index 96f558d..0000000 --- a/airflow/config/airflow.cfg +++ /dev/null @@ -1,3167 +0,0 @@ -[core] -# The folder where your airflow pipelines live, most likely a -# subfolder in a code repository. This path must be absolute. -# -# Variable: AIRFLOW__CORE__DAGS_FOLDER -# -dags_folder = /opt/airflow/dags - -# Hostname by providing a path to a callable, which will resolve the hostname. -# The format is "package.function". -# -# For example, default value ``airflow.utils.net.getfqdn`` means that result from patched -# version of `socket.getfqdn() `__, -# see related `CPython Issue `__. -# -# No argument should be required in the function specified. -# If using IP address as hostname is preferred, use value ``airflow.utils.net.get_host_ip_address`` -# -# Variable: AIRFLOW__CORE__HOSTNAME_CALLABLE -# -hostname_callable = airflow.utils.net.getfqdn - -# A callable to check if a python file has airflow dags defined or not and should -# return ``True`` if it has dags otherwise ``False``. -# If this is not provided, Airflow uses its own heuristic rules. -# -# The function should have the following signature -# -# .. 
code-block:: python -# -# def func_name(file_path: str, zip_file: zipfile.ZipFile | None = None) -> bool: ... -# -# Variable: AIRFLOW__CORE__MIGHT_CONTAIN_DAG_CALLABLE -# -might_contain_dag_callable = airflow.utils.file.might_contain_dag_via_default_heuristic - -# Default timezone in case supplied date times are naive -# can be `UTC` (default), `system`, or any `IANA ` -# timezone string (e.g. Europe/Amsterdam) -# -# Variable: AIRFLOW__CORE__DEFAULT_TIMEZONE -# -default_timezone = utc - -# The executor class that airflow should use. Choices include -# ``SequentialExecutor``, ``LocalExecutor``, ``CeleryExecutor``, -# ``KubernetesExecutor``, ``CeleryKubernetesExecutor``, ``LocalKubernetesExecutor`` or the -# full import path to the class when using a custom executor. -# -# Variable: AIRFLOW__CORE__EXECUTOR -# -executor = CeleryExecutor - -# The auth manager class that airflow should use. Full import path to the auth manager class. -# -# Variable: AIRFLOW__CORE__AUTH_MANAGER -# -auth_manager = airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager - -# This defines the maximum number of task instances that can run concurrently per scheduler in -# Airflow, regardless of the worker count. Generally this value, multiplied by the number of -# schedulers in your cluster, is the maximum number of task instances with the running -# state in the metadata database. Setting this value to zero allows unlimited parallelism. -# -# Variable: AIRFLOW__CORE__PARALLELISM -# -parallelism = 32 - -# The maximum number of task instances allowed to run concurrently in each DAG. To calculate -# the number of tasks that is running concurrently for a DAG, add up the number of running -# tasks for all DAG runs of the DAG. This is configurable at the DAG level with ``max_active_tasks``, -# which is defaulted as ``[core] max_active_tasks_per_dag``. -# -# An example scenario when this would be useful is when you want to stop a new dag with an early -# start date from stealing all the executor slots in a cluster. -# -# Variable: AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG -# -max_active_tasks_per_dag = 16 - -# Are DAGs paused by default at creation -# -# Variable: AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION -# -dags_are_paused_at_creation = True - -# The maximum number of active DAG runs per DAG. The scheduler will not create more DAG runs -# if it reaches the limit. This is configurable at the DAG level with ``max_active_runs``, -# which is defaulted as ``[core] max_active_runs_per_dag``. -# -# Variable: AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG -# -max_active_runs_per_dag = 16 - -# (experimental) The maximum number of consecutive DAG failures before DAG is automatically paused. -# This is also configurable per DAG level with ``max_consecutive_failed_dag_runs``, -# which is defaulted as ``[core] max_consecutive_failed_dag_runs_per_dag``. -# If not specified, then the value is considered as 0, -# meaning that the dags are never paused out by default. -# -# Variable: AIRFLOW__CORE__MAX_CONSECUTIVE_FAILED_DAG_RUNS_PER_DAG -# -max_consecutive_failed_dag_runs_per_dag = 0 - -# The name of the method used in order to start Python processes via the multiprocessing module. -# This corresponds directly with the options available in the Python docs: -# `multiprocessing.set_start_method -# `__ -# must be one of the values returned by `multiprocessing.get_all_start_methods() -# `__. 
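
For illustration, the concurrency caps described above can also be overridden per DAG instead of cluster-wide; a minimal sketch with a hypothetical DAG id and dates:

```python
import pendulum
from airflow import DAG

# Hypothetical DAG: per-DAG overrides of the [core] defaults described above.
with DAG(
    dag_id="example_limits",
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    schedule=None,
    max_active_runs=1,    # overrides [core] max_active_runs_per_dag for this DAG
    max_active_tasks=4,   # overrides [core] max_active_tasks_per_dag for this DAG
) as dag:
    pass
```
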
-# -# Example: mp_start_method = fork -# -# Variable: AIRFLOW__CORE__MP_START_METHOD -# -# mp_start_method = - -# Whether to load the DAG examples that ship with Airflow. It's good to -# get started, but you probably want to set this to ``False`` in a production -# environment -# -# Variable: AIRFLOW__CORE__LOAD_EXAMPLES -# -load_examples = False - -# Path to the folder containing Airflow plugins -# -# Variable: AIRFLOW__CORE__PLUGINS_FOLDER -# -plugins_folder = /opt/airflow/plugins - -# Should tasks be executed via forking of the parent process -# -# * ``False``: Execute via forking of the parent process -# * ``True``: Spawning a new python process, slower than fork, but means plugin changes picked -# up by tasks straight away -# -# Variable: AIRFLOW__CORE__EXECUTE_TASKS_NEW_PYTHON_INTERPRETER -# -execute_tasks_new_python_interpreter = False - -# Secret key to save connection passwords in the db -# -# Variable: AIRFLOW__CORE__FERNET_KEY -# -fernet_key = - -# Whether to disable pickling dags -# -# Variable: AIRFLOW__CORE__DONOT_PICKLE -# -donot_pickle = True - -# How long before timing out a python file import -# -# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_TIMEOUT -# -dagbag_import_timeout = 30.0 - -# Should a traceback be shown in the UI for dagbag import errors, -# instead of just the exception message -# -# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_ERROR_TRACEBACKS -# -dagbag_import_error_tracebacks = True - -# If tracebacks are shown, how many entries from the traceback should be shown -# -# Variable: AIRFLOW__CORE__DAGBAG_IMPORT_ERROR_TRACEBACK_DEPTH -# -dagbag_import_error_traceback_depth = 2 - -# How long before timing out a DagFileProcessor, which processes a dag file -# -# Variable: AIRFLOW__CORE__DAG_FILE_PROCESSOR_TIMEOUT -# -dag_file_processor_timeout = 50 - -# The class to use for running task instances in a subprocess. -# Choices include StandardTaskRunner, CgroupTaskRunner or the full import path to the class -# when using a custom task runner. -# -# Variable: AIRFLOW__CORE__TASK_RUNNER -# -task_runner = StandardTaskRunner - -# If set, tasks without a ``run_as_user`` argument will be run with this user -# Can be used to de-elevate a sudo user running Airflow when executing tasks -# -# Variable: AIRFLOW__CORE__DEFAULT_IMPERSONATION -# -default_impersonation = - -# What security module to use (for example kerberos) -# -# Variable: AIRFLOW__CORE__SECURITY -# -security = - -# Turn unit test mode on (overwrites many configuration options with test -# values at runtime) -# -# Variable: AIRFLOW__CORE__UNIT_TEST_MODE -# -unit_test_mode = False - -# Whether to enable pickling for xcom (note that this is insecure and allows for -# RCE exploits). -# -# Variable: AIRFLOW__CORE__ENABLE_XCOM_PICKLING -# -enable_xcom_pickling = False - -# What classes can be imported during deserialization. This is a multi line value. -# The individual items will be parsed as a pattern to a glob function. -# Python built-in classes (like dict) are always allowed. -# -# Variable: AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES -# -allowed_deserialization_classes = airflow.* - -# What classes can be imported during deserialization. This is a multi line value. -# The individual items will be parsed as regexp patterns. -# This is a secondary option to ``[core] allowed_deserialization_classes``. 
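
As an aside, `fernet_key` is left empty in this file (it may be injected through the `AIRFLOW__CORE__FERNET_KEY` environment variable elsewhere); without a key, connection passwords are stored unencrypted. One common way to generate a value, using the `cryptography` package Airflow already depends on:

```python
# Generate a Fernet key suitable for [core] fernet_key / AIRFLOW__CORE__FERNET_KEY.
from cryptography.fernet import Fernet

print(Fernet.generate_key().decode())
```
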
-# -# Variable: AIRFLOW__CORE__ALLOWED_DESERIALIZATION_CLASSES_REGEXP -# -allowed_deserialization_classes_regexp = - -# When a task is killed forcefully, this is the amount of time in seconds that -# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED -# -# Variable: AIRFLOW__CORE__KILLED_TASK_CLEANUP_TIME -# -killed_task_cleanup_time = 60 - -# Whether to override params with dag_run.conf. If you pass some key-value pairs -# through ``airflow dags backfill -c`` or -# ``airflow dags trigger -c``, the key-value pairs will override the existing ones in params. -# -# Variable: AIRFLOW__CORE__DAG_RUN_CONF_OVERRIDES_PARAMS -# -dag_run_conf_overrides_params = True - -# If enabled, Airflow will only scan files containing both ``DAG`` and ``airflow`` (case-insensitive). -# -# Variable: AIRFLOW__CORE__DAG_DISCOVERY_SAFE_MODE -# -dag_discovery_safe_mode = True - -# The pattern syntax used in the -# `.airflowignore -# `__ -# files in the DAG directories. Valid values are ``regexp`` or ``glob``. -# -# Variable: AIRFLOW__CORE__DAG_IGNORE_FILE_SYNTAX -# -dag_ignore_file_syntax = regexp - -# The number of retries each task is going to have by default. Can be overridden at dag or task level. -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_RETRIES -# -default_task_retries = 3 - -# The number of seconds each task is going to wait by default between retries. Can be overridden at -# dag or task level. -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_RETRY_DELAY -# -default_task_retry_delay = 300 - -# The maximum delay (in seconds) each task is going to wait by default between retries. -# This is a global setting and cannot be overridden at task or DAG level. -# -# Variable: AIRFLOW__CORE__MAX_TASK_RETRY_DELAY -# -max_task_retry_delay = 86400 - -# The weighting method used for the effective total priority weight of the task -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_WEIGHT_RULE -# -default_task_weight_rule = downstream - -# Maximum possible time (in seconds) that task will have for execution of auxiliary processes -# (like listeners, mini scheduler...) after task is marked as success.. -# -# Variable: AIRFLOW__CORE__TASK_SUCCESS_OVERTIME -# -task_success_overtime = 20 - -# The default task execution_timeout value for the operators. Expected an integer value to -# be passed into timedelta as seconds. If not specified, then the value is considered as None, -# meaning that the operators are never timed out by default. -# -# Variable: AIRFLOW__CORE__DEFAULT_TASK_EXECUTION_TIMEOUT -# -default_task_execution_timeout = 3600 - -# Updating serialized DAG can not be faster than a minimum interval to reduce database write rate. -# -# Variable: AIRFLOW__CORE__MIN_SERIALIZED_DAG_UPDATE_INTERVAL -# -min_serialized_dag_update_interval = 30 - -# If ``True``, serialized DAGs are compressed before writing to DB. -# -# .. note:: -# -# This will disable the DAG dependencies view -# -# Variable: AIRFLOW__CORE__COMPRESS_SERIALIZED_DAGS -# -compress_serialized_dags = False - -# Fetching serialized DAG can not be faster than a minimum interval to reduce database -# read rate. This config controls when your DAGs are updated in the Webserver -# -# Variable: AIRFLOW__CORE__MIN_SERIALIZED_DAG_FETCH_INTERVAL -# -min_serialized_dag_fetch_interval = 10 - -# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store -# in the Database. -# All the template_fields for each of Task Instance are stored in the Database. 
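
For illustration, the retry and timeout defaults above apply cluster-wide, but individual DAGs and tasks can override them through `default_args` or operator arguments. A hypothetical sketch:

```python
from datetime import timedelta

import pendulum
from airflow import DAG
from airflow.operators.bash import BashOperator

# Hypothetical DAG: task-level overrides of the retry/timeout defaults above.
default_args = {
    "retries": 5,                                # instead of default_task_retries = 3
    "retry_delay": timedelta(seconds=60),        # instead of default_task_retry_delay = 300
    "retry_exponential_backoff": True,           # backoff is still capped by max_task_retry_delay
    "execution_timeout": timedelta(minutes=30),  # instead of default_task_execution_timeout = 3600
}

with DAG(
    dag_id="example_retries",
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    schedule=None,
    default_args=default_args,
) as dag:
    BashOperator(task_id="flaky_step", bash_command="exit 0")
```
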
-# Keeping this number small may cause an error when you try to view ``Rendered`` tab in -# TaskInstance view for older tasks. -# -# Variable: AIRFLOW__CORE__MAX_NUM_RENDERED_TI_FIELDS_PER_TASK -# -max_num_rendered_ti_fields_per_task = 30 - -# On each dagrun check against defined SLAs -# -# Variable: AIRFLOW__CORE__CHECK_SLAS -# -check_slas = True - -# Path to custom XCom class that will be used to store and resolve operators results -# -# Example: xcom_backend = path.to.CustomXCom -# -# Variable: AIRFLOW__CORE__XCOM_BACKEND -# -xcom_backend = airflow.models.xcom.BaseXCom - -# By default Airflow plugins are lazily-loaded (only loaded when required). Set it to ``False``, -# if you want to load plugins whenever 'airflow' is invoked via cli or loaded from module. -# -# Variable: AIRFLOW__CORE__LAZY_LOAD_PLUGINS -# -lazy_load_plugins = True - -# By default Airflow providers are lazily-discovered (discovery and imports happen only when required). -# Set it to ``False``, if you want to discover providers whenever 'airflow' is invoked via cli or -# loaded from module. -# -# Variable: AIRFLOW__CORE__LAZY_DISCOVER_PROVIDERS -# -lazy_discover_providers = True - -# Hide sensitive **Variables** or **Connection extra json keys** from UI -# and task logs when set to ``True`` -# -# .. note:: -# -# Connection passwords are always hidden in logs -# -# Variable: AIRFLOW__CORE__HIDE_SENSITIVE_VAR_CONN_FIELDS -# -hide_sensitive_var_conn_fields = False - -# A comma-separated list of extra sensitive keywords to look for in variables names or connection's -# extra JSON. -# -# Variable: AIRFLOW__CORE__SENSITIVE_VAR_CONN_NAMES -# -sensitive_var_conn_names = - -# Task Slot counts for ``default_pool``. This setting would not have any effect in an existing -# deployment where the ``default_pool`` is already created. For existing deployments, users can -# change the number of slots using Webserver, API or the CLI -# -# Variable: AIRFLOW__CORE__DEFAULT_POOL_TASK_SLOT_COUNT -# -default_pool_task_slot_count = 128 - -# The maximum list/dict length an XCom can push to trigger task mapping. If the pushed list/dict has a -# length exceeding this value, the task pushing the XCom will be failed automatically to prevent the -# mapped tasks from clogging the scheduler. -# -# Variable: AIRFLOW__CORE__MAX_MAP_LENGTH -# -max_map_length = 1024 - -# The default umask to use for process when run in daemon mode (scheduler, worker, etc.) -# -# This controls the file-creation mode mask which determines the initial value of file permission bits -# for newly created files. -# -# This value is treated as an octal-integer. -# -# Variable: AIRFLOW__CORE__DAEMON_UMASK -# -daemon_umask = 0o002 - -# Class to use as dataset manager. -# -# Example: dataset_manager_class = airflow.datasets.manager.DatasetManager -# -# Variable: AIRFLOW__CORE__DATASET_MANAGER_CLASS -# -# dataset_manager_class = - -# Kwargs to supply to dataset manager. -# -# Example: dataset_manager_kwargs = {"some_param": "some_value"} -# -# Variable: AIRFLOW__CORE__DATASET_MANAGER_KWARGS -# -# dataset_manager_kwargs = - -# Dataset URI validation should raise an exception if it is not compliant with AIP-60. -# By default this configuration is false, meaning that Airflow 2.x only warns the user. -# In Airflow 3, this configuration will be removed, unconditionally enabling strict validation. -# -# Variable: AIRFLOW__CORE__STRICT_DATASET_URI_VALIDATION -# -strict_dataset_uri_validation = False - -# (experimental) Whether components should use Airflow Internal API for DB connectivity. 
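
As an aside, `max_map_length` matters mainly for dynamically mapped tasks: if a task pushes a list longer than this limit as input for `.expand()`, it is failed rather than fanning out that many mapped task instances. A minimal TaskFlow sketch with hypothetical names:

```python
import pendulum
from airflow.decorators import dag, task


@dag(start_date=pendulum.datetime(2024, 1, 1, tz="UTC"), schedule=None)
def mapping_example():
    @task
    def make_batches() -> list[int]:
        # If this list exceeded [core] max_map_length (1024 here), the pushing
        # task would be failed instead of clogging the scheduler.
        return list(range(10))

    @task
    def process(item: int) -> None:
        print(item)

    process.expand(item=make_batches())


mapping_example()
```
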
-# -# Variable: AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION -# -database_access_isolation = False - -# (experimental) Airflow Internal API url. -# Only used if ``[core] database_access_isolation`` is ``True``. -# -# Example: internal_api_url = http://localhost:8080 -# -# Variable: AIRFLOW__CORE__INTERNAL_API_URL -# -# internal_api_url = - -# Secret key used to authenticate internal API clients to core. It should be as random as possible. -# However, when running more than 1 instances of webserver / internal API services, make sure all -# of them use the same ``secret_key`` otherwise calls will fail on authentication. -# The authentication token generated using the secret key has a short expiry time though - make -# sure that time on ALL the machines that you run airflow components on is synchronized -# (for example using ntpd) otherwise you might get "forbidden" errors when the logs are accessed. -# -# Variable: AIRFLOW__CORE__INTERNAL_API_SECRET_KEY -# -internal_api_secret_key = tCnTbEabdFBDLHWoT/LxLw== - -# The ability to allow testing connections across Airflow UI, API and CLI. -# Supported options: ``Disabled``, ``Enabled``, ``Hidden``. Default: Disabled -# Disabled - Disables the test connection functionality and disables the Test Connection button in UI. -# Enabled - Enables the test connection functionality and shows the Test Connection button in UI. -# Hidden - Disables the test connection functionality and hides the Test Connection button in UI. -# Before setting this to Enabled, make sure that you review the users who are able to add/edit -# connections and ensure they are trusted. Connection testing can be done maliciously leading to -# undesired and insecure outcomes. -# See `Airflow Security Model: Capabilities of authenticated UI users -# `__ -# for more details. -# -# Variable: AIRFLOW__CORE__TEST_CONNECTION -# -test_connection = Disabled - -# The maximum length of the rendered template field. If the value to be stored in the -# rendered template field exceeds this size, it's redacted. -# -# Variable: AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH -# -max_templated_field_length = 4096 - -host_docker_socket = /var/run/docker.sock - -[database] -# Path to the ``alembic.ini`` file. You can either provide the file path relative -# to the Airflow home directory or the absolute path if it is located elsewhere. -# -# Variable: AIRFLOW__DATABASE__ALEMBIC_INI_FILE_PATH -# -alembic_ini_file_path = alembic.ini - -# The SQLAlchemy connection string to the metadata database. -# SQLAlchemy supports many different database engines. -# See: `Set up a Database Backend: Database URI -# `__ -# for more details. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_CONN -# -# This is configured via the AIRFLOW__DATABASE__SQL_ALCHEMY_CONN environment variable -# in the docker-compose files, as it differs between master and workers. -# A dummy value is set here to ensure the env var override is picked up. -sql_alchemy_conn = postgresql://dummy:dummy@dummy/dummy - -# Extra engine specific keyword args passed to SQLAlchemy's create_engine, as a JSON-encoded value -# -# Example: sql_alchemy_engine_args = {"arg1": true} -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_ENGINE_ARGS -# -# sql_alchemy_engine_args = - -# The encoding for the databases -# -# Variable: AIRFLOW__DATABASE__SQL_ENGINE_ENCODING -# -sql_engine_encoding = utf-8 - -# Collation for ``dag_id``, ``task_id``, ``key``, ``external_executor_id`` columns -# in case they have different encoding. 
-# By default this collation is the same as the database collation, however for ``mysql`` and ``mariadb`` -# the default is ``utf8mb3_bin`` so that the index sizes of our index keys will not exceed -# the maximum size of allowed index when collation is set to ``utf8mb4`` variant, see -# `GitHub Issue Comment `__ -# for more details. -# -# Variable: AIRFLOW__DATABASE__SQL_ENGINE_COLLATION_FOR_IDS -# -# sql_engine_collation_for_ids = - -# If SQLAlchemy should pool database connections. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_ENABLED -# -sql_alchemy_pool_enabled = True - -# The SQLAlchemy pool size is the maximum number of database connections -# in the pool. 0 indicates no limit. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_SIZE -# -sql_alchemy_pool_size = 20 - -# The maximum overflow size of the pool. -# When the number of checked-out connections reaches the size set in pool_size, -# additional connections will be returned up to this limit. -# When those additional connections are returned to the pool, they are disconnected and discarded. -# It follows then that the total number of simultaneous connections the pool will allow -# is **pool_size** + **max_overflow**, -# and the total number of "sleeping" connections the pool will allow is pool_size. -# max_overflow can be set to ``-1`` to indicate no overflow limit; -# no limit will be placed on the total number of concurrent connections. Defaults to ``10``. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_MAX_OVERFLOW -# -sql_alchemy_max_overflow = 30 - -# The SQLAlchemy pool recycle is the number of seconds a connection -# can be idle in the pool before it is invalidated. This config does -# not apply to sqlite. If the number of DB connections is ever exceeded, -# a lower config value will allow the system to recover faster. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_RECYCLE -# -sql_alchemy_pool_recycle = 1800 - -# Check connection at the start of each connection pool checkout. -# Typically, this is a simple statement like "SELECT 1". -# See `SQLAlchemy Pooling: Disconnect Handling - Pessimistic -# `__ -# for more details. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_POOL_PRE_PING -# -sql_alchemy_pool_pre_ping = True - -# The schema to use for the metadata database. -# SQLAlchemy supports databases with the concept of multiple schemas. -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_SCHEMA -# -sql_alchemy_schema = - -# Import path for connect args in SQLAlchemy. Defaults to an empty dict. -# This is useful when you want to configure db engine args that SQLAlchemy won't parse -# in connection string. This can be set by passing a dictionary containing the create engine parameters. -# For more details about passing create engine parameters (keepalives variables, timeout etc) -# in Postgres DB Backend see `Setting up a PostgreSQL Database -# `__ -# e.g ``connect_args={"timeout":30}`` can be defined in ``airflow_local_settings.py`` and -# can be imported as shown below -# -# Example: sql_alchemy_connect_args = airflow_local_settings.connect_args -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_CONNECT_ARGS -# -# sql_alchemy_connect_args = - -# Important Warning: Use of sql_alchemy_session_maker Highly Discouraged -# Import path for function which returns 'sqlalchemy.orm.sessionmaker'. -# Improper configuration of sql_alchemy_session_maker can lead to serious issues, -# including data corruption, unrecoverable application crashes. 
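
For illustration, the commented-out `sql_alchemy_connect_args` hook above means extra driver arguments would come from an `airflow_local_settings.py` module on the `PYTHONPATH`, as the Example line shows. A minimal sketch of such a module; the keepalive values are illustrative assumptions, not tuned settings:

```python
# airflow_local_settings.py (sketch)
# Referenced from airflow.cfg as:
#   sql_alchemy_connect_args = airflow_local_settings.connect_args
connect_args = {
    "connect_timeout": 30,      # libpq/psycopg2 connection timeout in seconds
    "keepalives": 1,            # enable TCP keepalives
    "keepalives_idle": 30,
    "keepalives_interval": 10,
}
```
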
Please review the SQLAlchemy -# documentation for detailed guidance on proper configuration and best practices. -# -# Example: sql_alchemy_session_maker = airflow_local_settings._sessionmaker -# -# Variable: AIRFLOW__DATABASE__SQL_ALCHEMY_SESSION_MAKER -# -# sql_alchemy_session_maker = - -# Whether to load the default connections that ship with Airflow when ``airflow db init`` is called. -# It's good to get started, but you probably want to set this to ``False`` in a production environment. -# -# Variable: AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS -# -load_default_connections = True - -# Number of times the code should be retried in case of DB Operational Errors. -# Not all transactions will be retried as it can cause undesired state. -# Currently it is only used in ``DagFileProcessor.process_file`` to retry ``dagbag.sync_to_db``. -# -# Variable: AIRFLOW__DATABASE__MAX_DB_RETRIES -# -max_db_retries = 3 - -# Whether to run alembic migrations during Airflow start up. Sometimes this operation can be expensive, -# and the users can assert the correct version through other means (e.g. through a Helm chart). -# Accepts ``True`` or ``False``. -# -# Variable: AIRFLOW__DATABASE__CHECK_MIGRATIONS -# -check_migrations = True - -[logging] -# The folder where airflow should store its log files. -# This path must be absolute. -# There are a few existing configurations that assume this is set to the default. -# If you choose to override this you may need to update the -# ``[logging] dag_processor_manager_log_location`` and -# ``[logging] child_process_log_directory settings`` as well. -# -# Variable: AIRFLOW__LOGGING__BASE_LOG_FOLDER -# -base_log_folder = /opt/airflow/logs - -# Airflow can store logs remotely in AWS S3, Google Cloud Storage or Elastic Search. -# Set this to ``True`` if you want to enable remote logging. -# -# Variable: AIRFLOW__LOGGING__REMOTE_LOGGING -# -remote_logging = True - -# Users must supply an Airflow connection id that provides access to the storage -# location. Depending on your remote logging service, this may only be used for -# reading logs, not writing them. -# -# Variable: AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID -# -remote_log_conn_id = minio_default - -# Whether the local log files for GCS, S3, WASB and OSS remote logging should be deleted after -# they are uploaded to the remote location. -# -# Variable: AIRFLOW__LOGGING__DELETE_LOCAL_LOGS -# -delete_local_logs = False - -# Path to Google Credential JSON file. If omitted, authorization based on `the Application Default -# Credentials -# `__ will -# be used. -# -# Variable: AIRFLOW__LOGGING__GOOGLE_KEY_PATH -# -google_key_path = - -# Storage bucket URL for remote logging -# S3 buckets should start with **s3://** -# Cloudwatch log groups should start with **cloudwatch://** -# GCS buckets should start with **gs://** -# WASB buckets should start with **wasb** just to help Airflow select correct handler -# Stackdriver logs should start with **stackdriver://** -# -# Variable: AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER -# -remote_base_log_folder = s3://airflow-logs/ - -# The remote_task_handler_kwargs param is loaded into a dictionary and passed to the ``__init__`` -# of remote task handler and it overrides the values provided by Airflow config. For example if you set -# ``delete_local_logs=False`` and you provide ``{"delete_local_copy": true}``, then the local -# log files will be deleted after they are uploaded to remote location. 
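
As an aside, with `remote_logging = True` the `minio_default` connection has to be defined somewhere (UI, CLI, secrets backend, or an environment variable). One option is the `AIRFLOW_CONN_<CONN_ID>` JSON form; the host and credentials below are placeholders, not values from this repository:

```python
import json
import os

# Placeholder credentials/endpoint for an S3-compatible MinIO connection used
# by remote task logging (remote_log_conn_id = minio_default).
os.environ["AIRFLOW_CONN_MINIO_DEFAULT"] = json.dumps({
    "conn_type": "aws",
    "login": "minio-access-key",
    "password": "minio-secret-key",
    "extra": {"endpoint_url": "http://minio:9000"},
})
```
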
-# -# Example: remote_task_handler_kwargs = {"delete_local_copy": true} -# -# Variable: AIRFLOW__LOGGING__REMOTE_TASK_HANDLER_KWARGS -# -remote_task_handler_kwargs = - -# Use server-side encryption for logs stored in S3 -# -# Variable: AIRFLOW__LOGGING__ENCRYPT_S3_LOGS -# -encrypt_s3_logs = False - -# Logging level. -# -# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. -# -# Variable: AIRFLOW__LOGGING__LOGGING_LEVEL -# -logging_level = INFO - -# Logging level for celery. If not set, it uses the value of logging_level -# -# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. -# -# Variable: AIRFLOW__LOGGING__CELERY_LOGGING_LEVEL -# -celery_logging_level = - -# Logging level for Flask-appbuilder UI. -# -# Supported values: ``CRITICAL``, ``ERROR``, ``WARNING``, ``INFO``, ``DEBUG``. -# -# Variable: AIRFLOW__LOGGING__FAB_LOGGING_LEVEL -# -fab_logging_level = WARNING - -# Logging class -# Specify the class that will specify the logging configuration -# This class has to be on the python classpath -# -# Example: logging_config_class = my.path.default_local_settings.LOGGING_CONFIG -# -# Variable: AIRFLOW__LOGGING__LOGGING_CONFIG_CLASS -# -logging_config_class = airflow_local_settings.LOGGING_CONFIG - -# Flag to enable/disable Colored logs in Console -# Colour the logs when the controlling terminal is a TTY. -# -# Variable: AIRFLOW__LOGGING__COLORED_CONSOLE_LOG -# -colored_console_log = True - -# Log format for when Colored logs is enabled -# -# Variable: AIRFLOW__LOGGING__COLORED_LOG_FORMAT -# -colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s - -# Specifies the class utilized by Airflow to implement colored logging -# -# Variable: AIRFLOW__LOGGING__COLORED_FORMATTER_CLASS -# -colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter - -# Format of Log line -# -# Variable: AIRFLOW__LOGGING__LOG_FORMAT -# -log_format = [%%(asctime)s] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s - -# Defines the format of log messages for simple logging configuration -# -# Variable: AIRFLOW__LOGGING__SIMPLE_LOG_FORMAT -# -simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s - -# Where to send dag parser logs. If "file", logs are sent to log files defined by child_process_log_directory. -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_TARGET -# -dag_processor_log_target = file - -# Format of Dag Processor Log line -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_LOG_FORMAT -# -dag_processor_log_format = [%%(asctime)s] [SOURCE:DAG_PROCESSOR] {%%(filename)s:%%(lineno)d} %%(levelname)s - %%(message)s - -# Determines the formatter class used by Airflow for structuring its log messages -# The default formatter class is timezone-aware, which means that timestamps attached to log entries -# will be adjusted to reflect the local timezone of the Airflow instance -# -# Variable: AIRFLOW__LOGGING__LOG_FORMATTER_CLASS -# -log_formatter_class = airflow.utils.log.timezone_aware.TimezoneAware - -# An import path to a function to add adaptations of each secret added with -# ``airflow.utils.log.secrets_masker.mask_secret`` to be masked in log messages. The given function -# is expected to require a single parameter: the secret to be adapted. It may return a -# single adaptation of the secret or an iterable of adaptations to each be masked as secrets. 
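
For illustration, `logging_config_class = airflow_local_settings.LOGGING_CONFIG` points at a user-provided module; the usual pattern is to deep-copy Airflow's default dictConfig and adjust it. A minimal sketch, where the DEBUG tweak is only an example assumption:

```python
# airflow_local_settings.py (sketch)
from copy import deepcopy

from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG

LOGGING_CONFIG = deepcopy(DEFAULT_LOGGING_CONFIG)
# Example adjustment: raise verbosity of the task log handler.
LOGGING_CONFIG["handlers"]["task"]["level"] = "DEBUG"
```
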
-# The original secret will be masked as well as any adaptations returned. -# -# Example: secret_mask_adapter = urllib.parse.quote -# -# Variable: AIRFLOW__LOGGING__SECRET_MASK_ADAPTER -# -secret_mask_adapter = - -secret_mask_exception_args = False - -# Specify prefix pattern like mentioned below with stream handler ``TaskHandlerWithCustomFormatter`` -# -# Example: task_log_prefix_template = {{ti.dag_id}}-{{ti.task_id}}-{{execution_date}}-{{ti.try_number}} -# -# Variable: AIRFLOW__LOGGING__TASK_LOG_PREFIX_TEMPLATE -# -task_log_prefix_template = {{ ti.dag_id }}-{{ ti.task_id }}-{{ ti.run_id }} - -# Formatting for how airflow generates file names/paths for each task run. -# -# Variable: AIRFLOW__LOGGING__LOG_FILENAME_TEMPLATE -# -log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log - -# Formatting for how airflow generates file names for log -# -# Variable: AIRFLOW__LOGGING__LOG_PROCESSOR_FILENAME_TEMPLATE -# -log_processor_filename_template = {{ filename }}.log - -# Full path of dag_processor_manager logfile. -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_LOCATION -# -dag_processor_manager_log_location = /opt/airflow/logs/dag_processor_manager/dag_processor_manager.log - -# Whether DAG processor manager will write logs to stdout -# -# Variable: AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_STDOUT -# -dag_processor_manager_log_stdout = False - -# Name of handler to read task instance logs. -# Defaults to use ``task`` handler. -# -# Variable: AIRFLOW__LOGGING__TASK_LOG_READER -# -task_log_reader = task - -# A comma\-separated list of third-party logger names that will be configured to print messages to -# consoles\. -# -# Example: extra_logger_names = connexion,sqlalchemy -# -# Variable: AIRFLOW__LOGGING__EXTRA_LOGGER_NAMES -# -extra_logger_names = - -# When you start an Airflow worker, Airflow starts a tiny web server -# subprocess to serve the workers local log files to the airflow main -# web server, who then builds pages and sends them to users. This defines -# the port on which the logs are served. It needs to be unused, and open -# visible from the main web server to connect into the workers. -# -# Variable: AIRFLOW__LOGGING__WORKER_LOG_SERVER_PORT -# -worker_log_server_port = 8793 - -# Port to serve logs from for triggerer. -# See ``[logging] worker_log_server_port`` description for more info. -# -# Variable: AIRFLOW__LOGGING__TRIGGER_LOG_SERVER_PORT -# -trigger_log_server_port = 8794 - -# We must parse timestamps to interleave logs between trigger and task. To do so, -# we need to parse timestamps in log files. In case your log format is non-standard, -# you may provide import path to callable which takes a string log line and returns -# the timestamp (datetime.datetime compatible). -# -# Example: interleave_timestamp_parser = path.to.my_func -# -# Variable: AIRFLOW__LOGGING__INTERLEAVE_TIMESTAMP_PARSER -# -# interleave_timestamp_parser = - -# Permissions in the form or of octal string as understood by chmod. The permissions are important -# when you use impersonation, when logs are written by a different user than airflow. The most secure -# way of configuring it in this case is to add both users to the same group and make it the default -# group of both users. Group-writeable logs are default in airflow, but you might decide that you are -# OK with having the logs other-writeable, in which case you should set it to ``0o777``. 
You might -# decide to add more security if you do not use impersonation and change it to ``0o755`` to make it -# only owner-writeable. You can also make it just readable only for owner by changing it to ``0o700`` -# if all the access (read/write) for your logs happens from the same user. -# -# Example: file_task_handler_new_folder_permissions = 0o775 -# -# Variable: AIRFLOW__LOGGING__FILE_TASK_HANDLER_NEW_FOLDER_PERMISSIONS -# -file_task_handler_new_folder_permissions = 0o775 - -# Permissions in the form or of octal string as understood by chmod. The permissions are important -# when you use impersonation, when logs are written by a different user than airflow. The most secure -# way of configuring it in this case is to add both users to the same group and make it the default -# group of both users. Group-writeable logs are default in airflow, but you might decide that you are -# OK with having the logs other-writeable, in which case you should set it to ``0o666``. You might -# decide to add more security if you do not use impersonation and change it to ``0o644`` to make it -# only owner-writeable. You can also make it just readable only for owner by changing it to ``0o600`` -# if all the access (read/write) for your logs happens from the same user. -# -# Example: file_task_handler_new_file_permissions = 0o664 -# -# Variable: AIRFLOW__LOGGING__FILE_TASK_HANDLER_NEW_FILE_PERMISSIONS -# -file_task_handler_new_file_permissions = 0o664 - -# By default Celery sends all logs into stderr. -# If enabled any previous logging handlers will get *removed*. -# With this option AirFlow will create new handlers -# and send low level logs like INFO and WARNING to stdout, -# while sending higher severity logs to stderr. -# -# Variable: AIRFLOW__LOGGING__CELERY_STDOUT_STDERR_SEPARATION -# -celery_stdout_stderr_separation = False - -# If enabled, Airflow may ship messages to task logs from outside the task run context, e.g. from -# the scheduler, executor, or callback execution context. This can help in circumstances such as -# when there's something blocking the execution of the task and ordinarily there may be no task -# logs at all. -# This is set to ``True`` by default. If you encounter issues with this feature -# (e.g. scheduler performance issues) it can be disabled. -# -# Variable: AIRFLOW__LOGGING__ENABLE_TASK_CONTEXT_LOGGER -# -enable_task_context_logger = True - -# A comma separated list of keywords related to errors whose presence should display the line in red -# color in UI -# -# Variable: AIRFLOW__LOGGING__COLOR_LOG_ERROR_KEYWORDS -# -color_log_error_keywords = error,exception - -# A comma separated list of keywords related to warning whose presence should display the line in yellow -# color in UI -# -# Variable: AIRFLOW__LOGGING__COLOR_LOG_WARNING_KEYWORDS -# -color_log_warning_keywords = warn - -[metrics] -# `StatsD `__ integration settings. - -# If true, ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` will use -# regex pattern matching anywhere within the metric name instead of only prefix matching -# at the start of the name. -# -# Variable: AIRFLOW__METRICS__METRICS_USE_PATTERN_MATCH -# -metrics_use_pattern_match = False - -# Configure an allow list (comma separated string) to send only certain metrics. -# If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. -# If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. 
-# -# Example: metrics_allow_list = "scheduler,executor,dagrun,pool,triggerer,celery" or "^scheduler,^executor,heartbeat|timeout" -# -# Variable: AIRFLOW__METRICS__METRICS_ALLOW_LIST -# -metrics_allow_list = - -# Configure a block list (comma separated string) to block certain metrics from being emitted. -# If ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` are both configured, -# ``[metrics] metrics_block_list`` is ignored. -# -# If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. -# -# If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. -# -# Example: metrics_block_list = "scheduler,executor,dagrun,pool,triggerer,celery" or "^scheduler,^executor,heartbeat|timeout" -# -# Variable: AIRFLOW__METRICS__METRICS_BLOCK_LIST -# -metrics_block_list = - -# Enables sending metrics to StatsD. -# -# Variable: AIRFLOW__METRICS__STATSD_ON -# -statsd_on = False - -# Specifies the host address where the StatsD daemon (or server) is running -# -# Variable: AIRFLOW__METRICS__STATSD_HOST -# -statsd_host = localhost - -# Specifies the port on which the StatsD daemon (or server) is listening to -# -# Variable: AIRFLOW__METRICS__STATSD_PORT -# -statsd_port = 8125 - -# Defines the namespace for all metrics sent from Airflow to StatsD -# -# Variable: AIRFLOW__METRICS__STATSD_PREFIX -# -statsd_prefix = airflow - -# A function that validate the StatsD stat name, apply changes to the stat name if necessary and return -# the transformed stat name. -# -# The function should have the following signature -# -# .. code-block:: python -# -# def func_name(stat_name: str) -> str: ... -# -# Variable: AIRFLOW__METRICS__STAT_NAME_HANDLER -# -stat_name_handler = - -# To enable datadog integration to send airflow metrics. -# -# Variable: AIRFLOW__METRICS__STATSD_DATADOG_ENABLED -# -statsd_datadog_enabled = False - -# List of datadog tags attached to all metrics(e.g: ``key1:value1,key2:value2``) -# -# Variable: AIRFLOW__METRICS__STATSD_DATADOG_TAGS -# -statsd_datadog_tags = - -# Set to ``False`` to disable metadata tags for some of the emitted metrics -# -# Variable: AIRFLOW__METRICS__STATSD_DATADOG_METRICS_TAGS -# -statsd_datadog_metrics_tags = True - -# If you want to utilise your own custom StatsD client set the relevant -# module path below. -# Note: The module path must exist on your -# `PYTHONPATH ` -# for Airflow to pick it up -# -# Variable: AIRFLOW__METRICS__STATSD_CUSTOM_CLIENT_PATH -# -# statsd_custom_client_path = - -# If you want to avoid sending all the available metrics tags to StatsD, -# you can configure a block list of prefixes (comma separated) to filter out metric tags -# that start with the elements of the list (e.g: ``job_id,run_id``) -# -# Example: statsd_disabled_tags = job_id,run_id,dag_id,task_id -# -# Variable: AIRFLOW__METRICS__STATSD_DISABLED_TAGS -# -statsd_disabled_tags = job_id,run_id - -# To enable sending Airflow metrics with StatsD-Influxdb tagging convention. -# -# Variable: AIRFLOW__METRICS__STATSD_INFLUXDB_ENABLED -# -statsd_influxdb_enabled = False - -# Enables sending metrics to OpenTelemetry. -# -# Variable: AIRFLOW__METRICS__OTEL_ON -# -otel_on = False - -# Specifies the hostname or IP address of the OpenTelemetry Collector to which Airflow sends -# metrics and traces. -# -# Variable: AIRFLOW__METRICS__OTEL_HOST -# -otel_host = localhost - -# Specifies the port of the OpenTelemetry Collector that is listening to. 
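
As an aside, `stat_name_handler` expects a callable with the signature shown above; a tiny hypothetical example that normalizes metric names before they reach StatsD:

```python
# Hypothetical handler; it would be referenced from airflow.cfg as, e.g.:
#   stat_name_handler = my_company.metrics.normalize_stat_name
def normalize_stat_name(stat_name: str) -> str:
    """Lower-case stat names and replace spaces so StatsD keys stay uniform."""
    return stat_name.lower().replace(" ", "_")
```
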
-# -# Variable: AIRFLOW__METRICS__OTEL_PORT -# -otel_port = 8889 - -# The prefix for the Airflow metrics. -# -# Variable: AIRFLOW__METRICS__OTEL_PREFIX -# -otel_prefix = airflow - -# Defines the interval, in milliseconds, at which Airflow sends batches of metrics and traces -# to the configured OpenTelemetry Collector. -# -# Variable: AIRFLOW__METRICS__OTEL_INTERVAL_MILLISECONDS -# -otel_interval_milliseconds = 60000 - -# If ``True``, all metrics are also emitted to the console. Defaults to ``False``. -# -# Variable: AIRFLOW__METRICS__OTEL_DEBUGGING_ON -# -otel_debugging_on = False - -# The default service name of traces. -# -# Variable: AIRFLOW__METRICS__OTEL_SERVICE -# -otel_service = Airflow - -# If ``True``, SSL will be enabled. Defaults to ``False``. -# To establish an HTTPS connection to the OpenTelemetry collector, -# you need to configure the SSL certificate and key within the OpenTelemetry collector's -# ``config.yml`` file. -# -# Variable: AIRFLOW__METRICS__OTEL_SSL_ACTIVE -# -otel_ssl_active = False - -[traces] -# Distributed traces integration settings. - -# Enables sending traces to OpenTelemetry. -# -# Variable: AIRFLOW__TRACES__OTEL_ON -# -otel_on = False - -# Specifies the hostname or IP address of the OpenTelemetry Collector to which Airflow sends -# traces. -# -# Variable: AIRFLOW__TRACES__OTEL_HOST -# -otel_host = localhost - -# Specifies the port of the OpenTelemetry Collector that is listening to. -# -# Variable: AIRFLOW__TRACES__OTEL_PORT -# -otel_port = 8889 - -# The default service name of traces. -# -# Variable: AIRFLOW__TRACES__OTEL_SERVICE -# -otel_service = Airflow - -# If True, all traces are also emitted to the console. Defaults to False. -# -# Variable: AIRFLOW__TRACES__OTEL_DEBUGGING_ON -# -otel_debugging_on = False - -# If True, SSL will be enabled. Defaults to False. -# To establish an HTTPS connection to the OpenTelemetry collector, -# you need to configure the SSL certificate and key within the OpenTelemetry collector's -# config.yml file. -# -# Variable: AIRFLOW__TRACES__OTEL_SSL_ACTIVE -# -otel_ssl_active = False - -# If True, after the task is complete, the full task log messages will be added as the -# span events, chunked by 64k size. defaults to False. -# -# Variable: AIRFLOW__TRACES__OTEL_TASK_LOG_EVENT -# -otel_task_log_event = False - -[secrets] -# Full class name of secrets backend to enable (will precede env vars and metastore in search path) -# -# Example: backend = airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend -# -# Variable: AIRFLOW__SECRETS__BACKEND -# -backend = - -# The backend_kwargs param is loaded into a dictionary and passed to ``__init__`` -# of secrets backend class. See documentation for the secrets backend you are using. -# JSON is expected. -# -# Example for AWS Systems Manager ParameterStore: -# ``{"connections_prefix": "/airflow/connections", "profile_name": "default"}`` -# -# Variable: AIRFLOW__SECRETS__BACKEND_KWARGS -# -backend_kwargs = - -# .. note:: |experimental| -# -# Enables local caching of Variables, when parsing DAGs only. -# Using this option can make dag parsing faster if Variables are used in top level code, at the expense -# of longer propagation time for changes. -# Please note that this cache concerns only the DAG parsing step. There is no caching in place when DAG -# tasks are run. -# -# Variable: AIRFLOW__SECRETS__USE_CACHE -# -use_cache = False - -# .. 
note:: |experimental| -# -# When the cache is enabled, this is the duration for which we consider an entry in the cache to be -# valid. Entries are refreshed if they are older than this many seconds. -# It means that when the cache is enabled, this is the maximum amount of time you need to wait to see a -# Variable change take effect. -# -# Variable: AIRFLOW__SECRETS__CACHE_TTL_SECONDS -# -cache_ttl_seconds = 900 - -[cli] -# In what way should the cli access the API. The LocalClient will use the -# database directly, while the json_client will use the api running on the -# webserver -# -# Variable: AIRFLOW__CLI__API_CLIENT -# -api_client = airflow.api.client.local_client - -# If you set web_server_url_prefix, do NOT forget to append it here, ex: -# ``endpoint_url = http://localhost:8080/myroot`` -# So api will look like: ``http://localhost:8080/myroot/api/experimental/...`` -# -# Variable: AIRFLOW__CLI__ENDPOINT_URL -# -endpoint_url = http://localhost:8080 - -[debug] -# Used only with ``DebugExecutor``. If set to ``True`` DAG will fail with first -# failed task. Helpful for debugging purposes. -# -# Variable: AIRFLOW__DEBUG__FAIL_FAST -# -fail_fast = False - -[api] -# Enables the deprecated experimental API. Please note that these API endpoints do not have -# access control. An authenticated user has full access. -# -# .. warning:: -# -# This `Experimental REST API -# `__ is -# deprecated since version 2.0. Please consider using -# `the Stable REST API -# `__. -# For more information on migration, see -# `RELEASE_NOTES.rst `_ -# -# Variable: AIRFLOW__API__ENABLE_EXPERIMENTAL_API -# -enable_experimental_api = False - -# Comma separated list of auth backends to authenticate users of the API. See -# `Security: API -# `__ for possible values. -# ("airflow.api.auth.backend.default" allows all requests for historic reasons) -# -# Variable: AIRFLOW__API__AUTH_BACKENDS -# -auth_backends = airflow.api.auth.backend.session - -# Used to set the maximum page limit for API requests. If limit passed as param -# is greater than maximum page limit, it will be ignored and maximum page limit value -# will be set as the limit -# -# Variable: AIRFLOW__API__MAXIMUM_PAGE_LIMIT -# -maximum_page_limit = 100 - -# Used to set the default page limit when limit param is zero or not provided in API -# requests. Otherwise if positive integer is passed in the API requests as limit, the -# smallest number of user given limit or maximum page limit is taken as limit. -# -# Variable: AIRFLOW__API__FALLBACK_PAGE_LIMIT -# -fallback_page_limit = 100 - -# The intended audience for JWT token credentials used for authorization. This value must match on the client and server sides. If empty, audience will not be tested. -# -# Example: google_oauth2_audience = project-id-random-value.apps.googleusercontent.com -# -# Variable: AIRFLOW__API__GOOGLE_OAUTH2_AUDIENCE -# -google_oauth2_audience = - -# Path to Google Cloud Service Account key file (JSON). If omitted, authorization based on -# `the Application Default Credentials -# `__ will -# be used. -# -# Example: google_key_path = /files/service-account-json -# -# Variable: AIRFLOW__API__GOOGLE_KEY_PATH -# -google_key_path = - -# Used in response to a preflight request to indicate which HTTP -# headers can be used when making the actual request. This header is -# the server side response to the browser's -# Access-Control-Request-Headers header. 
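
For illustration, the page-limit settings apply to the stable REST API: a `limit` above `maximum_page_limit` is silently clamped. The request below is illustrative only; host and credentials are placeholders, and the session auth backend configured above would require a login flow rather than plain basic auth:

```python
import requests

resp = requests.get(
    "http://localhost:8080/api/v1/dags",
    params={"limit": 500, "offset": 0},  # limit > maximum_page_limit is clamped to 100
    auth=("admin", "admin"),             # placeholder credentials
)
resp.raise_for_status()
print(resp.json()["total_entries"])
```
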
-# -# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_HEADERS -# -access_control_allow_headers = - -# Specifies the method or methods allowed when accessing the resource. -# -# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_METHODS -# -access_control_allow_methods = - -# Indicates whether the response can be shared with requesting code from the given origins. -# Separate URLs with space. -# -# Variable: AIRFLOW__API__ACCESS_CONTROL_ALLOW_ORIGINS -# -access_control_allow_origins = - -# Indicates whether the **xcomEntries** endpoint supports the **deserialize** -# flag. If set to ``False``, setting this flag in a request would result in a -# 400 Bad Request error. -# -# Variable: AIRFLOW__API__ENABLE_XCOM_DESERIALIZE_SUPPORT -# -enable_xcom_deserialize_support = False - -[lineage] -# what lineage backend to use -# -# Variable: AIRFLOW__LINEAGE__BACKEND -# -backend = - -[operators] -# The default owner assigned to each new operator, unless -# provided explicitly or passed via ``default_args`` -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_OWNER -# -default_owner = airflow - -# The default value of attribute "deferrable" in operators and sensors. -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE -# -default_deferrable = false - -# Indicates the default number of CPU units allocated to each operator when no specific CPU request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_CPUS -# -default_cpus = 1 - -# Indicates the default number of RAM allocated to each operator when no specific RAM request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_RAM -# -default_ram = 512 - -# Indicates the default number of disk storage allocated to each operator when no specific disk request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_DISK -# -default_disk = 512 - -# Indicates the default number of GPUs allocated to each operator when no specific GPUs request -# is specified in the operator's configuration -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_GPUS -# -default_gpus = 0 - -# Default queue that tasks get assigned to and that worker listen on. -# -# Variable: AIRFLOW__OPERATORS__DEFAULT_QUEUE -# -default_queue = default - -# Is allowed to pass additional/unused arguments (args, kwargs) to the BaseOperator operator. -# If set to ``False``, an exception will be thrown, -# otherwise only the console message will be displayed. -# -# Variable: AIRFLOW__OPERATORS__ALLOW_ILLEGAL_ARGUMENTS -# -allow_illegal_arguments = False - -[webserver] -# The message displayed when a user attempts to execute actions beyond their authorised privileges. -# -# Variable: AIRFLOW__WEBSERVER__ACCESS_DENIED_MESSAGE -# -access_denied_message = Access is Denied - -# Path of webserver config file used for configuring the webserver parameters -# -# Variable: AIRFLOW__WEBSERVER__CONFIG_FILE -# -config_file = /opt/airflow/webserver_config.py - -# The base url of your website: Airflow cannot guess what domain or CNAME you are using. -# This is used to create links in the Log Url column in the Browse - Task Instances menu, -# as well as in any automated emails sent by Airflow that contain links to your webserver. -# -# Variable: AIRFLOW__WEBSERVER__BASE_URL -# -base_url = http://localhost:8080 - -# Default timezone to display all dates in the UI, can be UTC, system, or -# any IANA timezone string (e.g. **Europe/Amsterdam**). 
If left empty the -# default value of core/default_timezone will be used -# -# Example: default_ui_timezone = America/New_York -# -# Variable: AIRFLOW__WEBSERVER__DEFAULT_UI_TIMEZONE -# -default_ui_timezone = UTC - -# The ip specified when starting the web server -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_HOST -# -web_server_host = 0.0.0.0 - -# The port on which to run the web server -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_PORT -# -web_server_port = 8080 - -# Paths to the SSL certificate and key for the web server. When both are -# provided SSL will be enabled. This does not change the web server port. -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT -# -web_server_ssl_cert = - -# Paths to the SSL certificate and key for the web server. When both are -# provided SSL will be enabled. This does not change the web server port. -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY -# -web_server_ssl_key = - -# The type of backend used to store web session data, can be ``database`` or ``securecookie``. For the -# ``database`` backend, sessions are store in the database and they can be -# managed there (for example when you reset password of the user, all sessions for that user are -# deleted). For the ``securecookie`` backend, sessions are stored in encrypted cookies on the client -# side. The ``securecookie`` mechanism is 'lighter' than database backend, but sessions are not deleted -# when you reset password of the user, which means that other than waiting for expiry time, the only -# way to invalidate all sessions for a user is to change secret_key and restart webserver (which -# also invalidates and logs out all other user's sessions). -# -# When you are using ``database`` backend, make sure to keep your database session table small -# by periodically running ``airflow db clean --table session`` command, especially if you have -# automated API calls that will create a new session for each call rather than reuse the sessions -# stored in browser cookies. -# -# Example: session_backend = securecookie -# -# Variable: AIRFLOW__WEBSERVER__SESSION_BACKEND -# -session_backend = database - -# Number of seconds the webserver waits before killing gunicorn master that doesn't respond -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_MASTER_TIMEOUT -# -web_server_master_timeout = 120 - -# Number of seconds the gunicorn webserver waits before timing out on a worker -# -# Variable: AIRFLOW__WEBSERVER__WEB_SERVER_WORKER_TIMEOUT -# -web_server_worker_timeout = 120 - -# Number of workers to refresh at a time. When set to 0, worker refresh is -# disabled. When nonzero, airflow periodically refreshes webserver workers by -# bringing up new ones and killing old ones. -# -# Variable: AIRFLOW__WEBSERVER__WORKER_REFRESH_BATCH_SIZE -# -worker_refresh_batch_size = 1 - -# Number of seconds to wait before refreshing a batch of workers. -# -# Variable: AIRFLOW__WEBSERVER__WORKER_REFRESH_INTERVAL -# -worker_refresh_interval = 6000 - -# If set to ``True``, Airflow will track files in plugins_folder directory. When it detects changes, -# then reload the gunicorn. If set to ``True``, gunicorn starts without preloading, which is slower, -# uses more memory, and may cause race conditions. Avoid setting this to ``True`` in production. -# -# Variable: AIRFLOW__WEBSERVER__RELOAD_ON_PLUGIN_CHANGE -# -reload_on_plugin_change = False - -# Secret key used to run your flask app. It should be as random as possible. 
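
As an aside, because this file ships a literal `secret_key` (and the same value for `internal_api_secret_key`), regenerating it per deployment is worthwhile; any sufficiently random string works as long as every webserver and worker shares the same value. One way to produce one:

```python
# Generate a fresh value for [webserver] secret_key / [core] internal_api_secret_key.
import secrets

print(secrets.token_urlsafe(16))
```
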
However, when running -# more than 1 instances of webserver, make sure all of them use the same ``secret_key`` otherwise -# one of them will error with "CSRF session token is missing". -# The webserver key is also used to authorize requests to Celery workers when logs are retrieved. -# The token generated using the secret key has a short expiry time though - make sure that time on -# ALL the machines that you run airflow components on is synchronized (for example using ntpd) -# otherwise you might get "forbidden" errors when the logs are accessed. -# -# Variable: AIRFLOW__WEBSERVER__SECRET_KEY -# -secret_key = tCnTbEabdFBDLHWoT/LxLw== - -# Number of workers to run the Gunicorn web server -# -# Variable: AIRFLOW__WEBSERVER__WORKERS -# -workers = 1 - -# The worker class gunicorn should use. Choices include -# ``sync`` (default), ``eventlet``, ``gevent``. -# -# .. warning:: -# -# When using ``gevent`` you might also want to set the ``_AIRFLOW_PATCH_GEVENT`` -# environment variable to ``"1"`` to make sure gevent patching is done as early as possible. -# -# Be careful to set ``_AIRFLOW_PATCH_GEVENT`` only on the web server as gevent patching may -# affect the scheduler behavior via the ``multiprocessing`` sockets module and cause crash. -# -# See related Issues / PRs for more details: -# -# * https://github.com/benoitc/gunicorn/issues/2796 -# * https://github.com/apache/airflow/issues/8212 -# * https://github.com/apache/airflow/pull/28283 -# -# Variable: AIRFLOW__WEBSERVER__WORKER_CLASS -# -worker_class = gevent - -# Log files for the gunicorn webserver. '-' means log to stderr. -# -# Variable: AIRFLOW__WEBSERVER__ACCESS_LOGFILE -# -access_logfile = - - -# Log files for the gunicorn webserver. '-' means log to stderr. -# -# Variable: AIRFLOW__WEBSERVER__ERROR_LOGFILE -# -error_logfile = - - -# Access log format for gunicorn webserver. -# default format is ``%%(h)s %%(l)s %%(u)s %%(t)s "%%(r)s" %%(s)s %%(b)s "%%(f)s" "%%(a)s"`` -# See `Gunicorn Settings: 'access_log_format' Reference -# `__ for more details -# -# Variable: AIRFLOW__WEBSERVER__ACCESS_LOGFORMAT -# -access_logformat = - -# Expose the configuration file in the web server. Set to ``non-sensitive-only`` to show all values -# except those that have security implications. ``True`` shows all values. ``False`` hides the -# configuration completely. -# -# Variable: AIRFLOW__WEBSERVER__EXPOSE_CONFIG -# -expose_config = False - -# Expose hostname in the web server -# -# Variable: AIRFLOW__WEBSERVER__EXPOSE_HOSTNAME -# -expose_hostname = False - -# Expose stacktrace in the web server -# -# Variable: AIRFLOW__WEBSERVER__EXPOSE_STACKTRACE -# -expose_stacktrace = False - -# Default DAG view. Valid values are: ``grid``, ``graph``, ``duration``, ``gantt``, ``landing_times`` -# -# Variable: AIRFLOW__WEBSERVER__DAG_DEFAULT_VIEW -# -dag_default_view = grid - -# Default DAG orientation. Valid values are: -# ``LR`` (Left->Right), ``TB`` (Top->Bottom), ``RL`` (Right->Left), ``BT`` (Bottom->Top) -# -# Variable: AIRFLOW__WEBSERVER__DAG_ORIENTATION -# -dag_orientation = LR - -# Sorting order in grid view. Valid values are: ``topological``, ``hierarchical_alphabetical`` -# -# Variable: AIRFLOW__WEBSERVER__GRID_VIEW_SORTING_ORDER -# -grid_view_sorting_order = topological - -# The amount of time (in secs) webserver will wait for initial handshake -# while fetching logs from other worker machine -# -# Variable: AIRFLOW__WEBSERVER__LOG_FETCH_TIMEOUT_SEC -# -log_fetch_timeout_sec = 10 - -# Time interval (in secs) to wait before next log fetching. 
-# -# Variable: AIRFLOW__WEBSERVER__LOG_FETCH_DELAY_SEC -# -log_fetch_delay_sec = 5 - -# Distance away from page bottom to enable auto tailing. -# -# Variable: AIRFLOW__WEBSERVER__LOG_AUTO_TAILING_OFFSET -# -log_auto_tailing_offset = 30 - -# Animation speed for auto tailing log display. -# -# Variable: AIRFLOW__WEBSERVER__LOG_ANIMATION_SPEED -# -log_animation_speed = 1000 - -# By default, the webserver shows paused DAGs. Flip this to hide paused -# DAGs by default -# -# Variable: AIRFLOW__WEBSERVER__HIDE_PAUSED_DAGS_BY_DEFAULT -# -hide_paused_dags_by_default = False - -# Consistent page size across all listing views in the UI -# -# Variable: AIRFLOW__WEBSERVER__PAGE_SIZE -# -page_size = 100 - -# Define the color of navigation bar -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_COLOR -# -navbar_color = #fff - -# Define the color of text in the navigation bar -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_TEXT_COLOR -# -navbar_text_color = #51504f - -# Define the color of navigation bar links when hovered -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_HOVER_COLOR -# -navbar_hover_color = #eee - -# Define the color of text in the navigation bar when hovered -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_TEXT_HOVER_COLOR -# -navbar_text_hover_color = #51504f - -# Define the color of the logo text -# -# Variable: AIRFLOW__WEBSERVER__NAVBAR_LOGO_TEXT_COLOR -# -navbar_logo_text_color = #51504f - -# Default dagrun to show in UI -# -# Variable: AIRFLOW__WEBSERVER__DEFAULT_DAG_RUN_DISPLAY_NUMBER -# -default_dag_run_display_number = 25 - -# Enable werkzeug ``ProxyFix`` middleware for reverse proxy -# -# Variable: AIRFLOW__WEBSERVER__ENABLE_PROXY_FIX -# -enable_proxy_fix = True - -# Number of values to trust for ``X-Forwarded-For``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_FOR -# -proxy_fix_x_for = 1 - -# Number of values to trust for ``X-Forwarded-Proto``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PROTO -# -proxy_fix_x_proto = 1 - -# Number of values to trust for ``X-Forwarded-Host``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_HOST -# -proxy_fix_x_host = 1 - -# Number of values to trust for ``X-Forwarded-Port``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PORT -# -proxy_fix_x_port = 1 - -# Number of values to trust for ``X-Forwarded-Prefix``. -# See `Werkzeug: X-Forwarded-For Proxy Fix -# `__ for more details. -# -# Variable: AIRFLOW__WEBSERVER__PROXY_FIX_X_PREFIX -# -proxy_fix_x_prefix = 1 - -# Set secure flag on session cookie -# -# Variable: AIRFLOW__WEBSERVER__COOKIE_SECURE -# -cookie_secure = False - -# Set samesite policy on session cookie -# -# Variable: AIRFLOW__WEBSERVER__COOKIE_SAMESITE -# -cookie_samesite = Lax - -# Default setting for wrap toggle on DAG code and TI log views. 
-# -# Variable: AIRFLOW__WEBSERVER__DEFAULT_WRAP -# -default_wrap = False - -# Allow the UI to be rendered in a frame -# -# Variable: AIRFLOW__WEBSERVER__X_FRAME_ENABLED -# -x_frame_enabled = True - -# Send anonymous user activity to your analytics tool -# choose from ``google_analytics``, ``segment``, ``metarouter``, or ``matomo`` -# -# Variable: AIRFLOW__WEBSERVER__ANALYTICS_TOOL -# -# analytics_tool = - -# Unique ID of your account in the analytics tool -# -# Variable: AIRFLOW__WEBSERVER__ANALYTICS_ID -# -# analytics_id = - -# Your instances url, only applicable to Matomo. -# -# Example: analytics_url = https://your.matomo.instance.com/ -# -# Variable: AIRFLOW__WEBSERVER__ANALYTICS_URL -# -# analytics_url = - -# 'Recent Tasks' stats will show for old DagRuns if set -# -# Variable: AIRFLOW__WEBSERVER__SHOW_RECENT_STATS_FOR_COMPLETED_RUNS -# -show_recent_stats_for_completed_runs = True - -# The UI cookie lifetime in minutes. User will be logged out from UI after -# ``[webserver] session_lifetime_minutes`` of non-activity -# -# Variable: AIRFLOW__WEBSERVER__SESSION_LIFETIME_MINUTES -# -session_lifetime_minutes = 43200 - -# Sets a custom page title for the DAGs overview page and site title for all pages -# -# Variable: AIRFLOW__WEBSERVER__INSTANCE_NAME -# -# instance_name = - -# Whether the custom page title for the DAGs overview page contains any Markup language -# -# Variable: AIRFLOW__WEBSERVER__INSTANCE_NAME_HAS_MARKUP -# -instance_name_has_markup = False - -# How frequently, in seconds, the DAG data will auto-refresh in graph or grid view -# when auto-refresh is turned on -# -# Variable: AIRFLOW__WEBSERVER__AUTO_REFRESH_INTERVAL -# -auto_refresh_interval = 3 - -# Boolean for displaying warning for publicly viewable deployment -# -# Variable: AIRFLOW__WEBSERVER__WARN_DEPLOYMENT_EXPOSURE -# -warn_deployment_exposure = True - -# Comma separated string of view events to exclude from dag audit view. -# All other events will be added minus the ones passed here. -# The audit logs in the db will not be affected by this parameter. -# -# Example: audit_view_excluded_events = cli_task_run,running,success -# -# Variable: AIRFLOW__WEBSERVER__AUDIT_VIEW_EXCLUDED_EVENTS -# -# audit_view_excluded_events = - -# Comma separated string of view events to include in dag audit view. -# If passed, only these events will populate the dag audit view. -# The audit logs in the db will not be affected by this parameter. -# -# Example: audit_view_included_events = dagrun_cleared,failed -# -# Variable: AIRFLOW__WEBSERVER__AUDIT_VIEW_INCLUDED_EVENTS -# -# audit_view_included_events = - -# Boolean for running SwaggerUI in the webserver. -# -# Variable: AIRFLOW__WEBSERVER__ENABLE_SWAGGER_UI -# -enable_swagger_ui = True - -# Boolean for running Internal API in the webserver. -# -# Variable: AIRFLOW__WEBSERVER__RUN_INTERNAL_API -# -run_internal_api = False - -# The caching algorithm used by the webserver. Must be a valid hashlib function name. -# -# Example: caching_hash_method = sha256 -# -# Variable: AIRFLOW__WEBSERVER__CACHING_HASH_METHOD -# -caching_hash_method = md5 - -# Behavior of the trigger DAG run button for DAGs without params. ``False`` to skip and trigger -# without displaying a form to add a **dag_run.conf**, ``True`` to always display the form. -# The form is displayed always if parameters are defined. -# -# Variable: AIRFLOW__WEBSERVER__SHOW_TRIGGER_FORM_IF_NO_PARAMS -# -show_trigger_form_if_no_params = False - -# Number of recent DAG run configurations in the selector on the trigger web form. 
-# -# Example: num_recent_configurations_for_trigger = 10 -# -# Variable: AIRFLOW__WEBSERVER__NUM_RECENT_CONFIGURATIONS_FOR_TRIGGER -# -num_recent_configurations_for_trigger = 5 - -# A DAG author is able to provide any raw HTML into ``doc_md`` or params description in -# ``description_md`` for text formatting. This is including potentially unsafe javascript. -# Displaying the DAG or trigger form in web UI provides the DAG author the potential to -# inject malicious code into clients browsers. To ensure the web UI is safe by default, -# raw HTML is disabled by default. If you trust your DAG authors, you can enable HTML -# support in markdown by setting this option to ``True``. -# -# This parameter also enables the deprecated fields ``description_html`` and -# ``custom_html_form`` in DAG params until the feature is removed in a future version. -# -# Example: allow_raw_html_descriptions = False -# -# Variable: AIRFLOW__WEBSERVER__ALLOW_RAW_HTML_DESCRIPTIONS -# -allow_raw_html_descriptions = False - -# The maximum size of the request payload (in MB) that can be sent. -# -# Variable: AIRFLOW__WEBSERVER__ALLOWED_PAYLOAD_SIZE -# -allowed_payload_size = 1.0 - -# Require confirmation when changing a DAG in the web UI. This is to prevent accidental changes -# to a DAG that may be running on sensitive environments like production. -# When set to ``True``, confirmation dialog will be shown when a user tries to Pause/Unpause, -# Trigger a DAG -# -# Variable: AIRFLOW__WEBSERVER__REQUIRE_CONFIRMATION_DAG_CHANGE -# -require_confirmation_dag_change = False - -# The maximum size in bytes any non-file form field may be in a multipart/form-data body. -# If this limit is exceeded, a 413 RequestEntityTooLarge error is raised by webserver. -# -# Variable: AIRFLOW__WEBSERVER__MAX_FORM_MEMORY_SIZE -# -max_form_memory_size = 500000 - -# The maximum number of fields that may be present in a multipart/form-data body. -# If this limit is exceeded, a 413 RequestEntityTooLarge error is raised by webserver. -# -# Variable: AIRFLOW__WEBSERVER__MAX_FORM_PARTS -# -max_form_parts = 1000 - -[email] -# Configuration email backend and whether to -# send email alerts on retry or failure - -# Email backend to use -# -# Variable: AIRFLOW__EMAIL__EMAIL_BACKEND -# -email_backend = airflow.utils.email.send_email_smtp - -# Email connection to use -# -# Variable: AIRFLOW__EMAIL__EMAIL_CONN_ID -# -email_conn_id = smtp_default - -# Whether email alerts should be sent when a task is retried -# -# Variable: AIRFLOW__EMAIL__DEFAULT_EMAIL_ON_RETRY -# -default_email_on_retry = True - -# Whether email alerts should be sent when a task failed -# -# Variable: AIRFLOW__EMAIL__DEFAULT_EMAIL_ON_FAILURE -# -default_email_on_failure = True - -# File that will be used as the template for Email subject (which will be rendered using Jinja2). -# If not set, Airflow uses a base template. -# -# Example: subject_template = /path/to/my_subject_template_file -# -# Variable: AIRFLOW__EMAIL__SUBJECT_TEMPLATE -# -# subject_template = - -# File that will be used as the template for Email content (which will be rendered using Jinja2). -# If not set, Airflow uses a base template. -# -# Example: html_content_template = /path/to/my_html_content_template_file -# -# Variable: AIRFLOW__EMAIL__HTML_CONTENT_TEMPLATE -# -# html_content_template = - -# Email address that will be used as sender address. 
-# It can either be raw email or the complete address in a format ``Sender Name `` -# -# Example: from_email = Airflow -# -# Variable: AIRFLOW__EMAIL__FROM_EMAIL -# -# from_email = - -# ssl context to use when using SMTP and IMAP SSL connections. By default, the context is "default" -# which sets it to ``ssl.create_default_context()`` which provides the right balance between -# compatibility and security, it however requires that certificates in your operating system are -# updated and that SMTP/IMAP servers of yours have valid certificates that have corresponding public -# keys installed on your machines. You can switch it to "none" if you want to disable checking -# of the certificates, but it is not recommended as it allows MITM (man-in-the-middle) attacks -# if your infrastructure is not sufficiently secured. It should only be set temporarily while you -# are fixing your certificate configuration. This can be typically done by upgrading to newer -# version of the operating system you run Airflow components on,by upgrading/refreshing proper -# certificates in the OS or by updating certificates for your mail servers. -# -# Example: ssl_context = default -# -# Variable: AIRFLOW__EMAIL__SSL_CONTEXT -# -ssl_context = default - -[smtp] -# If you want airflow to send emails on retries, failure, and you want to use -# the airflow.utils.email.send_email_smtp function, you have to configure an -# smtp server here - -# Specifies the host server address used by Airflow when sending out email notifications via SMTP. -# -# Variable: AIRFLOW__SMTP__SMTP_HOST -# -smtp_host = localhost - -# Determines whether to use the STARTTLS command when connecting to the SMTP server. -# -# Variable: AIRFLOW__SMTP__SMTP_STARTTLS -# -smtp_starttls = True - -# Determines whether to use an SSL connection when talking to the SMTP server. -# -# Variable: AIRFLOW__SMTP__SMTP_SSL -# -smtp_ssl = False - -# Username to authenticate when connecting to smtp server. -# -# Example: smtp_user = airflow -# -# Variable: AIRFLOW__SMTP__SMTP_USER -# -# smtp_user = - -# Password to authenticate when connecting to smtp server. -# -# Example: smtp_password = airflow -# -# Variable: AIRFLOW__SMTP__SMTP_PASSWORD -# -# smtp_password = - -# Defines the port number on which Airflow connects to the SMTP server to send email notifications. -# -# Variable: AIRFLOW__SMTP__SMTP_PORT -# -smtp_port = 25 - -# Specifies the default **from** email address used when Airflow sends email notifications. -# -# Variable: AIRFLOW__SMTP__SMTP_MAIL_FROM -# -smtp_mail_from = airflow@example.com - -# Determines the maximum time (in seconds) the Apache Airflow system will wait for a -# connection to the SMTP server to be established. -# -# Variable: AIRFLOW__SMTP__SMTP_TIMEOUT -# -smtp_timeout = 30 - -# Defines the maximum number of times Airflow will attempt to connect to the SMTP server. -# -# Variable: AIRFLOW__SMTP__SMTP_RETRY_LIMIT -# -smtp_retry_limit = 5 - -[sentry] -# `Sentry `__ integration. Here you can supply -# additional configuration options based on the Python platform. -# See `Python / Configuration / Basic Options -# `__ for more details. -# Unsupported options: ``integrations``, ``in_app_include``, ``in_app_exclude``, -# ``ignore_errors``, ``before_breadcrumb``, ``transport``. - -# Enable error reporting to Sentry -# -# Variable: AIRFLOW__SENTRY__SENTRY_ON -# -sentry_on = false - -# -# Variable: AIRFLOW__SENTRY__SENTRY_DSN -# -sentry_dsn = - -# Dotted path to a before_send function that the sentry SDK should be configured to use. 
-# -# Variable: AIRFLOW__SENTRY__BEFORE_SEND -# -# before_send = - -[scheduler] -# Task instances listen for external kill signal (when you clear tasks -# from the CLI or the UI), this defines the frequency at which they should -# listen (in seconds). -# -# Variable: AIRFLOW__SCHEDULER__JOB_HEARTBEAT_SEC -# -job_heartbeat_sec = 5 - -# The scheduler constantly tries to trigger new tasks (look at the -# scheduler section in the docs for more information). This defines -# how often the scheduler should run (in seconds). -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEARTBEAT_SEC -# -scheduler_heartbeat_sec = 5 - -# The frequency (in seconds) at which the LocalTaskJob should send heartbeat signals to the -# scheduler to notify it's still alive. If this value is set to 0, the heartbeat interval will default -# to the value of ``[scheduler] scheduler_zombie_task_threshold``. -# -# Variable: AIRFLOW__SCHEDULER__LOCAL_TASK_JOB_HEARTBEAT_SEC -# -local_task_job_heartbeat_sec = 0 - -# The number of times to try to schedule each DAG file -# -1 indicates unlimited number -# -# Variable: AIRFLOW__SCHEDULER__NUM_RUNS -# -num_runs = -1 - -# Controls how long the scheduler will sleep between loops, but if there was nothing to do -# in the loop. i.e. if it scheduled something then it will start the next loop -# iteration straight away. -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_IDLE_SLEEP_TIME -# -scheduler_idle_sleep_time = 1 - -# Number of seconds after which a DAG file is parsed. The DAG file is parsed every -# ``[scheduler] min_file_process_interval`` number of seconds. Updates to DAGs are reflected after -# this interval. Keeping this number low will increase CPU usage. -# -# Variable: AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL -# -min_file_process_interval = 60 - -# How often (in seconds) to check for stale DAGs (DAGs which are no longer present in -# the expected files) which should be deactivated, as well as datasets that are no longer -# referenced and should be marked as orphaned. -# -# Variable: AIRFLOW__SCHEDULER__PARSING_CLEANUP_INTERVAL -# -parsing_cleanup_interval = 60 - -# How long (in seconds) to wait after we have re-parsed a DAG file before deactivating stale -# DAGs (DAGs which are no longer present in the expected files). The reason why we need -# this threshold is to account for the time between when the file is parsed and when the -# DAG is loaded. The absolute maximum that this could take is ``[core] dag_file_processor_timeout``, -# but when you have a long timeout configured, it results in a significant delay in the -# deactivation of stale dags. -# -# Variable: AIRFLOW__SCHEDULER__STALE_DAG_THRESHOLD -# -stale_dag_threshold = 50 - -# How often (in seconds) to scan the DAGs directory for new files. Default to 5 minutes. -# -# Variable: AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL -# -dag_dir_list_interval = 600 - -# How often should stats be printed to the logs. Setting to 0 will disable printing stats -# -# Variable: AIRFLOW__SCHEDULER__PRINT_STATS_INTERVAL -# -print_stats_interval = 30 - -# How often (in seconds) should pool usage stats be sent to StatsD (if statsd_on is enabled) -# -# Variable: AIRFLOW__SCHEDULER__POOL_METRICS_INTERVAL -# -pool_metrics_interval = 5.0 - -# If the last scheduler heartbeat happened more than ``[scheduler] scheduler_health_check_threshold`` -# ago (in seconds), scheduler is considered unhealthy. -# This is used by the health check in the **/health** endpoint and in ``airflow jobs check`` CLI -# for SchedulerJob. 
-# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_THRESHOLD -# -scheduler_health_check_threshold = 30 - -# When you start a scheduler, airflow starts a tiny web server -# subprocess to serve a health check if this is set to ``True`` -# -# Variable: AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK -# -enable_health_check = True - -# When you start a scheduler, airflow starts a tiny web server -# subprocess to serve a health check on this host -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_SERVER_HOST -# -scheduler_health_check_server_host = 0.0.0.0 - -# When you start a scheduler, airflow starts a tiny web server -# subprocess to serve a health check on this port -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_HEALTH_CHECK_SERVER_PORT -# -scheduler_health_check_server_port = 8974 - -# How often (in seconds) should the scheduler check for orphaned tasks and SchedulerJobs -# -# Variable: AIRFLOW__SCHEDULER__ORPHANED_TASKS_CHECK_INTERVAL -# -orphaned_tasks_check_interval = 300.0 - -# Determines the directory where logs for the child processes of the scheduler will be stored -# -# Variable: AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY -# -child_process_log_directory = /opt/airflow/logs/scheduler - -# Local task jobs periodically heartbeat to the DB. If the job has -# not heartbeat in this many seconds, the scheduler will mark the -# associated task instance as failed and will re-schedule the task. -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULER_ZOMBIE_TASK_THRESHOLD -# -scheduler_zombie_task_threshold = 300 - -# How often (in seconds) should the scheduler check for zombie tasks. -# -# Variable: AIRFLOW__SCHEDULER__ZOMBIE_DETECTION_INTERVAL -# -zombie_detection_interval = 10.0 - -# Turn off scheduler catchup by setting this to ``False``. -# Default behavior is unchanged and -# Command Line Backfills still work, but the scheduler -# will not do scheduler catchup if this is ``False``, -# however it can be set on a per DAG basis in the -# DAG definition (catchup) -# -# Variable: AIRFLOW__SCHEDULER__CATCHUP_BY_DEFAULT -# -catchup_by_default = True - -# Setting this to ``True`` will make first task instance of a task -# ignore depends_on_past setting. A task instance will be considered -# as the first task instance of a task when there is no task instance -# in the DB with an execution_date earlier than it., i.e. no manual marking -# success will be needed for a newly added task to be scheduled. -# -# Variable: AIRFLOW__SCHEDULER__IGNORE_FIRST_DEPENDS_ON_PAST_BY_DEFAULT -# -ignore_first_depends_on_past_by_default = True - -# This changes the batch size of queries in the scheduling main loop. -# This should not be greater than ``[core] parallelism``. -# If this is too high, SQL query performance may be impacted by -# complexity of query predicate, and/or excessive locking. -# Additionally, you may hit the maximum allowable query length for your db. -# Set this to 0 to use the value of ``[core] parallelism`` -# -# Variable: AIRFLOW__SCHEDULER__MAX_TIS_PER_QUERY -# -max_tis_per_query = 16 - -# Should the scheduler issue ``SELECT ... FOR UPDATE`` in relevant queries. -# If this is set to ``False`` then you should not run more than a single -# scheduler at once -# -# Variable: AIRFLOW__SCHEDULER__USE_ROW_LEVEL_LOCKING -# -use_row_level_locking = True - -# Max number of DAGs to create DagRuns for per scheduler loop. 
-# -# Variable: AIRFLOW__SCHEDULER__MAX_DAGRUNS_TO_CREATE_PER_LOOP -# -max_dagruns_to_create_per_loop = 10 - -# How many DagRuns should a scheduler examine (and lock) when scheduling -# and queuing tasks. -# -# Variable: AIRFLOW__SCHEDULER__MAX_DAGRUNS_PER_LOOP_TO_SCHEDULE -# -max_dagruns_per_loop_to_schedule = 20 - -# Should the Task supervisor process perform a "mini scheduler" to attempt to schedule more tasks of the -# same DAG. Leaving this on will mean tasks in the same DAG execute quicker, but might starve out other -# dags in some circumstances -# -# Variable: AIRFLOW__SCHEDULER__SCHEDULE_AFTER_TASK_EXECUTION -# -schedule_after_task_execution = True - -# The scheduler reads dag files to extract the airflow modules that are going to be used, -# and imports them ahead of time to avoid having to re-do it for each parsing process. -# This flag can be set to ``False`` to disable this behavior in case an airflow module needs -# to be freshly imported each time (at the cost of increased DAG parsing time). -# -# Variable: AIRFLOW__SCHEDULER__PARSING_PRE_IMPORT_MODULES -# -parsing_pre_import_modules = True - -# The scheduler can run multiple processes in parallel to parse dags. -# This defines how many processes will run. -# -# Variable: AIRFLOW__SCHEDULER__PARSING_PROCESSES -# -parsing_processes = 2 - -# One of ``modified_time``, ``random_seeded_by_host`` and ``alphabetical``. -# The scheduler will list and sort the dag files to decide the parsing order. -# -# * ``modified_time``: Sort by modified time of the files. This is useful on large scale to parse the -# recently modified DAGs first. -# * ``random_seeded_by_host``: Sort randomly across multiple Schedulers but with same order on the -# same host. This is useful when running with Scheduler in HA mode where each scheduler can -# parse different DAG files. -# * ``alphabetical``: Sort by filename -# -# Variable: AIRFLOW__SCHEDULER__FILE_PARSING_SORT_MODE -# -file_parsing_sort_mode = modified_time - -# Whether the dag processor is running as a standalone process or it is a subprocess of a scheduler -# job. -# -# Variable: AIRFLOW__SCHEDULER__STANDALONE_DAG_PROCESSOR -# -standalone_dag_processor = False - -# Only applicable if ``[scheduler] standalone_dag_processor`` is true and callbacks are stored -# in database. Contains maximum number of callbacks that are fetched during a single loop. -# -# Variable: AIRFLOW__SCHEDULER__MAX_CALLBACKS_PER_LOOP -# -max_callbacks_per_loop = 20 - -# Only applicable if ``[scheduler] standalone_dag_processor`` is true. -# Time in seconds after which dags, which were not updated by Dag Processor are deactivated. -# -# Variable: AIRFLOW__SCHEDULER__DAG_STALE_NOT_SEEN_DURATION -# -dag_stale_not_seen_duration = 600 - -# Turn off scheduler use of cron intervals by setting this to ``False``. -# DAGs submitted manually in the web UI or with trigger_dag will still run. -# -# Variable: AIRFLOW__SCHEDULER__USE_JOB_SCHEDULE -# -use_job_schedule = True - -# Allow externally triggered DagRuns for Execution Dates in the future -# Only has effect if schedule_interval is set to None in DAG -# -# Variable: AIRFLOW__SCHEDULER__ALLOW_TRIGGER_IN_FUTURE -# -allow_trigger_in_future = False - -# How often to check for expired trigger requests that have not run yet. -# -# Variable: AIRFLOW__SCHEDULER__TRIGGER_TIMEOUT_CHECK_INTERVAL -# -trigger_timeout_check_interval = 15 - -# Amount of time a task can be in the queued state before being retried or set to failed. 
-# -# Variable: AIRFLOW__SCHEDULER__TASK_QUEUED_TIMEOUT -# -task_queued_timeout = 300.0 - -# How often to check for tasks that have been in the queued state for -# longer than ``[scheduler] task_queued_timeout``. -# -# Variable: AIRFLOW__SCHEDULER__TASK_QUEUED_TIMEOUT_CHECK_INTERVAL -# -task_queued_timeout_check_interval = 120.0 - -# The run_id pattern used to verify the validity of user input to the run_id parameter when -# triggering a DAG. This pattern cannot change the pattern used by scheduler to generate run_id -# for scheduled DAG runs or DAG runs triggered without changing the run_id parameter. -# -# Variable: AIRFLOW__SCHEDULER__ALLOWED_RUN_ID_PATTERN -# -allowed_run_id_pattern = ^[A-Za-z0-9_.~:+-]+$ - -# Whether to create DAG runs that span an interval or one single point in time for cron schedules, when -# a cron string is provided to ``schedule`` argument of a DAG. -# -# * ``True``: **CronDataIntervalTimetable** is used, which is suitable -# for DAGs with well-defined data interval. You get contiguous intervals from the end of the previous -# interval up to the scheduled datetime. -# * ``False``: **CronTriggerTimetable** is used, which is closer to the behavior of cron itself. -# -# Notably, for **CronTriggerTimetable**, the logical date is the same as the time the DAG Run will -# try to schedule, while for **CronDataIntervalTimetable**, the logical date is the beginning of -# the data interval, but the DAG Run will try to schedule at the end of the data interval. -# -# Variable: AIRFLOW__SCHEDULER__CREATE_CRON_DATA_INTERVALS -# -create_cron_data_intervals = True - -[triggerer] -# How many triggers a single Triggerer will run at once, by default. -# -# Variable: AIRFLOW__TRIGGERER__DEFAULT_CAPACITY -# -default_capacity = 1000 - -# How often to heartbeat the Triggerer job to ensure it hasn't been killed. -# -# Variable: AIRFLOW__TRIGGERER__JOB_HEARTBEAT_SEC -# -job_heartbeat_sec = 5 - -# If the last triggerer heartbeat happened more than ``[triggerer] triggerer_health_check_threshold`` -# ago (in seconds), triggerer is considered unhealthy. -# This is used by the health check in the **/health** endpoint and in ``airflow jobs check`` CLI -# for TriggererJob. -# -# Variable: AIRFLOW__TRIGGERER__TRIGGERER_HEALTH_CHECK_THRESHOLD -# -triggerer_health_check_threshold = 30 - -[kerberos] -# Location of your ccache file once kinit has been performed. -# -# Variable: AIRFLOW__KERBEROS__CCACHE -# -ccache = /tmp/airflow_krb5_ccache - -# gets augmented with fqdn -# -# Variable: AIRFLOW__KERBEROS__PRINCIPAL -# -principal = airflow - -# Determines the frequency at which initialization or re-initialization processes occur. -# -# Variable: AIRFLOW__KERBEROS__REINIT_FREQUENCY -# -reinit_frequency = 3600 - -# Path to the kinit executable -# -# Variable: AIRFLOW__KERBEROS__KINIT_PATH -# -kinit_path = kinit - -# Designates the path to the Kerberos keytab file for the Airflow user -# -# Variable: AIRFLOW__KERBEROS__KEYTAB -# -keytab = airflow.keytab - -# Allow to disable ticket forwardability. -# -# Variable: AIRFLOW__KERBEROS__FORWARDABLE -# -forwardable = True - -# Allow to remove source IP from token, useful when using token behind NATted Docker host. -# -# Variable: AIRFLOW__KERBEROS__INCLUDE_IP -# -include_ip = True - -[sensors] -# Sensor default timeout, 7 days by default (7 * 24 * 60 * 60). -# -# Variable: AIRFLOW__SENSORS__DEFAULT_TIMEOUT -# -default_timeout = 604800 - -[aws] -# This section contains settings for Amazon Web Services (AWS) integration. 
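To make the create_cron_data_intervals choice above concrete, a small worked illustration (dates assumed) for a DAG with schedule="0 0 * * *":

    # create_cron_data_intervals = True  (CronDataIntervalTimetable)
    #   the run with logical date 2024-01-01 00:00 covers the interval
    #   2024-01-01 -> 2024-01-02 and is scheduled at the end of it, 2024-01-02 00:00
    # create_cron_data_intervals = False (CronTriggerTimetable)
    #   a run is created at 2024-01-01 00:00 and its logical date is that same instant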
- -# session_factory = -cloudwatch_task_handler_json_serializer = airflow.providers.amazon.aws.log.cloudwatch_task_handler.json_serialize_legacy - -[aws_batch_executor] -# This section only applies if you are using the AwsBatchExecutor in -# Airflow's ``[core]`` configuration. -# For more information on any of these execution parameters, see the link below: -# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/batch.html#Batch.Client.submit_job -# For boto3 credential management, see -# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html - -conn_id = aws_default -# region_name = -max_submit_job_attempts = 3 -check_health_on_startup = True -# job_name = -# job_queue = -# job_definition = -# submit_job_kwargs = - -[aws_ecs_executor] -# This section only applies if you are using the AwsEcsExecutor in -# Airflow's ``[core]`` configuration. -# For more information on any of these execution parameters, see the link below: -# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ecs/client/run_task.html -# For boto3 credential management, see -# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html - -conn_id = aws_default -# region_name = -assign_public_ip = False -# cluster = -# capacity_provider_strategy = -# container_name = -# launch_type = -platform_version = LATEST -# security_groups = -# subnets = -# task_definition = -max_run_task_attempts = 3 -# run_task_kwargs = -check_health_on_startup = True - -[aws_auth_manager] -# This section only applies if you are using the AwsAuthManager. In other words, if you set -# ``[core] auth_manager = airflow.providers.amazon.aws.auth_manager.aws_auth_manager.AwsAuthManager`` in -# Airflow's configuration. - -enable = False -conn_id = aws_default -# region_name = -# saml_metadata_url = -# avp_policy_store_id = - -[celery_kubernetes_executor] -# This section only applies if you are using the ``CeleryKubernetesExecutor`` in -# ``[core]`` section above - -# Define when to send a task to ``KubernetesExecutor`` when using ``CeleryKubernetesExecutor``. -# When the queue of a task is the value of ``kubernetes_queue`` (default ``kubernetes``), -# the task is executed via ``KubernetesExecutor``, -# otherwise via ``CeleryExecutor`` -# -# Variable: AIRFLOW__CELERY_KUBERNETES_EXECUTOR__KUBERNETES_QUEUE -# -kubernetes_queue = kubernetes - -[celery] -# This section only applies if you are using the CeleryExecutor in -# ``[core]`` section above - -# The app name that will be used by celery -# -# Variable: AIRFLOW__CELERY__CELERY_APP_NAME -# -celery_app_name = airflow.providers.celery.executors.celery_executor - -# The concurrency that will be used when starting workers with the -# ``airflow celery worker`` command. This defines the number of task instances that -# a worker will take, so size up your workers based on the resources on -# your worker box and the nature of your tasks -# -# Variable: AIRFLOW__CELERY__WORKER_CONCURRENCY -# -worker_concurrency = 32 - -# The maximum and minimum number of pool processes that will be used to dynamically resize -# the pool based on load.Enable autoscaling by providing max_concurrency,min_concurrency -# with the ``airflow celery worker`` command (always keep minimum processes, -# but grow to maximum if necessary). -# Pick these numbers based on resources on worker box and the nature of the task. -# If autoscale option is available, worker_concurrency will be ignored. 
-# https://docs.celeryq.dev/en/latest/reference/celery.bin.worker.html#cmdoption-celery-worker-autoscale -# -# Example: worker_autoscale = 16,12 -# -# Variable: AIRFLOW__CELERY__WORKER_AUTOSCALE -# -# worker_autoscale = - -# Used to increase the number of tasks that a worker prefetches which can improve performance. -# The number of processes multiplied by worker_prefetch_multiplier is the number of tasks -# that are prefetched by a worker. A value greater than 1 can result in tasks being unnecessarily -# blocked if there are multiple workers and one worker prefetches tasks that sit behind long -# running tasks while another worker has unutilized processes that are unable to process the already -# claimed blocked tasks. -# https://docs.celeryq.dev/en/stable/userguide/optimizing.html#prefetch-limits -# -# Variable: AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER -# -worker_prefetch_multiplier = 2 - -# Specify if remote control of the workers is enabled. -# In some cases when the broker does not support remote control, Celery creates lots of -# ``.*reply-celery-pidbox`` queues. You can prevent this by setting this to false. -# However, with this disabled Flower won't work. -# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/index.html#broker-overview -# -# Variable: AIRFLOW__CELERY__WORKER_ENABLE_REMOTE_CONTROL -# -worker_enable_remote_control = true - -# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally -# a sqlalchemy database. Refer to the Celery documentation for more information. -# -# Variable: AIRFLOW__CELERY__BROKER_URL -# -# This will be configured via environment variables, as it differs between master and workers. -# broker_url = - -# The Celery result_backend. When a job finishes, it needs to update the -# metadata of the job. Therefore it will post a message on a message bus, -# or insert it into a database (depending of the backend) -# This status is used by the scheduler to update the state of the task -# The use of a database is highly recommended -# When not specified, sql_alchemy_conn with a db+ scheme prefix will be used -# https://docs.celeryq.dev/en/latest/userguide/configuration.html#task-result-backend-settings -# -# Example: result_backend = db+postgresql://postgres:airflow@postgres/airflow -# -# Variable: AIRFLOW__CELERY__RESULT_BACKEND -# -# The result_backend is intentionally left blank. -# When blank, Airflow's CeleryExecutor defaults to using the value from -# `sql_alchemy_conn` as the result backend, which is the recommended setup. -result_backend = - -# Optional configuration dictionary to pass to the Celery result backend SQLAlchemy engine. -# -# Example: result_backend_sqlalchemy_engine_options = {"pool_recycle": 1800} -# -# Variable: AIRFLOW__CELERY__RESULT_BACKEND_SQLALCHEMY_ENGINE_OPTIONS -# -result_backend_sqlalchemy_engine_options = - -# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start -# it ``airflow celery flower``. 
This defines the IP that Celery Flower runs on -# -# Variable: AIRFLOW__CELERY__FLOWER_HOST -# -flower_host = 0.0.0.0 - -# The root URL for Flower -# -# Example: flower_url_prefix = /flower -# -# Variable: AIRFLOW__CELERY__FLOWER_URL_PREFIX -# -flower_url_prefix = - -# This defines the port that Celery Flower runs on -# -# Variable: AIRFLOW__CELERY__FLOWER_PORT -# -flower_port = 5555 - -# Securing Flower with Basic Authentication -# Accepts user:password pairs separated by a comma -# -# Example: flower_basic_auth = user1:password1,user2:password2 -# -# Variable: AIRFLOW__CELERY__FLOWER_BASIC_AUTH -# -flower_basic_auth = - -# How many processes CeleryExecutor uses to sync task state. -# 0 means to use max(1, number of cores - 1) processes. -# -# Variable: AIRFLOW__CELERY__SYNC_PARALLELISM -# -sync_parallelism = 0 - -# Import path for celery configuration options -# -# Variable: AIRFLOW__CELERY__CELERY_CONFIG_OPTIONS -# -celery_config_options = airflow.providers.celery.executors.default_celery.DEFAULT_CELERY_CONFIG - -# -# Variable: AIRFLOW__CELERY__SSL_ACTIVE -# -ssl_active = False - -# Path to the client key. -# -# Variable: AIRFLOW__CELERY__SSL_KEY -# -ssl_key = - -# Path to the client certificate. -# -# Variable: AIRFLOW__CELERY__SSL_CERT -# -ssl_cert = - -# Path to the CA certificate. -# -# Variable: AIRFLOW__CELERY__SSL_CACERT -# -ssl_cacert = - -# Celery Pool implementation. -# Choices include: ``prefork`` (default), ``eventlet``, ``gevent`` or ``solo``. -# See: -# https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency -# https://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html -# -# Variable: AIRFLOW__CELERY__POOL -# -pool = prefork - -# The number of seconds to wait before timing out ``send_task_to_executor`` or -# ``fetch_celery_task_state`` operations. -# -# Variable: AIRFLOW__CELERY__OPERATION_TIMEOUT -# -operation_timeout = 1.0 - -task_acks_late = True -# Celery task will report its status as 'started' when the task is executed by a worker. -# This is used in Airflow to keep track of the running tasks and if a Scheduler is restarted -# or run in HA mode, it can adopt the orphan tasks launched by previous SchedulerJob. -# -# Variable: AIRFLOW__CELERY__TASK_TRACK_STARTED -# -task_track_started = True - -# The Maximum number of retries for publishing task messages to the broker when failing -# due to ``AirflowTaskTimeout`` error before giving up and marking Task as failed. -# -# Variable: AIRFLOW__CELERY__TASK_PUBLISH_MAX_RETRIES -# -task_publish_max_retries = 3 - -# Worker initialisation check to validate Metadata Database connection -# -# Variable: AIRFLOW__CELERY__WORKER_PRECHECK -# -worker_precheck = False - -# Extra celery configs to include in the celery worker. -# Any of the celery config can be added to this config and it -# will be applied while starting the celery worker. e.g. {"worker_max_tasks_per_child": 10} -# See also: -# https://docs.celeryq.dev/en/stable/userguide/configuration.html#configuration-and-defaults -# -# Variable: AIRFLOW__CELERY__EXTRA_CELERY_CONFIG -# -extra_celery_config = {} - -[celery_broker_transport_options] -# This section is for specifying options which can be passed to the -# underlying celery broker transport. See: -# https://docs.celeryq.dev/en/latest/userguide/configuration.html#std:setting-broker_transport_options - -# The visibility timeout defines the number of seconds to wait for the worker -# to acknowledge the task before the message is redelivered to another worker. 
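With the values above, each worker may reserve up to worker_concurrency x worker_prefetch_multiplier = 32 x 2 = 64 messages at a time. In addition, because task_acks_late = True and the broker here is Redis, a task that runs longer than the broker's visibility timeout can be redelivered to another worker; for long-running download tasks it is common to raise it well beyond the longest expected task duration, e.g. (value assumed, not taken from this deployment):

    visibility_timeout = 86400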
-# Make sure to increase the visibility timeout to match the time of the longest -# ETA you're planning to use. -# visibility_timeout is only supported for Redis and SQS celery brokers. -# See: -# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#visibility-timeout -# -# Example: visibility_timeout = 21600 -# -# Variable: AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__VISIBILITY_TIMEOUT -# -# visibility_timeout = - -# The sentinel_kwargs parameter allows passing additional options to the Sentinel client. -# In a typical scenario where Redis Sentinel is used as the broker and Redis servers are -# password-protected, the password needs to be passed through this parameter. Although its -# type is string, it is required to pass a string that conforms to the dictionary format. -# See: -# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#configuration -# -# Example: sentinel_kwargs = {"password": "password_for_redis_server"} -# -# Variable: AIRFLOW__CELERY_BROKER_TRANSPORT_OPTIONS__SENTINEL_KWARGS -# -# sentinel_kwargs = - -[local_kubernetes_executor] -# This section only applies if you are using the ``LocalKubernetesExecutor`` in -# ``[core]`` section above - -# Define when to send a task to ``KubernetesExecutor`` when using ``LocalKubernetesExecutor``. -# When the queue of a task is the value of ``kubernetes_queue`` (default ``kubernetes``), -# the task is executed via ``KubernetesExecutor``, -# otherwise via ``LocalExecutor`` -# -# Variable: AIRFLOW__LOCAL_KUBERNETES_EXECUTOR__KUBERNETES_QUEUE -# -kubernetes_queue = kubernetes - -[kubernetes_executor] -# Kwargs to override the default urllib3 Retry used in the kubernetes API client -# -# Example: api_client_retry_configuration = { "total": 3, "backoff_factor": 0.5 } -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__API_CLIENT_RETRY_CONFIGURATION -# -api_client_retry_configuration = - -# Flag to control the information added to kubernetes executor logs for better traceability -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__LOGS_TASK_METADATA -# -logs_task_metadata = False - -# Path to the YAML pod file that forms the basis for KubernetesExecutor workers. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__POD_TEMPLATE_FILE -# -pod_template_file = - -# The repository of the Kubernetes Image for the Worker to Run -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_CONTAINER_REPOSITORY -# -worker_container_repository = - -# The tag of the Kubernetes Image for the Worker to Run -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_CONTAINER_TAG -# -worker_container_tag = - -# The Kubernetes namespace where airflow workers should be created. Defaults to ``default`` -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__NAMESPACE -# -namespace = default - -# If True, all worker pods will be deleted upon termination -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__DELETE_WORKER_PODS -# -delete_worker_pods = True - -# If False (and delete_worker_pods is True), -# failed worker pods will not be deleted so users can investigate them. -# This only prevents removal of worker pods where the worker itself failed, -# not when the task it ran failed. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__DELETE_WORKER_PODS_ON_FAILURE -# -delete_worker_pods_on_failure = False - -worker_pod_pending_fatal_container_state_reasons = CreateContainerConfigError,ErrImagePull,CreateContainerError,ImageInspectError, InvalidImageName -# Number of Kubernetes Worker Pod creation calls per scheduler loop. 
-# Note that the current default of "1" will only launch a single pod -# per-heartbeat. It is HIGHLY recommended that users increase this -# number to match the tolerance of their kubernetes cluster for -# better performance. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_PODS_CREATION_BATCH_SIZE -# -worker_pods_creation_batch_size = 1 - -# Allows users to launch pods in multiple namespaces. -# Will require creating a cluster-role for the scheduler, -# or use multi_namespace_mode_namespace_list configuration. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__MULTI_NAMESPACE_MODE -# -multi_namespace_mode = False - -# If multi_namespace_mode is True while scheduler does not have a cluster-role, -# give the list of namespaces where the scheduler will schedule jobs -# Scheduler needs to have the necessary permissions in these namespaces. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__MULTI_NAMESPACE_MODE_NAMESPACE_LIST -# -multi_namespace_mode_namespace_list = - -# Use the service account kubernetes gives to pods to connect to kubernetes cluster. -# It's intended for clients that expect to be running inside a pod running on kubernetes. -# It will raise an exception if called from a process not running in a kubernetes environment. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__IN_CLUSTER -# -in_cluster = True - -# When running with in_cluster=False change the default cluster_context or config_file -# options to Kubernetes client. Leave blank these to use default behaviour like ``kubectl`` has. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__CLUSTER_CONTEXT -# -# cluster_context = - -# Path to the kubernetes configfile to be used when ``in_cluster`` is set to False -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__CONFIG_FILE -# -# config_file = - -# Keyword parameters to pass while calling a kubernetes client core_v1_api methods -# from Kubernetes Executor provided as a single line formatted JSON dictionary string. -# List of supported params are similar for all core_v1_apis, hence a single config -# variable for all apis. See: -# https://raw.githubusercontent.com/kubernetes-client/python/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/api/core_v1_api.py -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__KUBE_CLIENT_REQUEST_ARGS -# -kube_client_request_args = - -# Optional keyword arguments to pass to the ``delete_namespaced_pod`` kubernetes client -# ``core_v1_api`` method when using the Kubernetes Executor. -# This should be an object and can contain any of the options listed in the ``v1DeleteOptions`` -# class defined here: -# https://github.com/kubernetes-client/python/blob/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/models/v1_delete_options.py#L19 -# -# Example: delete_option_kwargs = {"grace_period_seconds": 10} -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__DELETE_OPTION_KWARGS -# -delete_option_kwargs = - -# Enables TCP keepalive mechanism. This prevents Kubernetes API requests to hang indefinitely -# when idle connection is time-outed on services like cloud load balancers or firewalls. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__ENABLE_TCP_KEEPALIVE -# -enable_tcp_keepalive = True - -# When the `enable_tcp_keepalive` option is enabled, TCP probes a connection that has -# been idle for `tcp_keep_idle` seconds. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TCP_KEEP_IDLE -# -tcp_keep_idle = 120 - -# When the `enable_tcp_keepalive` option is enabled, if Kubernetes API does not respond -# to a keepalive probe, TCP retransmits the probe after `tcp_keep_intvl` seconds. 
-# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TCP_KEEP_INTVL -# -tcp_keep_intvl = 30 - -# When the `enable_tcp_keepalive` option is enabled, if Kubernetes API does not respond -# to a keepalive probe, TCP retransmits the probe `tcp_keep_cnt number` of times before -# a connection is considered to be broken. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TCP_KEEP_CNT -# -tcp_keep_cnt = 6 - -# Set this to false to skip verifying SSL certificate of Kubernetes python client. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__VERIFY_SSL -# -verify_ssl = True - -# How often in seconds to check for task instances stuck in "queued" status without a pod -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__WORKER_PODS_QUEUED_CHECK_INTERVAL -# -worker_pods_queued_check_interval = 60 - -# Path to a CA certificate to be used by the Kubernetes client to verify the server's SSL certificate. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__SSL_CA_CERT -# -ssl_ca_cert = - -# The Maximum number of retries for queuing the task to the kubernetes scheduler when -# failing due to Kube API exceeded quota errors before giving up and marking task as failed. -# -1 for unlimited times. -# -# Variable: AIRFLOW__KUBERNETES_EXECUTOR__TASK_PUBLISH_MAX_RETRIES -# -task_publish_max_retries = 0 - -[common.io] -# Common IO configuration section - -# Path to a location on object storage where XComs can be stored in url format. -# -# Example: xcom_objectstorage_path = s3://conn_id@bucket/path -# -# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_PATH -# -xcom_objectstorage_path = - -# Threshold in bytes for storing XComs in object storage. -1 means always store in the -# database. 0 means always store in object storage. Any positive number means -# it will be stored in object storage if the size of the value is greater than the threshold. -# -# Example: xcom_objectstorage_threshold = 1000000 -# -# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_THRESHOLD -# -xcom_objectstorage_threshold = -1 - -# Compression algorithm to use when storing XComs in object storage. Supported algorithms -# are a.o.: snappy, zip, gzip, bz2, and lzma. If not specified, no compression will be used. -# Note that the compression algorithm must be available in the Python installation (e.g. -# python-snappy for snappy). Zip, gz, bz2 are available by default. -# -# Example: xcom_objectstorage_compression = gz -# -# Variable: AIRFLOW__COMMON.IO__XCOM_OBJECTSTORAGE_COMPRESSION -# -xcom_objectstorage_compression = - - - -[fab] -# This section contains configs specific to FAB provider. - -# Boolean for enabling rate limiting on authentication endpoints. -# -# Variable: AIRFLOW__FAB__AUTH_RATE_LIMITED -# -auth_rate_limited = True - -# Rate limit for authentication endpoints. -# -# Variable: AIRFLOW__FAB__AUTH_RATE_LIMIT -# -auth_rate_limit = 5 per 40 second - -# Update FAB permissions and sync security manager roles -# on webserver startup -# -# Variable: AIRFLOW__FAB__UPDATE_FAB_PERMS -# -update_fab_perms = True - -[imap] -# Options for IMAP provider. - -# ssl_context = - -[azure_remote_logging] -# Configuration that needs to be set for enable remote logging in Azure Blob Storage - -remote_wasb_log_container = airflow-logs - -[openlineage] -# This section applies settings for OpenLineage integration. -# More about configuration and it's precedence can be found at -# https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html#transport-setup - -# Disable sending events without uninstalling the OpenLineage Provider by setting this to true. 
-# -# Variable: AIRFLOW__OPENLINEAGE__DISABLED -# -disabled = False - -# Exclude some Operators from emitting OpenLineage events by passing a string of semicolon separated -# full import paths of Operators to disable. -# -# Example: disabled_for_operators = airflow.providers.standard.operators.bash.BashOperator; airflow.providers.standard.operators.python.PythonOperator -# -# Variable: AIRFLOW__OPENLINEAGE__DISABLED_FOR_OPERATORS -# -disabled_for_operators = - -# If this setting is enabled, OpenLineage integration won't collect and emit metadata, -# unless you explicitly enable it per `DAG` or `Task` using `enable_lineage` method. -# -# Variable: AIRFLOW__OPENLINEAGE__SELECTIVE_ENABLE -# -selective_enable = False - -# Set namespace that the lineage data belongs to, so that if you use multiple OpenLineage producers, -# events coming from them will be logically separated. -# -# Example: namespace = my_airflow_instance_1 -# -# Variable: AIRFLOW__OPENLINEAGE__NAMESPACE -# -# namespace = - -# Register custom OpenLineage Extractors by passing a string of semicolon separated full import paths. -# -# Example: extractors = full.path.to.ExtractorClass;full.path.to.AnotherExtractorClass -# -# Variable: AIRFLOW__OPENLINEAGE__EXTRACTORS -# -# extractors = - -# Register custom run facet functions by passing a string of semicolon separated full import paths. -# -# Example: custom_run_facets = full.path.to.custom_facet_function;full.path.to.another_custom_facet_function -# -# Variable: AIRFLOW__OPENLINEAGE__CUSTOM_RUN_FACETS -# -custom_run_facets = - -# Specify the path to the YAML configuration file. -# This ensures backwards compatibility with passing config through the `openlineage.yml` file. -# -# Example: config_path = full/path/to/openlineage.yml -# -# Variable: AIRFLOW__OPENLINEAGE__CONFIG_PATH -# -config_path = - -# Pass OpenLineage Client transport configuration as JSON string. It should contain type of the -# transport and additional options (different for each transport type). For more details see: -# https://openlineage.io/docs/client/python/#built-in-transport-types -# -# Currently supported types are: -# -# * HTTP -# * Kafka -# * Console -# * File -# -# Example: transport = {"type": "http", "url": "http://localhost:5000", "endpoint": "api/v1/lineage"} -# -# Variable: AIRFLOW__OPENLINEAGE__TRANSPORT -# -transport = - -# Disable the inclusion of source code in OpenLineage events by setting this to `true`. -# By default, several Operators (e.g. Python, Bash) will include their source code in the events -# unless disabled. -# -# Variable: AIRFLOW__OPENLINEAGE__DISABLE_SOURCE_CODE -# -disable_source_code = False - -# Number of processes to utilize for processing DAG state changes -# in an asynchronous manner within the scheduler process. -# -# Variable: AIRFLOW__OPENLINEAGE__DAG_STATE_CHANGE_PROCESS_POOL_SIZE -# -dag_state_change_process_pool_size = 1 - -# Maximum amount of time (in seconds) that OpenLineage can spend executing metadata extraction. -# -# Variable: AIRFLOW__OPENLINEAGE__EXECUTION_TIMEOUT -# -execution_timeout = 10 - -# If true, OpenLineage event will include full task info - potentially containing large fields. -# -# Variable: AIRFLOW__OPENLINEAGE__INCLUDE_FULL_TASK_INFO -# -include_full_task_info = False - -# If true, OpenLineage events will include information useful for debugging - potentially -# containing large fields e.g. all installed packages and their versions. 
-# -# Variable: AIRFLOW__OPENLINEAGE__DEBUG_MODE -# -debug_mode = False - -# Automatically inject OpenLineage's parent job (namespace, job name, run id) information into Spark -# application properties for supported Operators. -# -# Variable: AIRFLOW__OPENLINEAGE__SPARK_INJECT_PARENT_JOB_INFO -# -spark_inject_parent_job_info = False - -[smtp_provider] -# Options for SMTP provider. - -# ssl context to use when using SMTP and IMAP SSL connections. By default, the context is "default" -# which sets it to ``ssl.create_default_context()`` which provides the right balance between -# compatibility and security, it however requires that certificates in your operating system are -# updated and that SMTP/IMAP servers of yours have valid certificates that have corresponding public -# keys installed on your machines. You can switch it to "none" if you want to disable checking -# of the certificates, but it is not recommended as it allows MITM (man-in-the-middle) attacks -# if your infrastructure is not sufficiently secured. It should only be set temporarily while you -# are fixing your certificate configuration. This can be typically done by upgrading to newer -# version of the operating system you run Airflow components on,by upgrading/refreshing proper -# certificates in the OS or by updating certificates for your mail servers. -# -# If you do not set this option explicitly, it will use Airflow "email.ssl_context" configuration, -# but if this configuration is not present, it will use "default" value. -# -# Example: ssl_context = default -# -# Variable: AIRFLOW__SMTP_PROVIDER__SSL_CONTEXT -# -# ssl_context = - -# Allows overriding of the standard templated email subject line when the SmtpNotifier is used. -# Must provide a path to the template. -# -# Example: templated_email_subject_path = path/to/override/email_subject.html -# -# Variable: AIRFLOW__SMTP_PROVIDER__TEMPLATED_EMAIL_SUBJECT_PATH -# -# templated_email_subject_path = - -# Allows overriding of the standard templated email path when the SmtpNotifier is used. Must provide -# a path to the template. -# -# Example: templated_html_content_path = path/to/override/email.html -# -# Variable: AIRFLOW__SMTP_PROVIDER__TEMPLATED_HTML_CONTENT_PATH -# -# templated_html_content_path = - -[docker] -docker_url = unix://var/run/docker.sock diff --git a/airflow/config/airflow_local_settings.py b/airflow/config/airflow_local_settings.py deleted file mode 100644 index c963cb9..0000000 --- a/airflow/config/airflow_local_settings.py +++ /dev/null @@ -1,26 +0,0 @@ -import logging -import os -import sys -from copy import deepcopy -from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG - -logger = logging.getLogger(__name__) - -# Add the config directory to the path to allow for local imports like `custom_task_hooks`. -# This is necessary because this file is executed by the Airflow scheduler in a context -# where the config directory is not automatically on the Python path. -config_dir = os.path.dirname(os.path.abspath(__file__)) -if config_dir not in sys.path: - sys.path.insert(0, config_dir) - logger.info(f"Added '{config_dir}' to sys.path for local imports.") - - -LOGGING_CONFIG = deepcopy(DEFAULT_LOGGING_CONFIG) - -# The task_instance_mutation_hook is now self-registering to be robust -# against different loading configurations. See custom_task_hooks.py for details. 
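# How this module is usually picked up (a sketch; exact paths are deployment-specific
# and assumed here): Airflow prepends $AIRFLOW_HOME/config to sys.path and imports
# airflow_local_settings from it, so mounting this file at
# /opt/airflow/config/airflow_local_settings.py in every scheduler and worker
# container is sufficient for the hook below to be registered.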
-try: - import custom_task_hooks - logger.info(f"Successfully imported custom_task_hooks module (Version: {getattr(custom_task_hooks, '__version__', 'unknown')}).") -except ImportError as e: - logger.warning(f"Could not import custom_task_hooks: {e}. Worker pinning will not function.", exc_info=True) diff --git a/airflow/config/camoufox_endpoints.json b/airflow/config/camoufox_endpoints.json deleted file mode 100644 index fc578e3..0000000 --- a/airflow/config/camoufox_endpoints.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "endpoints": {} -} diff --git a/airflow/config/custom_task_hooks.py b/airflow/config/custom_task_hooks.py deleted file mode 100644 index a2e2dd6..0000000 --- a/airflow/config/custom_task_hooks.py +++ /dev/null @@ -1,58 +0,0 @@ -# Version: 2025-09-22-08 -__version__ = "2025-09-22-08" -# This file contains custom hooks for the Airflow environment. -import logging -from airflow import settings -from airflow.configuration import conf - -logger = logging.getLogger(__name__) - -def task_instance_mutation_hook(ti): - """ - This hook modifies the task instance queue at runtime for worker pinning. - It relies exclusively on parsing the queue from the run_id, which is guaranteed - to be set by the dispatcher DAG. This avoids database race conditions. - """ - logger.debug(f"MUTATION HOOK: Running for dag '{ti.dag_id}', task '{ti.task_id}'.") - # This hook targets all worker DAGs, which follow a naming convention. - if 'worker_per_url' in ti.dag_id: - # If the run_id isn't populated yet, just return. The hook may be called again. - if not ti.run_id: - logger.debug(f"MUTATION HOOK: run_id not yet available for task '{ti.task_id}'. Skipping this invocation.") - return - - logger.debug(f"MUTATION HOOK: Matched DAG '{ti.dag_id}'. Attempting to pin task '{ti.task_id}' for run_id '{ti.run_id}'.") - worker_queue = None - # The dispatcher embeds the queue in the run_id like: ..._q_queue-dl-worker-hostname - if ti.run_id and '_q_' in ti.run_id: - try: - parsed_queue = ti.run_id.split('_q_')[-1] - # Check for valid v1 (dl) or v2 (auth/dl) queue prefixes. - if parsed_queue.startswith(('queue-dl-', 'queue-auth-')): - worker_queue = parsed_queue - except Exception as e: - logger.error(f"MUTATION HOOK: CRITICAL: Error parsing queue from run_id '{ti.run_id}': {e}.", exc_info=True) - - if worker_queue: - logger.debug(f"MUTATION HOOK: Pinning task '{ti.task_id}' (run_id: {ti.run_id}) to queue '{worker_queue}' from run_id.") - ti.queue = worker_queue - else: - # If the queue is not found, it's a critical failure in the dispatching logic. - # We fall back to the default queue but log it as a high-severity warning. - fallback_queue = 'queue-auth' if 'auth' in ti.dag_id else 'queue-dl' - logger.warning(f"MUTATION HOOK: Could not find worker queue in run_id '{ti.run_id}'. Falling back to '{fallback_queue}'. Pinning will fail.") - ti.queue = fallback_queue - - -# --- Hook Registration --- -# This registration logic is placed here to work around environments where this file -# might be loaded directly as the local settings file via AIRFLOW__CORE__LOCAL_SETTINGS_PATH. 
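# For context, a sketch of how a dispatcher could encode the target queue into the
# run_id that the hook above parses. The DAG id, queue name, and operator usage are
# illustrative assumptions, not code from this repository:
#
#   from datetime import datetime, timezone
#   from airflow.operators.trigger_dagrun import TriggerDagRunOperator
#
#   target_queue = "queue-dl-worker-host01"      # per-worker queue name
#   TriggerDagRunOperator(
#       task_id="dispatch_url",
#       trigger_dag_id="ytdlp_worker_per_url",   # must contain 'worker_per_url'
#       trigger_run_id=f"dispatch__{datetime.now(timezone.utc):%Y%m%dT%H%M%S}_q_{target_queue}",
#   )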
-try: - if not conf.get('core', 'executor').lower().startswith('debug'): - settings.task_instance_mutation_hook = task_instance_mutation_hook - logger.info(f"Successfully self-registered task_instance_mutation_hook (Version: {__version__}) for worker pinning.") - else: - logger.info("Skipping self-registration of task_instance_mutation_hook due to DebugExecutor.") -except Exception as e: - logger.warning(f"Could not self-register custom_task_hooks: {e}. Worker pinning may not function.", exc_info=True) - diff --git a/airflow/config/docker_hub_repo.json b/airflow/config/docker_hub_repo.json deleted file mode 100644 index f7d48b5..0000000 --- a/airflow/config/docker_hub_repo.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "docker_hub": - { - "conn_type": "docker", - "host": "https://index.docker.io/v1/", - "login": "pangramia", - "password": "dckr_pat_PEDco1yeURKYFY9cSXTCokQNb4A" - } -} \ No newline at end of file diff --git a/airflow/config/envoy.yaml b/airflow/config/envoy.yaml deleted file mode 100644 index fdd7b82..0000000 --- a/airflow/config/envoy.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# Jinja2 template for Envoy configuration -admin: - address: - socket_address: - address: 0.0.0.0 - port_value: 9901 - -static_resources: - listeners: - # Listener for ytdlp-ops Thrift traffic - - name: ytdlp_ops_listener - address: - socket_address: - address: 0.0.0.0 - port_value: 9080 - filter_chains: - - filters: - - name: envoy.filters.network.thrift_proxy - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.thrift_proxy.v3.ThriftProxy - stat_prefix: thrift_ingress - transport: FRAMED - protocol: BINARY - route_config: - name: local_route - routes: - - match: - method_name: "" - route: - cluster: ytdlp_ops_cluster - - clusters: - # Cluster for the ytdlp-ops workers - - name: ytdlp_ops_cluster - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - health_checks: - - timeout: 1s - interval: 5s - unhealthy_threshold: 3 - healthy_threshold: 2 - tcp_health_check: {} - load_assignment: - cluster_name: ytdlp_ops_cluster - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: ytdlp-ops-server - port_value: 9090 diff --git a/airflow/config/minio_default_conn.json.j2 b/airflow/config/minio_default_conn.json.j2 deleted file mode 100644 index 24be986..0000000 --- a/airflow/config/minio_default_conn.json.j2 +++ /dev/null @@ -1,16 +0,0 @@ -{ - "minio_default": { - "conn_type": "aws", - "host": "{{ hostvars[groups['airflow_master'][0]].ansible_host }}", - "login": "admin", - "password": "0153093693-0009", - "port": 80, - "extra": { - "endpoint_url": "http://{{ hostvars[groups['airflow_master'][0]].ansible_host }}:80", - "region_name": "us-east-1", - "aws_access_key_id": "admin", - "aws_secret_access_key": "0153093693-0009", - "verify": false - } - } -} diff --git a/airflow/config/redis_default_conn.json.j2 b/airflow/config/redis_default_conn.json.j2 deleted file mode 100644 index 6ab9d01..0000000 --- a/airflow/config/redis_default_conn.json.j2 +++ /dev/null @@ -1,10 +0,0 @@ -{ - "redis_default": - { - "conn_type": "redis", - "host": "{{ hostvars[groups['airflow_master'][0]].ansible_host }}", - "port": 52909, - "password": "{{ vault_redis_password }}", - "extra": "{\"db\": 0}" - } -} diff --git a/airflow/configs/.env.master b/airflow/configs/.env.master deleted file mode 100644 index 1f848ab..0000000 --- a/airflow/configs/.env.master +++ /dev/null @@ -1,35 +0,0 @@ -# This file is managed by Ansible. Do not edit manually. 
- -# --- Common Settings --- -HOSTNAME="af-green" -COMPOSE_PROJECT_NAME="ytdlp-ops-management" -TZ="Europe/Moscow" -service_role="management" - -# --- Docker Image Settings --- -YTDLP_OPS_IMAGE="pangramia/ytdlp-ops-server:latest" -AIRFLOW_IMAGE_NAME="pangramia/ytdlp-ops-airflow:latest" - -# --- Network Settings --- -ENVOY_PORT=9080 -ENVOY_ADMIN_PORT=9901 -YTDLP_BASE_PORT=9090 -YTDLP_WORKERS=3 -MANAGEMENT_SERVICE_PORT=9091 -REDIS_PORT=52909 -POSTGRES_PORT=5432 - -# --- Security Settings --- -REDIS_PASSWORD="rOhTAIlTFFylXsjhqwxnYxDChFc" -POSTGRES_PASSWORD="pgdb_pwd_A7bC2xY9zE1wV5uP" -AIRFLOW_ADMIN_PASSWORD="2r234sdfrt3q454arq45q355" -FLOWER_PASSWORD="dO4eXm7UkF81OdMvT8E2tIKFtPYPCzyzwlcZ4RyOmCsmG4qzrNFqM5sNTOT9" - -# --- User and Group IDs --- -AIRFLOW_UID=1003 -AIRFLOW_GID=1001 - -# --- Master-specific settings --- -MASTER_HOST_IP=89.253.221.173 -# Camoufox is not used on master, but the config generator expects the variable. -CAMOUFOX_PROXIES= diff --git a/airflow/configs/.env.worker b/airflow/configs/.env.worker deleted file mode 100644 index 9288246..0000000 --- a/airflow/configs/.env.worker +++ /dev/null @@ -1,4 +0,0 @@ -# This file should be generated from ansible/templates/.env.ytdlp.j2 -# Do not edit manually - your changes will be overwritten. -# -# To generate this file, run the Ansible playbook that processes the templates. diff --git a/airflow/configs/Caddyfile b/airflow/configs/Caddyfile deleted file mode 100644 index a6c1176..0000000 --- a/airflow/configs/Caddyfile +++ /dev/null @@ -1,33 +0,0 @@ -:8080 { - # Serve pre-compressed static assets and enable on-the-fly compression for other assets. - encode gzip - - # Define routes for static assets. - # Caddy will automatically look for pre-gzipped files (.gz) if available. - route /static/appbuilder* { - uri strip_prefix /static/appbuilder - root * /usr/share/caddy/static/appbuilder - file_server { - precompressed gzip - } - } - - route /static/dist* { - uri strip_prefix /static/dist - root * /usr/share/caddy/static/dist - file_server { - precompressed gzip - } - } - - # Reverse proxy all other requests to the Airflow webserver. - route { - reverse_proxy airflow-webserver:8080 { - # Set headers to ensure correct proxy behavior - header_up Host {http.request.host} - header_up X-Real-IP {http.request.remote.ip} - header_up X-Forwarded-For {http.request.remote.ip} - header_up X-Forwarded-Proto {http.request.scheme} - } - } -} diff --git a/airflow/configs/docker-compose-dl.yaml.j2 b/airflow/configs/docker-compose-dl.yaml.j2 deleted file mode 100644 index f0f2cbb..0000000 --- a/airflow/configs/docker-compose-dl.yaml.j2 +++ /dev/null @@ -1,297 +0,0 @@ -# Airflow remote DL worker configuration. -# This file should be used on a remote machine to run a download worker. -# It requires a master Airflow instance running with services exposed. -# -# Before running, create a .env file in this directory with: -# MASTER_HOST_IP=... a.b.c.d ... # IP address of the machine running docker-compose-master.yaml -# POSTGRES_PASSWORD=... # The password for the PostgreSQL database from the master compose file -# REDIS_PASSWORD=... # The password for Redis from the master compose file -# AIRFLOW_UID=... # User ID for file permissions, should match master ---- -x-airflow-common: - &airflow-common - # This should point to the same image used by the master. - # If you built a custom image for master, you need to push it to a registry - # and reference it here. 
- image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest} - # Add extra hosts here to allow workers to resolve other hosts by name. - # This section is auto-generated by Ansible from the inventory. - extra_hosts: -{% for host in groups['all'] %} - - "{{ hostvars[host]['inventory_hostname'] }}:{{ hostvars[host]['ansible_host'] | default(hostvars[host]['inventory_hostname']) }}" -{% endfor %} - env_file: - # The .env file is located in the project root (e.g., /srv/airflow_dl_worker), - # so we provide an absolute path to it. - - "{{ airflow_worker_dir }}/.env" - environment: - &airflow-common-env - - AIRFLOW__CORE__PARALLELISM: 128 - AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64 - AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8 - AIRFLOW__WEBSERVER__WORKERS: 5 - AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent" - - AIRFLOW__LOGGING__SECRET_MASK_EXCEPTION_ARGS: False - - - # Prevent slow webserver when low memory? - GUNICORN_CMD_ARGS: --max-requests 20 --max-requests-jitter 3 --worker-tmp-dir /dev/shm - - - # Airflow Core - AIRFLOW__CORE__EXECUTOR: CeleryExecutor - AIRFLOW__CORE__LOAD_EXAMPLES: 'false' - AIRFLOW__CORE__FERNET_KEY: '' # Should be same as master, but worker does not need it. - - # Backend connections - These should point to the master node - # Set MASTER_HOST_IP, POSTGRES_PASSWORD, and REDIS_PASSWORD in your .env file - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow - AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow - AIRFLOW__CELERY__BROKER_URL: redis://:${REDIS_PASSWORD}@${MASTER_HOST_IP}:{{ redis_port }}/0 - - # Remote Logging - connection is configured directly via environment variables - #_PIP_ADDITIONAL_REQUIREMENTS: ${{ '{' }}_PIP_ADDITIONAL_REQUIREMENTS:- apache-airflow-providers-docker apache-airflow-providers-http thrift>=0.16.0,<=0.20.0 backoff>=2.2.1 python-dotenv==1.0.1 psutil>=5.9.0 apache-airflow-providers-amazon{{ '}' }} - AIRFLOW__LOGGING__REMOTE_LOGGING: "True" - AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://videos/airflow-logs" - AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: s3_delivery_connection - AIRFLOW__LOGGING__ENCRYPT_S3_LOGS: "False" - #AIRFLOW__LOGGING__LOG_ID_TEMPLATE: "{dag_id}-{task_id}-{run_id}-{try_number}" - AIRFLOW__WEBSERVER__SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' - AIRFLOW__CORE__INTERNAL_API_SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' - AIRFLOW__CORE__LOCAL_SETTINGS_PATH: "/opt/airflow/config/custom_task_hooks.py" - - volumes: - # Mount dags to get any utility scripts, but the worker will pull the DAG from the DB - - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - # Mount logs locally in case remote logging fails - - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs - # Mount config for local settings and other configurations - - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - - ${AIRFLOW_PROJ_DIR:-.}/config/airflow.cfg:/opt/airflow/airflow.cfg - # Mount download directories - - ${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles - - ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles - - ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles - # Mount the generated pangramia package to ensure workers have the latest version - - ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia - # Use AIRFLOW_UID from .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image. 
- user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0" - -services: - airflow-worker-dl: - <<: *airflow-common - container_name: airflow-worker-dl-1 - hostname: ${HOSTNAME:-dl001} - # The DL worker listens on the generic dl queue AND its own dedicated queue. - command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001} - deploy: - resources: - limits: - memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G} - reservations: - memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G} - healthcheck: - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-dl@$$(hostname)"' - interval: 30s - timeout: 30s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - HOSTNAME: ${HOSTNAME:-dl001} - DUMB_INIT_SETSID: "0" - AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}" - AIRFLOW__CELERY__WORKER_TAGS: "dl" - AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8" - AIRFLOW__CELERY__POOL: "prefork" - AIRFLOW__CELERY__TASK_ACKS_LATE: "False" - AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" - AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h" - AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512MB - ports: - - "8793:8793" - networks: - - default - - proxynet - restart: always - - airflow-worker-s3: - <<: *airflow-common - container_name: airflow-worker-s3-1 - hostname: ${HOSTNAME:-s3-001} - # The S3 worker listens on the generic s3 queue AND its own dedicated queue. - command: airflow celery worker -q queue-s3,queue-s3-${HOSTNAME:-s3-001} - deploy: - resources: - limits: - memory: ${AIRFLOW_WORKER_S3_MEM_LIMIT:-1G} - reservations: - memory: ${AIRFLOW_WORKER_S3_MEM_RESERV:-256M} - healthcheck: - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-s3@$$(hostname)"' - interval: 30s - timeout: 30s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - S3_DELIVERY_AWS_ACCESS_KEY_ID: "{{ vault_s3_delivery_access_key_id }}" - S3_DELIVERY_AWS_SECRET_ACCESS_KEY: "{{ vault_s3_delivery_secret_access_key }}" - S3_DELIVERY_AWS_REGION: "{{ vault_s3_delivery_aws_region }}" - S3_DELIVERY_ENDPOINT: "{{ vault_s3_delivery_endpoint }}" - S3_DELIVERY_BUCKET: "{{ vault_s3_delivery_bucket }}" - HOSTNAME: ${HOSTNAME:-s3-001} - DUMB_INIT_SETSID: "0" - AIRFLOW__CELERY__WORKER_QUEUES: "queue-s3,queue-s3-${HOSTNAME:-s3-001}" - AIRFLOW__CELERY__WORKER_TAGS: "s3" - AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - # S3 tasks are lightweight. - AIRFLOW__CELERY__WORKER_AUTOSCALE: "2,1" - AIRFLOW__CELERY__POOL: "prefork" - AIRFLOW__CELERY__TASK_ACKS_LATE: "False" - AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" - AIRFLOW__CELERY__WORKER_NAME: "worker-s3@%h" - AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB - networks: - - default - - proxynet - restart: always - - airflow-worker-auth: - <<: *airflow-common - container_name: airflow-worker-auth-1 - hostname: ${HOSTNAME:-auth001} - # The Auth worker listens on the generic auth queue AND its own dedicated queue. 
- command: airflow celery worker -q queue-auth,queue-auth-${HOSTNAME:-auth001} - deploy: - resources: - limits: - memory: ${AIRFLOW_WORKER_AUTH_MEM_LIMIT:-4G} - reservations: - memory: ${AIRFLOW_WORKER_AUTH_MEM_RESERV:-1G} - healthcheck: - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-auth@$$(hostname)"' - interval: 30s - timeout: 30s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - HOSTNAME: ${HOSTNAME:-auth001} - DUMB_INIT_SETSID: "0" - AIRFLOW__CELERY__WORKER_QUEUES: "queue-auth,queue-auth-${HOSTNAME:-auth001}" - AIRFLOW__CELERY__WORKER_TAGS: "auth" - AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - # Auth tasks are less resource intensive but we want fewer of them to avoid service overload. - AIRFLOW__CELERY__WORKER_AUTOSCALE: "2,1" - AIRFLOW__CELERY__POOL: "prefork" - AIRFLOW__CELERY__TASK_ACKS_LATE: "False" - AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" - AIRFLOW__CELERY__WORKER_NAME: "worker-auth@%h" - AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB - networks: - - default - - proxynet - restart: always - - docker-socket-proxy: - profiles: - - disabled - image: tecnativa/docker-socket-proxy:0.1.1 - environment: - CONTAINERS: 1 - IMAGES: 1 - AUTH: 1 - POST: 1 - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro - restart: always - - airflow-worker-mgmt: - <<: *airflow-common - container_name: airflow-worker-mgmt-1 - hostname: ${HOSTNAME:-mgmt001} - # The Mgmt worker listens on the generic mgmt queue AND its own dedicated queue. - command: airflow celery worker -q queue-mgmt,queue-mgmt-${HOSTNAME:-mgmt001} - deploy: - resources: - limits: - memory: ${AIRFLOW_WORKER_MGMT_MEM_LIMIT:-2G} - reservations: - memory: ${AIRFLOW_WORKER_MGMT_MEM_RESERV:-512M} - healthcheck: - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-mgmt@$$(hostname)"' - interval: 30s - timeout: 30s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - HOSTNAME: ${HOSTNAME:-mgmt001} - DUMB_INIT_SETSID: "0" - AIRFLOW__CELERY__WORKER_QUEUES: "queue-mgmt,queue-mgmt-${HOSTNAME:-mgmt001}" - AIRFLOW__CELERY__WORKER_TAGS: "mgmt" - AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - # Mgmt tasks are lightweight. 
- AIRFLOW__CELERY__WORKER_AUTOSCALE: "4,2" - AIRFLOW__CELERY__POOL: "prefork" - AIRFLOW__CELERY__TASK_ACKS_LATE: "False" - AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" - AIRFLOW__CELERY__WORKER_NAME: "worker-mgmt@%h" - AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB - networks: - - default - - proxynet - restart: always - - aria2-pro: - container_name: aria2-pro - build: - context: "{{ airflow_worker_dir }}/aria2-pro-docker" - environment: - - PUID=${AIRFLOW_UID:-50000} - - PGID=0 - - UMASK_SET=022 - - RPC_SECRET={{ vault_aria2_rpc_secret }} - - RPC_PORT=6800 - - LISTEN_PORT=6888 - - DISK_CACHE=64M - - IPV6_MODE=false - - UPDATE_TRACKERS=false - - CUSTOM_TRACKER_URL= - - TZ=Asia/Shanghai - volumes: - - ${AIRFLOW_PROJ_DIR:-.}/aria2-config:/config - - ${AIRFLOW_PROJ_DIR:-.}/downloadfiles/videos/in-progress:/downloads - ports: - - "127.0.0.1:6800:6800" - - "6888:6888" - - "6888:6888/udp" - networks: - - proxynet - restart: unless-stopped - logging: - driver: json-file - options: - max-size: 1m - -networks: - proxynet: - name: airflow_proxynet - external: true diff --git a/airflow/configs/docker-compose-dl.yaml.v1.j2 b/airflow/configs/docker-compose-dl.yaml.v1.j2 deleted file mode 100644 index b073039..0000000 --- a/airflow/configs/docker-compose-dl.yaml.v1.j2 +++ /dev/null @@ -1,151 +0,0 @@ -# Airflow remote DL worker configuration. -# This file should be used on a remote machine to run a download worker. -# It requires a master Airflow instance running with services exposed. -# -# Before running, create a .env file in this directory with: -# MASTER_HOST_IP=... a.b.c.d ... # IP address of the machine running docker-compose-master.yaml -# POSTGRES_PASSWORD=... # The password for the PostgreSQL database from the master compose file -# REDIS_PASSWORD=... # The password for Redis from the master compose file -# AIRFLOW_UID=... # User ID for file permissions, should match master ---- -x-airflow-common: - &airflow-common - # This should point to the same image used by the master. - # If you built a custom image for master, you need to push it to a registry - # and reference it here. - image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest} - # Add extra hosts here to allow workers to resolve other hosts by name. - # This section is auto-generated by Ansible from the inventory. - extra_hosts: -{% for host in groups['all'] %} - - "{{ hostvars[host]['inventory_hostname'] }}:{{ hostvars[host]['ansible_host'] | default(hostvars[host]['inventory_hostname']) }}" -{% endfor %} - env_file: - # The .env file is located in the project root (e.g., /srv/airflow_dl_worker), - # so we provide an absolute path to it. - - "{{ airflow_worker_dir }}/.env" - environment: - &airflow-common-env - - AIRFLOW__CORE__PARALLELISM: 128 - AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64 - AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8 - AIRFLOW__WEBSERVER__WORKERS: 5 - AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent" - - AIRFLOW__LOGGING__SECRET_MASK_EXCEPTION_ARGS: False - - - # Prevent slow webserver when low memory? - GUNICORN_CMD_ARGS: --max-requests 20 --max-requests-jitter 3 --worker-tmp-dir /dev/shm - - - # Airflow Core - AIRFLOW__CORE__EXECUTOR: CeleryExecutor - AIRFLOW__CORE__LOAD_EXAMPLES: 'false' - AIRFLOW__CORE__FERNET_KEY: '' # Should be same as master, but worker does not need it. 
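The aria2-pro sidecar defined earlier in this worker stack publishes its RPC endpoint only on 127.0.0.1:6800 and protects it with the vault-provided RPC_SECRET. As a rough sketch of how a task on the same host could hand a transfer to it over aria2's XML-RPC interface (the URL, secret, and options below are placeholders):

```python
# Sketch: enqueue a download through the aria2 RPC endpoint published by the
# aria2-pro service (bound to 127.0.0.1:6800). The secret must match the
# RPC_SECRET injected from the vault; "example-secret" is a placeholder.
import xmlrpc.client

ARIA2_RPC_URL = "http://127.0.0.1:6800/rpc"
ARIA2_SECRET = "token:example-secret"  # aria2 expects the "token:" prefix

server = xmlrpc.client.ServerProxy(ARIA2_RPC_URL)

# aria2.addUri(secret, uris, options) returns a GID identifying the transfer.
gid = server.aria2.addUri(
    ARIA2_SECRET,
    ["https://example.com/video.mp4"],
    {"dir": "/downloads", "max-connection-per-server": "4"},
)

# Poll the transfer state; /downloads maps to downloadfiles/videos/in-progress on the host.
status = server.aria2.tellStatus(ARIA2_SECRET, gid, ["status", "completedLength", "totalLength"])
print(gid, status)
```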
- - # Backend connections - These should point to the master node - # Set MASTER_HOST_IP, POSTGRES_PASSWORD, and REDIS_PASSWORD in your .env file - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow - AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow - AIRFLOW__CELERY__BROKER_URL: redis://:${REDIS_PASSWORD}@${MASTER_HOST_IP}:{{ redis_port }}/0 - - # Remote Logging - connection is configured directly via environment variables - #_PIP_ADDITIONAL_REQUIREMENTS: ${{ '{' }}_PIP_ADDITIONAL_REQUIREMENTS:- apache-airflow-providers-docker apache-airflow-providers-http thrift>=0.16.0,<=0.20.0 backoff>=2.2.1 python-dotenv==1.0.1 psutil>=5.9.0 apache-airflow-providers-amazon{{ '}' }} - AIRFLOW__LOGGING__REMOTE_LOGGING: "True" - AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://airflow-logs" - AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: minio_default - AIRFLOW__LOGGING__ENCRYPT_S3_LOGS: "False" - #AIRFLOW__LOGGING__LOG_ID_TEMPLATE: "{dag_id}-{task_id}-{run_id}-{try_number}" - AIRFLOW__WEBSERVER__SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' - AIRFLOW__CORE__INTERNAL_API_SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' - AIRFLOW__CORE__LOCAL_SETTINGS_PATH: "/opt/airflow/config/custom_task_hooks.py" - - volumes: - # Mount dags to get any utility scripts, but the worker will pull the DAG from the DB - - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - # Mount logs locally in case remote logging fails - - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs - # Mount config for local settings and other configurations - - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - - ${AIRFLOW_PROJ_DIR:-.}/config/airflow.cfg:/opt/airflow/airflow.cfg - # Mount download directories - - ${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles - - ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles - - ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles - # Mount the generated pangramia package to ensure workers have the latest version - - ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia - # Use AIRFLOW_UID from .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image. - user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0" - -services: - airflow-worker: - <<: *airflow-common - container_name: airflow-dl-worker-1 - hostname: ${HOSTNAME:-dl001} - # The worker now listens on the generic queue AND its own dedicated queue. - # The hostname is dynamically inserted into the queue name. - command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001} - deploy: - resources: - limits: - # Increased from 4G to 8G to support higher memory per child process. - memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G} - reservations: - memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G} - healthcheck: - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-dl@$$(hostname)"' - interval: 30s - timeout: 30s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - HOSTNAME: ${HOSTNAME:-dl001} # Explicitly set inside container - DUMB_INIT_SETSID: "0" - AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}" - AIRFLOW__CELERY__WORKER_TAGS: "dl" - AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - # Use autoscaling to adjust number of workers based on load. - # Format is max_concurrency,min_concurrency. 
- AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8" - # Use prefork pool for better compatibility with blocking libraries. - AIRFLOW__CELERY__POOL: "prefork" - AIRFLOW__CELERY__TASK_ACKS_LATE: "False" - AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" - AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h" - AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - # Increased from 256MB to 512MB for memory-intensive yt-dlp tasks. - # This value is in KB. 512 * 1024 = 524288. - AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512MB - # The hostname is now managed by Docker Compose to ensure uniqueness when scaling. - # It will be generated based on project, service, and replica number (e.g., airflow-airflow-dl-worker-1). - # hostname: "dl-worker-${HOSTNAME_SUFFIX:-$$(hostname)}" - ports: - - "8793:8793" - networks: - - default - - proxynet - restart: always - - docker-socket-proxy: - profiles: - - disabled - image: tecnativa/docker-socket-proxy:0.1.1 - environment: - CONTAINERS: 1 - IMAGES: 1 - AUTH: 1 - POST: 1 - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro - restart: always - -networks: - proxynet: - name: airflow_proxynet - external: true diff --git a/airflow/configs/docker-compose-master.yaml.j2 b/airflow/configs/docker-compose-master.yaml.j2 deleted file mode 100644 index 110d119..0000000 --- a/airflow/configs/docker-compose-master.yaml.j2 +++ /dev/null @@ -1,574 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. -# -# WARNING: This configuration is for local development. Do not use it in a production deployment. -# -# This configuration supports basic configuration using environment variables or an .env file -# The following variables are supported: -# -# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. -# Default: apache/airflow:2.10.5 -# AIRFLOW_UID - User ID in Airflow containers -# Default: 50000 -# AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed. -# Default: . -# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode -# -# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). -# Default: airflow -# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). -# Default: airflow -# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. -# Use this option ONLY for quick checks. Installing requirements at container -# startup is done EVERY TIME the service is started. -# A better way is to build a custom image or extend the official image -# as described in https://airflow.apache.org/docs/docker-stack/build.html. 
-# Default: '' -# -# Feel free to modify this file to suit your needs. ---- -name: airflow-master -x-airflow-common: - &airflow-common - # In order to add custom dependencies or upgrade provider packages you can use your extended image. - # This will build the image from the Dockerfile in this directory and tag it. - image: ${{ '{' }}AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest{{ '}' }} - build: . - # Add extra hosts here to allow the master services (webserver, scheduler) to resolve - # the hostnames of your remote DL workers. This is crucial for fetching logs. - # Format: - "hostname:ip_address" - # This section is auto-generated by Ansible from the inventory. - extra_hosts: -{% for host in groups['all'] %} - - "{{ hostvars[host]['inventory_hostname'] }}:{{ hostvars[host]['ansible_host'] | default(hostvars[host]['inventory_hostname']) }}" -{% endfor %} - env_file: - # The .env file is located in the project root, one level above the 'configs' directory. - - ".env" - networks: - - proxynet - environment: - &airflow-common-env - AIRFLOW__CORE__PARALLELISM: 128 - AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64 - AIRFLOW__SCHEDULER__PARSING_PROCESSES: 4 - AIRFLOW__WEBSERVER__WORKER_CLASS: sync - AIRFLOW__WEBSERVER__WORKERS: 8 - - AIRFLOW__LOGGING__SECRET_MASK_EXCEPTION_ARGS: 'false' - - - # Prevent slow webserver when low memory? - GUNICORN_CMD_ARGS: --worker-tmp-dir /dev/shm - - AIRFLOW__CORE__EXECUTOR: CeleryExecutor - # For master services, connect to Postgres and Redis using internal Docker service names. - # Passwords are sourced from the .env file. - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@postgres:5432/airflow - AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@postgres:5432/airflow - - AIRFLOW__CELERY__BROKER_URL: redis://:${{ '{' }}REDIS_PASSWORD{{ '}' }}@redis:6379/0 - AIRFLOW__CORE__FERNET_KEY: '' - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' - AIRFLOW__CORE__LOAD_EXAMPLES: 'false' - AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' - AIRFLOW__WEBSERVER__SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ==' - AIRFLOW__WEBSERVER__WORKER_TIMEOUT: '120' - AIRFLOW__CORE__INTERNAL_API_SECRET_KEY: 'qmALu5JCAW0518WGAqkVZZQ==' - # yamllint disable rule:line-length - # Use simple http server on scheduler for health checks - # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server - # yamllint enable rule:line-length - AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' - AIRFLOW__DATABASE__LOAD_DEFAULT_CONNECTIONS: 'false' - AIRFLOW__LOGGING__REMOTE_LOGGING: 'true' - AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://airflow-logs" - AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: minio_default - AIRROW__LOGGING__ENCRYPT_S3_LOGS: 'false' - AIRFLOW__CORE__LOCAL_SETTINGS_PATH: "/opt/airflow/config/custom_task_hooks.py" - volumes: - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/dags:/opt/airflow/dags - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/logs:/opt/airflow/logs - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/config:/opt/airflow/config - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/config/airflow.cfg:/opt/airflow/airflow.cfg - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/plugins:/opt/airflow/plugins - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/downloadfiles:/opt/airflow/downloadfiles - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/addfiles:/opt/airflow/addfiles - - ${{ 
'{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/inputfiles:/opt/airflow/inputfiles - # Mount the generated pangramia package to ensure master services have the latest version - - ${{ '{' }}AIRFLOW_PROJ_DIR:-.{{ '}' }}/pangramia:/app/pangramia - user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0" - depends_on: - &airflow-common-depends-on - redis: - condition: service_healthy - postgres: - condition: service_healthy - minio-init: - condition: service_completed_successfully - -services: - postgres: - image: postgres:13 - env_file: - - .env - networks: - - proxynet - environment: - POSTGRES_USER: airflow - POSTGRES_PASSWORD: ${{ '{' }}POSTGRES_PASSWORD:-pgdb_pwd_A7bC2xY9zE1wV5uP{{ '}' }} - POSTGRES_DB: airflow - command: - - "postgres" - - "-c" - - "shared_buffers=512MB" - - "-c" - - "effective_cache_size=1536MB" - volumes: - - ./postgres-data:/var/lib/postgresql/data - ports: - - "${{ '{' }}POSTGRES_PORT:-5432{{ '}' }}:5432" - healthcheck: - test: ["CMD", "pg_isready", "-U", "airflow"] - interval: 10s - retries: 5 - start_period: 5s - restart: always - user: "999:999" - - redis: - # Redis is limited to 7.2-bookworm due to licencing change - # https://redis.io/blog/redis-adopts-dual-source-available-licensing/ - image: redis:7.2-bookworm - env_file: - - .env - networks: - - proxynet - command: - - "redis-server" - - "--requirepass" - - "${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }}" - - "--bind" - - "*" - - "--protected-mode" - - "no" - - "--save" - - "60" - - "1" - - "--loglevel" - - "warning" - - "--appendonly" - - "yes" - volumes: - - redis-data:/data - expose: - - 6379 - ports: - - "${{ '{' }}REDIS_PORT:-6379{{ '}' }}:6379" - healthcheck: - test: ["CMD", "redis-cli", "-a", "${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }}", "ping"] - interval: 10s - timeout: 30s - retries: 50 - start_period: 30s - restart: always - sysctls: - - net.core.somaxconn=1024 - ulimits: - memlock: -1 - - redis-proxy-account-clear: - image: redis:7.2-bookworm - container_name: redis-proxy-account-clear - env_file: - - .env - networks: - - proxynet - command: > - sh -c " - echo 'Clearing proxy and account statuses from Redis...'; - redis-cli -h redis -a $${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }} --scan --pattern 'proxy_status:*' | xargs -r redis-cli -h redis -a $${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }} DEL; - redis-cli -h redis -a $${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }} --scan --pattern 'account_status:*' | xargs -r redis-cli -h redis -a $${{ '{' }}REDIS_PASSWORD:-rOhTAIlTFFylXsjhqwxnYxDChFc{{ '}' }} DEL; - echo 'Redis cleanup complete.' 
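The redis-cli pipeline above can be expressed in a few lines of redis-py; a sketch under the assumption that the connection parameters come from the same .env file used by the stack (the values below are placeholders):

```python
# Sketch of the cleanup done by redis-proxy-account-clear, using redis-py
# instead of redis-cli. Host/port/password are placeholders that would in
# practice come from the stack's .env file.
import redis

r = redis.Redis(host="redis", port=6379, password="example-redis-password", decode_responses=True)

deleted = 0
for pattern in ("proxy_status:*", "account_status:*"):
    # scan_iter avoids the blocking KEYS command on a busy instance.
    for key in r.scan_iter(match=pattern, count=500):
        deleted += r.delete(key)

print(f"Removed {deleted} proxy/account status keys")
```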
- " - depends_on: - redis: - condition: service_healthy - - minio: - image: minio/minio:latest - container_name: minio - networks: - - proxynet - volumes: - - ./minio-data:/data - ports: - - "9001:9000" - - "9002:9001" - environment: - MINIO_ROOT_USER: ${{ '{' }}MINIO_ROOT_USER:-admin{{ '}' }} - MINIO_ROOT_PASSWORD: ${{ '{' }}MINIO_ROOT_PASSWORD:-0153093693-0009{{ '}' }} - command: server /data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - restart: always - - nginx-minio-lb: - image: nginx:alpine - container_name: nginx-minio-lb - networks: - - proxynet - ports: - - "80:80" - - "81:81" - volumes: - - ./configs/nginx.conf:/etc/nginx/nginx.conf:ro - depends_on: - minio: - condition: service_healthy - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:80/minio/health/live"] - interval: 30s - timeout: 10s - retries: 5 - restart: always - - minio-init: - image: minio/mc - container_name: minio-init - networks: - - proxynet - depends_on: - nginx-minio-lb: - condition: service_healthy - entrypoint: > - /bin/sh -c " - set -e; - /usr/bin/mc alias set minio http://nginx-minio-lb:80 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD; - # Retry loop for bucket creation - MAX_ATTEMPTS=10 - SUCCESS=false - # Use a for loop for robustness, as it's generally more portable than `until`. - for i in $$(seq 1 $$MAX_ATTEMPTS); do - # Check if the bucket exists. If so, we're done. - if /usr/bin/mc ls minio/airflow-logs > /dev/null 2>&1; then - echo 'MinIO bucket already exists.' - SUCCESS=true - break - fi - # If not, try to create it. If successful, we're done. - # We redirect output because `mc mb` can error if another process creates it in the meantime. - if /usr/bin/mc mb minio/airflow-logs > /dev/null 2>&1; then - echo 'MinIO bucket created.' - SUCCESS=true - break - fi - # If we reach here, both checks failed. Wait and retry. - echo "Attempt $$i/$$MAX_ATTEMPTS: Waiting for MinIO bucket..." - sleep 2 - done - - # After the loop, check if we succeeded. - if [ "$$SUCCESS" = "false" ]; then - echo "Failed to create MinIO bucket after $$MAX_ATTEMPTS attempts." - exit 1 - fi - /usr/bin/mc anonymous set download minio/airflow-logs; - echo 'MinIO initialized: bucket airflow-logs created and policy set to download.'; - " - env_file: - - .env - environment: - MINIO_ROOT_USER: ${{ '{' }}MINIO_ROOT_USER:-admin{{ '}' }} - MINIO_ROOT_PASSWORD: ${{ '{' }}MINIO_ROOT_PASSWORD:-0153093693-0009{{ '}' }} - restart: on-failure - - caddy: - build: - context: . 
- dockerfile: Dockerfile.caddy - image: pangramia/ytdlp-ops-caddy:latest - container_name: caddy - networks: - - proxynet - ports: - - "8080:8080" - depends_on: - airflow-webserver: - condition: service_started - restart: always - - airflow-webserver: - <<: *airflow-common - command: webserver - expose: - - "8080" - environment: - <<: *airflow-common-env - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] - interval: 30s - timeout: 30s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-scheduler: - <<: *airflow-common - command: scheduler - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-master-worker: - <<: *airflow-common - command: airflow celery worker -q main,default - healthcheck: - # yamllint disable rule:line-length - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-master@$$(hostname)"' - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - # Required to handle warm shutdown of the celery workers properly - # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation - DUMB_INIT_SETSID: 0 - AIRFLOW__CELERY__WORKER_QUEUES: "main,default" - AIRFLOW__CELERY__WORKER_TAGS: "master" - AIRFLOW__CELERY__WORKER_CONCURRENCY: "16" - AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1" - AIRFLOW__CELERY__TASK_ACKS_LATE: "True" - AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0" - AIRFLOW__CELERY__WORKER_NAME: "worker-master@%h" - AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100" - # Max memory per child process before it's recycled. Helps prevent memory leaks. - # 256MB is sufficient for master worker tasks. DL workers use a higher limit. - AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB - - hostname: ${{ '{' }}HOSTNAME{{ '}' }} - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-triggerer: - <<: *airflow-common - command: triggerer - hostname: ${{ '{' }}HOSTNAME{{ '}' }} - environment: - <<: *airflow-common-env - PYTHONASYNCIODEBUG: "1" - healthcheck: - test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${{ '{' }}HOSTNAME{{ '}' }}"'] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-regression-runner: - <<: *airflow-common - entrypoint: "" - container_name: airflow-regression-runner - command: ["tail", "-f", "/dev/null"] - hostname: ${{ '{' }}HOSTNAME{{ '}' }} - environment: - <<: *airflow-common-env - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-init: - <<: *airflow-common - depends_on: - <<: *airflow-common-depends-on - redis-proxy-account-clear: - condition: service_completed_successfully - entrypoint: /bin/bash - # yamllint disable rule:line-length - command: - - -c - - | - # This container runs as root and is responsible for initializing the environment. 
- # It sets permissions on mounted directories to ensure the 'airflow' user (running with AIRFLOW_UID) - # can write to them. This is crucial for logs, dags, and plugins. - echo "Creating scheduler & dag processor log directories..." - mkdir -p /opt/airflow/logs/scheduler /opt/airflow/logs/dag_processor_manager - echo "Initializing permissions for Airflow directories..." - chown -R "${{ '{' }}AIRFLOW_UID{{ '}' }}:0" /opt/airflow/dags /opt/airflow/logs /opt/airflow/plugins /opt/airflow/config /opt/airflow/downloadfiles /opt/airflow/addfiles /opt/airflow/inputfiles - echo "Setting group-writable and setgid permissions on logs directory..." - find /opt/airflow/logs -type d -exec chmod g+rws {} + - find /opt/airflow/logs -type f -exec chmod g+rw {} + - echo "Permissions set." - - # Install curl and setup MinIO connection - echo "Installing curl and setting up MinIO connection..." - apt-get update -yqq && apt-get install -yqq curl - echo "MinIO connection setup complete." - - if [[ -z "${{ '{' }}AIRFLOW_UID{{ '}' }}" ]]; then - echo - echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" - echo "If you are on Linux, you SHOULD follow the instructions below to set " - echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." - echo "For other operating systems you can get rid of the warning with manually created .env file:" - echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" - echo - fi - # This container's job is to initialize the database, create a user, and import connections. - # Wait for db to be ready. - airflow db check --retry 30 --retry-delay 5 - - # Initialize the database if needed - echo "Initializing Airflow database..." - airflow db init - echo "Database initialization complete." - - # Run database migrations. - echo "Running database migrations..." - airflow db upgrade - echo "Database migrations complete." - - # Create the admin user if it doesn't exist. - # The '|| true' prevents the script from failing if the user already exists. - echo "Checking for and creating admin user..." - airflow users create \ - --username "admin" \ - --password "${{ '{' }}AIRFLOW_ADMIN_PASSWORD:-admin_pwd_X9yZ3aB1cE5dF7gH{{ '}' }}" \ - --firstname Admin \ - --lastname User \ - --role Admin \ - --email admin@example.com || true - echo "Admin user check/creation complete." - - # Create/update the redis_default connection to ensure password is correct - echo "Creating/updating redis_default connection..." - airflow connections add 'redis_default' \ - --conn-uri "redis://:${{ '{' }}REDIS_PASSWORD{{ '}' }}@redis:6379/0" \ - || echo "Failed to add redis_default connection, but continuing." - echo "Redis connection setup complete." - - # Import connections from any .json file in the config directory. - echo "Searching for connection files in /opt/airflow/config..." - if [ -d "/opt/airflow/config" ] && [ -n "$(ls -A /opt/airflow/config/*.json 2>/dev/null)" ]; then - for conn_file in /opt/airflow/config/*.json; do - if [ -f "$$conn_file" ]; then - # Exclude files that are not meant to be Airflow connections. - if [ "$(basename "$$conn_file")" = "camoufox_endpoints.json" ]; then - echo "Skipping '$$conn_file' as it is not an Airflow connection file." - continue - fi - echo "Importing connections from $$conn_file" - airflow connections import "$$conn_file" || echo "Failed to import $$conn_file, but continuing." 
- fi - done - else - echo "No connection files found to import, or /opt/airflow/config is empty/missing." - fi - echo "Connection import process complete." - # yamllint enable rule:line-length - environment: - <<: *airflow-common-env - _AIRFLOW_DB_MIGRATE: 'true' - _AIRFLOW_WWW_USER_CREATE: 'false' # Set to false as we handle it manually - user: "0:0" - - airflow-cli: - <<: *airflow-common - profiles: - - debug - environment: - <<: *airflow-common-env - CONNECTION_CHECK_MAX_COUNT: "0" - # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252 - command: - - bash - - -c - - airflow - - flower: - <<: *airflow-common - command: celery flower - ports: - - "5555:5555" - environment: - <<: *airflow-common-env - FLOWER_BASIC_AUTH: "flower:${{ '{' }}FLOWER_PASSWORD{{ '}' }}" - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:5555/"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - docker-socket-proxy: - profiles: - - disabled - image: tecnativa/docker-socket-proxy:0.1.1 - networks: - - proxynet - environment: - CONTAINERS: 1 - IMAGES: 1 - AUTH: 1 - POST: 1 - privileged: true - volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro - restart: always - -volumes: - redis-data: - -networks: - proxynet: - name: airflow_proxynet - external: true diff --git a/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 b/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 deleted file mode 100644 index 2032582..0000000 --- a/airflow/configs/docker-compose-ytdlp-ops.yaml.j2 +++ /dev/null @@ -1,142 +0,0 @@ -name: ytdlp-ops -services: - bgutil-provider: - image: brainicism/bgutil-ytdlp-pot-provider - container_name: bgutil-provider - init: true - ports: - - "4416:4416" - restart: unless-stopped - networks: - - proxynet - - context-prepper: - image: busybox:latest - restart: "no" - volumes: - - ./context:/app/context - networks: - - proxynet - command: - - "/bin/sh" - - "-c" - - | - set -e - CONTEXT_BASE_DIR="/app/context" - TIMESTAMP_DIR="$${CONTEXT_BASE_DIR}/context-data_$$(date +%Y%m%d_%H%M%S)" - mkdir -p "$${TIMESTAMP_DIR}" - ln -sfn "$${TIMESTAMP_DIR}" "$${CONTEXT_BASE_DIR}/context-data" - echo "Context prepper finished. Data will be in: $${TIMESTAMP_DIR}" - - envoy: - image: envoyproxy/envoy:v1.29-latest - {% if service_role != 'management' %} - container_name: envoy-thrift-lb-${HOSTNAME} - {% else %} - container_name: envoy-thrift-lb - {% endif %} - restart: unless-stopped - volumes: - # Mount the generated config file from the host - - ./configs/envoy.yaml:/etc/envoy/envoy.yaml:ro - ports: - # This is the single public port for all Thrift traffic - - "${ENVOY_PORT:-9080}:${ENVOY_PORT:-9080}" - # Expose the admin port for debugging - - "${ENVOY_ADMIN_PORT:-9901}:${ENVOY_ADMIN_PORT:-9901}" - networks: - - proxynet - # This service depends on ytdlp-ops-service, which in turn waits for camoufox. - depends_on: - - ytdlp-ops-service - - ytdlp-ops-service: - image: pangramia/ytdlp-ops-server:latest # Don't comment out or remove, build is performed externally - # container_name is omitted; Docker will use the service name for DNS. - # This service depends on the camoufox-group service, which ensures all camoufox - # instances are started before this service. - depends_on: - context-prepper: - condition: service_completed_successfully - # Ports are no longer exposed directly. Envoy will connect to them on the internal network. 
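Clients reach the ytdlp-ops workers only through the Envoy listener on ENVOY_PORT (9080 by default), which is configured for framed transport and binary protocol. A connection sketch using the thrift Python package; the host value and the generated service stub are placeholders, since the project's .thrift IDL is not part of this diff:

```python
# Sketch: open a framed, binary Thrift connection through the Envoy listener
# (ENVOY_PORT, default 9080). The generated service stub is hypothetical --
# the module produced from the project's .thrift IDL is not shown in this diff.
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
# from ytdlp_ops_thrift import YtdlpOpsService  # hypothetical generated stub

socket = TSocket.TSocket("127.0.0.1", 9080)             # placeholder: host publishing ENVOY_PORT
transport = TTransport.TFramedTransport(socket)         # matches "transport: FRAMED"
protocol = TBinaryProtocol.TBinaryProtocol(transport)   # matches "protocol: BINARY"

transport.open()
try:
    # client = YtdlpOpsService.Client(protocol)
    # result = client.some_method(...)  # whatever the IDL defines
    pass
finally:
    transport.close()
```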
- # entrypoint: - # - /bin/sh - # - -c - # - | - # set -e - # echo "[$(date)] Updating yt-dlp to latest nightly master..." - # python3 -m pip install -U --pre "yt-dlp[default]" --upgrade-strategy eager --force-reinstall --no-cache-dir - # echo "[$(date)] yt-dlp updated to:" - # yt-dlp --version - # echo "[$(date)] Starting original entrypoint..." - # exec /usr/local/bin/docker-entrypoint.sh "$$@" - env_file: - - ./.env # Path is relative to the project directory - volumes: - - ./context:/app/context - - ./logs/yt-dlp-ops/communication_logs:/app/logs/yt-dlp-ops/communication_logs -{% if service_role != 'management' %} - # Mount the generated endpoints file to make it available to the server - - ./configs/camoufox_endpoints.json:/app/config/camoufox_endpoints.json:ro -{% endif %} - # Mount the plugin source code for live updates without rebuilding the image. - # Assumes the plugin source is in a 'bgutil-ytdlp-pot-provider' directory - # next to your docker-compose.yaml file. - #- ./bgutil-ytdlp-pot-provider:/app/bgutil-ytdlp-pot-provider - networks: - - proxynet - command: - # --- Parameters for ALL service roles --- - - "--port" - - "${YTDLP_BASE_PORT:-9090}" - - "--timeout" - - "${YTDLP_TIMEOUT:-600}" - - "--workers" - - "${YTDLP_WORKERS:-3}" - - "--verbose" - - "--server-identity" - - "${SERVER_IDENTITY:-ytdlp-ops-airflow-service}" - - "--redis-host" - - "${MASTER_HOST_IP:-redis}" - - "--redis-port" - - "${REDIS_PORT:-52909}" - - "--redis-password" - - "${REDIS_PASSWORD}" - - "--service-role" - - "{{ service_role }}" - - # --- S3 Logging Parameters --- - #- "--s3-endpoint-url" - #- "${S3_ENDPOINT_URL}" - #- "--s3-access-key-id" - #- "${S3_ACCESS_KEY_ID}" - #- "--s3-secret-access-key" - #- "${S3_SECRET_ACCESS_KEY}" - #- "--s3-region-name" - #- "${S3_REGION_NAME}" -{% if service_role is defined and service_role != 'management' %} - # --- Parameters for worker/all-in-one roles ONLY --- - - "--script-dir" - - "/app" - - "--context-dir" - - "/app/context/context-data" - - "--clean-context-dir" - - "--clients" - - "${YT_CLIENTS:-web,mweb,ios,android}" - - "--proxies" - - "${CAMOUFOX_PROXIES}" - - "--camoufox-endpoints-file" - - "/app/config/camoufox_endpoints.json" - - "--stop-if-no-proxy" - - "--comms-log-root-dir" - - "/app/logs/yt-dlp-ops/communication_logs" - #- "--visitor-rotation-threshold" - #- "250" -{% endif %} - restart: unless-stopped - pull_policy: always - -networks: - proxynet: - name: airflow_proxynet - external: true diff --git a/airflow/configs/docker-compose.camoufox.yaml.j2 b/airflow/configs/docker-compose.camoufox.yaml.j2 deleted file mode 100644 index cd92c27..0000000 --- a/airflow/configs/docker-compose.camoufox.yaml.j2 +++ /dev/null @@ -1,69 +0,0 @@ -# THIS FILE IS AUTO-GENERATED BY generate_envoy_config.py -# DO NOT EDIT MANUALLY. -# -# It contains the service definitions for the camoufox instances -# and adds the necessary dependencies to the main services. 
-services: -{% for proxy in camoufox_proxies %} -{% set proxy_port = _get_port_from_proxy_url(proxy.url) | int %} -{% set container_base_port = camoufox_port + loop.index0 * worker_count %} -{% set host_base_port = container_base_port %} - camoufox-{{ proxy_port }}-{{ loop.index }}: - build: - context: ../camoufox - dockerfile: Dockerfile - args: - VNC_PASSWORD: "{{ vnc_password }}" - image: camoufox:latest - container_name: ytdlp-ops-camoufox-{{ proxy_port }}-{{ loop.index }}-1 - restart: unless-stopped - shm_size: '2gb' # Mitigates browser crashes due to shared memory limitations - ports: - - "{{ host_base_port }}-{{ host_base_port + worker_count - 1 }}:{{ container_base_port }}-{{ container_base_port + worker_count - 1 }}" - environment: - - DISPLAY=:99 - - MOZ_HEADLESS_STACKSIZE=2097152 - - CAMOUFOX_MAX_MEMORY_MB=2048 - - CAMOUFOX_MAX_CONCURRENT_CONTEXTS=8 - - CAMOUFOX_RESTART_THRESHOLD_MB=1500 - volumes: - - /tmp/.X11-unix:/tmp/.X11-unix:rw - - camoufox-data-{{ proxy_port }}-{{ loop.index }}:/app/context-data - - camoufox-browser-cache:/root/.cache/ms-playwright # Persist browser binaries - command: [ - "--ws-host", "0.0.0.0", - "--port", "{{ container_base_port }}", - "--num-instances", "{{ worker_count }}", - "--ws-path", "mypath", - "--proxy-url", "{{ proxy.url }}", - "--headless", - "--monitor-resources", - "--memory-restart-threshold", "1800", - "--preferences", "layers.acceleration.disabled=true,dom.ipc.processCount=2,media.memory_cache_max_size=102400,browser.cache.memory.capacity=102400" - ] - deploy: - resources: - limits: - memory: 2.5G - logging: - driver: "json-file" - options: - max-size: "100m" - max-file: "3" - networks: - - proxynet -{% endfor %} - -volumes: -{% for proxy in camoufox_proxies %} -{% set proxy_port = _get_port_from_proxy_url(proxy.url) | int %} - camoufox-data-{{ proxy_port }}-{{ loop.index }}: -{% endfor %} -{% if camoufox_proxies %} - camoufox-browser-cache: -{% endif %} - -networks: - proxynet: - name: airflow_proxynet - external: true diff --git a/airflow/configs/docker-compose.config-generate.yaml b/airflow/configs/docker-compose.config-generate.yaml deleted file mode 100644 index 46a3b85..0000000 --- a/airflow/configs/docker-compose.config-generate.yaml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3.8' - -services: - config-generator: - image: python:3.12-slim - working_dir: /app - env_file: - - ./.env - volumes: - # Mount the entire project directory to access scripts and write output files - - .:/app - command: > - sh -c "pip install jinja2 && python3 /app/generate_envoy_config.py" diff --git a/airflow/configs/envoy.yaml.j2 b/airflow/configs/envoy.yaml.j2 deleted file mode 100644 index 054cc80..0000000 --- a/airflow/configs/envoy.yaml.j2 +++ /dev/null @@ -1,54 +0,0 @@ -# Jinja2 template for Envoy configuration -admin: - address: - socket_address: - address: 0.0.0.0 - port_value: {{ envoy_admin_port }} - -static_resources: - listeners: - # Listener for ytdlp-ops Thrift traffic - - name: ytdlp_ops_listener - address: - socket_address: - address: 0.0.0.0 - port_value: {{ envoy_port }} - filter_chains: - - filters: - - name: envoy.filters.network.thrift_proxy - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.thrift_proxy.v3.ThriftProxy - stat_prefix: thrift_ingress - transport: FRAMED - protocol: BINARY - route_config: - name: local_route - routes: - - match: - method_name: "" - route: - cluster: ytdlp_ops_cluster - - clusters: - # Cluster for the ytdlp-ops workers - - name: ytdlp_ops_cluster - connect_timeout: 5s - type: 
{{ envoy_cluster_type }} - lb_policy: ROUND_ROBIN - health_checks: - - timeout: 1s - interval: 5s - unhealthy_threshold: 3 - healthy_threshold: 2 - tcp_health_check: {} - load_assignment: - cluster_name: ytdlp_ops_cluster - endpoints: - - lb_endpoints: - {% for i in range(worker_count) %} - - endpoint: - address: - socket_address: - address: {{ backend_address }} - port_value: {{ base_port + i }} - {% endfor %} diff --git a/airflow/configs/nginx.conf b/airflow/configs/nginx.conf deleted file mode 100644 index 9772baa..0000000 --- a/airflow/configs/nginx.conf +++ /dev/null @@ -1,35 +0,0 @@ -events { - worker_connections 1024; -} - -http { - upstream minio_servers { - server 172.17.0.1:9001; - } - - upstream minio_console_servers { - server 172.17.0.1:9002; - } - - server { - listen 80; - location / { - proxy_pass http://minio_servers; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - - server { - listen 81; - location / { - proxy_pass http://minio_console_servers; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } -} diff --git a/airflow/dags/.DS_Store b/airflow/dags/.DS_Store deleted file mode 100644 index 5008ddf..0000000 Binary files a/airflow/dags/.DS_Store and /dev/null differ diff --git a/airflow/dags/QUEUE.md b/airflow/dags/QUEUE.md deleted file mode 100644 index d34b42a..0000000 --- a/airflow/dags/QUEUE.md +++ /dev/null @@ -1,76 +0,0 @@ -V2 System: Separated Auth & Download Flow - -The v2 system splits the process into two distinct stages, each with its own set of queues. The base names for these queues are queue2_auth and queue2_dl. - -───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── -1. Authentication Stage (ytdlp_ops_v02_worker_per_url_auth) - -This stage is responsible for taking a raw YouTube URL, authenticating with the yt-ops-server to get an info.json, and creating granular download tasks. - - • Getting Data (Input): - • Queue: queue2_auth_inbox - • Redis Type: LIST - • Purpose: This is the main entry point for the entire v2 system. Raw YouTube URLs or video IDs are pushed here. The ytdlp_ops_v02_dispatcher_auth DAG pulls URLs from this list to start the process. - • Reporting Results: - • Success: - • Queue: queue2_auth_result (Redis HASH) - A success record for the authentication step is stored here. - • Queue: queue_dl_format_tasks (Redis LIST) - This is the critical handoff queue. Upon successful authentication, the auth worker resolves the desired formats (e.g., bestvideo+bestaudio) into specific format IDs (e.g., 299, 140) and pushes one JSON job payload for each format into this list. This queue - feeds the download stage. - • Failure: - • Queue: queue2_auth_fail (Redis HASH) - If the authentication fails due to a system error (like bot detection or a proxy failure), the error details are stored here. - • Skipped: - • Queue: queue2_auth_skipped (Redis HASH) - If the video is unavailable for a non-system reason (e.g., it's private, deleted, or geo-restricted), the URL is logged here. This is not considered a system failure. 
- • Tracking Tasks: - • Queue: queue2_auth_progress - • Redis Type: HASH - • Purpose: When an auth worker picks up a URL, it adds an entry to this hash to show that the URL is actively being processed. The entry is removed upon completion (success, failure, or skip). - -───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── -2. Download Stage (ytdlp_ops_v02_worker_per_url_dl) - -This stage is responsible for executing the download and probing of a single media format, based on the job created by the auth worker. - - • Getting Data (Input): - • Queue: queue_dl_format_tasks - • Redis Type: LIST - • Purpose: The ytdlp_ops_v02_worker_per_url_dl DAG pulls granular job payloads from this list. Each payload contains everything needed to download a single format (the path to the info.json, the format ID, etc.). - • Reporting Results: - • Success: - • Queue: queue2_dl_result (Redis HASH) - A success record for the download of a specific format is stored here. - • Failure: - • Queue: queue2_dl_fail (Redis HASH) - If the download or probe fails, the error is logged here. As seen in ytdlp_mgmt_queues.py, these failed items can be requeued, which sends them back to queue2_auth_inbox to start the process over. - • Skipped: - • Queue: queue2_dl_skipped (Redis HASH) - Used for unrecoverable download errors (e.g., HTTP 403 Forbidden), similar to the auth stage. - • Tracking Tasks: - • Queue: queue2_dl_progress - • Redis Type: HASH - • Purpose: Tracks download tasks that are actively in progress. - -Summary Table (V2) - - - Queue Name Pattern Redis Type Purpose - ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - queue2_auth_inbox LIST Input for Auth: Holds raw YouTube URLs to be authenticated. - queue2_auth_progress HASH Tracks URLs currently being authenticated. - queue2_auth_result HASH Stores successful authentication results. - queue2_auth_fail HASH Stores failed authentication attempts. - queue2_auth_skipped HASH Stores URLs skipped due to content issues (private, deleted, etc.). - queue_dl_format_tasks LIST Input for Download: Holds granular download jobs (one per format) created by the auth worker. - queue2_dl_progress HASH Tracks download jobs currently in progress. - queue2_dl_result HASH Stores successful download results. - queue2_dl_fail HASH Stores failed download attempts. - queue2_dl_skipped HASH Stores downloads skipped due to unrecoverable errors (e.g., 403 Forbidden). - - -───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── -V1 System (Monolithic) for Contrast - -For completeness, the older v1 system (ytdlp_ops_v01_worker_per_url) uses a simpler, monolithic set of queues, typically with the base name video_queue. 
-
- • Input: video_queue_inbox (Redis LIST)
- • Results: video_queue_result, video_queue_fail, video_queue_skipped (all Redis HASHes)
- • In-Progress: video_queue_progress (Redis HASH)
-
-In this model, there is no handoff between stages; a single worker handles both authentication and download for all requested formats of a URL.
-
diff --git a/airflow/dags/README.ru.md b/airflow/dags/README.ru.md
deleted file mode 100644
index 5978c1b..0000000
--- a/airflow/dags/README.ru.md
+++ /dev/null
@@ -1,163 +0,0 @@
-# Architecture and Description of the YTDLP Airflow DAGs
-
-This document describes the architecture and purpose of the DAGs used to download videos from YouTube. The system is built around a continuous, self-sustaining loop for parallel, fault-tolerant processing.
-
-## Main Processing Loop
-
-Processing is carried out by two main DAGs that work as a pair: an orchestrator and a worker.
-
-### `ytdlp_ops_orchestrator` (The "ignition" system)
-
-- **Purpose:** This DAG acts as the "ignition system" that kicks off processing. It is triggered manually to start a specified number of parallel worker loops.
-- **How it works:**
-  - It does **not** process URLs itself.
-  - Its only job is to launch the configured number of `ytdlp_ops_worker_per_url` DAG runs.
-  - It passes all required configuration (account pool, Redis connection, etc.) to the workers.
-
-### `ytdlp_ops_worker_per_url` (Self-sustaining worker)
-
-- **Purpose:** This DAG processes a single URL and is designed to run in a continuous loop.
-- **How it works:**
-  1. **Start:** The initial run is triggered by `ytdlp_ops_orchestrator`.
-  2. **Getting a task:** The worker pulls one URL from the `_inbox` queue in Redis. If the queue is empty, the worker run finishes and its processing "lane" stops.
-  3. **Processing:** It talks to the `ytdlp-ops-server` service to obtain `info.json` and a proxy, then downloads the video.
-  4. **Continue or stop:**
-     - **On success:** It triggers a new instance of itself, creating a continuous loop that processes the next URL.
-     - **On failure:** The loop is interrupted (if `stop_on_failure` is set to `True`), stopping that processing "lane". This prevents a single problematic URL or account from halting the entire system.
-
-## Management DAGs
-
-### `ytdlp_mgmt_proxy_account`
-
-- **Purpose:** The primary tool for monitoring and managing the state of the resources used by `ytdlp-ops-server`.
-- **Functionality:**
-  - **View statuses:** Shows the current status of all proxies and accounts (e.g., `ACTIVE`, `BANNED`, `RESTING`).
-  - **Proxy management:** Allows manually banning, unbanning, or resetting the status of proxies.
-  - **Account management:** Allows manually banning or unbanning accounts.
-
-### `ytdlp_mgmt_queues`
-
-- **Purpose:** Provides a set of tools for managing the Redis queues used by the processing pipeline.
-- **Functionality (via the `action` parameter):**
-  - `add_videos`: Add one or more YouTube URLs to a queue.
-  - `clear_queue`: Clear (delete) the specified Redis key.
-  - `list_contents`: View the contents of a Redis key (list or hash).
-  - `check_status`: Check the overall state of the queues (type, size).
-  - `requeue_failed`: Move all URLs from the `_fail` failure queue back into the `_inbox` queue for reprocessing (see the sketch below).
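-
-The queue actions above are thin wrappers around ordinary Redis list and hash operations. The following sketch is illustrative only (it is not the DAG code itself): it assumes the `_inbox` LIST / `_fail` HASH key layout described in `QUEUE.md`, and the helper names, host, and password are placeholders.
-
-```python
-import redis
-
-
-def add_videos(client: redis.Redis, queue_name: str, urls: list[str]) -> int:
-    """Push raw YouTube URLs onto the '<queue>_inbox' LIST (the 'add_videos' action)."""
-    inbox = f"{queue_name}_inbox"
-    with client.pipeline() as pipe:
-        for url in urls:
-            pipe.rpush(inbox, url)
-        pipe.execute()
-    return client.llen(inbox)
-
-
-def requeue_failed(client: redis.Redis, queue_name: str) -> int:
-    """Move every entry from the '<queue>_fail' HASH back onto the '<queue>_inbox' LIST."""
-    fail_key = f"{queue_name}_fail"
-    inbox = f"{queue_name}_inbox"
-    failed = client.hgetall(fail_key)  # field = URL, value = JSON error details
-    for url in failed:
-        client.rpush(inbox, url)
-        client.hdel(fail_key, url)
-    return len(failed)
-
-
-if __name__ == "__main__":
-    # Placeholder connection details; the real DAGs resolve these from the Airflow Redis connection.
-    r = redis.Redis(host="redis", port=6379, password="change-me", decode_responses=True)
-    add_videos(r, "video_queue", ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"])
-    print(f"Requeued {requeue_failed(r, 'video_queue')} failed URLs")
-```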
-
-## Resource Management Strategy (Proxies and Accounts)
-
-The system uses an intelligent strategy for managing the lifecycle and state of accounts and proxies in order to maximize the success rate and minimize bans.
-
-- **Account lifecycle ("Cooldown"):**
-  - To prevent "burnout", accounts automatically enter a "resting" state (`RESTING`) after a period of intensive use.
-  - Once the rest period expires, they automatically return to `ACTIVE` and become available to workers again.
-
-- **Smart ban strategy:**
-  - **Account ban first:** When a serious error occurs (e.g., `BOT_DETECTED`), the system penalizes **only the account** that caused the failure. The proxy keeps working.
-  - **Sliding-window proxy bans:** A proxy is banned automatically only if it shows **systematic failures with DIFFERENT accounts** within a short time window. This is a reliable indicator that the problem is the proxy itself.
-
-- **Monitoring:**
-  - The `ytdlp_mgmt_proxy_account` DAG is the primary monitoring tool. It shows the current status of all resources, including the time remaining until banned or resting accounts become active again.
-  - The execution graph of the `ytdlp_ops_worker_per_url` DAG now explicitly shows steps such as `assign_account`, `get_token`, `ban_account`, and `retry_get_token`, which makes debugging much more transparent.
-
-## External Services
-
-### `ytdlp-ops-server` (Thrift Service)
-
-- **Purpose:** An external service that provides authentication data (tokens, cookies, proxy) for downloading videos.
-- **Interaction:** The worker DAG (`ytdlp_ops_worker_per_url`) calls this service before starting a download to obtain the data required by `yt-dlp`.
-
-## Worker DAG Logic (`ytdlp_ops_worker_per_url`)
-
-This DAG is the "workhorse" of the system. It is designed as a self-sustaining loop that processes one URL per run.
-
-### Tasks and their purpose:
-
-- **`pull_url_from_redis`**: Pulls one URL from the `_inbox` queue in Redis. If the queue is empty, the DAG finishes with the `skipped` status, stopping this processing "lane".
-- **`assign_account`**: Picks an account for the task. It reuses the account that was successfully used in the previous run of its "lane" (account affinity). If this is the first run, it picks a random account.
-- **`get_token`**: The core task. It calls `ytdlp-ops-server` to obtain the `info.json`.
-- **`handle_bannable_error_branch`**: If `get_token` fails with a bannable error, this branching task decides what to do next, depending on the `on_bannable_failure` policy.
-- **`ban_account_and_prepare_for_retry`**: If the policy allows a retry, this task bans the failed account and picks a new one for the retry attempt.
-- **`retry_get_token`**: Makes a second attempt to obtain the token with the new account.
-- **`ban_second_account_and_proxy`**: If the second attempt also fails, this task bans the second account and the proxy that was used.
-- **`download_and_probe`**: If `get_token` (or `retry_get_token`) succeeded, this task uses `yt-dlp` to download the media and `ffmpeg` to verify the integrity of the downloaded file.
-- **`mark_url_as_success`**: If `download_and_probe` succeeded, this task writes the result to the `_result` hash in Redis.
-- **`handle_generic_failure`**: If any of the main tasks fails with an unrecoverable error, this task writes detailed error information to the `_fail` hash in Redis.
-- **`decide_what_to_do_next`**: A branching task that runs after success or failure. It decides whether the loop should continue.
-- **`trigger_self_run`**: The task that actually triggers the next DAG run, creating the continuous loop.
-
-## Worker Management (Pause/Resume)
-
-The system provides a mechanism for "cooling down" or temporarily pausing a worker. This is useful for maintenance, safely shutting down a machine, or reducing cluster load without generating errors.
-
-### How it works
-
-The mechanism is based on a lock file that is created on the worker node via Ansible.
-
-1. **Pause:** The administrator runs an Ansible playbook that creates an empty `AIRFLOW.PREVENT_URL_PULL.lock` file in the worker's working directory (`/srv/airflow_dl_worker`).
-2. **Check:** The `ytdlp_ops_dispatcher` DAG, which is responsible for distributing URLs, checks for this file before picking up a new task from Redis.
-3. **Skipping the task:** If the file exists, the `dispatcher` logs that the worker is paused and finishes its task with the `skipped` status. This prevents the worker from receiving new URLs but does not affect tasks that are already running.
-4. **Resume:** The administrator runs another Ansible playbook that renames the lock file (appending a timestamp), thereby "unlocking" the worker. On its next run, the `dispatcher` will not find the file and will continue operating normally.
-
-### Management commands
-
-Dedicated Ansible playbooks are used to manage the worker state. The commands should be run from the project root directory.
-
-**Pause a worker:**
-(Replace `"hostname"` with a host name from your inventory file)
-```bash
-ansible-playbook -i ansible/inventory.ini ansible/playbooks/pause_worker.yml --limit "hostname"
-```
-
-**Resume a worker:**
-```bash
-ansible-playbook -i ansible/inventory.ini ansible/playbooks/resume_worker.yml --limit "hostname"
-```
-
-## Pinning Workers to Specific Machines (Worker Pinning / Affinity)
-
-To ensure that all tasks related to processing one specific URL run on the same machine (worker), the system uses a combination of three components: the Orchestrator, the Dispatcher, and a custom Airflow hook.
-
-### 1. `ytdlp_ops_orchestrator` (Orchestrator)
-
-- **Role:** Initiates processing.
-- **Action:** On start, it creates several `ytdlp_ops_dispatcher` DAG runs. Each such run is intended to process one URL.
-- **Parameter passing:** The orchestrator passes its configuration parameters (e.g., `account_pool`, `redis_conn_id`, `service_ip`) to each dispatcher run.
-
-### 2. `ytdlp_ops_dispatcher` (Dispatcher)
-
-- **Role:** The core mechanism that enforces the affinity.
-- **Action:**
-  1. **Gets a URL:** Pulls one URL from the Redis queue (`_inbox`).
-  2. **Identifies the worker:** Uses `socket.gethostname()` to determine the name of the current machine (worker) it is running on.
-  3. **Builds the queue name:** Creates a unique queue name for this worker, for example `queue-dl-dl-worker-1`.
-  4. **Triggers the Worker DAG:** Starts a `ytdlp_ops_worker_per_url` DAG run, passing it:
-     * The pulled `url_to_process`.
-     * The generated `worker_queue` queue name, via the `conf` parameter.
-     * All other parameters received from the orchestrator.
-- **Key point:** This is the step that establishes the link between a specific URL and the specific worker on which processing of that URL started.
-
-### 3. `task_instance_mutation_hook` (Task mutation hook)
-
-- **Location:** `airflow/config/custom_task_hooks.py`
-- **Role:** The mechanism that ensures *all* tasks of the Worker DAG run on the right machine.
-- **How it works:**
-  1. **Registration:** The hook is registered in the Airflow configuration and is invoked before *every* task runs.
-  2. **DAG ID check:** The hook checks whether the task (`TaskInstance`) belongs to the `ytdlp_ops_worker_per_url` DAG.
-  3. **Extracting `conf`:** If it does, the hook safely extracts the `conf` from the `DagRun` associated with the task.
-  4. **Changing the queue:**
-     * If the `conf` contains a `worker_queue` key (which is true for all runs initiated by the dispatcher), the hook *overrides* the task's default queue with that value.
-     * This means the Airflow scheduler will place the task in exactly the queue that the intended worker listens on.
-  5. **Fallback:** If `worker_queue` is not found (e.g., the DAG was triggered manually), the task falls back to the default `queue-dl` queue.
-- **Key point:** This hook guarantees that *all subsequent tasks* within a single `ytdlp_ops_worker_per_url` run (e.g., `get_token`, `download_and_probe`, `mark_url_as_success`) execute on the same worker that originally received the URL in the dispatcher.
-
-### Summary
-
-The `Orchestrator -> Dispatcher -> Hook` combination effectively implements task-to-worker affinity:
-
-1. The **Orchestrator** starts the process.
-2. The **Dispatcher** binds a specific URL to a specific worker by determining its hostname and passing it as `worker_queue` to the Worker DAG.
-3. The **Hook** guarantees that all Worker DAG tasks run in the queue that corresponds to that worker.
-
-This allows the system to use the worker's local resources (e.g., cache, temporary files) efficiently and predictably when processing each individual URL.
diff --git a/airflow/dags/get_ip.py b/airflow/dags/get_ip.py
deleted file mode 100644
index 123d644..0000000
--- a/airflow/dags/get_ip.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import socket
-import logging
-
-logger = logging.getLogger(__name__)
-
-def get_ip_address():
-    """
-    Get the primary IP address of the host.
-    This is used by Airflow workers to advertise their IP for log serving,
-    ensuring the webserver can reach them in a multi-host environment.
-    """
-    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-    try:
-        # This doesn't even have to be reachable
-        s.connect(('10.255.255.255', 1))
-        ip_address = s.getsockname()[0]
-        logger.info(f"Determined host IP address as: {ip_address}")
-    except Exception as e:
-        logger.warning(f"Could not determine IP address, falling back to 127.0.0.1.
Error: {e}") - ip_address = '127.0.0.1' - finally: - s.close() - return ip_address diff --git a/airflow/dags/proxy_health_check_dag.py b/airflow/dags/proxy_health_check_dag.py deleted file mode 100644 index 481e68a..0000000 --- a/airflow/dags/proxy_health_check_dag.py +++ /dev/null @@ -1,86 +0,0 @@ -import logging -import time -import requests -from datetime import datetime - -from airflow.decorators import task -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.models.variable import Variable - -logger = logging.getLogger(__name__) - -# Get the master host IP from an Airflow variable, which is set via the .env file. -# This allows the default health check target to be dynamic based on cluster.yml. -DEFAULT_MASTER_IP = Variable.get("MASTER_HOST_IP", default_var="127.0.0.1") - -with DAG( - dag_id='proxy_health_check', - start_date=datetime(2023, 1, 1), - schedule=None, - catchup=False, - tags=['monitoring', 'proxy'], - doc_md=""" - ### Proxy Health Check DAG - - This DAG runs a continuous loop to check a target URL through a SOCKS5 proxy. - It is designed for monitoring proxy connectivity and performance. Once triggered, it will run forever - until the DAG run is manually stopped. - - **Parameters:** - - `target_url`: The URL to check. Defaults to the internal nginx service. - - `socks5_host`: The SOCKS5 proxy host. For Docker, `host.docker.internal` often works to target the host machine. - - `socks5_port`: The SOCKS5 proxy port. - - `check_interval_seconds`: How often to run the check. - - `latency_threshold_seconds`: A warning will be logged if the request takes longer than this. - - `timeout_seconds`: The timeout for the web request. - """, - params={ - 'target_url': Param(f'http://{DEFAULT_MASTER_IP}:8888', type='string', description="The URL to check. Defaults to the master node's nginx healthcheck service."), - 'socks5_host': Param('sslocal-rust-1087', type='string', description="SOCKS5 proxy host. Use 'host.docker.internal' for Docker host."), - 'socks5_port': Param(1087, type='integer', description="SOCKS5 proxy port."), - 'check_interval_seconds': Param(25, type='integer', description="Seconds to wait between checks."), - 'latency_threshold_seconds': Param(2, type='integer', description="Log a warning if latency exceeds this threshold."), - 'timeout_seconds': Param(10, type='integer', description="Request timeout in seconds."), - }, -) as dag: - - @task - def run_proxy_check_loop(**context): - """ - Continuously checks a URL through a SOCKS5 proxy and logs if latency is high. - This task will run indefinitely until the DAG run is manually stopped or fails. - """ - params = context['params'] - target_url = params['target_url'] - proxy_host = params['socks5_host'] - proxy_port = params['socks5_port'] - interval = params['check_interval_seconds'] - threshold = params['latency_threshold_seconds'] - timeout = params['timeout_seconds'] - - proxy_url = f"socks5h://{proxy_host}:{proxy_port}" - proxies = { - 'http': proxy_url, - 'https': proxy_url, - } - - logger.info(f"Starting proxy health check loop. Target: {target_url}, Proxy: {proxy_url}, Interval: {interval}s, Threshold: {threshold}s") - - while True: - start_time = time.time() - try: - response = requests.get(target_url, proxies=proxies, timeout=timeout) - response.raise_for_status() - latency = time.time() - start_time - - if latency > threshold: - logger.warning(f"High latency detected! 
Latency: {latency:.2f}s, Threshold: {threshold}s, Target: {target_url}") - - except requests.exceptions.RequestException as e: - latency = time.time() - start_time - logger.error(f"Proxy check failed for {target_url} via {proxy_url}. Latency: {latency:.2f}s. Error: {e}") - - time.sleep(interval) - - run_proxy_check_loop() diff --git a/airflow/dags/scripts/regression.py b/airflow/dags/scripts/regression.py deleted file mode 100644 index ae79946..0000000 --- a/airflow/dags/scripts/regression.py +++ /dev/null @@ -1,636 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Regression testing script for the ytdlp-ops system. - -This script orchestrates a regression test by: -1. Populating a Redis queue with video URLs from an input file. -2. Triggering the `ytdlp_ops_orchestrator` Airflow DAG to start processing. -3. Monitoring the progress of the processing for a specified duration. -4. Generating a report of any failures. -5. Optionally cleaning up the Redis queues after the test. -""" - -import argparse -import csv -import json -import logging -import os -import re -import requests -import subprocess -import signal -import sys -import time -from datetime import datetime, timedelta -from pathlib import Path - -import redis -from tabulate import tabulate - -# It's safe to import these as the script runs in the same container as Airflow -# where the yt_ops_services package is installed. -try: - from yt_ops_services.client_utils import get_thrift_client, format_timestamp - from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException -except ImportError: - logging.error("Could not import Thrift modules. Ensure this script is run in the 'airflow-regression-runner' container.") - sys.exit(1) - -# --- Configuration --- -logging.basicConfig( - level=logging.INFO, - format="[%(asctime)s] [%(levelname)s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", -) - -INTERRUPTED = False - -def signal_handler(sig, frame): - """Handles Ctrl+C interruption.""" - global INTERRUPTED - if not INTERRUPTED: - logging.warning("Ctrl+C detected. Initiating graceful shutdown...") - INTERRUPTED = True - else: - logging.warning("Second Ctrl+C detected. Forcing exit.") - sys.exit(1) - - -# --- Helper Functions --- - -def _get_redis_client(redis_url: str): - """Gets a Redis client from a URL.""" - try: - # from_url is the modern way to connect and handles password auth - client = redis.from_url(redis_url, decode_responses=True) - client.ping() - logging.info(f"Successfully connected to Redis at {client.connection_pool.connection_kwargs.get('host')}:{client.connection_pool.connection_kwargs.get('port')}") - return client - except redis.exceptions.ConnectionError as e: - logging.error(f"Failed to connect to Redis: {e}") - sys.exit(1) - except Exception as e: - logging.error(f"An unexpected error occurred while connecting to Redis: {e}") - sys.exit(1) - - -def _get_webserver_url(): - """ - Determines the Airflow webserver URL, prioritizing MASTER_HOST_IP from .env. 
- """ - master_host_ip = os.getenv("MASTER_HOST_IP") - if master_host_ip: - url = f"http://{master_host_ip}:8080" - logging.info(f"Using MASTER_HOST_IP for webserver URL: {url}") - return url - - # Fallback to AIRFLOW_WEBSERVER_URL or the default service name - url = os.getenv("AIRFLOW_WEBSERVER_URL", "http://airflow-webserver:8080") - logging.info(f"Using default webserver URL: {url}") - return url - -def _normalize_to_url(item: str) -> str | None: - """ - Validates if an item is a recognizable YouTube URL or video ID, - and normalizes it to a standard watch URL format. - """ - if not item: - return None - - video_id_pattern = r"^[a-zA-Z0-9_-]{11}$" - if re.match(video_id_pattern, item): - return f"https://www.youtube.com/watch?v={item}" - - url_patterns = [r"(?:v=|\/v\/|youtu\.be\/|embed\/|shorts\/)([a-zA-Z0-9_-]{11})"] - for pattern in url_patterns: - match = re.search(pattern, item) - if match: - return f"https://www.youtube.com/watch?v={match.group(1)}" - - logging.warning(f"Could not recognize '{item}' as a valid YouTube URL or video ID.") - return None - -def _read_input_file(file_path: str) -> list[str]: - """Reads video IDs/URLs from a file (CSV or JSON list).""" - path = Path(file_path) - if not path.is_file(): - logging.error(f"Input file not found: {file_path}") - sys.exit(1) - - content = path.read_text(encoding='utf-8') - - # Try parsing as JSON list first - if content.strip().startswith('['): - try: - data = json.loads(content) - if isinstance(data, list): - logging.info(f"Successfully parsed {file_path} as a JSON list.") - return [str(item) for item in data] - except json.JSONDecodeError: - logging.warning("File looks like JSON but failed to parse. Will try treating as CSV/text.") - - # Fallback to CSV/text (one item per line) - items = [] - # Use io.StringIO to handle the content as a file for the csv reader - from io import StringIO - # Sniff to see if it has a header - try: - has_header = csv.Sniffer().has_header(content) - except csv.Error: - has_header = False # Not a CSV, treat as plain text - - reader = csv.reader(StringIO(content)) - if has_header: - next(reader) # Skip header row - - for row in reader: - if row: - items.append(row[0].strip()) # Assume the ID/URL is in the first column - - logging.info(f"Successfully parsed {len(items)} items from {file_path} as CSV/text.") - return items - - -def _get_api_auth(): - """Gets Airflow API credentials from environment variables.""" - username = os.getenv("AIRFLOW_ADMIN_USERNAME", "admin") - password = os.getenv("AIRFLOW_ADMIN_PASSWORD") - if not password: - logging.error("AIRFLOW_ADMIN_PASSWORD not found in environment. 
Cannot interact with API.") - return None, None - return username, password - -def _pause_dag(dag_id: str, is_paused: bool = True): - """Pauses or unpauses an Airflow DAG via the REST API.""" - logging.info(f"Attempting to {'pause' if is_paused else 'unpause'} DAG: {dag_id}...") - username, password = _get_api_auth() - if not username: - return - - webserver_url = _get_webserver_url() - endpoint = f"{webserver_url}/api/v1/dags/{dag_id}" - payload = {"is_paused": is_paused} - - try: - response = requests.patch(endpoint, auth=(username, password), json=payload, timeout=30) - response.raise_for_status() - logging.info(f"Successfully {'paused' if is_paused else 'unpaused'} DAG '{dag_id}'.") - except requests.exceptions.RequestException as e: - logging.error(f"Failed to {'pause' if is_paused else 'unpause'} DAG '{dag_id}': {e}") - if e.response is not None: - logging.error(f"Response: {e.response.text}") - -def _fail_running_dag_runs(dag_id: str): - """Finds all running DAG runs for a given DAG and marks them as failed.""" - logging.info(f"Attempting to fail all running instances of DAG '{dag_id}'...") - username, password = _get_api_auth() - if not username: - return - - webserver_url = _get_webserver_url() - list_endpoint = f"{webserver_url}/api/v1/dags/{dag_id}/dagRuns?state=running" - - try: - # Get running DAGs - response = requests.get(list_endpoint, auth=(username, password), timeout=30) - response.raise_for_status() - running_runs = response.json().get("dag_runs", []) - - if not running_runs: - logging.info(f"No running DAG runs found for '{dag_id}'.") - return - - logging.info(f"Found {len(running_runs)} running DAG run(s) to fail.") - - for run in running_runs: - dag_run_id = run["dag_run_id"] - update_endpoint = f"{webserver_url}/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}" - payload = {"state": "failed"} - try: - update_response = requests.patch(update_endpoint, auth=(username, password), json=payload, timeout=30) - update_response.raise_for_status() - logging.info(f" - Successfully marked DAG run '{dag_run_id}' as failed.") - except requests.exceptions.RequestException as e: - logging.error(f" - Failed to mark DAG run '{dag_run_id}' as failed: {e}") - - except requests.exceptions.RequestException as e: - logging.error(f"Failed to list running DAG runs for '{dag_id}': {e}") - if e.response is not None: - logging.error(f"Response: {e.response.text}") - - -# --- Core Logic Functions --- - -def step_0_populate_queue(redis_client, queue_name: str, input_file: str): - """Reads URLs from a file and populates the Redis inbox queue.""" - logging.info("--- Step 0: Populating Redis Queue ---") - raw_items = _read_input_file(input_file) - if not raw_items: - logging.error("No items found in the input file. Aborting.") - sys.exit(1) - - valid_urls = [] - for item in raw_items: - url = _normalize_to_url(item) - if url and url not in valid_urls: - valid_urls.append(url) - - if not valid_urls: - logging.error("No valid YouTube URLs or IDs were found in the input file. Aborting.") - sys.exit(1) - - inbox_queue = f"{queue_name}_inbox" - logging.info(f"Adding {len(valid_urls)} unique and valid URLs to Redis queue '{inbox_queue}'...") - - with redis_client.pipeline() as pipe: - for url in valid_urls: - pipe.rpush(inbox_queue, url) - pipe.execute() - - logging.info(f"Successfully populated queue. 
Total items in '{inbox_queue}': {redis_client.llen(inbox_queue)}") - return len(valid_urls) - - -def step_1_trigger_orchestrator(args: argparse.Namespace): - """Triggers the ytdlp_ops_orchestrator DAG using the Airflow REST API.""" - logging.info("--- Step 1: Triggering Orchestrator DAG via REST API ---") - - # Get API details from environment variables - webserver_url = _get_webserver_url() - api_endpoint = f"{webserver_url}/api/v1/dags/ytdlp_ops_orchestrator/dagRuns" - - # Default admin user is 'admin' - username = os.getenv("AIRFLOW_ADMIN_USERNAME", "admin") - password = os.getenv("AIRFLOW_ADMIN_PASSWORD") - - if not password: - logging.error("AIRFLOW_ADMIN_PASSWORD not found in environment. Please set it in your .env file.") - sys.exit(1) - - # Construct the configuration for the DAG run - conf = { - "total_workers": args.workers, - "workers_per_bunch": args.workers_per_bunch, - "clients": args.client, - } - - payload = { - "conf": conf - } - - logging.info(f"Triggering DAG at endpoint: {api_endpoint}") - - try: - response = requests.post( - api_endpoint, - auth=(username, password), - json=payload, - timeout=30 # 30 second timeout - ) - response.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx) - - logging.info("Successfully triggered the orchestrator DAG.") - logging.debug(f"Airflow API response:\n{response.json()}") - - except requests.exceptions.RequestException as e: - logging.error("Failed to trigger the orchestrator DAG via REST API.") - logging.error(f"Error: {e}") - if e.response is not None: - logging.error(f"Response status code: {e.response.status_code}") - logging.error(f"Response text: {e.response.text}") - sys.exit(1) - - -def step_2_monitor_progress(args: argparse.Namespace, redis_client, queue_name: str, total_urls: int, run_time_min: int, interval_min: int, show_status: bool): - """Monitors the Redis queues for the duration of the test.""" - logging.info("--- Step 2: Monitoring Progress ---") - - end_time = datetime.now() + timedelta(minutes=run_time_min) - inbox_q = f"{queue_name}_inbox" - progress_q = f"{queue_name}_progress" - result_q = f"{queue_name}_result" - fail_q = f"{queue_name}_fail" - - while datetime.now() < end_time and not INTERRUPTED: - try: - inbox_len = redis_client.llen(inbox_q) - progress_len = redis_client.hlen(progress_q) - result_len = redis_client.hlen(result_q) - fail_len = redis_client.hlen(fail_q) - - processed = result_len + fail_len - success_len = 0 - if result_len > 0: - # This is inefficient but gives a more accurate success count - results = redis_client.hgetall(result_q) - success_len = sum(1 for v in results.values() if '"status": "success"' in v) - - logging.info( - f"Progress: {processed}/{total_urls} | " - f"Success: {success_len} | Failed: {fail_len} | " - f"In Progress: {progress_len} | Inbox: {inbox_len}" - ) - if show_status: - # This function now connects directly to services to get status - get_system_status(args, redis_client) - except Exception as e: - logging.error(f"Error while querying Redis for progress: {e}") - - # Wait for the interval, but check for interruption every second - # for a more responsive shutdown. 
- wait_until = time.time() + interval_min * 60 - while time.time() < wait_until and not INTERRUPTED: - # Check if we are past the main end_time - if datetime.now() >= end_time: - break - time.sleep(1) - - if INTERRUPTED: - logging.info("Monitoring interrupted.") - else: - logging.info("Monitoring period has ended.") - - -# --- System Status Functions (Direct Connect) --- - -def _list_proxy_statuses(client, server_identity=None): - """Lists proxy statuses by connecting directly to the Thrift service.""" - logging.info(f"--- Proxy Statuses (Server: {server_identity or 'ALL'}) ---") - try: - statuses = client.getProxyStatus(server_identity) - if not statuses: - logging.info("No proxy statuses found.") - return - - status_list = [] - headers = ["Server", "Proxy URL", "Status", "Success", "Failures", "Last Success", "Last Failure"] - for s in statuses: - status_list.append({ - "Server": s.serverIdentity, "Proxy URL": s.proxyUrl, "Status": s.status, - "Success": s.successCount, "Failures": s.failureCount, - "Last Success": format_timestamp(s.lastSuccessTimestamp), - "Last Failure": format_timestamp(s.lastFailureTimestamp), - }) - logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid')) - except (PBServiceException, PBUserException) as e: - logging.error(f"Failed to get proxy statuses: {e.message}") - except Exception as e: - logging.error(f"An unexpected error occurred while getting proxy statuses: {e}", exc_info=True) - -def _list_account_statuses(client, redis_client, account_id=None): - """Lists account statuses from Thrift, enriched with live Redis data.""" - logging.info(f"--- Account Statuses (Account: {account_id or 'ALL'}) ---") - try: - statuses = client.getAccountStatus(accountId=account_id, accountPrefix=None) - if not statuses: - logging.info("No account statuses found.") - return - - status_list = [] - for s in statuses: - status_str = s.status - if 'RESTING' in status_str: - try: - expiry_ts_bytes = redis_client.hget(f"account_status:{s.accountId}", "resting_until") - if expiry_ts_bytes: - expiry_ts = float(expiry_ts_bytes) - now = datetime.now().timestamp() - if now < expiry_ts: - remaining_seconds = int(expiry_ts - now) - status_str = f"RESTING ({remaining_seconds}s left)" - except Exception: - pass # Ignore if parsing fails - - last_success = float(s.lastSuccessTimestamp) if s.lastSuccessTimestamp else 0 - last_failure = float(s.lastFailureTimestamp) if s.lastFailureTimestamp else 0 - last_activity = max(last_success, last_failure) - - status_list.append({ - "Account ID": s.accountId, "Status": status_str, "Success": s.successCount, - "Failures": s.failureCount, "Last Success": format_timestamp(s.lastSuccessTimestamp), - "Last Failure": format_timestamp(s.lastFailureTimestamp), "Last Proxy": s.lastUsedProxy or "N/A", - "_last_activity": last_activity, - }) - - status_list.sort(key=lambda item: item.get('_last_activity', 0), reverse=True) - for item in status_list: - del item['_last_activity'] - - logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid')) - except (PBServiceException, PBUserException) as e: - logging.error(f"Failed to get account statuses: {e.message}") - except Exception as e: - logging.error(f"An unexpected error occurred while getting account statuses: {e}", exc_info=True) - -def _list_client_statuses(redis_client): - """Lists client statistics from Redis.""" - logging.info("--- Client Statuses ---") - try: - stats_key = "client_stats" - all_stats_raw = redis_client.hgetall(stats_key) - if not all_stats_raw: - 
logging.info("No client stats found in Redis.") - return - - status_list = [] - for client, stats_json in all_stats_raw.items(): - try: - stats = json.loads(stats_json) - def format_latest(data): - if not data: return "N/A" - ts = format_timestamp(data.get('timestamp')) - url = data.get('url', 'N/A') - video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url) - video_id = video_id_match.group(1) if video_id_match else 'N/A' - return f"{ts} ({video_id})" - - status_list.append({ - "Client": client, "Success": stats.get('success_count', 0), - "Failures": stats.get('failure_count', 0), - "Last Success": format_latest(stats.get('latest_success')), - "Last Failure": format_latest(stats.get('latest_failure')), - }) - except (json.JSONDecodeError, AttributeError): - status_list.append({"Client": client, "Success": "ERROR", "Failures": "ERROR", "Last Success": "Parse Error", "Last Failure": "Parse Error"}) - - status_list.sort(key=lambda item: item.get('Client', '')) - logging.info("\n" + tabulate(status_list, headers='keys', tablefmt='grid')) - except Exception as e: - logging.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True) - -def get_system_status(args: argparse.Namespace, redis_client): - """Connects to services and prints status tables.""" - logging.info("--- Getting System Status ---") - client, transport = None, None - try: - client, transport = get_thrift_client(args.management_host, args.management_port) - _list_proxy_statuses(client) - _list_account_statuses(client, redis_client) - _list_client_statuses(redis_client) - except Exception as e: - logging.error(f"Could not get system status: {e}") - finally: - if transport and transport.isOpen(): - transport.close() - - -def step_3_generate_report(redis_client, queue_name: str, report_file: str | None): - """Generates a CSV report of failed items.""" - logging.info("--- Step 3: Generating Report ---") - fail_q = f"{queue_name}_fail" - - failed_items = redis_client.hgetall(fail_q) - if not failed_items: - logging.info("No items found in the fail queue. No report will be generated.") - return - - logging.info(f"Found {len(failed_items)} failed items. 
Writing to report...") - - report_data = [] - for url, data_json in failed_items.items(): - try: - data = json.loads(data_json) - error_details = data.get('error_details', {}) - report_data.append({ - 'url': url, - 'video_id': _normalize_to_url(url).split('v=')[-1] if _normalize_to_url(url) else 'N/A', - 'error_message': error_details.get('error_message', 'N/A'), - 'error_code': error_details.get('error_code', 'N/A'), - 'proxy_url': error_details.get('proxy_url', 'N/A'), - 'timestamp': datetime.fromtimestamp(data.get('end_time', 0)).isoformat(), - }) - except (json.JSONDecodeError, AttributeError): - report_data.append({'url': url, 'video_id': 'N/A', 'error_message': 'Could not parse error data', 'error_code': 'PARSE_ERROR', 'proxy_url': 'N/A', 'timestamp': 'N/A'}) - - if report_file: - try: - with open(report_file, 'w', newline='', encoding='utf-8') as f: - writer = csv.DictWriter(f, fieldnames=report_data[0].keys()) - writer.writeheader() - writer.writerows(report_data) - logging.info(f"Successfully wrote report to {report_file}") - except IOError as e: - logging.error(f"Could not write report to file {report_file}: {e}") - else: - # Print to stdout if no file is specified - logging.info("--- Failure Report (stdout) ---") - for item in report_data: - logging.info(f"URL: {item['url']}, Error: {item['error_code']} - {item['error_message']}") - logging.info("--- End of Report ---") - - -def handle_interruption(redis_client, queue_name, report_file): - """Graceful shutdown logic for when the script is interrupted.""" - logging.warning("--- Interruption Detected: Starting Shutdown Procedure ---") - - # 1. Pause DAGs - _pause_dag("ytdlp_ops_orchestrator") - _pause_dag("ytdlp_ops_dispatcher") - - # 2. Fail running per_url jobs - _fail_running_dag_runs("ytdlp_ops_worker_per_url") - - # 3. Generate report - logging.info("Generating final report due to interruption...") - step_3_generate_report(redis_client, queue_name, report_file) - # Also print to stdout if a file was specified, so user sees it immediately - if report_file: - logging.info("Printing report to stdout as well...") - step_3_generate_report(redis_client, queue_name, None) - - -def step_4_cleanup_queues(redis_client, queue_name: str): - """Cleans up the Redis queues used by the test.""" - logging.info("--- Step 4: Cleaning Up Queues ---") - queues_to_delete = [ - f"{queue_name}_inbox", - f"{queue_name}_progress", - f"{queue_name}_result", - f"{queue_name}_fail", - ] - logging.warning(f"This will delete the following Redis keys: {queues_to_delete}") - - deleted_count = redis_client.delete(*queues_to_delete) - logging.info(f"Cleanup complete. Deleted {deleted_count} key(s).") - - -def main(): - """Main function to parse arguments and run the regression test.""" - # Register the signal handler for Ctrl+C - signal.signal(signal.SIGINT, signal_handler) - - parser = argparse.ArgumentParser(description="Run a regression test for the ytdlp-ops system.") - - # Environment - parser.add_argument("--redis-host", type=str, default="redis", help="Hostname or IP address of the Redis server. 
Defaults to 'redis' for in-container execution.") - parser.add_argument("--management-host", type=str, default=os.getenv("MANAGEMENT_SERVICE_HOST", "envoy-thrift-lb"), help="Hostname of the management Thrift service.") - parser.add_argument("--management-port", type=int, default=int(os.getenv("MANAGEMENT_SERVICE_PORT", 9080)), help="Port of the management Thrift service.") - - # Test Configuration - parser.add_argument("--client", type=str, required=True, help="Client persona to test (e.g., 'mweb').") - parser.add_argument("--workers", type=int, required=True, help="Total number of worker loops to start.") - parser.add_argument("--workers-per-bunch", type=int, default=1, help="Number of workers per bunch.") - parser.add_argument("--run-time-min", type=int, required=True, help="How long to let the test run, in minutes.") - parser.add_argument("--input-file", type=str, help="Path to a file containing video IDs/URLs. If not provided, the existing queue will be used.") - - # Monitoring & Reporting - parser.add_argument("--progress-interval-min", type=int, default=2, help="How often to query and print progress, in minutes.") - parser.add_argument("--report-file", type=str, help="Path to a CSV file to write the list of failed URLs to.") - parser.add_argument("--show-status", action="store_true", help="If set, show proxy and account statuses during progress monitoring.") - - # Actions - parser.add_argument("--cleanup", action="store_true", help="If set, clear the Redis queues after the test completes.") - parser.add_argument("--skip-populate", action="store_true", help="If set, skip populating the queue (assumes it's already populated).") - parser.add_argument("--skip-trigger", action="store_true", help="If set, skip triggering the orchestrator (assumes it's already running).") - - args = parser.parse_args() - - # --- Setup --- - redis_password = os.getenv("REDIS_PASSWORD") - if not redis_password: - logging.error("REDIS_PASSWORD not found in environment. Please set it in your .env file.") - sys.exit(1) - - # Use the provided redis-host, defaulting to 'redis' for in-container execution - redis_url = f"redis://:{redis_password}@{args.redis_host}:6379/0" - redis_client = _get_redis_client(redis_url) - - queue_name = "video_queue" # Hardcoded for now, could be an arg - total_urls = 0 - - # --- Execution --- - if not args.skip_populate: - if args.input_file: - total_urls = step_0_populate_queue(redis_client, queue_name, args.input_file) - else: - logging.info("No input file provided, using existing queue.") - total_urls = redis_client.llen(f"{queue_name}_inbox") - if total_urls == 0: - logging.warning("Queue is empty and no input file was provided. The test may not have any work to do.") - else: - total_urls = redis_client.llen(f"{queue_name}_inbox") - logging.info(f"Skipping population. 
Found {total_urls} URLs in the inbox.") - - if not args.skip_trigger: - step_1_trigger_orchestrator(args) - else: - logging.info("Skipping orchestrator trigger.") - - step_2_monitor_progress(args, redis_client, queue_name, total_urls, args.run_time_min, args.progress_interval_min, args.show_status) - - if INTERRUPTED: - handle_interruption(redis_client, queue_name, args.report_file) - else: - step_3_generate_report(redis_client, queue_name, args.report_file) - - if args.cleanup: - step_4_cleanup_queues(redis_client, queue_name) - - if INTERRUPTED: - logging.warning("Regression test script finished due to user interruption.") - sys.exit(130) # Standard exit code for Ctrl+C - else: - logging.info("Regression test script finished.") - -if __name__ == "__main__": - main() diff --git a/airflow/dags/utils/__init__.py b/airflow/dags/utils/__init__.py deleted file mode 100644 index 3dc96c9..0000000 --- a/airflow/dags/utils/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -Airflow DAG Utilities -""" diff --git a/airflow/dags/utils/redis_utils.py b/airflow/dags/utils/redis_utils.py deleted file mode 100644 index e4d6f39..0000000 --- a/airflow/dags/utils/redis_utils.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -Redis utility functions for Airflow DAGs. -""" - -from airflow.exceptions import AirflowException -from airflow.providers.redis.hooks.redis import RedisHook -import logging -import redis - -logger = logging.getLogger(__name__) - -def _get_redis_client(redis_conn_id): - """Gets a Redis client connection using RedisHook.""" - try: - hook = RedisHook(redis_conn_id=redis_conn_id) - client = hook.get_conn() - client.ping() - logger.info(f"Successfully connected to Redis using connection '{redis_conn_id}'.") - return client - except redis.exceptions.AuthenticationError: - logger.error(f"Redis authentication failed for connection '{redis_conn_id}'. Check password.") - raise AirflowException(f"Redis authentication failed for '{redis_conn_id}'.") - except Exception as e: - logger.error(f"Failed to get Redis client for connection '{redis_conn_id}': {e}") - raise AirflowException(f"Redis connection failed for '{redis_conn_id}': {e}") diff --git a/airflow/dags/ytdlp_mgmt_proxy_account.py b/airflow/dags/ytdlp_mgmt_proxy_account.py deleted file mode 100644 index fe8fa7d..0000000 --- a/airflow/dags/ytdlp_mgmt_proxy_account.py +++ /dev/null @@ -1,678 +0,0 @@ -""" -DAG to manage the state of proxies and accounts used by the ytdlp-ops-server. 
-""" -from __future__ import annotations - -# --- Add project root to path to allow for yt-ops-client imports --- -import sys -# The yt-ops-client package is installed in editable mode in /app -if '/app' not in sys.path: - sys.path.insert(0, '/app') - -import logging -import json -import re -import time -from datetime import datetime -import socket - -from airflow.exceptions import AirflowException -from airflow.models.dag import DAG -from airflow.models.dagbag import DagBag -from airflow.models.dagrun import DagRun -from airflow.models.param import Param -from airflow.models.taskinstance import TaskInstance -from airflow.operators.python import PythonOperator -from airflow.decorators import task -from airflow.utils.dates import days_ago -from airflow.models.variable import Variable -from airflow.providers.redis.hooks.redis import RedisHook -from airflow.utils.session import create_session - -# Configure logging -logger = logging.getLogger(__name__) - -# Import and apply Thrift exceptions patch for Airflow compatibility -try: - from thrift_exceptions_patch import patch_thrift_exceptions - patch_thrift_exceptions() - logger.info("Applied Thrift exceptions patch for Airflow compatibility.") -except ImportError: - logger.warning("Could not import thrift_exceptions_patch. Compatibility may be affected.") -except Exception as e: - logger.error(f"Error applying Thrift exceptions patch: {e}") - -# Thrift imports (kept for DEPRECATED proxy management) -try: - from ytops_client.profile_manager_tool import ProfileManager, format_duration, format_timestamp - from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException - from yt_ops_services.client_utils import get_thrift_client -except ImportError as e: - logger.critical(f"Could not import project modules: {e}. 
Ensure yt-ops-client and services are installed correctly.") - # Fail DAG parsing if thrift modules are not available - raise - -DEFAULT_MANAGEMENT_SERVICE_IP = Variable.get("MANAGEMENT_SERVICE_HOST", default_var="172.17.0.1") -DEFAULT_MANAGEMENT_SERVICE_PORT = Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9080) -DEFAULT_REDIS_CONN_ID = "redis_default" - -# Version tracking for debugging -DAG_VERSION = "1.7.1" # Updated to handle Redis configuration errors - - -# Helper function to connect to Redis, similar to other DAGs -def _get_redis_client(redis_conn_id: str): - """Gets a Redis client from an Airflow connection.""" - try: - # Use the imported RedisHook - redis_hook = RedisHook(redis_conn_id=redis_conn_id) - # get_conn returns a redis.Redis client - return redis_hook.get_conn() - except Exception as e: - logger.error(f"Failed to connect to Redis using connection '{redis_conn_id}': {e}") - # Use the imported AirflowException - raise AirflowException(f"Redis connection failed: {e}") - - - -def _list_proxy_statuses(client, server_identity): - """Lists the status of proxies.""" - logger.info(f"Listing proxy statuses for server: {server_identity or 'ALL'}") - logger.warning("DEPRECATED: Proxy management is now handled by the standalone policy-enforcer.") - logger.info("NOTE: Proxy statuses are read from server's internal state via Thrift service") - try: - statuses = client.getProxyStatus(server_identity) - except PBServiceException as e: - if "Redis is not configured for this server" in e.message: - logger.error(f"Redis not configured on server: {e.message}") - print(f"\nERROR: Server configuration issue - {e.message}\n") - print("This server does not have Redis configured for proxy management.\n") - return - else: - # Re-raise if it's a different PBServiceException - raise - except Exception as e: - logger.error(f"Unexpected error getting proxy statuses: {e}", exc_info=True) - print(f"\nERROR: Unexpected error getting proxy statuses: {e}\n") - return - - if not statuses: - logger.info("No proxy statuses found.") - return - - from tabulate import tabulate - status_list = [] - # This is forward-compatible: it checks for new attributes before using them. - has_extended_info = hasattr(statuses[0], 'recentAccounts') or hasattr(statuses[0], 'recentMachines') - - headers = ["Server", "Proxy URL", "Status", "Success", "Failures", "Last Success", "Last Failure"] - if has_extended_info: - headers.extend(["Recent Accounts", "Recent Machines"]) - - for s in statuses: - status_item = { - "Server": s.serverIdentity, - "Proxy URL": s.proxyUrl, - "Status": s.status, - "Success": s.successCount, - "Failures": s.failureCount, - "Last Success": format_timestamp(s.lastSuccessTimestamp), - "Last Failure": format_timestamp(s.lastFailureTimestamp), - } - if has_extended_info: - recent_accounts = getattr(s, 'recentAccounts', []) - recent_machines = getattr(s, 'recentMachines', []) - status_item["Recent Accounts"] = "\n".join(recent_accounts) if recent_accounts else "N/A" - status_item["Recent Machines"] = "\n".join(recent_machines) if recent_machines else "N/A" - status_list.append(status_item) - - print("\n--- Proxy Statuses ---") - # The f-string with a newline ensures the table starts on a new line in the logs. 
- print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}") - print("----------------------\n") - if not has_extended_info: - logger.warning("Server does not seem to support 'recentAccounts' or 'recentMachines' fields yet.") - print("NOTE: To see Recent Accounts/Machines, the server's `getProxyStatus` method must be updated to return these fields.") - - -def _list_account_statuses(pm: ProfileManager, account_id_prefix: str | None): - """Lists the status of profiles from Redis using ProfileManager.""" - logger.info(f"Listing v2 profile statuses from Redis for prefix: {account_id_prefix or 'ALL'}") - - try: - profiles = pm.list_profiles() - if not profiles: - print("\n--- V2 Profile Statuses ---\nNo profiles found.\n---------------------------\n") - return - - from tabulate import tabulate - status_list = [] - now = time.time() - - for p in profiles: - if account_id_prefix and not p['name'].startswith(account_id_prefix): - continue - - status = p.get('state', 'UNKNOWN') - if status == 'RESTING': - rest_until = p.get('rest_until', 0) - if rest_until > now: - status += f" ({format_duration(rest_until - now)} left)" - elif status == 'COOLDOWN': - cooldown_until = p.get('cooldown_until', 0) - if cooldown_until > now: - status += f" ({format_duration(cooldown_until - now)} left)" - - - status_item = { - "Name": p.get('name'), - "Status": status, - "Proxy": p.get('proxy', 'N/A'), - "Success": p.get('success', 0), - "Failures": p.get('failure', 0), - "Last Activity": format_timestamp(p.get('last_activity_ts', 0)), - "Owner": p.get('owner', 'None'), - "Lock Time": format_duration(now - p.get('lock_ts', 0)) if p.get('state') == 'LOCKED' else 'N/A', - } - status_list.append(status_item) - - status_list.sort(key=lambda item: item.get('Name', '')) - - print("\n--- V2 Profile Statuses ---") - print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}") - print("---------------------------\n") - except Exception as e: - logger.error(f"An unexpected error occurred while getting v2 profile statuses: {e}", exc_info=True) - print(f"\nERROR: An unexpected error occurred: {e}\n") - - -def _list_client_statuses(redis_conn_id): - """Lists the status of different client types from Redis.""" - logger.info("Listing client statuses from Redis key 'client_stats'") - - try: - redis_client = _get_redis_client(redis_conn_id) - stats_key = "client_stats" - all_stats_raw = redis_client.hgetall(stats_key) - - if not all_stats_raw: - print("\n--- Client Statuses ---\nNo client stats found in Redis.\n-----------------------\n") - return - - from tabulate import tabulate - status_list = [] - - for client_bytes, stats_json_bytes in all_stats_raw.items(): - client_name = client_bytes.decode('utf-8') - try: - stats = json.loads(stats_json_bytes.decode('utf-8')) - - def format_latest(data): - if not data: return "N/A" - ts = format_timestamp(data.get('timestamp')) - url = data.get('url') or 'N/A' - machine = data.get('machine_id', 'N/A') - video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url) - video_id = video_id_match.group(1) if video_id_match else 'N/A' - return f"{ts}\nMachine: {machine}\nVideo ID: {video_id}" - - status_item = { - "Client": client_name, - "Success": stats.get('success_count', 0), - "Failures": stats.get('failure_count', 0), - "Last Success": format_latest(stats.get('latest_success')), - "Last Failure": format_latest(stats.get('latest_failure')), - } - status_list.append(status_item) - except (json.JSONDecodeError, AttributeError) as e: - logger.error(f"Could not parse stats for client 
'{client_name}': {e}") - status_list.append({ - "Client": client_name, "Success": "ERROR", "Failures": "ERROR", - "Last Success": "Could not parse data", "Last Failure": "Could not parse data" - }) - - status_list.sort(key=lambda item: item.get('Client', '')) - - print("\n--- Client Statuses ---") - print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}") - print("-----------------------\n") - - except Exception as e: - logger.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True) - print(f"\nERROR: An unexpected error occurred: {e}\n") - - -def _list_activity_counters(redis_conn_id: str): - """Lists current activity rates for proxies and accounts from Redis.""" - logger.info("Listing activity counters from Redis keys 'activity:per_proxy:*' and 'activity:per_account:*'") - - try: - redis_client = _get_redis_client(redis_conn_id) - from tabulate import tabulate - now = time.time() - - def process_keys(pattern, entity_name): - keys = redis_client.scan_iter(pattern) - status_list = [] - for key_bytes in keys: - key = key_bytes.decode('utf-8') - entity_id = key.split(':', 2)[-1] - - # Clean up old entries before counting - redis_client.zremrangebyscore(key, '-inf', now - 3660) # Clean up > 1hr old - - count_1m = redis_client.zcount(key, now - 60, now) - count_5m = redis_client.zcount(key, now - 300, now) - count_1h = redis_client.zcount(key, now - 3600, now) - - if count_1h == 0: # Don't show entities with no recent activity - continue - - status_list.append({ - entity_name: entity_id, - "Activity (Last 1m)": count_1m, - "Activity (Last 5m)": count_5m, - "Activity (Last 1h)": count_1h, - }) - - status_list.sort(key=lambda item: item.get(entity_name, '')) - - print(f"\n--- {entity_name} Activity Counters ---") - if not status_list: - print(f"No recent activity found for {entity_name.lower()}s.") - else: - print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}") - print("-----------------------------------\n") - - process_keys("activity:per_proxy:*", "Proxy URL") - process_keys("activity:per_account:*", "Account ID") - - except Exception as e: - logger.error(f"An unexpected error occurred while getting activity counters: {e}", exc_info=True) - print(f"\nERROR: An unexpected error occurred: {e}\n") - - -def _create_profiles_from_json(**context): - """Creates profiles by calling the yt-ops-client setup-profiles tool.""" - import subprocess - import tempfile - import yaml - - params = context['params'] - json_payload_str = params.get('create_profiles_json') - if not json_payload_str: - raise AirflowException("Parameter 'create_profiles_json' is empty.") - - try: - # We accept JSON but the setup tool uses YAML, so we parse and dump. - # This also serves as validation. 
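- # For illustration only: json.loads('{"profiles": []}') -> {'profiles': []} and yaml.dump({'profiles': []}) -> 'profiles: []\n',
- # i.e. the JSON payload from the DAG param is simply re-serialized as YAML for the setup tool.
- # The actual payload schema is defined by `ytops-client setup-profiles`, not by this DAG.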
- json_payload = json.loads(json_payload_str) - yaml_payload = yaml.dump(json_payload) - except (json.JSONDecodeError, yaml.YAMLError) as e: - raise AirflowException(f"Invalid JSON/YAML in 'create_profiles_json': {e}") - - with tempfile.NamedTemporaryFile(mode='w+', delete=True, suffix='.yaml', prefix='airflow-profile-setup-') as temp_policy_file: - temp_policy_file.write(yaml_payload) - temp_policy_file.flush() - logger.info(f"Created temporary policy file for profile setup: {temp_policy_file.name}") - - cmd = [ - 'ytops-client', 'setup-profiles', - '--policy', temp_policy_file.name, - ] - # Pass through Redis connection params if provided - if params.get('redis_conn_id') != DEFAULT_REDIS_CONN_ID: - logger.warning("Custom Redis connection is not supported for `create_profiles` yet. It will use the default from .env or localhost.") - - logger.info(f"Running command: {' '.join(cmd)}") - process = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - - if process.stdout: - print(f"\n--- yt-ops-client setup-profiles STDOUT ---\n{process.stdout}\n----------------------------------------\n") - if process.stderr: - print(f"\n--- yt-ops-client setup-profiles STDERR ---\n{process.stderr}\n----------------------------------------\n") - - if process.returncode != 0: - raise AirflowException(f"Profile creation failed with exit code {process.returncode}.") - -def manage_system_callable(**context): - """Main callable to interact with the system management endpoints.""" - # Log version for debugging - logger.info(f"Running ytdlp_mgmt_proxy_account DAG version {DAG_VERSION}") - - params = context["params"] - entity = params["entity"] - action = params["action"] - - # For Thrift actions, use the new management host/port - if entity not in ["activity_counters", "account"]: - host = params["management_host"] - port = params["management_port"] - else: - host, port = None, None # Not needed for meta actions - - server_identity = params.get("server_identity") - proxy_url = params.get("proxy_url") - account_id = params.get("account_id") # Used as prefix for v2 profiles - redis_env = params.get("redis_env") - - # --- Validate Action/Entity Combination and Parameters --- - valid_actions = { - "proxy": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"], - "account": ["list_with_status", "create_profiles", "ban", "unban", "activate", "pause", "delete", "delete_all"], - "client": ["list_with_status", "delete_from_redis"], - "accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"], - "activity_counters": ["list_with_status"], - } - - if action not in valid_actions.get(entity, []): - raise ValueError( - f"The action '{action}' is not valid for entity '{entity}'.\n" - f"Valid actions for '{entity}' are: {', '.join(valid_actions.get(entity, ['None']))}." 
- ) - - # Validate required parameters for the chosen action - if entity == "proxy": - if action in ["ban", "unban"] and not server_identity: - raise ValueError(f"A 'server_identity' is required for proxy action '{action}'.") - if action in ["ban", "unban"] and not proxy_url: - raise ValueError(f"A 'proxy_url' is required for proxy action '{action}'.") - - if entity == "account": - if action in ["ban", "unban", "pause", "activate", "delete"] and not account_id: - raise ValueError(f"An 'account_id' (profile name) is required for account action '{action}'.") - - # --- ProfileManager setup for v2 account actions --- - pm = None - if entity == "account": - try: - redis_hook = RedisHook(redis_conn_id=params["redis_conn_id"]) - if redis_env: - key_prefix = f"{redis_env}_profile_mgmt_" - else: - raise ValueError("A 'redis_env' (e.g., 'sim_auth') must be provided for v2 profile actions.") - - pm = ProfileManager(redis_hook=redis_hook, key_prefix=key_prefix) - logger.info(f"Initialized ProfileManager for env '{redis_env}' (Redis key prefix: '{key_prefix}')") - except Exception as e: - raise AirflowException(f"Failed to initialize ProfileManager: {e}") - - # --- Handle Activity Counter action --- - if entity == "activity_counters": - if action == "list_with_status": - _list_activity_counters(params["redis_conn_id"]) - return # End execution - else: - raise ValueError(f"Action '{action}' is not valid for entity 'activity_counters'. Only 'list_with_status' is supported.") - - # Handle direct Redis deletion actions - if action == "delete_from_redis": - if entity == "client": - logger.info("Deleting all client stats from Redis...") - redis_client = _get_redis_client(params["redis_conn_id"]) - result = redis_client.delete("client_stats") - if result > 0: - print(f"\nSuccessfully deleted 'client_stats' key from Redis.\n") - else: - print(f"\nKey 'client_stats' not found in Redis. Nothing to delete.\n") - return - - # All other delete actions are handled by Thrift for now. - client, transport = None, None - try: - client, transport = get_thrift_client(host, port) - - if entity == "proxy": - logger.warning("DEPRECATED: Proxy management is now handled by the standalone policy-enforcer.") - proxy_url = params.get("proxy_url") - server_identity = params.get("server_identity") - - if proxy_url and server_identity: - logger.info(f"Deleting proxy '{proxy_url}' for server '{server_identity}' from Redis via Thrift service...") - result = client.deleteProxyFromRedis(proxy_url, server_identity) - if result: - print(f"\nSuccessfully deleted proxy '{proxy_url}' for server '{server_identity}' from Redis.\n") - else: - print(f"\nFailed to delete proxy '{proxy_url}' for server '{server_identity}' from Redis.\n") - else: - logger.info("Deleting all proxies from Redis via Thrift service...") - result = client.deleteAllProxiesFromRedis(server_identity) - if server_identity: - print(f"\nSuccessfully deleted all proxies for server '{server_identity}' from Redis. Count: {result}\n") - else: - print(f"\nSuccessfully deleted all proxies from Redis across ALL servers. 
Count: {result}\n") - - except (PBServiceException, PBUserException) as e: - logger.error(f"Thrift error performing delete action: {e.message}", exc_info=True) - print(f"\nERROR: Thrift service error: {e.message}\n") - raise - except Exception as e: - logger.error(f"Error performing delete action: {e}", exc_info=True) - print(f"\nERROR: An unexpected error occurred: {e}\n") - raise - finally: - if transport and transport.isOpen(): - transport.close() - logger.info("Thrift connection closed.") - return - - # --- Main Action Handler --- - client, transport = None, None - try: - # Connect to Thrift only if needed - if entity == "proxy": - client, transport = get_thrift_client(host, port) - - if entity == "client": - if action == "list_with_status": - _list_client_statuses(params["redis_conn_id"]) - - elif entity == "proxy": - logger.warning("DEPRECATED: Proxy management is now handled by the standalone policy-enforcer. These actions are for legacy support.") - if action == "list_with_status": - _list_proxy_statuses(client, server_identity) - elif action == "ban": - if not proxy_url: raise ValueError("A 'proxy_url' is required.") - logger.info(f"Banning proxy '{proxy_url}' for server '{server_identity}'...") - client.banProxy(proxy_url, server_identity) - print(f"Successfully sent request to ban proxy '{proxy_url}'.") - elif action == "unban": - if not proxy_url: raise ValueError("A 'proxy_url' is required.") - logger.info(f"Unbanning proxy '{proxy_url}' for server '{server_identity}'...") - client.unbanProxy(proxy_url, server_identity) - print(f"Successfully sent request to unban proxy '{proxy_url}'.") - elif action == "ban_all": - if server_identity: - logger.info(f"Banning all proxies for server '{server_identity}'...") - client.banAllProxies(server_identity) - print(f"Successfully sent request to ban all proxies for '{server_identity}'.") - else: - raise ValueError("A 'server_identity' is required for 'ban_all' on proxies.") - elif action == "unban_all": - if server_identity: - logger.info(f"Unbanning all proxy statuses for server '{server_identity}'...") - client.resetAllProxyStatuses(server_identity) - print(f"Successfully sent request to unban all proxy statuses for '{server_identity}'.") - else: - raise ValueError("A 'server_identity' is required for 'unban_all' on proxies.") - - elif entity == "account": - if action == "list_with_status": - _list_account_statuses(pm, account_id) - elif action == "create_profiles": - # This action is handled by a separate PythonOperator - pass - elif action == "ban": - logger.info(f"Banning profile '{account_id}' in env '{redis_env}'...") - pm.update_profile_state(account_id, "BANNED", f"Manual ban from Airflow mgmt DAG") - print(f"Successfully set state of profile '{account_id}' to BANNED.") - elif action == "unban" or action == "activate": - logger.info(f"Activating profile '{account_id}' in env '{redis_env}'...") - pm.update_profile_state(account_id, "ACTIVE", f"Manual activation from Airflow mgmt DAG") - print(f"Successfully set state of profile '{account_id}' to ACTIVE.") - elif action == "pause": - logger.info(f"Pausing (resting) profile '{account_id}' in env '{redis_env}'...") - pm.update_profile_state(account_id, "RESTING", f"Manual pause from Airflow mgmt DAG") - print(f"Successfully set state of profile '{account_id}' to RESTING.") - elif action == "delete": - logger.info(f"Deleting profile '{account_id}' in env '{redis_env}'...") - pm.delete_profile(account_id) - print(f"Successfully deleted profile '{account_id}'.") - elif action == 
"delete_all": - logger.warning(f"DESTRUCTIVE: Deleting all profiles with prefix '{account_id}' in env '{redis_env}'...") - profiles = pm.list_profiles() - deleted_count = 0 - for p in profiles: - if not account_id or p['name'].startswith(account_id): - pm.delete_profile(p['name']) - deleted_count += 1 - print(f"Successfully deleted {deleted_count} profile(s).") - - elif entity == "accounts_and_proxies": - logger.warning("DEPRECATED: Combined 'accounts_and_proxies' actions are no longer supported in v2. Please manage accounts and proxies separately.") - if action == "list_with_status": - print("\n--- Listing statuses for Proxies, V2 Profiles, and Clients ---") - _list_proxy_statuses(client, server_identity) - _list_account_statuses(pm, account_id) - _list_client_statuses(params["redis_conn_id"]) - return - - except (PBServiceException, PBUserException) as e: - logger.error(f"Thrift error performing action '{action}': {e.message}", exc_info=True) - raise - except NotImplementedError as e: - logger.error(f"Feature not implemented: {e}", exc_info=True) - raise - except Exception as e: - logger.error(f"Error performing action '{action}': {e}", exc_info=True) - raise - finally: - if transport and transport.isOpen(): - transport.close() - logger.info("Thrift connection closed.") - -with DAG( - dag_id="ytdlp_mgmt_proxy_account", - default_args={"queue": "queue-mgmt"}, - start_date=days_ago(1), - schedule=None, - catchup=False, - tags=["ytdlp", "mgmt", "master"], - doc_md=""" - ### YT-DLP v2 Profile and System Manager - - This DAG provides tools to manage the state of **v2 profiles** (formerly accounts) and other system components. - Select an `entity` and an `action` to perform. - - **V2 Profile Management (`entity: account`):** - - All account/profile actions are now performed directly on Redis using the `ProfileManager`. - - A `redis_env` (e.g., `sim_auth` or `sim_download`) is **required** to target the correct set of profiles. - - Actions include `list`, `create`, `ban`, `activate`, `pause`, and `delete`. - - **Legacy Proxy Management (`entity: proxy`):** - - **DEPRECATED**: Proxy state is now managed automatically by the standalone `policy-enforcer` service. - - These actions are provided for legacy support and interact with the old Thrift service. They may be removed in the future. - """, - params={ - "management_host": Param(DEFAULT_MANAGEMENT_SERVICE_IP, type="string", title="Management Service Host (DEPRECATED)", description="The hostname or IP of the management service. Used only for legacy proxy actions."), - "management_port": Param(DEFAULT_MANAGEMENT_SERVICE_PORT, type="integer", title="Management Service Port (DEPRECATED)", description="The port of the dedicated management service."), - "entity": Param( - "account", - type="string", - enum=["account", "proxy", "client", "activity_counters", "accounts_and_proxies"], - description="The type of entity to manage.", - ), - "action": Param( - "list_with_status", - type="string", - enum=["list_with_status", "create_profiles", "ban", "unban", "activate", "pause", "delete", "delete_all", "ban_all", "unban_all", "delete_from_redis"], - description="""The management action to perform. - --- - #### Actions for `entity: account` (V2 Profiles) - - `list_with_status`: View status of all profiles, optionally filtered by `account_id` as a prefix. - - `create_profiles`: Creates new profiles from a JSON payload. See `create_profiles_json` param. - - `ban`: Sets a profile's state to BANNED. Requires `account_id`. 
- - `unban`/`activate`: Sets a profile's state to ACTIVE. Requires `account_id`. - - `pause`: Sets a profile's state to RESTING. Requires `account_id`. - - `delete`: Deletes a single profile. Requires `account_id`. - - `delete_all`: **(Destructive)** Deletes all profiles, or those matching the `account_id` as a prefix. - - #### Actions for `entity: proxy` (DEPRECATED) - - `list_with_status`, `ban`, `unban`, `ban_all`, `unban_all`, `delete_from_redis`. - - #### Actions for `entity: client` - - `list_with_status`: View success/failure statistics for each client type. - - `delete_from_redis`: **(Destructive)** Deletes all client stats from Redis. - - #### Actions for `entity: activity_counters` - - `list_with_status`: View current activity rates for proxies and accounts. - """, - ), - "redis_env": Param( - "sim_auth", - type="string", - enum=["sim_auth", "sim_download"], - title="[V2 Profiles] Redis Environment", - description="The environment for v2 profile management (e.g., 'sim_auth'). Determines the Redis key prefix.", - ), - "account_id": Param( - None, - type=["null", "string"], - description="For v2 profiles: The profile name (e.g., 'auth_user_0') or a prefix for `list` and `delete_all`.", - ), - "create_profiles_json": Param( - """{ - "auth_profile_setup": { - "env": "sim_auth", - "cleanup_before_run": false, - "pools": [ - { - "prefix": "auth_user", - "proxy": "sslocal-rust-1090:1090", - "count": 2 - } - ] - } -}""", - type="string", - title="[V2 Profiles] Create Profiles JSON", - description="For action `create_profiles`. A JSON payload defining the profiles to create. This is passed to `yt-ops-client setup-profiles`.", - **{'ui_widget': 'json', 'multi_line': True} - ), - "server_identity": Param( - None, - type=["null", "string"], - description="[DEPRECATED] The server identity for proxy management.", - ), - "proxy_url": Param( - None, - type=["null", "string"], - description="[DEPRECATED] The proxy URL to act upon.", - ), - "redis_conn_id": Param( - DEFAULT_REDIS_CONN_ID, - type="string", - title="Redis Connection ID", - description="The Airflow connection ID for the Redis server.", - ), - }, -) as dag: - - @task.branch(task_id="branch_on_action") - def branch_on_action(**context): - action = context["params"]["action"] - if action == "create_profiles": - return "create_profiles_task" - return "system_management_task" - - create_profiles_task = PythonOperator( - task_id="create_profiles_task", - python_callable=_create_profiles_from_json, - ) - - system_management_task = PythonOperator( - task_id="system_management_task", - python_callable=manage_system_callable, - ) - - branch_on_action() >> [create_profiles_task, system_management_task] diff --git a/airflow/dags/ytdlp_mgmt_queues.py b/airflow/dags/ytdlp_mgmt_queues.py deleted file mode 100644 index 5c979c5..0000000 --- a/airflow/dags/ytdlp_mgmt_queues.py +++ /dev/null @@ -1,979 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Airflow DAG for manually adding YouTube URLs or Video IDs to a Redis queue. 
-""" - -from __future__ import annotations - -import json -import logging -import re -from typing import List, Optional -import csv -import os -from datetime import datetime - -from airflow.exceptions import AirflowException -from airflow.models.dag import DAG -from airflow.models.dagrun import DagRun -from airflow.models.param import Param -from airflow.models.taskinstance import TaskInstance -from airflow.operators.python import PythonOperator, BranchPythonOperator -from airflow.operators.empty import EmptyOperator -from airflow.operators.bash import BashOperator -from airflow.providers.celery.executors.celery_executor import app as celery_app -from airflow.providers.redis.hooks.redis import RedisHook -from airflow.utils.dates import days_ago -from airflow.models.variable import Variable -from airflow.utils.session import create_session -import requests - -# Configure logging -logger = logging.getLogger(__name__) - -# Default settings -DEFAULT_REDIS_CONN_ID = "redis_default" -DEFAULT_QUEUE_NAME = "video_queue" -DEFAULT_QUEUE_TO_CLEAR = 'PLEASE_SPECIFY_QUEUE_TO_CLEAR' -DEFAULT_URL_LISTS_DIR = '/opt/airflow/inputfiles' - - -# --- Helper Functions --- - -def _get_redis_client(redis_conn_id: str): - """Gets a Redis client from an Airflow connection.""" - try: - redis_hook = RedisHook(redis_conn_id=redis_conn_id) - return redis_hook.get_conn() - except Exception as e: - logger.error(f"Failed to connect to Redis using connection '{redis_conn_id}': {e}") - raise AirflowException(f"Redis connection failed: {e}") - - -def _get_predefined_url_lists(): - """Returns a static list of predefined URL list files.""" - # This is a static list to ensure options are always visible in the UI, - # even if the files don't exist on the filesystem at parse time. - # The DAG will check for the file's existence at runtime. - predefined_files = [ - 'urls.dh128.json', - 'urls.ixbt2045.json', - 'urls.news1000.json', - 'urls.rt100.json', - 'urls.rt250_01.txt', - 'urls.rt250_02.txt', - 'urls.rt250_03.txt', - 'urls.rt250_04.txt', - 'urls.rt250_05.txt', - 'urls.rt250_06.txt', - 'urls.rt250_07.txt', - 'urls.rt250_08.txt', - 'urls.rt250_11.txt', - 'urls.rt250_12.txt', - 'urls.rt250_13.txt', - 'urls.rt250_14.txt', - 'urls.rt250_15.txt', - 'urls.rt250_16.txt', - 'urls.rt250_17.txt', - 'urls.rt250_18.txt', - 'urls.rt3700.txt', - 'urls.sky28.json', - 'urls.sky3.json', - 'urls.tq46.json', - ] - return ['None'] + sorted(predefined_files) - - -def _get_urls_from_source(**params) -> List[str]: - """ - Determines the source of video inputs based on the 'input_source' param and returns a list of raw items. 
- """ - input_source = params.get("input_source", "manual") - predefined_list = params.get("predefined_url_list") - file_path_or_url = params.get("url_list_file_path") - manual_inputs = params.get("video_inputs") - - # Source 1: Predefined file - if input_source == 'predefined_file': - if not predefined_list or predefined_list == 'None': - raise AirflowException("Input source is 'predefined_file', but no file was selected from the list.") - - default_path = DEFAULT_URL_LISTS_DIR - url_lists_dir = Variable.get('YTDLP_URL_LISTS_DIR', default_var=default_path) - file_path = os.path.join(url_lists_dir, predefined_list) - logger.info(f"Loading URLs from predefined file: {file_path}") - if not os.path.exists(file_path): - raise AirflowException(f"Selected predefined file does not exist: {file_path}") - - with open(file_path, 'r', encoding='utf-8') as f: - if predefined_list.lower().endswith('.json'): - logger.info(f"Parsing '{predefined_list}' as a JSON file.") - try: - data = json.load(f) - if not isinstance(data, list): - raise AirflowException(f"JSON file '{predefined_list}' must contain a list of strings.") - return [str(item) for item in data] - except json.JSONDecodeError: - raise AirflowException(f"Failed to parse JSON from file: {predefined_list}") - elif predefined_list.lower().endswith('.txt'): - logger.info(f"Parsing '{predefined_list}' as a text file (one URL per line).") - return [line.strip() for line in f if line.strip()] - else: - raise AirflowException(f"Unsupported file type for predefined file: '{predefined_list}'. Must be .json or .txt.") - - # Source 2: File path or URL - elif input_source == 'file_path_or_url': - if not file_path_or_url: - raise AirflowException("Input source is 'file_path_or_url', but no path/URL was provided.") - - logger.info(f"Loading URLs from provided path/URL: {file_path_or_url}") - content = "" - if file_path_or_url.startswith(('http://', 'https://')): - try: - response = requests.get(file_path_or_url, timeout=30) - response.raise_for_status() - content = response.text - except requests.RequestException as e: - raise AirflowException(f"Failed to fetch URL list from '{file_path_or_url}': {e}") - else: # Assume local file path - if not os.path.exists(file_path_or_url): - raise AirflowException(f"Provided file path does not exist: {file_path_or_url}") - with open(file_path_or_url, 'r', encoding='utf-8') as f: - content = f.read() - - try: - data = json.loads(content) - if not isinstance(data, list): - raise AirflowException("JSON content from path/URL must contain a list of strings.") - return [str(item) for item in data] - except json.JSONDecodeError: - raise AirflowException(f"Failed to parse JSON from path/URL: {file_path_or_url}") - - # Source 3: Manual input - elif input_source == 'manual': - if not manual_inputs: - logger.info("Input source is 'manual', but no inputs were provided. Nothing to do.") - return [] - logger.info("Loading URLs from manual input.") - return parse_video_inputs(manual_inputs) - - else: - logger.warning(f"No valid input source selected or no data provided for the selected source. Nothing to do.") - return [] - - -def parse_video_inputs(input_str: str) -> List[str]: - """Parses a flexible string of video inputs into a list of individual items.""" - if not input_str or not isinstance(input_str, str): - return [] - - input_str = input_str.strip() - - # 1. 
Try to parse as a JSON array - if input_str.startswith("[") and input_str.endswith("]"): - try: - items = json.loads(input_str) - if isinstance(items, list): - logger.info("Successfully parsed input as a JSON array.") - return [str(item).strip() for item in items] - except json.JSONDecodeError: - logger.warning("Input looked like a JSON array but failed to parse. Treating as a comma-separated string.") - - # 2. Treat as a comma-separated string - items = [item.strip() for item in input_str.split(",")] - - # 3. Clean up quotes and extra whitespace from each item - cleaned_items = [] - for item in items: - if item.startswith(('"', "'")) and item.endswith(('"', "'")): - item = item[1:-1] - if item: # Only add non-empty items - cleaned_items.append(item.strip()) - - return cleaned_items - - -def normalize_to_url(item: str) -> Optional[str]: - """ - Validates if an item is a recognizable YouTube URL or video ID, - and normalizes it to a standard watch URL format. - """ - if not item: - return None - - # Regex for a standard 11-character YouTube video ID - video_id_pattern = r"^[a-zA-Z0-9_-]{11}$" - - # Check if the item itself is a video ID - if re.match(video_id_pattern, item): - video_id = item - return f"https://www.youtube.com/watch?v={video_id}" - - # Comprehensive regex to extract video ID from various URL formats - # Covers: watch, youtu.be, shorts, embed, /v/ - url_patterns = [ - r"(?:v=|\/v\/|youtu\.be\/|embed\/|shorts\/)([a-zA-Z0-9_-]{11})" - ] - for pattern in url_patterns: - match = re.search(pattern, item) - if match: - video_id = match.group(1) - return f"https://www.youtube.com/watch?v={video_id}" - - logger.warning(f"Could not recognize '{item}' as a valid YouTube URL or video ID.") - return None - - -def dump_redis_data_to_csv(redis_client, dump_dir, patterns): - """Dumps data from Redis keys matching patterns to separate CSV files in a timestamped directory.""" - timestamp_dir = datetime.now().strftime('%Y%m%d_%H%M%S') - full_dump_path = os.path.join(dump_dir, timestamp_dir) - - os.makedirs(full_dump_path, exist_ok=True) - logger.info(f"Created dump directory: {full_dump_path}") - - for pattern in patterns: - if not pattern: continue - - # Sanitize pattern for filename - sanitized_pattern = re.sub(r'[^a-zA-Z0-9_-]', '_', pattern) - timestamp_file = datetime.now().strftime('%Y%m%d') - dump_file_name = f'redis_dump_{sanitized_pattern}_{timestamp_file}.csv' - dump_file_path = os.path.join(full_dump_path, dump_file_name) - - logger.info(f"Dumping keys matching '{pattern}' to {dump_file_path}") - - try: - with open(dump_file_path, 'w', newline='', encoding='utf-8') as csvfile: - writer = csv.writer(csvfile) - writer.writerow(['key', 'type', 'field_or_index', 'value']) - - keys_found = 0 - for key_bytes in redis_client.scan_iter(pattern): - key = key_bytes.decode('utf-8') - keys_found += 1 - key_type = redis_client.type(key).decode('utf-8') - - if key_type == 'hash': - for field, value in redis_client.hgetall(key).items(): - writer.writerow([key, key_type, field.decode('utf-8'), value.decode('utf-8')]) - elif key_type == 'list': - for index, value in enumerate(redis_client.lrange(key, 0, -1)): - writer.writerow([key, key_type, index, value.decode('utf-8')]) - elif key_type == 'set': - for member in redis_client.smembers(key): - writer.writerow([key, key_type, None, member.decode('utf-8')]) - elif key_type == 'string': - value = redis_client.get(key) - if value: - writer.writerow([key, key_type, None, value.decode('utf-8')]) - - if keys_found > 0: - logger.info(f"Successfully dumped 
{keys_found} keys for pattern '{pattern}' to {dump_file_path}") - else: - logger.info(f"No keys found for pattern '{pattern}'. Empty CSV file created at {dump_file_path}") - - except Exception as e: - logger.error(f"Failed to dump Redis data for pattern '{pattern}': {e}", exc_info=True) - raise AirflowException(f"Failed to dump Redis data for pattern '{pattern}': {e}") - - -def clear_queue_callable(**context): - """ - Dumps Redis data to CSV and/or clears specified Redis keys based on selection. - The `_skipped` queue is for videos that are unavailable due to external reasons (e.g., private, removed). - """ - params = context['params'] - ti = context['task_instance'] - logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") - redis_conn_id = params['redis_conn_id'] - - queue_system = params.get('queue_system', 'v1_monolithic') - queue_base_names_to_clear = [] - if queue_system == 'v1_monolithic': - queue_base_names_to_clear.append(params['queue_base_name']) - elif queue_system.startswith('v2_'): - # For v2, clear both auth and dl queues for a complete clear. - queue_base_names_to_clear.extend(['queue2_auth', 'queue2_dl']) - else: - raise ValueError(f"Invalid queue_system: {queue_system}") - logger.info(f"Operating on queue system '{queue_system}' with base names: {queue_base_names_to_clear}.") - - queues_to_clear_options = params.get('queues_to_clear_options', []) - confirm_clear = params.get('confirm_clear', False) - dump_queues = params['dump_queues'] - dump_dir = context['templates_dict']['dump_dir'] - dump_patterns = params['dump_patterns'].split(',') if params.get('dump_patterns') else [] - - if not confirm_clear: - message = "Action is 'clear_queue', but 'Confirm Deletion' was not checked. Aborting to prevent accidental data loss." - logger.error(message) - raise AirflowException(message) - - # If no queues are selected, default to clearing all of them. - if not queues_to_clear_options: - logger.warning("No specific queues selected to clear. Defaulting to '_all'.") - queues_to_clear_options = ['_all'] - - redis_client = _get_redis_client(redis_conn_id) - - if dump_queues and dump_patterns: - logger.info("Dumping is enabled. Performing dump before clearing.") - dump_redis_data_to_csv(redis_client, dump_dir, dump_patterns) - - all_suffixes = ['_inbox', '_fail', '_result', '_progress', '_skipped'] - special_queues = ['queue_dl_format_tasks'] - keys_to_delete = set() - - # Handle special queues first - for q in special_queues: - if q in queues_to_clear_options: - keys_to_delete.add(q) - - for queue_base_name in queue_base_names_to_clear: - if '_all' in queues_to_clear_options: - logger.info(f"'_all' option selected. Clearing all standard queues for base '{queue_base_name}'.") - for suffix in all_suffixes: - keys_to_delete.add(f"{queue_base_name}{suffix}") - else: - for suffix in queues_to_clear_options: - if suffix in all_suffixes: - keys_to_delete.add(f"{queue_base_name}{suffix}") - - if not keys_to_delete: - logger.warning("No valid queue suffixes were selected. Nothing to delete.") - return - - logger.info(f"Attempting to clear {len(keys_to_delete)} Redis key(s): {sorted(list(keys_to_delete))}") - try: - deleted_count = redis_client.delete(*keys_to_delete) - logger.info(f"Successfully sent delete command for {len(keys_to_delete)} key(s). 
Redis reported {deleted_count} deleted.") - except Exception as e: - logger.error(f"Failed to clear Redis keys: {e}", exc_info=True) - raise AirflowException(f"Failed to clear Redis keys: {e}") - - -def list_contents_callable(**context): - """Lists the contents of the specified Redis key(s) (list or hash).""" - params = context['params'] - ti = context['task_instance'] - logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") - redis_conn_id = params['redis_conn_id'] - queues_to_list_str = params.get('queue_to_list') - max_items = params.get('max_items', 10) - - if not queues_to_list_str: - raise ValueError("Parameter 'queue_to_list' cannot be empty.") - - queues_to_list = [q.strip() for q in queues_to_list_str.split(',') if q.strip()] - - if not queues_to_list: - logger.info("No valid queue names provided in 'queue_to_list'. Nothing to do.") - return - - logger.info(f"Attempting to list contents for {len(queues_to_list)} Redis key(s): {queues_to_list}") - - redis_client = _get_redis_client(redis_conn_id) - - for queue_to_list in queues_to_list: - # Add a newline for better separation in logs - logger.info(f"\n--- Listing contents of Redis key '{queue_to_list}' (max: {max_items}) ---") - try: - key_type_bytes = redis_client.type(queue_to_list) - key_type = key_type_bytes.decode('utf-8') # Decode type - - if key_type == 'list': - list_length = redis_client.llen(queue_to_list) - items_to_fetch = min(max_items, list_length) - contents_bytes = redis_client.lrange(queue_to_list, -items_to_fetch, -1) - contents = [item.decode('utf-8') for item in contents_bytes] - contents.reverse() - logger.info(f"--- Contents of Redis List '{queue_to_list}' ---") - logger.info(f"Total items in list: {list_length}") - if contents: - logger.info(f"Showing most recent {len(contents)} item(s):") - for i, item in enumerate(contents): - logger.info(f" [recent_{i}]: {item}") - if list_length > len(contents): - logger.info(f" ... ({list_length - len(contents)} older items not shown)") - logger.info(f"--- End of List Contents ---") - - elif key_type == 'hash': - hash_size = redis_client.hlen(queue_to_list) - if hash_size > max_items * 2: - logger.warning(f"Hash '{queue_to_list}' has {hash_size} fields, which is large. Listing might be slow or incomplete. Consider using redis-cli HSCAN.") - contents_bytes = redis_client.hgetall(queue_to_list) - contents = {k.decode('utf-8'): v.decode('utf-8') for k, v in contents_bytes.items()} - logger.info(f"--- Contents of Redis Hash '{queue_to_list}' ---") - logger.info(f"Total fields in hash: {hash_size}") - if contents: - logger.info(f"Showing up to {max_items} item(s):") - item_count = 0 - for key, value in contents.items(): - if item_count >= max_items: - logger.info(f" ... (stopped listing after {max_items} items of {hash_size})") - break - try: - parsed_value = json.loads(value) - pretty_value = json.dumps(parsed_value, indent=2) - logger.info(f" '{key}':\n{pretty_value}") - except json.JSONDecodeError: - logger.info(f" '{key}': {value}") - item_count += 1 - logger.info(f"--- End of Hash Contents ---") - - elif key_type == 'none': - logger.info(f"Redis key '{queue_to_list}' does not exist.") - else: - logger.info(f"Redis key '{queue_to_list}' is of type '{key_type}'. 
Listing contents for this type is not implemented.") - - except Exception as e: - logger.error(f"Failed to list contents of Redis key '{queue_to_list}': {e}", exc_info=True) - # Continue to the next key in the list instead of failing the whole task - - -def check_status_callable(**context): - """ - Checks the status (type and size) of all standard Redis queues for a given base name. - The `_skipped` queue is for videos that are unavailable due to external reasons (e.g., private, removed). - """ - params = context['params'] - ti = context['task_instance'] - logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") - redis_conn_id = params['redis_conn_id'] - queue_system = params.get('queue_system', 'v1_monolithic') - - queue_base_names_to_check = [] - if queue_system == 'v1_monolithic': - queue_base_names_to_check.append(params.get('queue_base_name', DEFAULT_QUEUE_NAME)) - elif queue_system.startswith('v2_'): - # For v2, always check both auth and dl queues for a complete picture. - queue_base_names_to_check.extend(['queue2_auth', 'queue2_dl']) - else: - raise ValueError(f"Invalid queue_system: {queue_system}") - - queue_suffixes = ['_inbox', '_progress', '_result', '_fail', '_skipped'] - special_queues = ['queue_dl_format_tasks'] - - logger.info(f"--- Checking Status for Queue System: '{queue_system}' ---") - - try: - redis_client = _get_redis_client(redis_conn_id) - - for queue_name in queue_base_names_to_check: - logger.info(f"--- Base Name: '{queue_name}' ---") - for suffix in queue_suffixes: - queue_to_check = f"{queue_name}{suffix}" - key_type = redis_client.type(queue_to_check).decode('utf-8') - size = 0 - if key_type == 'list': - size = redis_client.llen(queue_to_check) - elif key_type == 'hash': - size = redis_client.hlen(queue_to_check) - - if key_type != 'none': - logger.info(f" - Queue '{queue_to_check}': Type='{key_type.upper()}', Size={size}") - else: - logger.info(f" - Queue '{queue_to_check}': Does not exist.") - - logger.info(f"--- Special Queues ---") - for queue_name in special_queues: - key_type = redis_client.type(queue_name).decode('utf-8') - size = 0 - if key_type == 'list': - size = redis_client.llen(queue_name) - - if key_type != 'none': - logger.info(f" - Queue '{queue_name}': Type='{key_type.upper()}', Size={size}") - else: - logger.info(f" - Queue '{queue_name}': Does not exist.") - - logger.info(f"--- End of Status Check ---") - - except Exception as e: - logger.error(f"Failed to check queue status for system '{queue_system}': {e}", exc_info=True) - raise AirflowException(f"Failed to check queue status: {e}") - - -def requeue_failed_callable(**context): - """ - Copies all URLs from the fail hash to the inbox list and optionally clears the fail hash. - Adapts behavior for v1 and v2 queue systems. 
- """ - params = context['params'] - ti = context['task_instance'] - logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") - redis_conn_id = params['redis_conn_id'] - clear_fail_queue = params['clear_fail_queue_after_requeue'] - queue_system = params.get('queue_system', 'v1_monolithic') - - fail_queue_name = "" - inbox_queue_name = "" - - if queue_system == 'v1_monolithic': - queue_name = params['queue_base_name'] - fail_queue_name = f"{queue_name}_fail" - inbox_queue_name = f"{queue_name}_inbox" - elif queue_system == 'v2_separated_auth': - fail_queue_name = "queue2_auth_fail" - inbox_queue_name = "queue2_auth_inbox" - elif queue_system == 'v2_separated_dl': - fail_queue_name = "queue2_dl_fail" - # DL failures must be re-authenticated, so they go back to the auth inbox. - inbox_queue_name = "queue2_auth_inbox" - else: - raise ValueError(f"Invalid queue_system: {queue_system}") - - logger.info(f"Requeuing failed URLs from '{fail_queue_name}' to '{inbox_queue_name}' (system: {queue_system}).") - - redis_client = _get_redis_client(redis_conn_id) - - try: - # The fail queue is a hash. The keys are the URLs. - failed_urls_bytes = redis_client.hkeys(fail_queue_name) - if not failed_urls_bytes: - logger.info(f"Fail queue '{fail_queue_name}' is empty. Nothing to requeue.") - return - - failed_urls = [url.decode('utf-8') for url in failed_urls_bytes] - logger.info(f"Found {len(failed_urls)} URLs to requeue:") - for url in failed_urls: - logger.info(f" - {url}") - - # Add URLs to the inbox list - if failed_urls: - with redis_client.pipeline() as pipe: - pipe.rpush(inbox_queue_name, *failed_urls) - if clear_fail_queue: - pipe.delete(fail_queue_name) - pipe.execute() - - final_list_length = redis_client.llen(inbox_queue_name) - success_message = ( - f"Successfully requeued {len(failed_urls)} URLs to '{inbox_queue_name}'. " - f"The list now contains {final_list_length} items." - ) - logger.info(success_message) - - if clear_fail_queue: - logger.info(f"Successfully cleared fail queue '{fail_queue_name}'.") - else: - logger.info(f"Fail queue '{fail_queue_name}' was not cleared as per configuration.") - - except Exception as e: - logger.error(f"Failed to requeue failed URLs: {e}", exc_info=True) - raise AirflowException(f"Failed to requeue failed URLs: {e}") - - -def purge_celery_queue_callable(**context): - """ - Purges messages from the specified Celery queues using the Airflow Celery app. - This is more reliable than shelling out to `celery purge` as it uses the same - app context and broker connection as the workers. - """ - params = context['params'] - if not params.get('confirm_purge'): - raise AirflowException("'Confirm Purge' is not checked. Aborting to prevent accidental data loss.") - - queues_to_purge_str = params.get('celery_queue_to_purge') - if not queues_to_purge_str: - raise AirflowException("No Celery queues specified to purge.") - - queues = [q.strip() for q in queues_to_purge_str.split(',') if q.strip()] - - logger.info(f"Attempting to purge {len(queues)} Celery queue(s): {queues}") - logger.info(f"Using broker: {celery_app.conf.broker_url}") - - purged_counts = {} - with celery_app.connection_for_read() as conn: - with conn.channel() as channel: - for queue in queues: - try: - message_count = channel.queue_purge(queue) - purged_counts[queue] = message_count - logger.info(f"Purged {message_count} messages from queue '{queue}'.") - except Exception as e: - # This can happen if the queue doesn't exist on the broker. - # kombu might raise an operational error. 
- logger.error(f"Failed to purge queue '{queue}': {e}", exc_info=True) - purged_counts[queue] = f"ERROR: {e}" - - logger.info("--- Celery Purge Summary ---") - for queue, result in purged_counts.items(): - logger.info(f" - {queue}: {result}") - logger.info("--- Purge complete. ---") - - -def clear_dag_runs_callable(**context): - """ - Deletes DAG run history and associated task instances from the database. - """ - params = context['params'] - dag_id = params.get("dag_id_to_manage") - clear_scope = params.get("clear_scope") - - log_target = f"DAG '{dag_id}'" if dag_id != "ALL_DAGS" else "ALL DAGS (except ytdlp_mgmt_queues)" - logger.info(f"Attempting to delete DagRuns for {log_target} with scope '{clear_scope}'.") - - with create_session() as session: - dag_run_query = session.query(DagRun) - if dag_id == "ALL_DAGS": - dag_run_query = dag_run_query.filter(DagRun.dag_id != 'ytdlp_mgmt_queues') - else: - dag_run_query = dag_run_query.filter(DagRun.dag_id == dag_id) - - if clear_scope == "last_run": - if dag_id == "ALL_DAGS": - raise AirflowException("Cannot clear 'last_run' for ALL_DAGS. Please select a specific DAG.") - - last_run = dag_run_query.order_by(DagRun.execution_date.desc()).first() - if not last_run: - logger.info(f"No runs found for DAG '{dag_id}'. Nothing to delete.") - print(f"\nNo runs found for DAG '{dag_id}'.\n") - return - - logger.warning(f"Deleting last DagRun for DAG '{dag_id}' (run_id: {last_run.run_id}, execution_date: {last_run.execution_date}). This will also delete its task instances.") - session.delete(last_run) - deleted_count = 1 - else: # all_runs - logger.warning(f"Deleting ALL DagRuns and associated TaskInstances for {log_target}. This will remove all history from the UI.") - - ti_query = session.query(TaskInstance) - if dag_id == "ALL_DAGS": - ti_query = ti_query.filter(TaskInstance.dag_id != 'ytdlp_mgmt_queues') - else: - ti_query = ti_query.filter(TaskInstance.dag_id == dag_id) - - ti_deleted_count = ti_query.delete(synchronize_session=False) - logger.info(f"Deleted {ti_deleted_count} TaskInstance records for {log_target}.") - - deleted_count = dag_run_query.delete(synchronize_session=False) - - # The session is committed automatically by the `with create_session()` context manager. - logger.info(f"Successfully deleted {deleted_count} DagRun(s) for {log_target}.") - print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for {log_target}.\n") - - -def add_videos_to_queue_callable(**context): - """ - Parses video inputs from manual text, a predefined file, or a file path/URL, - normalizes them to URLs, and adds them to a Redis queue. - """ - params = context["params"] - ti = context['task_instance'] - logger.info(f"Task '{ti.task_id}' running on queue '{ti.queue}'.") - - queue_system = params.get('queue_system', 'v1_monolithic') - if queue_system.startswith('v2_'): - # For v2 systems, raw URLs are always added to the auth queue. - queue_name = 'queue2_auth' - logger.info(f"Queue system is '{queue_system}'. Adding URLs to '{queue_name}_inbox'.") - else: - queue_name = params["queue_base_name"] - - redis_conn_id = params["redis_conn_id"] - dry_run = params["dry_run"] - - # This function will get the list of strings from the correct source based on precedence - raw_items = _get_urls_from_source(**params) - - if not raw_items: - logger.info("No video inputs found from any source. 
Nothing to do.") - return - - valid_urls = [] - for item in raw_items: - url = normalize_to_url(item) - if url and url not in valid_urls: - valid_urls.append(url) - elif not url: - logger.warning(f"Skipping invalid input item: '{item}'") - - if not valid_urls: - raise AirflowException("No valid YouTube URLs or IDs were found in the provided input.") - - logger.info(f"Found {len(valid_urls)} valid and unique URLs to add to the queue:") - for url in valid_urls: - logger.info(f" - {url}") - - if dry_run: - logger.info("Dry run is enabled. Skipping Redis operation.") - print(f"\n[DRY RUN] Would have added {len(valid_urls)} URLs to the Redis list '{queue_name}_inbox'.") - return - - # --- Add to Redis --- - try: - redis_client = _get_redis_client(redis_conn_id) - inbox_queue = f"{queue_name}_inbox" - - # Use a pipeline for atomic and efficient addition - with redis_client.pipeline() as pipe: - for url in valid_urls: - pipe.rpush(inbox_queue, url) - pipe.execute() - - final_list_length = redis_client.llen(inbox_queue) - - success_message = ( - f"Successfully added {len(valid_urls)} URLs to Redis list '{inbox_queue}'. " - f"The list now contains {final_list_length} items." - ) - logger.info(success_message) - - except Exception as e: - logger.error(f"Failed to add URLs to Redis queue '{inbox_queue}': {e}", exc_info=True) - raise AirflowException(f"Failed to add URLs to Redis: {e}") - - -# --- DAG Definition --- -with DAG( - dag_id="ytdlp_mgmt_queues", - default_args={ - "owner": "airflow", - "start_date": days_ago(1), - "retries": 0, - "queue": "queue-mgmt", - }, - schedule=None, - catchup=False, - tags=["ytdlp", "mgmt", "master"], - doc_md=""" - ### YT-DLP Queue Management - - This DAG provides a set of tools to manage Redis queues used by the YTDLP processing pipeline. - Select an `action` to perform when triggering the DAG. - - **Actions:** - - `add_videos`: Add one or more YouTube videos to a queue. You can provide input manually, select a predefined file from the server, or provide a path/URL to a file. - - `clear_queue`: Dump and/or delete a specific Redis key. - - `list_contents`: View the contents of a Redis key (list or hash). - - `check_status`: Check the overall status of the queues. - - `requeue_failed`: Copy all URLs from the `_fail` hash to the `_inbox` list and clear the `_fail` hash. - - `purge_celery_queue`: **(Destructive)** Removes all tasks from a specified Celery worker queue (e.g., `queue-dl`). This is useful for clearing out a backlog of tasks that were queued before a dispatcher was paused. - - `clear_dag_runs`: **(Destructive)** Deletes DAG run history and associated task instances from the database, removing them from the UI. - """, - params={ - "action": Param( - "list_contents", - type="string", - enum=["add_videos", "clear_queue", "list_contents", "check_status", "requeue_failed", "inspect_celery_cluster", "purge_celery_queue", "clear_dag_runs"], - title="Action", - description="The management action to perform.", - ), - "queue_system": Param( - "v1_monolithic", - type="string", - enum=["v1_monolithic", "v2_separated_auth", "v2_separated_dl"], - title="Queue System", - description="Select the target queue system to manage. This choice affects which queues are targeted by actions.", - ), - "queue_base_name": Param( - DEFAULT_QUEUE_NAME, - type="string", - title="Queue Base Name (v1 only)", - description="Base name for queues. 
Only used when 'Queue System' is 'v1_monolithic'.", - ), - # --- Params for 'add_videos' --- - "input_source": Param( - "predefined_file", - type="string", - enum=["manual", "predefined_file", "file_path_or_url"], - title="[add_videos] Video Input Source", - description="Choose how to provide the video URLs. This choice determines which of the following parameters is used.", - ), - "video_inputs": Param( - None, - type=["null", "string"], - title="[add_videos] 1. Manual Input", - description="Used if 'Input Source' is 'manual'. Paste a single item, a comma-separated list, or a JSON array of YouTube URLs or Video IDs.", - ), - "predefined_url_list": Param( - "None", - type="string", - enum=_get_predefined_url_lists(), - title="[add_videos] 2. Predefined File", - description=( - "Used if 'Input Source' is 'predefined_file'. Select a JSON file from the server's URL list directory " - f"(defined by Airflow Variable 'YTDLP_URL_LISTS_DIR', defaults to '{DEFAULT_URL_LISTS_DIR}')." - ), - ), - "url_list_file_path": Param( - None, - type=["null", "string"], - title="[add_videos] 3. File Path or URL", - description="Used if 'Input Source' is 'file_path_or_url'. Enter a local file path (on the Airflow worker) or a remote URL to a JSON file containing a list of URLs/IDs.", - ), - "dry_run": Param( - False, - type="boolean", - title="[add_videos] Dry Run", - description="If True, validate inputs without adding them to the queue.", - ), - # --- Params for 'clear_queue' --- - "queues_to_clear_options": Param( - None, - type=["null", "array"], - title="[clear_queue] Queues to Clear", - description="Select which standard queues to clear. '_all' clears all standard queues. 'queue_dl_format_tasks' is the new granular download task queue.", - items={ - "type": "string", - "enum": ["_inbox", "_fail", "_result", "_progress", "_skipped", "_all", "queue_dl_format_tasks"], - } - ), - "confirm_clear": Param( - False, - type="boolean", - title="[clear_queue] Confirm Deletion", - description="Must be set to True to execute the 'clear_queue' action. This is a destructive operation.", - ), - "dump_queues": Param( - True, - type="boolean", - title="[clear_queue] Dump Data", - description="If True, dump data before clearing.", - ), - "dump_dir": Param( - None, - type=["null", "string"], - title="[clear_queue] Dump Directory", - description="Base directory to save CSV dump files. Supports Jinja. 
If empty, defaults to Airflow variable 'YTDLP_REDIS_DUMP_DIR' or '/opt/airflow/dumps'.", - ), - "dump_patterns": Param( - 'ytdlp:*,video_queue_*', - type="string", - title="[clear_queue] Dump Patterns", - description="Comma-separated list of key patterns to dump.", - ), - # --- Params for 'list_contents' --- - "queue_to_list": Param( - 'queue2_auth_inbox,queue_dl_format_tasks,queue2_dl_inbox', - type="string", - title="[list_contents] Queues to List", - description="Comma-separated list of exact Redis key names to list.", - ), - "max_items": Param( - 10, - type="integer", - title="[list_contents] Max Items to List", - description="Maximum number of items to show.", - ), - # --- Params for 'requeue_failed' --- - "clear_fail_queue_after_requeue": Param( - True, - type="boolean", - title="[requeue_failed] Clear Fail Queue", - description="If True, deletes the `_fail` hash after requeueing items.", - ), - # --- Params for 'purge_celery_queue' --- - "celery_queue_to_purge": Param( - "queue-dl,queue-auth", - type="string", - title="[purge_celery_queue] Celery Queues to Purge", - description="Comma-separated list of Celery queue names to purge from the broker. This is a destructive action.", - ), - "confirm_purge": Param( - False, - type="boolean", - title="[purge_celery_queue] Confirm Purge", - description="Must be set to True to execute the 'purge_celery_queue' action. This is a destructive operation that removes all tasks from the specified Celery queue(s).", - ), - # --- Params for 'clear_dag_runs' --- - "dag_id_to_manage": Param( - "ALL_DAGS", - type="string", - enum=["ALL_DAGS", "ytdlp_ops_v01_orchestrator", "ytdlp_ops_v01_dispatcher", "ytdlp_ops_v01_worker_per_url", "ytdlp_ops_v02_orchestrator_auth", "ytdlp_ops_v02_dispatcher_auth", "ytdlp_ops_v02_worker_per_url_auth", "ytdlp_ops_v02_orchestrator_dl", "ytdlp_ops_v02_dispatcher_dl", "ytdlp_ops_v02_worker_per_url_dl"], - title="[clear_dag_runs] DAG ID", - description="The DAG ID to perform the action on. 
Select 'ALL_DAGS' to clear history for all DAGs.", - ), - "clear_scope": Param( - "all_runs", - type="string", - enum=["last_run", "all_runs"], - title="[clear_dag_runs] Clear Scope", - description="For 'clear_dag_runs' action, specifies the scope of runs to clear.", - ), - # --- Common Params --- - "redis_conn_id": Param( - DEFAULT_REDIS_CONN_ID, - type="string", - title="Redis Connection ID", - ), - }, -) as dag: - branch_on_action = BranchPythonOperator( - task_id="branch_on_action", - python_callable=lambda **context: f"action_{context['params']['action']}", - ) - - action_add_videos = PythonOperator( - task_id="action_add_videos", - python_callable=add_videos_to_queue_callable, - ) - - action_clear_queue = PythonOperator( - task_id="action_clear_queue", - python_callable=clear_queue_callable, - templates_dict={'dump_dir': "{{ params.dump_dir or var.value.get('YTDLP_REDIS_DUMP_DIR', '/opt/airflow/dumps') }}"}, - ) - - action_list_contents = PythonOperator( - task_id="action_list_contents", - python_callable=list_contents_callable, - ) - - action_check_status = PythonOperator( - task_id="action_check_status", - python_callable=check_status_callable, - ) - - action_requeue_failed = PythonOperator( - task_id="action_requeue_failed", - python_callable=requeue_failed_callable, - ) - - action_inspect_celery_cluster = BashOperator( - task_id="action_inspect_celery_cluster", - bash_command=""" - # Get the broker URL from Airflow config - BROKER_URL=$(airflow config get-value celery broker_url) - echo "--- Inspecting Celery Cluster (Broker: $BROKER_URL) ---" - - echo "" - echo "--- Active Queues (shows queues with consumers) ---" - celery -A airflow.providers.celery.executors.celery_executor.app -b "$BROKER_URL" inspect active_queues - - echo "" - echo "--- Worker Stats (shows connected workers) ---" - celery -A airflow.providers.celery.executors.celery_executor.app -b "$BROKER_URL" inspect stats - - echo "" - echo "--- Active Tasks (tasks currently running) ---" - celery -A airflow.providers.celery.executors.celery_executor.app -b "$BROKER_URL" inspect active - - echo "" - echo "--- Reserved Tasks (tasks prefetched by workers) ---" - celery -A airflow.providers.celery.executors.celery_executor.app -b "$BROKER_URL" inspect reserved - """, - ) - - action_purge_celery_queue = PythonOperator( - task_id="action_purge_celery_queue", - python_callable=purge_celery_queue_callable, - ) - - action_clear_dag_runs = PythonOperator( - task_id="action_clear_dag_runs", - python_callable=clear_dag_runs_callable, - ) - - # --- Wire up tasks --- - branch_on_action >> [ - action_add_videos, - action_clear_queue, - action_list_contents, - action_check_status, - action_requeue_failed, - action_inspect_celery_cluster, - action_purge_celery_queue, - action_clear_dag_runs, - ] diff --git a/airflow/dags/ytdlp_ops_account_maintenance.py b/airflow/dags/ytdlp_ops_account_maintenance.py deleted file mode 100644 index bf3054b..0000000 --- a/airflow/dags/ytdlp_ops_account_maintenance.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -# -*- coding: utf-8 -*- -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DEPRECATED: Maintenance DAG for managing the lifecycle of ytdlp-ops accounts. 
-""" -from __future__ import annotations - -from airflow.models.dag import DAG -from airflow.utils.dates import days_ago - -DEFAULT_ARGS = { - 'owner': 'airflow', - 'retries': 0, - 'queue': 'queue-mgmt', -} - -with DAG( - dag_id='ytdlp_ops_account_maintenance', - default_args=DEFAULT_ARGS, - schedule=None, # Disabled - start_date=days_ago(1), - catchup=False, - is_paused_upon_creation=True, - tags=['ytdlp', 'maintenance', 'deprecated'], - doc_md=""" - ### DEPRECATED: YT-DLP Account Maintenance - - This DAG is **DEPRECATED** and should not be used. Its functionality has been replaced - by a standalone, continuously running `policy-enforcer` service. - - To run the new enforcer, use the following command on a management node: - `bin/ytops-client policy-enforcer --policy policies/8_unified_simulation_enforcer.yaml --live` - - This DAG is paused by default and will be removed in a future version. - """, -) as dag: - pass diff --git a/airflow/dags/ytdlp_ops_v01_dispatcher.py b/airflow/dags/ytdlp_ops_v01_dispatcher.py deleted file mode 100644 index 0835603..0000000 --- a/airflow/dags/ytdlp_ops_v01_dispatcher.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- -""" -DAG to dispatch work to ytdlp_ops_worker_per_url DAGs. -It pulls a URL from Redis and triggers a worker with a pinned queue. -""" - -from __future__ import annotations -import logging -import os -import socket -from datetime import timedelta - -from airflow.decorators import task -from airflow.exceptions import AirflowSkipException -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.api.common.trigger_dag import trigger_dag -from airflow.utils.dates import days_ago - -from utils.redis_utils import _get_redis_client - -logger = logging.getLogger(__name__) - -DEFAULT_QUEUE_NAME = 'video_queue' -DEFAULT_REDIS_CONN_ID = 'redis_default' - -@task(queue='queue-dl') -def dispatch_url_to_worker(**context): - """ - Pulls one URL from Redis, determines the current worker's dedicated queue, - and triggers the main worker DAG to process the URL on that specific queue. - """ - ti = context['task_instance'] - logger.info(f"Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.") - - # --- Check for worker pause lock file --- - # This path must be consistent with the Ansible playbook. - lock_file_path = '/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile' - hostname = socket.gethostname() - if os.path.exists(lock_file_path): - logger.info(f"Worker '{hostname}' is paused. Lock file found at '{lock_file_path}'. Skipping URL pull.") - raise AirflowSkipException(f"Worker '{hostname}' is paused.") - else: - logger.info(f"Worker '{hostname}' is active (no lock file found at '{lock_file_path}'). Proceeding to pull URL.") - - params = context['params'] - redis_conn_id = params['redis_conn_id'] - queue_name = params['queue_name'] - inbox_queue = f"{queue_name}_inbox" - - logger.info(f"Attempting to pull one URL from Redis queue '{inbox_queue}'...") - client = _get_redis_client(redis_conn_id) - url_bytes = client.lpop(inbox_queue) - - if not url_bytes: - logger.info("Redis queue is empty. No work to dispatch. Skipping task.") - raise AirflowSkipException("Redis queue is empty. No work to dispatch.") - - url_to_process = url_bytes.decode('utf-8') - logger.info(f"Pulled URL '{url_to_process}' from the queue.") - - # Determine the worker-specific queue for affinity - hostname = socket.gethostname() - worker_queue = f"queue-dl-{hostname}" - logger.info(f"Running on worker '{hostname}'. 
Dispatching job to its dedicated queue '{worker_queue}'.") - - # The orchestrator passes all its params, which we will pass through to the worker. - # We add the specific URL and the determined worker queue to the configuration. - conf_to_pass = {**params, 'url_to_process': url_to_process, 'worker_queue': worker_queue} - - # Embed the worker queue in the run_id to avoid DB race conditions in the mutation hook. - # The hook will parse the queue name from the run_id itself. - run_id = f"worker_run_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}" - - logger.info(f"Triggering 'ytdlp_ops_v01_worker_per_url' with run_id '{run_id}'") - trigger_dag( - dag_id='ytdlp_ops_v01_worker_per_url', - run_id=run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - -with DAG( - dag_id='ytdlp_ops_v01_dispatcher', - default_args={'owner': 'airflow', 'retries': 0}, - schedule=None, # This DAG is only triggered by the orchestrator. - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 'worker', 'dispatcher'], - doc_md=""" - ### YT-DLP URL Dispatcher - - This DAG is responsible for dispatching a single URL to a worker with a pinned queue. - 1. It pulls a single URL from the Redis `_inbox` queue. - 2. It runs on the generic `queue-dl` to find any available worker. - 3. It determines the worker's hostname and constructs a dedicated queue name (e.g., `queue-dl-dl-worker-1`). - 4. It triggers the `ytdlp_ops_v01_worker_per_url` DAG, passing the URL and the dedicated queue name in the configuration. - - This dispatcher-led affinity, combined with the `task_instance_mutation_hook` cluster policy, ensures that all subsequent processing for that URL happens on the same machine. - The `ytdlp_ops_v01_orchestrator` is used to trigger a batch of these dispatcher runs. - """, - # All params are passed through from the orchestrator - render_template_as_native_obj=True, -) as dag: - dispatch_url_to_worker() diff --git a/airflow/dags/ytdlp_ops_v01_orchestrator.py b/airflow/dags/ytdlp_ops_v01_orchestrator.py deleted file mode 100644 index 9602d85..0000000 --- a/airflow/dags/ytdlp_ops_v01_orchestrator.py +++ /dev/null @@ -1,407 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DAG to orchestrate ytdlp_ops_dispatcher DAG runs based on a defined policy. -It fetches URLs from a Redis queue and launches dispatchers in controlled bunches, -which in turn trigger workers with affinity. 
-""" - -from airflow import DAG -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.operators.python import PythonOperator -from airflow.models.param import Param -from airflow.models.variable import Variable -from airflow.utils.dates import days_ago -from airflow.api.common.trigger_dag import trigger_dag -from airflow.models.dagrun import DagRun -from airflow.models.dag import DagModel -from datetime import timedelta, datetime -import logging -import random -import time -import json - -# Import utility functions -from utils.redis_utils import _get_redis_client - -# Import Thrift modules for proxy status check -from pangramia.yt.tokens_ops import YTTokenOpService -from thrift.protocol import TBinaryProtocol -from thrift.transport import TSocket, TTransport - -# Configure logging -logger = logging.getLogger(__name__) - -# Default settings -DEFAULT_QUEUE_NAME = 'video_queue' -DEFAULT_REDIS_CONN_ID = 'redis_default' -DEFAULT_TOTAL_WORKERS = 1 -DEFAULT_WORKERS_PER_BUNCH = 1 -DEFAULT_WORKER_DELAY_S = 1 -DEFAULT_BUNCH_DELAY_S = 1 - -DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") -DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) - -# Default ytdlp.json content for the unified config parameter -DEFAULT_YTDLP_CONFIG = { - "ytops": { - "force_renew": [], - "session_params": { - # "visitor_rotation_threshold": 250 - } - }, - "ytdlp_params": { - "debug_printtraffic": True, - "write_pages": True, - "verbose": True, - "no_color": True, - "ignoreerrors": True, - "noresizebuffer": True, - "buffersize": "4M", - "concurrent_fragments": 8, - "socket_timeout": 60, - "outtmpl": { - "default": "%(id)s.f%(format_id)s.%(ext)s" - }, - "restrictfilenames": True, - "updatetime": False, - "noplaylist": True, - "match_filter": "!is_live", - "writeinfojson": True, - "skip_download": True, - "allow_playlist_files": False, - "clean_infojson": True, - "getcomments": False, - "writesubtitles": False, - "writethumbnail": False, - "sleep_interval_requests": 0.75, - "parse_metadata": [ - ":(?P)" - ], - "extractor_args": { - "youtube": { - "player_client": ["tv_simply"], - "formats": ["duplicate"], - "jsc_trace": ["true"], - "pot_trace": ["true"], - "skip": ["translated_subs", "hls"] - }, - "youtubepot-bgutilhttp": { - "base_url": ["http://172.17.0.1:4416"] - } - }, - "noprogress": True, - "format_sort": [ - "res", - "ext:mp4:m4a" - ], - "remuxvideo": "mp4", - "nooverwrites": True, - "continuedl": True - } -} - -# --- Helper Functions --- - -def _check_application_queue(redis_client, queue_base_name: str) -> int: - """Checks and logs the length of the application's inbox queue.""" - inbox_queue_name = f"{queue_base_name}_inbox" - logger.info(f"--- Checking Application Work Queue ---") - try: - q_len = redis_client.llen(inbox_queue_name) - logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).") - return q_len - except Exception as e: - logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True) - return -1 # Indicate an error - -def _inspect_celery_queues(redis_client, queue_names: list): - """Inspects Celery queues in Redis and logs their status.""" - logger.info("--- Inspecting Celery Queues in Redis ---") - for queue_name in queue_names: - try: - q_len = redis_client.llen(queue_name) - logger.info(f"Queue '{queue_name}': Length = {q_len}") - - if q_len > 0: - logger.info(f"Showing up to 10 tasks in '{queue_name}':") - # Fetch up to 10 items from the start of 
the list (queue) - items_bytes = redis_client.lrange(queue_name, 0, 9) - for i, item_bytes in enumerate(items_bytes): - try: - # Celery tasks are JSON-encoded strings - task_data = json.loads(item_bytes.decode('utf-8')) - # Pretty print for readability in logs - pretty_task_data = json.dumps(task_data, indent=2) - logger.info(f" Task {i+1}:\n{pretty_task_data}") - except (json.JSONDecodeError, UnicodeDecodeError) as e: - logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}") - except Exception as e: - logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True) - logger.info("--- End of Queue Inspection ---") - - -# --- Main Orchestration Callable --- - -def orchestrate_workers_ignition_callable(**context): - """ - Main orchestration logic. Triggers a specified number of dispatcher DAGs - to initiate self-sustaining processing loops. - """ - params = context['params'] - ti = context['task_instance'] - logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.") - logger.info("Starting dispatcher ignition sequence.") - - dispatcher_dag_id = 'ytdlp_ops_v01_dispatcher' - worker_queue = 'queue-dl' - app_queue_name = params['queue_name'] - - logger.info(f"Running in v1 (monolithic) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'") - - dag_model = DagModel.get_dagmodel(dispatcher_dag_id) - if dag_model and dag_model.is_paused: - logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.") - raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.") - - total_workers = int(params['total_workers']) - workers_per_bunch = int(params['workers_per_bunch']) - - # --- Input Validation --- - if total_workers <= 0: - logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.") - raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).") - - if workers_per_bunch <= 0: - logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. Aborting.") - raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.") - # --- End Input Validation --- - - worker_delay = int(params['delay_between_workers_s']) - bunch_delay = int(params['delay_between_bunches_s']) - - # Create a list of worker numbers to trigger - worker_indices = list(range(total_workers)) - bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)] - - # --- Inspect Queues before starting --- - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - redis_client = _get_redis_client(redis_conn_id) - - # First, check the application queue for work - app_queue_len = _check_application_queue(redis_client, app_queue_name) - - if params.get('skip_if_queue_empty') and app_queue_len == 0: - logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.") - raise AirflowSkipException("Application work queue is empty.") - - # Then, inspect the target Celery queue for debugging - _inspect_celery_queues(redis_client, [worker_queue]) - except AirflowSkipException: - raise # Re-raise to let Airflow handle the skip - except Exception as e: - logger.error(f"Could not inspect queues due to an error: {e}. Continuing with ignition sequence.") - # --- End of Inspection --- - - logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. 
Each run will attempt to process one URL.") - - dag_run_id = context['dag_run'].run_id - total_triggered = 0 - - # --- Generate a consistent timestamped prefix for this orchestrator run --- - # This ensures all workers spawned from this run use the same set of accounts. - final_account_pool_prefix = params['account_pool'] - - # --- Unified JSON Config Handling --- - # Start with the JSON config from params, then merge legacy params into it. - try: - ytdlp_config = json.loads(params.get('ytdlp_config_json', '{}')) - except json.JSONDecodeError as e: - logger.error(f"Invalid ytdlp_config_json parameter. Must be valid JSON. Error: {e}") - raise AirflowException("Invalid ytdlp_config_json parameter.") - - if params.get('prepend_client_to_account') and params.get('account_pool_size') is not None: - try: - clients_str = ','.join(ytdlp_config['ytdlp_params']['extractor_args']['youtube']['player_client']) - except KeyError: - clients_str = '' - - primary_client = clients_str.split(',')[0].strip() if clients_str else 'unknown' - timestamp = datetime.now().strftime('%Y%m%d%H%M%S') - final_account_pool_prefix = f"{params['account_pool']}_{timestamp}_{primary_client}" - logger.info(f"Generated consistent account prefix for this run: '{final_account_pool_prefix}'") - - final_ytdlp_config_str = json.dumps(ytdlp_config) - # --- End of JSON Config Handling --- - - for i, bunch in enumerate(bunches): - logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---") - for j, worker_index in enumerate(bunch): - # Create a unique run_id for each dispatcher run - run_id = f"dispatched_{dag_run_id}_{total_triggered}" - - # Pass all orchestrator params to the dispatcher, which will then pass them to the worker. - conf_to_pass = {p: params[p] for p in params} - # Override account_pool with the generated prefix and set the unified JSON config - conf_to_pass['account_pool'] = final_account_pool_prefix - conf_to_pass['worker_index'] = worker_index - conf_to_pass['ytdlp_config_json'] = final_ytdlp_config_str - - logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}, worker_index: {worker_index}) (Run ID: {run_id})") - logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") - - trigger_dag( - dag_id=dispatcher_dag_id, - run_id=run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - total_triggered += 1 - - # Delay between dispatches in a bunch - if j < len(bunch) - 1: - logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...") - time.sleep(worker_delay) - - # Delay between bunches - if i < len(bunches) - 1: - logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---") - time.sleep(bunch_delay) - - logger.info(f"--- Ignition sequence complete. Total dispatcher runs triggered: {total_triggered}. 
---") - - # --- Final Queue Inspection --- - final_check_delay = 30 # seconds - logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...") - time.sleep(final_check_delay) - - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - redis_client = _get_redis_client(redis_conn_id) - - # Log connection details for debugging broker mismatch issues - conn_kwargs = redis_client.connection_pool.connection_kwargs - logger.info(f"Final check using Redis connection '{redis_conn_id}': " - f"host={conn_kwargs.get('host')}, " - f"port={conn_kwargs.get('port')}, " - f"db={conn_kwargs.get('db')}") - - _inspect_celery_queues(redis_client, [worker_queue]) - logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. " - "If queues are empty, workers have started processing.") - except Exception as e: - logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.") - - - - -# ============================================================================= -# DAG Definition -# ============================================================================= - -default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'email_on_failure': False, - 'email_on_retry': False, - 'retries': 1, - 'retry_delay': timedelta(minutes=1), - 'start_date': days_ago(1), -} - -with DAG( - dag_id='ytdlp_ops_v01_orchestrator', - default_args=default_args, - schedule=None, # This DAG runs only when triggered. - max_active_runs=1, # Only one ignition process should run at a time. - catchup=False, - description='Ignition system for ytdlp_ops_v01_dispatcher DAGs. Starts self-sustaining worker loops via dispatchers.', - doc_md=""" - ### YT-DLP v1 (Monolithic) Worker Ignition System - - This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v1 monolithic worker**. - It does **not** process URLs itself. Its only job is to trigger a specified number of `ytdlp_ops_v01_dispatcher` DAGs, - which in turn pull URLs and trigger `ytdlp_ops_v01_worker_per_url` with worker affinity. - - #### How it Works: - - 1. **Manual Trigger:** You manually trigger this DAG with parameters defining how many dispatcher loops to start (`total_workers`), in what configuration (`workers_per_bunch`, delays). - 2. **Ignition:** The orchestrator triggers the initial set of dispatcher DAGs in a "fire-and-forget" manner, passing all its configuration parameters to them. - 3. **Completion:** Once all initial dispatchers have been triggered, the orchestrator's job is complete. - - The dispatchers then take over, each pulling a URL, determining affinity, and triggering a worker DAG. - - #### Client Selection (`clients` parameter): - The `clients` parameter determines which YouTube client persona is used for token generation. Different clients have different capabilities and requirements. - - **Supported Clients:** - - | Client | Visitor ID | Player poToken | GVS poToken | Cookies Support | Notes | - | ---------------- | ------------ | -------------- | ------------ | --------------- | ------------------------------------------------------------------ | - | `tv` | Required | Not Required | Not Required | Supported | All formats may have DRM if you request too much. | - | `web_safari` | Required | Required | Required* | Supported | *Provides HLS (m3u8) formats which may not require a GVS token. 
| - | `mweb` | Required | Required | Required | Supported | | - | `web_camoufox` | Required | Required | Required | Supported | Camoufox variant of `web`. | - - **Untested / Not Recommended Clients:** - - | Client | Visitor ID | Player poToken | GVS poToken | Cookies Support | Notes | - | ---------------- | ------------ | -------------- | ------------ | --------------- | ------------------------------------------------------------------ | - | `web` | Required | Required | Required | Supported | Only SABR formats available. | - | `tv_simply` | Required | Not Required | Not Required | Not Supported | | - | `tv_embedded` | Required | Not Required | Not Required | Supported | Requires account cookies for most videos. | - | `web_embedded` | Required | Not Required | Not Required | Supported | Only for embeddable videos. | - | `web_music` | Required | Required | Required | Supported | | - | `web_creator` | Required | Required | Required | Supported | Requires account cookies. | - | `android` | Required | Required | Required | Not Supported | | - | `android_vr` | Required | Not Required | Not Required | Not Supported | YouTube Kids videos are not available. | - | `ios` | Required | Required | Required | Not Supported | | - - Other `_camoufox` variants are also available but untested. - """, - tags=['ytdlp', 'mgmt', 'master'], - params={ - # --- Ignition Control Parameters --- - 'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."), - 'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."), - 'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."), - 'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."), - 'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."), - - # --- Unified Worker Configuration --- - 'ytdlp_config_json': Param( - json.dumps(DEFAULT_YTDLP_CONFIG, indent=2), - type="string", - title="[Worker Param] Unified yt-dlp JSON Config", - description="A JSON string containing all parameters for both yt-ops-server and the yt-dlp downloaders. This is the primary way to configure workers.", - **{'ui_widget': 'json', 'multi_line': True} - ), - - # --- Worker Passthrough Parameters --- - # These are used by the orchestrator itself and are also passed to workers. - 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."), - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), - 'account_pool': Param('ytdlp_account', type="string", description="[Worker Param] Account pool prefix or comma-separated list."), - 'account_pool_size': Param(10, type=["integer", "null"], description="[Worker Param] If using a prefix for 'account_pool', this specifies the number of accounts to generate (e.g., 10 for 'prefix_01' through 'prefix_10'). Required when using a prefix."), - 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode. 
Format: prefix_YYYYMMDDHHMMSS_client_XX."), - } -) as dag: - - orchestrate_task = PythonOperator( - task_id='start_worker_loops', - python_callable=orchestrate_workers_ignition_callable, - queue='queue-mgmt', - ) - orchestrate_task.doc_md = """ - ### Start Worker Loops - This is the main task that executes the ignition policy. - - It triggers `ytdlp_ops_dispatcher` DAGs according to the batch settings. - - It passes all its parameters down to the dispatchers, which will use them to trigger workers. - """ diff --git a/airflow/dags/ytdlp_ops_v01_worker_per_url.py b/airflow/dags/ytdlp_ops_v01_worker_per_url.py deleted file mode 100644 index 815b340..0000000 --- a/airflow/dags/ytdlp_ops_v01_worker_per_url.py +++ /dev/null @@ -1,2311 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DAG for processing a single YouTube URL passed via DAG run configuration. -This is the "Worker" part of a Sensor/Worker pattern. -This DAG has been refactored to use the TaskFlow API to implement worker affinity, -ensuring all tasks for a single URL run on the same machine. -""" - -from __future__ import annotations - -from airflow.decorators import task, task_group -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.models import Variable -from airflow.models.dag import DAG, DagModel -from airflow.models.param import Param -from airflow.models.xcom_arg import XComArg -from airflow.operators.dummy import DummyOperator -from airflow.utils.dates import days_ago -from airflow.utils.task_group import TaskGroup -from airflow.api.common.trigger_dag import trigger_dag -from copy import copy -from datetime import datetime, timedelta -import concurrent.futures -import json -import logging -import os -import random -import re -import redis -import socket -import time -import traceback -import uuid -import shutil - -# Import utility functions and Thrift modules -from utils.redis_utils import _get_redis_client -from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext -from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException -from pangramia.yt.tokens_ops import YTTokenOpService -from thrift.protocol import TBinaryProtocol -from thrift.transport import TSocket, TTransport -from thrift.transport.TTransport import TTransportException - -# Configure logging -logger = logging.getLogger(__name__) - - -# --- Client Stats Helper --- - -def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str): - """Updates success/failure statistics for a client type in Redis.""" - if not clients_str: - logger.warning("Cannot update client stats: 'clients' string is empty.") - return - - # Assumption: The service tries clients in the order provided. - # We attribute the result to the first client in the list. - primary_client = clients_str.split(',')[0].strip() - if not primary_client: - logger.warning("Cannot update client stats: could not determine primary client.") - return - - stats_key = "client_stats" - - try: - # Using a pipeline with WATCH for safe concurrent updates. - with redis_client.pipeline() as pipe: - pipe.watch(stats_key) - - current_stats_json = redis_client.hget(stats_key, primary_client) - stats = {} - if current_stats_json: - try: - stats = json.loads(current_stats_json) - except json.JSONDecodeError: - logger.warning(f"Could not parse existing stats for client '{primary_client}'. 
Resetting stats.") - stats = {} - - stats.setdefault('success_count', 0) - stats.setdefault('failure_count', 0) - - details = { - 'timestamp': time.time(), 'url': url, - 'machine_id': machine_id, 'dag_run_id': dag_run_id, - } - - if status == 'success': - stats['success_count'] += 1 - stats['latest_success'] = details - elif status == 'failure': - stats['failure_count'] += 1 - stats['latest_failure'] = details - - pipe.multi() - pipe.hset(stats_key, primary_client, json.dumps(stats)) - pipe.execute() - - logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.") - - except redis.exceptions.WatchError: - logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. Skipping this update.") - except Exception as e: - logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True) - - -# Default settings from Airflow Variables or hardcoded fallbacks -DEFAULT_QUEUE_NAME = 'video_queue' -DEFAULT_REDIS_CONN_ID = 'redis_default' -DEFAULT_TIMEOUT = 3600 -DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") -DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) - -# The queue is set to a fallback here. The actual worker-specific queue is -# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py), -# which parses the target queue from the DAG run_id. -DEFAULT_ARGS = { - 'owner': 'airflow', - 'retries': 0, - 'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook. -} - - -# --- Helper Functions --- - -def _get_thrift_client(host, port, timeout): - """Helper to create and connect a Thrift client.""" - transport = TSocket.TSocket(host, port) - transport.setTimeout(timeout * 1000) - transport = TTransport.TFramedTransport(transport) - protocol = TBinaryProtocol.TBinaryProtocol(transport) - client = YTTokenOpService.Client(protocol) - transport.open() - logger.info(f"Connected to Thrift server at {host}:{port}") - return client, transport - -def _extract_video_id(url): - """Extracts YouTube video ID from a URL or returns the input if it's already a valid ID.""" - if not url or not isinstance(url, str): - return None - - # Check if the input is already a valid 11-character video ID - if re.fullmatch(r'[a-zA-Z0-9_-]{11}', url): - return url - - patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})'] - for pattern in patterns: - match = re.search(pattern, url) - if match: - return match.group(1) - return None - -def _get_account_pool(params: dict) -> list: - """ - Gets the list of accounts to use for processing, filtering out banned/resting accounts. - Supports explicit list, prefix-based generation, and single account modes. - """ - account_pool_str = params.get('account_pool', 'default_account') - accounts = [] - is_prefix_mode = False - - if ',' in account_pool_str: - accounts = [acc.strip() for acc in account_pool_str.split(',') if acc.strip()] - else: - prefix = account_pool_str - pool_size_param = params.get('account_pool_size') - if pool_size_param is not None: - is_prefix_mode = True - pool_size = int(pool_size_param) - - # The orchestrator now generates the full prefix if prepend_client_to_account is True. - # The worker just appends the numbers. 
- accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)] - else: - accounts = [prefix] - - if not accounts: - raise AirflowException("Initial account pool is empty.") - - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - try: - redis_client = _get_redis_client(redis_conn_id) - active_accounts = [] - for account in accounts: - status_bytes = redis_client.hget(f"account_status:{account}", "status") - status = status_bytes.decode('utf-8') if status_bytes else "ACTIVE" - if status not in ['BANNED'] and 'RESTING' not in status: - active_accounts.append(account) - - if not active_accounts and accounts: - auto_create = params.get('auto_create_new_accounts_on_exhaustion', False) - if auto_create and is_prefix_mode: - new_account_id = f"{account_pool_str}-auto-{str(uuid.uuid4())[:8]}" - logger.warning(f"Account pool exhausted. Auto-creating new account: '{new_account_id}'") - active_accounts.append(new_account_id) - else: - raise AirflowException("All accounts in the configured pool are currently exhausted.") - accounts = active_accounts - except Exception as e: - logger.error(f"Could not filter accounts from Redis. Using unfiltered pool. Error: {e}", exc_info=True) - - if not accounts: - raise AirflowException("Account pool is empty after filtering.") - - logger.info(f"Final active account pool with {len(accounts)} accounts.") - return accounts - -# ============================================================================= -# TASK DEFINITIONS (TaskFlow API) -# ============================================================================= - -def _get_worker_params(params: dict) -> dict: - """Loads and returns the worker_params dict from the unified JSON config.""" - try: - ytdlp_config = json.loads(params.get('ytdlp_config_json', '{}')) - return ytdlp_config.get('ytops', {}).get('worker_params', {}) - except json.JSONDecodeError: - logger.error("Could not parse ytdlp_config_json. Using empty worker_params.") - return {} - -@task -def get_url_and_assign_account(**context): - """ - Gets the URL to process from the DAG run configuration and assigns an active account. - This is the first task in the pinned-worker DAG. - """ - params = context['params'] - ti = context['task_instance'] - worker_params = _get_worker_params(params) - - # Log the active policies - auth_policy = worker_params.get('on_auth_failure', 'not_set') - download_policy = worker_params.get('on_download_failure', 'not_set') - logger.info(f"--- Worker Policies ---") - logger.info(f" Auth Failure Policy: {auth_policy}") - logger.info(f" Download Failure Policy: {download_policy}") - logger.info(f"-----------------------") - - # --- Worker Pinning Verification --- - # This is a safeguard against a known Airflow issue where clearing a task - # can cause the task_instance_mutation_hook to be skipped, breaking pinning. - # See: https://github.com/apache/airflow/issues/20143 - expected_queue = None - if ti.run_id and '_q_' in ti.run_id: - expected_queue = ti.run_id.split('_q_')[-1] - - if not expected_queue: - # Fallback to conf if run_id parsing fails for some reason - expected_queue = params.get('worker_queue') - - if expected_queue and ti.queue != expected_queue: - error_msg = ( - f"WORKER PINNING FAILURE: Task is running on queue '{ti.queue}' but was expected on '{expected_queue}'. " - "This usually happens after manually clearing a task, which is not the recommended recovery method for this DAG. 
" - "To recover a failed URL, let the DAG run fail, use the 'ytdlp_mgmt_queues' DAG to requeue the URL, " - "and use the 'ytdlp_ops_orchestrator' to start a new worker loop if needed." - ) - logger.error(error_msg) - raise AirflowException(error_msg) - elif expected_queue: - logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.") - # --- End Verification --- - - # The URL is passed by the dispatcher DAG via 'url_to_process'. - # For manual runs, we fall back to 'manual_url_to_process'. - url_to_process = params.get('url_to_process') - if not url_to_process: - manual_url_input = params.get('manual_url_to_process') - if manual_url_input: - logger.info(f"Using URL from manual run parameter: '{manual_url_input}'") - if manual_url_input == 'PULL_FROM_QUEUE': - logger.info("Manual run is set to pull from queue.") - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) - inbox_queue = f"{queue_name}_inbox" - client = _get_redis_client(redis_conn_id) - url_bytes = client.lpop(inbox_queue) - if not url_bytes: - logger.info("Redis queue is empty. No work to do. Skipping task.") - raise AirflowSkipException("Redis queue is empty. No work to do.") - url_to_process = url_bytes.decode('utf-8') - logger.info(f"Pulled URL '{url_to_process}' from queue '{inbox_queue}'.") - else: - url_to_process = manual_url_input - - if not url_to_process: - raise AirflowException("No URL to process. For manual runs, please provide a URL in the 'manual_url_to_process' parameter, or 'PULL_FROM_QUEUE'.") - logger.info(f"Received URL '{url_to_process}' to process.") - - # Mark the URL as in-progress in Redis - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) - progress_queue = f"{queue_name}_progress" - client = _get_redis_client(redis_conn_id) - - progress_data = { - 'status': 'in_progress', - 'start_time': time.time(), - 'dag_run_id': context['dag_run'].run_id, - 'hostname': socket.gethostname(), - } - client.hset(progress_queue, url_to_process, json.dumps(progress_data)) - logger.info(f"Marked URL '{url_to_process}' as in-progress.") - except Exception as e: - logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True) - - # Account assignment logic - account_id = params.get('account_id') - if account_id: - logger.info(f"Using sticky account '{account_id}' passed from previous run.") - else: - account_pool = _get_account_pool(params) - worker_index = params.get('worker_index') - if worker_index is not None: - account_id = account_pool[worker_index % len(account_pool)] - logger.info(f"Selected account '{account_id}' deterministically using worker_index {worker_index}.") - else: - # Fallback to random choice if no worker_index is provided (e.g., for manual runs) - account_id = random.choice(account_pool) - logger.warning(f"No worker_index provided. 
Selected account '{account_id}' randomly as a fallback.") - - return { - 'url_to_process': url_to_process, - 'account_id': account_id, - 'accounts_tried': [account_id], - } - -@task -def get_token(initial_data: dict, **context): - """Makes a single attempt to get a token by calling the Thrift service directly.""" - ti = context['task_instance'] - params = context['params'] - - account_id = initial_data['account_id'] - url = initial_data['url_to_process'] - info_json_dir = os.path.join(Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles'), 'videos', 'in-progress') - - host, port = params['service_ip'], int(params['service_port']) - machine_id = params.get('machine_id') or socket.gethostname() - - # For sticky proxy - assigned_proxy_url = params.get('assigned_proxy_url') - - # The unified JSON config is now the primary source of parameters. - request_params_json = params.get('ytdlp_config_json', '{}') - clients = None # This will be read from the JSON config on the server side. - - video_id = _extract_video_id(url) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - job_dir_name = f"{timestamp}-{video_id or 'unknown'}" - job_dir_path = os.path.join(info_json_dir, job_dir_name) - os.makedirs(job_dir_path, exist_ok=True) - info_json_path = os.path.join(job_dir_path, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json") - - # Save the received JSON config to the job directory for the download tool. - ytdlp_config_path = os.path.join(job_dir_path, 'ytdlp.json') - try: - with open(ytdlp_config_path, 'w', encoding='utf-8') as f: - # Pretty-print the JSON for readability - config_data = json.loads(request_params_json) - json.dump(config_data, f, indent=2) - logger.info(f"Saved ytdlp config to {ytdlp_config_path}") - except (IOError, json.JSONDecodeError) as e: - logger.error(f"Failed to save ytdlp.json config: {e}") - # Continue anyway, but download may fail. 
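For reference, when the save above does succeed, the per-job directory built earlier in this task looks roughly like the sketch below; the video ID, account, and timestamp are placeholder values:

```python
# Rough sketch of the job-directory layout assembled by this task.
# All concrete values below are placeholders.
import os
from datetime import datetime

info_json_dir = "/opt/airflow/downloadfiles/videos/in-progress"
video_id, account_id = "dQw4w9WgXcQ", "ytdlp_account_01"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

job_dir_path = os.path.join(info_json_dir, f"{timestamp}-{video_id}")
info_json_path = os.path.join(job_dir_path, f"info_{video_id}_{account_id}_{timestamp}.json")
ytdlp_config_path = os.path.join(job_dir_path, "ytdlp.json")
# e.g. .../in-progress/20240101_120000-dQw4w9WgXcQ/info_dQw4w9WgXcQ_ytdlp_account_01_20240101_120000.json
```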
- ytdlp_config_path = None - - client, transport = None, None - try: - timeout = int(params.get('timeout', DEFAULT_TIMEOUT)) - client, transport = _get_thrift_client(host, port, timeout) - - airflow_log_context = AirflowLogContext( - taskId=ti.task_id, - runId=ti.run_id, - tryNumber=ti.try_number - ) - - logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}, Proxy: {assigned_proxy_url or 'any'}) ---") - - token_data = client.getOrRefreshToken( - accountId=account_id, - updateType=TokenUpdateMode.AUTO, - url=url, - clients=clients, - machineId=machine_id, - airflowLogContext=airflow_log_context, - requestParamsJson=request_params_json, - assignedProxyUrl=assigned_proxy_url - ) - - # --- Log server-side details for debugging --- - if hasattr(token_data, 'serverVersionInfo') and token_data.serverVersionInfo: - logger.info(f"--- Server Version Info ---\n{token_data.serverVersionInfo}") - - if hasattr(token_data, 'requestSummary') and token_data.requestSummary: - try: - summary_data = json.loads(token_data.requestSummary) - summary_text = summary_data.get('summary', 'Not available.') - prefetch_log = summary_data.get('prefetch_log', 'Not available.') - nodejs_log = summary_data.get('nodejs_log', 'Not available.') - ytdlp_log = summary_data.get('ytdlp_log', 'Not available.') - - logger.info(f"--- Request Summary ---\n{summary_text}") - logger.info(f"--- Prefetch Log ---\n{prefetch_log}") - logger.info(f"--- Node.js Log ---\n{nodejs_log}") - logger.info(f"--- yt-dlp Log ---\n{ytdlp_log}") - except (json.JSONDecodeError, AttributeError): - logger.info(f"--- Raw Request Summary (could not parse JSON) ---\n{token_data.requestSummary}") - - if hasattr(token_data, 'communicationLogPaths') and token_data.communicationLogPaths: - logger.info("--- Communication Log Paths on Server ---") - for log_path in token_data.communicationLogPaths: - logger.info(f" - {log_path}") - # --- End of server-side logging --- - - if not token_data or not token_data.infoJson: - raise AirflowException("Thrift service did not return valid info.json data.") - - # Save info.json to file - with open(info_json_path, 'w', encoding='utf-8') as f: - f.write(token_data.infoJson) - - proxy = token_data.socks - - # Rename file with proxy - final_info_json_path = info_json_path - if proxy: - sanitized_proxy = proxy.replace('://', '---') - new_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}_proxy_{sanitized_proxy}.json" - new_path = os.path.join(job_dir_path, new_filename) - try: - os.rename(info_json_path, new_path) - final_info_json_path = new_path - logger.info(f"Renamed info.json to include proxy: {new_path}") - except OSError as e: - logger.error(f"Failed to rename info.json to include proxy: {e}. Using original path.") - - return { - 'info_json_path': final_info_json_path, - 'job_dir_path': job_dir_path, - 'socks_proxy': proxy, - 'ytdlp_command': None, - 'successful_account_id': account_id, - 'original_url': url, - 'ytdlp_config_path': ytdlp_config_path, - } - - except (PBServiceException, PBUserException) as e: - error_message = e.message or "Unknown Thrift error" - error_code = getattr(e, 'errorCode', 'THRIFT_ERROR') - - # If a "Video unavailable" error mentions rate-limiting, it's a form of bot detection. 
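As an aside on the `_proxy_` filename convention used when renaming info.json above (and parsed again later by the download task), a small round-trip check; the proxy URL and filename here are invented for illustration:

```python
# Round-trip of the proxy-in-filename convention (illustrative values only).
import re

proxy = "socks5://10.0.0.5:1080"
sanitized = proxy.replace("://", "---")                 # "socks5---10.0.0.5:1080"
filename = f"info_dQw4w9WgXcQ_acct_20240101_120000_proxy_{sanitized}.json"

match = re.search(r"_proxy_(.+)\.json$", filename)
recovered = match.group(1).replace("---", "://") if match else None
assert recovered == proxy
```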
- if error_code == 'VIDEO_UNAVAILABLE' and 'rate-limited' in error_message.lower(): - logger.warning("Re-classifying rate-limit-related 'VIDEO_UNAVAILABLE' error as 'BOT_DETECTED'.") - error_code = 'BOT_DETECTED' - - logger.error(f"Thrift error getting token: {error_code} - {error_message}") - - error_details = { - 'error_message': error_message, - 'error_code': error_code, - 'proxy_url': None - } - ti.xcom_push(key='error_details', value=error_details) - raise AirflowException(f"ytops-client get-info failed: {error_message}") - except TTransportException as e: - logger.error(f"Thrift transport error: {e}", exc_info=True) - error_details = { - 'error_message': f"Thrift transport error: {e}", - 'error_code': 'TRANSPORT_ERROR', - 'proxy_url': None - } - ti.xcom_push(key='error_details', value=error_details) - raise AirflowException(f"Thrift transport error: {e}") - finally: - if transport and transport.isOpen(): - transport.close() - -@task.branch -def handle_bannable_error_branch(task_id_to_check: str, **context): - """ - Inspects a failed task and routes to retry logic if the error is retryable. - Routes to a fatal error handler for non-retryable infrastructure issues. - """ - ti = context['task_instance'] - params = context['params'] - - # Try to get error details from the specified task - error_details = None - try: - error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details') - except Exception as e: - logger.warning(f"Could not pull error details from task '{task_id_to_check}': {e}") - - # If not found, try to get from any task in the DAG run - if not error_details: - # Look for error details in any task that may have pushed them - # This is a fallback mechanism - dag_run = ti.get_dagrun() - task_instances = dag_run.get_task_instances() - for task_instance in task_instances: - if task_instance.task_id != ti.task_id: - try: - details = task_instance.xcom_pull(key='error_details') - if details: - error_details = details - logger.info(f"Found error details in task '{task_instance.task_id}'") - break - except Exception: - pass - - if not error_details: - logger.error(f"Task {task_id_to_check} failed without error details. Marking as fatal.") - return 'handle_fatal_error' - - error_message = error_details.get('error_message', '').strip() - error_code = error_details.get('error_code', '').strip() - policy = params.get('on_auth_failure', 'retry_with_new_account') - - # Unrecoverable video errors that should not be retried or treated as system failures. - unrecoverable_video_errors = [ - "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL", - "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED" - ] - - if error_code in unrecoverable_video_errors: - logger.warning(f"Unrecoverable video error '{error_code}' detected for '{task_id_to_check}'. This is a content issue, not a system failure.") - return 'handle_unrecoverable_video_error' - - # Fatal Thrift connection errors that should stop all processing. - if error_code == 'TRANSPORT_ERROR': - logger.error(f"Fatal Thrift connection error from '{task_id_to_check}'. Stopping processing.") - return 'handle_fatal_error' - - # Service-side connection errors that are potentially retryable. - connection_errors = ['SOCKS5_CONNECTION_FAILED', 'SOCKET_TIMEOUT', 'CAMOUFOX_TIMEOUT'] - if error_code in connection_errors: - logger.info(f"Handling connection error '{error_code}' from '{task_id_to_check}'. 
Policy: '{policy}'") - if policy == 'stop_loop': - logger.warning(f"Connection error with 'stop_loop' policy. Marking as fatal.") - return 'handle_fatal_error' - elif policy == 'retry_without_ban': - logger.info("Retrying with a new account without banning.") - return 'assign_new_account_for_direct_retry' - else: # 'retry_with_new_account' and 'proceed_loop_under_manual_inspection' should also retry without ban on connection error - logger.info(f"Connection error with policy '{policy}'. Retrying with a new account without banning.") - return 'assign_new_account_for_direct_retry' - - # Bannable errors (e.g., bot detection) that can be retried with a new account. - is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"] - logger.info(f"Handling failure from '{task_id_to_check}'. Error code: '{error_code}', Policy: '{policy}'") - if is_bannable: - if policy == 'retry_with_new_account': - return 'ban_account_and_prepare_for_retry' - if policy == 'retry_without_ban': - return 'assign_new_account_for_direct_retry' - if policy == 'stop_loop': - return 'ban_and_report_immediately' - if policy == 'proceed_loop_under_manual_inspection': - logger.warning(f"Bannable error with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.") - return 'report_bannable_and_continue' - - # Any other error is considered fatal for this run. - logger.error(f"Unhandled or non-retryable error '{error_code}' from '{task_id_to_check}'. Marking as fatal.") - return 'handle_fatal_error' - -@task_group(group_id='ban_and_retry_logic') -def ban_and_retry_logic(initial_data: dict): - """ - Task group that checks for sliding window failures before banning an account. - If the account meets ban criteria, it's banned. Otherwise, the ban is skipped - but the retry proceeds. - """ - - @task.branch - def check_sliding_window_for_ban(data: dict, **context): - """ - Checks Redis for recent failures. If thresholds are met, proceeds to ban. - Otherwise, proceeds to a dummy task to allow retry without ban. - """ - params = context['params'] - account_id = data['account_id'] - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - - # These thresholds should ideally be Airflow Variables to be configurable - failure_window_seconds = 3600 # 1 hour - failure_threshold_count = 5 - failure_threshold_unique_proxies = 3 - - try: - redis_client = _get_redis_client(redis_conn_id) - failure_key = f"account_failures:{account_id}" - now = time.time() - window_start = now - failure_window_seconds - - # 1. Remove old failures and get recent ones - redis_client.zremrangebyscore(failure_key, '-inf', window_start) - recent_failures = redis_client.zrange(failure_key, 0, -1) - - if len(recent_failures) >= failure_threshold_count: - # Decode from bytes to string for processing - recent_failures_str = [f.decode('utf-8') for f in recent_failures] - # Failure format is "context:job_id:timestamp" - unique_proxies = {f.split(':')[0] for f in recent_failures_str} - - if len(unique_proxies) >= failure_threshold_unique_proxies: - logger.warning( - f"Account {account_id} has failed {len(recent_failures)} times " - f"with {len(unique_proxies)} unique contexts in the last hour. Proceeding to ban." - ) - return 'ban_account_task' - else: - logger.info( - f"Account {account_id} has {len(recent_failures)} failures, but only " - f"from {len(unique_proxies)} unique contexts (threshold is {failure_threshold_unique_proxies}). Skipping ban." 
- ) - else: - logger.info(f"Account {account_id} has {len(recent_failures)} failures (threshold is {failure_threshold_count}). Skipping ban.") - - except Exception as e: - logger.error(f"Error during sliding window check for account {account_id}: {e}. Skipping ban as a precaution.", exc_info=True) - - return 'skip_ban_task' - - @task(task_id='ban_account_task') - def ban_account_task(data: dict, **context): - """Wrapper task to call the main ban_account function.""" - _ban_account(initial_data=data, reason="Banned by Airflow worker after sliding window check", context=context) - - @task(task_id='skip_ban_task') - def skip_ban_task(): - """Dummy task to represent the 'skip ban' path.""" - pass - - check_task = check_sliding_window_for_ban(data=initial_data) - ban_task_in_group = ban_account_task(data=initial_data) - skip_task = skip_ban_task() - - check_task >> [ban_task_in_group, skip_task] - - -def _ban_account(initial_data: dict, reason: str, context: dict): - """Bans a single account via the Thrift service.""" - params = context['params'] - account_id = initial_data['account_id'] - client, transport = None, None - try: - host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT)) - client, transport = _get_thrift_client(host, port, timeout) - logger.warning(f"Banning account '{account_id}'. Reason: {reason}") - client.banAccount(accountId=account_id, reason=reason) - except BaseException as e: - # Catch BaseException to include SystemExit, which may be raised by the Thrift client - logger.error(f"Failed to issue ban for account '{account_id}': {e}", exc_info=True) - finally: - if transport and transport.isOpen(): - transport.close() - -@task -def assign_new_account_for_direct_retry(initial_data: dict, **context): - """Selects a new, unused account for a direct retry (e.g., after connection error).""" - params = context['params'] - accounts_tried = initial_data['accounts_tried'] - account_pool = _get_account_pool(params) - available_for_retry = [acc for acc in account_pool if acc not in accounts_tried] - if not available_for_retry: - raise AirflowException("No other accounts available in the pool for a retry.") - - new_account_id = random.choice(available_for_retry) - accounts_tried.append(new_account_id) - logger.info(f"Selected new account for retry: '{new_account_id}'") - - # Return updated initial_data with new account - return { - 'url_to_process': initial_data['url_to_process'], - 'account_id': new_account_id, - 'accounts_tried': accounts_tried, - } - -@task -def assign_new_account_after_ban_check(initial_data: dict, **context): - """Selects a new, unused account for the retry attempt after a ban check.""" - params = context['params'] - accounts_tried = initial_data['accounts_tried'] - account_pool = _get_account_pool(params) - available_for_retry = [acc for acc in account_pool if acc not in accounts_tried] - if not available_for_retry: - raise AirflowException("No other accounts available in the pool for a retry.") - - new_account_id = random.choice(available_for_retry) - accounts_tried.append(new_account_id) - logger.info(f"Selected new account for retry: '{new_account_id}'") - - # Return updated initial_data with new account - return { - 'url_to_process': initial_data['url_to_process'], - 'account_id': new_account_id, - 'accounts_tried': accounts_tried, - } - -@task(retries=0) -def ban_and_report_immediately(**context): - """Bans an account and prepares for failure reporting and stopping the loop.""" - ti = 
context['task_instance'] - # Manually pull initial_data. This is more robust if the upstream task was skipped. - initial_data = ti.xcom_pull(task_ids='get_url_and_assign_account') - if not initial_data: - logger.error("Could not retrieve initial_data to ban account.") - # Return a default dict to allow downstream reporting to proceed. - return {'account_id': 'unknown', 'url_to_process': context['params'].get('url_to_process', 'unknown')} - - try: - reason = "Banned by Airflow worker (policy is stop_loop)" - _ban_account(initial_data, reason, context) - logger.info(f"Account '{initial_data.get('account_id')}' banned. Proceeding to report failure.") - except BaseException as e: - # Catch BaseException to include SystemExit, which may be raised by the Thrift client - logger.error(f"Error during ban_and_report_immediately: {e}", exc_info=True) - # Swallow the exception to ensure this task succeeds. The loop will be stopped by downstream tasks. - - # Always return the initial data, even if banning failed - # Make a copy to ensure we're not returning a reference that might be modified elsewhere - return dict(initial_data) if initial_data else {} - -@task -def list_available_formats(token_data: dict, **context): - """ - Lists available formats for the given video using the info.json. - This is for debugging and informational purposes. - """ - import subprocess - import shlex - - info_json_path = token_data.get('info_json_path') - if not (info_json_path and os.path.exists(info_json_path)): - logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).") - return [] - - try: - cmd = [ - 'yt-dlp', - '--verbose', - '--list-formats', - '--load-info-json', info_json_path, - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stderr: - logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}") - - if process.returncode != 0: - logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}") - - available_formats = [] - if process.stdout: - logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---") - # Parse the output to get format IDs - lines = process.stdout.split('\n') - header_found = False - for line in lines: - if line.startswith('ID '): - header_found = True - continue - if header_found and line.strip() and line.strip()[0].isdigit(): - format_id = line.split()[0] - available_formats.append(format_id) - logger.info(f"Parsed available format IDs: {available_formats}") - - return available_formats - - except Exception as e: - logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True) - return [] - - -def _resolve_generic_selector(selector: str, info_json_path: str, logger) -> str | list[str] | None: - """ - Uses yt-dlp to resolve a generic format selector into specific, numeric format ID(s). - Returns a numeric selector string (e.g., '18'), a list of IDs for '+' selectors - (e.g., ['299', '140']), or None if resolution fails. 
- """ - import subprocess - import shlex - - try: - cmd = [ - 'yt-dlp', - '--print', 'format_id', - '-f', selector, - '--load-info-json', info_json_path, - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Resolving generic selector '{selector}' with command: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stderr: - # yt-dlp often prints warnings to stderr that are not fatal. - # e.g., "Requested format selector '...' contains no available formats" - logger.info(f"yt-dlp resolver STDERR for selector '{selector}':\n{process.stderr}") - - if process.returncode != 0: - logger.error(f"yt-dlp resolver for selector '{selector}' failed with exit code {process.returncode}") - return None - - output_ids = process.stdout.strip().split('\n') - output_ids = [fid for fid in output_ids if fid] # Remove empty lines - - if not output_ids: - logger.warning(f"Selector '{selector}' resolved to no format IDs.") - return None - - # yt-dlp might return '137+140' on one line, or '137\n140' on multiple. - # We need to handle both to get individual IDs. - final_ids = [] - for fid in output_ids: - final_ids.extend(fid.split('+')) - - # If the original selector was for merging (contained '+'), return individual IDs for separate downloads. - # Otherwise, yt-dlp has already chosen the best one from a fallback list, so we just use it. - if '+' in selector: - resolved_selector = final_ids - else: - resolved_selector = final_ids[0] # yt-dlp gives the single best choice - - logger.info(f"Successfully resolved selector '{selector}' to '{resolved_selector}'.") - return resolved_selector - - except Exception as e: - logger.error(f"An error occurred while resolving selector '{selector}': {e}", exc_info=True) - return None - - -@task -def download_and_probe(token_data: dict, available_formats: list[str], **context): - """ - Uses retrieved token data to download and probe media files. - Supports parallel downloading of specific, comma-separated format IDs. - If probing fails, retries downloading only the failed files. - """ - import subprocess - import shlex - import concurrent.futures - - try: - params = context['params'] - info_json_path = token_data.get('info_json_path') - original_url = token_data.get('original_url') - ytdlp_config_path = token_data.get('ytdlp_config_path') - ytdlp_config = {} - if ytdlp_config_path and os.path.exists(ytdlp_config_path): - try: - with open(ytdlp_config_path, 'r', encoding='utf-8') as f: - ytdlp_config = json.load(f) - except (IOError, json.JSONDecodeError) as e: - logger.warning(f"Could not load ytdlp config from {ytdlp_config_path}: {e}") - - # Extract proxy from filename, with fallback to token_data for backward compatibility - proxy = None - if info_json_path: - filename = os.path.basename(info_json_path) - proxy_match = re.search(r'_proxy_(.+)\.json$', filename) - if proxy_match: - sanitized_proxy = proxy_match.group(1) - # Reverse sanitization from auth worker (replace '---' with '://') - proxy = sanitized_proxy.replace('---', '://') - logger.info(f"Extracted proxy '{proxy}' from filename.") - - if not proxy: - logger.warning("Proxy not found in filename. 
Falling back to 'socks_proxy' from token_data.") - proxy = token_data.get('socks_proxy') - - download_dir = token_data.get('job_dir_path') - if not download_dir: - # Fallback for older runs or if job_dir_path is missing - download_dir = os.path.dirname(info_json_path) - - download_format = params.get('download_format') - if not download_format: - raise AirflowException("The 'download_format' parameter is missing or empty.") - - output_template = params.get('output_path_template', "%(id)s.f%(format_id)s.%(ext)s") - full_output_path = os.path.join(download_dir, output_template) - retry_on_probe_failure = params.get('retry_on_probe_failure', False) - - if not (info_json_path and os.path.exists(info_json_path)): - raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).") - - # WORKAROUND: The auth service may inject a 'js_runtimes' key into the info.json - # that is incompatible with the yt-dlp library's expectations, causing a crash. - # We remove it here before passing it to the download tool. - try: - with open(info_json_path, 'r+', encoding='utf-8') as f: - info_data = json.load(f) - if 'js_runtimes' in info_data: - logger.info("Found 'js_runtimes' key in info.json. Removing it as a workaround for yt-dlp library incompatibility.") - del info_data['js_runtimes'] - f.seek(0) - json.dump(info_data, f) - f.truncate() - except Exception as e: - logger.warning(f"Could not process/remove 'js_runtimes' from info.json: {e}", exc_info=True) - - def run_yt_dlp_command(format_selector: str): - """Constructs and runs a yt-ops-client download command, returning a list of final filenames.""" - downloader = params.get('downloader', 'py') - cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector] - - # Pass the unified config file to the download tool - if ytdlp_config_path: - cmd.extend(['--config', ytdlp_config_path]) - - if downloader == 'py': - if proxy: - cmd.extend(['--proxy', proxy]) - cmd.extend(['--output-dir', download_dir]) - # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args - - # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args - py_extra_args = ['--output', output_template] - if params.get('yt_dlp_test_mode'): - py_extra_args.append('--test') - - # Get extra args from the config file now - existing_extra_str = ytdlp_config.get('ytops', {}).get('worker_params', {}).get('yt_dlp_extra_args', '') - existing_extra = shlex.split(existing_extra_str or '') - final_extra_args_list = existing_extra + py_extra_args - if final_extra_args_list: - final_extra_args_str = shlex.join(final_extra_args_list) - cmd.extend(['--extra-ytdlp-args', final_extra_args_str]) - - elif downloader == 'aria-rpc': - # For aria2c running on the host, the proxy (if also on the host) should be referenced via localhost. - # The user-agent is set by yt-dlp's extractor, not directly here. The default is Cobalt-based. - if proxy: - proxy_port_match = re.search(r':(\d+)$', proxy) - if proxy_port_match: - proxy_port = proxy_port_match.group(1) - aria_proxy = f"socks5://127.0.0.1:{proxy_port}" - cmd.extend(['--proxy', aria_proxy]) - logger.info(f"Using translated proxy for host-based aria2c: {aria_proxy}") - else: - logger.warning(f"Could not parse port from proxy '{proxy}'. Passing it to aria2c as-is.") - cmd.extend(['--proxy', proxy]) - - # The remote-dir is the path relative to aria2c's working directory on the host. - # The output-dir is the container's local path to the same shared volume. 
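Concretely, the relative path handed to aria2c ends up looking like the sketch below; the job directory name is a placeholder:

```python
# How the host-side remote directory is derived from the shared downloads volume.
# The job directory name is a placeholder.
import os

download_dir = "/opt/airflow/downloadfiles/videos/in-progress/20240101_120000-dQw4w9WgXcQ"
remote_dir = os.path.relpath(download_dir, "/opt/airflow/downloadfiles/videos")
print(remote_dir)  # in-progress/20240101_120000-dQw4w9WgXcQ
```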
- remote_dir = os.path.relpath(download_dir, '/opt/airflow/downloadfiles/videos') - - # Get aria params from config file - worker_params = ytdlp_config.get('ytops', {}).get('worker_params', {}) - cmd.extend([ - '--aria-host', worker_params.get('aria_host', '172.17.0.1'), - '--aria-port', str(worker_params.get('aria_port', 6800)), - '--aria-secret', worker_params.get('aria_secret'), - '--wait', - '--output-dir', download_dir, - '--remote-dir', remote_dir, - ]) - if 'dashy' in format_selector: - cmd.extend([ - '--auto-merge-fragments', - '--fragments-dir', download_dir, - ]) - if params.get('yt_dlp_cleanup_mode'): - cmd.append('--cleanup') - - elif downloader == 'cli': - # Overwrite cmd to call yt-dlp directly - cmd = ['yt-dlp', '--load-info-json', info_json_path, '-f', format_selector] - if proxy: - cmd.extend(['--proxy', proxy]) - - # The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments. - cli_extra_args = ['--output', full_output_path, '--verbose'] - if params.get('yt_dlp_test_mode'): - cli_extra_args.append('--test') - - existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '') - final_extra_args = existing_extra + cli_extra_args - if final_extra_args: - cmd.extend(final_extra_args) - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - tool_name = 'yt-dlp' if downloader == 'cli' else 'ytops-client' - logger.info(f"--- Preparing to execute {tool_name} ---") - logger.info(f"Full {tool_name} command for format '{format_selector}':") - logger.info(copy_paste_cmd) - logger.info(f"-----------------------------------------") - process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600) - - if process.stdout: - logger.info(f"Download tool STDOUT for format '{format_selector}':\n{process.stdout}") - if process.stderr: - logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}") - - if process.returncode != 0 or "ERROR:" in process.stderr: - logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}") - if "ERROR:" in process.stderr and process.returncode == 0: - logger.error("Detected 'ERROR:' in stderr, treating as failure despite exit code 0.") - - # Pass stderr in the exception for better parsing in the outer try/except block - raise AirflowException(f"Download command failed for format '{format_selector}'. Stderr: {process.stderr}") - - output_files = [] - if downloader == 'cli': - # Parse yt-dlp's verbose output to find the final filename - final_filename = None - for line in process.stdout.strip().split('\n'): - # Case 1: Simple download, no merge - dest_match = re.search(r'\[download\] Destination: (.*)', line) - if dest_match: - final_filename = dest_match.group(1).strip() - - # Case 2: Formats are merged into a new file. This path is absolute if -o is absolute. 
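For context on the two output patterns parsed here, a toy example run against typical yt-dlp log lines; the sample file paths are invented:

```python
# Toy parse of the two yt-dlp output lines handled below (sample lines, invented paths).
import re

stdout = '\n'.join([
    '[download] Destination: /data/dQw4w9WgXcQ.f137.mp4',
    '[Merger] Merging formats into "/data/dQw4w9WgXcQ.mp4"',
])

final_filename = None
for line in stdout.splitlines():
    dest_match = re.search(r'\[download\] Destination: (.*)', line)
    if dest_match:
        final_filename = dest_match.group(1).strip()
    merge_match = re.search(r'\[Merger\] Merging formats into "(.*)"', line)
    if merge_match:
        final_filename = merge_match.group(1).strip()

print(final_filename)  # /data/dQw4w9WgXcQ.mp4
```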
- merge_match = re.search(r'\[Merger\] Merging formats into "(.*)"', line) - if merge_match: - final_filename = merge_match.group(1).strip() - - if final_filename and os.path.exists(final_filename): - output_files.append(final_filename) - else: # Logic for 'py' and 'aria-rpc' - for line in process.stdout.strip().split('\n'): - # For aria-rpc, parse "Download and merge successful: " or "Download successful: " - match = re.search(r'successful: (.+)', line) - if match: - filepath = match.group(1).strip() - if os.path.exists(filepath): - output_files.append(filepath) - else: - logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'") - # For py, it's just the path - elif os.path.exists(line.strip()): - output_files.append(line.strip()) - - if not params.get('yt_dlp_test_mode') and not output_files: - raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.") - - log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:" - logger.info(f"{log_prefix} Command for format '{format_selector}' complete. Files: {output_files}") - return output_files - - def run_ffmpeg_probe(filename): - """Probes a file with ffmpeg to check for corruption.""" - logger.info(f"Probing downloaded file: {filename}") - try: - subprocess.run(['ffmpeg', '-v', 'error', '-sseof', '-10', '-i', filename, '-c', 'copy', '-f', 'null', '-'], check=True, capture_output=True, text=True) - logger.info(f"SUCCESS: Probe confirmed valid media file: {filename}") - except subprocess.CalledProcessError as e: - logger.error(f"ffmpeg probe failed for '{filename}'. File may be corrupt.") - logger.error(f"ffmpeg STDERR: {e.stderr}") - raise AirflowException(f"ffmpeg probe failed for {filename}.") - - def _download_and_probe_formats(formats_to_process: list[str] | str): - """ - Helper to download a list of format IDs (or a single complex selector) and probe the results. - Returns a tuple of (successful_files, failed_probe_files). - """ - all_downloaded_files = [] - delay_between_formats = params.get('delay_between_formats_s', 0) - - if isinstance(formats_to_process, list) and formats_to_process: - logger.info(f"Downloading {len(formats_to_process)} format(s) sequentially: {formats_to_process}") - for i, fid in enumerate(formats_to_process): - all_downloaded_files.extend(run_yt_dlp_command(fid)) - if delay_between_formats > 0 and i < len(formats_to_process) - 1: - logger.info(f"Waiting {delay_between_formats}s before next format download...") - time.sleep(delay_between_formats) - - elif isinstance(formats_to_process, str): - logger.info(f"Using complex format selector '{formats_to_process}'. Running as a single command.") - all_downloaded_files = run_yt_dlp_command(formats_to_process) - - if not all_downloaded_files: - logger.warning("Download process completed but produced no files.") - return [], [] - - if params.get('yt_dlp_test_mode'): - logger.info("Test mode is enabled. 
Skipping probe of output files.") - return all_downloaded_files, [] - - if params.get('skip_probe'): - logger.info("Skipping probe of output files as per configuration.") - return all_downloaded_files, [] - - successful_probes, failed_probes = [], [] - logger.info(f"Probing {len(all_downloaded_files)} downloaded file(s) sequentially...") - for filename in all_downloaded_files: - try: - run_ffmpeg_probe(filename) - successful_probes.append(filename) - except Exception: - failed_probes.append(filename) - - return successful_probes, failed_probes - - # --- Main Execution Logic --- - with open(info_json_path, 'r', encoding='utf-8') as f: - info = json.load(f) - - ytdlp_params = ytdlp_config.get('ytdlp_params', {}) - download_format = ytdlp_params.get('format') - - final_formats_to_download = [] - downloader = params.get('downloader', 'cli') - pass_without_splitting = params.get('pass_without_formats_splitting', False) - - if pass_without_splitting and downloader != 'aria-rpc': - logger.info("'pass_without_formats_splitting' is True. Passing download format string directly to the download tool.") - final_formats_to_download = download_format - else: - if pass_without_splitting and downloader == 'aria-rpc': - logger.warning("'pass_without_formats_splitting' is True but is not compatible with 'aria-rpc' downloader. Splitting formats as normal.") - - # Split the format string by commas to get a list of individual format selectors. - # This enables parallel downloads of different formats or format groups. - # For example, '18,140,299/298' becomes ['18', '140', '299/298'], - # and each item will be downloaded in a separate yt-dlp process. - if download_format and isinstance(download_format, str): - formats_to_download_initial = [selector.strip() for selector in download_format.split(',') if selector.strip()] - else: - # Fallback for safety, though download_format should always be a string. - formats_to_download_initial = [] - - if not formats_to_download_initial: - raise AirflowException("No valid download format selectors were found after parsing.") - - # --- Filter and resolve requested formats --- - if not available_formats: - logger.warning("List of available formats is empty. Cannot validate numeric selectors, but will attempt to resolve generic selectors.") - - for selector in formats_to_download_initial: - # A selector is considered generic if it contains keywords like 'best' or filter brackets '[]'. - is_generic = bool(re.search(r'(best|\[|\])', selector)) - - if is_generic: - resolved_selector = _resolve_generic_selector(selector, info_json_path, logger) - if resolved_selector: - # The resolver returns a list for '+' selectors, or a string for others. - resolved_formats = resolved_selector if isinstance(resolved_selector, list) else [resolved_selector] - - for res_format in resolved_formats: - # Prefer -dashy version if available and the format is a simple numeric ID - if res_format.isdigit() and f"{res_format}-dashy" in available_formats: - final_format = f"{res_format}-dashy" - logger.info(f"Resolved format '{res_format}' from selector '{selector}'. 
Preferred '-dashy' version: '{final_format}'.") - else: - final_format = res_format - - # Validate the chosen format against available formats - if available_formats: - individual_ids = re.split(r'[/+]', final_format) - is_available = any(fid in available_formats for fid in individual_ids) - - if is_available: - final_formats_to_download.append(final_format) - else: - logger.warning(f"Resolved format '{final_format}' (from '{selector}') contains no available formats. Skipping.") - else: - # Cannot validate, so we trust the resolver's output. - final_formats_to_download.append(final_format) - else: - logger.warning(f"Could not resolve generic selector '{selector}' using yt-dlp. Skipping.") - else: - # This is a numeric-based selector (e.g., '140' or '299/298' or '140-dashy'). - # Validate it against the available formats. - if not available_formats: - logger.warning(f"Cannot validate numeric selector '{selector}' because available formats list is empty. Assuming it's valid.") - final_formats_to_download.append(selector) - continue - - individual_ids = re.split(r'[/+]', selector) - is_available = any(fid in available_formats for fid in individual_ids) - - if is_available: - final_formats_to_download.append(selector) - else: - logger.warning(f"Requested numeric format selector '{selector}' contains no available formats. Skipping.") - - if not final_formats_to_download: - raise AirflowException("None of the requested formats are available for this video.") - - # --- Initial Download and Probe --- - successful_files, failed_files = _download_and_probe_formats(final_formats_to_download) - - if params.get('yt_dlp_test_mode'): - logger.info(f"Test mode: yt-dlp returned {len(successful_files)} filenames. Skipping probe failure checks.") - if not successful_files: - raise AirflowException("Test run did not produce any filenames.") - # Do not return here. Proceed to the cleanup and move logic. - - final_success_list = successful_files - if failed_files: - # --- Handle Probe Failures and Retry --- - if not retry_on_probe_failure: - raise AirflowException(f"Probe failed for {len(failed_files)} file(s) and retry is disabled: {failed_files}") - - logger.warning(f"Probe failed for {len(failed_files)} file(s). Attempting one re-download for failed files...") - - delay_between_formats = params.get('delay_between_formats_s', 0) - if delay_between_formats > 0: - logger.info(f"Waiting {delay_between_formats}s before re-download attempt...") - time.sleep(delay_between_formats) - - format_ids_to_retry = [] - # Since each download is now for a specific selector and the output template - # includes the format_id, we can always attempt to extract the format_id - # from the failed filename for a targeted retry. - for f in failed_files: - match = re.search(r'\.f([\d]+)\.', f) - if match: - format_ids_to_retry.append(match.group(1)) - else: - logger.error(f"Could not extract format_id from failed file '{f}'. 
Cannot retry this specific file.") - formats_to_download_retry = format_ids_to_retry - - if not formats_to_download_retry: - raise AirflowException("Probe failed, but could not determine which formats to retry.") - - # Rename failed files to allow for a fresh download attempt - for f in failed_files: - try: - failed_path = f"{f}.probe_failed_{int(time.time())}" - os.rename(f, failed_path) - logger.info(f"Renamed corrupted file to {failed_path}") - except OSError as rename_err: - logger.error(f"Could not rename corrupted file '{f}': {rename_err}") - - # --- Retry Download and Probe --- - retried_successful_files, retried_failed_files = _download_and_probe_formats(formats_to_download_retry) - - if retried_failed_files: - logger.error(f"Probe failed again for {len(retried_failed_files)} file(s) after retry: {retried_failed_files}") - - final_success_list = successful_files + retried_successful_files - logger.info(f"Retry complete. Final success count: {len(final_success_list)} file(s).") - - if not final_success_list: - raise AirflowException("Download and probe process completed but produced no valid files.") - - if params.get('yt_dlp_cleanup_mode', False): - logger.info(f"Cleanup mode is enabled. Creating .empty files and deleting originals for {len(final_success_list)} files.") - for f in final_success_list: - try: - empty_file_path = f"{f}.empty" - with open(empty_file_path, 'w') as fp: - pass # create empty file - logger.info(f"Created empty file: {empty_file_path}") - os.remove(f) - logger.info(f"Deleted original file: {f}") - except Exception as e: - logger.error(f"Error during cleanup for file {f}: {e}", exc_info=True) - # Do not fail the task for a cleanup error, just log it. - - # --- Move completed job directory to final destination --- - try: - video_id = _extract_video_id(original_url) - if not video_id: - logger.error(f"Could not extract video_id from URL '{original_url}' for final move. Skipping.") - else: - # --- Rename info.json to a simple format before moving --- - path_to_info_json_for_move = info_json_path # Default to original path - try: - # info_json_path is the full path to the original info.json - if info_json_path and os.path.exists(info_json_path): - new_info_json_name = f"info_{video_id}.json" - new_info_json_path = os.path.join(os.path.dirname(info_json_path), new_info_json_name) - - if info_json_path != new_info_json_path: - logger.info(f"Renaming '{info_json_path}' to '{new_info_json_path}' for final delivery.") - os.rename(info_json_path, new_info_json_path) - path_to_info_json_for_move = new_info_json_path - else: - logger.info("info.json already has the simple name. No rename needed.") - else: - logger.warning("Could not find info.json to rename before moving.") - except Exception as rename_e: - logger.error(f"Failed to rename info.json before move: {rename_e}", exc_info=True) - # --- End of rename logic --- - - source_dir = download_dir # This is the job_dir_path - - # Group downloads into 10-minute batch folders based on completion time. 
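- # The completion minute is rounded down to the nearest 10, e.g. a job finishing
- # at 14:37 goes into the '...T1430' folder with everything else from that window.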
- now = datetime.now() - rounded_minute = (now.minute // 10) * 10 - timestamp_str = now.strftime('%Y%m%dT%H') + f"{rounded_minute:02d}" - - final_dir_base = os.path.join(Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles'), 'videos', 'ready', timestamp_str) - final_dir_path = os.path.join(final_dir_base, video_id) - - os.makedirs(final_dir_base, exist_ok=True) - - logger.info(f"Moving completed job from '{source_dir}' to final destination '{final_dir_path}'") - if os.path.exists(final_dir_path): - logger.warning(f"Destination '{final_dir_path}' already exists. It will be removed and replaced.") - shutil.rmtree(final_dir_path) - - # Create the destination directory and move only the essential files, then clean up the source. - # This ensures no temporary or junk files are carried over. - os.makedirs(final_dir_path) - - # 1. Move the info.json file - if path_to_info_json_for_move and os.path.exists(path_to_info_json_for_move): - shutil.move(path_to_info_json_for_move, final_dir_path) - logger.info(f"Moved '{os.path.basename(path_to_info_json_for_move)}' to destination.") - - # 2. Move the media files (or their .empty placeholders) - files_to_move = [] - if params.get('yt_dlp_cleanup_mode', False): - files_to_move = [f"{f}.empty" for f in final_success_list] - else: - files_to_move = final_success_list - - for f in files_to_move: - if os.path.exists(f): - shutil.move(f, final_dir_path) - logger.info(f"Moved '{os.path.basename(f)}' to destination.") - else: - logger.warning(f"File '{f}' expected but not found for moving.") - - # 3. Clean up the original source directory - logger.info(f"Cleaning up original source directory '{source_dir}'") - shutil.rmtree(source_dir) - logger.info(f"Successfully moved job to '{final_dir_path}' and cleaned up source.") - except Exception as e: - logger.error(f"Failed to move completed job directory: {e}", exc_info=True) - # Do not fail the task for a move error, just log it. - - return final_success_list - except Exception as e: - ti = context['task_instance'] - error_message = str(e) - error_code = "DOWNLOAD_FAILED" - msg_lower = error_message.lower() - - unrecoverable_patterns = { - "AGE_GATED_SIGN_IN": ['sign in to confirm your age'], - "MEMBERS_ONLY": ['members-only content'], - "VIDEO_PROCESSING": ['processing this video'], - "COPYRIGHT_REMOVAL": ['copyright'], - "GEO_RESTRICTED": ['in your country'], - "PRIVATE_VIDEO": ['private video'], - "VIDEO_REMOVED": ['video has been removed'], - "VIDEO_UNAVAILABLE": ['video unavailable'], - "HTTP_403_FORBIDDEN": ['http error 403: forbidden'] - } - - for code, patterns in unrecoverable_patterns.items(): - if any(p in msg_lower for p in patterns): - error_code = code - break - - # Always push details to XCom for the branch operator to inspect. 
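- # The 'download_error_details' XCom key is read by handle_download_failure_branch
- # to decide between retrying, skipping, or treating the failure as fatal.
- # Illustrative payload: {'error_code': 'HTTP_403_FORBIDDEN', 'error_message': '...'}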
- error_details = {'error_code': error_code, 'error_message': error_message} - ti.xcom_push(key='download_error_details', value=error_details) - - raise AirflowException(f"Download and probe failed: {e}") from e - -@task -def mark_url_as_success(initial_data: dict, downloaded_file_paths: list, token_data: dict, **context): - """Records the successful result in Redis.""" - params = context['params'] - url = initial_data['url_to_process'] - result_data = { - 'status': 'success', 'end_time': time.time(), 'url': url, - 'downloaded_file_paths': downloaded_file_paths, **token_data, - 'dag_run_id': context['dag_run'].run_id, - } - client = _get_redis_client(params['redis_conn_id']) - - # Update activity counters - try: - proxy_url = token_data.get('socks_proxy') - account_id = token_data.get('successful_account_id') - now = time.time() - # Use a unique member to prevent collisions, e.g., dag_run_id - member = context['dag_run'].run_id - - if proxy_url: - proxy_key = f"activity:per_proxy:{proxy_url}" - client.zadd(proxy_key, {member: now}) - client.expire(proxy_key, 3600 * 2) # Expire after 2 hours - if account_id: - account_key = f"activity:per_account:{account_id}" - client.zadd(account_key, {member: now}) - client.expire(account_key, 3600 * 2) # Expire after 2 hours - except Exception as e: - logger.error(f"Could not update activity counters: {e}", exc_info=True) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - _update_client_stats(client, params.get('clients', ''), 'success', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on success: {e}", exc_info=True) - - progress_queue = f"{params['queue_name']}_progress" - result_queue = f"{params['queue_name']}_result" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored success result for URL '{url}' and removed from progress queue.") - - return token_data - -@task(trigger_rule='one_failed') -def report_failure_and_stop(**context): - """ - Handles a failed URL processing attempt by recording a detailed error report to Redis - and stopping the worker loop. - """ - params = context['params'] - ti = context['task_instance'] - url = params.get('url_to_process') - - # Ensure we have a valid URL string for Redis keys - if not url or url == 'None': - url = f"unknown_url_{context['dag_run'].run_id}" - logger.warning(f"No valid URL found in params. Using generated key: {url}") - - # Collect error details from XCom - error_details = {} - - # Check for error details from get_token tasks - first_token_task_id = 'get_token' - retry_token_task_id = 'retry_get_token' - - first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') - retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') - - # Use the most recent error details - if retry_token_error: - error_details = retry_token_error - elif first_token_error: - error_details = first_token_error - else: - pass - - logger.error(f"A failure occurred while processing URL '{url}'. 
Reporting to Redis and stopping loop.") - - result_data = { - 'status': 'failed', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'error_details': error_details - } - - try: - client = _get_redis_client(params['redis_conn_id']) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on failure: {e}", exc_info=True) - - result_queue = f"{params['queue_name']}_result" - fail_queue = f"{params['queue_name']}_fail" - - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hset(fail_queue, url, json.dumps(result_data)) - # Only try to remove from progress queue if we have a real URL - if url != f"unknown_url_{context['dag_run'].run_id}": - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") - except Exception as e: - logger.error(f"Could not report failure to Redis: {e}", exc_info=True) - return None - - -@task(trigger_rule='one_failed') -def report_failure_and_continue(**context): - """ - Handles a failed URL processing attempt by recording a detailed error report to Redis. - This is a common endpoint for various failure paths that should not stop the overall dispatcher loop. - """ - params = context['params'] - ti = context['task_instance'] - url = params.get('url_to_process') - - # Ensure we have a valid URL string for Redis keys - if not url or url == 'None': - url = f"unknown_url_{context['dag_run'].run_id}" - logger.warning(f"No valid URL found in params. Using generated key: {url}") - - # Collect error details from XCom - error_details = {} - - # Check for error details from get_token tasks - first_token_task_id = 'get_token' - retry_token_task_id = 'retry_get_token' - - first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') - retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') - - # Use the most recent error details - if retry_token_error: - error_details = retry_token_error - elif first_token_error: - error_details = first_token_error - else: - # Check for other possible error sources - # This is a simplified approach - in a real implementation you might want to - # check more task IDs or use a more sophisticated error collection mechanism - pass - - logger.error(f"A failure occurred while processing URL '{url}'. 
Reporting to Redis.") - - result_data = { - 'status': 'failed', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'error_details': error_details - } - - try: - client = _get_redis_client(params['redis_conn_id']) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on failure: {e}", exc_info=True) - - result_queue = f"{params['queue_name']}_result" - fail_queue = f"{params['queue_name']}_fail" - - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hset(fail_queue, url, json.dumps(result_data)) - # Only try to remove from progress queue if we have a real URL - if url != f"unknown_url_{context['dag_run'].run_id}": - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") - except Exception as e: - logger.error(f"Could not report failure to Redis: {e}", exc_info=True) - - -@task(trigger_rule='one_failed') -def handle_fatal_error(**context): - """ - Handles fatal, non-retryable errors (e.g., infrastructure issues). - This task reports the failure to Redis before failing the DAG run to ensure - failed URLs are queued for later reprocessing, then stops the processing loop. - """ - params = context['params'] - ti = context['task_instance'] - url = params.get('url_to_process') - - # Ensure we have a valid URL string for Redis keys - if not url or url == 'None': - url = f"unknown_url_{context['dag_run'].run_id}" - logger.warning(f"No valid URL found in params. Using generated key: {url}") - - # Collect error details - error_details = {} - first_token_task_id = 'get_token' - retry_token_task_id = 'retry_get_token' - - first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') - retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') - - # Use the most recent error details - if retry_token_error: - error_details = retry_token_error - elif first_token_error: - error_details = first_token_error - - logger.error(f"A fatal, non-retryable error occurred for URL '{url}'. 
See previous task logs for details.") - - # Report failure to Redis so the URL can be reprocessed later - try: - client = _get_redis_client(params['redis_conn_id']) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True) - - result_data = { - 'status': 'failed', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'error': 'fatal_error', - 'error_message': 'Fatal non-retryable error occurred', - 'error_details': error_details - } - result_queue = f"{params['queue_name']}_result" - fail_queue = f"{params['queue_name']}_fail" - - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hset(fail_queue, url, json.dumps(result_data)) - # Only try to remove from progress queue if we have a real URL - if url != f"unknown_url_{context['dag_run'].run_id}": - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.") - except Exception as e: - logger.error(f"Could not report fatal error to Redis: {e}", exc_info=True) - - # Instead of raising an exception, log a clear message and return a result - # This allows the task to complete successfully while still indicating the error - logger.error("FATAL ERROR: The dispatcher loop will stop due to a non-retryable error.") - return {'status': 'fatal_error', 'url': url} - - -@task(trigger_rule='one_success') -def continue_processing_loop(token_data: dict | None = None, **context): - """ - After a run, triggers a new dispatcher to continue the processing loop, - passing along the account/proxy to make them sticky if available. - """ - params = context['params'] - dag_run = context['dag_run'] - ti = context['task_instance'] - - # Check if we're coming from a fatal error path - fatal_error_result = ti.xcom_pull(task_ids='handle_fatal_error') - if fatal_error_result and isinstance(fatal_error_result, dict) and fatal_error_result.get('status') == 'fatal_error': - logger.error("Not continuing processing loop due to fatal error in previous task.") - return - - # Do not continue the loop for manual runs of the worker DAG. - # A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'. - if not dag_run.run_id.startswith('worker_run_'): - logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") - return - - dispatcher_dag_id = 'ytdlp_ops_v01_dispatcher' - dag_model = DagModel.get_dagmodel(dispatcher_dag_id) - if dag_model and dag_model.is_paused: - logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Stopping processing loop.") - return - - # Create a new unique run_id for the dispatcher. - new_dispatcher_run_id = f"retriggered_by_worker_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}" - - # Pass all original parameters from the orchestrator through to the new dispatcher run. - conf_to_pass = {k: v for k, v in params.items() if v is not None} - conf_to_pass['worker_index'] = params.get('worker_index') - - if token_data: - # On success path, make the account and proxy "sticky" for the next run. 
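- # The next dispatcher run receives 'account_id' and 'assigned_proxy_url' in its conf,
- # so the follow-up worker reuses the same identity instead of selecting a new one.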
- conf_to_pass['account_id'] = token_data.get('successful_account_id') - conf_to_pass['assigned_proxy_url'] = token_data.get('socks_proxy') - logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop with sticky account/proxy.") - logger.info(f" - Sticky Account: {conf_to_pass.get('account_id')}") - logger.info(f" - Sticky Proxy: {conf_to_pass.get('assigned_proxy_url')}") - else: - # On failure/skip paths, no token_data is passed. Clear sticky params to allow re-selection. - conf_to_pass.pop('account_id', None) - conf_to_pass.pop('assigned_proxy_url', None) - logger.info(f"Worker finished on a non-success path. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop without sticky account/proxy.") - - # The new dispatcher will pull its own URL and determine its own queue. - conf_to_pass.pop('url_to_process', None) - conf_to_pass.pop('worker_queue', None) - - trigger_dag( - dag_id=dispatcher_dag_id, - run_id=new_dispatcher_run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - - -@task.branch(trigger_rule='one_failed') -def handle_retry_failure_branch(task_id_to_check: str, **context): - """ - Inspects a failed retry attempt and decides on the final action. - On retry, most errors are considered fatal for the URL, but not for the system. - """ - ti = context['task_instance'] - params = context['params'] - error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details') - if not error_details: - return 'handle_fatal_error' - - error_message = error_details.get('error_message', '').strip() - error_code = error_details.get('error_code', '').strip() - - # Unrecoverable video errors that should not be retried or treated as system failures. - unrecoverable_video_errors = [ - "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL", - "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED" - ] - - if error_code in unrecoverable_video_errors: - logger.warning(f"Unrecoverable video error '{error_code}' detected on retry for '{task_id_to_check}'.") - return 'handle_unrecoverable_video_error' - - if error_code == 'TRANSPORT_ERROR': - logger.error(f"Fatal Thrift connection error on retry from '{task_id_to_check}'.") - return 'handle_fatal_error' - - is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"] - if is_bannable: - policy = params.get('on_auth_failure', 'retry_with_new_account') - if policy == 'proceed_loop_under_manual_inspection': - logger.warning(f"Bannable error '{error_code}' on retry with 'proceed_loop_under_manual_inspection' policy. Reporting failure and continuing loop. MANUAL INTERVENTION IS LIKELY REQUIRED.") - return 'report_bannable_and_continue' - # On retry failure, we always ban and stop the loop for this URL. - logger.warning(f"Bannable error '{error_code}' on retry. Banning account and reporting failure.") - return 'ban_and_report_after_retry' - - logger.error(f"URL failed on retry with code '{error_code}'. Reporting failure and continuing loop.") - return 'report_failure_and_continue' - - -@task -def ban_and_report_after_retry(**context): - """Bans the account used in a failed retry and prepares for failure reporting.""" - ti = context['task_instance'] - reason = "Banned by Airflow worker after failed retry" - - # Manually pull XCom because trigger rules can make XComArgs resolve to None. 
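- # 'retry_logic.coalesce_retry_data' holds the account/proxy used for the retry attempt;
- # that is the account this task bans.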
- retry_data = ti.xcom_pull(task_ids='retry_logic.coalesce_retry_data') - if not retry_data: - # This can happen if the upstream task that generates the data was skipped. - logger.error("Could not retrieve retry data to ban account. This may be due to an unexpected task flow.") - # Instead of failing, return a default dict with enough info to continue - return {'account_id': 'unknown', 'url_to_process': context['params'].get('url_to_process', 'unknown')} - - # The account to ban is the one from the retry attempt. - _ban_account(retry_data, reason, context) - logger.info(f"Account '{retry_data.get('account_id')}' banned after retry failed. Proceeding to report failure.") - return retry_data - - -@task.branch(trigger_rule='one_failed') -def handle_download_failure_branch(**context): - """ - If download or probe fails, decide whether to stop the loop, continue, or retry - based on the `on_download_failure` policy. - """ - params = context['params'] - policy = params.get('on_download_failure', 'proceed_loop') - ti = context['task_instance'] - - download_error_details = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details') - - # First, check for specific error codes that override the general policy. - if download_error_details: - error_code = download_error_details.get('error_code') - - # Unrecoverable video errors always go to the 'skipped' handler. - unrecoverable_video_errors = [ - "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL", - "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED" - ] - if error_code in unrecoverable_video_errors: - logger.warning(f"Unrecoverable video error '{error_code}' during download. Skipping.") - return 'handle_unrecoverable_video_error' - - # A 403 Forbidden error is not retryable, regardless of policy. - if error_code == 'HTTP_403_FORBIDDEN': - logger.error("Download failed with HTTP 403 Forbidden. This is not retryable. Reporting failure and continuing loop.") - return 'report_failure_and_continue' - - # Now, apply the general policy for other download failures. - if policy == 'retry_with_new_token': - logger.info("Download failed. Policy is to retry with a new token. Branching to retry logic.") - return 'retry_logic_for_download' - - if policy == 'stop_loop': - logger.error(f"Download or probe failed with policy '{policy}'. Stopping loop by routing to fatal error handler.") - return 'handle_fatal_error' - - # Default policy is 'proceed_loop' - logger.warning(f"Download or probe failed with policy '{policy}'. Reporting failure and continuing loop.") - return 'report_failure_and_continue' - - -@task(trigger_rule='one_success') -def coalesce_token_data(get_token_result=None, retry_get_token_result=None): - """ - Selects the successful token data from either the first attempt or the retry. - The task that did not run or failed will have a result of None. - """ - if retry_get_token_result: - logger.info("Using token data from retry attempt.") - return retry_get_token_result - if get_token_result: - logger.info("Using token data from initial attempt.") - return get_token_result - # This should not be reached if trigger_rule='one_success' is working correctly. - raise AirflowException("Could not find a successful token result from any attempt.") - - -# FIX: Use 'all_done' trigger rule so this task runs even when upstream tasks fail. -# The branch operator will skip other branches, but this task needs to run -# when the branch points to it, regardless of the failed get_token task. 
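- # The trade-off of 'all_done' is that this task also fires on paths that did not
- # select it, so the function re-checks the branch result via XCom and skips itself otherwise.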
-@task(trigger_rule='all_done') -def handle_unrecoverable_video_error(**context): - """ - Handles errors for videos that are unavailable (private, removed, etc.). - These are not system failures, so the URL is logged to a 'skipped' queue - and the processing loop continues without marking the run as failed. - """ - ti = context['task_instance'] - - # Check if this task was actually selected by the branch operator. - # If it was skipped by the branch, we should not execute the logic. - # We can check if the branch task's result points to us. - dag_run = ti.get_dagrun() - - # Check multiple possible branch tasks that could route here - branch_task_ids = [ - 'initial_attempt.handle_bannable_error_branch', - 'retry_logic.handle_retry_failure_branch', - 'download_processing.handle_download_failure_branch' - ] - - was_selected_by_branch = False - for branch_task_id in branch_task_ids: - try: - branch_result = ti.xcom_pull(task_ids=branch_task_id) - if branch_result == 'handle_unrecoverable_video_error': - was_selected_by_branch = True - logger.info(f"Task was selected by branch '{branch_task_id}'") - break - except Exception: - pass - - if not was_selected_by_branch: - logger.info("Task was not selected by any branch operator. Skipping execution.") - raise AirflowSkipException("Not selected by branch operator") - - params = context['params'] - url = params.get('url_to_process', 'unknown') - - # Collect error details from the failed task - error_details = {} - auth_error = ti.xcom_pull(task_ids='initial_attempt.get_token', key='error_details') - auth_retry_error = ti.xcom_pull(task_ids='retry_logic.retry_get_token', key='error_details') - download_error = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details') - - if auth_retry_error: error_details = auth_retry_error - elif auth_error: error_details = auth_error - elif download_error: error_details = download_error - - error_code = error_details.get('error_code', 'UNKNOWN_VIDEO_ERROR') - error_message = error_details.get('error_message', 'Video is unavailable for an unknown reason.') - - logger.warning(f"Skipping URL '{url}' due to unrecoverable video error: {error_code} - {error_message}") - - result_data = { - 'status': 'skipped', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'reason': error_code, - 'details': error_message, - 'error_details': error_details - } - - try: - client = _get_redis_client(params['redis_conn_id']) - - skipped_queue = f"{params['queue_name']}_skipped" - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(skipped_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored skipped result for URL '{url}' in '{skipped_queue}' and removed from progress queue.") - except Exception as e: - logger.error(f"Could not report skipped video to Redis: {e}", exc_info=True) - - # Return a marker so downstream tasks know this path was taken - return {'status': 'skipped', 'url': url} - - -# FIX: Use 'all_done' trigger rule for the same reason as handle_unrecoverable_video_error -@task(trigger_rule='all_done') -def report_bannable_and_continue(**context): - """ - Handles a bannable error by reporting it, but continues the loop - as per the 'proceed_loop_under_manual_inspection' policy. - """ - ti = context['task_instance'] - - # Check if this task was actually selected by the branch operator. 
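- # The branch operator's XCom is the task_id it selected, so comparing it against
- # this task's id confirms whether this path was actually chosen.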
- dag_run = ti.get_dagrun() - - branch_task_ids = [ - 'initial_attempt.handle_bannable_error_branch', - 'retry_logic.handle_retry_failure_branch' - ] - - was_selected_by_branch = False - for branch_task_id in branch_task_ids: - try: - branch_result = ti.xcom_pull(task_ids=branch_task_id) - if branch_result == 'report_bannable_and_continue': - was_selected_by_branch = True - logger.info(f"Task was selected by branch '{branch_task_id}'") - break - except Exception: - pass - - if not was_selected_by_branch: - logger.info("Task was not selected by any branch operator. Skipping execution.") - raise AirflowSkipException("Not selected by branch operator") - - params = context['params'] - url = params.get('url_to_process', 'unknown') - - # Collect error details - error_details = {} - first_token_task_id = 'initial_attempt.get_token' - retry_token_task_id = 'retry_logic.retry_get_token' - - first_token_error = ti.xcom_pull(task_ids=first_token_task_id, key='error_details') - retry_token_error = ti.xcom_pull(task_ids=retry_token_task_id, key='error_details') - - # Use the most recent error details - if retry_token_error: - error_details = retry_token_error - elif first_token_error: - error_details = first_token_error - - logger.error(f"Bannable error for URL '{url}'. Policy is to continue loop under manual supervision.") - - # Report failure to Redis - try: - client = _get_redis_client(params['redis_conn_id']) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - _update_client_stats(client, params.get('clients', ''), 'failure', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on bannable error: {e}", exc_info=True) - - result_data = { - 'status': 'failed', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'error': 'bannable_error_manual_override', - 'error_message': 'Bannable error occurred, but policy is set to continue loop under manual supervision.', - 'error_details': error_details - } - result_queue = f"{params['queue_name']}_result" - fail_queue = f"{params['queue_name']}_fail" - - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hset(fail_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored bannable error for URL '{url}' in '{result_queue}' and '{fail_queue}'.") - except Exception as e: - logger.error(f"Could not report bannable error to Redis: {e}", exc_info=True) - - # Return a marker so downstream tasks know this path was taken - return {'status': 'bannable_reported', 'url': url} - - - - -# ============================================================================= -# DAG Definition with TaskGroups -# ============================================================================= -with DAG( - dag_id='ytdlp_ops_v01_worker_per_url', - default_args=DEFAULT_ARGS, - schedule=None, - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 'worker'], - doc_md=__doc__, - render_template_as_native_obj=True, - params={ - 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"), - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"), - 'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string"), - 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer"), - 'account_pool': Param('default_account', type="string"), - 'account_pool_size': Param(None, type=["integer", 
"null"]), - 'prepend_client_to_account': Param(True, type="boolean", title="[Worker Param] Prepend Client to Account", description="If True, prepends client and timestamp to account names in prefix mode."), - 'machine_id': Param(None, type=["string", "null"]), - 'assigned_proxy_url': Param(None, type=["string", "null"], title="[Manual/Worker Param] Assigned Proxy URL", description="For manual runs or sticky loops: a specific proxy URL to use, overriding the server's proxy pool logic."), - 'timeout': Param(DEFAULT_TIMEOUT, type="integer"), - 'output_path_template': Param("%(id)s.f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."), - 'on_auth_failure': Param( - 'retry_with_new_account', - type="string", - enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'proceed_loop_under_manual_inspection'], - title="[Worker Param] On Authentication Failure Policy", - description="Policy for handling bannable authentication failures." - ), - 'on_download_failure': Param( - 'proceed_loop', - type="string", - enum=['stop_loop', 'proceed_loop', 'retry_with_new_token'], - title="[Worker Param] On Download Failure Policy", - description="Policy for handling download or probe failures." - ), - 'retry_on_probe_failure': Param(False, type="boolean"), - 'skip_probe': Param(False, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), - 'yt_dlp_cleanup_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), - 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean"), - 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), - 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), - 'download_format': Param( - 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', - type="string", - title="[Worker Param] Download Format", - description="Custom yt-dlp format string. Common presets: [1] 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' (Default, best quality MP4). [2] '18,140-dashy/140,133-dashy/134-dashy/136-dashy/137-dashy/298-dashy/299-dashy' (Legacy formats). [3] '299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' (High-framerate formats)." - ), - 'pass_without_formats_splitting': Param( - False, - type="boolean", - title="[Worker Param] Pass format string without splitting", - description="If True, passes the entire 'download_format' string to the download tool as-is. This is for complex selectors. Not compatible with 'aria-rpc' downloader." - ), - 'downloader': Param( - 'cli', - type="string", - enum=['py', 'aria-rpc', 'cli'], - title="Download Tool", - description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)." 
- ), - 'aria_host': Param('172.17.0.1', type="string", title="Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server."), - 'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."), - 'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."), - # --- Unified JSON Config (passed from orchestrator) --- - 'ytdlp_config_json': Param('{}', type="string", title="[Internal] Unified JSON config from orchestrator."), - # --- Manual Run / Internal Parameters --- - 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL, or the special value 'PULL_FROM_QUEUE' to pull one URL from the Redis inbox. This is ignored if triggered by the dispatcher."), - 'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), - 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), - 'worker_index': Param(None, type=["integer", "null"], title="[Internal] Worker Index", description="A unique index for each parallel worker loop, assigned by the orchestrator."), - 'account_id': Param(None, type=["string", "null"], title="[Internal] Assigned Account ID", description="A specific account_id to use, making the account 'sticky' for a worker loop."), - } -) as dag: - initial_data = get_url_and_assign_account() - - # --- Task Instantiation with TaskGroups --- - - # Main success/failure handlers (outside groups for clear end points) - # These tasks are targets of branch operators that run after failures. - # They need trigger_rule='all_done' to run when the branch points to them. 
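- # handle_unrecoverable_video_error and report_bannable_and_continue already declare
- # trigger_rule='all_done' and verify branch selection themselves, so they need no override here.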
- fatal_error_task = handle_fatal_error.override(trigger_rule='all_done')() - report_failure_and_stop_task = report_failure_and_stop.override(trigger_rule='all_done')() - report_failure_task = report_failure_and_continue.override(trigger_rule='all_done')() - unrecoverable_video_error_task = handle_unrecoverable_video_error() - report_bannable_and_continue_task = report_bannable_and_continue() - - # --- Task Group 1: Initial Attempt --- - with TaskGroup("initial_attempt", tooltip="Initial token acquisition attempt") as initial_attempt_group: - first_token_attempt = get_token(initial_data) - initial_branch_task = handle_bannable_error_branch.override(trigger_rule='one_failed')( - task_id_to_check=first_token_attempt.operator.task_id - ) - - # Tasks for the "stop_loop" policy on initial attempt - ban_and_report_immediately_task = ban_and_report_immediately.override(task_id='ban_and_report_immediately')() - - first_token_attempt >> initial_branch_task - initial_branch_task >> [fatal_error_task, ban_and_report_immediately_task, unrecoverable_video_error_task, report_bannable_and_continue_task] - - # --- Task Group 2: Retry Logic --- - with TaskGroup("retry_logic", tooltip="Retry logic with account management") as retry_logic_group: - # Retry path tasks - ban_and_retry_group = ban_and_retry_logic.override(group_id='ban_account_and_prepare_for_retry')( - initial_data=initial_data - ) - # This task is for retries after a ban check - after_ban_account_task = assign_new_account_after_ban_check.override(task_id='assign_new_account_after_ban_check')( - initial_data=initial_data - ) - # This task is for direct retries (e.g., on connection error) - direct_retry_account_task = assign_new_account_for_direct_retry.override(task_id='assign_new_account_for_direct_retry')( - initial_data=initial_data - ) - - @task(trigger_rule='one_success') - def coalesce_retry_data(direct_retry_data=None, after_ban_data=None): - """Coalesces account data from one of the two mutually exclusive retry paths.""" - if direct_retry_data: - return direct_retry_data - if after_ban_data: - return after_ban_data - raise AirflowException("Could not find valid account data for retry.") - - coalesced_retry_data = coalesce_retry_data( - direct_retry_data=direct_retry_account_task, - after_ban_data=after_ban_account_task - ) - - retry_token_task = get_token.override(task_id='retry_get_token')( - initial_data=coalesced_retry_data - ) - - # Retry failure branch and its tasks - retry_branch_task = handle_retry_failure_branch.override(trigger_rule='one_failed')( - task_id_to_check=retry_token_task.operator.task_id - ) - ban_after_retry_report_task = ban_and_report_after_retry.override(task_id='ban_and_report_after_retry', trigger_rule='all_done')() - - # Internal dependencies within retry group - ban_and_retry_group >> after_ban_account_task - after_ban_account_task >> coalesced_retry_data - direct_retry_account_task >> coalesced_retry_data - coalesced_retry_data >> retry_token_task - retry_token_task >> retry_branch_task - retry_branch_task >> [fatal_error_task, report_failure_task, ban_after_retry_report_task, unrecoverable_video_error_task, report_bannable_and_continue_task] - ban_after_retry_report_task >> report_failure_and_stop_task - - # --- Task Group 3: Download and Processing --- - with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group: - # Coalesce, download, and success tasks - token_data = coalesce_token_data( - get_token_result=first_token_attempt, - 
retry_get_token_result=retry_token_task - ) - list_formats_task = list_available_formats(token_data=token_data) - download_task = download_and_probe( - token_data=token_data, - available_formats=list_formats_task, - ) - download_branch_task = handle_download_failure_branch.override(trigger_rule='one_failed')() - - # Internal dependencies within download group - first_token_attempt >> token_data - retry_token_task >> token_data - token_data >> list_formats_task - list_formats_task >> download_task - download_task >> download_branch_task - - # --- Task Group 4: Download Retry Logic --- - with TaskGroup("retry_logic_for_download", tooltip="Retry download with a new account after a 403 error") as retry_logic_for_download_group: - new_account_data = assign_new_account_for_direct_retry.override(task_id='assign_new_account_for_download_retry')( - initial_data=initial_data - ) - new_token_data = get_token.override(task_id='get_token_for_download_retry')( - initial_data=new_account_data - ) - new_formats = list_available_formats.override(task_id='list_formats_for_download_retry')( - token_data=new_token_data - ) - retry_download_task = download_and_probe.override(task_id='retry_download_and_probe')( - token_data=new_token_data, - available_formats=new_formats - ) - - # If any task in this group fails, the entire group fails. - # The group's failure will trigger the top-level `report_failure_task`. - new_account_data >> new_token_data >> new_formats >> retry_download_task - - # --- Coalesce final results for success tasks --- - @task(trigger_rule='one_success') - def coalesce_final_download_files(initial_dl=None, retry_dl=None, **context): - """ - Selects the successful list of downloaded files from either the first attempt or the retry. - This version checks task instance states to be more robust against ambiguous None results. - """ - try: - ti = context['task_instance'] - dag_run = ti.get_dagrun() - - retry_dl_ti = dag_run.get_task_instance('retry_logic_for_download.retry_download_and_probe') - - if retry_dl_ti and retry_dl_ti.state == 'success': - logger.info("Using downloaded files from the retry download path.") - return retry_dl - except Exception as e: - logger.warning(f"Could not check state of retry download task, falling back to initial download result. Error: {e}") - - logger.info("Using downloaded files from the initial download path.") - return initial_dl - - @task(trigger_rule='one_success') - def coalesce_final_token_data(initial_token_task_result=None, download_retry_token_task_result=None, **context): - """ - Selects the correct token data for the success report. - It checks if the download retry path was taken by seeing if its token task ran and succeeded. - """ - # We can't just check for a result, as a skipped task might have a `None` result. - # We need to check the state of the task instance. - # If the download retry token task succeeded, it means that path was taken. - try: - ti = context['task_instance'] - retry_token_ti = ti.get_dagrun().get_task_instance('retry_logic_for_download.get_token_for_download_retry') - if retry_token_ti and retry_token_ti.state == 'success': - logger.info("Using token data from download retry path.") - return download_retry_token_task_result - except Exception as e: - logger.warning(f"Could not check state of retry token task, falling back to initial token. 
Error: {e}") - - logger.info("Using token data from initial auth path.") - return initial_token_task_result - - final_files = coalesce_final_download_files( - initial_dl=download_task, - retry_dl=retry_download_task - ) - final_token = coalesce_final_token_data( - initial_token_task_result=coalesce_token_data( - get_token_result=first_token_attempt, - retry_get_token_result=retry_token_task - ), - download_retry_token_task_result=new_token_data - ) - - # Instantiate final success task - final_success_task = mark_url_as_success( - initial_data=initial_data, - downloaded_file_paths=final_files, - token_data=final_token - ) - - # Coalesce all paths that lead to the continuation of the loop. - @task(trigger_rule='one_success') - def coalesce_all_continue_paths(success_result=None, unrecoverable_result=None, bannable_result=None, failure_result=None, fatal_error_result=None): - """ - Gathers results from all possible paths that can continue the processing loop. - Only the success path provides data; others provide None. - """ - if fatal_error_result and isinstance(fatal_error_result, dict) and fatal_error_result.get('status') == 'fatal_error': - logger.error("Fatal error detected in coalesce_all_continue_paths. Will not continue processing loop.") - return {'status': 'fatal_error'} - - if success_result: - return success_result - return None - - final_data_for_loop = coalesce_all_continue_paths( - success_result=final_success_task, - unrecoverable_result=unrecoverable_video_error_task, - bannable_result=report_bannable_and_continue_task, - failure_result=report_failure_task, - fatal_error_result=fatal_error_task, - ) - - # Final task to trigger the next DAG run - continue_processing_loop(token_data=final_data_for_loop) - - # Final success task, fed by coalesced results - final_files >> final_success_task - final_token >> final_success_task - - # --- DAG Dependencies between TaskGroups --- - # Initial attempt can lead to retry logic or direct failure - initial_branch_task >> [retry_logic_group, fatal_error_task, ban_and_report_immediately_task, unrecoverable_video_error_task, report_bannable_and_continue_task] - - # Ban and report immediately leads to failure reporting - ban_and_report_immediately_task >> report_failure_and_stop_task - - # Connect download failure branch to the new retry group - download_branch_task >> [retry_logic_for_download_group, report_failure_task, fatal_error_task, unrecoverable_video_error_task] - - # Connect success paths to the coalescing tasks - download_task >> final_files - retry_download_task >> final_files - - # The token from the initial auth path is one input to the final token coalesce - coalesce_token_data(get_token_result=first_token_attempt, retry_get_token_result=retry_token_task) >> final_token - # The token from the download retry path is the other input - new_token_data >> final_token - - # Connect coalesced results to the final success task - final_files >> final_success_task - final_token >> final_success_task - - # If the download retry group fails, it should trigger the generic failure handler - retry_logic_for_download_group >> report_failure_task diff --git a/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py b/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py deleted file mode 100644 index 5b899e8..0000000 --- a/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py +++ /dev/null @@ -1,98 +0,0 @@ -# -*- coding: utf-8 -*- -""" -DAG to dispatch work to ytdlp_ops_worker_per_url_auth DAGs. -It pulls a URL from Redis and triggers an auth worker with a pinned queue. 
-""" - -from __future__ import annotations -import logging -import os -import socket -from datetime import timedelta - -from airflow.decorators import task -from airflow.exceptions import AirflowSkipException -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.api.common.trigger_dag import trigger_dag -from airflow.utils.dates import days_ago - -from utils.redis_utils import _get_redis_client - -logger = logging.getLogger(__name__) - -DEFAULT_QUEUE_NAME = 'queue2_auth' -DEFAULT_REDIS_CONN_ID = 'redis_default' - -@task(queue='queue-auth') -def dispatch_url_to_auth_worker(**context): - """ - Pulls one URL from Redis, determines the current worker's dedicated queue, - and triggers the auth worker DAG to process the URL on that specific queue. - """ - ti = context['task_instance'] - logger.info(f"Auth Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.") - - # --- Check for worker pause lock file --- - lock_file_path = '/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile' - hostname = socket.gethostname() - if os.path.exists(lock_file_path): - logger.info(f"Worker '{hostname}' is paused. Lock file found at '{lock_file_path}'. Skipping URL pull.") - raise AirflowSkipException(f"Worker '{hostname}' is paused.") - else: - logger.info(f"Worker '{hostname}' is active (no lock file found at '{lock_file_path}'). Proceeding to pull URL.") - - params = context['params'] - redis_conn_id = params['redis_conn_id'] - queue_name = params['queue_name'] - inbox_queue = f"{queue_name}_inbox" - - logger.info(f"Attempting to pull one URL from Redis queue '{inbox_queue}'...") - client = _get_redis_client(redis_conn_id) - url_bytes = client.lpop(inbox_queue) - - if not url_bytes: - logger.info("Redis auth inbox queue is empty. No work to dispatch. Skipping task.") - raise AirflowSkipException("Redis auth inbox queue is empty. No work to dispatch.") - - url_to_process = url_bytes.decode('utf-8') - logger.info(f"Pulled URL '{url_to_process}' from the queue.") - - # Determine the worker-specific queue for affinity - hostname = socket.gethostname() - worker_queue = f"queue-auth-{hostname}" - logger.info(f"Running on worker '{hostname}'. Dispatching job to its dedicated queue '{worker_queue}'.") - - conf_to_pass = {**params, 'url_to_process': url_to_process, 'worker_queue': worker_queue} - - run_id = f"worker_run_auth_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}" - - logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_auth' with run_id '{run_id}'") - trigger_dag( - dag_id='ytdlp_ops_v02_worker_per_url_auth', - run_id=run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - -with DAG( - dag_id='ytdlp_ops_v02_dispatcher_auth', - default_args={'owner': 'airflow', 'retries': 0}, - schedule=None, - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 'worker', 'dispatcher', 'auth'], - is_paused_upon_creation=True, - doc_md=""" - ### YT-DLP Auth URL Dispatcher - - This DAG dispatches a single URL to an auth worker with a pinned queue. - It pulls from the `queue2_auth_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_auth` DAG. 
- """, - render_template_as_native_obj=True, - params={ - 'queue_name': Param(DEFAULT_QUEUE_NAME, type='string', title='Queue Name', description='The base name of the Redis queue to pull URLs from.'), - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'), - }, -) as dag: - dispatch_url_to_auth_worker() diff --git a/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py b/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py deleted file mode 100644 index 78614ef..0000000 --- a/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -""" -DAG to dispatch download jobs to ytdlp_ops_worker_per_url_dl DAGs. -It pulls a job payload from Redis and triggers a download worker. -""" - -from __future__ import annotations -import logging -import os -import socket -from datetime import timedelta - -from airflow.decorators import task -from airflow.exceptions import AirflowSkipException -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.api.common.trigger_dag import trigger_dag -from airflow.utils.dates import days_ago - -from utils.redis_utils import _get_redis_client - -logger = logging.getLogger(__name__) - -DEFAULT_QUEUE_NAME = 'queue2_dl' -DEFAULT_REDIS_CONN_ID = 'redis_default' - -@task(queue='queue-dl') -def dispatch_job_to_dl_worker(**context): - """ - Triggers a v2 download worker for the 'profile-first' model. - The worker itself is responsible for locking a profile and finding a suitable task. - This dispatcher simply starts a worker process. - """ - ti = context['task_instance'] - logger.info(f"Download Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.") - params = context['params'] - - # Determine the worker-specific queue for affinity - hostname = socket.gethostname() - worker_queue = f"queue-dl-{hostname}" - logger.info(f"Running on worker '{hostname}'. Dispatching a new profile-first worker instance to its dedicated queue '{worker_queue}'.") - - # Pass all orchestrator params, but remove job_data as the worker finds its own job. - conf_to_pass = {**params, 'worker_queue': worker_queue} - conf_to_pass.pop('job_data', None) - - run_id = f"worker_run_dl_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}" - - logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_dl' with run_id '{run_id}'") - trigger_dag( - dag_id='ytdlp_ops_v02_worker_per_url_dl', - run_id=run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - -with DAG( - dag_id='ytdlp_ops_v02_dispatcher_dl', - default_args={'owner': 'airflow', 'retries': 0}, - schedule=None, - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 'worker', 'dispatcher', 'download'], - is_paused_upon_creation=True, - doc_md=""" - ### YT-DLP v2 Download Worker Dispatcher (Profile-First) - - This DAG dispatches a single "profile-first" download worker. - It does **not** pull a job from a queue. Instead, it triggers the `ytdlp_ops_v02_worker_per_url_dl` DAG, - which is responsible for locking an available download profile and then finding a matching task - from the `queue_dl_format_tasks` Redis list. 
- """, - render_template_as_native_obj=True, - params={ - 'queue_name': Param(DEFAULT_QUEUE_NAME, type='string', title='Queue Name', description='The base name of the Redis queue to pull job payloads from.'), - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'), - }, -) as dag: - dispatch_job_to_dl_worker() diff --git a/airflow/dags/ytdlp_ops_v02_orchestrator_auth.py b/airflow/dags/ytdlp_ops_v02_orchestrator_auth.py deleted file mode 100644 index 02eb52e..0000000 --- a/airflow/dags/ytdlp_ops_v02_orchestrator_auth.py +++ /dev/null @@ -1,362 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DAG to orchestrate ytdlp_ops_dispatcher_v2_auth DAG runs based on a defined policy. -""" - -from airflow import DAG -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.operators.python import PythonOperator -from airflow.models.param import Param -from airflow.models.variable import Variable -from airflow.utils.dates import days_ago -from airflow.api.common.trigger_dag import trigger_dag -from airflow.models.dagrun import DagRun -from airflow.models.dag import DagModel -from datetime import timedelta, datetime -import logging -import random -import time -import json - -# --- Add project root to path to allow for yt-ops-client imports --- -import sys -# The yt-ops-client package is installed in editable mode in /app -if '/app' not in sys.path: - sys.path.insert(0, '/app') - -# Import utility functions -from utils.redis_utils import _get_redis_client - -# Import Thrift modules for proxy status check -from pangramia.yt.tokens_ops import YTTokenOpService -from thrift.protocol import TBinaryProtocol -from thrift.transport import TSocket, TTransport - -# Configure logging -logger = logging.getLogger(__name__) - -# Default settings -DEFAULT_REDIS_CONN_ID = 'redis_default' -DEFAULT_TOTAL_WORKERS = 8 -DEFAULT_WORKERS_PER_BUNCH = 1 -DEFAULT_WORKER_DELAY_S = 1 -DEFAULT_BUNCH_DELAY_S = 1 - -DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") -DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) - -# Default ytdlp.json content for the unified config parameter -DEFAULT_YTDLP_CONFIG = { - "ytops": { - "force_renew": [], - "session_params": { - # "visitor_rotation_threshold": 250 - } - }, - "ytdlp_params": { - "debug_printtraffic": True, - "write_pages": True, - "verbose": True, - "no_color": True, - "ignoreerrors": True, - "noresizebuffer": True, - "buffersize": "4M", - "concurrent_fragments": 8, - "socket_timeout": 60, - "outtmpl": { - "default": "%(id)s.f%(format_id)s.%(ext)s" - }, - "restrictfilenames": True, - "updatetime": False, - "noplaylist": True, - "match_filter": "!is_live", - "writeinfojson": True, - "skip_download": True, - "allow_playlist_files": False, - "clean_infojson": True, - "getcomments": False, - "writesubtitles": False, - "writethumbnail": False, - "sleep_interval_requests": 0.75, - "parse_metadata": [ - ":(?P)" - ], - "extractor_args": { - "youtube": { - "player_client": ["tv_simply"], - "formats": ["duplicate"], - "jsc_trace": ["true"], - "pot_trace": ["true"], - "skip": ["translated_subs", "hls"] - }, - "youtubepot-bgutilhttp": { - "base_url": ["http://172.17.0.1:4416"] - } - }, - "noprogress": True, - "format_sort": [ - "res", - "ext:mp4:m4a" - ], - "remuxvideo": "mp4", - "nooverwrites": True, - "continuedl": True - } -} - - -# --- Helper Functions --- - -def 
_check_application_queue(redis_client, queue_base_name: str) -> int: - """Checks and logs the length of the application's inbox queue.""" - inbox_queue_name = f"{queue_base_name}_inbox" - logger.info(f"--- Checking Application Work Queue ---") - try: - q_len = redis_client.llen(inbox_queue_name) - logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).") - return q_len - except Exception as e: - logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True) - return -1 # Indicate an error - -def _inspect_celery_queues(redis_client, queue_names: list): - """Inspects Celery queues in Redis and logs their status.""" - logger.info("--- Inspecting Celery Queues in Redis ---") - for queue_name in queue_names: - try: - q_len = redis_client.llen(queue_name) - logger.info(f"Queue '{queue_name}': Length = {q_len}") - - if q_len > 0: - logger.info(f"Showing up to 10 tasks in '{queue_name}':") - # Fetch up to 10 items from the start of the list (queue) - items_bytes = redis_client.lrange(queue_name, 0, 9) - for i, item_bytes in enumerate(items_bytes): - try: - # Celery tasks are JSON-encoded strings - task_data = json.loads(item_bytes.decode('utf-8')) - # Pretty print for readability in logs - pretty_task_data = json.dumps(task_data, indent=2) - logger.info(f" Task {i+1}:\n{pretty_task_data}") - except (json.JSONDecodeError, UnicodeDecodeError) as e: - logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}") - except Exception as e: - logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True) - logger.info("--- End of Queue Inspection ---") - - -# --- Main Orchestration Callable --- - -def orchestrate_workers_ignition_callable(**context): - """ - Main orchestration logic. Triggers a specified number of dispatcher DAGs - to initiate self-sustaining processing loops. - """ - params = context['params'] - ti = context['task_instance'] - logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.") - logger.info("Starting dispatcher ignition sequence.") - - dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_auth' - worker_queue = 'queue-auth' - app_queue_name = 'queue2_auth' - - logger.info(f"Running in v2 (auth) mode. Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'") - - dag_model = DagModel.get_dagmodel(dispatcher_dag_id) - if dag_model and dag_model.is_paused: - logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.") - raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.") - - total_workers = int(params['total_workers']) - workers_per_bunch = int(params['workers_per_bunch']) - - # --- Input Validation --- - if total_workers <= 0: - logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.") - raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).") - - if workers_per_bunch <= 0: - logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. 
Aborting.") - raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.") - # --- End Input Validation --- - - worker_delay = int(params['delay_between_workers_s']) - bunch_delay = int(params['delay_between_bunches_s']) - - # Create a list of worker numbers to trigger - worker_indices = list(range(total_workers)) - bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)] - - # --- Inspect Queues before starting --- - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - redis_client = _get_redis_client(redis_conn_id) - - # First, check the application queue for work - app_queue_len = _check_application_queue(redis_client, app_queue_name) - - if params.get('skip_if_queue_empty') and app_queue_len == 0: - logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.") - raise AirflowSkipException("Application work queue is empty.") - - # Then, inspect the target Celery queue for debugging - _inspect_celery_queues(redis_client, [worker_queue]) - except AirflowSkipException: - raise # Re-raise to let Airflow handle the skip - except Exception as e: - logger.error(f"Could not inspect queues due to an error: {e}. Continuing with ignition sequence.") - # --- End of Inspection --- - - logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.") - - dag_run_id = context['dag_run'].run_id - total_triggered = 0 - - # --- End of Inspection --- - - logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.") - - dag_run_id = context['dag_run'].run_id - total_triggered = 0 - - for i, bunch in enumerate(bunches): - logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---") - for j, worker_index in enumerate(bunch): - # Create a unique run_id for each dispatcher run - run_id = f"dispatched_{dag_run_id}_{total_triggered}" - - # Pass all orchestrator params to the dispatcher, which will then pass them to the worker. - conf_to_pass = {p: params[p] for p in params} - - logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})") - logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") - - trigger_dag( - dag_id=dispatcher_dag_id, - run_id=run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - total_triggered += 1 - - # Delay between dispatches in a bunch - if j < len(bunch) - 1: - logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...") - time.sleep(worker_delay) - - # Delay between bunches - if i < len(bunches) - 1: - logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---") - time.sleep(bunch_delay) - - logger.info(f"--- Ignition sequence complete. Total dispatcher runs triggered: {total_triggered}. 
---") - - # --- Final Queue Inspection --- - final_check_delay = 30 # seconds - logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...") - time.sleep(final_check_delay) - - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - redis_client = _get_redis_client(redis_conn_id) - - # Log connection details for debugging broker mismatch issues - conn_kwargs = redis_client.connection_pool.connection_kwargs - logger.info(f"Final check using Redis connection '{redis_conn_id}': " - f"host={conn_kwargs.get('host')}, " - f"port={conn_kwargs.get('port')}, " - f"db={conn_kwargs.get('db')}") - - _inspect_celery_queues(redis_client, [worker_queue]) - logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. " - "If queues are empty, workers have started processing.") - except Exception as e: - logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.") - - - - -# ============================================================================= -# DAG Definition -# ============================================================================= - -default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'email_on_failure': False, - 'email_on_retry': False, - 'retries': 1, - 'retry_delay': timedelta(minutes=1), - 'start_date': days_ago(1), -} - -with DAG( - dag_id='ytdlp_ops_v02_orchestrator_auth', - default_args=default_args, - schedule=None, # This DAG runs only when triggered. - max_active_runs=1, # Only one ignition process should run at a time. - catchup=False, - description='Ignition system for ytdlp_ops_v02_dispatcher_auth DAGs.', - doc_md=""" - ### YT-DLP v2 (Auth) Worker Ignition System - - This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v2 authentication worker**. - It triggers `ytdlp_ops_v02_dispatcher_auth` DAGs, which pull raw URLs from `queue2_auth_inbox` and trigger `ytdlp_ops_v02_worker_per_url_auth` workers. - """, - tags=['ytdlp', 'mgmt', 'master'], - params={ - # --- Ignition Control Parameters --- - 'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."), - 'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."), - 'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."), - 'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."), - 'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."), - - # --- Unified Worker Configuration --- - 'ytdlp_config_json': Param( - json.dumps(DEFAULT_YTDLP_CONFIG, indent=2), - type="string", - title="[Worker Param] Unified yt-dlp JSON Config", - description="A JSON string containing all parameters for both yt-ops-server and the yt-dlp downloaders. 
This is the primary way to configure workers.", - **{'ui_widget': 'json', 'multi_line': True} - ), - - # --- Worker Passthrough Parameters --- - # --- V2 Profile Management Parameters --- - 'redis_env': Param("sim_auth", type="string", title="[V2 Profiles] Redis Environment", description="The environment for v2 profile management (e.g., 'sim_auth'). Determines the Redis key prefix."), - 'profile_prefix': Param("auth_user", type="string", title="[V2 Profiles] Profile Prefix", description="The prefix for auth profiles that workers should attempt to lock."), - - # --- Worker Passthrough Parameters --- - 'on_bannable_failure': Param('proceed_loop_under_manual_inspection', type="string", title="DEPRECATED: Worker handles failures internally."), - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), - 'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string", description="[Worker Param] IP of the ytdlp-ops-server. Default is from Airflow variable YT_AUTH_SERVICE_IP or hardcoded."), - 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer", description="[Worker Param] Port of the Envoy load balancer. Default is from Airflow variable YT_AUTH_SERVICE_PORT or hardcoded."), - 'machine_id': Param("ytdlp-ops-airflow-service", type="string", description="[Worker Param] Identifier for the client machine."), - - # --- DEPRECATED PARAMS --- - 'account_pool': Param('ytdlp_account', type="string", description="DEPRECATED: Use profile_prefix instead."), - 'account_pool_size': Param(10, type=["integer", "null"], description="DEPRECATED: Pool size is managed in Redis."), - 'prepend_client_to_account': Param(True, type="boolean", description="DEPRECATED"), - 'assigned_proxy_url': Param(None, type=["string", "null"], description="DEPRECATED: Proxy is determined by the locked profile."), - 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="DEPRECATED"), - } -) as dag: - - orchestrate_task = PythonOperator( - task_id='start_worker_loops', - python_callable=orchestrate_workers_ignition_callable, - queue='queue-mgmt', - ) - orchestrate_task.doc_md = """ - ### Start Worker Loops - This is the main task that executes the ignition policy. - - It triggers `ytdlp_ops_v02_dispatcher_auth` DAGs according to the batch settings. - - It passes all its parameters down to the dispatchers, which will use them to trigger workers. - """ diff --git a/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py b/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py deleted file mode 100644 index e478bbc..0000000 --- a/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py +++ /dev/null @@ -1,305 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DAG to orchestrate ytdlp_ops_dispatcher_v2_dl DAG runs based on a defined policy. 
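Both orchestrators share the same ignition policy: `total_workers` dispatcher runs are triggered in bunches of `workers_per_bunch`, with `delay_between_workers_s` applied inside a bunch and `delay_between_bunches_s` between bunches. A minimal sketch of the batching step (it mirrors the list slicing used in the orchestrator callables; the numbers are just an example):

```python
# Example: 8 dispatcher runs in bunches of 3 -> [[0, 1, 2], [3, 4, 5], [6, 7]]
total_workers = 8
workers_per_bunch = 3

worker_indices = list(range(total_workers))
bunches = [
    worker_indices[i:i + workers_per_bunch]
    for i in range(0, len(worker_indices), workers_per_bunch)
]
print(bunches)  # each inner list is triggered back-to-back, then the bunch delay applies
```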
-""" - -from airflow import DAG -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.operators.python import PythonOperator -from airflow.models.param import Param -from airflow.models.variable import Variable -from airflow.utils.dates import days_ago -from airflow.api.common.trigger_dag import trigger_dag -from airflow.models.dagrun import DagRun -from airflow.models.dag import DagModel -from datetime import timedelta -import logging -import random -import time -import json - -# --- Add project root to path to allow for yt-ops-client imports --- -import sys -# The yt-ops-client package is installed in editable mode in /app -if '/app' not in sys.path: - sys.path.insert(0, '/app') - -# Import utility functions -from utils.redis_utils import _get_redis_client - -# Import Thrift modules for proxy status check -from pangramia.yt.tokens_ops import YTTokenOpService -from thrift.protocol import TBinaryProtocol -from thrift.transport import TSocket, TTransport - -# Configure logging -logger = logging.getLogger(__name__) - -# Default settings -DEFAULT_REDIS_CONN_ID = 'redis_default' -DEFAULT_TOTAL_WORKERS = 8 -DEFAULT_WORKERS_PER_BUNCH = 1 -DEFAULT_WORKER_DELAY_S = 1 -DEFAULT_BUNCH_DELAY_S = 1 - -# --- Helper Functions --- - -def _check_application_queue(redis_client, queue_base_name: str) -> int: - """Checks and logs the length of the application's inbox queue.""" - inbox_queue_name = f"{queue_base_name}_inbox" - logger.info(f"--- Checking Application Work Queue ---") - try: - q_len = redis_client.llen(inbox_queue_name) - logger.info(f"Application work queue '{inbox_queue_name}' has {q_len} item(s).") - return q_len - except Exception as e: - logger.error(f"Failed to check application queue '{inbox_queue_name}': {e}", exc_info=True) - return -1 # Indicate an error - -def _inspect_celery_queues(redis_client, queue_names: list): - """Inspects Celery queues in Redis and logs their status.""" - logger.info("--- Inspecting Celery Queues in Redis ---") - for queue_name in queue_names: - try: - q_len = redis_client.llen(queue_name) - logger.info(f"Queue '{queue_name}': Length = {q_len}") - - if q_len > 0: - logger.info(f"Showing up to 10 tasks in '{queue_name}':") - # Fetch up to 10 items from the start of the list (queue) - items_bytes = redis_client.lrange(queue_name, 0, 9) - for i, item_bytes in enumerate(items_bytes): - try: - # Celery tasks are JSON-encoded strings - task_data = json.loads(item_bytes.decode('utf-8')) - # Pretty print for readability in logs - pretty_task_data = json.dumps(task_data, indent=2) - logger.info(f" Task {i+1}:\n{pretty_task_data}") - except (json.JSONDecodeError, UnicodeDecodeError) as e: - logger.warning(f" Task {i+1}: Could not decode/parse task data. Error: {e}. Raw: {item_bytes!r}") - except Exception as e: - logger.error(f"Failed to inspect queue '{queue_name}': {e}", exc_info=True) - logger.info("--- End of Queue Inspection ---") - - -# --- Main Orchestration Callable --- - -def orchestrate_workers_ignition_callable(**context): - """ - Main orchestration logic. Triggers a specified number of dispatcher DAGs - to initiate self-sustaining processing loops. - """ - params = context['params'] - ti = context['task_instance'] - logger.info(f"Orchestrator task '{ti.task_id}' running on queue '{ti.queue}'.") - logger.info("Starting dispatcher ignition sequence.") - - dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_dl' - worker_queue = 'queue-dl' - app_queue_name = 'queue2_dl' - - logger.info(f"Running in v2 (download) mode. 
Dispatcher DAG: '{dispatcher_dag_id}', Worker Queue: '{worker_queue}'") - - dag_model = DagModel.get_dagmodel(dispatcher_dag_id) - if dag_model and dag_model.is_paused: - logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Skipping dispatcher ignition.") - raise AirflowSkipException(f"Dispatcher DAG '{dispatcher_dag_id}' is paused.") - - total_workers = int(params['total_workers']) - workers_per_bunch = int(params['workers_per_bunch']) - - # --- Input Validation --- - if total_workers <= 0: - logger.warning(f"'total_workers' is {total_workers}. No workers will be started. Skipping ignition.") - raise AirflowSkipException(f"No workers to start (total_workers={total_workers}).") - - if workers_per_bunch <= 0: - logger.error(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}. Aborting.") - raise AirflowException(f"'workers_per_bunch' must be a positive integer, but got {workers_per_bunch}.") - # --- End Input Validation --- - - worker_delay = int(params['delay_between_workers_s']) - bunch_delay = int(params['delay_between_bunches_s']) - - # Create a list of worker numbers to trigger - worker_indices = list(range(total_workers)) - bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)] - - # --- Inspect Queues before starting --- - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - redis_client = _get_redis_client(redis_conn_id) - - # First, check the application queue for work - app_queue_len = _check_application_queue(redis_client, app_queue_name) - - if params.get('skip_if_queue_empty') and app_queue_len == 0: - logger.info("'skip_if_queue_empty' is True and application queue is empty. Skipping worker ignition.") - raise AirflowSkipException("Application work queue is empty.") - - # Then, inspect the target Celery queue for debugging - _inspect_celery_queues(redis_client, [worker_queue]) - except AirflowSkipException: - raise # Re-raise to let Airflow handle the skip - except Exception as e: - logger.error(f"Could not inspect queues due to an error: {e}. Continuing with ignition sequence.") - # --- End of Inspection --- - - logger.info(f"Plan: Triggering {total_workers} total dispatcher runs in {len(bunches)} bunches. Each run will attempt to process one URL.") - - dag_run_id = context['dag_run'].run_id - total_triggered = 0 - - for i, bunch in enumerate(bunches): - logger.info(f"--- Triggering Bunch {i+1}/{len(bunches)} (contains {len(bunch)} dispatcher(s)) ---") - for j, _ in enumerate(bunch): - # Create a unique run_id for each dispatcher run - run_id = f"dispatched_{dag_run_id}_{total_triggered}" - - # Pass all orchestrator params to the dispatcher, which will then pass them to the worker. - conf_to_pass = {p: params[p] for p in params} - - logger.info(f"Triggering dispatcher {j+1}/{len(bunch)} in bunch {i+1} (run {total_triggered + 1}/{total_workers}) (Run ID: {run_id})") - logger.debug(f"Full conf for dispatcher run {run_id}: {conf_to_pass}") - - trigger_dag( - dag_id=dispatcher_dag_id, - run_id=run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - total_triggered += 1 - - # Delay between dispatches in a bunch - if j < len(bunch) - 1: - logger.info(f"Waiting {worker_delay}s before next dispatcher in bunch...") - time.sleep(worker_delay) - - # Delay between bunches - if i < len(bunches) - 1: - logger.info(f"--- Bunch {i+1} triggered. Waiting {bunch_delay}s before next bunch... ---") - time.sleep(bunch_delay) - - logger.info(f"--- Ignition sequence complete. 
Total dispatcher runs triggered: {total_triggered}. ---") - - # --- Final Queue Inspection --- - final_check_delay = 30 # seconds - logger.info(f"Waiting {final_check_delay}s for a final queue status check to see if workers picked up tasks...") - time.sleep(final_check_delay) - - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - redis_client = _get_redis_client(redis_conn_id) - - # Log connection details for debugging broker mismatch issues - conn_kwargs = redis_client.connection_pool.connection_kwargs - logger.info(f"Final check using Redis connection '{redis_conn_id}': " - f"host={conn_kwargs.get('host')}, " - f"port={conn_kwargs.get('port')}, " - f"db={conn_kwargs.get('db')}") - - _inspect_celery_queues(redis_client, [worker_queue]) - logger.info("Final queue inspection complete. If queues are not empty, workers have not picked up tasks yet. " - "If queues are empty, workers have started processing.") - except Exception as e: - logger.error(f"Could not perform final queue inspection: {e}. This does not affect worker ignition.") - - - - -# ============================================================================= -# DAG Definition -# ============================================================================= - -default_args = { - 'owner': 'airflow', - 'depends_on_past': False, - 'email_on_failure': False, - 'email_on_retry': False, - 'retries': 1, - 'retry_delay': timedelta(minutes=1), - 'start_date': days_ago(1), -} - -with DAG( - dag_id='ytdlp_ops_v02_orchestrator_dl', - default_args=default_args, - schedule=None, # This DAG runs only when triggered. - max_active_runs=1, # Only one ignition process should run at a time. - catchup=False, - description='Ignition system for ytdlp_ops_v02_dispatcher_dl DAGs.', - doc_md=""" - ### YT-DLP v2 (Download) Worker Ignition System - - This DAG acts as an "ignition system" to start one or more self-sustaining worker loops for the **v2 download worker**. - It triggers `ytdlp_ops_v02_dispatcher_dl` DAGs, which pull job payloads from `queue2_dl_inbox` and trigger `ytdlp_ops_v02_worker_per_url_dl` workers. - """, - tags=['ytdlp', 'mgmt', 'master'], - params={ - # --- Ignition Control Parameters --- - 'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of dispatcher loops to start."), - 'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of dispatchers to start in each bunch."), - 'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each dispatcher within a bunch."), - 'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."), - 'skip_if_queue_empty': Param(False, type="boolean", title="[Ignition Control] Skip if Queue Empty", description="If True, the orchestrator will not start any dispatchers if the application's work queue is empty."), - - # --- V2 Profile Management Parameters --- - 'redis_env': Param("sim_download", type="string", title="[V2 Profiles] Redis Environment", description="The environment for v2 profile management (e.g., 'sim_download'). 
Determines the Redis key prefix."), - 'profile_prefix': Param("download_user", type="string", title="[V2 Profiles] Profile Prefix", description="The prefix for download profiles that workers should attempt to lock."), - - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."), - 'clients': Param('mweb,web_camoufox,tv', type="string", title="[Worker Param] Clients", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"), - - # --- Download Control Parameters --- - 'delay_between_formats_s': Param(15, type="integer", title="[Worker Param] Delay Between Formats (s)", description="Delay in seconds between downloading each format when multiple formats are specified. A 22s wait may be effective for batch downloads, while 6-12s may suffice if cookies are refreshed regularly."), - 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), - 'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), - 'yt_dlp_cleanup_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), - 'download_format': Param( - 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best', - type="string", - title="[Worker Param] Download Format", - description="Custom yt-dlp format string. Common presets: [1] 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' (Default, best quality MP4). [2] '18,140-dashy/140,133-dashy/134-dashy/136-dashy/137-dashy/298-dashy/299-dashy' (Legacy formats). [3] '299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' (High-framerate formats)." - ), - 'pass_without_formats_splitting': Param( - False, - type="boolean", - title="[Worker Param] Pass format string without splitting", - description="If True, passes the entire 'download_format' string to the download tool as-is. This is for complex selectors. Not compatible with 'aria-rpc' downloader." - ), - 'downloader': Param( - 'cli', - type="string", - enum=['py', 'aria-rpc', 'cli'], - title="[Worker Param] Download Tool", - description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)." - ), - 'aria_host': Param('172.17.0.1', type="string", title="[Worker Param] Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_HOST'."), - 'aria_port': Param(6800, type="integer", title="[Worker Param] Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server. Can be set via Airflow Variable 'YTDLP_ARIA_PORT'."), - 'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="[Worker Param] Aria2c Secret", description="For 'aria-rpc' downloader: Secret token. Can be set via Airflow Variable 'YTDLP_ARIA_SECRET'."), - 'yt_dlp_extra_args': Param( - '--verbose --no-resize-buffer --buffer-size 4M --fragment-retries 2 --concurrent-fragments 8 --socket-timeout 15 --sleep-interval 5 --max-sleep-interval 10 --no-part --restrict-filenames', - type=["string", "null"], - title="[Worker Param] Extra yt-dlp arguments", - description="Extra command-line arguments for yt-dlp during download." 
- ), - } -) as dag: - - orchestrate_task = PythonOperator( - task_id='start_worker_loops', - python_callable=orchestrate_workers_ignition_callable, - queue='queue-mgmt', - ) - orchestrate_task.doc_md = """ - ### Start Worker Loops - This is the main task that executes the ignition policy. - - It triggers `ytdlp_ops_v02_dispatcher_dl` DAGs according to the batch settings. - - It passes all its parameters down to the dispatchers, which will use them to trigger workers. - """ diff --git a/airflow/dags/ytdlp_ops_v02_worker_per_url_auth.py b/airflow/dags/ytdlp_ops_v02_worker_per_url_auth.py deleted file mode 100644 index 4b6a9f6..0000000 --- a/airflow/dags/ytdlp_ops_v02_worker_per_url_auth.py +++ /dev/null @@ -1,770 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DAG for authenticating a single YouTube URL passed via DAG run configuration. -This is the "Auth Worker" part of a separated Auth/Download pattern. -It acquires a token, saves the info.json, and pushes the token data to a -Redis queue for the download worker. -""" - -from __future__ import annotations - -from airflow.decorators import task, task_group -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.models import Variable -from airflow.models.dag import DAG, DagModel -from airflow.models.param import Param -from airflow.models.xcom_arg import XComArg -from airflow.operators.dummy import DummyOperator -from airflow.utils.dates import days_ago -from airflow.utils.task_group import TaskGroup -from airflow.api.common.trigger_dag import trigger_dag -from datetime import datetime, timedelta -import concurrent.futures -import json -import logging -import os -import random -import re -import redis -import socket -import time -import traceback -import uuid - -# Import utility functions and Thrift modules -from utils.redis_utils import _get_redis_client -from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext -from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException -from pangramia.yt.tokens_ops import YTTokenOpService -from thrift.protocol import TBinaryProtocol -from thrift.transport import TSocket, TTransport -from thrift.transport.TTransport import TTransportException - -# Configure logging -logger = logging.getLogger(__name__) - -# ytops_client imports for v2 profile management -try: - from ytops_client.profile_manager_tool import ProfileManager, format_duration, format_timestamp -except ImportError as e: - logger.critical(f"Could not import ytops_client modules: {e}. 
Ensure yt-ops-client package is installed correctly in Airflow's environment.") - raise - - -# Default settings from Airflow Variables or hardcoded fallbacks -DEFAULT_QUEUE_NAME = 'queue2_auth' -DEFAULT_REDIS_CONN_ID = 'redis_default' -DEFAULT_TIMEOUT = 3600 -DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") -DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) - -DEFAULT_REQUEST_PARAMS = { - "context_reuse_policy": { - "enabled": True, - "max_age_seconds": 86400, - "reuse_visitor_id": True, - "reuse_cookies": True - }, - "token_generation_strategy": { - "youtubei_js": { - "generate_po_token": True, - "generate_gvs_token": True - } - }, - "ytdlp_params": { - "use_curl_prefetch": False, - "token_supplement_strategy": { - "youtubepot_bgutilhttp_extractor": { - "enabled": True - } - }, - "visitor_id_override": { - "enabled": True - } - }, - "session_params": { - "lang": "en-US", - "location": "US", - "deviceCategory": "TV", - "user_agents": { - # "youtubei_js": "Mozilla/5.0 (Linux; Cobalt) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", - "youtubei_js": "Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version", - # "yt_dlp": "Mozilla/5.0 (Linux; Cobalt) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36" - "yt_dlp": "Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version" - } - } -} - -# The queue is set to a fallback here. The actual worker-specific queue is -# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py), -# which parses the target queue from the DAG run_id. -DEFAULT_ARGS = { - 'owner': 'airflow', - 'retries': 0, - 'queue': 'queue-auth', # Fallback queue. Will be overridden by the policy hook. -} - - -# --- Helper Functions --- - -def _get_thrift_client(host, port, timeout): - """Helper to create and connect a Thrift client.""" - transport = TSocket.TSocket(host, port) - transport.setTimeout(timeout * 1000) - transport = TTransport.TFramedTransport(transport) - protocol = TBinaryProtocol.TBinaryProtocol(transport) - client = YTTokenOpService.Client(protocol) - transport.open() - logger.info(f"Connected to Thrift server at {host}:{port}") - return client, transport - -def _extract_video_id(url): - """Extracts YouTube video ID from a URL or returns the input if it's already a valid ID.""" - if not url or not isinstance(url, str): - return None - - # Check if the input is already a valid 11-character video ID - if re.fullmatch(r'[a-zA-Z0-9_-]{11}', url): - return url - - patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})'] - for pattern in patterns: - match = re.search(pattern, url) - if match: - return match.group(1) - return None - - -@task -def list_available_formats(token_data: dict, **context): - """ - Lists available formats for the given video using the info.json. - This is for debugging and informational purposes. 
- """ - import subprocess - import shlex - - info_json_path = token_data.get('info_json_path') - if not (info_json_path and os.path.exists(info_json_path)): - logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).") - return [] - - try: - cmd = [ - 'yt-dlp', - '--verbose', - '--list-formats', - '--load-info-json', info_json_path, - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stderr: - logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}") - - if process.returncode != 0: - logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}") - - available_formats = [] - if process.stdout: - logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---") - # Parse the output to get format IDs - lines = process.stdout.split('\n') - header_found = False - for line in lines: - if line.startswith('ID '): - header_found = True - continue - if header_found and line.strip() and line.strip()[0].isdigit(): - format_id = line.split()[0] - available_formats.append(format_id) - logger.info(f"Parsed available format IDs: {available_formats}") - - return available_formats - - except Exception as e: - logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True) - return [] - - -# ============================================================================= -# TASK DEFINITIONS (TaskFlow API) -# ============================================================================= - -def _resolve_formats(info_json_path: str, format_selector: str, logger) -> list[str]: - """Uses yt-dlp to resolve a format selector into a list of specific format IDs.""" - import subprocess - import shlex - - if not format_selector: - return [] - - try: - cmd = [ - 'yt-dlp', '--print', 'format_id', - '-f', format_selector, - '--load-info-json', info_json_path, - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Resolving format selector '{format_selector}' with command: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stderr: - logger.info(f"yt-dlp format resolver STDERR:\n{process.stderr}") - - if process.returncode != 0: - logger.error(f"yt-dlp format resolver failed with exit code {process.returncode}") - return [] - - output_ids = [fid for fid in process.stdout.strip().split('\n') if fid] - final_ids = [] - for fid in output_ids: - final_ids.extend(fid.split('+')) - - logger.info(f"Resolved selector '{format_selector}' to {len(final_ids)} format(s): {final_ids}") - return final_ids - - except Exception as e: - logger.error(f"An error occurred while resolving format selector: {e}", exc_info=True) - return [] - - -@task -def get_url_and_lock_profile(**context): - """ - Gets the URL to process, then locks an available auth profile from the Redis pool. - This is the first task in the pinned-worker DAG. 
- """ - params = context['params'] - ti = context['task_instance'] - - # Log the active policies - auth_policy = params.get('on_bannable_failure', 'not_set') - logger.info(f"--- Worker Policies ---") - logger.info(f" Auth Failure Policy: {auth_policy}") - logger.info(f"-----------------------") - - # --- Worker Pinning Verification --- - # This is a safeguard against a known Airflow issue where clearing a task - # can cause the task_instance_mutation_hook to be skipped, breaking pinning. - # See: https://github.com/apache/airflow/issues/20143 - expected_queue = None - if ti.run_id and '_q_' in ti.run_id: - expected_queue = ti.run_id.split('_q_')[-1] - - if not expected_queue: - # Fallback to conf if run_id parsing fails for some reason - expected_queue = params.get('worker_queue') - - if expected_queue and ti.queue != expected_queue: - error_msg = ( - f"WORKER PINNING FAILURE: Task is running on queue '{ti.queue}' but was expected on '{expected_queue}'. " - "This usually happens after manually clearing a task, which is not the recommended recovery method for this DAG. " - "To recover a failed URL, let the DAG run fail, use the 'ytdlp_mgmt_queues' DAG to requeue the URL, " - "and use the 'ytdlp_ops_orchestrator' to start a new worker loop if needed." - ) - logger.error(error_msg) - raise AirflowException(error_msg) - elif expected_queue: - logger.info(f"Worker pinning verified. Task is correctly running on queue '{ti.queue}'.") - # --- End Verification --- - - # The URL is passed by the dispatcher DAG via 'url_to_process'. - # For manual runs, we fall back to 'manual_url_to_process'. - url_to_process = params.get('url_to_process') - if not url_to_process: - manual_url_input = params.get('manual_url_to_process') - if manual_url_input: - logger.info(f"Using URL from manual run parameter: '{manual_url_input}'") - if manual_url_input == 'PULL_FROM_QUEUE': - logger.info("Manual run is set to pull from queue.") - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) - inbox_queue = f"{queue_name}_inbox" - client = _get_redis_client(redis_conn_id) - url_bytes = client.lpop(inbox_queue) - if not url_bytes: - logger.info("Redis queue is empty. No work to do. Skipping task.") - raise AirflowSkipException("Redis queue is empty. No work to do.") - url_to_process = url_bytes.decode('utf-8') - logger.info(f"Pulled URL '{url_to_process}' from queue '{inbox_queue}'.") - else: - url_to_process = manual_url_input - - if not url_to_process: - raise AirflowException("No URL to process. 
For manual runs, please provide a URL in the 'manual_url_to_process' parameter, or 'PULL_FROM_QUEUE'.") - logger.info(f"Received URL '{url_to_process}' to process.") - - # Mark the URL as in-progress in Redis - try: - redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID) - queue_name = params.get('queue_name', DEFAULT_QUEUE_NAME) - progress_queue = f"{queue_name}_progress" - client = _get_redis_client(redis_conn_id) - - progress_data = { - 'status': 'in_progress', - 'start_time': time.time(), - 'dag_run_id': context['dag_run'].run_id, - 'hostname': socket.gethostname(), - } - client.hset(progress_queue, url_to_process, json.dumps(progress_data)) - logger.info(f"Marked URL '{url_to_process}' as in-progress.") - except Exception as e: - logger.error(f"Could not mark URL as in-progress in Redis: {e}", exc_info=True) - - # V2 Profile Locking - redis_conn_id = params['redis_conn_id'] - redis_env = params['redis_env'] - profile_prefix = params['profile_prefix'] - - try: - redis_hook = _get_redis_client(redis_conn_id, return_hook=True) - key_prefix = f"{redis_env}_profile_mgmt_" - pm = ProfileManager(redis_hook=redis_hook, key_prefix=key_prefix) - logger.info(f"Initialized ProfileManager for env '{redis_env}' (Redis key prefix: '{key_prefix}')") - except Exception as e: - raise AirflowException(f"Failed to initialize ProfileManager: {e}") - - owner_id = f"airflow_auth_worker_{context['dag_run'].run_id}" - locked_profile = None - logger.info(f"Attempting to lock a profile with owner '{owner_id}' and prefix '{profile_prefix}'...") - - lock_attempts = 0 - while not locked_profile: - locked_profile = pm.lock_profile(owner=owner_id, profile_prefix=profile_prefix) - if not locked_profile: - logger.info("No auth profiles available to lock. Waiting for 15 seconds...") - time.sleep(15) - lock_attempts += 1 - if lock_attempts > 20: # 5 minutes timeout - raise AirflowException("Timed out waiting to lock an auth profile.") - - logger.info(f"Successfully locked profile: {locked_profile['name']}") - - return { - 'url_to_process': url_to_process, - 'locked_profile': locked_profile, - } - -@task -def get_token(initial_data: dict, **context): - """Makes a single attempt to get a token by calling the Thrift service directly.""" - ti = context['task_instance'] - params = context['params'] - - locked_profile = initial_data['locked_profile'] - account_id = locked_profile['name'] - assigned_proxy_url = locked_profile['proxy'] - url = initial_data['url_to_process'] - info_json_dir = os.path.join(Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles'), 'videos', 'in-progress') - - host, port = params['service_ip'], int(params['service_port']) - machine_id = params.get('machine_id') or socket.gethostname() - - # The unified JSON config is now the primary source of parameters. - request_params_json = params.get('ytdlp_config_json', '{}') - clients = None # This will be read from the JSON config on the server side. - - video_id = _extract_video_id(url) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - job_dir_name = f"{timestamp}-{video_id or 'unknown'}" - job_dir_path = os.path.join(info_json_dir, job_dir_name) - os.makedirs(job_dir_path, exist_ok=True) - info_json_path = os.path.join(job_dir_path, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json") - - # Save the received JSON config to the job directory for the download worker. 
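-    # Illustrative layout of the per-job directory created above (example names only;
-    # actual values come from video_id, account_id and the run timestamp):
-    #   {DOWNLOADS_TEMP}/videos/in-progress/20240101_120000-dQw4w9WgXcQ/
-    #       info_dQw4w9WgXcQ_<account_id>_20240101_120000.json  (may later be renamed to embed the proxy)
-    #       ytdlp.json                                           (written just below)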
- ytdlp_config_path = os.path.join(job_dir_path, 'ytdlp.json') - try: - with open(ytdlp_config_path, 'w', encoding='utf-8') as f: - # Pretty-print the JSON for readability - config_data = json.loads(request_params_json) - json.dump(config_data, f, indent=2) - logger.info(f"Saved ytdlp config to {ytdlp_config_path}") - except (IOError, json.JSONDecodeError) as e: - logger.error(f"Failed to save ytdlp.json config: {e}") - # Continue anyway, but download worker may fail. - ytdlp_config_path = None - - - client, transport = None, None - try: - timeout = int(params.get('timeout', DEFAULT_TIMEOUT)) - client, transport = _get_thrift_client(host, port, timeout) - - airflow_log_context = AirflowLogContext( - taskId=ti.task_id, - runId=ti.run_id, - tryNumber=ti.try_number - ) - - logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' (Clients: {clients}, Proxy: {assigned_proxy_url or 'any'}) ---") - - token_data = client.getOrRefreshToken( - accountId=account_id, - updateType=TokenUpdateMode.AUTO, - url=url, - clients=clients, - machineId=machine_id, - airflowLogContext=airflow_log_context, - requestParamsJson=request_params_json, - assignedProxyUrl=assigned_proxy_url - ) - - # --- Log server-side details for debugging --- - if hasattr(token_data, 'serverVersionInfo') and token_data.serverVersionInfo: - logger.info(f"--- Server Version Info ---\n{token_data.serverVersionInfo}") - - if hasattr(token_data, 'requestSummary') and token_data.requestSummary: - try: - summary_data = json.loads(token_data.requestSummary) - summary_text = summary_data.get('summary', 'Not available.') - prefetch_log = summary_data.get('prefetch_log', 'Not available.') - nodejs_log = summary_data.get('nodejs_log', 'Not available.') - ytdlp_log = summary_data.get('ytdlp_log', 'Not available.') - - logger.info(f"--- Request Summary ---\n{summary_text}") - logger.info(f"--- Prefetch Log ---\n{prefetch_log}") - logger.info(f"--- Node.js Log ---\n{nodejs_log}") - logger.info(f"--- yt-dlp Log ---\n{ytdlp_log}") - except (json.JSONDecodeError, AttributeError): - logger.info(f"--- Raw Request Summary (could not parse JSON) ---\n{token_data.requestSummary}") - - if hasattr(token_data, 'communicationLogPaths') and token_data.communicationLogPaths: - logger.info("--- Communication Log Paths on Server ---") - for log_path in token_data.communicationLogPaths: - logger.info(f" - {log_path}") - # --- End of server-side logging --- - - if not token_data or not token_data.infoJson: - raise AirflowException("Thrift service did not return valid info.json data.") - - # Save info.json to file - with open(info_json_path, 'w', encoding='utf-8') as f: - f.write(token_data.infoJson) - - proxy = token_data.socks - - # Rename file with proxy - final_info_json_path = info_json_path - if proxy: - sanitized_proxy = proxy.replace('://', '---') - new_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}_proxy_{sanitized_proxy}.json" - new_path = os.path.join(job_dir_path, new_filename) - try: - os.rename(info_json_path, new_path) - final_info_json_path = new_path - logger.info(f"Renamed info.json to include proxy: {new_path}") - except OSError as e: - logger.error(f"Failed to rename info.json to include proxy: {e}. 
Using original path.") - - return { - 'info_json_path': final_info_json_path, - 'job_dir_path': job_dir_path, - 'socks_proxy': proxy, - 'ytdlp_command': None, - 'successful_account_id': account_id, - 'original_url': url, - 'ytdlp_config_path': ytdlp_config_path, - 'ytdlp_config_json': request_params_json, - # Pass locked profile through for unlock/activity tasks - 'locked_profile': locked_profile, - } - - except (PBServiceException, PBUserException) as e: - error_message = e.message or "Unknown Thrift error" - error_code = getattr(e, 'errorCode', 'THRIFT_ERROR') - - # If a "Video unavailable" error mentions rate-limiting, it's a form of bot detection. - if error_code == 'VIDEO_UNAVAILABLE' and 'rate-limited' in error_message.lower(): - logger.warning("Re-classifying rate-limit-related 'VIDEO_UNAVAILABLE' error as 'BOT_DETECTED'.") - error_code = 'BOT_DETECTED' - - unrecoverable_video_errors = [ - "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL", - "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED" - ] - - if error_code in unrecoverable_video_errors: - error_details = { - 'error_message': error_message, - 'error_code': error_code, - 'proxy_url': None - } - ti.xcom_push(key='error_details', value=error_details) - logger.warning(f"Unrecoverable video error '{error_code}' - {error_message}. Marking for skip without failing the task.") - return {'status': 'unrecoverable_video_error'} - else: - logger.error(f"Thrift error getting token: {error_code} - {error_message}") - - error_details = { - 'error_message': error_message, - 'error_code': error_code, - 'proxy_url': None - } - ti.xcom_push(key='error_details', value=error_details) - raise AirflowException(f"ytops-client get-info failed: {error_message}") - except TTransportException as e: - logger.error(f"Thrift transport error: {e}", exc_info=True) - error_details = { - 'error_message': f"Thrift transport error: {e}", - 'error_code': 'TRANSPORT_ERROR', - 'proxy_url': None - } - ti.xcom_push(key='error_details', value=error_details) - raise AirflowException(f"Thrift transport error: {e}") - finally: - if transport and transport.isOpen(): - transport.close() - - -@task -def generate_and_push_download_tasks(token_data: dict, **context): - """ - On success, resolves the format selector into individual format IDs and pushes - granular download tasks to the `queue_dl_format_tasks` Redis list. - Also records the successful auth activity for the profile. - """ - params = context['params'] - url = token_data['original_url'] - info_json_path = token_data['info_json_path'] - locked_profile = token_data['locked_profile'] - - # Resolve format selector from the JSON config - try: - ytdlp_config = json.loads(token_data.get('ytdlp_config_json', '{}')) - download_format_selector = ytdlp_config.get('download_format', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best') - # This profile prefix is for the *download* worker that will pick up the task - download_profile_prefix = ytdlp_config.get('download_profile_prefix', 'download_user') - except (json.JSONDecodeError, KeyError): - logger.error("Could not parse download_format from ytdlp_config_json. 
Falling back to default.") - download_format_selector = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' - download_profile_prefix = 'download_user' - - resolved_formats = _resolve_formats(info_json_path, download_format_selector, logger) - if not resolved_formats: - raise AirflowException(f"Format selector '{download_format_selector}' resolved to no formats for {url}.") - - tasks = [] - for format_id in resolved_formats: - task_payload = { - "info_json_path": info_json_path, - "format_id": format_id, - "profile_prefix": download_profile_prefix, - "original_url": url, - "dag_run_id": context['dag_run'].run_id, - } - tasks.append(json.dumps(task_payload)) - - dl_task_queue = "queue_dl_format_tasks" - auth_result_queue = f"{params['queue_name']}_result" - progress_queue = f"{params['queue_name']}_progress" - - result_data = { - 'status': 'success', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'token_data': {k: v for k, v in token_data.items() if k != 'locked_profile'} # Don't store profile in result - } - - try: - redis_client = _get_redis_client(params['redis_conn_id']) - with redis_client.pipeline() as pipe: - pipe.rpush(dl_task_queue, *tasks) - pipe.hset(auth_result_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Pushed {len(tasks)} granular download task(s) for URL '{url}' to '{dl_task_queue}'.") - logger.info(f"Stored success result for auth on URL '{url}' in '{auth_result_queue}'.") - except Exception as e: - logger.error(f"Failed to push download tasks to Redis: {e}", exc_info=True) - raise AirflowException("Failed to push tasks to Redis.") - - # Return the original token_data (including locked_profile) for the unlock task - return token_data - - - -@task(trigger_rule='one_success') -def continue_processing_loop(token_data: dict | None = None, **context): - """ - After a run, triggers a new dispatcher to continue the processing loop, - passing along the account/proxy to make them sticky if available. - """ - params = context['params'] - dag_run = context['dag_run'] - - # Do not continue the loop for manual runs of the worker DAG. - # A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'. - if not dag_run.run_id.startswith('worker_run_'): - logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") - return - - dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_auth' - dag_model = DagModel.get_dagmodel(dispatcher_dag_id) - if dag_model and dag_model.is_paused: - logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Stopping processing loop.") - return - - # Create a new unique run_id for the dispatcher. - new_dispatcher_run_id = f"retriggered_by_worker_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}" - - # Pass all original parameters from the orchestrator through to the new dispatcher run. - conf_to_pass = {k: v for k, v in params.items() if v is not None} - conf_to_pass['worker_index'] = params.get('worker_index') - - if token_data: - # On success path, make the account and proxy "sticky" for the next run. - conf_to_pass['account_id'] = token_data.get('successful_account_id') - conf_to_pass['assigned_proxy_url'] = token_data.get('socks_proxy') - logger.info(f"Worker finished successfully. 
Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop with sticky account/proxy.") - logger.info(f" - Sticky Account: {conf_to_pass.get('account_id')}") - logger.info(f" - Sticky Proxy: {conf_to_pass.get('assigned_proxy_url')}") - else: - # On failure/skip paths, no token_data is passed. Clear sticky params to allow re-selection. - conf_to_pass.pop('account_id', None) - conf_to_pass.pop('assigned_proxy_url', None) - logger.info(f"Worker finished on a non-success path. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop without sticky account/proxy.") - - # The new dispatcher will pull its own URL and determine its own queue. - conf_to_pass.pop('url_to_process', None) - conf_to_pass.pop('worker_queue', None) - - trigger_dag( - dag_id=dispatcher_dag_id, - run_id=new_dispatcher_run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - - - - - - -# ============================================================================= -# DAG Definition with TaskGroups -# ============================================================================= -@task(trigger_rule='all_done') -def unlock_profile(**context): - """ - Unlocks the profile and records activity (success or failure). - This task runs regardless of upstream success or failure. - """ - params = context['params'] - dag_run = context['dag_run'] - - failed_tasks = [ti for ti in dag_run.get_task_instances() if ti.state == 'failed'] - is_success = not failed_tasks - activity_type = 'auth' if is_success else 'auth_error' - - ti = context['task_instance'] - initial_data = ti.xcom_pull(task_ids='get_url_and_lock_profile') - - locked_profile = initial_data.get('locked_profile') if initial_data else None - - if not locked_profile: - logger.warning("No locked_profile data found. Cannot unlock or record activity.") - return - - profile_name = locked_profile.get('name') - owner_id = f"airflow_auth_worker_{dag_run.run_id}" - - try: - redis_conn_id = params['redis_conn_id'] - redis_env = params['redis_env'] - redis_hook = _get_redis_client(redis_conn_id, return_hook=True) - key_prefix = f"{redis_env}_profile_mgmt_" - pm = ProfileManager(redis_hook=redis_hook, key_prefix=key_prefix) - - logger.info(f"Recording activity '{activity_type}' for profile '{profile_name}'.") - pm.record_activity(profile_name, activity_type) - - logger.info(f"Unlocking profile '{profile_name}' with owner '{owner_id}'.") - pm.unlock_profile(profile_name, owner=owner_id) - - except Exception as e: - logger.error(f"Failed to unlock profile or record activity for '{profile_name}': {e}", exc_info=True) - - -# ============================================================================= -# DAG Definition with TaskGroups -# ============================================================================= -with DAG( - dag_id='ytdlp_ops_v02_worker_per_url_auth', - default_args=DEFAULT_ARGS, - schedule=None, - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 'worker', 'v2'], - doc_md=__doc__, - render_template_as_native_obj=True, - is_paused_upon_creation=True, - params={ - # V2 Profile Params - 'redis_env': Param("sim_auth", type="string", title="[V2 Profiles] Redis Environment", description="The environment for v2 profile management (e.g., 'sim_auth'). 
Determines the Redis key prefix."), - 'profile_prefix': Param("auth_user", type="string", title="[V2 Profiles] Profile Prefix", description="The prefix for auth profiles that workers should attempt to lock."), - - 'queue_name': Param(DEFAULT_QUEUE_NAME, type="string"), - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"), - 'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string"), - 'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer"), - # DEPRECATED PARAMS (kept for reference, but no longer used) - 'account_pool': Param('default_account', type="string", description="DEPRECATED: Use profile_prefix instead."), - 'account_pool_size': Param(None, type=["integer", "null"], description="DEPRECATED: Pool size is managed in Redis."), - 'prepend_client_to_account': Param(True, type="boolean", description="DEPRECATED"), - 'assigned_proxy_url': Param(None, type=["string", "null"], description="DEPRECATED: Proxy is now determined by the locked profile."), - 'account_id': Param(None, type=["string", "null"], description="DEPRECATED: Profile is locked dynamically."), - 'worker_index': Param(None, type=["integer", "null"], description="DEPRECATED"), - 'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="DEPRECATED"), - - 'machine_id': Param(None, type=["string", "null"]), - 'clients': Param('tv_simply', type="string", description="DEPRECATED: This is now read from the ytdlp_config_json."), - 'timeout': Param(DEFAULT_TIMEOUT, type="integer"), - 'on_bannable_failure': Param('stop_loop_on_auth_proceed_on_download_error', type="string", enum=['stop_loop', 'retry_with_new_account', 'retry_without_ban', 'retry_and_ban_account_only', 'retry_on_connection_error', 'proceed_loop_under_manual_inspection', 'stop_loop_on_auth_proceed_on_download_error']), - # --- Unified JSON Config (passed from orchestrator) --- - 'ytdlp_config_json': Param('{}', type="string", title="[Internal] Unified JSON config from orchestrator."), - # --- Manual Run / Internal Parameters --- - 'manual_url_to_process': Param('iPwdia3gAnk', type=["string", "null"], title="[Manual Run] URL to Process", description="For manual runs, provide a single YouTube URL, or the special value 'PULL_FROM_QUEUE' to pull one URL from the Redis inbox. This is ignored if triggered by the dispatcher."), - 'url_to_process': Param(None, type=["string", "null"], title="[Internal] URL from Dispatcher", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), - 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG and should not be used for manual runs."), - } -) as dag: - initial_data = get_url_and_lock_profile() - unlock_profile_task = unlock_profile() - - # --- Task Instantiation with TaskGroups --- - - # This is simplified. The auth worker does not retry with different accounts anymore, - # as the policy enforcer is responsible for managing profile health. If get_token fails, - # the profile is unlocked with a failure, and the loop continues to the next URL. 
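# Summary of the control flow wired up below (annotation only, not additional tasks):
#   get_url_and_lock_profile >> auth_attempt (get_token >> list_available_formats >>
#   generate_and_push_download_tasks) >> unlock_profile >> continue_processing_loop.
#   unlock_profile runs with trigger_rule='all_done', so the profile is released and an
#   'auth' or 'auth_error' activity is recorded whether the attempt succeeded or failed;
#   continue_processing_loop runs with trigger_rule='one_success', so the loop is only
#   re-triggered once that cleanup step has completed.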
- - # --- Task Group 1: Initial Attempt --- - with TaskGroup("auth_attempt", tooltip="Token acquisition attempt") as auth_attempt_group: - token_data = get_token(initial_data) - list_formats_task = list_available_formats(token_data=token_data) - generate_tasks = generate_and_push_download_tasks(token_data=token_data) - - token_data >> list_formats_task >> generate_tasks - - # --- Failure Handling --- - # `unlock_profile` is the terminal task, running after all upstream tasks are done. - # It determines success/failure and records activity. - - # --- DAG Dependencies --- - initial_data >> auth_attempt_group - auth_attempt_group >> unlock_profile_task - unlock_profile_task >> continue_processing_loop(token_data=None) # Continue loop regardless of outcome diff --git a/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py b/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py deleted file mode 100644 index d878e34..0000000 --- a/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py +++ /dev/null @@ -1,1110 +0,0 @@ -# -*- coding: utf-8 -*- -# vim:fenc=utf-8 -# -# Copyright © 2024 rl -# -# Distributed under terms of the MIT license. - -""" -DAG for downloading a single YouTube URL based on pre-fetched token data. -This is the "Download Worker" part of a separated Auth/Download pattern. -It receives a job payload with all necessary token info and handles only the -downloading and probing of media files. -""" -from __future__ import annotations - -# --- Add project root to path to allow for yt-ops-client imports --- -import sys -# The yt-ops-client package is installed in editable mode in /app -if '/app' not in sys.path: - sys.path.insert(0, '/app') - -from airflow.decorators import task, task_group -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.models import Variable -from airflow.models.dag import DAG, DagModel -from airflow.models.param import Param -from airflow.models.xcom_arg import XComArg -from airflow.operators.dummy import DummyOperator -from airflow.utils.dates import days_ago -from airflow.utils.task_group import TaskGroup -from airflow.api.common.trigger_dag import trigger_dag -from datetime import datetime, timedelta -import concurrent.futures -import json -import logging -import os -import random - -# Configure logging -logger = logging.getLogger(__name__) -import re -import redis -import socket -import time -import traceback -import uuid -import shutil - -# Import utility functions and Thrift modules -from utils.redis_utils import _get_redis_client -from pangramia.yt.common.ttypes import TokenUpdateMode, AirflowLogContext -from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException -from pangramia.yt.tokens_ops import YTTokenOpService -from thrift.protocol import TBinaryProtocol -from thrift.transport import TSocket, TTransport -from thrift.transport.TTransport import TTransportException - -# ytops_client imports for v2 profile management -try: - from ytops_client.profile_manager_tool import ProfileManager, format_duration, format_timestamp -except ImportError as e: - logger.critical(f"Could not import ytops_client modules: {e}. 
Ensure yt-ops-client package is installed correctly in Airflow's environment.") - raise - - -# --- Client Stats Helper --- - -def _update_client_stats(redis_client, clients_str: str, status: str, url: str, machine_id: str, dag_run_id: str): - """Updates success/failure statistics for a client type in Redis.""" - if not clients_str: - logger.warning("Cannot update client stats: 'clients' string is empty.") - return - - # Assumption: The service tries clients in the order provided. - # We attribute the result to the first client in the list. - primary_client = clients_str.split(',')[0].strip() - if not primary_client: - logger.warning("Cannot update client stats: could not determine primary client.") - return - - stats_key = "client_stats" - - try: - # Using a pipeline with WATCH for safe concurrent updates. - with redis_client.pipeline() as pipe: - pipe.watch(stats_key) - - current_stats_json = redis_client.hget(stats_key, primary_client) - stats = {} - if current_stats_json: - try: - stats = json.loads(current_stats_json) - except json.JSONDecodeError: - logger.warning(f"Could not parse existing stats for client '{primary_client}'. Resetting stats.") - stats = {} - - stats.setdefault('success_count', 0) - stats.setdefault('failure_count', 0) - - details = { - 'timestamp': time.time(), 'url': url, - 'machine_id': machine_id, 'dag_run_id': dag_run_id, - } - - if status == 'success': - stats['success_count'] += 1 - stats['latest_success'] = details - elif status == 'failure': - stats['failure_count'] += 1 - stats['latest_failure'] = details - - pipe.multi() - pipe.hset(stats_key, primary_client, json.dumps(stats)) - pipe.execute() - - logger.info(f"Successfully updated '{status}' stats for client '{primary_client}'.") - - except redis.exceptions.WatchError: - logger.warning(f"WatchError updating stats for client '{primary_client}'. Another process updated it. Skipping this update.") - except Exception as e: - logger.error(f"Failed to update client stats for '{primary_client}': {e}", exc_info=True) - - -# Default settings from Airflow Variables or hardcoded fallbacks -DEFAULT_QUEUE_NAME = 'queue2_dl' -DEFAULT_REDIS_CONN_ID = 'redis_default' -DEFAULT_TIMEOUT = 3600 -DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1") -DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080) - -# The queue is set to a fallback here. The actual worker-specific queue is -# assigned just-in-time by the task_instance_mutation_hook (see: airflow/config/custom_task_hooks.py), -# which parses the target queue from the DAG run_id. -DEFAULT_ARGS = { - 'owner': 'airflow', - 'retries': 0, - 'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook. 
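# For context, the policy hook referenced above lives in airflow/config/custom_task_hooks.py
# (not shown in this file). A hypothetical sketch of its shape, assuming the target queue is
# encoded in the run_id; the real parsing logic may differ:
#
#   def task_instance_mutation_hook(task_instance):
#       match = re.search(r'worker_run_(?P<queue>[\w-]+)_', task_instance.run_id or '')
#       if match:
#           task_instance.queue = match.group('queue')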
-} - - -# --- Helper Functions --- - -def _extract_video_id(url): - """Extracts YouTube video ID from a URL or returns the input if it's already a valid ID.""" - if not url or not isinstance(url, str): - return None - - # Check if the input is already a valid 11-character video ID - if re.fullmatch(r'[a-zA-Z0-9_-]{11}', url): - return url - - patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})'] - for pattern in patterns: - match = re.search(pattern, url) - if match: - return match.group(1) - return None - -# ============================================================================= -# TASK DEFINITIONS (TaskFlow API) -# ============================================================================= - -@task -def lock_profile_and_find_task(**context): - """ - Profile-first worker logic: - 1. Locks an available download profile from the Redis pool. - 2. Scans the granular download task queue for a job matching the profile's prefix. - 3. Returns both the locked profile and the claimed job data. - """ - params = context['params'] - ti = context['task_instance'] - dag_run = context['dag_run'] - - redis_conn_id = params['redis_conn_id'] - redis_env = params['redis_env'] - profile_prefix = params['profile_prefix'] - - # Initialize ProfileManager - try: - redis_hook = _get_redis_client(redis_conn_id, return_hook=True) - key_prefix = f"{redis_env}_profile_mgmt_" - pm = ProfileManager(redis_hook=redis_hook, key_prefix=key_prefix) - logger.info(f"Initialized ProfileManager for env '{redis_env}' (Redis key prefix: '{key_prefix}')") - except Exception as e: - raise AirflowException(f"Failed to initialize ProfileManager: {e}") - - # Step 1: Lock a profile - owner_id = f"airflow_dl_worker_{dag_run.run_id}" - locked_profile = None - logger.info(f"Attempting to lock a profile with owner '{owner_id}' and prefix '{profile_prefix}'...") - - # This is a blocking loop until a profile is found or the task times out. - while not locked_profile: - locked_profile = pm.lock_profile(owner=owner_id, profile_prefix=profile_prefix) - if not locked_profile: - logger.info("No download profiles available to lock. Waiting for 15 seconds...") - time.sleep(15) - - logger.info(f"Successfully locked profile: {locked_profile['name']}") - - # Step 2: Find a matching task - task_queue = "queue_dl_format_tasks" - job_data = None - logger.info(f"Scanning Redis list '{task_queue}' for a matching task...") - - # This is a simple, non-atomic 'claim' logic suitable for Airflow's concurrency model. - # It's not perfectly race-proof but is a reasonable starting point. - redis_client = pm.redis - max_scan_attempts = 100 # To prevent infinite loops on a busy queue - - for i in range(max_scan_attempts): - task_json = redis_client.lpop(task_queue) - if not task_json: - logger.info("Task queue is empty. Waiting for 10 seconds...") - time.sleep(10) - continue - - try: - task_data = json.loads(task_json) - if task_data.get('profile_prefix') == profile_prefix: - job_data = task_data - logger.info(f"Claimed task for profile prefix '{profile_prefix}': {job_data}") - break - else: - # Not a match, push it back to the end of the queue and try again. - redis_client.rpush(task_queue, task_json) - except (json.JSONDecodeError, TypeError): - logger.error(f"Could not parse task from queue. Discarding item: {task_json}") - - if not job_data: - # If no task is found, unlock the profile and fail gracefully. 
- pm.unlock_profile(locked_profile['name'], owner=owner_id) - raise AirflowSkipException(f"Could not find a matching task in '{task_queue}' for prefix '{profile_prefix}' after {max_scan_attempts} attempts.") - - # Combine profile and job data to pass to the next task - return { - 'locked_profile': locked_profile, - 'job_data': job_data, - } - -@task -def list_available_formats(worker_data: dict, **context): - """ - Lists available formats for the given video using the info.json. - This is for debugging and informational purposes. - """ - import subprocess - import shlex - - info_json_path = worker_data['job_data'].get('info_json_path') - if not (info_json_path and os.path.exists(info_json_path)): - logger.warning(f"Cannot list formats: info.json path is missing or file does not exist ({info_json_path}).") - return [] - - try: - cmd = [ - 'yt-dlp', - '--verbose', - '--list-formats', - '--load-info-json', info_json_path, - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Executing yt-dlp command to list formats: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stderr: - logger.info(f"yt-dlp --list-formats STDERR:\n{process.stderr}") - - if process.returncode != 0: - logger.error(f"yt-dlp --list-formats failed with exit code {process.returncode}") - - available_formats = [] - if process.stdout: - logger.info(f"--- Available Formats ---\n{process.stdout}\n--- End of Formats ---") - # Parse the output to get format IDs - lines = process.stdout.split('\n') - header_found = False - for line in lines: - if line.startswith('ID '): - header_found = True - continue - if header_found and line.strip() and line.strip()[0].isdigit(): - format_id = line.split()[0] - available_formats.append(format_id) - logger.info(f"Parsed available format IDs: {available_formats}") - - return available_formats - - except Exception as e: - logger.error(f"An error occurred while trying to list formats: {e}", exc_info=True) - return [] - - -def _resolve_generic_selector(selector: str, info_json_path: str, logger) -> str | list[str] | None: - """ - Uses yt-dlp to resolve a generic format selector into specific, numeric format ID(s). - Returns a numeric selector string (e.g., '18'), a list of IDs for '+' selectors - (e.g., ['299', '140']), or None if resolution fails. - """ - import subprocess - import shlex - - try: - cmd = [ - 'yt-dlp', - '--print', 'format_id', - '-f', selector, - '--load-info-json', info_json_path, - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Resolving generic selector '{selector}' with command: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stderr: - # yt-dlp often prints warnings to stderr that are not fatal. - # e.g., "Requested format selector '...' contains no available formats" - logger.info(f"yt-dlp resolver STDERR for selector '{selector}':\n{process.stderr}") - - if process.returncode != 0: - logger.error(f"yt-dlp resolver for selector '{selector}' failed with exit code {process.returncode}") - return None - - output_ids = process.stdout.strip().split('\n') - output_ids = [fid for fid in output_ids if fid] # Remove empty lines - - if not output_ids: - logger.warning(f"Selector '{selector}' resolved to no format IDs.") - return None - - # yt-dlp might return '137+140' on one line, or '137\n140' on multiple. - # We need to handle both to get individual IDs. 
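# Worked example (format IDs are placeholders): a merge selector such as
# "bestvideo[ext=mp4]+bestaudio[ext=m4a]" may resolve to "137+140" on one line or to
# "137" and "140" on separate lines; both shapes collapse to ['137', '140'] and, because
# the selector contains '+', the whole list is returned so each ID is downloaded separately.
# A fallback selector without '+' (e.g. "best[ext=mp4]/best") yields a single ID such as
# "18", and only that string is returned.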
- final_ids = [] - for fid in output_ids: - final_ids.extend(fid.split('+')) - - # If the original selector was for merging (contained '+'), return individual IDs for separate downloads. - # Otherwise, yt-dlp has already chosen the best one from a fallback list, so we just use it. - if '+' in selector: - resolved_selector = final_ids - else: - resolved_selector = final_ids[0] # yt-dlp gives the single best choice - - logger.info(f"Successfully resolved selector '{selector}' to '{resolved_selector}'.") - return resolved_selector - - except Exception as e: - logger.error(f"An error occurred while resolving selector '{selector}': {e}", exc_info=True) - return None - - -def _check_format_expiry(info_json_path: str, formats_to_check: list[str], logger) -> bool: - """ - Checks if any of the specified format URLs have expired using yt-ops-client. - Returns True if any format is expired, False otherwise. - """ - import subprocess - import shlex - - if not formats_to_check: - return False - - logger.info(f"Checking for URL expiry for formats: {formats_to_check}") - - # We can check all formats at once. The tool will report if any of them are expired. - try: - cmd = [ - 'ytops-client', 'check-expiry', - '--load-info-json', info_json_path, - '-f', ','.join(formats_to_check), - ] - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - logger.info(f"Executing expiry check for all selected formats: {copy_paste_cmd}") - - process = subprocess.run(cmd, capture_output=True, text=True, timeout=60) - - if process.stdout: - logger.info(f"ytops-client check-expiry STDOUT:\n{process.stdout}") - if process.stderr: - logger.info(f"ytops-client check-expiry STDERR:\n{process.stderr}") - - # The tool exits with a non-zero code if a URL is expired. - if process.returncode != 0: - logger.error("Expiry check failed. One or more URLs are likely expired.") - return True # An expiry was found - - except Exception as e: - logger.error(f"An error occurred during expiry check: {e}", exc_info=True) - # To be safe, treat this as a potential expiry to trigger re-authentication. - return True - - logger.info("No expired URLs found for the selected formats.") - return False - - -@task -def download_and_probe(worker_data: dict, **context): - """ - Uses profile and job data to download and probe a single media format. - """ - try: - import subprocess - import shlex - import concurrent.futures - - params = context['params'] - job_data = worker_data['job_data'] - locked_profile = worker_data['locked_profile'] - - info_json_path = job_data.get('info_json_path') - proxy = locked_profile.get('proxy') - - if not (info_json_path and os.path.exists(info_json_path)): - raise AirflowException(f"Error: info.json path is missing or file does not exist ({info_json_path}).") - - # WORKAROUND: The auth service may inject a 'js_runtimes' key into the info.json - # that is incompatible with the yt-dlp library's expectations, causing a crash. - # We remove it here before passing it to the download tool. - try: - with open(info_json_path, 'r+', encoding='utf-8') as f: - info_data = json.load(f) - if 'js_runtimes' in info_data: - logger.info("Found 'js_runtimes' key in info.json. 
Removing it as a workaround for yt-dlp library incompatibility.") - del info_data['js_runtimes'] - f.seek(0) - json.dump(info_data, f) - f.truncate() - except Exception as e: - logger.warning(f"Could not process/remove 'js_runtimes' from info.json: {e}", exc_info=True) - - download_dir = os.path.dirname(info_json_path) - - download_format = job_data.get('format_id') - if not download_format: - raise AirflowException("The 'format_id' is missing from the job data.") - - output_template = params.get('output_path_template', "%(id)s.f%(format_id)s.%(ext)s") - full_output_path = os.path.join(download_dir, output_template) - retry_on_probe_failure = params.get('retry_on_probe_failure', False) - - def run_yt_dlp_command(format_selector: str): - """Constructs and runs a yt-ops-client download command, returning a list of final filenames.""" - downloader = params.get('downloader', 'py') - cmd = ['ytops-client', 'download', downloader, '--load-info-json', info_json_path, '-f', format_selector] - - if downloader == 'py': - if proxy: - cmd.extend(['--proxy', proxy]) - cmd.extend(['--output-dir', download_dir]) - # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args - - # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args - py_extra_args = ['--output', output_template] - if params.get('yt_dlp_test_mode'): - py_extra_args.append('--test') - - existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '') - final_extra_args_list = existing_extra + py_extra_args - if final_extra_args_list: - final_extra_args_str = shlex.join(final_extra_args_list) - cmd.extend(['--extra-ytdlp-args', final_extra_args_str]) - - elif downloader == 'aria-rpc': - # For aria2c running on the host, the proxy (if also on the host) should be referenced via localhost. - # The user-agent is set by yt-dlp's extractor, not directly here. The default is Cobalt-based. - if proxy: - proxy_port_match = re.search(r':(\d+)$', proxy) - if proxy_port_match: - proxy_port = proxy_port_match.group(1) - aria_proxy = f"socks5://127.0.0.1:{proxy_port}" - cmd.extend(['--proxy', aria_proxy]) - logger.info(f"Using translated proxy for host-based aria2c: {aria_proxy}") - else: - logger.warning(f"Could not parse port from proxy '{proxy}'. Passing it to aria2c as-is.") - cmd.extend(['--proxy', proxy]) - - # The remote-dir is the path relative to aria2c's working directory on the host. - # The output-dir is the container's local path to the same shared volume. - remote_dir = os.path.relpath(download_dir, '/opt/airflow/downloadfiles/videos') - cmd.extend([ - '--aria-host', params.get('aria_host', '172.17.0.1'), - '--aria-port', str(params.get('aria_port', 6800)), - '--aria-secret', params.get('aria_secret'), - '--wait', - '--output-dir', download_dir, - '--remote-dir', remote_dir, - ]) - if 'dashy' in format_selector: - cmd.extend([ - '--auto-merge-fragments', - '--fragments-dir', download_dir, - ]) - if params.get('yt_dlp_cleanup_mode'): - cmd.append('--cleanup') - - elif downloader == 'cli': - # Overwrite cmd to call yt-dlp directly - cmd = ['yt-dlp', '--load-info-json', info_json_path, '-f', format_selector] - if proxy: - cmd.extend(['--proxy', proxy]) - - # The 'cli' tool is the old yt-dlp wrapper, so it takes similar arguments. 
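# Illustrative command assembled by this branch (path, format ID and proxy are placeholders):
#   yt-dlp --load-info-json /downloads/<video_id>/info.json -f 137 \
#       --proxy socks5://host:1080 <yt_dlp_extra_args...> \
#       --output /downloads/<video_id>/%(id)s.f%(format_id)s.%(ext)s --verbose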
- cli_extra_args = ['--output', full_output_path, '--verbose'] - if params.get('yt_dlp_test_mode'): - cli_extra_args.append('--test') - - existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '') - final_extra_args = existing_extra + cli_extra_args - if final_extra_args: - cmd.extend(final_extra_args) - - copy_paste_cmd = ' '.join(shlex.quote(arg) for arg in cmd) - tool_name = 'yt-dlp' if downloader == 'cli' else 'ytops-client' - logger.info(f"--- Preparing to execute {tool_name} ---") - logger.info(f"Full {tool_name} command for format '{format_selector}':") - logger.info(copy_paste_cmd) - logger.info(f"-----------------------------------------") - process = subprocess.run(cmd, capture_output=True, text=True, timeout=3600) - - if process.stdout: - logger.info(f"Download tool STDOUT for format '{format_selector}':\n{process.stdout}") - if process.stderr: - logger.info(f"Download tool STDERR for format '{format_selector}':\n{process.stderr}") - - if process.returncode != 0 or "ERROR:" in process.stderr: - logger.error(f"Download tool failed for format '{format_selector}' with exit code {process.returncode}") - if "ERROR:" in process.stderr and process.returncode == 0: - logger.error("Detected 'ERROR:' in stderr, treating as failure despite exit code 0.") - # Pass stderr in the exception for better parsing in the outer try/except block - raise AirflowException(f"Download command failed for format '{format_selector}'. Stderr: {process.stderr}") - - output_files = [] - if downloader == 'cli': - # Parse yt-dlp's verbose output to find the final filename - final_filename = None - for line in process.stdout.strip().split('\n'): - # Case 1: Simple download, no merge - dest_match = re.search(r'\[download\] Destination: (.*)', line) - if dest_match: - final_filename = dest_match.group(1).strip() - - # Case 2: Formats are merged into a new file. This path is absolute if -o is absolute. - merge_match = re.search(r'\[Merger\] Merging formats into "(.*)"', line) - if merge_match: - final_filename = merge_match.group(1).strip() - - if final_filename and os.path.exists(final_filename): - output_files.append(final_filename) - else: # Logic for 'py' and 'aria-rpc' - for line in process.stdout.strip().split('\n'): - # For aria-rpc, parse "Download and merge successful: " or "Download successful: " - match = re.search(r'successful: (.+)', line) - if match: - filepath = match.group(1).strip() - if os.path.exists(filepath): - output_files.append(filepath) - else: - logger.warning(f"File path from aria-rpc output does not exist locally: '{filepath}'") - # For py, it's just the path - elif os.path.exists(line.strip()): - output_files.append(line.strip()) - - if not params.get('yt_dlp_test_mode') and not output_files: - raise AirflowException(f"Download for format '{format_selector}' finished but no output files were found or exist.") - - log_prefix = "SUCCESS (Test Mode):" if params.get('yt_dlp_test_mode') else "SUCCESS:" - logger.info(f"{log_prefix} Command for format '{format_selector}' complete. 
Files: {output_files}") - return output_files - - def run_ffmpeg_probe(filename): - """Probes a file with ffmpeg to check for corruption.""" - logger.info(f"Probing downloaded file: {filename}") - try: - subprocess.run(['ffmpeg', '-v', 'error', '-sseof', '-10', '-i', filename, '-c', 'copy', '-f', 'null', '-'], check=True, capture_output=True, text=True) - logger.info(f"SUCCESS: Probe confirmed valid media file: {filename}") - except subprocess.CalledProcessError as e: - logger.error(f"ffmpeg probe failed for '{filename}'. File may be corrupt.") - logger.error(f"ffmpeg STDERR: {e.stderr}") - raise AirflowException(f"ffmpeg probe failed for {filename}.") - - def _download_and_probe_formats(formats_to_process: list[str] | str): - """ - Helper to download a list of format IDs (or a single complex selector) and probe the results. - Returns a tuple of (successful_files, failed_probe_files). - """ - all_downloaded_files = [] - delay_between_formats = params.get('delay_between_formats_s', 0) - - if isinstance(formats_to_process, list) and formats_to_process: - logger.info(f"Downloading {len(formats_to_process)} format(s) sequentially: {formats_to_process}") - for i, fid in enumerate(formats_to_process): - all_downloaded_files.extend(run_yt_dlp_command(fid)) - if delay_between_formats > 0 and i < len(formats_to_process) - 1: - logger.info(f"Waiting {delay_between_formats}s before next format download...") - time.sleep(delay_between_formats) - - elif isinstance(formats_to_process, str): - logger.info(f"Using complex format selector '{formats_to_process}'. Running as a single command.") - all_downloaded_files = run_yt_dlp_command(formats_to_process) - - if not all_downloaded_files: - logger.warning("Download process completed but produced no files.") - return [], [] - - if params.get('yt_dlp_test_mode'): - logger.info("Test mode is enabled. Skipping probe of output files.") - return all_downloaded_files, [] - - if params.get('skip_probe'): - logger.info("Skipping probe of output files as per configuration.") - return all_downloaded_files, [] - - successful_probes, failed_probes = [], [] - logger.info(f"Probing {len(all_downloaded_files)} downloaded file(s) sequentially...") - for filename in all_downloaded_files: - try: - run_ffmpeg_probe(filename) - successful_probes.append(filename) - except Exception: - failed_probes.append(filename) - - return successful_probes, failed_probes - - # --- Main Execution Logic --- - final_formats_to_download = download_format - - if not final_formats_to_download: - raise AirflowException("The format_id for this job is empty.") - - # --- Check for expired URLs before attempting download --- - if _check_format_expiry(info_json_path, [final_formats_to_download], logger): - # If URL is expired, we need to fail the task so it can be re-queued for auth. - # We also need to record a failure for the profile. - raise AirflowException("Format URL has expired. The job must be re-authenticated.") - - # --- Initial Download and Probe --- - # The worker now handles one format at a time. - successful_files, failed_files = _download_and_probe_formats(final_formats_to_download) - - if params.get('yt_dlp_test_mode'): - logger.info(f"Test mode: yt-dlp returned {len(successful_files)} filenames. Skipping probe failure checks.") - if not successful_files: - raise AirflowException("Test run did not produce any filenames.") - # Do not return here. Proceed to the cleanup and move logic. 
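# Illustrative retry path handled below, taken only when retry_on_probe_failure is enabled
# (filename is a placeholder): if the probe of "dQw4w9WgXcQ.f137.mp4" fails, the format ID
# "137" is recovered via the r'\.f([\d]+)\.' pattern, the corrupt file is renamed to
# "<name>.probe_failed_<ts>", and that format is re-downloaded and probed exactly once more.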
- - final_success_list = successful_files - if failed_files: - # --- Handle Probe Failures and Retry --- - if not retry_on_probe_failure: - raise AirflowException(f"Probe failed for {len(failed_files)} file(s) and retry is disabled: {failed_files}") - - logger.warning(f"Probe failed for {len(failed_files)} file(s). Attempting one re-download for failed files...") - - delay_between_formats = params.get('delay_between_formats_s', 0) - # This delay is no longer needed in the profile-first model. - # if delay_between_formats > 0: - # logger.info(f"Waiting {delay_between_formats}s before re-download attempt...") - # time.sleep(delay_between_formats) - - format_ids_to_retry = [] - # Since each download is now for a specific selector and the output template - # includes the format_id, we can always attempt to extract the format_id - # from the failed filename for a targeted retry. - for f in failed_files: - match = re.search(r'\.f([\d]+)\.', f) - if match: - format_ids_to_retry.append(match.group(1)) - else: - logger.error(f"Could not extract format_id from failed file '{f}'. Cannot retry this specific file.") - formats_to_download_retry = format_ids_to_retry - - if not formats_to_download_retry: - raise AirflowException("Probe failed, but could not determine which formats to retry.") - - # Rename failed files to allow for a fresh download attempt - for f in failed_files: - try: - failed_path = f"{f}.probe_failed_{int(time.time())}" - os.rename(f, failed_path) - logger.info(f"Renamed corrupted file to {failed_path}") - except OSError as rename_err: - logger.error(f"Could not rename corrupted file '{f}': {rename_err}") - - # --- Retry Download and Probe --- - retried_successful_files, retried_failed_files = _download_and_probe_formats(formats_to_download_retry) - - if retried_failed_files: - logger.error(f"Probe failed again for {len(retried_failed_files)} file(s) after retry: {retried_failed_files}") - - final_success_list = successful_files + retried_successful_files - logger.info(f"Retry complete. Final success count: {len(final_success_list)} file(s).") - - if not final_success_list: - raise AirflowException("Download and probe process completed but produced no valid files.") - - if params.get('yt_dlp_cleanup_mode', False): - logger.info(f"Cleanup mode is enabled. Creating .empty files and deleting originals for {len(final_success_list)} files.") - for f in final_success_list: - try: - empty_file_path = f"{f}.empty" - with open(empty_file_path, 'w') as fp: - pass # create empty file - logger.info(f"Created empty file: {empty_file_path}") - os.remove(f) - logger.info(f"Deleted original file: {f}") - except Exception as e: - logger.error(f"Error during cleanup for file {f}: {e}", exc_info=True) - # Do not fail the task for a cleanup error, just log it. - - # The logic for moving files to a final destination is now handled by the `ytops-client download py` tool - # when `output_to_airflow_ready_dir` is used. This worker no longer needs to perform the move. - # It just needs to return the list of successfully downloaded files. 
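# The except handler below maps known failure strings to an error_code (e.g. a stderr
# containing "HTTP Error 403: Forbidden" becomes HTTP_403_FORBIDDEN, "Video unavailable"
# becomes VIDEO_UNAVAILABLE), pushes {'error_code', 'error_message'} to XCom under
# 'download_error_details', and re-raises so that handle_download_failure_branch can route
# the run to the 'skipped' or 'failed' path.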
- - return final_success_list - except Exception as e: - ti = context['task_instance'] - error_message = str(e) - error_code = "DOWNLOAD_FAILED" - msg_lower = error_message.lower() - - unrecoverable_patterns = { - "AGE_GATED_SIGN_IN": ['sign in to confirm your age'], - "MEMBERS_ONLY": ['members-only content'], - "VIDEO_PROCESSING": ['processing this video'], - "COPYRIGHT_REMOVAL": ['copyright'], - "GEO_RESTRICTED": ['in your country'], - "PRIVATE_VIDEO": ['private video'], - "VIDEO_REMOVED": ['video has been removed'], - "VIDEO_UNAVAILABLE": ['video unavailable'], - "HTTP_403_FORBIDDEN": ['http error 403: forbidden'], - "URL_EXPIRED": ['urls have expired'] - } - - for code, patterns in unrecoverable_patterns.items(): - if any(p in msg_lower for p in patterns): - error_code = code - break - - error_details = {'error_code': error_code, 'error_message': error_message} - ti.xcom_push(key='download_error_details', value=error_details) - raise e - -@task(trigger_rule='all_done') -def unlock_profile(worker_data: dict, **context): - """ - Unlocks the profile and records activity (success or failure). - This task runs regardless of upstream success or failure. - """ - params = context['params'] - dag_run = context['dag_run'] - - # Check if the DAG run failed - failed_tasks = [ti for ti in dag_run.get_task_instances() if ti.state == 'failed'] - is_success = not failed_tasks - activity_type = 'download' if is_success else 'download_error' - - # Use XCom pull to get the data from the initial task, which is more robust - # in case of upstream failures where the data is not passed directly. - ti = context['task_instance'] - worker_data_pulled = ti.xcom_pull(task_ids='lock_profile_and_find_task') - - locked_profile = worker_data_pulled.get('locked_profile') if worker_data_pulled else None - - if not locked_profile: - logger.warning("No locked_profile data found from 'lock_profile_and_find_task'. Cannot unlock or record activity.") - return - - profile_name = locked_profile.get('name') - owner_id = f"airflow_dl_worker_{dag_run.run_id}" - - try: - redis_conn_id = params['redis_conn_id'] - redis_env = params['redis_env'] - redis_hook = _get_redis_client(redis_conn_id, return_hook=True) - key_prefix = f"{redis_env}_profile_mgmt_" - pm = ProfileManager(redis_hook=redis_hook, key_prefix=key_prefix) - - logger.info(f"Recording activity '{activity_type}' for profile '{profile_name}'.") - pm.record_activity(profile_name, activity_type) - - logger.info(f"Unlocking profile '{profile_name}' with owner '{owner_id}'.") - # Read cooldown from config if available - cooldown_str = pm.get_config('unlock_cooldown_seconds') - cooldown = int(cooldown_str) if cooldown_str and cooldown_str.isdigit() else None - - pm.unlock_profile(profile_name, owner=owner_id, rest_for_seconds=cooldown) - if cooldown: - logger.info(f"Profile '{profile_name}' was put into COOLDOWN for {cooldown} seconds.") - - except Exception as e: - logger.error(f"Failed to unlock profile or record activity for '{profile_name}': {e}", exc_info=True) - # Do not fail the task, as this is a cleanup step. - -@task(trigger_rule='one_failed') -def report_failure_and_continue(**context): - """ - Handles a failed download attempt by recording an error report to Redis. 
- """ - params = context['params'] - ti = context['task_instance'] - - job_data = params.get('job_data', {}) - if isinstance(job_data, str): - try: - job_data = json.loads(job_data) - except json.JSONDecodeError: - job_data = {} - url = job_data.get('original_url', 'unknown') - - # No token errors to collect, just report a generic download failure. - error_details = {'error_message': 'Download or probe stage failed.'} - - logger.error(f"A failure occurred while processing URL '{url}'. Reporting to Redis.") - - result_data = { - 'status': 'failed', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'error_details': error_details - } - - try: - client = _get_redis_client(params['redis_conn_id']) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params - _update_client_stats(client, clients_str, 'failure', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on failure: {e}", exc_info=True) - - result_queue = f"{params['queue_name']}_result" - fail_queue = f"{params['queue_name']}_fail" - - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hset(fail_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored failure result for URL '{url}' in '{result_queue}' and '{fail_queue}' and removed from progress queue.") - except Exception as e: - logger.error(f"Could not report failure to Redis: {e}", exc_info=True) - - -@task -def handle_unrecoverable_video_error(**context): - """ - Handles errors for videos that are unavailable (private, removed, etc.) during download. - These are not system failures, so the URL is logged to a 'skipped' queue - and the processing loop continues without marking the run as failed. 
- """ - params = context['params'] - ti = context['task_instance'] - - job_data = params.get('job_data', {}) - if isinstance(job_data, str): - try: - job_data = json.loads(job_data) - except json.JSONDecodeError: - job_data = {} - url = job_data.get('original_url', 'unknown') - - # Collect error details from the failed download_and_probe task - error_details = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details') or {} - - error_code = error_details.get('error_code', 'UNKNOWN_VIDEO_ERROR') - error_message = error_details.get('error_message', 'Video is unavailable for an unknown reason.') - - logger.warning(f"Skipping URL '{url}' due to unrecoverable video error during download: {error_code} - {error_message}") - - result_data = { - 'status': 'skipped', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'reason': error_code, - 'details': error_message, - 'error_details': error_details - } - - try: - client = _get_redis_client(params['redis_conn_id']) - - # New queue for skipped videos - skipped_queue = f"{params['queue_name']}_skipped" - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(skipped_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored skipped result for URL '{url}' in '{skipped_queue}' and removed from progress queue.") - except Exception as e: - logger.error(f"Could not report skipped video to Redis: {e}", exc_info=True) - - -@task(trigger_rule='one_failed') -def handle_fatal_error(**context): - """ - Handles fatal, non-retryable errors (e.g., infrastructure issues). - This task reports the failure to Redis to ensure failed URLs are queued - for later reprocessing, but allows the processing loop to continue. - """ - params = context['params'] - ti = context['task_instance'] - - job_data = params.get('job_data', {}) - if isinstance(job_data, str): - try: - job_data = json.loads(job_data) - except json.JSONDecodeError: - job_data = {} - url = job_data.get('original_url', 'unknown') - - error_details = {'error_message': 'Fatal error during download stage.'} - - logger.error(f"A fatal, non-retryable error occurred for URL '{url}'. 
See previous task logs for details.") - - # Report failure to Redis so the URL can be reprocessed later - try: - client = _get_redis_client(params['redis_conn_id']) - - # Update client-specific stats - try: - machine_id = params.get('machine_id') or socket.gethostname() - clients_str = job_data.get('clients', params.get('clients', '')) # Prefer clients from job, fallback to params - _update_client_stats(client, clients_str, 'failure', url, machine_id, context['dag_run'].run_id) - except Exception as e: - logger.error(f"Could not update client stats on fatal error: {e}", exc_info=True) - - result_data = { - 'status': 'failed', - 'end_time': time.time(), - 'url': url, - 'dag_run_id': context['dag_run'].run_id, - 'error': 'fatal_error', - 'error_message': 'Fatal non-retryable error occurred', - 'error_details': error_details - } - result_queue = f"{params['queue_name']}_result" - fail_queue = f"{params['queue_name']}_fail" - - progress_queue = f"{params['queue_name']}_progress" - - with client.pipeline() as pipe: - pipe.hset(result_queue, url, json.dumps(result_data)) - pipe.hset(fail_queue, url, json.dumps(result_data)) - pipe.hdel(progress_queue, url) - pipe.execute() - - logger.info(f"Stored fatal error result for URL '{url}' in '{result_queue}' and '{fail_queue}' for later reprocessing.") - except Exception as e: - logger.error(f"Could not report fatal error to Redis: {e}", exc_info=True) - - # Do not fail the DAG run. Allow the processing loop to continue. - logger.warning("A fatal error was handled, but the DAG is configured to continue the processing loop.") - - -@task(trigger_rule='one_success') -def continue_processing_loop(**context): - """ - After a successful run, triggers a new dispatcher to continue the processing loop, - effectively asking for the next URL to be processed. - """ - params = context['params'] - dag_run = context['dag_run'] - - # Do not continue the loop for manual runs of the worker DAG. - # A worker DAG triggered by the dispatcher will have a run_id starting with 'worker_run_'. - if not dag_run.run_id.startswith('worker_run_'): - logger.info(f"DAG run '{dag_run.run_id}' does not appear to be triggered by the dispatcher. Stopping processing loop.") - return - - dispatcher_dag_id = 'ytdlp_ops_v02_dispatcher_dl' - dag_model = DagModel.get_dagmodel(dispatcher_dag_id) - if dag_model and dag_model.is_paused: - logger.warning(f"Dispatcher DAG '{dispatcher_dag_id}' is paused. Stopping processing loop.") - return - - # Create a new unique run_id for the dispatcher. - # Using a timestamp and UUID ensures the ID is unique and does not grow in length over time, - # preventing database errors. - new_dispatcher_run_id = f"retriggered_by_worker_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}" - - # Pass all original parameters from the orchestrator through to the new dispatcher run. - conf_to_pass = {k: v for k, v in params.items() if v is not None} - - # The new dispatcher will pull its own job data and determine its own queue, so we don't pass these. - conf_to_pass.pop('job_data', None) - conf_to_pass.pop('worker_queue', None) - - logger.info(f"Worker finished successfully. Triggering a new dispatcher ('{new_dispatcher_run_id}') to continue the loop.") - trigger_dag( - dag_id=dispatcher_dag_id, - run_id=new_dispatcher_run_id, - conf=conf_to_pass, - replace_microseconds=False - ) - - -@task.branch(trigger_rule='one_failed') -def handle_download_failure_branch(**context): - """ - If download or probe fails, check for unrecoverable video errors. 
- If found, route to the 'skipped' handler. Otherwise, report a standard failure. - """ - ti = context['task_instance'] - download_error_details = ti.xcom_pull(task_ids='download_processing.download_and_probe', key='download_error_details') - - if download_error_details: - error_code = download_error_details.get('error_code') - unrecoverable_video_errors = [ - "AGE_GATED_SIGN_IN", "MEMBERS_ONLY", "VIDEO_PROCESSING", "COPYRIGHT_REMOVAL", - "GEO_RESTRICTED", "VIDEO_UNAVAILABLE", "PRIVATE_VIDEO", "VIDEO_REMOVED" - ] - if error_code in unrecoverable_video_errors: - logger.warning(f"Unrecoverable video error '{error_code}' during download. Skipping.") - return 'handle_unrecoverable_video_error' - - logger.warning("Download or probe failed. Reporting failure and continuing loop.") - return 'report_failure_and_continue' - - - - -# ============================================================================= -# DAG Definition with TaskGroups -# ============================================================================= -with DAG( - dag_id='ytdlp_ops_v02_worker_per_url_dl', - default_args=DEFAULT_ARGS, - schedule=None, - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 'worker', 'v2'], - doc_md=__doc__, - render_template_as_native_obj=True, - is_paused_upon_creation=True, - params={ - # --- V2 Profile Management Parameters --- - 'redis_env': Param("sim_download", type="string", title="[V2 Profiles] Redis Environment", description="The environment for v2 profile management (e.g., 'sim_download'). Determines the Redis key prefix."), - 'profile_prefix': Param("download_user", type="string", title="[V2 Profiles] Profile Prefix", description="The prefix for download profiles that workers should attempt to lock."), - - 'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string"), - 'machine_id': Param(None, type=["string", "null"]), - 'clients': Param('tv_simply', type="string", description="Comma-separated list of clients for token generation. e.g. mweb,tv,web_camoufox"), - 'output_path_template': Param("%(id)s.f%(format_id)s.%(ext)s", type="string", title="[Worker Param] Output Path Template", description="Output filename template for yt-dlp. 
It is highly recommended to include `%(format_id)s` to prevent filename collisions when downloading multiple formats."), - 'retry_on_probe_failure': Param(False, type="boolean"), - 'skip_probe': Param(True, type="boolean", title="[Worker Param] Skip Probe", description="If True, skips the ffmpeg probe of downloaded files."), - 'yt_dlp_cleanup_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Cleanup Mode", description="If True, creates a .empty file and deletes the original media file after successful download and probe."), - 'delay_between_formats_s': Param(0, type="integer", title="[Worker Param] Delay Between Formats (s)", description="No longer used in profile-first model, as each format is a separate task."), - 'yt_dlp_test_mode': Param(False, type="boolean", title="[Worker Param] yt-dlp Test Mode", description="If True, runs yt-dlp with --test flag (dry run without downloading)."), - 'download_format': Param(None, type=["string", "null"], title="[DEPRECATED] Download Format", description="This is now specified in the granular task generated by the auth worker."), - 'pass_without_formats_splitting': Param(False, type="boolean", title="[DEPRECATED] Pass format string without splitting"), - 'downloader': Param( - 'py', - type="string", - enum=['py', 'aria-rpc', 'cli'], - title="Download Tool", - description="Choose the download tool to use: 'py' (native python, recommended), 'aria-rpc' (send to aria2c daemon), 'cli' (legacy yt-dlp wrapper)." - ), - 'aria_host': Param('172.17.0.1', type="string", title="Aria2c Host", description="For 'aria-rpc' downloader: Host of the aria2c RPC server."), - 'aria_port': Param(6800, type="integer", title="Aria2c Port", description="For 'aria-rpc' downloader: Port of the aria2c RPC server."), - 'aria_secret': Param('SQGCQPLVFQIASMPNPOJYLVGJYLMIDIXDXAIXOTX', type="string", title="Aria2c Secret", description="For 'aria-rpc' downloader: Secret token."), - 'yt_dlp_extra_args': Param( - '--verbose --no-resize-buffer --buffer-size 4M --fragment-retries 2 --concurrent-fragments 8 --socket-timeout 15 --sleep-interval 5 --max-sleep-interval 10 --no-part --restrict-filenames', - type=["string", "null"], - title="Extra yt-dlp arguments", - description="Extra command-line arguments for yt-dlp during download." - ), - # --- Manual Run / Internal Parameters --- - 'manual_job_input': Param(None, type=["string", "null"], title="[DEPRECATED] Job Input"), - 'job_data': Param(None, type=["object", "string", "null"], title="[Internal] Job Data from Dispatcher", description="This is no longer used. The worker finds its own job."), - 'worker_queue': Param(None, type=["string", "null"], title="[Internal] Worker Queue", description="This parameter is set by the dispatcher DAG."), - } -) as dag: - # --- Task Instantiation for Profile-First Model --- - - # 1. Start by locking a profile and finding a task. - worker_data = lock_profile_and_find_task() - - # 2. Define the download processing group. - with TaskGroup("download_processing", tooltip="Download and media processing") as download_processing_group: - list_formats_task = list_available_formats(worker_data=worker_data) - download_task = download_and_probe(worker_data=worker_data) - list_formats_task >> download_task - - # 3. Define the final cleanup and loop continuation tasks. 
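# Resulting task chain (annotation only): lock_profile_and_find_task >>
# download_processing (list_available_formats >> download_and_probe) >>
# unlock_profile [trigger_rule='all_done'] >> continue_processing_loop
# [trigger_rule='one_success'], so the locked profile is always released and the loop
# only re-triggers after that cleanup step completes.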
- unlock_profile_task = unlock_profile(worker_data=worker_data) - continue_loop_task = continue_processing_loop() - - # --- DAG Dependencies --- - # Start -> Download Group -> Unlock -> Continue Loop - worker_data >> download_processing_group - download_processing_group >> unlock_profile_task - - # The loop continues regardless of whether the download succeeded or failed. - # The unlock_profile task (with trigger_rule='all_done') ensures it always runs. - unlock_profile_task >> continue_loop_task diff --git a/airflow/dags/ytdlp_s3_uploader.py b/airflow/dags/ytdlp_s3_uploader.py deleted file mode 100644 index 8c3fb63..0000000 --- a/airflow/dags/ytdlp_s3_uploader.py +++ /dev/null @@ -1,455 +0,0 @@ -# -*- coding: utf-8 -*- -""" -DAG to upload completed video directories to an S3-compatible service. -This DAG creates one long-running task for each configured S3 worker. -""" - -from __future__ import annotations - -import logging -import os -import shutil -import subprocess -import time -from datetime import datetime, timedelta - -from airflow.decorators import task -from airflow.exceptions import AirflowException -from airflow.models.dag import DAG -from airflow.models.param import Param -from airflow.models.variable import Variable -from airflow.operators.dummy import DummyOperator -from airflow.providers.amazon.aws.hooks.s3 import S3Hook -from airflow.utils.dates import days_ago - -logger = logging.getLogger(__name__) - -DEFAULT_ARGS = { - 'owner': 'airflow', - 'retries': 1, - 'retry_delay': timedelta(minutes=1), -} - -BASE_DOWNLOAD_PATH = '/opt/airflow/downloadfiles' -VIDEOS_PATH = os.path.join(BASE_DOWNLOAD_PATH, 'videos') -READY_PATH = os.path.join(VIDEOS_PATH, 'ready') - -def run_s3_upload_batch(**context): - """ - This function runs in a continuous loop to check for completed video directories and upload them to S3. - If no videos are found, it sleeps for a configurable interval before checking again. - Dry run mode is non-destructive and will pause briefly after checking to prevent tight loops. - """ - params = context['params'] - ti = context['task_instance'] - - # Log the configured execution timeout for debugging purposes. - # This helps verify that the timeout setting from the DAG file is being applied. - timeout_delta = ti.task.execution_timeout - logger.info(f"Task is configured with execution_timeout: {timeout_delta}") - - concurrency = params['concurrency'] - mode = params['mode'] - dry_run = params['dry_run'] - sleep_interval_min = params['sleep_if_no_videos_min'] - sleep_interval_sec = sleep_interval_min * 60 - s3_conn_id = params['s3_conn_id'] - s3_bucket = params['s3_bucket_name'] - - s3_access_key_id = None - s3_secret_access_key = None - s3_endpoint = None - s3_region = None - config_source = "Unknown" - profile_name = "rusonyx" - - # --- Attempt 1: Get S3 Configuration from Airflow Connection --- - if s3_conn_id: - try: - logger.info(f"Attempting to load S3 configuration from Airflow connection '{s3_conn_id}'.") - s3_hook = S3Hook(aws_conn_id=s3_conn_id) - s3_conn = s3_hook.get_connection(s3_conn_id) - - s3_access_key_id = s3_conn.login - s3_secret_access_key = s3_conn.password - s3_endpoint = s3_conn.host - - extra_config = s3_conn.extra_dejson - s3_region = extra_config.get('region_name') - - if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_region]): - logger.warning("S3 connection from Airflow is missing one or more required fields (excluding bucket). 
Will attempt to fall back to environment variables.") - s3_access_key_id = s3_secret_access_key = s3_endpoint = s3_region = None # Reset all - else: - config_source = f"Airflow Connection '{s3_conn_id}'" - profile_name = "rusonyx-airflow" - - except Exception as e: - logger.warning(f"Failed to load S3 configuration from Airflow connection '{s3_conn_id}': {e}. Will attempt to fall back to environment variables.") - - # --- Attempt 2: Fallback to Environment Variables --- - if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_region]): - try: - logger.info("Attempting to load S3 configuration from environment variables as a fallback.") - s3_access_key_id = os.environ['S3_DELIVERY_AWS_ACCESS_KEY_ID'] - s3_secret_access_key = os.environ['S3_DELIVERY_AWS_SECRET_ACCESS_KEY'] - s3_endpoint = os.environ['S3_DELIVERY_ENDPOINT'] - s3_region = os.environ['S3_DELIVERY_AWS_REGION'] - - if not all([s3_access_key_id, s3_secret_access_key, s3_endpoint, s3_region]): - raise ValueError("One or more S3 configuration environment variables are empty (excluding bucket).") - config_source = "Environment Variables" - profile_name = "rusonyx" - - except (KeyError, ValueError) as e: - logger.error(f"Having problems reading S3 configuration from environment variables: {e}", exc_info=True) - raise AirflowException("S3 configuration is missing. Could not load from Airflow connection or environment variables.") - - if not s3_bucket: - raise AirflowException("S3 bucket name is not specified in DAG parameters.") - - s3_destination = f"s3://{s3_bucket}/" - - logger.info(f"Starting S3 upload loop. Watching source '{READY_PATH}' for delivery to '{s3_destination}'.") - logger.info(f"Mode: {mode}, Dry Run: {dry_run}, Idle Sleep: {sleep_interval_min} min") - logger.info(f"S3 Config loaded from {config_source}: Endpoint='{s3_endpoint}', Bucket='{s3_bucket}', Region='{s3_region}', Profile='{profile_name}'") - - # --- Write credentials to file for s5cmd profile --- - aws_credentials_path = os.path.expanduser("~/.aws/credentials") - aws_config_path = os.path.expanduser("~/.aws/config") - - try: - os.makedirs(os.path.dirname(aws_credentials_path), exist_ok=True) - - with open(aws_credentials_path, 'w') as f: - f.write(f"[{profile_name}]\n") - f.write(f"aws_access_key_id = {s3_access_key_id}\n") - f.write(f"aws_secret_access_key = {s3_secret_access_key}\n") - logger.info(f"Wrote credentials for profile '{profile_name}' to {aws_credentials_path}") - - with open(aws_config_path, 'w') as f: - f.write(f"[profile {profile_name}]\n") - f.write(f"region = {s3_region}\n") - logger.info(f"Wrote config for profile '{profile_name}' to {aws_config_path}") - - except Exception as e: - logger.error(f"Failed to write AWS credentials/config file: {e}", exc_info=True) - raise AirflowException(f"Failed to write AWS credentials/config file: {e}") - - while True: - logger.info("--- Starting new S3 upload cycle ---") - - # --- Dry Run Logic (Non-destructive) --- - if dry_run: - logger.info("[DRY RUN] Checking for completed video batches...") - if not os.path.exists(READY_PATH): - logger.info(f"[DRY RUN] Source directory '{READY_PATH}' does not exist. Nothing to upload.") - else: - now = datetime.now() - wait_minutes = params['batch_completion_wait_min'] - cutoff_time = now - timedelta(minutes=wait_minutes) - rounded_minute = (cutoff_time.minute // 10) * 10 - cutoff_batch_ts = cutoff_time.strftime('%Y%m%dT%H') + f"{rounded_minute:02d}" - logger.info(f"[DRY RUN] Current time is {now.strftime('%H:%M:%S')}. 
With a {wait_minutes} min wait, processing batches up to and including '{cutoff_batch_ts}'.") - - all_video_dirs_to_process = [] - processed_batch_dirs = set() - all_batch_dirs = sorted([d for d in os.listdir(READY_PATH) if os.path.isdir(os.path.join(READY_PATH, d))]) - - for ts_dir in all_batch_dirs: - if ts_dir > cutoff_batch_ts: - continue - - batch_dir_path = os.path.join(READY_PATH, ts_dir) - video_dirs_in_batch = [os.path.join(batch_dir_path, d) for d in os.listdir(batch_dir_path) if os.path.isdir(os.path.join(batch_dir_path, d))] - - if video_dirs_in_batch: - all_video_dirs_to_process.extend(video_dirs_in_batch) - processed_batch_dirs.add(batch_dir_path) - else: - logger.info(f"[DRY RUN] Batch directory '{batch_dir_path}' is empty. Would remove it.") - - if all_video_dirs_to_process: - logger.info(f"[DRY RUN] Found {len(all_video_dirs_to_process)} total video director(y/ies) in {len(processed_batch_dirs)} batch(es) to process.") - - # Construct and log the command that would be run - cmd = [ - 's5cmd', '--endpoint-url', s3_endpoint, '--log', 'debug', '--no-verify-ssl', - '--use-list-objects-v1', '--profile', profile_name, '--stat', - '--numworkers', str(concurrency), 'run' - ] - cmd_str = ' '.join(cmd) - - # Construct the commands to be piped - commands_to_pipe = '\n'.join([f"cp \"{dir_path}\" \"{s3_destination}\"" for dir_path in all_video_dirs_to_process]) - - logger.info(f"[DRY RUN] The following command would be executed:\n{cmd_str}") - logger.info(f"[DRY RUN] The following commands would be piped to stdin:\n{commands_to_pipe}") - - if mode == 'mv': - logger.info(f"[DRY RUN] Mode is 'mv'. Would delete {len(processed_batch_dirs)} source batch directories after successful upload.") - - # Pause briefly in dry-run mode if videos are found to avoid a fast, noisy loop. - dry_run_pause_s = 10 - logger.info(f"[DRY RUN] Pausing for {dry_run_pause_s} seconds to prevent rapid re-listing of the same files (this is a short, fixed pause for dry-run only).") - time.sleep(dry_run_pause_s) - continue # Go to the start of the next cycle - else: - logger.info("[DRY RUN] No completed video batches found.") - - # If in dry-run and no videos are found, sleep for the main interval. - logger.info(f"[DRY RUN] Sleeping for {sleep_interval_min} minute(s)...") - time.sleep(sleep_interval_sec) - continue - - # --- Normal Operation Logic (Destructive) --- - work_done_in_cycle = False - try: - # --- 1. Find all videos to upload from all completed batches --- - if not os.path.exists(READY_PATH): - logger.info(f"Ready directory '{READY_PATH}' does not exist. Nothing to upload.") - else: - now = datetime.now() - wait_minutes = params['batch_completion_wait_min'] - cutoff_time = now - timedelta(minutes=wait_minutes) - rounded_minute = (cutoff_time.minute // 10) * 10 - cutoff_batch_ts = cutoff_time.strftime('%Y%m%dT%H') + f"{rounded_minute:02d}" - logger.info(f"Current time is {now.strftime('%H:%M:%S')}. 
With a {wait_minutes} min wait, processing batches up to and including '{cutoff_batch_ts}'.") - - all_video_dirs_to_process = [] - processed_batch_dirs = set() - all_batch_dirs = sorted([d for d in os.listdir(READY_PATH) if os.path.isdir(os.path.join(READY_PATH, d))]) - - for ts_dir in all_batch_dirs: - if ts_dir > cutoff_batch_ts: - continue # This batch is not old enough to be processed - - batch_dir_path = os.path.join(READY_PATH, ts_dir) - video_dirs_in_batch = [os.path.join(batch_dir_path, d) for d in os.listdir(batch_dir_path) if os.path.isdir(os.path.join(batch_dir_path, d))] - - if not video_dirs_in_batch: - logger.info(f"Batch directory '{batch_dir_path}' is empty. Removing it.") - try: - os.rmdir(batch_dir_path) - except OSError as e: - logger.error(f"Could not remove empty batch directory {batch_dir_path}: {e}") - continue # Move to the next batch - - all_video_dirs_to_process.extend(video_dirs_in_batch) - processed_batch_dirs.add(batch_dir_path) - - # --- 2. Upload All Found Videos in a Single Batch Command --- - if all_video_dirs_to_process: - work_done_in_cycle = True - logger.info(f"Found {len(all_video_dirs_to_process)} total video director(y/ies) in {len(processed_batch_dirs)} batch(es) to upload.") - - cmd = [ - 's5cmd', '--endpoint-url', s3_endpoint, '--log', 'debug', '--no-verify-ssl', - '--use-list-objects-v1', '--profile', profile_name, '--stat', - '--numworkers', str(concurrency), 'run' - ] - cmd_str = ' '.join(cmd) - - # Construct the commands to be piped to stdin - commands_to_pipe = '\n'.join([f"cp \"{dir_path}\" \"{s3_destination}\"" for dir_path in all_video_dirs_to_process]) - - logger.info(f"Executing s5cmd batch command:\n{cmd_str}") - logger.info(f"Piping {len(all_video_dirs_to_process)} 'cp' commands to stdin.") - - upload_start_time = time.time() - process = subprocess.run(cmd, check=True, capture_output=True, text=True, input=commands_to_pipe) - upload_duration = time.time() - upload_start_time - - logger.info(f"s5cmd STDOUT: {process.stdout}") - if process.stderr: - logger.info(f"s5cmd STDERR: {process.stderr}") - logger.info(f"Upload command completed successfully in {upload_duration:.2f} seconds.") - logger.info(f"Successfully copied {len(all_video_dirs_to_process)} director(y/ies) to S3.") - - # --- 3. Cleanup --- - if mode == 'mv': - logger.info(f"Mode is 'mv'. Cleaning up {len(processed_batch_dirs)} source batch director(y/ies).") - cleanup_start_time = time.time() - - # Create a temporary empty directory to use as a source for rsync deletion - empty_dir_for_rsync = os.path.join(READY_PATH, f"__empty_{int(time.time())}") - os.makedirs(empty_dir_for_rsync, exist_ok=True) - - try: - for batch_dir_path in processed_batch_dirs: - try: - # Use rsync with an empty source to efficiently delete the contents of the batch directory. - # This is a performant alternative to `shutil.rmtree`, which can be slow with many small files. - # The trailing slash on both source and destination is important for rsync's behavior. 
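The comments above describe clearing a directory by rsync-ing an empty source over it before removing it. A minimal standalone sketch of the same trick follows; `fast_clear_and_remove` is a hypothetical helper written for illustration, not part of the deleted DAG code:

```python
# Sketch of the empty-source rsync deletion trick described in the comments above.
# `fast_clear_and_remove` is a hypothetical helper, independent of the DAG code.
import os
import subprocess
import tempfile

def fast_clear_and_remove(target_dir: str) -> None:
    """Empty target_dir with `rsync --delete` from an empty source, then remove it."""
    with tempfile.TemporaryDirectory() as empty_dir:
        # Trailing slashes make rsync sync directory *contents*; --delete removes
        # everything in target_dir that is absent from the (empty) source.
        subprocess.run(
            ["rsync", "-a", "--delete", f"{empty_dir}/", f"{target_dir}/"],
            check=True, capture_output=True, text=True,
        )
    os.rmdir(target_dir)  # directory is now empty, so a plain rmdir succeeds
```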
- rsync_cmd = [ - 'rsync', - '-a', '--delete', - f'{empty_dir_for_rsync}/', - f'{batch_dir_path}/' - ] - subprocess.run(rsync_cmd, check=True, capture_output=True, text=True) - - # After the contents are deleted, remove the now-empty directory - os.rmdir(batch_dir_path) - logger.info(f"Successfully removed {batch_dir_path}") - except Exception as cleanup_e: - if isinstance(cleanup_e, OSError) and "Directory not empty" in str(cleanup_e): - # This can happen in a race condition where a download worker adds a new video - # to the batch directory after rsync has emptied it but before rmdir runs. - # We log it as a warning; the directory will be re-processed in the next cycle - # because this task rescans all directories on each run. - logger.warning(f"Could not remove directory {batch_dir_path}, it was not empty: {cleanup_e}. It will be re-processed on the next run.") - else: - logger.error(f"Failed to remove directory {batch_dir_path}: {cleanup_e}", exc_info=True) - if isinstance(cleanup_e, subprocess.CalledProcessError): - logger.error(f"rsync STDERR: {cleanup_e.stderr}") - finally: - # Clean up the temporary empty directory - shutil.rmtree(empty_dir_for_rsync) - - cleanup_duration = time.time() - cleanup_start_time - logger.info(f"Cleanup complete in {cleanup_duration:.2f} seconds.") - else: # mode == 'cp' - logger.info(f"Mode is 'cp'. Source directories will be left for inspection.") - - if not work_done_in_cycle: - logger.info(f"No completed video batches found in '{READY_PATH}'.") - - except Exception as e: - logger.error(f"An error occurred during the S3 upload cycle: {e}", exc_info=True) - if isinstance(e, subprocess.CalledProcessError): - logger.error(f"s5cmd STDERR: {e.stderr}") - # On error, we do NOT clean up, to allow for investigation and retries. - # The failed directories will be picked up in the next cycle. - # Treat errors as "no work done" to trigger sleep and prevent fast failure loops - work_done_in_cycle = False - - # --- Loop Control --- - if not work_done_in_cycle: - logger.info(f"No work done in this cycle. Sleeping for {sleep_interval_min} minute(s)...") - time.sleep(sleep_interval_sec) - else: - logger.info("Work was completed in this cycle. Checking for more immediately.") - -with DAG( - dag_id='ytdlp_s3_uploader', - default_args=DEFAULT_ARGS, - schedule=None, - start_date=days_ago(1), - catchup=False, - tags=['ytdlp', 's3', 'upload'], - doc_md="""### S3 Uploader DAG - -1. This DAG creates dynamic uploader tasks with clear names depicting their worker machine (e.g., `upload_batch_on_dl001`). -2. Ansible updates an Airflow Variable named `s3_worker_hostnames` with a JSON list of all active uploader workers (typically dlXXX machines). Each worker listens to its own queue (e.g., `queue-dl-dl001`). -3. This DAG reads the variable on manual trigger or after a pause/resume cycle to create the dynamic tasks. This allows for easy inspection of per-worker logs and status from the Airflow UI. -4. Each dynamic task watches a shared folder (`/opt/airflow/downloadfiles/videos/ready`). Download workers place completed videos into timestamped sub-folders (e.g., `20241122T1050`). The uploader processes these 10-minute batches, copying them to S3 with `s5cmd` and then deleting the source directories. This design avoids race conditions and improves performance. - -#### Why use 10-minute batch folders? 
- -While an `mv` command (atomic on the same filesystem) is sufficient to ensure a single video directory is complete when it appears in the `ready` folder, the batching system solves higher-level concurrency and efficiency problems in a high-throughput environment. - -- **Concurrency Management**: The uploader needs to process a discrete *set* of videos. By working on batches from a *previous* time window (e.g., uploading the `10:40` batch after `10:50`), it guarantees that no new files will be added to that batch while it's being processed. This creates a clean, reliable unit of work and prevents the uploader from missing videos that are moved in while it's compiling its list. -- **Bulk Operation Efficiency**: It is far more efficient to upload hundreds of videos in a single bulk command than one by one. The batching system allows videos to accumulate, and the uploader sends them all to S3 in one highly optimized `s5cmd run` command. Similarly, after a successful upload, the uploader can delete the single parent batch directory, which is much faster than deleting hundreds of individual video folders. -- **Continuous Operation**: The uploader task is a long-running loop. If processing a batch takes longer than 10 minutes (e.g., due to a large volume of videos or slow network), the uploader will continue working on that batch until it is complete. It only sleeps when it has processed all available completed batches and is waiting for new ones to become ready. - -#### Cleanup Method: `rsync` vs `shutil.rmtree` - -The cleanup process uses the `rsync` empty-folder trick to delete the contents of the batch directory before removing the directory itself. This is a deliberate performance optimization. The command is effectively: `rsync -a --delete /path/to/empty/ /path/to/delete/`. - -- Python's `shutil.rmtree` can be slow as it makes an individual `os.remove()` system call for every file. -- The `rsync` method is a well-known and highly efficient alternative for this scenario, as `rsync` is a mature C program optimized for these operations. More details on this performance difference can be found here: https://stackoverflow.com/questions/5470939/why-is-shutil-rmtree-so-slow - """, - params={ - 'mode': Param( - 'mv', type="string", enum=['cp', 'mv'], title="Operation Mode", - description="`mv` (move): After a successful upload, the temporary batch directory is deleted. This is the standard behavior. `cp` (copy): The temporary batch directory is left intact for debugging; it will be cleaned up on the next run." - ), - 'dry_run': Param( - True, type="boolean", title="Dry Run", - description="If True, the DAG will perform all steps except the actual upload and cleanup. `s5cmd` will be run with `--dry-run`, and the final directory removal will be skipped. Log messages will indicate what would have happened." - ), - 'concurrency': Param(10, type="integer", title="s5cmd Concurrency"), - 'sleep_if_no_videos_min': Param(5, type="integer", title="Sleep if Idle (minutes)", description="How many minutes the task should sleep if no videos are found to upload. This should be less than any external timeout (e.g., Celery's worker_proc_timeout)."), - 'batch_completion_wait_min': Param(0, type="integer", title="Batch Completion Wait (minutes)", description="How many minutes to wait after a 10-minute batch window closes before considering it for upload. Default is 0, which processes the current batch immediately. 
A value of 10 restores the old behavior of waiting for the next 10-minute window."), - 's3_conn_id': Param('s3_delivery_connection', type="string", title="S3 Connection ID", description="The Airflow connection ID for the S3-compatible storage. If this connection is invalid or missing, the task will fall back to environment variables."), - 's3_bucket_name': Param( - 'videos', - type="string", - title="S3 Bucket Name", - description="The name of the S3 bucket to upload to. Common values are 'videos' or 'videos-prod'." - ), - } -) as dag: - - # Dynamically create one task per S3 worker hostname - # IMPORTANT: The tasks are created when this DAG file is parsed by the Airflow Scheduler. - # If you add/change the 's3_worker_hostnames' Airflow Variable, you may need to - # wait a few minutes for the scheduler to re-parse the file and update the tasks. - # Forcing a re-parse can be done by pausing and un-pausing the DAG in the UI. - s3_worker_hostnames = [] # Initialize to be safe - try: - # The variable should be a JSON list of strings, e.g., ["s3-001", "s3-002"] - s3_worker_hostnames = Variable.get("s3_worker_hostnames", deserialize_json=True, default_var=[]) - logger.info(f"DAG 'ytdlp_s3_uploader' successfully loaded s3_worker_hostnames variable. Value: {s3_worker_hostnames}") - if not isinstance(s3_worker_hostnames, list): - logger.error(f"Airflow Variable 's3_worker_hostnames' is not a valid JSON list. Value: {s3_worker_hostnames}") - s3_worker_hostnames = [] # Reset to empty to prevent errors - except Exception as e: - logger.error( - f"Could not read or parse Airflow Variable 's3_worker_hostnames'. " - f"Please create it in the Airflow UI as a JSON list of your S3 worker hostnames (e.g., [\"s3-001\"]). " - f"No S3 worker tasks will be created. Error: {e}", - exc_info=True - ) - s3_worker_hostnames = [] - - @task(task_id='check_s3_worker_configuration') - def check_s3_worker_configuration_callable(): - """Logs the current value of the s3_worker_hostnames variable at runtime for debugging.""" - logger.info("--- S3 Worker Configuration Check (at runtime) ---") - try: - hostnames = Variable.get("s3_worker_hostnames", deserialize_json=True, default_var=None) - if hostnames is None: - logger.error("Airflow Variable 's3_worker_hostnames' is not defined.") - logger.info("Please create it in the Airflow UI (Admin -> Variables) as a JSON list of strings, e.g., [\"s3-worker-01\"]") - elif not isinstance(hostnames, list): - logger.error(f"Airflow Variable 's3_worker_hostnames' is not a valid JSON list. Current value: {hostnames}") - elif not hostnames: - logger.warning("Airflow Variable 's3_worker_hostnames' is defined but is an empty list []. No worker tasks will be run.") - else: - logger.info(f"Successfully read 's3_worker_hostnames'. It contains {len(hostnames)} worker(s): {hostnames}") - logger.info("If you see this task but no worker tasks in the UI, it means the DAG did not find these workers when it was parsed by the scheduler.") - logger.info("This can happen due to caching. 
Please wait a few minutes for the scheduler to re-parse the DAG file, or pause/un-pause the DAG.") - - except Exception as e: - logger.error(f"An error occurred while trying to read the 's3_worker_hostnames' variable at runtime: {e}", exc_info=True) - logger.info("--- End of Configuration Check ---") - - check_s3_worker_configuration_task = check_s3_worker_configuration_callable() - check_s3_worker_configuration_task.doc_md = """ - ### S3 Worker Configuration Check - - This task runs at the start of every DAG run to check the `s3_worker_hostnames` Airflow Variable. - - The dynamic worker tasks are created based on this variable *at the time the DAG is parsed by the scheduler*. - - **Check the logs for this task to see the current value of the variable as read at runtime.** This can help diagnose why worker tasks may not have been created. - - If the logs show the variable is correct but you don't see the worker tasks in the UI, you may need to wait for the scheduler to re-parse the DAG file. You can force this by pausing and un-pausing the DAG. - """ - if s3_worker_hostnames: - worker_tasks = [] - for hostname in s3_worker_hostnames: - # Sanitize hostname for task_id - task_id_hostname = hostname.replace('.', '_') - - # Create a task for each worker, pinned to its specific queue - upload_task = task( - task_id=f'upload_batch_on_{task_id_hostname}', - queue=f'queue-s3-{hostname}', - execution_timeout=timedelta(days=1), - )(run_s3_upload_batch)() - worker_tasks.append(upload_task) - - check_s3_worker_configuration_task >> worker_tasks diff --git a/airflow/generate_envoy_config.py b/airflow/generate_envoy_config.py deleted file mode 100644 index fe9e1bd..0000000 --- a/airflow/generate_envoy_config.py +++ /dev/null @@ -1,287 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys -import json -import re -try: - from jinja2 import Environment, FileSystemLoader -except ImportError: - print("FATAL: jinja2 is not installed. Please run 'pip install jinja2'.", file=sys.stderr) - exit(1) -import logging -import ipaddress -from typing import Optional - -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -def is_ip_address(address: str) -> bool: - """Checks if a given string is a valid IP address (IPv4 or IPv6).""" - if not address: - return False - try: - ipaddress.ip_address(address) - return True - except ValueError: - return False - - -def load_dotenv(dotenv_path): - """ - Loads environment variables from a .env file. - Does not override existing environment variables from the system. - """ - if not os.path.exists(dotenv_path): - logging.warning(f".env file not found at {dotenv_path}. 
Using system environment variables or defaults.")
- return False
- try:
- with open(dotenv_path) as f:
- for line in f:
- line = line.strip()
- if line and not line.startswith('#') and '=' in line:
- key, value = line.split('=', 1)
- key = key.strip()
- value = value.strip()
- # Remove surrounding quotes which are common in .env files
- # Handle both single and double quotes
- if (value.startswith('"') and value.endswith('"')) or \
- (value.startswith("'") and value.endswith("'")):
- value = value[1:-1]
-
- # os.environ only takes strings
- value = str(value)
-
- if key not in os.environ:
- os.environ[key] = value
- logging.info(f"Successfully loaded variables from {dotenv_path}")
- return True
- except Exception as e:
- logging.error(f"Failed to read or parse {dotenv_path}: {e}")
- return False
-
-def _get_port_from_proxy_url(url: str) -> Optional[str]:
- """Extracts the port from a proxy URL string."""
- if not url or not isinstance(url, str):
- return None
- match = re.search(r':(\d+)$', url.strip())
- return match.group(1) if match else None
-
-def expand_env_vars(value: str) -> str:
- """
- Expands environment variables in a string, including default values.
- Supports ${VAR} and ${VAR:-default}.
- """
- if not isinstance(value, str):
- return value
-
- # Regex to find ${VAR:-default} or ${VAR}
- pattern = re.compile(r'\$\{(?P<var>\w+)(?::-(?P<default>.*?))?\}')
-
- def replacer(match):
- var_name = match.group('var')
- default_value = match.group('default')
- # Get value from os.environ, or use default, or empty string
- return os.getenv(var_name, default_value if default_value is not None else '')
-
- return pattern.sub(replacer, value)
-
-def generate_configs():
- """
- Generates envoy.yaml, docker-compose.camoufox.yaml, and camoufox_endpoints.json
- from Jinja2 templates and environment variables.
- """
- try:
- # --- Setup Paths ---
- # The script runs from /app. Configs and templates are in /app/configs.
- project_root = os.path.dirname(os.path.abspath(__file__)) # This will be /app
- configs_dir = os.path.join(project_root, 'configs')
-
- # Load .env from the project root ONLY - no fallback
- dotenv_path = os.path.join(project_root, '.env')
- logging.info(f"Looking for .env file at: {dotenv_path}")
-
- if os.path.exists(dotenv_path):
- if load_dotenv(dotenv_path):
- logging.info(f"Using .env file from: {dotenv_path}")
- else:
- logging.error(f"Failed to load .env file from: {dotenv_path}")
- exit(1)
- else:
- logging.warning(f".env file not found at {dotenv_path}. Using system environment variables or defaults.")
-
- # --- Common Configuration ---
- ytdlp_workers_str = os.getenv('YTDLP_WORKERS', '3').strip()
- try:
- # Handle empty string case by defaulting to 3, otherwise convert to int.
- worker_count = int(ytdlp_workers_str) if ytdlp_workers_str else 3
- except (ValueError, TypeError):
- logging.warning(f"Invalid value for YTDLP_WORKERS: '{ytdlp_workers_str}'. Defaulting to 3.")
- worker_count = 3
-
- if worker_count == 0:
- worker_count = os.cpu_count() or 1
- logging.info(f"YTDLP_WORKERS is 0, auto-detected {worker_count} CPU cores for worker and camoufox config.")
-
- # The templates are in the 'configs' directory.
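A quick usage check of the `${VAR}` / `${VAR:-default}` expansion implemented by `expand_env_vars` above; the proxy URL and values are invented for illustration, and the import assumes the module is on the Python path:

```python
# Illustrative only: exercise the ${VAR} / ${VAR:-default} expansion shown above.
import os
from generate_envoy_config import expand_env_vars  # assumes module is importable

os.environ["PROXY_HOST"] = "10.0.0.5"  # placeholder value for the example
print(expand_env_vars("socks5h://${PROXY_HOST}:${PROXY_PORT:-1080}"))
# -> socks5h://10.0.0.5:1080  (PROXY_PORT is unset, so the :-1080 default applies)
```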
- env = Environment(loader=FileSystemLoader(configs_dir), trim_blocks=True, lstrip_blocks=True) - # Make the helper function available to Jinja2 templates - env.globals['_get_port_from_proxy_url'] = _get_port_from_proxy_url - - # Get service role from environment to determine what to generate - # Ensure we strip any remaining quotes that might have slipped through - service_role = os.getenv('service_role', 'management') - # Additional stripping of quotes for robustness - if (service_role.startswith('"') and service_role.endswith('"')) or \ - (service_role.startswith("'") and service_role.endswith("'")): - service_role = service_role[1:-1] - - logging.info(f"Service role for generation: '{service_role}'") - - # --- Camoufox Configuration (only for worker/all-in-one roles) --- - logging.info("--- Camoufox (Remote Browser) Configuration ---") - camoufox_proxies = [] - expanded_camoufox_proxies_str = "" - if service_role != 'management': - logging.info("--- Generating Camoufox (Remote Browser) Configuration ---") - camoufox_proxies_str = os.getenv('CAMOUFOX_PROXIES') - if not camoufox_proxies_str: - logging.warning("CAMOUFOX_PROXIES environment variable not set. No camoufox instances will be generated.") - else: - # Expand environment variables within the string before splitting - expanded_camoufox_proxies_str = expand_env_vars(camoufox_proxies_str) - logging.info(f"Expanded CAMOUFOX_PROXIES from '{camoufox_proxies_str}' to '{expanded_camoufox_proxies_str}'") - camoufox_proxies = [{'url': p.strip()} for p in expanded_camoufox_proxies_str.split(',') if p.strip()] - logging.info(f"Found {len(camoufox_proxies)} proxy/proxies for Camoufox.") - logging.info(f"Each Camoufox instance will support {worker_count} concurrent browser sessions.") - logging.info(f"Total browser sessions supported on this worker: {len(camoufox_proxies) * worker_count}") - - - vnc_password = os.getenv('VNC_PASSWORD', 'supersecret') - base_vnc_port = int(os.getenv('CAMOUFOX_BASE_VNC_PORT', 5901)) - camoufox_port = int(os.getenv('CAMOUFOX_PORT', 12345)) - camoufox_backend_prefix = os.getenv('CAMOUFOX_BACKEND_PREFIX', 'camoufox-') - - # --- Generate docker-compose.camoufox.yaml --- - compose_template = env.get_template('docker-compose.camoufox.yaml.j2') - compose_output_file = os.path.join(configs_dir, 'docker-compose.camoufox.yaml') - - camoufox_config_data = { - 'camoufox_proxies': camoufox_proxies, - 'vnc_password': vnc_password, - 'camoufox_port': camoufox_port, - 'worker_count': worker_count, - } - - rendered_compose_config = compose_template.render(camoufox_config_data) - with open(compose_output_file, 'w') as f: - f.write(rendered_compose_config) - - logging.info(f"Successfully generated {compose_output_file} with {len(camoufox_proxies)} camoufox service(s).") - logging.info("This docker-compose file defines the remote browser services, one for each proxy.") - logging.info("----------------------------------------------------------") - - # --- Generate camoufox_endpoints.json --- - endpoints_map = {} - for i, proxy in enumerate(camoufox_proxies): - proxy_port = _get_port_from_proxy_url(proxy['url']) - if proxy_port: - # Use the correct container name pattern that matches the docker-compose template - # The container name in the template is: ytdlp-ops-camoufox-{{ proxy_port }}-{{ loop.index }}-1 - container_name = f"ytdlp-ops-camoufox-{proxy_port}-{i+1}-1" - container_base_port = camoufox_port + i * worker_count - endpoints = [] - for j in range(worker_count): - port = container_base_port + j - 
endpoints.append(f"ws://{container_name}:{port}/mypath") - - endpoints_map[proxy_port] = { - "ws_endpoints": endpoints - } - else: - logging.warning(f"Could not extract port from proxy URL: {proxy['url']}. Skipping for endpoint map.") - - endpoints_data = {"endpoints": endpoints_map} - # The camoufox directory is at the root of the project context, not under 'airflow'. - # camoufox_dir = os.path.join(project_root, 'camoufox') - # os.makedirs(camoufox_dir, exist_ok=True) - endpoints_output_file = os.path.join(configs_dir, 'camoufox_endpoints.json') - with open(endpoints_output_file, 'w') as f: - json.dump(endpoints_data, f, indent=2) - logging.info(f"Successfully generated {endpoints_output_file} with {len(endpoints_map)} port-keyed endpoint(s).") - logging.info("This file maps each proxy to a list of WebSocket endpoints for Camoufox.") - logging.info("The token_generator uses this map to connect to the correct remote browser.") - else: - logging.info("Skipping Camoufox configuration generation.") - - # --- Generate docker-compose-ytdlp-ops.yaml --- - ytdlp_ops_template = env.get_template('docker-compose-ytdlp-ops.yaml.j2') - ytdlp_ops_output_file = os.path.join(configs_dir, 'docker-compose-ytdlp-ops.yaml') - - # Combine all proxies (camoufox and general) into a single string for the server. - all_proxies = [] - - # Track if we have any explicit proxy configuration - has_explicit_proxies = False - - # Add camoufox proxies if they exist - if expanded_camoufox_proxies_str: - camoufox_proxy_list = [p.strip() for p in expanded_camoufox_proxies_str.split(',') if p.strip()] - all_proxies.extend(camoufox_proxy_list) - if camoufox_proxy_list: - has_explicit_proxies = True - logging.info(f"Added {len(camoufox_proxy_list)} camoufox proxies: {camoufox_proxy_list}") - - - combined_proxies_str = ",".join(all_proxies) - logging.info(f"Combined proxy string for ytdlp-ops-service: '{combined_proxies_str}'") - - ytdlp_ops_config_data = { - 'combined_proxies_str': combined_proxies_str, - 'service_role': service_role, - 'camoufox_proxies': camoufox_proxies, - } - rendered_ytdlp_ops_config = ytdlp_ops_template.render(ytdlp_ops_config_data) - with open(ytdlp_ops_output_file, 'w') as f: - f.write(rendered_ytdlp_ops_config) - logging.info(f"Successfully generated {ytdlp_ops_output_file}") - - # --- Envoy Configuration --- - envoy_port = int(os.getenv('ENVOY_PORT', 9080)) - base_port = int(os.getenv('YTDLP_BASE_PORT', 9090)) - envoy_admin_port = int(os.getenv('ENVOY_ADMIN_PORT', 9901)) - # For local dev, ENVOY_BACKEND_ADDRESS is set to 127.0.0.1. For Docker, it's unset, so we default to the service name. - backend_address = os.getenv('ENVOY_BACKEND_ADDRESS', 'ytdlp-ops-service') - # Use STATIC for IP addresses, and STRICT_DNS for anything else (hostnames). - envoy_cluster_type = 'STATIC' if is_ip_address(backend_address) else 'STRICT_DNS' - - # --- Generate envoy.yaml --- - envoy_template = env.get_template('envoy.yaml.j2') - # Output envoy.yaml to the configs directory, where other generated files are. 
- envoy_output_file = os.path.join(configs_dir, 'envoy.yaml') - - logging.info("--- Generating Envoy Configuration ---") - logging.info(f"Envoy will listen on public port: {envoy_port}") - logging.info(f"It will load balance requests across {worker_count} internal gRPC endpoints of the 'ytdlp-ops-service'.") - logging.info(f"The backend service is located at: '{backend_address}' (type: {envoy_cluster_type})") - - envoy_config_data = { - 'envoy_port': envoy_port, - 'worker_count': worker_count, - 'base_port': base_port, - 'envoy_admin_port': envoy_admin_port, - 'backend_address': backend_address, - 'envoy_cluster_type': envoy_cluster_type, - } - - rendered_envoy_config = envoy_template.render(envoy_config_data) - with open(envoy_output_file, 'w') as f: - f.write(rendered_envoy_config) - logging.info(f"Successfully generated {envoy_output_file}") - logging.info("--- Configuration Generation Complete ---") - - except Exception as e: - logging.error(f"Failed to generate configurations: {e}", exc_info=True) - exit(1) - -if __name__ == '__main__': - generate_configs() diff --git a/airflow/inputfiles/urls.dh128.json b/airflow/inputfiles/urls.dh128.json deleted file mode 100644 index d3b40cd..0000000 --- a/airflow/inputfiles/urls.dh128.json +++ /dev/null @@ -1,130 +0,0 @@ -[ -"https://www.youtube.com/watch?v=EH81MQiDyFs", -"https://www.youtube.com/watch?v=YwC2VtRFBPs", -"https://www.youtube.com/watch?v=keSo7x42Xis", -"https://www.youtube.com/watch?v=K6OlxDi1cws", -"https://www.youtube.com/watch?v=eIYjjvR_k6w", -"https://www.youtube.com/watch?v=CprKmvtw-TE", -"https://www.youtube.com/watch?v=4vB1bDJ8dvA", -"https://www.youtube.com/watch?v=kJcvr693bjI", -"https://www.youtube.com/watch?v=NPQz5Hn6XKM", -"https://www.youtube.com/watch?v=DCo-7dCw2OY", -"https://www.youtube.com/watch?v=Q0996ndUMxU", -"https://www.youtube.com/watch?v=IxbFckR3yIc", -"https://www.youtube.com/watch?v=xt5QQgEqVzs", -"https://www.youtube.com/watch?v=L9pzC26i3BU", -"https://www.youtube.com/watch?v=YlkzSAqV0jE", -"https://www.youtube.com/watch?v=v9ZxQw3NQA8", -"https://www.youtube.com/watch?v=EB_eBvRsGqM", -"https://www.youtube.com/watch?v=xJ4PHYU3oY4", -"https://www.youtube.com/watch?v=kHf-eCb7q2I", -"https://www.youtube.com/watch?v=q3hNcqo5qdY", -"https://www.youtube.com/watch?v=097ujVv38LU", -"https://www.youtube.com/watch?v=VYnzo8xa_dw", -"https://www.youtube.com/watch?v=2y690c69yb4", -"https://www.youtube.com/watch?v=R_JiPanFbEs", -"https://www.youtube.com/watch?v=_VF9sk-IjOE", -"https://www.youtube.com/watch?v=01yS1dPQsZc", -"https://www.youtube.com/watch?v=0xW7slvHwiU", -"https://www.youtube.com/watch?v=qeeC7i5HTpU", -"https://www.youtube.com/watch?v=McvQBwZ_MfY", -"https://www.youtube.com/watch?v=ssQ456jGiKs", -"https://www.youtube.com/watch?v=Xz84juOdgVY", -"https://www.youtube.com/watch?v=6jw_rFi75YA", -"https://www.youtube.com/watch?v=XVtwjyQESLI", -"https://www.youtube.com/watch?v=GCuRuMZG2CU", -"https://www.youtube.com/watch?v=SLGT3nSHjKY", -"https://www.youtube.com/watch?v=KfXZckcDnwc", -"https://www.youtube.com/watch?v=krlijOR_314", -"https://www.youtube.com/watch?v=c5TIIXZTWYU", -"https://www.youtube.com/watch?v=xbFlak2wDPU", -"https://www.youtube.com/watch?v=ESiCVT43y4M", -"https://www.youtube.com/watch?v=9K-8HK9NGPo", -"https://www.youtube.com/watch?v=AXfq7U9EHHY", -"https://www.youtube.com/watch?v=oWGeLLFTwhk", -"https://www.youtube.com/watch?v=dGTid_QDq3M", -"https://www.youtube.com/watch?v=s2GdkHY7e74", -"https://www.youtube.com/watch?v=EYRnywNSHfM", -"https://www.youtube.com/watch?v=8QcanJptlFs", 
-"https://www.youtube.com/watch?v=8_B0MrjTDqw", -"https://www.youtube.com/watch?v=2LealZ7TTlY", -"https://www.youtube.com/watch?v=dtBosQzUqDs", -"https://www.youtube.com/watch?v=PuQwOWigWVA", -"https://www.youtube.com/watch?v=LOlVXM27ap8", -"https://www.youtube.com/watch?v=JtgKbx6nm7I", -"https://www.youtube.com/watch?v=owFxod3Pe70", -"https://www.youtube.com/watch?v=dmBpn2ZjNW4", -"https://www.youtube.com/watch?v=7Do8GAKRFsw", -"https://www.youtube.com/watch?v=7oysSz1unf0", -"https://www.youtube.com/watch?v=Z4Wn7qrR0nU", -"https://www.youtube.com/watch?v=wvgwnY0x6wo", -"https://www.youtube.com/watch?v=qUGZg985hqA", -"https://www.youtube.com/watch?v=pWvyocl7dhI", -"https://www.youtube.com/watch?v=BMzSz3aiBFU", -"https://www.youtube.com/watch?v=mgOGXUctR8U", -"https://www.youtube.com/watch?v=1rIhg0Z-Ylo", -"https://www.youtube.com/watch?v=K4hj2aQ8vCM", -"https://www.youtube.com/watch?v=jzMt0J7eohg", -"https://www.youtube.com/watch?v=LeYfSHB1zZw", -"https://www.youtube.com/watch?v=hBS3QbVFHQk", -"https://www.youtube.com/watch?v=2mBdZZm8Syo", -"https://www.youtube.com/watch?v=zaZE_AHeRIc", -"https://www.youtube.com/watch?v=DBod4x5OZsM", -"https://www.youtube.com/watch?v=lNYnMLhMMNc", -"https://www.youtube.com/watch?v=Feo_5sWRjY0", -"https://www.youtube.com/watch?v=tYWLm75nibA", -"https://www.youtube.com/watch?v=xx1HYybZDH0", -"https://www.youtube.com/watch?v=EyIY0BKYIrA", -"https://www.youtube.com/watch?v=BfAoe4GbKt4", -"https://www.youtube.com/watch?v=qmizxZdHB7A", -"https://www.youtube.com/watch?v=7K73KytWJR4", -"https://www.youtube.com/watch?v=hPyi-EnO_Dw", -"https://www.youtube.com/watch?v=M4Gp7eMj2IQ", -"https://www.youtube.com/watch?v=rPOOnshXEOk", -"https://www.youtube.com/watch?v=fmOB4FNj4MM", -"https://www.youtube.com/watch?v=UgwjPBJ-iyA", -"https://www.youtube.com/watch?v=tInqj66fkxc", -"https://www.youtube.com/watch?v=tok-jMC1V0E", -"https://www.youtube.com/watch?v=2IuaROF1pMs", -"https://www.youtube.com/watch?v=Ak5JpqBA5No", -"https://www.youtube.com/watch?v=A_yH2vzq7CY", -"https://www.youtube.com/watch?v=4nzsI5fxdlA", -"https://www.youtube.com/watch?v=1FfwsJInFOM", -"https://www.youtube.com/watch?v=uRjJbkgf_3I", -"https://www.youtube.com/watch?v=HMjduefTG4E", -"https://www.youtube.com/watch?v=Cw9hUSFppnw", -"https://www.youtube.com/watch?v=vrobF1L3BJ8", -"https://www.youtube.com/watch?v=tIiVUsKPCEY", -"https://www.youtube.com/watch?v=7qprIRCTX6A", -"https://www.youtube.com/watch?v=HREKaNF7TT8", -"https://www.youtube.com/watch?v=xlIgqZ1sW5A", -"https://www.youtube.com/watch?v=6_uA0osze4w", -"https://www.youtube.com/watch?v=jarbK6tvflw", -"https://www.youtube.com/watch?v=RWmeSE312FA", -"https://www.youtube.com/watch?v=hhI7lAonIrU", -"https://www.youtube.com/watch?v=4k23-uYPObU", -"https://www.youtube.com/watch?v=rIxiOD0dA3w", -"https://www.youtube.com/watch?v=Ry-_mpn3Pe8", -"https://www.youtube.com/watch?v=m-H4fOb1o2Q", -"https://www.youtube.com/watch?v=NhGxI_tgSwI", -"https://www.youtube.com/watch?v=VTslivtVfAI", -"https://www.youtube.com/watch?v=huSCDYe04Fk", -"https://www.youtube.com/watch?v=LF82qA5a05E", -"https://www.youtube.com/watch?v=kHaHsbFg28M", -"https://www.youtube.com/watch?v=NKDFri_kL94", -"https://www.youtube.com/watch?v=BPIlpDQwWqA", -"https://www.youtube.com/watch?v=UTCAshkc8qk", -"https://www.youtube.com/watch?v=EkUtGGKaX_I", -"https://www.youtube.com/watch?v=tuLyfqdpYxU", -"https://www.youtube.com/watch?v=snxBL-8IGCA", -"https://www.youtube.com/watch?v=Mo9m8EdR8_Y", -"https://www.youtube.com/watch?v=5nBipdnGAbU", -"https://www.youtube.com/watch?v=sLs6vp5TH_w", 
-"https://www.youtube.com/watch?v=OYM5PrQtT34", -"https://www.youtube.com/watch?v=FX3wjgGWn1s", -"https://www.youtube.com/watch?v=1FfwsJInFOM", -"https://www.youtube.com/watch?v=osWMBc6h5Rs", -"https://www.youtube.com/watch?v=aojc0sLBm5Y", -"https://www.youtube.com/watch?v=akf_6pAx024", -"https://www.youtube.com/watch?v=SgSkvKpAxMQ" -] diff --git a/airflow/inputfiles/urls.ixbt2045.json b/airflow/inputfiles/urls.ixbt2045.json deleted file mode 100644 index d1fd377..0000000 --- a/airflow/inputfiles/urls.ixbt2045.json +++ /dev/null @@ -1 +0,0 @@ -["p1B92pjO2Vc","062lUDkVh_k","STuhwk3ajkU","w98BKfN3A48","YvKNperw76A","nJCLbOKji_Q","V4UEWTdaOJw","8r6GHBBNcEI","j__dvM7_ZIc","ER9JxIEzI7Q","Y0cxl-UWoKo","uIbeVuHi6_4","m4qVjxEKjHU","bM4nVx7lfUU","8uHBdeUR8Qg","vAk8Vko8rVg","OHh-TcBH0os","EDSz_vvz6rI","l7HLon3qQwI","ws8kt_5_ZqA","-4K8Epoxxls","-75hacRy9WI","BkeHIe2kSyY","64AMj8Upl-o","iC73HvJm2YU","0EIPzRB8XJQ","yzq7fCeVtwk","RLeg-90lNpw","ah7abb-HNvY","I1zmulStbf0","3pS5tsK6Sl8","lfaYb7GzODw","PTm1Qr33kc4","rv2BrcuCQcQ","YYbCe68z0MI","5mmFqyc6boo","zxGdR0JvKVk","H4tsNz_LsIg","QuImRih_qow","7-6L6pBqZrM","cDEMt5CUgQo","y-XWkmtSou0","QyclJG3U5Uk","YsMb1sco0l4","hKJyUPMODCU","yRQbu0WqR4g","6GdjD0SHPx4","EBi18KTn3-c","s_cgQeSYxuo","OVlxRdcciXg","ZMXhiDvSQ4g","0jjE0sFExzo","LZknJU-ogW8","my6VP20M5iQ","cwc6BY6j-7M","luN-tRLZC24","hT5aMPOECIA","jbOmf3KbMD8","XRL8IS66LCs","addGY10XZ1Q","wf8ruFMRVnE","LYCvxtimpQs","tV9_2SLzVnk","90Nc6mfh-vU","qrxORAOfbhc","11gLQbEhtr8","hZgNN1fcYuY","C3e6l3Wvm0I","GLBp15LyCzc","Wuw3soXf9-0","dqOl6iwH3RI","rTBtMk2Wjho","buJOsdIVYfo","yZDg0uuIS7s","aqMc-5wC9Bo","RFx4lpTEznU","oxYDUR7qZfo","Oh8IEwx4Byw","5qVYbbl_aO4","rbIlyK3EJbs","HV5RaA2EWsM","HxgoxTSNblg","47NhE9wcKRo","4jy9eMyJItQ","G398qrHoZ1M","bseEc5u_dW4","3Ef01OKKbPo","pGLb8KRyoCA","QmN58B_FobI","6OnLDUR7rUI","HBki9u50eys","qmeTQsBqPtw","TLBV9daxuss","F6X8gWfPCRw","qX53pkjfRuk","TsZ_NHFkDJI","1UId9Wgtr4U","q1fsfHyKbQg","u6kJQm7wjpM","Cs9Z3NgVDPA","AI-3mQy71CU","h9r3DdDDAAI","HiH9giXE9XY","S8s2SNpX0w0","2_bYIgnkZvw","eIihrN5VcdE","beix_aMbhH4","2DIu6vq_SJY","Pu6Mq7jEb24","RW1wuKG3UVU","1JQpXZxuLw4","InjxTddLJ2A","KPuxhTBXQf4","4jWbs5-jofA","3xBdJHvjedc","grCLAx00FlM","StKI6p_Hs8I","R6EC3XM0wow","kGBV2xzdm5A","mDh7GzoDAGs","NZ-FodQy-Ho","KE7B-3KKUaU","krKXm7y0eMI","n2v1OHW91BU","On5ceeJeYNU","9s1YmtsH5hE","dDUWGsfBkdI","ys0qTrxyp1g","FLIVACWYes8","s5FY1_e3Bp0","Vc0-inWGIdI","tweok2IyGLg","cWG6dFwJLSM","PbJZRhj7yI0","PSYoRmru9A4","odLZUTPUp8g","-Q-9TzJEsvc","pN7_Qgr5TmA","zeO06Zi2abA","EbPp2YzddJs","dp72kHM192g","HStRYYIQlxQ","bXxcVaDLE9k","uYRjkvyT_Q4","FEmxgR5F5Pc","me8P3OUTTJk","YNRW_Fzspic","BsScWWJRBlg","pco1Nur8dr0","J-eImTuwzOs","Q3DwXQrQbP8","leHW45pTc6k","266bnNmTzrY","EwJi9jyvsB0","ufv6yCt8AzM","Uo7OTxtQs7I","PxpxKu3NXTg","SCGcwDMJzB4","IWngwMjdFvg","AqxT3zw5z2U","YjeTdUEW5uA","BNqMbucspTs","K47b0CRlTjw","5jcb7Jy452o","7FXXZDndGck","TrlH_jLqAQg","X57oxNG7FNg","A1Cs1IJLii8","2ND3BUKx0DU","TckToeQkCEk","RLKQDUrEqIY","x6sfqz7L-cw","M5k6WJosX2o","9rSk8kiJvAQ","lxyGCAGIV5k","re4-zG39y1Q","xFcH_CovL2o","7bzJ1pDeP80","Ak_uSvQ8za8","3O1jJEKaKeQ","noevAR9Gjjg","ydueQz9eNIY","V_voBpqgBgE","cpmuZOSsX8Q","-3aZyO_Se1Y","WQRIeplEIbU","O7ZDFzXV7eo","c4ZlnKOKTeo","mxCt34nAGvc","Iwh0W8m85lA","8U93dyGAbiU","IkbO6BJRp54","YKf30Wi8Ds4","HsQa-O3jEOk","rTCi5lK4YVk","ab4BQaf5JHs","yfNEVuWC3Mc","H74SLPAyTv4","VXzHKnoY7x4","xUcRCgSb1jQ","sRWWcEzz8NA","dPVB3J_JkYA","AyIw-mxYS40","XPMOYanFDAM","xSSz23FlNLk","1bK3xBCFads","OhXWqpQvPrs","yUOcSez7Vhw","BdZpT__gusI","hBXZbgj6FxY","abQwWtLOBIc","Pb7ktCHcJbQ","39FeRy3JB_c","_L2ifxvVEh4","nvppfIC
XXRg","nuq_BtRqeNI","Ce8Ppb-FPgE","8BAEs2vsg5w","gxAhy4hmlvc","6xCqPJ0UbzI","2c67M6FYgog","t-QyLeUP7F4","nWkSP4m3D7Y","qSpC1zPcmx0","wi8gS9J-LV0","xsvHxL1NVng","mjrn_sqvN54","14WgEHdRSfc","DLX3ygCIbsM","_18rJvhqihI","imE5eHSqOQY","FXYH5lMH3J8","nQuG9pj1bow","o7jlrpLY8x8","RmkZS9wcGQM","0FKIGnKtIFU","a_qo7bHddNs","PWMEu0AjPmc","Th1Om6paxOM","dBvT-qmgbW4","isCDdF9JuaM","sz78ugPEYM8","2wKOAuSYxSk","wzQFmTYoBvc","p0NgUiauS4E","3_F4oeNb_UM","XFP80_sFtzA","pM4tCnvafeM","rXKckuaAVLU","qr0TWE0rbtI","7-dKQ9Hrs_g","GIBZ-QXfbmU","r_ic443Xmpw","hZF2VFetWP8","Y0HkCSP_8IM","Koq60NxcO8I","EivvuvVMTZk","kKiep49BAMk","fmZO9vMgBOU","lTvdS_DND9o","ge7ap_9098s","M_BzLcg1NUg","nj0goMgGvWg","jn4joZgRctk","2qSF-LLN88Q","yYYvGmFtgO0","uLuxgHWmupU","oCp4tj_buMA","ewf_NOWL6m0","U3pxUh1p2-8","-11_uCF5xB8","INyVgd463Mo","by65uwfdDBQ","ucu5ZdVz6GY","FSfgX7rQavk","OWWJk2vc9xg","Nj1DvfQK83Q","JWpgi_mTRp4","HuQ2Tg7wytA","uwgRydrTFRI","c-6uCYfMSWg","db_sGzvZOKo","cD8N6Kik774","qyIFcRHzjNU","WMNVePeAyKM","zOd54z0PO-I","iyWgWbTJ1MA","TRUxMRnY0-A","0NNoKLUUixM","qZqkuZ2ELRo","9o5wULkhlo4","3KcDyMmM-e8","Q7CzMxAbRG8","ahqc3ytRa4c","DlImlawJ-hk","x52WaZe9wwA","eCLzqbpCVnU","Krt1pPvGIOw","_80PbZVjQQg","c0cVGTVVnnA","oRhoC9yAsm0","LkT4KXRRQAY","YEbVx0PZCQ8","inaFGvLbhKE","UIvat65UMjA","Ed_-i9Ho-CI","s_ehnN2bPHQ","9CWsOfIUMBE","fYAVGDpNpZ8","DG8cxBMgPys","Hlf9fMTGKVk","aqEu_dOsd8o","BV2q0eZRpoA","0v-F944AC2s","nOuOn42LSx0","n7bZaRl4U2o","yMMYkONKCjs","eRtmFxpDT90","XBpCB8r-99M","Zotdc_gc4os","6VjzS4e2YQ0","97AhxKf-sbk","aVktEHKIfpQ","6I8KpmCNrPM","Em8cb4EzACs","yRr7AtI02IE","QLmnEKnKiBI","OYp2xR-nKRA","Vqya_9xHpms","ZyMyD4hO4gQ","KWqVTxXH464","Os2ZmnLtuLE","-UUamr5vBpU","VQ1n6qOc8AE","2awV9P83kS0","SPWM895T_QA","6GnzajuojUc","1TN2bUPDUwU","6ISrb3eHVtE","21wU6187_3w","4V_l0423q00","-n4RnPbXNk0","dzyEYh4fhyA","Yz5_iHv5Mwo","Th4UG034jNs","abwXKTHsbhE","j9YtoG9Zp4s","yP-hZ1iN4eQ","ERpLbAlgAro","dtNc2d-PJ_k","NMsTD2RlTno","orQ1ym0wLkY","tChdrPK8sTs","iABNU-OcwnI","s9w-FAcW-5c","kmcflbCdG0c","VszslhiecWo","oigM5UoYS0Q","QnRyyFkLjvE","SOLKzEDiMmc","vnJ_6_1Jrtw","QTwojdrABtE","00nTwvKMLFs","pov2DAZMr-A","jn1drf0TQoI","Y_oP6oZRrU0","XgKukxMwSEs","GJVxU1UMx5o","CBxtShN271c","_LaWHnDfJwk","WYlBTk72umo","DzNxmsOhhyI","_Utk8UrmaF4","SKP1cYqJu1o","uQDzW-PUmVc","hIhQHMiT898","G33gwYvMNX8","xQ-2v-dc7og","Ogjs69WdJ84","smlHuGyUm_w","SAsuNdgveOU","7LFK8846YdA","CeF6scWzlKw","FfRFvkooSUQ","YSrRF6oDzCQ","gmCCijhXjOE","wcpYvV-zBHg","jPC4sdzqvkw","HHjnsP_YqZk","1CIiv_z8qKY","SSEcHW6vr0w","EsJOgJ9QUj0","esX4xutFSH8","fe-m24KTyR0","TcGXr5U2fok","ifiCagmNUVk","mHXNGapI6yA","2gb7FodM9-g","OoGwCSKIUjM","bicxck-s0Q0","byghDTq-KXs","Pll4_rk8rjI","t7C1DF4wOlc","DC1_-UjdKYk","6CNfXQKnfwI","JkgI0lrTnmM","MwNZ2uuHgSQ","JW_ZYEcKVWg","JNxygGL30nw","NXuQbN2zDfI","w-wnvWRpUg0","zP_6EUQJCFs","z9LKyZM6lvU","fnhTdbSzbsw","Im9o2dlsIFc","owO2Hg8TFEo","OZOAZNVSHW8","mz8EX7Lncu8","sBtCoz2ctjw","dL2OXxxj2ME","2OSV4VzrLrg","9xwXpmEk8gs","_BzEXp-1V8k","SxVxhtxO8zQ","tjmxil7CQBU","hfzdfiD_1AA","6T2SChbKuy8","UjB0z20Nawk","cs0HGblZrZ0","93DcnDcdzTA","mYAjKp6h4yg","Rhmwlv5tYs8","4PZ4RJxbzrQ","F1RFxEFVA7g","neWAy6JclI8","tvvZOv21hGI","agAaW9jhVl4","xIr8xSIb-NQ","zwAaB-tq0hg","TZfywT8GMvs","OyJxeTdxd4g","6ku3c-j74CY","u18wSmo5f6Y","3o_Us45MrAQ","oU0jsZq57o8","1q-iV00pAVc","AI_sHMLnWZQ","X_0G2NuRqKg","EIqmU4sINtg","Nz7pIdeZkYY","mhgpeoQ3UKs","gndN4u99jFo","fVk5vDCGtQA","I0mtoxYHJTM","zLHVOozxN9Y","iIXo7CdN544","RBKtimBbwUI","4IR-E6TbDFY","ZFyWypaGzSw","WrPIQTEuOA4","SMB26Zs3jhE","VFzC_IdOm5w","bFFOpCtAztc","3jJnESz_BNg","HEE8EFriZiA","u9UMswzoVBE","AnTvhWmv7rQ","rmqwVZcCNI0","klfmcX
bgbY0","aA5nazQz9p0","O7GBZZhY1u0","hhHEanxLTOA","mRC7cv1EZvI","WjPrizhmKoA","O3n73S_EskY","4FFZwp2sZMg","KfnihO9i_hw","JSOLFnA2f7k","9i0sgRpGtpg","nJqnBlR_log","T0tL9KRx9ls","eybwTEMRBrk","sIX6GdMlIvs","D8E9O1y6Lcc","zpm68zdUgqo","PfEDTcZIFNg","G2Jm3Ex6Mnk","OsF2qu6HQiE","59hyySMHiWM","Gm6ntr-Kc_E","j-9Y5Qk9Yo0","C3rZd_WZOIg","-QP77ppCgq8","5LcEflmtysA","ivp5UXI8_do","8IB6ofOMDDI","X442qXctKkI","HFrZqJHCy1c","h7E_HqeXayU","U46WhnsOVRE","Mkt79aNU3QQ","TeKwlhlEsbg","WVDSnlUDs1I","_guG9_R1WF0","Zlbhk7NTNdY","gOCm0Quvx6E","FlYYquJbHzk","JhOtliUso6Y","nvThGfRYMzA","eHTW6vQ1vJE","ZC7JGyf0l_Q","0nwF0Ov8x3o","yubcrWsBsYs","yJ9BN17YUVA","GWxTtP34L7M","wYvh3O-y100","GkI5B8I5Q_8","jBGkVF2eyb4","ddEmigKT9zo","suYLijcBOzs","ihGPLBNErPk","7UtMcrpf9Hg","YaywnCVU93U","qhKs2dDVVGc","Dy4UiAHlmns","zMrj6N_tm0c","e8VzICkwK2k","5Wv1zEo9t3o","PkUQT0n9ySw","cL4ZC56-sW0","UYGYbqA4SNs","sF96zUvXitE","bj3FHA0j2Nc","lh9X2gs0tbE","T6cmpoFO2Xs","BxI-ljAZStQ","mYsDMDa8itk","AOg7OzvXmkc","QSJ50finszc","AI3h73BjixM","k5y95rwNEdo","RhKeLs28nrQ","E6w3dprMRC4","6N19xXQAwcs","joML3SVjUSY","W3C0f6H5ls0","rdABNMUAvDE","csDtgjL0xIc","QB4HIxG-OJE","9LZdPS2vor8","Al8tuzyqa3o","TZqKD76Edtg","_16U5WQ7OvA","PCSCu9VuBh4","DzZF6bQjG6c","iaBJUZmjBB8","zRYNxOPIh14","_Yyk21ph87I","_jzonzBoG98","dMzT1W4PK3E","q9VMuog02Rw","eotnBNzyMDo","uDWLLE2c8nw","WRp0lleIp6o","6iAn2EanHJ4","TF1_WqLZXIk","rh2DAzMQfpc","iy-AkqIwf68","nwkgu0PxVXI","0MiLw6VNIZI","X4-jzfJTgzg","WB7ZVsdpGbo","Rpke09Mvg9I","HHK_iXJeK0U","zP5L4IPjxI4","n5SbkVHW3mQ","vMCYyjZr7d8","hVh3utBOMYE","oLkV4zSFq0Y","kixnk2MEm4k","K9uirAe6ExA","G0SeoByLE-k","niv2GfyhQos","IGKs91fIJUI","-CaJo8jFUvc","EvqQGEZZftk","ZJe6ucsmlkg","SbDLxYZyhIE","HDltg6vAjbM","CIr65btI_Kg","8bb7WhNVvNU","XbL5IkGyaks","4AJdWazYoxI","UbUS386J4gY","5UO7Msxc9g0","3Q8sYpulfdo","CX6w1qiqaKk","iW8DxvGUV5k","6Efy6IjgavA","N6cXJRMNw2U","azsrd-J8vTM","fH26pD01wTQ","NhUnf1tWKcg","w_5A3Ic1fBk","4DtQj8z_bR4","VJLs1azEseY","vS6Yq3hlycQ","1QhmGxw-Hvk","eQpA0x9xNY8","ZNfvVwhf33U","ppZLA9sMb-4","nz2iCBN96nk","RxCigUb8le4","P7i0jiW0UYY","4B-quRt0ZTY","tw5fkFD9G_8","IXoJu4_KAKI","TvRG9JneQ6c","vcspoU9gY-Q","JyanmOgb0pw","95DUuZoWn60","v27FSP6L7-M","RkPf2vMwDcI","8WBh25yNR6s","UqOFfhmQwxU","ufyEed9-njA","rMRH_5yjns8","OpmrbG495a4","3Vbh6WTW-Gg","2x4KbMJFRjM","h7nEKdGUvfQ","lOA5XisEoDM","6HouxzE1p-A","HsYjS-ZIKdc","w-x9eDzh0_Y","geoxsVRdlxQ","3Lj36dgR5lU","9D5_dqYbd50","62sLJqKbHoc","3r5j8V3-Oq0","Uay2gZZcbpk","OFY1QcoEAAE","lPyOzWB9wbg","Zdl67eOtIbU","XZmjFenjBsA","XbEYt-cj0qA","i4XZYWt9G2s","mDzrssg3I-w","dAOhKx-tprQ","oiVMxEoVyiw","tLGI_zwm9g0","mzlglxjkCYk","WHfPO8SGJu0","ju7NvZc-gEg","SF98mDyLLhU","ZxYtph4VBCI","duZBz6VV1DU","QkyoUjJt6aY","Os955e-J1ig","YiUJViZD-uE","agza-BF1FC0","evjumTW2QEs","0aRQYttOpog","1Lwpil7IOrI","zs2r-Etqfo4","LBXgWHNQrps","dS5PplN5ynE","DGd5urkBX3U","IjGASi6s5OI","UqN79GMESLI","lk_fCHLAfns","3avyjdtcB1o","lhEiOxdvmg0","lZJ1fTqe-Fo","WozHmZQdYuI","7Z5ul4lyi84","gyWrazme61g","XOzED6QtOGI","Y28af_iPHd4","LIMzn6hk-SQ","-Kk7KaHhHvE","qTrd_8zb79g","vGizD-q8mlA","Ph2zwRz-uY8","ps7GVUHf7nI","LzIXj_5tAW0","UpNhyj8FgsM","JbqJ2YsHY40","WtoRblP8ynk","kw4SILzw3p4","LXv9_cSKQaw","OiJhIi0t3Oc","uYO1PL5XwOU","Yqbxh2W8GWE","7AEeiW3WCpA","jHJAOe5wmy8","txrEBm1-FUo","BpkXzdxLZXk","8BaNOHTPoDI","yK4rKukCCio","xoyDohlpOu8","F-ISHb7A4fw","2MdCZ9u4nis","tb6mrQOHr5E","GmxAtRA5HN0","cjOYmhjItzs","wpTGy8FfXCs","jUdseSriQFY","BVlX0g0QmL8","JyK03mDYyzU","O3jUgZA6loU","upUym6j17ok","oLUhdyzBoAg","B_5665Fp45I","RjZHJRs8r8k","KeQWxVeUYi4","_VGTw-Ug9Eo","_yYDfbEmmYQ","xqggovJvhqw","4iHXuMOUOI8","qmmZaiCnrb8","o3lbszHxgf8","Yy_m4
4vlXBk","0zxQ3tkpeiQ","ZpwusZW1d8k","lygtU-Z76FE","4MFR3uZQSlg","Sg0YaAN_2eE","-LanMHfat8A","dYWw9L4wV-8","b_s-M1OWKvU","iSRfQdDBjPs","VszAO9ZIvGE","HcunpoMzAVw","1i9xKXMh09Q","2ZX-2UhWg2E","iDKqAkS2zrU","Ww6tM2f9g98","VpwwYl7nGsQ","BlU3zJzGZzU","UGrCJX01pYM","cvcVqyltmug","TQPLZCpB5ZM","7MHBT8yHGuE","xFxa9AR7kYA","6jUFnZGJ54U","bKcXBUIgNMk","XxCHNSsJtEo","sGK6mq_gOvY","yW002WCNrU0","R5BJGCE401Y","TK9vaktGzYI","Z9FSEodDBsk","EiqWvN4oce8","qaSUdCthgGw","D_STVxyXAE8","vWPabtfVKHA","4DjirKanLhc","V47qpErzMQw","VS0cihr4ujk","lFVgJEYL65M","PGvuTD-y-O0","rfCJySTApwg","SeVsUAk0iVw","VQtYbZwQoDw","iEQ8brOjMIo","veHq81zGh00","vBZok7LeLF8","z1sfhKYrFB0","qO3Im1M259U","D57QUuZ_XSI","qnyvK4-Sg_E","GWjrGiukFOA","ee3KaUGjk9c","C1RN79rzY0c","vuKQYrWJE58","6wVPKMDGaNU","HFtP--9rd64","wvWpXSU_W-o","BAqvPIDdva0","rX_cZo0-EPM","PcMSlnFXSJs","3-JfWjxvYw8","LLvnqC-Jq0A","qxSQ343mGjw","88sR7ntH0zk","IaufNUh5SRc","uiEOdX1dfEk","Xhd2po39r7o","LmuZI8sCgNo","PVntaWe8IZY","gt3MJr9ThrA","Voai7BHCZiU","5Ax_L1eY6Ck","feHVXMtlL3E","qugxaGaVjLo","mpdpVYJREl4","GV9y8u5O5Cw","tk1zin6mPtE","DrQEGC8Y_As","-WiVpDD0Ofs","E95B52loV1g","6qvevu9oq7g","1q4xavkd8fI","_Zj6Cc6CfFs","hiLZMRlsTNA","iUKnvk_ZnFw","xRzSdunJAzE","fRsDmYDmQm0","PPPj5gDIoQ4","cBPZ10uOp5M","0N455FApQMY","i505HvohU_I","rLDj1YXsXhU","kXbLh9Cyo7E","jfmRgLpwMqA","2akMvCny0QY","VSeeYsY1bcU","AovJEadlWMg","LrO-lwgH1LQ","V7FewtfNT1c","xfaBqRIWPQA","zLs9pp8oPHE","6QhlB3yhC_Y","NpEjzgZhyYw","bmw74B5o90U","lmx3m9dAiEE","Eed9-ovgpsw","oMIatfIufmQ","uLrJ7JHPCcA","c_IWg7fXu88","PccWoNclI4M","OSO67L55gUs","q04xHbyRTAg","levZl_W0Io8","8Sy40HVAB_M","tnidaWvnJwM","OsIYAsKhXJg","-DHplooM45M","45YpIbP1b7Q","X59WgY34rLw","wdFCjzWQCYQ","tIn0P1vNas4","wH9NyBMLt4Y","1JU-0RpdMuk","cKVhUXDrLRU","cG_fiIBOM2s","jJXi0QcDkIg","npFiQsxf8Qc","qN5HmFdooHo","787gq4Ewa_c","05cq5uP2CTM","BzHhPOsYJ6I","QKbbkzzvH2k","T-wTF-ZgAqE","Rh7HdCAnB6M","8iJjbRPwUZA","nYKPQfIF9VY","YeVZOYM7vvs","5rbQ901GfVs","kzLfxBLHcWk","WPJbf7GKULM","u9Wv2ccyIT8","R1MTrrSblfY","Lu8jdMOopII","1MJ759KvHZY","Ia29XSmvhsU","_uOdV0X0Q6Y","GY-QFD-E6C8","zbIR5vcKo8Y","yRoqYyFpwjc","xVfLKkdc6Lk","1RMQFWqb1w4","Q955iUr9eYY","AsNY6X52mhI","ln43_8gyDcg","l78mGDPkmWM","vgGUqsVihYo","0CE2w-fSclY","GFIeT4HhZ6Q","pdhjPB6Bxrg","sfv23BHBzoI","cb5tqlPGh0s","A0ZyZHA_f1o","djc_7r-Nv6M","zWLmc47P8Uw","u_QN6ScnB4A","5GRAU7u087I","kHNG3KDcDXg","obzkJUzqEig","rHyCtXXQpOo","096XZALv8lw","4fE3mUWveJk","NMI4GDgO89I","fQ2UKE1hFP8","Kz-nj-mrRWY","eJlUFOyTxD4","RBeMZPS8gck","rZ5CUPMB2YQ","jms_HtH9FcY","XaYvaHbI6jY","rtiL169AXVI","LDgumtrsfQU","TEbACvrfjws","G2xkX75bYbQ","BZSTmEboYfo","M7nnOVRa3Jg","av7b6fFjmaY","6XjQmcXNaJo","ydJSazn2AW0","8zjCzymhb4c","-78Pytn7zhw","gvHoGiiCT24","I5fWtacX5Cc","RaIKWXifHYE","4TOiNYZj9xA","b0ta5Z90j1w","J4o9uvZNPRA","j_24k25jYVM","v6cRYpYs4xM","Gj_oVR-FHw8","_ss4V9Y_zYw","9hOW8CbYQgE","eAjgvzPBu_U","UPIeioS8WxQ","L7COH0421KE","aMsltp_qYNw","bXBXpff-MFA","1XV0pKNaKjs","_mt80wR3Zbk","P_6FON4YYrg","92ZUgBlgIik","b9WInD6S0wc","jbwszniPVZE","j-E6ZU8Y3rE","5VJbJTcLBXU","CktqyVBtLVU","c6F4HwQGJaw","RixFBTnnJpY","MQVw3F4pZfs","3b1_CPTnuLU","DT-NCl-qZLE","DTu4Z0Hmddg","_q5Kw7EwUC0","KuRTpGPVcGs","hw5Emp23myA","4wTqkzu8PMs","HPvdWF3U1Ic","psM0Ad_Jjb8","jWLXli6g4bM","-6a_uAmzXpQ","4TkIlb7qjBw","GJPLKGRe3YI","XmmtHXpBQXo","7Qk-Gfd-CBw","AMZRIjzG1T4","zz702UTOzhY","2ByzVcqToP4","bYpt0lbU_Uw","bTWa55V5-QI","saDNJPZFfNM","8f-cb2UDH1E","SF-hZHUXWnQ","ujPVbgM5BmE","PcaIObKMYc8","iLDsELPEqs8","cuNY8EjMyCU","iYY1PiORQGI","sWe4YnugJqE","6vUc9mq-gjw","F338eUsWPKs","a7Xg2yT65bg","4SwEJbXjb94","9K12B2hR6nU","kQ8pUm1W_8o","r8y_zlAFAmA","jl1r
MWn2SV0","7lQcg2uWfe4","B86RWjDsAcg","LLOQ640Tj3g","0qKQsxXtAOE","L_08gsFuWZk","OSTfBnj8-ec","TS9ywvrO3yg","rt0F1vvSGh4","OByhOo41v7I","w4fH-xw-SjI","Su44789JrzE","CMoZzx8cg1c","qWdcBcontaA","IpxC8KVFYmo","VS2C0S2pXIo","tDoPM63ASQo","Za9bJoWHlEA","r8yerC7D5Z8","1ryDR9aQkzY","ZLOJar3anU4","L_cputnRa9s","O7DMwBUJnRg","xP9nQpgh2iU","YGWLTelq9fQ","sDjmtAb5vgA","1Qvy4hr_73o","M6VgwvUXg8E","W-tVmlP0gyE","vdJHVhUYGCE","HQz4_CvtgFs","GkGpdXm-luM","S2hlcoEL3TI","e-VWGO86ZnY","vbggx8JOO7g","AOkw7nZzVyo","iGO4CXHFLBo","EvG5XySezjI","mozGGe0eIcU","SRmTk2VJeXk","cTSsq8Yx2GM","FIASrfzExIg","H4GxvMK2XNU","kokBtog3DEE","LIKBtegF7Ac","nmAMpTU_cJw","DP1i-mrClSo","QSuJLQXAR0Q","cN1086311dk","ObNQNrOqK1U","ablBHAG7Woc","lPeAbyFe8NA","ySCi13y6EKQ","ZYAqZkGeamU","CX8i4xTaKI0","0e3Du4L7Ods","vIGQMYvsxms","HfTJUtw-C38","jaCw8yxfaEE","Rl5yArhHGg8","5ZkKVqrVd9c","41o59EOqEQc","8tHozzwqKSk","IjPIUtzZwlE","A12NwcO8X2U","oL2cZ8ALNcE","8aTiD3O0g-Q","zDkZzW-qd5A","VtCH2xR26C4","n8RSMZq41Z4","aviE2MPiM54","GER7QL6z5qU","G9z3uFq_3Fo","9MpzU1qqrSk","uaLQHIjQB0A","oOztuoB8qlo","Iv2PrQK_264","FsWosQPkruc","krwxB0lTqwA","pQWXHe4EQ0k","Ly92jjC4pgo","aJLDIC_fYsE","J6ZqnOB47D8","KVEUGwf41A0","DiimLlOWsu4","pz73LTUN4rw","mgj6L6fSUAA","18nHuQs_tIM","4SJbHFw84Rg","HndvoCrHDYU","WbKySyo_wxk","EP6kQ2kEZhc","7WXxDgMlzLQ","4jdQtjMll8w","NaPynvG31mM","vDlJFDuryVo","E3CdvoaHWNY","ZU3ygC2k5G4","bYraAP4UbF4","dzlDxcjtm5U","4QTpAw-OMfQ","q8kjM83_R70","eF2I05Ak2sg","D5QQkzodKaE","WlOyZvkZ7gE","US-Z5wCd7q0","HOr171N0AvM","0Nj6zkND88U","-46v0dy-nHw","77RJKS3adYo","4LENbWGHy0s","ErNZ8fkMNzc","LqloqW6C6sg","XWWlsoema3A","yQJqCUa7CIE","qhRBM7A0Urk","s0NWSg_EKUk","sY8ZdWHiqGU","zurOlkMmfbo","19enPzkBlfM","10a21GaFe7c","dIN-LBbW0pA","xov6xQ4axfU","UpkxmUDFUZM","xNxoR38-nW0","SoEm2FS-ZSE","moSgOCWV4Mo","HdzVNkQlojY","PuthbVplXYw","JXNlpWfkHOA","oQs7o0lSm4s","SCzqGkrGAoU","yn1aa2cZJ5k","LVOxEHxMsT0","ZbPFUSQatjY","sd4-h2-5CpY","t7hMPrZC1HM","XsSnGFJCsMw","km1mBjtLqvY","j_yccXLREfw","vsdIqbnApi0","Z2Q7f7KB5jI","ckCBZCFtYmg","OuBkG8IIPpc","fqKqXGIANPI","az_DtOEvs20","HfqIG91Ad2c","CFVbuB9skfI","f-OG0RJ_iMc","_DBaDpS3Ru4","DlSMW-ADFLs","ay-lWqGizO8","ndq35HnZCys","N2HR1i5rR74","KHu2WSwWMgk","KCI-AztouR4","xDGmYCJnvI4","jSZALLH_0tg","LNL-4PO0RiE","pe9u5bwqVtw","RoukZUq-byU","_ePYBftEr9Q","BL2eVYk9d6Y","WuQLFtG6ZX4","47ojtik9QUE","RYMacmk8cCw","5GznziYjcX4","06JwlNNvIMw","6SC26f0iAjs","9aTPqh-XcsM","dLemIdmbHCs","7kZ-03DW9lk","i640hakT6M0","srR334VdLuw","RyCl0ZYp7wM","7j3JdMLbjBY","qxw3hf_UW-A","QWYSpQr2wQc","iakBWMz0G2Y","MsrBRZBMotw","fLOB-6n8F9A","zQ71JpmPnko","Q0ZBmhPa07c","ZDaBCCq9fIQ","LdnNkUYLTJ0","npoi7XqykSo","s4paRB_6LQc","-jSeF9HBp_g","S8ywMFfSTG4","yV0n9Br9vPs","oQTekwyNKxo","KCUBXaC5Ums","PQna8nCXQRY","5mgVm83VhlE","CSN6HbFWeMI","9DvBI5O60ok","m6gBXbf58Uk","CdoXoovGzN8","kcLAJGnKJtg","4x79y5_v7ZY","tBsr79UQ1qI","nQViOVspYg0","0QZ1sJQJvRc","fdD0MGDCWPo","_IFB-zju75Q","HX_h4300Q-Q","tjqqOKzjRFM","pOos5p2FULc","6jCX3SI7zVM","iBZN7vmVkMs","-mgNwr7excU","WVlfvKuFi1s","17uvyO0SUDc","TMpbGeEjRyE","Am4exDp1YC0","zRlpvjovsOo","v_q9Gt15kTo","ak16iUdSM_M","RFTzqk7QG3U","S7k6fwggDKw","eM07-3URrHE","obczoXEapWg","z7wnMSEvcds","WSbIw7-aDJU","WhwU4lDy3Kk","4cMqm7Is918","vSBg_0jF30c","nH0J2-GVCbE","kDZbPjHyIno","cuFCJ0Fwi-8","EX3rRwAD8Hc","ExDHWjX8GsI","aqToISQ1De8","238A-bHaB20","_0n64f1tik8","XNAcSsWW7uY","pAi-fDLRpfk","VRs_CauUMwc","8KCbrc04al4","HhSwbYOIFS0","TS6cWXrmS9o","uR5c25N_NyM","lXaz7DETf1Q","J37ECGzZdxk","Ahh1VNehnqY","NKEXnZnZ46s","xqIlfwDynDI","NakVGl-pfBk","5oavF70723o","jdHuiixITE8","fYxiY8kENUU","bzzWxX1Lcg8","v8TG9qyOzPo","wZCadf1KWzU","E9x
lI_bmo3g","Rui1N2joY0U","0P8Y4hGVkqo","oDKBuqSAKlo","wgBOy6fVJaY","lbbGMlDpAtk","xnGqJDxTqVc","qgOS5m9xMHQ","WJCHJ_FKP1o","2iByZ_MiETc","OKhz2j8FRDI","CXRBOW_Kfxo","ppbFz_OKib4","5FgXFMShh48","Rj3wC5e3WSA","qreKY3mAc_s","R0FdxIOq7kc","-v0YbHb2vMg","hH5PalH0SoQ","O5yjv4EIr1I","L0olvRow8AI","cv6Pen53rO8","c1KUjAz3mDc","XrllVQCRCh4","z0n5cj4591M","thpla2iHGB8","UDnRlihEpBo","cp5T0JstLy4","mZOxmRzklVA","Jx-e0mzOYug","S-uwFPLSr54","6q2zjLITU3A","KMuHBYNf1l0","THQu3enXv4k","SW_hSA37a0Y","27DP0c-3S-M","y6sVTrQQ17M","3lqC9u4W5bI","VqGkrSKjlOk","m5gPZt6Q_HU","ehmJo3VHwZU","jt3YTrKE3Sc","tl66nRaf3m4","YFaK2pOVKDQ","XPQuLNH5OJs","44I8A_plNvI","JgS0Q3c6Zuo","5obBqla-Wo0","4II7OfwwX-4","X3vH8V5MRsQ","yEwaZuXbFog","rJHx3VF3FHk","VkaDtAY_1-U","d2x90JTHmlA","IsLu5PyDNR0","Xvb7Rrh4LdI","Ay6aNXTAXeI","uJ_DQDhldPU","gP9AD_Aw-TM","X6__mOQuVfI","D7euNHQQgbQ","FfisDfB5RqQ","_aprorrkFMo","ZbBGcyGaqtk","7gMXLeTJdqo","HzBemRwfHT0","Ii_30LYDga4","idB0FP1TPj0","mB5R6NcwCcc","MuRKmYj8pKM","0ohu06LmROA","-wP6iM_SjOA","swV2UDbQjRI","BpxWcCtiZOQ","Wnp0-xXd0y0","xf_GlIF2hU8","Zv-nYBft9cA","zPIOjjDTkC4","HlMGyMWV3ZE","WOeqgZS0T3k","3Rw-j6J8Hv8","_vNSIuA0hJg","RiLbF71NZCU","CYJtg9_PZJo","eWaVqRzKNHQ","IKs8nGVkJaM","YcZ0_M4Ib9s","1MFRtx8V4kk","J9jSREWcgMU","3BOXzvjc1-8","9Onnwa44-z0","2hCKACqIEQ4","5JqsHAIGtSE","NLmcdkDPOOU","3tKQZ32YXdM","uZV0m8WERtM","umL3PMdF6Pg","OH2jgMc9xf0","uYStRxLf8js","DBPOCaexybY","MR3aZTLyz2Q","uHVu8hREBOI","r2QbRmlKYbM","lfzdrvR9_ag","DSzGv859gas","qrLxWkBk1vQ","CQUzSgLG3NM","IFFEZdUsh5U","Ot_4zywirKk","At2gL_ulyD4","AfGGxwWYu7I","GGTgci5wn0o","5grjUKm-cbY","6vvkao_UVcw","p3aAGsUE-ls","qSFM4HmmLNU","5ZcOxdEtFvk","7jxg3bQYWn0","WqqCOBtiEEY","xra0iif3TWc","cU7NR1nGlFo","27fx2wuWa1g","CbFOqLZ8pq8","KFnJDnPRlM0","499T2wpBVjg","cz-TKAgbrIw","7zpHszpnLUI","zpeXc2yUHTg","U674tMT_DFE","Xc20Rr3oo1Q","9VwBFwgIjgs","R-dGVt7HD-I","zjt2QewceoE","3kz7xjgNZh4","rr1WWWgaFEg","jz4STbro-tI","rigIXHiNxuA","U_cUvgkanIE","KZrs-yiU0no","ek8Ks_c6d_o","34Ky9eftUQg","JJHlzj9g-Qk","0hHoSOHyI1E","t0Unonz0FQU","9yh91n923Ks","i1NedihVvlg","HSwZn3j5p3E","k8j_n1Vm9IA","mKPCuJ7VWNg","GdTo3JxMTMU","7Nx6w6wYU4o","uPE5weGxIb0","cax1Xott9O8","eH3NXBtVmys","qSlKTwHqOv8","vV7Hs_umMnI","zitZFq8q5Uk","HFCuspTYjEo","8Q60VNcomTM","K2yChEF0nJM","bXX3ZDE5bgA","YC86nuDndak","VRdaz5a1Jw8","r6Gbf0ZJNbw","MVnTR7nqOAg","o-2XdolsHAg","Q5xOF81QrPw","HYF8egqJ4IA","PXgUj5_XYnk","yWEyP6kGzSU","zj2v2oEd-4Q","_kUIery-yMc","YKyRD5V43Uc","P3nHvZjZW-Y","uY3AfgDfw6k","iyaO7YdpJR0","ijYgl4nTtVc","_rv07cSrx1E","Jq2oTn5SO7M","1pzW-2f1VQE","_OhNxUkST0k","q-TceD8QXR0","IQri_jmduEI","K3rKsjxFpRo","Fuuzv4IO9NU","O0gzMweAAmM","M8bPspLDN8E","M74TQBuorR0","AfklqIo0WFc","Y6icciXTNYY","5CAG_Ami1ww","1uzn6blN8zg","YcZZeFQ7j-s","tGyzzkhTLEk","m0l7lvflI_A","fEDTnwnqzf8","cz6CHsjpN8k","8ZE8imJc8Ek","vp5k--PTkc0","2VqmrHQB8Nc","Gapk9v3B4KM","ev9vossjmVc","dlxTSxntP_c","F_0AVNYC0WI","8RQF49gszX8","nqFInKf6pUA","sI4zZtD1rNY","LQpUFmS9MAI","dSxJf5RFANs","XMk829doPvw","z1H0iDOSvmo","zDcMsOcRCNw","xQ96OfqruO4","_d35U2oouTs","IJDX1m12XeA","bxkOvCjSyDE","V7NbdWcR0tE","XKzNtFMvqis","gGWb9WLRmgk","PUJgVTFc0Qo","CHXDt7XopAQ","b6VlLsY1CbE","iDoItudZKVk","8BuSJ422Tzw","heCbKGAm3Dk","l5NUeVLYBmQ","io54hApHpM4","G-RT8tiBDyQ","i4pJYd20aw4","BPHACQAdqIM","YmVx8ctBD1s","q0ZuXSXtPr4","WOKZmiZUmho","N8b81rphb24","DQaWOOYPX80","yL8wLuQN3E4","wpZhIeKd8_A","y3KMM6NVOnk","RPFjAtYJenE","XbJcl-e54OU","lRi2itAjdgU","YsEVReQ25zM","HQ5r8rY8zew","71JVUyHiM6I","WQKMmVzzUg0","9mBgK-mKdV0","C2Bfpuic6Ek","CbgZkrtDeZ4","d4Yr0MamhxQ","PumFuS3PayY","Uo3StK482kg","sFpBIVUBQDA","-QKa-VrrsQE","4oR_JkK1BNw","It
DCcNjP9IE","EObqnesOTi8","EEvr-V-XfGw","8M5Wl1f5vbQ","fLOAZWcV52s","4pKWnhxD5kA","2wV4aDypOsM","ZvgOA2vYT30","dTV0SvzsW-M","-MjOXCgf3cs","OxL0DLysGcQ","2ARpnMR0uOA","7G6Wq2539PA","ENnUCpOY-jc","9mQ_oZ-FSpM","CV2HuI02uHM","UAC7pBkuBds","iFvS8y2xHWE","aPYAPzU7exc","vTh1BZzqLjk","rLS22ZqP0iQ","kpt-dhx0kB4","CwJOia1N1hI","2LvC4nsDhLI","UKU2CJxgZJI","AWDdGlTR62I","Xgz9SvuTr6s","S_yL2rgxU-c","gQUyeqaeWyI","UV3JOUL6QuQ","hmxm44LRd2k","tPSoqMyGWvE","ujQ06fHPeiA","U9AaOyB5PMQ","zMHno9Vj4L8","p7d6io7lDUs","X8lDxHIozdI","2R0uCGrtqnY","D2OZuqaMHA0","5X4N5BKVpQo","_aaKk2jh6_E","DSkjKzmhISY","Ogubr-fzSe0","JpXzahhTRsA","yi0NwtiVHYE","bMkgauqaimA","EBtBI5FYgFI","TCTpdBrYdJQ","bfvxr8dagfI","IkUOUI7rRT0","ceBhcFjrYCk","ERR3jXInG5w","4XtXwD9v4gw","StmBoEdNjC0","Bd-Pru9s480","QfVwXYfINtg","Q98U0AK9JOs","fVjVlmuIot8","F4KzpHYsnKw","uMADtLrsWZw","MR6nprMdURk","AEuodVgsJzY","cxCnJ5XZnT8","9K6_9JciqUQ","rynsYHhBGs0","TgEPH-dFWuM","1cpwDD9J7HI","Hq9Kb5IVY5A","vHL1ZvXR35U","pP0GUEcQFrg","xOtI5r2I_BY","Zgyrf5B0lCg","TqdoPtuVNxE","y0FDLy9krNY","3onTNZ2jdIM","AkYeE99k3Pc","ZEE6CKbpe24","Zg05TqUGnfw","naQAOGE2Pfo","9QHuCQWFp2Q","-4Yn-OwWUpg","lRp1AjJW0aw","Wdlv-le0W-Q","B4pousltBFw","zc-IZrQ46vw","ky1j5bLkJyE","2Hygi1erQ0w","WnKHpnyCyQ4","LHuGMOMFos0","6thLfuWJBaU","i_OfNytI0d0","UMpDvgxZvu0","uXAHmaGiw0w","Fy4kqpDMbgI","-azsUyyba1g","KqO7UkGMvL0","8i4t4nh_VhQ","zHNVkCjb3sY","dbAuxrH5BqQ","NJLrJtKOP7s","UkpHUKwCmqc","9_4EgtFR8PI","BdUiPWnHeoE","BnseVaWjyDs","8_mpkLIF0qY","ptU0z4qJLaE","CSUUC68CfzM","ng0ZCAXhSug","HfxrT_Qg5KE","nQOSSkS_4z8","tr8bw43nu64","VJf-_zjxqe8","xOethoL_lXU","IjUaQMzA1-o","oTfF3l3M23I","5dWd4BmPat4","iCI3rMuiD1o","leBTckujQG4","YiIdEkJGf3w","_THrMx0elKQ","fzBTAtG6DVA","8za7__T87Cs","d1nJLaikjZM","lKXqlHGP3Bk","_h8l9CzGdo8","w8Ly5lbLlFw","VgzA2oULHaU","IB7sStSzoNE","cEpn5b5zSiQ","mzlqpMwzGos","woluyhzJ1v0","BGDGJ1CtSZM","dEsqvagB5aE","6VswZwF03Gc","BJ3rCJa5ne4","3SEdlS4I7Ek","1GRYJka6mmQ","oxtuY9BJpPs","1O4jH38ebGc","VcFXu21oL5Q","DWqfvHUUBb8","ohO17__Ehf4","UjpSm00_kaQ","fG-ShZipmKA","nmNJEttTRYw","bIHn660-04c","Wk-ZVPM1xIc","St_VHk-8JK8","3v-NOKTcNkM","_mUvCgWYzQA","3lUQG5EF7-c","omrZyMf4zmw","oKghEd74QMM","blsvsCmaezU","CV57xIQiTk0","T5V6hiwW5Ts","iSPwB4_dpRI","eF9lSDe9hHg","SYEpvVeVmFc","6fW-Jj8ZR9o","LjlIBKi-ifw","6N6cwiXEZZk","WzlE_tdvAvg","_tn1mSOJJHc","98YuikKbUB4","g4Y0SWSC9Jg","v2TTrbWnCNY","X1qRCZ5Rqso","-fXsUsp4fjo","Yt8lMDxWVvU","LNa36-Q6pc8","4Q_XftfKeBo","UAAzPalue_c","HNbc4S5ZWMI","P6C8NlC-jWM","1OyHruFkEa8","zWsX6SdZbUY","O73inMTe-CQ","bQSuf6TZ_Ys","T5k8Ud7JBeo","FQf-NC5lmqk","bhQqhZeQLCg","-nMk7I2qJxY","2OwZi_xhOR4","IVKF5-4WP0Y","pmqv74ZP8CY","k0DElBjSz1c","__mkY-xa76M","EeHcCfkdIqA","aDUqmDYHc9E","U5HV3JKJrHw","MERagAFn5Js","8p5F7tY-mII","5kXv8o_Diz0","2Rpmb5j8ngM","bCzf0Lu2zG0","LhM3kkN7TiY","sWwzPi_aG0E","XBPsfy92ld8","96sIO3sAbc8","IQie_GKeKBA","z934nYAXA4o","wyIdWXqXXzA","taBNPPPL388","kc_U56QbTG0","TPGEyBdwQE8","h9s2bDSZEFM","Be6jVi3m714","kcvmVjbThDU","dX0O4l_BVqw","IlpPTdsb_Dw","6kqha-JpmdY","SGs9hQPKDNI","F7It0cTBYZE","SMGKnOCfPm0","fFGapFsG-WQ","-VaP2uCgmzw","gbeFpeduyfo","Chi-CKJ7eWk","KPvdHXl_U-k","uw3SEY7LR3A","wCy-yvObuYw","EcaIQ59Cm3s","D8y41GxKslM","-rN9vayvN3U","hJhYk-YJU1I","1L3RseBiXSc","va-9ups8Ss4","YlI7LcNN0R0","sVtBPVJ1m64","KxkeNlarE00","PjqIJqUD8_8","_zAqQzqQz50","-aNw6DeQuY8","0HDnY94eEV0","wRw1s6KXce0","Qa7DB7HXasE","NNhxwjakSDU","10-GylH9Iu0","xVRMFjph03U","vIxv5ZWknPw","xBoSDXy1CEU","0edca35_I-Q","AmzmbmFHoMk","E7bJBnIJGr0","w-j8UfVYatQ","vfob-TgdGss","T3XMU52AftY","91UboGl9L9U","T5UMaJM2Awc","w0Bl-93ISpo","Y1p5bUqEjiI","-FPyNzHtPNU","q2nzn5cqFU0","2
xEHV7EbbAs","mWGbHhb7fig","nUwDhoS0fsw","KpV5NME5D74","RBA4d8DtDZA","hJr0yLuzBIE","Wsv5gTuLpZk","b4Q7g_MLXW4","xMumBRcR3ow","-wiVCkO9jag","dSBcc6LvNAQ","uSo9vkfDJIo","Z_0DZwJxbN4","BQmdTIKy4M8","FRWauQGS-u8","lNoM3nxx-wc","C0UM6Vw6Su4","RE6uvUYr3UI","Qv9xfkfDgKM","q_bgDNniVCY","9ou3ojYh_JY","8BIOvfDZzVU","_GuDDUJzQJQ","AgRa_hG5lPs","rvytAHfpKow","JxanRdAuhsE","4JndaHBXm94","WMKe_IGf6U8","DQNPlRWd8EE","e7vor6clO2Q","kMXF9pWAN6E","ZpqVI0smn_Y","ygRK78LM9a0","hm6nyj9jIZQ","V3VLIgOJ99c","eAiolINI75g","ar5y0lXOZZI","uews-Ui30I8","813O3cZwwYQ","VwBQX8wYfw8","bgUHQb-N298","XutLFvASSRE","4lojDIDC_4Q","AZqYn9Nmpn8","GplQ_HwkkSQ","jx3eYLjDgv8","8V0JVfgsifU","ikJz1UogpuU","MUTgivOYkoY","6asS8xZe9lg","Kl5ZJETkXIE","b5KnRKlculo","kH4CPdqzCNs","Mw63OBctt7U","x5XjgqsJOFQ","gC6RUhh7bFE","W-0V8cOnC3k","uOhAuFyXa7Y","xbd2z4OV_-0","rzgEglvPvGw","F9vkTMqOZ20","7dbVRgvfIAs","MtUct_vz5sI","SpHKq_l1bQA","Qkh-GDXgWuI","xbKMK2lVTEY","N0oTi2Qc3pE","wCKLy-nhpxA","3ZV6Ekwe6KU","-eENqjFGYwo","ChkoSMQdv8A","Cei0yU4fhyI","4YjJ3KLDgTI","Ympxchrhn14","9HXlOI3fo-k","kMd54EkjRgk","5qTM6Pdyqwk","F-7h6FRipqM","OCwRhseig5Y","en9jPtweqWQ","B89W9_JS8rI","0XsFyvKE4Kc","SPR8gljyVKM","Sn72M7eoWE0","j77I6pP6zv8","Led1MVYHMQk","PxdcqIgJtI0","mL169JOz3OU","IU1ARweAbY8","VzGexMmg0EE","aHOpYaPI0kc","zrmv0Zcc18w","QyklxzOoa28","OLh0g0x4h1M","d3-X4x3jN7E","3jBZYRopiGs","o6HG1l-twwM","SnHduzut_jY","Zuz5WQiE73s","2EklE7DLwHo","4EUxZUokTjQ","_59VNAwTpEg","jBgfG-AsuP4","YHAhafgKHLk","HCriUth9hHo","zcrT8Pv4niA","lvOIr9QWHzE","GJR4Do109FY","CsQwoiFGmZs","kW1FOCRWPHk","rBoUfUoWJyA","jgOoN5MzvMU","O1SM8gnsNnM","6R7fG882KRg","UBhv2lmdqEc","w4KuDovCF9k","khFOgw5EnUg","ZyIy5KAolNg","KwIh6YZ8I34","uX0gyGl9GJw","cT90iGaC1go","xPOQckPl2o4","9PFjgUG7mM0","-1-tozu_apE","JTKk4fcm6Fc","8Uh1JbjVa3s","HRBmpCUyfgE","rmbMZVQVO5Q","lJMuiiSgffY","7FBAiqzRYOo","e-0p5mEGvJ4","sLMaQNZZ0Ps","T94ZBo6nC1s","wcZBICV11XE","NVeu-tSp2Ts","D5CDFp3VRW4","R_JdEbtixXQ","YVfqLvdMAws","685_Oa0OPpM","cA7cWtAqrN4","VrK5fNv-nsU","3ExZU7glhng","D86poOgkryU","wpxA7IdcGe0","HdjlkNJRhGM","bATnz12zk_k","IS1cgr79naQ","SgWrHKQbD8k","neHPVZ4rSHg","u4RJH4B_oaQ","UDXsxDFeFNs","xdNMqwJlf7A","qK6mvioBcak","71r1tz4GDtA","iP9cNg9d1_w","mvJ764CWXKk","QgFVyxH358k","EPl6EF60mjE","xC3tW-0EkaA","vj3Gz8PBCEM","OWVZjE3aW6A","RKpuRGhu-Uc","1v_fOEYaX0U","UeSzSzSzJ0I","m5nNIRdOIxk","IKGgqxgTZuU","kU-xFfa5T4A","nyERSwdoUuw","ckcSOFpzfjY","NC9OrfSjLXM","rfDWzdzSjUY","P62URz9vXjs","IneAynpcXyA","L0Yoi2GOR2Y","FpQeM2H-lFY","XozarwRWfcc","Pn-cbusLA7c","5EqKBg4kYPY","9R6EtOtsdug","txJNQ84RuNA","R1-myxHrj5I","qyW2KFLR1DM","a2U0wyhKRUY","5YkXAL4mJhU","HuduUBRGP_g","2ID_UUfYHuM","hPRgY8t-sr0","eOuDOUmvC2k","HekXEVS8xOE","71GVWmfG5_k","jOZUUrkLAtA","UCUC59f7viI","EtNobm1O9f4","Em3JM1QStP0","rbXrV7_xONo","2w_Wl6kBLx0","ndAltMIEw3o","Mh6b4ej6aSc","AzgFjorarBs","D2oFjsEcghc","VTlNDws5xQg","w2QLw9PL87A","X5wQlOznf0k","KF5_u0m88uk","5He6dIvoJDI","tQ8eCGPr9Jc","MxOlrEdEfZY","BsX7_44uBv8","SKusufYeSWs","McrN8r6Udjw","bK4GacRufnE","4dq8tcsmrCI","mft5WO4Dtw0","l3b9SAgVQnA","1sfJ-77wPo0","8S48A0hfKC8","m8rmMrrEdMI","iUSkx5scg1I","kWtpIimk1Us","MqSUh7a3TPc","AVJEUkStIec","j25MkHXXmkQ","jhe16JFU77o","FhoZpz80XLg","e5KAoUF5do8","Jgma8S0lp-Q","bkisBax5MpQ","WbJwdK327jg","z_QDvc9Akjs","HDwXhJQSWlg","VhlQ0CLNFfw","2wXOS1gHjCA","xMkgIjGFjoo","mPgW6D-MzOo","DLyiTa8eAXs","AUJkozSl7xA","EDJPDIHzs_Y","4-jqgACS1pk","aENMzriC2sk","l7n5rYd7MLc","OCxnLlELpAk","BbdC2kXTZd4","c-YyDZqNW1I","bCiZvO3EpKE","MVqwXcwVx8I","XS53MTcL0vQ","mwLa4BF4WCw","XlLMGUVc8O4","biO-O4-MwsY","1y4VHhGYBqI","Zf-z3_nmPJA","DRS-nB-1BEA","UETGM1KHuC8","OGu3ae2N8o8","
ftGXgA8FzSE","h6NMTxLFDi4","RqwwPIXKmSQ","0-KBkl1YvXI","1FXrOUPxJSA","SXiC9Mdxy7Q","FazIwjzoLOk","ji5zBZcc06g","PHHdWzCbE0E","8IipS25vqW8","uGiYgtjYS9E","17LkSCso2yE","a9JrUXiXN3w","eYyPCSwl0kc","6ICKhlSrcFs","p5oWwsXpg9E","JXk_8roDKIc","g0KL9ZKzb6k","-dzPt1nMoQo","XHAgmSgXmdg","BXuetEAVNnA","bg5KDHz_jYw","FCjfFwIUzvg","64PBY9ZXS94","IYpmGM6OMo0","C5YZ1xZhn-M","kmFmSkONlYI","JXPCHAmXiiM","rRuGeBwoIsk","fOfx9W7bxow","1ho1JAXkLuY","ze_qfbQe5r8","i9su4QJo0LU","p5NJx1uajuQ","oUzmUVr2j_4","456r8T_rTRQ","Cs8_P6zUpkE","qsar4wHZAtg","b-w3cGIrF_Q","0YR3TAzZjic","pnYlde16GPs","wRLoHVxhrQE","gnuU5WgGSPU","eehNBa7_WSs","0OXMgmPobrk","HSBDitZGKIY","WfBw0MEeUWw","Tb1fboxLZCw","P-Al084S4J4","zkq8_k9zeB0","1RdLoUXJpg8","Wdu7Cx3MtVE","Kvcn4Furmgc"] diff --git a/airflow/inputfiles/urls.news1000.json b/airflow/inputfiles/urls.news1000.json deleted file mode 100644 index df7497d..0000000 --- a/airflow/inputfiles/urls.news1000.json +++ /dev/null @@ -1 +0,0 @@ -["PeXAiQHAdmw","T08HBqgiVi8","JRLEiJxNDng","l2yBZp-BEe4","HgC5pjlVOI4","7xPBUpQ8NhA","wBTtb98oSdU","Yq21ypy5RwU","Hd9-ILuCKL0","fpqlUDugti4","ryZRMF1HV7w","Wlels10_u-w","ClS2Fm9UeN8","qb6zb2B2fJw","AlnSC2tSu-U","zL_RxjVxjEo","Us_XNKJKnc0","eieCen0HbQQ","SrhyHYiMNv4","tPRJFjGnDfk","tkZb-j425_A","9QOn5kOP-Tk","5CyeW5dxcPI","Px_QCUnyAN4","9617yqwSlzc","eMbDGHlcldI","ZPA6oQeHb7w","TnLmfOtYyR0","XLaZc3hz7_w","ID4JnAvtuNk","tEggfY6fjKE","OUMrVOluxpE","j8n6iL4d3po","G5KoKdtThKg","8jJx8P63mGA","8Re0o-6fMvA","8muBD2pYMOo","_w_oZ4xPUMI","txk8_H0ji3o","maLBoo28uGg","4OKjDFP8400","Q5WZeeBBCCY","-TJk9ZvKQno","aCoo6UVbKGQ","SeSl7fVUvKU","CJzztCT_4Ys","11PfHOEPikg","WCZgAr3Lj-U","3MwOUcyZyjc","Zsyc87ssnBg","TKadCNnVsJk","yCqGkD4u_-0","QnqsxtIhBzY","Hco_GkMqE0U","J0eicLqACpg","0vAmQylAVEk","br1FNlNdEpE","vCul5k18epM","SfeM7uwDrwE","qk0gJ4QVstc","FYNI4rT-16s","KMqxs4tfWUc","N_qq0Fiy27I","GgiDMmPevPc","38DK5Q_WhKY","AgnCWxssbzQ","gD_xUYvv01o","iRk7YW5-Dvg","ihb2bGXO_D0","aMWKxCfJWNg","6gG1cFMh-Bw","PZ7-LxzY47I","fNsjTW-w_kU","FYXy5pjDMbM","wD4ybqmdzyE","Srxs4qJrAhM","MoHdkBK-AJo","VWAGjr8qRCw","WtQ9U9ROIRM","f8IKdhI9DlE","t-tsj-8b_hY","o2V7JVQgTbg","7qNI4JHBwRw","CgfirLn1oXk","9FXsKVSZFUU","rKyzWeAQa6E","KCtYvjj00z4","vPpc6wwBPyI","RN1XlwXXsPg","8wl3j-ayOJg","TkMSoGEz41s","N3mIzlpmWwg","G01gfxT_n1U","YmjaqppyMGY","JjP3B6ul0e4","vG9XILhDx1A","y2lWwjFpwyA","ov-29QaM0sg","YrulaOt09ss","og9HdKfYXZ4","vBfVgGxu4jI","m7SdVnMnMJU","UKPwyq4gOkI","3TqsHURDIow","kSc6uK2JlL0","F91KFMv2tZY","CsZPuZ58y-8","AUyEN8knHXI","-1AYbkA3IjU","8R2dUjtzITk","uP_wjbRHtG4","9IPJqCUu30w","FjWmZyFBTSc","89_I_2PW88U","FoqmlOZTp_s","zFFC05DtuUQ","5TeGJratt5M","PFCG1J-D0NY","nWoRpWa2ehE","kFHRAuug08I","_hixk78pdAQ","sC2xBWDD2aQ","zN4HxVeCSJQ","xeTK-cVFW-c","EiV3YlxJ9bw","bZ-nb24xuj0","gywCckwYu0k","j80eTJGvflc","Ti-7nfIEZzU","VbHZgrAfz-c","0ZX04jWsXcM","U7J4DtSOZNg","ILl7MF1COdg","Ps0-B6HDln4","xijDAaJ0D38","cj16FU8Ph6k","m9t7bCakqSw","R6lMgyh6Sxk","zNtOPh79Xto","fm7KpwfDLeA","WXkwd5bOajU","9BPu7snM5kU","klYqqQ3WyqY","FbTOkDKaT-U","IpzkUmtwrRA","ea-5NKRNNJc","35kTUp455S4","i5kAYEbDdY4","SWnWLnlja2k","3yQx_ujcNqI","fs32j4Iom_s","qcTi9X2_fE8","57PQXFP7bso","ddo2QOjkN08","UEHkDsYTuWQ","-jy7zgA71LM","1d5qKpWXNtc","IY1z7pJ7w8M","ukQxRJltMy4","nh9Qfo2x5uA","EQC0VcW4BW8","IFYFFIWlX_k","7KoAZN38PAs","-o7YF40Gr-I","atm4eeWUZeU","1hBNtX1bNYY","l6Qd_7tw09Q","bgqPxaXFwH0","L1dREIMM_aA","mTkxir6Je7o","QMYYrCWE7bg","n4vvpNt3Bqs","KIZYtkOqEuA","dJm5eKY0NOg","NFSCqZcQv34","v5qn54gZBZ0","StTDUfIttRk","F-uU9Mk2LHA","Ju6GJLB_nC8","NaZYn6pr8Co","w1Vad2a-YHY","hv9P3p3RTTI","EwgN9kBImqI","tlQIEM6Lul4","nuz4HbcNH-A","mEzrhMnpX
i4","jnWXbymfZbo","o80oJdGqTnk","83O0OoMK2xU","3M2sOerTtEE","9G3EkUxp7d4","vskPNK3SQfU","5sjWGtW9mT8","G1YL0bPDvq8","NbMy2_34OBg","eg3FiVuycZ8","SBF2gWwqSC4","FwgKyZv33F8","kxLBa2sMnF4","BrxIbNPs5vo","px-IX8UkzlM","26oK2Dnncdc","-d6DpNgAMBI","dc_xSNQ8HZA","3IcctPnH208","ou4edVuHibM","Jd71UyGwAzc","d0IdmUJ-OJI","RG_Ql4UlxGI","Ag48GiBkc_M","PnryyCgbhDQ","1K0GJlQGxIk","TzVhdzJfkGk","qqKr9hK8_N4","P1vCyilkFUY","mL7a2zkZ2gY","UjOgE3fmcbU","P5Cin-mK0Vk","2Ya5hMPkSnk","Gzh1cJbvGno","We97og6wAYU","Ubt66uHJIgM","lj5ndJ4bpqA","Ap9xum-2AnY","L0qMs0NxCrE","ol1U_iBG32M","53l6WRadLN0","rfhTew-b5sY","6BNPpjFMUe0","LhmziLTqd3M","2k6PBurqDrE","m-YUKzBo2Yg","Aox1kUbZ17Y","rrOh7e7fUnU","pb4lhgPTR3c","Ap5halwB19g","F0qlSYHzS2g","bedI28k-Ci8","JHykHlv8dRA","ZQjvHT4p-jc","LiVYhRJYmG4","2osqYlq8lKM","xgHE33ED3wo","BXPe_UnkH3I","w8j7HTotCYs","r0j4G_LgQyA","ZE1ld-K_1io","gtZx_b-Y4ZE","o2VR6b4M8CE","6w8h7-HhNGc","N029wdP-ZTU","p66YZ6ztBnI","FlAyRc0olMg","k9N5snLzc7A","HscErsnz9jc","YJDEj8f4Ljo","RzcQh37l-Vc","iNUACINPWrs","o_cABLvmajo","9OlLB3H4Dzs","anEa-BpUwr8","fet8vsHUvYw","-Jjb7EGjlUk","BIH_WNdoB1s","rb5VbMtDihM","JHPElyyNkvg","9d7sudETdQQ","Fv-eary480g","2D-ew87MiS4","YX6_jhyp3_c","FTyZe0KsXYA","Qn_NdlfL_Do","c5BNA9EDJCA","a1SwmNh0FPA","1Z6p7FaW0e4","72aq9E7cCEY","ArFGUIOoV_Q","Rkynkcxa7SM","M-Mr_eYX-tc","Pz0eNfaV45M","jKces7BJBNU","tXOYsM3nMzI","CYd4OXqov_o","ljIP0DPTxvg","z3z_EaBOxPI","4b9sOCvluD4","2KtMnar_QYc","QguVBW2Hq34","oW0x6NyuxKI","aAy3pAnP658","nRCDsgTT6fQ","F6jProsFg4s","NlOdFJmkEls","3qULYSV4Mys","Xi7xJTF2scM","ENdaYA81n9Y","mb5QvN_7fJk","ny2NvXZyfn4","HF3pS_S-oPA","pqm3vd_BbBM","i064d6F2T4w","iCSHdJmuGTY","TSRP0nGuXJM","TlQ9rPpFfN8","dc615wTy61k","xuEdK7_k9mA","9lygDKXN5-A","pUfqMWq1FaU","_gbZ5lBBi3A","qMwR1L_j1DU","Pqwa3x690vM","S2RttLOjReo","8tLRhtPKWTA","-cVKQB2aj54","ajn2wei9sWY","gyh0u3edm_8","IQsudoX86lw","qEvRY6nwl6g","Y7Tf7HMifho","sk2XO8qAIWc","U4JOMtPJhZo","1WDlWi4UlIM","0mwkiQ8OgXk","kzYf2QkoLtU","3NaTCQUDOxE","gHb2TJfShNc","dkvANA5nlnc","E-x27w8Xd5s","YIGxWV4wPic","7NsinKf3Ax4","n6eWE8n8Dec","WpmGqOcZNIo","-hnLU2F-obM","ID0sqDDQUUI","sFCGTLbf68E","u4tJ_hVB_bQ","aUv_dMLcqCM","CtMVhz_vJUw","kN6MsC4VVnE","yLGuPEwZcPM","ZPdJbknM_50","9Alt1VUsN9k","a2ZxmlMvYYU","sTpuRVn9RlA","_uujDGLeOtk","dDlUlQ6_vgg","NMuL0p7KECA","sOTqO46VgVQ","lcTb62-seN0","1JPTNROauO0","XI_UaKsxhkU","8PTd0WvBcAs","N3ngPUp9vRU","muEyPkSQmuY","PI8-yS3ug7Q","jmnfItU3XRU","IsoAG--0ysM","6wxRP_zO5e8","SaJl3US7Uyc","h2fJS3Z4B-Y","sUs1-t7AybY","0gEHmCY0QC8","LbNVA7qM31o","Vy1XnQrN_Tk","nZGj-q0INLw","Js9KPL49W3g","k4JfTaZK0JI","KMvJ-_fntu8","5tPIdbEusqc","nKOfHNW5WAc","0G36GKx32OM","d-lycLvqC_g","H0PJDqpHSrQ","LdSchB0Yx5s","RXjlJwAkxmk","yfs6ef5YcFs","rAyi5O541ZM","f-1LXa54tDA","miPfhQs4OX8","nKpdl8E8edk","n-phVB-EmR8","ZTvGYjPSXbg","qpyKEkMSfBs","M3m5l33pygA","cG175e7N3B0","g3awDijFtYQ","e7B5xEqfO9M","y0ntkylO48c","VjZJCN3BfSE","h10aoRSyHhI","3unTLVQvTjM","Z6pgfUITbQM","LUlFaKDXwz8","MfLXsnN9bFA","1Zih03wq0kA","s0Z77I1jjOY","2wQ6ENRMww0","JH7Jzqsu-Ms","naxHuJbI3_U","F6ByEc6pbng","7WJ56mz3wjQ","b98B5E7H3JM","anS9xQEPTsU","KgUt-awPnTs","n7DinJQQQHk","Vfyh2zRg79o","Rctp5-LwsYo","2iHtXhdw_lY","PPIipwaiGPY","txhhy9HZJSI","cWdHYBW246s","VsO7PQPHkAA","0bt_aU0VmyA","R05wA6i8uTI","o36y3HbCvDc","x_T4i_eaFDE","73NQuGXaM_w","kcHYOLAs4sE","m2nosGolMT8","Rh-1Xzm6gaE","1TMOv8HHXbU","sDH0wosdorY","eZd1klKuY7M","ZptyL7bhXfY","aS-ZB7PjNDM","ft34vTV_Xho","qZcCw27JJXw","Kr0bLwu43Lc","ly3JrzS4TF8","JGQ7MlDKo_c","_wDgh9-ajXY","LOkHzLdV-Dc","UnVa3HLnzrs","2C7EoBoPB7s","tpveOXgpVmU","wYNQYjjIIrM","rgDJP4qDTsY","mXONI-wWjjo","Gk8xU7ukvtU","6_VUcBP5
mTc","3gQmJxoWQsY","UPU-5IWV3QU","-Ma_y_6L8Cc","xk-cpHB75rg","-eBwszb3FTA","mAe2kXHD8Nw","kglP8HDzB9k","VF82MkQPNAw","zxPdisOB7Ow","cvqZfYlf05k","gO6Uut078Nc","t-rNq3QjpI4","Z5H9zivJK9g","O5WTQUEJUJQ","YRej9Xf_q5o","Wr2crpug1j4","Wu4zjZ3djEM","NrVDkIxbwqg","HVP4vf6_BSc","4wTSj2tkP4E","TASV-DX-Y0I","knBhA4S4qgI","Y9ExO9lL_24","b0Z49SWgFzg","Y5FRWIKTJIE","KW4Q2IHOR0E","Sl04mpJhS_8","hO9WWdmg2EQ","CqNJRd-Nr14","0oEgnnlubhY","cgLwQUxzYSY","cn-aP7V9XtE","LIpO9QDcdIo","XgLpmL9p3Go","734mEPj_BOQ","z90TqVf0LRI","E07D4D9wGJk","O-EFpZ616b0","uxsT7FkD_0g","iUDhEwuf3kw","gf0T41xaLOM","Zgv0bT5-MB0","-4coONQo15s","uU8-XPCV7zs","a9Qrq1wFZGY","3Rzbu9xmjjU","JyQRlk0Dlb8","zbnXFp3IgH4","8hROrkml2W0","63U67bMqFFU","Fwe1Sra6xOI","hEcI0WkMCX4","_lV9fhDs1mM","xyBUBtHj7AQ","lMcd4QY_68A","fBzLOOc7nIE","7w6mXL22F50","6uEEnbz9KDc","Xbqnlb5MSaI","3FA3ZSzBdV0","EwIYZ96AAIQ","Qm51tLjkMb0","k2weSLRUfg8","7pqxfgKCC4Y","kj7LWxVpLSg","Xc6zc53n5rw","vi0_kOBo4k0","iaaoqnZ9v-E","6uFVuM1im7E","7KGvrnexbfY","o43mnzOmltg","O9LLiJDZNU0","MG7bE_LvOrc","eiKSR-2AwJg","HQlb5zTBWj0","rZCZJ1DjL3s","9LrC22PuBXE","BREZTGNZXt0","l0zJKARr1u4","n3TThMCte1A","k6uNl-0gGp0","EeAUdaV0ndI","4zXfN7wz_rU","ph3qJtKW9aw","SNNwcnsid_8","2I3COsbJdvE","dU0o91P0dBI","tDaPqLjO0kU","_3R2d8ojNfY","HiWPb1c1uPI","4RM7c9SvRcU","HAuZN5t0p1Q","i2zzYXXW1ZI","Ap-d4TH5Iq4","uDC3AxFeVe4","-gB_YAX_gP4","q2YiwZJJ1CM","_kCAPJsPhgs","QzLWUZ-AlYc","N6vETHC8Xo8","5bC-714cW8I","Kb1POBpqxUg","pSU8dAlczPk","LBhRgtHSrk8","Ils_kgWY-pI","3yf0VflN_lg","GhQK7ohwdyM","ieUdTbLKrlA","vNZs---YQdA","ziOkjSGdLbI","jonGaaQBaf8","8yqWBj3JD8o","rTPL_wqYMPM","S0gpwNqCKRs","ZvB5G9SqQJw","ef67K3IPg3w","WwKNxML9W_E","atPor2EGuPc","KMfcDjgSpDY","3ozf6XbCEAM","mvptUP2xlRU","T5z0VlJ6E_k","F9TvpwaTHuw","WwwrstcIdHI","TyjLxbvwzUU","jMvI4JdtElc","QOeLCN8UxuY","Qg6B8Ocjk8U","MHmtyhCEnGw","1SDuXwsJzMw","BpWBd2toKKg","z37Op-5WEsI","7t5j_qdo_d8","61LJ_P7ys8E","9Eh82y_-Y8A","b45ME2XAhqo","VtIMG0TgEdM","O4EKIoLYpWI","2u6jeuPjYvE","QXB6PTkdinY","tnuhoQe7qdg","7jtU5-YSwB8","JPlE2EadGQ8","htbDXsvXc84","Hr0mSq2h-lc","e99ufVkcZRI","rIYvZCHPOJs","2At0vuGjU78","vBxeu3AK2gU","8wcAXwOpEhg","tQTHIfGRHMQ","LZhVZ52jero","g97Zejzw9eI","ahRNOXT3xcI","kl4YTsmo0H8","smwVLB8XEDU","jkVtTkbu_g4","BJ0UIW9Dtpw","X5UBndfOfqs","0oHK3v1jEiM","n7zNeXELRdU","4EwsQizA4cg","EnWNuUGoYFI","LFfKjhvz8jo","cATLBdNtL2c","3EEs6pUsZIU","jobQNBfUp0I","kJVLnC7b0Go","9fp2B9ZrfIQ","9yXYegd8hUA","KaJRpDw_jqs","yBtqjXUa7h4","fcOYpzzX2AA","mcAOaPQtArE","x_bQmZTCuLY","CYi-MmJqIlY","5nyLKmDWmEY","8eY09ziGk_A","UqfQ4SR3lcI","9179GWmkLC4","hvX-nNqPhL0","ewJB6w5hVto","oDVLX19netM","13rBbvzavz8","y4qn38666Vg","VdCyaTO-XiA","GcolKrgRqiQ","0HUgXE_9bE4","KO8EPOnQkzs","GQzaU7GPA5M","dRWQKnzxPGo","10c2ImYsfqI","OuQPEpezGAU","oS7KOm3XAhk","roySziTuPsg","K9bjuhJk4vY","sMrPYMhN2TY","p4_KlsI5RBg","I10wastBGCE","S5jJ-s6_uuU","UragBhQhVUs","Csm0_szLyig","VQIpD2MLTWg","9VQRZubQCYE","jKu-RMJy9nw","j_XLUh5OEfo","T_XaLXSKwbY","jHkrh1Hilxk","R_MogYjPbvc","PbfauD6ro5s","hmz8km9Sg6M","_Hro2zw02Mc","0yU-IgrBoT4","NDi9V_-sB5I","-4TZJhjrczI","PHEc-DPmik4","oUwR5TsXTfE","biIyWad0jkw","zXxjkQBGj0E","UaxBH8tFx0Q","e0-YYwXv2i8","6AdV-oxQ6qM","EGLqo--PeOM","eaAG3jgrheg","riQVb4uUQ9c","qLmd2w6YVgI","ukQvKKvvq2I","h9jePzXQqqw","4kseP9TvqTg","vBmSTxi0d34","fG-_IInUaV8","Bqpi01uBQ1s","tIm0u9Gpg8Y","li95F7R1hos","TCpMMg83G_0","HFPm-xrY7M0","j5-XVjZOCWM","uAHdlobFP70","UOFKGOJF5Uc","ULH1jtCEPn4","GL60kG7tQGY","2hqy3WeWobI","L42mJkNDwxo","l1D6eLco0jY","muj_oS0x2lU","GnO_Dp7jLjs","SJoinPmzm5o","YOThkcT8CBY","E7oG38LETyA","2bFgGsAEFBE","6z27kfkYXhc","DKaSIkwGO-I","Z6Kzhob9oxs","9lpAnba
YjDI","lDXAWxUPwtk","JqLeXsM0gzU","blBam2vvTy8","BetXB_huew8","5quaNMShZHA","ceuRq5RQv7k","krdUKpEJynA","sBy-ROll6eQ","YzThZdodLc8","b8Wf1ipUQhc","zfO5kAHTGM0","a3tfx-YRUMI","1DeO-0S7hdM","1Lq08P91L-A","tphYThZSbdY","xBj-vgkHfRs","MaQz3Tf71Nc","YZiN71ll2o8","ArwT31OTti8","ipKc3lJK2D0","s-bOl4ZZOB0","lcXLDrhgb1A","12BU1A7zEbI","ztcL4iKCark","zMk-kA3THzQ","3pzsW2T-PU8","zJbz4EOUOTQ","-C6ew2iSneQ","NmWrW4-p3uk","9y-l9DeKTgI","oSox4P1filc","2O696ourLrc","QVf9Bio5GuE","Oy5cITcEuCI","XnuQLpkUuAc","Lx-qycq4STY","0Dtc-T_7hvw","WVKhBXGLceM","zwi2COZsWio","jySNDGJhcGI","aobkEvPb6nA","3ZnXOQ6dkyk","DZZNGLTs044","R8UsH4E9YmA","5L8DJZiLtJY","8CphBxwpI9s","RPVx8x9f5N4","Ofc0buS0oK0","8ngC6NsA0lU","xDbqBUq92Nc","PTGriBHkHi0","XWOKxDomXpo","73-Jr4WCpk4","X_LAANp8iNU","g54aXhr9bIg","QBOSEb1RBcI","4lRlCckVKrM","kazJhtdJfW8","UFYjWVq6LcE","BGKwzwZfkK8","C2-a-dvbxdQ","4ZdKjYjWtcE","6oOaTyF7Nwc","W5Wp-mknzsA","2gYDIHaNYps","T1vaTN6NvDI","93Hd76_tavg","RpIf6WRcJMo","NEu4FGJRFeg","bTCSnVm1ehQ","q8W2wYJX3H0","BfOZwy6C8Nw","Ze1nC46nyDE","srMhEEcanmA","9MGoWl280VY","p8_4Mlkpcqo","yMywr3g-FiY","YqEmfu3rKIk","qVC05M-VLMs","QpN3CDhs580","woGxfQ5XwhI","s_tA2ZNyLhY","DWRNHV-OPfM","8_fuVRcxg9w","gTcnmHApBLk","fZnmmSzGTR8","hN7IIsytVOE","XMmeluTr-8w","MDWRdjE4Xdg","z-r6mlMUZJo","n1jXVOQ9tSM","R0dTHR3SrF4","UASp0j00ymw","mTQw-uF_Thc","220rwsAdYuA","IIKP3I-qzu4","ebXW56b-jwc","btQZaMLj5Uk","4MbKdqUPlvc","ehyzWtbrqrY","tK2L11pF3DA","vgdhntWo5OU","AAfs00XB8eQ","ntm0qSKzOXU","nfTn6M0p-NY","1dkNtOXlJ4s","_j8mZJOpjDA","AuSeRTKfl6g","LoviB0HVTKw","xKuecoCqakg","B7Y_um-Vp60","qHUHCZc7FvY","mCQF8g7Uf8o","4Pe0eYU1sSU","BWQBWizqCOI","32QjMnThvwo","OPGre1_Qqcg","T55R_fJPndc","z5TkqBwgxuk","0OLZPTk3zy0","LI72Rcg12EI","kbg1XcTMqAc","HDu9WQ13gPg","fB5BoFCkfw8","CKKKH79GMRM","xOat_wob9C4","QzCqXPXXaqU","17_sMwkW1Dg","b8_r_5eZJzg","-hnoSXIp-dY","3TyXRKkGutA","q6f9QQ6zacM","0KqkQtOgPko","9pz5vcw0qFc","Cemkdm-n7LA","9N07sh-XFcU","KdLXl7GwSk0","PL9ffp_KL6s","UyOOK9pOUII","Jt7AZQLKgMU","f5PIEV_nB50","gtW6kPOyEkQ","qyadnE-De8U","G2cwqiwSAtI","gOhXgbXtvWY","8BrEiys-kyo","b1ZqJnjrKyk","xG0pEWliczw","6PVRDRkri8k","lfHlu-r6B4A","f8oeyqUlyOM","1ref_XRIibk","xh1s1fSzNQ4","I9B90I36n30","jFt3te1E7Mo","U_CglHar770","EgpiOeFsBtQ","v5bx5uxdN2E","s82DZUhWzBE","Xkb4UaC2t-8","lWP-tbMjy1k","7byLZ9-Da_o","0bdXPIN6htI","RIL-zAsLIBg","dO83Gdd_VTU","8w0QlLrt48M","bEaKBr8N-Gg","WL7rh72N1KE","DTQM6Qzl9jk","pn0njqFsrQk","Xro5sqJQ-Jo","mwqhUEY5tgw","HRbnyKvnYWg","ZQJeHE45Eq8","H_LNMixz8sY","DP7QShSvC9Q","PuU0i9bkDZ8","LlzZquXUZTY","t5HmcPxZpR4","nntrKpq3yCY","28aCyhhFl-c","G322Edcfedc","4V-Ys6LX7FA","uiYS67UzDCQ","lJYwLv1tJ4Y","oK8LGZFO-zU","j1ocCLubpTk","wAwjSJaqXXo","8O0FXXYYIlw","GK_f4fYluMM","XDiYZehSPQg","_vSKW-w5FQc","Nbt7aLHvBA4","pXhHjxD8nIA","WQno429YdtY","gST58YIhPvc","szfnryl8qTA","SPVDb__Kz58","d_xYCgC4ZJE","oCzCAadMik8","bZ5r08FJSPE","KL-x7ReXces","WIjqfoGY_K4","gSa1GO-c2zM","cA-swVVkUgw","V8ONTCaG7xY","1a4UsJyowPE","KMx-kaBASmw","81Rhj_zbb1Q","Lo2B8cgb4BY","Zp8myQqIiYk","p7aci2cTIRY","szOIko7cL5o","Wym5b3rmz4Q","xrywL1uUp2k","TCfeIfjO9xc","c0IJ5TvIy5U","k6TgaAPX9_Y","7hoZCXyAu2w","QBtht_VamNs","CKucB2xxYA8","Yc7CJScWe2U","OtyCeCs_j1s","7cWn5RmWhkg","WQg1ocvTLTQ","7mufsDoZJ80","kvzgbOUy1nc","s0J_cOt-NpA","iF8KW841uEE","qjtu-8HRbEk","BTFSQKXLKgo","XxrOA8Ek-J0","4gD_84BV3Cc","j-haMugYkHg","12Bes3eb-hw","9DvIYICJF38","0_wP4oa8zJw","TrhVTB2oRqk","tjJDL8R8Sk0","GTOnNeX62wI","Op0W8Bta7aw","2JI5N6BPPoo","z7_PTrxywD0","KNVks_CG22s","tChb_DYoycE","b9WRNsb1kkU","r3CO4VSY2q4","-IC63lui9J4","c-l_L4Roq9w","_LaPJAx6zcQ","i5Qppw-4kVo","qDefcGDAaIk","XCVi47bH0dg","6cx85r
MjPVc","j1H9VDE0N-c","Zs3DG1OqNis","6hLdOmWg9xc","zvow6p2rSJ0","NhqxWnr8rb4","wegBa02ylBc","JVAuT7OjgA4","TJ4D8JORtSU","JgrAAoZ6wS8","QybB7e9kXgI","Ed2pFwp3u5E","V8XFiRuXK5M","Qw_FSQ79HVo","sJWZO8qmf9g","DLwpL9Pmtqc","_vWG-cZtX5M","AT6g5LN2ZLI","k3eOpIWxz90","-lqAxeKcvF8","6-MS_EefUuk","TCeTBAFgVUE","rAyhu1ASiCI","3QUNgukBds4","ZHLNknKAsyc","BcKh7N1kKNI","VjGK0Ycdo1U","xzVK7_liyEE","4h_0kzoPkXQ","0QjJ_b4t4e8","u4g7xKAUqS0","zLOYwLUWE6A","KuG9XDhcPxw","Bn1EMJXkTNA","xF-tvL0re6Q","mxpJvQlDZv0","6-yDFX05yvo","lVcmtXQFHTc","MxojVzkFUHE","SAYDEYrg5wE","4K93GKlshO8","Ro-i4r4tfd4","P0qy-g0NJ_Y","7s1NIekeP34","-CKXFCJ9zJw","GLEFnp8fLbM","heLlnloLbcc","lPfI8QpCDXE","LJ86yuU44eU","nACN2JgF8qA","9BefNrf7788","3EKMHA8409g","BJzz_jLTpBs","e2v0ApEygok"] diff --git a/airflow/inputfiles/urls.rt100.json b/airflow/inputfiles/urls.rt100.json deleted file mode 100644 index d7b9d15..0000000 --- a/airflow/inputfiles/urls.rt100.json +++ /dev/null @@ -1,101 +0,0 @@ -[ -"https://www.youtube.com/watch?v=Y0WQdA4srb0", -"https://www.youtube.com/watch?v=uFyraEVj848", -"https://www.youtube.com/watch?v=VxPx0Qjgbos", -"https://www.youtube.com/watch?v=FuKOn-_rfeE", -"https://www.youtube.com/watch?v=mn9t5eOs30c", -"https://www.youtube.com/watch?v=7YOE0GEUrVo", -"https://www.youtube.com/watch?v=4L8kv6qVTfY", -"https://www.youtube.com/watch?v=7WSEWOft4Y4", -"https://www.youtube.com/watch?v=bmDsn0_1-f0", -"https://www.youtube.com/watch?v=IILtHOqYndA", -"https://www.youtube.com/watch?v=tyGqbWBjSWE", -"https://www.youtube.com/watch?v=3tgZTpkZQkQ", -"https://www.youtube.com/watch?v=JJH-CkjiQWI", -"https://www.youtube.com/watch?v=4hLWn4hHKNM", -"https://www.youtube.com/watch?v=IFwr6QGxoJo", -"https://www.youtube.com/watch?v=Fj-NKUoMbmI", -"https://www.youtube.com/watch?v=zvoxV3wLjFE", -"https://www.youtube.com/watch?v=EcC4CIyUI2Q", -"https://www.youtube.com/watch?v=jtjiTuTKCT4", -"https://www.youtube.com/watch?v=am28qDtXLLU", -"https://www.youtube.com/watch?v=WNVW86YBkMg", -"https://www.youtube.com/watch?v=kG51upknRCw", -"https://www.youtube.com/watch?v=E-HpdWghf2U", -"https://www.youtube.com/watch?v=GuaAOc9ZssE", -"https://www.youtube.com/watch?v=r1JkW0zfPOA", -"https://www.youtube.com/watch?v=OBYmpN8uAag", -"https://www.youtube.com/watch?v=0HuGAMKHXD4", -"https://www.youtube.com/watch?v=eDmdalDaPdU", -"https://www.youtube.com/watch?v=ZjDR1XMd904", -"https://www.youtube.com/watch?v=HGrsrP4idE8", -"https://www.youtube.com/watch?v=l-J_J7YFDYY", -"https://www.youtube.com/watch?v=Kr5rl0935K4", -"https://www.youtube.com/watch?v=KgK4bu9O384", -"https://www.youtube.com/watch?v=BDq3_y4mXYo", -"https://www.youtube.com/watch?v=slRiaDz12m8", -"https://www.youtube.com/watch?v=iX1oWEsHh0A", -"https://www.youtube.com/watch?v=0zJcsxB6-UU", -"https://www.youtube.com/watch?v=NTOokrCHzJA", -"https://www.youtube.com/watch?v=CXYXqQ-VuYo", -"https://www.youtube.com/watch?v=xaxZtPTEraU", -"https://www.youtube.com/watch?v=wX1wNCPZdE8", -"https://www.youtube.com/watch?v=DOt7ckIGN4Y", -"https://www.youtube.com/watch?v=bncasw-Z4Ow", -"https://www.youtube.com/watch?v=nbVWfXlo7kQ", -"https://www.youtube.com/watch?v=Uu6DmhonkEE", -"https://www.youtube.com/watch?v=HGWigeoSMvA", -"https://www.youtube.com/watch?v=rjbLCaC9yFE", -"https://www.youtube.com/watch?v=Uew7f09gW4o", -"https://www.youtube.com/watch?v=uzc-jLt65mY", -"https://www.youtube.com/watch?v=ZX7qnLuAsMU", -"https://www.youtube.com/watch?v=ZlSgDvCP5UI", -"https://www.youtube.com/watch?v=RmGIid7Yctw", -"https://www.youtube.com/watch?v=u9g0_eR5gEk", -"https://www.youtube.com/watch?v=wu9Cw905NUU", 
-"https://www.youtube.com/watch?v=cNhQVoY5V5Q", -"https://www.youtube.com/watch?v=I63iJNKOb8I", -"https://www.youtube.com/watch?v=3G5ceoSK6jg", -"https://www.youtube.com/watch?v=JF4TbV940PM", -"https://www.youtube.com/watch?v=0yGaVHfmGa0", -"https://www.youtube.com/watch?v=r8cgtI_ZQIY", -"https://www.youtube.com/watch?v=OcG3-r98XEM", -"https://www.youtube.com/watch?v=w7hooOUEMQI", -"https://www.youtube.com/watch?v=yipW8SF5Gxk", -"https://www.youtube.com/watch?v=LH4PqRiuxts", -"https://www.youtube.com/watch?v=IfAsA3ezUqQ", -"https://www.youtube.com/watch?v=5cUg8I0yps4", -"https://www.youtube.com/watch?v=lCea6bQj3eg", -"https://www.youtube.com/watch?v=5Ie0MAv4XCY", -"https://www.youtube.com/watch?v=57eomGPy1PU", -"https://www.youtube.com/watch?v=TEnk3OfU8Gc", -"https://www.youtube.com/watch?v=1uA4xXlDhvE", -"https://www.youtube.com/watch?v=aXF8ijpn4bM", -"https://www.youtube.com/watch?v=3vKmCDomyJ8", -"https://www.youtube.com/watch?v=z7jLEWJ59uY", -"https://www.youtube.com/watch?v=0TTsKnyH6EY", -"https://www.youtube.com/watch?v=PcqA6Y1RfVQ", -"https://www.youtube.com/watch?v=f1Ar3ydryqc", -"https://www.youtube.com/watch?v=N2nLayOIjxM", -"https://www.youtube.com/watch?v=Cziyx9qaYVM", -"https://www.youtube.com/watch?v=RTJCbIJ294w", -"https://www.youtube.com/watch?v=GC1FB-bZTvA", -"https://www.youtube.com/watch?v=kKYv5uLBSFk", -"https://www.youtube.com/watch?v=jfQHlnNeKzw", -"https://www.youtube.com/watch?v=J7e8PRu9kSU", -"https://www.youtube.com/watch?v=UoHf6pdy0oE", -"https://www.youtube.com/watch?v=JOwNcwSupXs", -"https://www.youtube.com/watch?v=gxwk-bb78-U", -"https://www.youtube.com/watch?v=_lrDwiK544A", -"https://www.youtube.com/watch?v=6i8BVQ9GE1g", -"https://www.youtube.com/watch?v=8c_l9D1qyKY", -"https://www.youtube.com/watch?v=KFCr5BdjFB8", -"https://www.youtube.com/watch?v=orEvHn7lL4A", -"https://www.youtube.com/watch?v=6BhGJxrp8P4", -"https://www.youtube.com/watch?v=n2t8beFnhyA", -"https://www.youtube.com/watch?v=GJzZ2-f_k30", -"https://www.youtube.com/watch?v=oId850O591s", -"https://www.youtube.com/watch?v=f2XmdQdwppw", -"https://www.youtube.com/watch?v=iWM_oe-JY_k", -"https://www.youtube.com/watch?v=GHEDWE9LjRY" -] \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_01.txt b/airflow/inputfiles/urls.rt250_01.txt deleted file mode 100644 index 38a32c5..0000000 --- a/airflow/inputfiles/urls.rt250_01.txt +++ /dev/null @@ -1,236 +0,0 @@ -https://www.youtube.com/watch?v=W_ovLaoQ5pI -https://www.youtube.com/watch?v=KnDAl7BqOq0 -https://www.youtube.com/watch?v=Ixwy1_Y1wPE -https://www.youtube.com/watch?v=oYHN2vgcpls -https://www.youtube.com/watch?v=WwYtzky4mjs -https://www.youtube.com/watch?v=dc7-aDyAeL4 -https://www.youtube.com/watch?v=XQDrjYTFVfE -https://www.youtube.com/watch?v=F0s9IJR4CN4 -https://www.youtube.com/watch?v=qrZ7RVXHdzo -https://www.youtube.com/watch?v=gwOjcuexMWU -https://www.youtube.com/watch?v=B4gVYr_9XP4 -https://www.youtube.com/watch?v=9bjUH5xViPE -https://www.youtube.com/watch?v=kbVZoqBfjCo -https://www.youtube.com/watch?v=hWXnlqOatS8 -https://www.youtube.com/watch?v=0izQuMMfIIo -https://www.youtube.com/watch?v=1x32zwHrg8s -https://www.youtube.com/watch?v=uzU1fxHS2dU -https://www.youtube.com/watch?v=qckyrafr4W4 -https://www.youtube.com/watch?v=tuvMl-Gbs1E -https://www.youtube.com/watch?v=uByH48D79KI -https://www.youtube.com/watch?v=qJ9qia29Kwk -https://www.youtube.com/watch?v=17DIjWMYBNs -https://www.youtube.com/watch?v=Ex9z0iIVluU -https://www.youtube.com/watch?v=DJP67SSAM9A -https://www.youtube.com/watch?v=OUe2oNykheg 
-https://www.youtube.com/watch?v=CgpuKqQl1tU -https://www.youtube.com/watch?v=4RNiyOZexag -https://www.youtube.com/watch?v=d4d8B-axCJU -https://www.youtube.com/watch?v=AMkVydmdLDU -https://www.youtube.com/watch?v=s6D1xf3VCCs -https://www.youtube.com/watch?v=ns1u61Zrzzk -https://www.youtube.com/watch?v=Ysqesg9CQ94 -https://www.youtube.com/watch?v=_VvNRtc3K0w -https://www.youtube.com/watch?v=zt-wA67nfJA -https://www.youtube.com/watch?v=bProjpkgXn8 -https://www.youtube.com/watch?v=8Gb8ccijtxI -https://www.youtube.com/watch?v=iFJJ_7_SUns -https://www.youtube.com/watch?v=9HBlnMlGSpI -https://www.youtube.com/watch?v=ioTF6x9mhz0 -https://www.youtube.com/watch?v=k1pdrHKb3P4 -https://www.youtube.com/watch?v=wprhAP6S7MY -https://www.youtube.com/watch?v=WFKKd_o89wI -https://www.youtube.com/watch?v=w59IixVPPUs -https://www.youtube.com/watch?v=9XeYW0l7JKU -https://www.youtube.com/watch?v=HGSIM6WiIMo -https://www.youtube.com/watch?v=SGXZJ8z8QHg -https://www.youtube.com/watch?v=IsqDx71KLyo -https://www.youtube.com/watch?v=NXOHkWf_sTY -https://www.youtube.com/watch?v=ICYe8ZSy0LQ -https://www.youtube.com/watch?v=GMKd0gjPCsU -https://www.youtube.com/watch?v=fbZYCKUCJs4 -https://www.youtube.com/watch?v=br1jufv9rW0 -https://www.youtube.com/watch?v=Y_flpHI4uEw -https://www.youtube.com/watch?v=FlVTAEpf53s -https://www.youtube.com/watch?v=fN6olULFwTA -https://www.youtube.com/watch?v=EsXZIyx9fmc -https://www.youtube.com/watch?v=s-TQlPdyoiE -https://www.youtube.com/watch?v=GyczQSFPTfQ -https://www.youtube.com/watch?v=ER1qbc1pMzs -https://www.youtube.com/watch?v=ceyVCBX-wGc -https://www.youtube.com/watch?v=BcNvEGIWolk -https://www.youtube.com/watch?v=15idHcwUAfk -https://www.youtube.com/watch?v=WrC6SRasFnU -https://www.youtube.com/watch?v=lBzdu8tLe-I -https://www.youtube.com/watch?v=T7U3BOIwVNc -https://www.youtube.com/watch?v=o9wi6Tt4Z3w -https://www.youtube.com/watch?v=rDKH2dhVE_c -https://www.youtube.com/watch?v=hgzkDHhImoI -https://www.youtube.com/watch?v=3Nz1Vym36ak -https://www.youtube.com/watch?v=TCwcSAZkppc -https://www.youtube.com/watch?v=btS5Kzy_zB0 -https://www.youtube.com/watch?v=csimc-rbrhE -https://www.youtube.com/watch?v=TQymjBogMlw -https://www.youtube.com/watch?v=iMkXhj31UKQ -https://www.youtube.com/watch?v=T0Gdsnmffro -https://www.youtube.com/watch?v=2z6HSIypJ_w -https://www.youtube.com/watch?v=oX_3TTyEnKk -https://www.youtube.com/watch?v=89yqiHuBvQQ -https://www.youtube.com/watch?v=eLympVyGQQI -https://www.youtube.com/watch?v=Gy67TZyPn2Q -https://www.youtube.com/watch?v=qw8437Em-3k -https://www.youtube.com/watch?v=z7Y8KiT7bPk -https://www.youtube.com/watch?v=ojJhtn1mEGg -https://www.youtube.com/watch?v=D1Cic7Uc0ns -https://www.youtube.com/watch?v=-63519KiUqM -https://www.youtube.com/watch?v=qJfHfBR20-g -https://www.youtube.com/watch?v=QwbNv-bG9oA -https://www.youtube.com/watch?v=nwfiyKx9x18 -https://www.youtube.com/watch?v=L3aPsthTBW8 -https://www.youtube.com/watch?v=zjfCX6bpUFg -https://www.youtube.com/watch?v=2hdR_bpvjDM -https://www.youtube.com/watch?v=leoxUrT9DJg -https://www.youtube.com/watch?v=fongJ-r7Uac -https://www.youtube.com/watch?v=vas0e7e8bmI -https://www.youtube.com/watch?v=MRrUq0oqFmo -https://www.youtube.com/watch?v=lgWMCCYXqGg -https://www.youtube.com/watch?v=hwqXFvpujRs -https://www.youtube.com/watch?v=V3ZWVvrmPvw -https://www.youtube.com/watch?v=gP2QkIJaQHE -https://www.youtube.com/watch?v=7U1Q4tscmUU -https://www.youtube.com/watch?v=T5oO9HYyT_8 -https://www.youtube.com/watch?v=N7St23woljA -https://www.youtube.com/watch?v=_iWSQ3XD_eQ -https://www.youtube.com/watch?v=ev1urHanjCo 
-https://www.youtube.com/watch?v=ebvVkT_gHOQ -https://www.youtube.com/watch?v=IUrylOsLD6A -https://www.youtube.com/watch?v=aJ2DWpOhM98 -https://www.youtube.com/watch?v=HldN0Atn5LA -https://www.youtube.com/watch?v=f6eY71i7TfI -https://www.youtube.com/watch?v=m1A5aOGYGM8 -https://www.youtube.com/watch?v=US0in27JPv4 -https://www.youtube.com/watch?v=SWAWTrsXH5E -https://www.youtube.com/watch?v=0K6F8TJowCw -https://www.youtube.com/watch?v=O-uAmbRDCjQ -https://www.youtube.com/watch?v=TdqayW3Yhus -https://www.youtube.com/watch?v=mZ1Gieg2PbU -https://www.youtube.com/watch?v=D7DoRpB_p7g -https://www.youtube.com/watch?v=y2j03DYoC9k -https://www.youtube.com/watch?v=H6UwY_jvIkg -https://www.youtube.com/watch?v=jPHdMovcsno -https://www.youtube.com/watch?v=Ui9ioQhlYB8 -https://www.youtube.com/watch?v=16A9rW-bYOw -https://www.youtube.com/watch?v=0HjKLqPZlk8 -https://www.youtube.com/watch?v=KFcUjf9pJzE -https://www.youtube.com/watch?v=qslMthxSRWU -https://www.youtube.com/watch?v=Jt7haujk3sk -https://www.youtube.com/watch?v=MJK7NX0E2_4 -https://www.youtube.com/watch?v=OMdjNk3aQdk -https://www.youtube.com/watch?v=n4HCQrbYc_w -https://www.youtube.com/watch?v=3gFC-igZPr8 -https://www.youtube.com/watch?v=aqS6aduySeo -https://www.youtube.com/watch?v=ylLsucs0PRY -https://www.youtube.com/watch?v=-hIPIMAAk9E -https://www.youtube.com/watch?v=SHjD3xOkWac -https://www.youtube.com/watch?v=FxiWcpW1hOc -https://www.youtube.com/watch?v=s-wpgAK-fzg -https://www.youtube.com/watch?v=82XT7UQbF-w -https://www.youtube.com/watch?v=4bsjF-d9ODc -https://www.youtube.com/watch?v=LPfOH1_9gYU -https://www.youtube.com/watch?v=ITD6zT6SNZo -https://www.youtube.com/watch?v=mBSP343k7Xk -https://www.youtube.com/watch?v=SpXgj9PI1FI -https://www.youtube.com/watch?v=xkiqMGZEYbc -https://www.youtube.com/watch?v=ph2UXTChSsw -https://www.youtube.com/watch?v=Lq0cra_cqLc -https://www.youtube.com/watch?v=W1SS9Yt4PNI -https://www.youtube.com/watch?v=yV5g1sufBVI -https://www.youtube.com/watch?v=y7jNwdmysbo -https://www.youtube.com/watch?v=3i4Q9EgSuA8 -https://www.youtube.com/watch?v=-j7C5MfDXrA -https://www.youtube.com/watch?v=BkSglsAO7-w -https://www.youtube.com/watch?v=05dqwprWsnc -https://www.youtube.com/watch?v=b2xwjdv5nxY -https://www.youtube.com/watch?v=06RshyLtUic -https://www.youtube.com/watch?v=Mm1DH0lAtQs -https://www.youtube.com/watch?v=OfxsLW1ZUsk -https://www.youtube.com/watch?v=_AZs4CG7CbA -https://www.youtube.com/watch?v=RzZRssOgH7A -https://www.youtube.com/watch?v=Mqe8ZhqIISI -https://www.youtube.com/watch?v=6w0qYD46Afo -https://www.youtube.com/watch?v=YVtFh0283YU -https://www.youtube.com/watch?v=jCCH685ldpg -https://www.youtube.com/watch?v=Ut1_9Ma9fZg -https://www.youtube.com/watch?v=pjV_fCpJgLc -https://www.youtube.com/watch?v=a4NtRH9sZLk -https://www.youtube.com/watch?v=XjVj9wipu70 -https://www.youtube.com/watch?v=CWMhNCPMXeI -https://www.youtube.com/watch?v=MC9YYtWLadQ -https://www.youtube.com/watch?v=7yI1tP5oWQw -https://www.youtube.com/watch?v=wxh9zh-ygig -https://www.youtube.com/watch?v=Ul3JY18tiJ0 -https://www.youtube.com/watch?v=suj-r9RCMGY -https://www.youtube.com/watch?v=AbL6ZGOjc_M -https://www.youtube.com/watch?v=VGjHw351GdU -https://www.youtube.com/watch?v=32W16gzB3E8 -https://www.youtube.com/watch?v=PTujVtVZD-c -https://www.youtube.com/watch?v=SFbnmsMa_i0 -https://www.youtube.com/watch?v=X_FpFtE9mGM -https://www.youtube.com/watch?v=g6xy8KXaxDE -https://www.youtube.com/watch?v=UO6BR3rXpHs -https://www.youtube.com/watch?v=-a14SXc9ERk -https://www.youtube.com/watch?v=Hl73pJhS1Jk -https://www.youtube.com/watch?v=wXgLpByOcos 
-https://www.youtube.com/watch?v=FH3nQzkKc08 -https://www.youtube.com/watch?v=wFYAbEfajd0 -https://www.youtube.com/watch?v=zcyM1HzB4OY -https://www.youtube.com/watch?v=cLXZ9INHYyI -https://www.youtube.com/watch?v=Iq7Sjbcw5Ek -https://www.youtube.com/watch?v=kgX5IcFE2HE -https://www.youtube.com/watch?v=Dnq4p0BZ1zA -https://www.youtube.com/watch?v=bitMfdK4mAE -https://www.youtube.com/watch?v=HWqLp-gGOPw -https://www.youtube.com/watch?v=g6DFwFhfqSE -https://www.youtube.com/watch?v=81VXi1v_6Gg -https://www.youtube.com/watch?v=sCf09W7u_as -https://www.youtube.com/watch?v=MIRmwYQ0pnw -https://www.youtube.com/watch?v=dT0PJCDY-WY -https://www.youtube.com/watch?v=Tia6NFKI29c -https://www.youtube.com/watch?v=ZEH4XMI2gi0 -https://www.youtube.com/watch?v=JFC-hkuLwz8 -https://www.youtube.com/watch?v=xfjVsnGk92M -https://www.youtube.com/watch?v=aeF2hf_R2h0 -https://www.youtube.com/watch?v=D_j_k8nWY3g -https://www.youtube.com/watch?v=lvO8Dq1yORA -https://www.youtube.com/watch?v=tvovFzmiF6E -https://www.youtube.com/watch?v=KUsI9cxtJPU -https://www.youtube.com/watch?v=vgLVkrAnBLI -https://www.youtube.com/watch?v=M_ofMDC-FEQ -https://www.youtube.com/watch?v=O4gqva5ROqw -https://www.youtube.com/watch?v=DTgN-m3lAY4 -https://www.youtube.com/watch?v=bqgdyYcM4_Q -https://www.youtube.com/watch?v=hNCpvI-d6Fk -https://www.youtube.com/watch?v=va96DIxvE44 -https://www.youtube.com/watch?v=ZOwqc5DjkDk -https://www.youtube.com/watch?v=1i6UAXkjy9A -https://www.youtube.com/watch?v=gbaxWdn_Uq0 -https://www.youtube.com/watch?v=ygjE8I2k5m8 -https://www.youtube.com/watch?v=U6TUDhJ4KF4 -https://www.youtube.com/watch?v=-OSZqBAF-ck -https://www.youtube.com/watch?v=MXTnUTdBLaU -https://www.youtube.com/watch?v=38JAI9MIprU -https://www.youtube.com/watch?v=VE7TqzCQypI -https://www.youtube.com/watch?v=TdkXjlJiQq4 -https://www.youtube.com/watch?v=TTQ6N9GNeGo -https://www.youtube.com/watch?v=JnTv_K8ah0E -https://www.youtube.com/watch?v=kNI1tYegCZY -https://www.youtube.com/watch?v=Tc3A1vJf4Rg -https://www.youtube.com/watch?v=xBvINP1ddSo -https://www.youtube.com/watch?v=p-Z-cx-43eA -https://www.youtube.com/watch?v=0sqjAko-vgI -https://www.youtube.com/watch?v=W0i1_RaLrho -https://www.youtube.com/watch?v=2G3RFYBcHds -https://www.youtube.com/watch?v=Yirxj1qPBnU -https://www.youtube.com/watch?v=_e9JfXsM9ks -https://www.youtube.com/watch?v=V-UlKut8NbU -https://www.youtube.com/watch?v=C7D19AiYG4c \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_02.txt b/airflow/inputfiles/urls.rt250_02.txt deleted file mode 100644 index 227ae1e..0000000 --- a/airflow/inputfiles/urls.rt250_02.txt +++ /dev/null @@ -1,237 +0,0 @@ -https://www.youtube.com/watch?v=Qj_u_rAgqDU -https://www.youtube.com/watch?v=ftObtlgqcFM -https://www.youtube.com/watch?v=wa02oKjBgvA -https://www.youtube.com/watch?v=yPjuAumM7g8 -https://www.youtube.com/watch?v=lpGq2LRqwAk -https://www.youtube.com/watch?v=jeD7tHfacHw -https://www.youtube.com/watch?v=CIxVCg71xZo -https://www.youtube.com/watch?v=gyKj3b-MbAQ -https://www.youtube.com/watch?v=yTCAjBGiUTg -https://www.youtube.com/watch?v=rNtMzj0thHg -https://www.youtube.com/watch?v=eDf4QdHkM5A -https://www.youtube.com/watch?v=KF7elMYIiyk -https://www.youtube.com/watch?v=tfoyuy5jQoc -https://www.youtube.com/watch?v=ZoXnm8dR3VU -https://www.youtube.com/watch?v=eGmo34B_OVo -https://www.youtube.com/watch?v=OR4pk7e4KbA -https://www.youtube.com/watch?v=cSVcsJ6jK4Y -https://www.youtube.com/watch?v=Ye1_bZGicWU -https://www.youtube.com/watch?v=n98hMqGK16k -https://www.youtube.com/watch?v=gXmfUJhgvAg 
-https://www.youtube.com/watch?v=ZHaZsBQzycY -https://www.youtube.com/watch?v=SfEQCvh6OmI -https://www.youtube.com/watch?v=xqe3MAkyGVc -https://www.youtube.com/watch?v=mc6BvRRyN8M -https://www.youtube.com/watch?v=wzPxLW7GZr0 -https://www.youtube.com/watch?v=zLJd9PBomIA -https://www.youtube.com/watch?v=DlLER38zpq4 -https://www.youtube.com/watch?v=lg9hBws5KS4 -https://www.youtube.com/watch?v=pSgRbPFNgj4 -https://www.youtube.com/watch?v=gX8tm4sP1qY -https://www.youtube.com/watch?v=-2EYqmuGLLM -https://www.youtube.com/watch?v=kKTq3Ndpu7E -https://www.youtube.com/watch?v=KvsOV5hHnq4 -https://www.youtube.com/watch?v=DPuK9pasFDA -https://www.youtube.com/watch?v=zQVSEIb4uJ4 -https://www.youtube.com/watch?v=AuupjeyKLnw -https://www.youtube.com/watch?v=-iDaJ1KO8A0 -https://www.youtube.com/watch?v=OheF39Zcees -https://www.youtube.com/watch?v=kqjOVTQlGrI -https://www.youtube.com/watch?v=QcLD4KdJkKA -https://www.youtube.com/watch?v=bHngc3m0Xdk -https://www.youtube.com/watch?v=Ti8ZnrOD5_0 -https://www.youtube.com/watch?v=dIUNVVnFC0U -https://www.youtube.com/watch?v=6Mc1Q7Ii55c -https://www.youtube.com/watch?v=kwxDk9nT9J4 -https://www.youtube.com/watch?v=B9WBEPkNf-w -https://www.youtube.com/watch?v=1Pt5Zrakvdg -https://www.youtube.com/watch?v=dsiu7kXFBI8 -https://www.youtube.com/watch?v=AQiIRKhgFHE -https://www.youtube.com/watch?v=geFIHaBoKaY -https://www.youtube.com/watch?v=QA4a9Db8m88 -https://www.youtube.com/watch?v=OIFU4k1f0Ec -https://www.youtube.com/watch?v=2iSBnEwWwjo -https://www.youtube.com/watch?v=VcWAOEoue1Y -https://www.youtube.com/watch?v=rdR7_4da4Js -https://www.youtube.com/watch?v=hOkvzOkipaM -https://www.youtube.com/watch?v=wLOR5mlx7VY -https://www.youtube.com/watch?v=PFcDLcK_zcY -https://www.youtube.com/watch?v=cP8Q7DIl3nI -https://www.youtube.com/watch?v=x_pJksDZSzU -https://www.youtube.com/watch?v=BeOF0c-EzIQ -https://www.youtube.com/watch?v=7PQ2uRSFd94 -https://www.youtube.com/watch?v=v_ZmsRnDmsw -https://www.youtube.com/watch?v=2Y-9Rznk8ug -https://www.youtube.com/watch?v=U6flSitpCM0 -https://www.youtube.com/watch?v=VVDf4mcyPAw -https://www.youtube.com/watch?v=m8jrjn64MVk -https://www.youtube.com/watch?v=rq8chzZeDpo -https://www.youtube.com/watch?v=e9c6Is5-XYM -https://www.youtube.com/watch?v=SxTrAm_2oT8 -https://www.youtube.com/watch?v=tSy7g2s9_eo -https://www.youtube.com/watch?v=zxs7UeUJr0s -https://www.youtube.com/watch?v=FvryEetPxrI -https://www.youtube.com/watch?v=o9qn_UHBKQ0 -https://www.youtube.com/watch?v=PBNpVOwoXLY -https://www.youtube.com/watch?v=PpMPvuSX1CY -https://www.youtube.com/watch?v=dqjASGYlWRU -https://www.youtube.com/watch?v=DGfo_K6NTwo -https://www.youtube.com/watch?v=WpUpTVFW3S4 -https://www.youtube.com/watch?v=dCgjywvszFE -https://www.youtube.com/watch?v=FuW3lMJF2zA -https://www.youtube.com/watch?v=bKaU95ceeUw -https://www.youtube.com/watch?v=Ynwqt_R3faM -https://www.youtube.com/watch?v=td70vUbqAgw -https://www.youtube.com/watch?v=9ZwKVhtzFM4 -https://www.youtube.com/watch?v=xAvwjZxkp_s -https://www.youtube.com/watch?v=FlwadWqd9jY -https://www.youtube.com/watch?v=grosmlJJpOQ -https://www.youtube.com/watch?v=8tj04EuSuR8 -https://www.youtube.com/watch?v=bCdkBP6nYrY -https://www.youtube.com/watch?v=9BbMwzKy7pY -https://www.youtube.com/watch?v=0A55FZ5R0MI -https://www.youtube.com/watch?v=S7Z5XDc5X3I -https://www.youtube.com/watch?v=sWO5gY7UbKM -https://www.youtube.com/watch?v=UrkAAASpCis -https://www.youtube.com/watch?v=iAiQBMEeeV4 -https://www.youtube.com/watch?v=XnRekopCpZ0 -https://www.youtube.com/watch?v=bnstqG8YJ-E -https://www.youtube.com/watch?v=dk5UlOS6IYI 
-https://www.youtube.com/watch?v=uelzEzmIhh0 -https://www.youtube.com/watch?v=gq1pUYxILOc -https://www.youtube.com/watch?v=OgXkB9S_GmA -https://www.youtube.com/watch?v=mvGD7RRehaI -https://www.youtube.com/watch?v=s3df_PR0x7Y -https://www.youtube.com/watch?v=mRDmsxKQurs -https://www.youtube.com/watch?v=bhKN_KOeWhI -https://www.youtube.com/watch?v=EStYpTS-TRU -https://www.youtube.com/watch?v=357YonN45w0 -https://www.youtube.com/watch?v=UGJfPbOpiCA -https://www.youtube.com/watch?v=1F6uYuHgOdI -https://www.youtube.com/watch?v=PvsqLRbCJlA -https://www.youtube.com/watch?v=P96IOk9mQgk -https://www.youtube.com/watch?v=M5EqG9d-3Ug -https://www.youtube.com/watch?v=R6eDBa6UjmY -https://www.youtube.com/watch?v=CZvUQEU2cvs -https://www.youtube.com/watch?v=giazbLbDdv0 -https://www.youtube.com/watch?v=JFoI12_47ck -https://www.youtube.com/watch?v=q5dZ396lYbk -https://www.youtube.com/watch?v=McPkR_D7zI0 -https://www.youtube.com/watch?v=774oBwazxHw -https://www.youtube.com/watch?v=-_5AipO_dfw -https://www.youtube.com/watch?v=MnlU4BPrLuk -https://www.youtube.com/watch?v=24LuuQH4hnc -https://www.youtube.com/watch?v=e4ivBc0l7Ok -https://www.youtube.com/watch?v=S4ff7HgfULA -https://www.youtube.com/watch?v=AxhJcEndmjs -https://www.youtube.com/watch?v=NdeCQFd2blY -https://www.youtube.com/watch?v=xgl4ltsE_8E -https://www.youtube.com/watch?v=in5xKqvxrAk -https://www.youtube.com/watch?v=TAU_0EpXBgQ -https://www.youtube.com/watch?v=3DbMqaactuU -https://www.youtube.com/watch?v=BWuY55TfChs -https://www.youtube.com/watch?v=41ecD9culo4 -https://www.youtube.com/watch?v=kYV8Q5UpDTw -https://www.youtube.com/watch?v=wOQlIDXHkD4 -https://www.youtube.com/watch?v=vMXVse5OuFI -https://www.youtube.com/watch?v=Fem5C3R60Sg -https://www.youtube.com/watch?v=OhySOErdxjM -https://www.youtube.com/watch?v=KC09gbct8u4 -https://www.youtube.com/watch?v=bJ4vPNkjRdE -https://www.youtube.com/watch?v=RvBrUzLugjA -https://www.youtube.com/watch?v=QuNxtlXivBk -https://www.youtube.com/watch?v=yz6OjqZfdLM -https://www.youtube.com/watch?v=CoOLkzZCcGE -https://www.youtube.com/watch?v=FecXRY-8IPw -https://www.youtube.com/watch?v=KUQBqesn-6M -https://www.youtube.com/watch?v=NAF9kveijEA -https://www.youtube.com/watch?v=hl4j6E-ICco -https://www.youtube.com/watch?v=yg5tpHvElvM -https://www.youtube.com/watch?v=ZddCBXb10hw -https://www.youtube.com/watch?v=hTKjjdN8MGQ -https://www.youtube.com/watch?v=X9hHQaYj7Lo -https://www.youtube.com/watch?v=P0B-L66ffLw -https://www.youtube.com/watch?v=5mQ4hc8Uvn8 -https://www.youtube.com/watch?v=KaPSeF592h0 -https://www.youtube.com/watch?v=xerdSyr3sSU -https://www.youtube.com/watch?v=ZW2jcFuHdhA -https://www.youtube.com/watch?v=ek3TqzF-KVE -https://www.youtube.com/watch?v=sMT8I7qIoZs -https://www.youtube.com/watch?v=hPvr-qxf52s -https://www.youtube.com/watch?v=__Ier-gibdA -https://www.youtube.com/watch?v=Wqs0Im26Bfg -https://www.youtube.com/watch?v=auk6LFmPgC8 -https://www.youtube.com/watch?v=OEX2aUVFYNI -https://www.youtube.com/watch?v=i_suF4tWuj4 -https://www.youtube.com/watch?v=Gmy9pDH26do -https://www.youtube.com/watch?v=ktXSBjJdd5Q -https://www.youtube.com/watch?v=p3kzKLus9yg -https://www.youtube.com/watch?v=tB2l4wTK4OE -https://www.youtube.com/watch?v=gm4XxHSJePc -https://www.youtube.com/watch?v=uy7z2ywGb8c -https://www.youtube.com/watch?v=OmfINsA961s -https://www.youtube.com/watch?v=8impJJlnKS8 -https://www.youtube.com/watch?v=aKCzJoP2bsY -https://www.youtube.com/watch?v=Q0jNgwJDXYk -https://www.youtube.com/watch?v=ratCJH1TN9Y -https://www.youtube.com/watch?v=kpiCo2tDedQ -https://www.youtube.com/watch?v=Jxsj2VSYp_I 
-https://www.youtube.com/watch?v=FeS1TqWJLqE -https://www.youtube.com/watch?v=xJJnQWo50lA -https://www.youtube.com/watch?v=FMqeu-2OCC8 -https://www.youtube.com/watch?v=wHaVTysBL9U -https://www.youtube.com/watch?v=lfmVNlorAV8 -https://www.youtube.com/watch?v=mD1d0YLwbHQ -https://www.youtube.com/watch?v=BZHKlc3N_wA -https://www.youtube.com/watch?v=7X4vxF9V9PE -https://www.youtube.com/watch?v=s_ftU_N-KAc -https://www.youtube.com/watch?v=LMXj3C2JhdA -https://www.youtube.com/watch?v=iq6sC58oSMo -https://www.youtube.com/watch?v=ZV3e4CtYltc -https://www.youtube.com/watch?v=TBB6xBg7isY -https://www.youtube.com/watch?v=majq3tuDPlg -https://www.youtube.com/watch?v=A62-iVYtkvg -https://www.youtube.com/watch?v=oH-hzXI7RzE -https://www.youtube.com/watch?v=OqAu24YGNKM -https://www.youtube.com/watch?v=YcgFu0urTjo -https://www.youtube.com/watch?v=L_qDQ2WALdc -https://www.youtube.com/watch?v=76nZ2RSxxik -https://www.youtube.com/watch?v=s4mnCMUrMV0 -https://www.youtube.com/watch?v=eAhZel9fdcE -https://www.youtube.com/watch?v=TXchNmKFu8I -https://www.youtube.com/watch?v=KqLLKx7jJxM -https://www.youtube.com/watch?v=wBkH4Sho9Uw -https://www.youtube.com/watch?v=3UeYut9Nm3E -https://www.youtube.com/watch?v=rNHZh5931hA -https://www.youtube.com/watch?v=fU6GFD3wNDs -https://www.youtube.com/watch?v=WAFiutRXPHU -https://www.youtube.com/watch?v=d9PoN3qbkUA -https://www.youtube.com/watch?v=jjbVZ6fPReI -https://www.youtube.com/watch?v=avHoMxrGh3c -https://www.youtube.com/watch?v=zxdhR5cBKYA -https://www.youtube.com/watch?v=XoK7nSXYmgQ -https://www.youtube.com/watch?v=ZhzwfaYrcvc -https://www.youtube.com/watch?v=-cHTdfy6CUI -https://www.youtube.com/watch?v=hJ6se5Ms3ko -https://www.youtube.com/watch?v=Zxfcj4uc0h4 -https://www.youtube.com/watch?v=yTG5zrbbxmg -https://www.youtube.com/watch?v=EH8BsC2MKNY -https://www.youtube.com/watch?v=fGXTLaO7aPo -https://www.youtube.com/watch?v=p2jo-VXkzr4 -https://www.youtube.com/watch?v=DN47veER2K0 -https://www.youtube.com/watch?v=h3dMZC3V_mA -https://www.youtube.com/watch?v=4KBB_CxKN6M -https://www.youtube.com/watch?v=nVoSg1NfPrE -https://www.youtube.com/watch?v=GHzS1ogWdMI -https://www.youtube.com/watch?v=r6Q8GLUGWY4 -https://www.youtube.com/watch?v=-t0U70j9DHY -https://www.youtube.com/watch?v=gHYAwsSXsNI -https://www.youtube.com/watch?v=XTGlxwURgJo -https://www.youtube.com/watch?v=Dj-Zrmh_a54 -https://www.youtube.com/watch?v=GNrt-iNaKvQ -https://www.youtube.com/watch?v=vrvYFPHxVMg -https://www.youtube.com/watch?v=bdlZlk0wvvo -https://www.youtube.com/watch?v=qd789Zfq5iU -https://www.youtube.com/watch?v=G4h-B9lI_vA -https://www.youtube.com/watch?v=HtJIvuVRR_s -https://www.youtube.com/watch?v=eiB4V7hSqa4 \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_03.txt b/airflow/inputfiles/urls.rt250_03.txt deleted file mode 100644 index b1f6097..0000000 --- a/airflow/inputfiles/urls.rt250_03.txt +++ /dev/null @@ -1,237 +0,0 @@ -https://www.youtube.com/watch?v=B_Ay09BH2qU -https://www.youtube.com/watch?v=jA-64XSM2s4 -https://www.youtube.com/watch?v=IzPqkrsEPSc -https://www.youtube.com/watch?v=pHJvLpyb2tA -https://www.youtube.com/watch?v=S3t3wvksx9U -https://www.youtube.com/watch?v=YdmaCruUVDM -https://www.youtube.com/watch?v=zlnoM_Le0C4 -https://www.youtube.com/watch?v=ppoa5SJDmA0 -https://www.youtube.com/watch?v=fD9Jz4GuSY4 -https://www.youtube.com/watch?v=DGzLoTmx6JQ -https://www.youtube.com/watch?v=dovFWM5KjLU -https://www.youtube.com/watch?v=ZvlXe9HEQXQ -https://www.youtube.com/watch?v=-69wodyyiVw -https://www.youtube.com/watch?v=ymd-9Mlusbg 
-https://www.youtube.com/watch?v=5NM8qn6Hz20 -https://www.youtube.com/watch?v=aZLq0ODPkJs -https://www.youtube.com/watch?v=E8idIfNUTGA -https://www.youtube.com/watch?v=9tLuJxoySL0 -https://www.youtube.com/watch?v=TAOo-sTxYWw -https://www.youtube.com/watch?v=6vqRtTHv2l8 -https://www.youtube.com/watch?v=u2-XFiAeZ0M -https://www.youtube.com/watch?v=GSevUSqfbKM -https://www.youtube.com/watch?v=ZAx17rgrhM4 -https://www.youtube.com/watch?v=beW0wPoKU08 -https://www.youtube.com/watch?v=ffi0bNkc7iw -https://www.youtube.com/watch?v=YN_bIXecRzk -https://www.youtube.com/watch?v=f5XA4-NaHfk -https://www.youtube.com/watch?v=41L2f8-Gp1E -https://www.youtube.com/watch?v=cyN4abf_cUQ -https://www.youtube.com/watch?v=kZwS6uracK0 -https://www.youtube.com/watch?v=ssJ2YAl-W60 -https://www.youtube.com/watch?v=Zn_ngvMUp8s -https://www.youtube.com/watch?v=of80gd4-_rU -https://www.youtube.com/watch?v=scj5d81nEWY -https://www.youtube.com/watch?v=24SMoy1JqxU -https://www.youtube.com/watch?v=SsqrVhVWBtQ -https://www.youtube.com/watch?v=qRuuDA3Oy1k -https://www.youtube.com/watch?v=E-II-TTGm1s -https://www.youtube.com/watch?v=a3kKG0hEbE4 -https://www.youtube.com/watch?v=UUi3KUyAJVw -https://www.youtube.com/watch?v=0f4Tl-y1SHY -https://www.youtube.com/watch?v=rNWD8g2gYlU -https://www.youtube.com/watch?v=E5KbVk6kFo8 -https://www.youtube.com/watch?v=9EQPXEvgaT0 -https://www.youtube.com/watch?v=VijmHIURpAg -https://www.youtube.com/watch?v=XZVeeC2MFps -https://www.youtube.com/watch?v=MU1izPlV7mE -https://www.youtube.com/watch?v=YzvEiKysxfI -https://www.youtube.com/watch?v=S-zswgmxRWk -https://www.youtube.com/watch?v=irR7K8QC5Mw -https://www.youtube.com/watch?v=8fboEbvBP4U -https://www.youtube.com/watch?v=Ehi60JJR6K8 -https://www.youtube.com/watch?v=unQ37i1fI3E -https://www.youtube.com/watch?v=Sl1xZIVwQzE -https://www.youtube.com/watch?v=EsgjQP8kc-4 -https://www.youtube.com/watch?v=-CS0ojb2VjA -https://www.youtube.com/watch?v=9_1qSUWAtzM -https://www.youtube.com/watch?v=H5_guRjO7qc -https://www.youtube.com/watch?v=GEi5YyVLB5M -https://www.youtube.com/watch?v=kMdZXT_6Jmo -https://www.youtube.com/watch?v=Kw1KahIAPkI -https://www.youtube.com/watch?v=sCat14cTzYA -https://www.youtube.com/watch?v=oQeBFLY3WL4 -https://www.youtube.com/watch?v=G0wp8-Il2RY -https://www.youtube.com/watch?v=PWlydfB627s -https://www.youtube.com/watch?v=aDUtimJ1GL8 -https://www.youtube.com/watch?v=vdaLx-wJ118 -https://www.youtube.com/watch?v=SggLcqRWUcU -https://www.youtube.com/watch?v=X_jKmYUcbmE -https://www.youtube.com/watch?v=DBi96HRvEug -https://www.youtube.com/watch?v=W-RrbD170uM -https://www.youtube.com/watch?v=z3JNl4ABEMU -https://www.youtube.com/watch?v=0vQu4oSMdQI -https://www.youtube.com/watch?v=s1Z1731q5e0 -https://www.youtube.com/watch?v=Et1ErNdiqXI -https://www.youtube.com/watch?v=b8HdEnMG9Mw -https://www.youtube.com/watch?v=4Jn35uhyAdU -https://www.youtube.com/watch?v=7g8nStM_XbM -https://www.youtube.com/watch?v=fNYdM_UWg_I -https://www.youtube.com/watch?v=EA2TyYTpMlQ -https://www.youtube.com/watch?v=-gqWADvlZ44 -https://www.youtube.com/watch?v=pjsMnKiyMmI -https://www.youtube.com/watch?v=-B8qoqH0i1Y -https://www.youtube.com/watch?v=nE-vyXDpjYU -https://www.youtube.com/watch?v=0Ok2fGNfbDU -https://www.youtube.com/watch?v=IXee870AMAs -https://www.youtube.com/watch?v=ye9lx_rbQ8o -https://www.youtube.com/watch?v=Ku8aWQaLIBQ -https://www.youtube.com/watch?v=junMQDztHck -https://www.youtube.com/watch?v=xngdVZtDh7I -https://www.youtube.com/watch?v=DARiF_HgHts -https://www.youtube.com/watch?v=jw7-9lEo1kc -https://www.youtube.com/watch?v=ODPGJPoga1A 
-https://www.youtube.com/watch?v=ZBybfxJyQuE -https://www.youtube.com/watch?v=mrPeOtu_6cU -https://www.youtube.com/watch?v=E3fxsRem3rA -https://www.youtube.com/watch?v=5Oa8Uec_eBg -https://www.youtube.com/watch?v=pcJxFJcHkuo -https://www.youtube.com/watch?v=Zl4Gbaun1pA -https://www.youtube.com/watch?v=EhKgh4N1AXc -https://www.youtube.com/watch?v=urGuSLPunlU -https://www.youtube.com/watch?v=4o0i3UFDJBA -https://www.youtube.com/watch?v=JXX3NQKvpIg -https://www.youtube.com/watch?v=EEwrQrEtivk -https://www.youtube.com/watch?v=FSxmGJBvYbU -https://www.youtube.com/watch?v=_eCnHwhXaTI -https://www.youtube.com/watch?v=xGtKzO7r0GI -https://www.youtube.com/watch?v=U5S9E6KMNns -https://www.youtube.com/watch?v=zF0yuAUI1F0 -https://www.youtube.com/watch?v=6lEE8tAk8YE -https://www.youtube.com/watch?v=dXrLLUqzlCM -https://www.youtube.com/watch?v=hOLrUvPyF68 -https://www.youtube.com/watch?v=aFwCB5VOk_c -https://www.youtube.com/watch?v=nKCjiJ5MJ9s -https://www.youtube.com/watch?v=il_9MesqVEw -https://www.youtube.com/watch?v=DhHrSXSJ9sQ -https://www.youtube.com/watch?v=vRshEolL8eM -https://www.youtube.com/watch?v=_UOmXO1t0ms -https://www.youtube.com/watch?v=_Lq0LKMTsTc -https://www.youtube.com/watch?v=1rZgsDAohi8 -https://www.youtube.com/watch?v=rZl1NAjtlr8 -https://www.youtube.com/watch?v=KFWegI-YGBw -https://www.youtube.com/watch?v=Nhqny-t2BoA -https://www.youtube.com/watch?v=rHf1PBmve8U -https://www.youtube.com/watch?v=Qhm3rTNuu1c -https://www.youtube.com/watch?v=_mGDcyryvuQ -https://www.youtube.com/watch?v=qo0cNivWHwI -https://www.youtube.com/watch?v=KjoN6pDVw7c -https://www.youtube.com/watch?v=hYLAZNqx9Sc -https://www.youtube.com/watch?v=Y4GEzEh4BDY -https://www.youtube.com/watch?v=SFfDx-SSDzo -https://www.youtube.com/watch?v=vt6I-SUokgs -https://www.youtube.com/watch?v=4Eqz9U1oEpE -https://www.youtube.com/watch?v=iBdn0aG6SCY -https://www.youtube.com/watch?v=5YGOwYF5zlE -https://www.youtube.com/watch?v=iqdS0qfA1iw -https://www.youtube.com/watch?v=S6SvIe3Kxa0 -https://www.youtube.com/watch?v=0JV0SuPtWwU -https://www.youtube.com/watch?v=UB-YooM-NIY -https://www.youtube.com/watch?v=4f7uOAxYQKk -https://www.youtube.com/watch?v=ODYcEncY9Z8 -https://www.youtube.com/watch?v=z1gsZhSRs_A -https://www.youtube.com/watch?v=lQHEDa6vDhk -https://www.youtube.com/watch?v=Y33t3LEoTlM -https://www.youtube.com/watch?v=SOk9ROkKPrA -https://www.youtube.com/watch?v=lxPy60KW3VY -https://www.youtube.com/watch?v=reT95LPQCoM -https://www.youtube.com/watch?v=jmrqWtANVm0 -https://www.youtube.com/watch?v=3TfciDvpMOU -https://www.youtube.com/watch?v=HW677VglUgs -https://www.youtube.com/watch?v=DtYkKDkGrqo -https://www.youtube.com/watch?v=128YFZf8DGo -https://www.youtube.com/watch?v=KujWR5rPJ1o -https://www.youtube.com/watch?v=RZ6g7zRVaOA -https://www.youtube.com/watch?v=3L_yf8TO1P0 -https://www.youtube.com/watch?v=RJuY4t_58Y0 -https://www.youtube.com/watch?v=DLTlIVuawAE -https://www.youtube.com/watch?v=QfF9mpVq_14 -https://www.youtube.com/watch?v=OqGD8MNJKnI -https://www.youtube.com/watch?v=O4BqK1cylmQ -https://www.youtube.com/watch?v=vu2FttWQKMg -https://www.youtube.com/watch?v=Yh2nT6crCiE -https://www.youtube.com/watch?v=dKbRkBvtohg -https://www.youtube.com/watch?v=D0eDZjIwAmI -https://www.youtube.com/watch?v=AD37qE7t0ck -https://www.youtube.com/watch?v=l3UU8A8JEE8 -https://www.youtube.com/watch?v=GshhMRTjwZs -https://www.youtube.com/watch?v=-R-0EKCgXNY -https://www.youtube.com/watch?v=WhXJJQzmlTQ -https://www.youtube.com/watch?v=x0Fx3YGbvrs -https://www.youtube.com/watch?v=P5T1dXkG7-I -https://www.youtube.com/watch?v=7VO6E6Nj75c 
-https://www.youtube.com/watch?v=CivVo4AbbVo -https://www.youtube.com/watch?v=MKrMnu22z9c -https://www.youtube.com/watch?v=2YgNc05_Z7E -https://www.youtube.com/watch?v=6HJi1cg-gBE -https://www.youtube.com/watch?v=felrJtLc3UY -https://www.youtube.com/watch?v=U8HIIF-W3zE -https://www.youtube.com/watch?v=EYwNAObexJk -https://www.youtube.com/watch?v=iq4lpHbGQ60 -https://www.youtube.com/watch?v=ANpWkCGe6Zk -https://www.youtube.com/watch?v=a7W0t-Q5cFs -https://www.youtube.com/watch?v=9oDYxrEfVzM -https://www.youtube.com/watch?v=okbFZMnxoGQ -https://www.youtube.com/watch?v=Npb88SaLOPc -https://www.youtube.com/watch?v=S6u-py6UlX4 -https://www.youtube.com/watch?v=FXmNHhFOUuM -https://www.youtube.com/watch?v=q9p7i0Jb4rg -https://www.youtube.com/watch?v=6ZaG2I7mXcM -https://www.youtube.com/watch?v=ojDYVBeK_d4 -https://www.youtube.com/watch?v=HisWu1kZcTI -https://www.youtube.com/watch?v=WXm5T0AWE04 -https://www.youtube.com/watch?v=6mI2rvHbCQE -https://www.youtube.com/watch?v=hEYARh2flvc -https://www.youtube.com/watch?v=MVTmbHpeQwg -https://www.youtube.com/watch?v=DN7Pp-qdKY8 -https://www.youtube.com/watch?v=nGLB3uIhvdA -https://www.youtube.com/watch?v=VDq2whjVOQ8 -https://www.youtube.com/watch?v=2uFJkQJHX7s -https://www.youtube.com/watch?v=fRJrdKVfA4E -https://www.youtube.com/watch?v=JXtbeBL7iog -https://www.youtube.com/watch?v=1HUZpi6Kx5g -https://www.youtube.com/watch?v=j-UII0WaS-w -https://www.youtube.com/watch?v=HMjlEN2YgLg -https://www.youtube.com/watch?v=9TqFfzAzbNE -https://www.youtube.com/watch?v=GZSslRRYJg8 -https://www.youtube.com/watch?v=sR123A-THRs -https://www.youtube.com/watch?v=bt3X8MJgJWo -https://www.youtube.com/watch?v=NXEmtBe3R2c -https://www.youtube.com/watch?v=Aw7KY5ryvNo -https://www.youtube.com/watch?v=iIyfFJZuxSs -https://www.youtube.com/watch?v=JR2R1yTcUyk -https://www.youtube.com/watch?v=ID2HSLcAKmE -https://www.youtube.com/watch?v=sxuixFTH4Y8 -https://www.youtube.com/watch?v=4veS9jm-utw -https://www.youtube.com/watch?v=-bNr6-8xHhE -https://www.youtube.com/watch?v=mLscN6cV89k -https://www.youtube.com/watch?v=cZlMf1khndo -https://www.youtube.com/watch?v=FXWA9qKxhXM -https://www.youtube.com/watch?v=Gen3Ng42Md4 -https://www.youtube.com/watch?v=AqYhwYiXmvM -https://www.youtube.com/watch?v=rUnmd1j4mkM -https://www.youtube.com/watch?v=b3C6X_DUwbs -https://www.youtube.com/watch?v=I7xIcND2oBs -https://www.youtube.com/watch?v=grkcZZo9nXE -https://www.youtube.com/watch?v=rF3gnarthUE -https://www.youtube.com/watch?v=ft-hzWHU9ac -https://www.youtube.com/watch?v=w4tasjBq9pI -https://www.youtube.com/watch?v=JiCFWNxWwAs -https://www.youtube.com/watch?v=DpaZOzMXHnI -https://www.youtube.com/watch?v=2OwwoTVQ7Uc -https://www.youtube.com/watch?v=MdqKCXXXRBs -https://www.youtube.com/watch?v=bZgOyRXBqMM -https://www.youtube.com/watch?v=dmAteOuRBfQ -https://www.youtube.com/watch?v=h_e-al5r4gk -https://www.youtube.com/watch?v=pi3N_wQS2n0 -https://www.youtube.com/watch?v=4SD-hsZ2Cso \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_04.txt b/airflow/inputfiles/urls.rt250_04.txt deleted file mode 100644 index b87e8c4..0000000 --- a/airflow/inputfiles/urls.rt250_04.txt +++ /dev/null @@ -1,235 +0,0 @@ -https://www.youtube.com/watch?v=mtd2vwD7t5I -https://www.youtube.com/watch?v=0zj6do0S0rc -https://www.youtube.com/watch?v=sO_hQgkul_w -https://www.youtube.com/watch?v=p3nf7CuskI4 -https://www.youtube.com/watch?v=Ck-RlFjl6UA -https://www.youtube.com/watch?v=GWg_lkXHg6A -https://www.youtube.com/watch?v=_I34dV-BhaA -https://www.youtube.com/watch?v=SWpRsRAAyXE 
-https://www.youtube.com/watch?v=uXZqwKp255E -https://www.youtube.com/watch?v=60Tt-BZ2QF4 -https://www.youtube.com/watch?v=acBMw3OL_8k -https://www.youtube.com/watch?v=aZpKNjN-mnA -https://www.youtube.com/watch?v=eQ9LWl_NpxE -https://www.youtube.com/watch?v=QbVtkJI56x4 -https://www.youtube.com/watch?v=0S4iduG2DoY -https://www.youtube.com/watch?v=5IAD-EFqgX4 -https://www.youtube.com/watch?v=hVAanRQ6kmI -https://www.youtube.com/watch?v=GKMr408YsLI -https://www.youtube.com/watch?v=trrwQ2_mr10 -https://www.youtube.com/watch?v=EN-mjnSO8rU -https://www.youtube.com/watch?v=ssujDv_dWCQ -https://www.youtube.com/watch?v=0gZCi6hZjqc -https://www.youtube.com/watch?v=ueATjovo5TM -https://www.youtube.com/watch?v=l5nzkdMPeTM -https://www.youtube.com/watch?v=ejxUiHoZqoo -https://www.youtube.com/watch?v=jK04XF1iK3g -https://www.youtube.com/watch?v=7M33poh57dg -https://www.youtube.com/watch?v=gwfCxfvMQuQ -https://www.youtube.com/watch?v=RxX_q9ra7XM -https://www.youtube.com/watch?v=5qFfbpA6hjg -https://www.youtube.com/watch?v=PbFKX8EZGAM -https://www.youtube.com/watch?v=Lc_SsKBQqMs -https://www.youtube.com/watch?v=unsrEUNXxGQ -https://www.youtube.com/watch?v=VKLI2iMY7v4 -https://www.youtube.com/watch?v=ymgo_bReR9A -https://www.youtube.com/watch?v=DOruZP0mpNc -https://www.youtube.com/watch?v=TndhOZv6ejE -https://www.youtube.com/watch?v=Ex_5kLFxpHk -https://www.youtube.com/watch?v=5Oga5OYv-sU -https://www.youtube.com/watch?v=fzEcs1s1jNk -https://www.youtube.com/watch?v=kKjY7Pj4K3o -https://www.youtube.com/watch?v=-OTZxe3IZ0Y -https://www.youtube.com/watch?v=rnUxkSviq0w -https://www.youtube.com/watch?v=u9ob0AOX3DE -https://www.youtube.com/watch?v=HWplP9yPSrk -https://www.youtube.com/watch?v=bi4qdsDEQk4 -https://www.youtube.com/watch?v=K-dvZrLMPFg -https://www.youtube.com/watch?v=05OF14NhSVQ -https://www.youtube.com/watch?v=56T4a3FoWxc -https://www.youtube.com/watch?v=dJr-KpDS86M -https://www.youtube.com/watch?v=d8lGr1e7rHc -https://www.youtube.com/watch?v=DoCiNSlYEuM -https://www.youtube.com/watch?v=pHhIGaECBIU -https://www.youtube.com/watch?v=jbNxu-lkamQ -https://www.youtube.com/watch?v=Mu3klSAn8_M -https://www.youtube.com/watch?v=GaqU6VbRfjA -https://www.youtube.com/watch?v=fq4Q90CjxFw -https://www.youtube.com/watch?v=oSXLaLC6zks -https://www.youtube.com/watch?v=Y8jW2jm6Rho -https://www.youtube.com/watch?v=uRAc3_UsXs8 -https://www.youtube.com/watch?v=W1n5dvzGph0 -https://www.youtube.com/watch?v=uWhiz9or3PA -https://www.youtube.com/watch?v=_A-3gkPVmc0 -https://www.youtube.com/watch?v=huVwqWg2vM4 -https://www.youtube.com/watch?v=_P6k6AdXts4 -https://www.youtube.com/watch?v=OcMnLtgMHEY -https://www.youtube.com/watch?v=r87feQvLfLI -https://www.youtube.com/watch?v=39A9XqLgtsk -https://www.youtube.com/watch?v=f5D79JWJq20 -https://www.youtube.com/watch?v=M6znxNv8TvI -https://www.youtube.com/watch?v=9rv04oK-PSo -https://www.youtube.com/watch?v=6KWq6lNjdQ8 -https://www.youtube.com/watch?v=sZWHJvC847c -https://www.youtube.com/watch?v=Jzoc9dyaOZ8 -https://www.youtube.com/watch?v=6253hY1D0oY -https://www.youtube.com/watch?v=TpRAHZun_wo -https://www.youtube.com/watch?v=EX8y8u2nTJQ -https://www.youtube.com/watch?v=ZEbobft1Awo -https://www.youtube.com/watch?v=SFe1RqTiAYc -https://www.youtube.com/watch?v=5KM9hc1V_qw -https://www.youtube.com/watch?v=G9B-P8msN74 -https://www.youtube.com/watch?v=-3zkv6CKMfU -https://www.youtube.com/watch?v=Cckr3dhQOLI -https://www.youtube.com/watch?v=Gy2NB2ncaMU -https://www.youtube.com/watch?v=gN44tf6nx78 -https://www.youtube.com/watch?v=ShdOISVUjHw -https://www.youtube.com/watch?v=Lm5IFC9ALfk 
-https://www.youtube.com/watch?v=_rZYSwKpLX0 -https://www.youtube.com/watch?v=L92zNi0MNRM -https://www.youtube.com/watch?v=n0UoWuP9OiA -https://www.youtube.com/watch?v=vg335u2KZy4 -https://www.youtube.com/watch?v=HlwRYcXoM-A -https://www.youtube.com/watch?v=DyaWG0Pff6w -https://www.youtube.com/watch?v=J4t265zWn04 -https://www.youtube.com/watch?v=Z66kRbSH_uU -https://www.youtube.com/watch?v=oKuaW6z16EA -https://www.youtube.com/watch?v=vLN3Vy3BRDk -https://www.youtube.com/watch?v=pnDDrLoNjvE -https://www.youtube.com/watch?v=sfKcuh-4KXk -https://www.youtube.com/watch?v=MUV2tbTe-gk -https://www.youtube.com/watch?v=SeHGsjnTjGY -https://www.youtube.com/watch?v=4p1_h_-HEPs -https://www.youtube.com/watch?v=57yQ1qJhJe0 -https://www.youtube.com/watch?v=17HNRtQyAGU -https://www.youtube.com/watch?v=C-bvICeWw_M -https://www.youtube.com/watch?v=kQ7hd-68au4 -https://www.youtube.com/watch?v=s6o2AtE-kUI -https://www.youtube.com/watch?v=YoMMu1gBahs -https://www.youtube.com/watch?v=VNfm4RT431g -https://www.youtube.com/watch?v=rrYlQlJwkus -https://www.youtube.com/watch?v=oonEB-IPBaU -https://www.youtube.com/watch?v=Dcac2B3qvWg -https://www.youtube.com/watch?v=HZqKKzn2UFs -https://www.youtube.com/watch?v=AM-FGYVsoBc -https://www.youtube.com/watch?v=5OX7CLr4PLE -https://www.youtube.com/watch?v=HAWvUl5dyDk -https://www.youtube.com/watch?v=1eqqdP0K4Vg -https://www.youtube.com/watch?v=aDPG1-baku0 -https://www.youtube.com/watch?v=smJio-3-LIM -https://www.youtube.com/watch?v=sBr1clm9xdI -https://www.youtube.com/watch?v=ULzPldjmoYM -https://www.youtube.com/watch?v=7tW7QBkGQrc -https://www.youtube.com/watch?v=FXmWRe2FQU8 -https://www.youtube.com/watch?v=ZyJ6Mx_j8go -https://www.youtube.com/watch?v=8HeODJvg2cc -https://www.youtube.com/watch?v=0nQQpVvH01k -https://www.youtube.com/watch?v=7_Qw2VsZ_fE -https://www.youtube.com/watch?v=4rLiF8ROqgQ -https://www.youtube.com/watch?v=lSdrRidKmRI -https://www.youtube.com/watch?v=9BoyIFnQvlo -https://www.youtube.com/watch?v=YXxUEgD9g5E -https://www.youtube.com/watch?v=HOGUAI-kJFc -https://www.youtube.com/watch?v=67oQXgK7Vz8 -https://www.youtube.com/watch?v=co8DCZR_0s4 -https://www.youtube.com/watch?v=hb6j3L0RDaE -https://www.youtube.com/watch?v=6ZtkrtIPo4Q -https://www.youtube.com/watch?v=0LZmbG61eDY -https://www.youtube.com/watch?v=JOgjRV8K5lc -https://www.youtube.com/watch?v=LxYu4xXKfyw -https://www.youtube.com/watch?v=N8MJhB4L8v4 -https://www.youtube.com/watch?v=n4bkV1s-PPY -https://www.youtube.com/watch?v=4Wa6XJEDYH4 -https://www.youtube.com/watch?v=n4udGJAqe6c -https://www.youtube.com/watch?v=zFNTtZXUvhc -https://www.youtube.com/watch?v=FtvVf23ZDjY -https://www.youtube.com/watch?v=Aqv3MX-dHT4 -https://www.youtube.com/watch?v=4uqg6MuwNc0 -https://www.youtube.com/watch?v=f18SkV6hFvo -https://www.youtube.com/watch?v=ZAD5eCOWIS8 -https://www.youtube.com/watch?v=0CUF13oxaVE -https://www.youtube.com/watch?v=yHiVYkb5eWo -https://www.youtube.com/watch?v=vyNo6dW7XrI -https://www.youtube.com/watch?v=EwMMdVKF22I -https://www.youtube.com/watch?v=6YpC-pGDRyQ -https://www.youtube.com/watch?v=y2NixaBN6xw -https://www.youtube.com/watch?v=TCyvJv6qv10 -https://www.youtube.com/watch?v=ubsYhOEBmWI -https://www.youtube.com/watch?v=ul5Xai16hjE -https://www.youtube.com/watch?v=QKC8cerBW-s -https://www.youtube.com/watch?v=_v9sKsaDJ0s -https://www.youtube.com/watch?v=fJM0M7I4bmM -https://www.youtube.com/watch?v=dmeajYLmHOQ -https://www.youtube.com/watch?v=9DtM-zA1_0Y -https://www.youtube.com/watch?v=20NI7mGf0kE -https://www.youtube.com/watch?v=UfLuPlzcq1o -https://www.youtube.com/watch?v=7xT1b0AzI1Y 
-https://www.youtube.com/watch?v=SaiyvwevIno -https://www.youtube.com/watch?v=L8RX2_DVKqY -https://www.youtube.com/watch?v=K2VRYB9PSIY -https://www.youtube.com/watch?v=z1iBdDvIXKg -https://www.youtube.com/watch?v=TjRiRF8nXEo -https://www.youtube.com/watch?v=UKS0YrXoRdA -https://www.youtube.com/watch?v=did4sI6zq_A -https://www.youtube.com/watch?v=wf5-HRTKg-E -https://www.youtube.com/watch?v=nB6y1x4pbi4 -https://www.youtube.com/watch?v=9JsoIuIxopQ -https://www.youtube.com/watch?v=DsuREQMVjNY -https://www.youtube.com/watch?v=z5GgUQak-us -https://www.youtube.com/watch?v=yiVAyD9ozCo -https://www.youtube.com/watch?v=roHXzgobmnw -https://www.youtube.com/watch?v=XaOHdTuxuPM -https://www.youtube.com/watch?v=PNlmaXsixvQ -https://www.youtube.com/watch?v=PCxRTU3rBTE -https://www.youtube.com/watch?v=e3BCGTYsDOs -https://www.youtube.com/watch?v=QRv90i58W_E -https://www.youtube.com/watch?v=33PovAKeH0E -https://www.youtube.com/watch?v=FlGvWltFCmA -https://www.youtube.com/watch?v=b-ffdH8Hg08 -https://www.youtube.com/watch?v=Si0KHefiKaM -https://www.youtube.com/watch?v=2dgFiwdcorY -https://www.youtube.com/watch?v=cFVF-6pX9R0 -https://www.youtube.com/watch?v=OUkHgcG2v2M -https://www.youtube.com/watch?v=gm9LqOfnZOI -https://www.youtube.com/watch?v=_UQO2LM1WPw -https://www.youtube.com/watch?v=LKwG21IEWUw -https://www.youtube.com/watch?v=S4hoDHN_wQY -https://www.youtube.com/watch?v=8MV8Yy5GImE -https://www.youtube.com/watch?v=oF2HTr_XgVM -https://www.youtube.com/watch?v=CH9JaEBW83Y -https://www.youtube.com/watch?v=pxjDuq3r4iI -https://www.youtube.com/watch?v=3dUJdTxxCiE -https://www.youtube.com/watch?v=HcfxxE__Xjc -https://www.youtube.com/watch?v=GqVZLE5yxtY -https://www.youtube.com/watch?v=FfrhxtShg5s -https://www.youtube.com/watch?v=j6_gXJ54OFA -https://www.youtube.com/watch?v=UZ-qB0EKdEM -https://www.youtube.com/watch?v=SOgvMA1G7_M -https://www.youtube.com/watch?v=S5abWlkirvU -https://www.youtube.com/watch?v=3MXfmTMJ_SI -https://www.youtube.com/watch?v=hT8KURdMMi4 -https://www.youtube.com/watch?v=UoIr_9J12RY -https://www.youtube.com/watch?v=dPCNW1dFigg -https://www.youtube.com/watch?v=IWc4RArypJs -https://www.youtube.com/watch?v=ZghfaSSd3dQ -https://www.youtube.com/watch?v=H_YFW94w_FQ -https://www.youtube.com/watch?v=PJv9mk5hltM -https://www.youtube.com/watch?v=7XV_gsf8yow -https://www.youtube.com/watch?v=8NukH9vc2JI -https://www.youtube.com/watch?v=tlNi-QfnOTc -https://www.youtube.com/watch?v=fc77CExfyvA -https://www.youtube.com/watch?v=YDZPIg6K1X4 -https://www.youtube.com/watch?v=BBrIcZ8VT4E -https://www.youtube.com/watch?v=yFGpm9oLs20 -https://www.youtube.com/watch?v=G81ehUvbzrU -https://www.youtube.com/watch?v=NJARNmRC1NY -https://www.youtube.com/watch?v=LZNmPPirbFQ -https://www.youtube.com/watch?v=ZqZN7qNyQso -https://www.youtube.com/watch?v=X6bFHcU6u9w -https://www.youtube.com/watch?v=Ej4LIXsaLag -https://www.youtube.com/watch?v=7T0ntzJQAO8 -https://www.youtube.com/watch?v=VwOk7Y7n_0k -https://www.youtube.com/watch?v=m5IERpTJLaw -https://www.youtube.com/watch?v=_irlrf9Qhl0 -https://www.youtube.com/watch?v=sNeSgwgyjPA -https://www.youtube.com/watch?v=tVoFvkB2weE \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_05.txt b/airflow/inputfiles/urls.rt250_05.txt deleted file mode 100644 index 6c3219b..0000000 --- a/airflow/inputfiles/urls.rt250_05.txt +++ /dev/null @@ -1,236 +0,0 @@ -https://www.youtube.com/watch?v=p4T-uoIF_6Y -https://www.youtube.com/watch?v=qNZRvUK_ogw -https://www.youtube.com/watch?v=0nHJXFiFtnk -https://www.youtube.com/watch?v=wSwPs-6WdfY 
-https://www.youtube.com/watch?v=ONWWw-hSbsg -https://www.youtube.com/watch?v=lxM_a7xvYGc -https://www.youtube.com/watch?v=ENz1DI-xv08 -https://www.youtube.com/watch?v=VYOpiHVztCE -https://www.youtube.com/watch?v=Sj1mr1vZ1zg -https://www.youtube.com/watch?v=fNfY53BPSmg -https://www.youtube.com/watch?v=vuuQKCbAIjk -https://www.youtube.com/watch?v=z3M-xV_oem4 -https://www.youtube.com/watch?v=pfOYokoGVb0 -https://www.youtube.com/watch?v=HejkqCQe0D8 -https://www.youtube.com/watch?v=7jQUzT2M8X0 -https://www.youtube.com/watch?v=kAA_7GG1LQI -https://www.youtube.com/watch?v=yRXo1l6gRX4 -https://www.youtube.com/watch?v=PULxfbCW1bM -https://www.youtube.com/watch?v=Ow1zVOUxQ3E -https://www.youtube.com/watch?v=09_ieUyQD8s -https://www.youtube.com/watch?v=delIGj2LDts -https://www.youtube.com/watch?v=ir4fhBd9PW8 -https://www.youtube.com/watch?v=lQ9smpWZ_dQ -https://www.youtube.com/watch?v=9XkO3mye-1w -https://www.youtube.com/watch?v=KVzjmF3sFxs -https://www.youtube.com/watch?v=0VusddXJIsI -https://www.youtube.com/watch?v=6_ssgMPsgbI -https://www.youtube.com/watch?v=VbZw9HweTeo -https://www.youtube.com/watch?v=00SImiNjHyM -https://www.youtube.com/watch?v=dO6JKS6wbAA -https://www.youtube.com/watch?v=wpqMNn1IhDw -https://www.youtube.com/watch?v=sSv3ihsOxvM -https://www.youtube.com/watch?v=6mFcIil6_z0 -https://www.youtube.com/watch?v=Sgx-2mDqSzc -https://www.youtube.com/watch?v=A8AB7TpQxEs -https://www.youtube.com/watch?v=KS7eqksUg9o -https://www.youtube.com/watch?v=QF343yGE3CY -https://www.youtube.com/watch?v=T7XOVJRUa-0 -https://www.youtube.com/watch?v=-D0ZbmuNZR4 -https://www.youtube.com/watch?v=SZ5uMY2BnjI -https://www.youtube.com/watch?v=Wz2OGaWFY0E -https://www.youtube.com/watch?v=Wz6ofYXGc88 -https://www.youtube.com/watch?v=gbG5N1WxE3Q -https://www.youtube.com/watch?v=htET1lBbmBI -https://www.youtube.com/watch?v=rKTMv0Em8XY -https://www.youtube.com/watch?v=73tgi-8FxGI -https://www.youtube.com/watch?v=io8FePn3Z6A -https://www.youtube.com/watch?v=SptD8UmCrRM -https://www.youtube.com/watch?v=WvcDTzX0vr8 -https://www.youtube.com/watch?v=-_5rhG_EjTg -https://www.youtube.com/watch?v=1UAwlk6sRZo -https://www.youtube.com/watch?v=DCQmVzOifcg -https://www.youtube.com/watch?v=3cT4Sw_Dkhg -https://www.youtube.com/watch?v=pfdcN2FcKug -https://www.youtube.com/watch?v=JHD5Jt9J41U -https://www.youtube.com/watch?v=4WjBo5p6eTw -https://www.youtube.com/watch?v=Hl6AB6oViQs -https://www.youtube.com/watch?v=5XNQo4CC9dM -https://www.youtube.com/watch?v=Fnr1v1wYy60 -https://www.youtube.com/watch?v=2sKHlRpOMMo -https://www.youtube.com/watch?v=Ou6UEeUGIDA -https://www.youtube.com/watch?v=x6pX2rT-SqM -https://www.youtube.com/watch?v=dINr88UYHgc -https://www.youtube.com/watch?v=S3UOmaM-PpU -https://www.youtube.com/watch?v=8pXy8bh0Q0Q -https://www.youtube.com/watch?v=jhJFYfacwAc -https://www.youtube.com/watch?v=Hy67pOxlTgQ -https://www.youtube.com/watch?v=64-sAuJ8K7Y -https://www.youtube.com/watch?v=xaGWnjP7NVQ -https://www.youtube.com/watch?v=vYu3tkfaEcc -https://www.youtube.com/watch?v=6kO7vguhv2M -https://www.youtube.com/watch?v=rZjpxCSbu9w -https://www.youtube.com/watch?v=F4SZ4kNkX0c -https://www.youtube.com/watch?v=MeMR02kR3gE -https://www.youtube.com/watch?v=hm9vnD7gsCQ -https://www.youtube.com/watch?v=MaKkOahuC78 -https://www.youtube.com/watch?v=mc1FhpTls-A -https://www.youtube.com/watch?v=7XOxmV4ddNI -https://www.youtube.com/watch?v=b2o7a0D1ALs -https://www.youtube.com/watch?v=_oSutnfDUf8 -https://www.youtube.com/watch?v=taiQxc18xk0 -https://www.youtube.com/watch?v=kI6lgu9OL9I -https://www.youtube.com/watch?v=UNE-Himotz0 
-https://www.youtube.com/watch?v=uD70OQniXTQ -https://www.youtube.com/watch?v=vs72Ng_jodU -https://www.youtube.com/watch?v=yanyMgdNfWU -https://www.youtube.com/watch?v=sGtTSEDc_w8 -https://www.youtube.com/watch?v=p-tCzjrh52Q -https://www.youtube.com/watch?v=PMQJ3dHi-JQ -https://www.youtube.com/watch?v=zIt66_3zYfM -https://www.youtube.com/watch?v=8mAMPRNl7ZQ -https://www.youtube.com/watch?v=ktFVOeYnwJI -https://www.youtube.com/watch?v=sKfIsBA_k60 -https://www.youtube.com/watch?v=y2axw12xLlc -https://www.youtube.com/watch?v=yDPEq8ObtXg -https://www.youtube.com/watch?v=tjJX5oB4EF0 -https://www.youtube.com/watch?v=WERFogub0MY -https://www.youtube.com/watch?v=JfN1S9Cil1I -https://www.youtube.com/watch?v=OUY9hCLQ6s8 -https://www.youtube.com/watch?v=kpg0Q0KEvyU -https://www.youtube.com/watch?v=AxgSz2gh6WE -https://www.youtube.com/watch?v=sKRDjWd4KCw -https://www.youtube.com/watch?v=mBl2wOUv5RA -https://www.youtube.com/watch?v=-i_xeg7jssk -https://www.youtube.com/watch?v=OG2kKOmey4Q -https://www.youtube.com/watch?v=mbHsmGm0F8c -https://www.youtube.com/watch?v=pn2D3Wd5Lc4 -https://www.youtube.com/watch?v=8MFvpgA6ylk -https://www.youtube.com/watch?v=BKC7rXf54-k -https://www.youtube.com/watch?v=Q7Q5ygmO3mU -https://www.youtube.com/watch?v=WwbqQ3TPeok -https://www.youtube.com/watch?v=_1GYGOaMCes -https://www.youtube.com/watch?v=jCJ-nEx2ko0 -https://www.youtube.com/watch?v=nxQ8UPOk4tc -https://www.youtube.com/watch?v=13m9TxErPRI -https://www.youtube.com/watch?v=IRcFyWNZeAA -https://www.youtube.com/watch?v=UL6kx-t_xM8 -https://www.youtube.com/watch?v=9gtn2fmvRlA -https://www.youtube.com/watch?v=xJjpIjfEPoc -https://www.youtube.com/watch?v=-sXNKkOsMNs -https://www.youtube.com/watch?v=883w-T9wHBs -https://www.youtube.com/watch?v=a1EQBbKRfoc -https://www.youtube.com/watch?v=b7_7qpLvKpQ -https://www.youtube.com/watch?v=r2abEcxai08 -https://www.youtube.com/watch?v=VEzH8V7Tt80 -https://www.youtube.com/watch?v=-Zt0PiwL0Wo -https://www.youtube.com/watch?v=5L_hjw0y9WU -https://www.youtube.com/watch?v=oLpS-692p_Y -https://www.youtube.com/watch?v=c0IiK5jpg_I -https://www.youtube.com/watch?v=JZcpGbm4FX4 -https://www.youtube.com/watch?v=RVvUTayv2L8 -https://www.youtube.com/watch?v=mKx9CJ2B-Us -https://www.youtube.com/watch?v=6AWS08oFmO4 -https://www.youtube.com/watch?v=6VNQ91IAE1U -https://www.youtube.com/watch?v=YdITVnpEOiA -https://www.youtube.com/watch?v=z67zFn_E94g -https://www.youtube.com/watch?v=NduD3p7MugA -https://www.youtube.com/watch?v=aMPTcld2R50 -https://www.youtube.com/watch?v=4rsm7Xjv_Hw -https://www.youtube.com/watch?v=cKKi-s6xvGQ -https://www.youtube.com/watch?v=fa8yHVDha9A -https://www.youtube.com/watch?v=2ocd6oVbfSk -https://www.youtube.com/watch?v=xd6BSNRjS9s -https://www.youtube.com/watch?v=kXPkpDzTKUI -https://www.youtube.com/watch?v=TeTxAhmjZDc -https://www.youtube.com/watch?v=pIsznwhTFzA -https://www.youtube.com/watch?v=TX2duib2HvQ -https://www.youtube.com/watch?v=2lSOhcBa1cE -https://www.youtube.com/watch?v=aBHO9vEqeg0 -https://www.youtube.com/watch?v=6kNnsU01z6s -https://www.youtube.com/watch?v=AgXkWyfudCY -https://www.youtube.com/watch?v=63LvptDiyn0 -https://www.youtube.com/watch?v=mpls4RO2Sew -https://www.youtube.com/watch?v=FQQOy3gK0aM -https://www.youtube.com/watch?v=iElhQSrK_gQ -https://www.youtube.com/watch?v=06rJ3VU5XuY -https://www.youtube.com/watch?v=f1uAhp8G2iY -https://www.youtube.com/watch?v=rEolrC_dWCc -https://www.youtube.com/watch?v=XQBl29RFtRw -https://www.youtube.com/watch?v=gGmKewwq_G4 -https://www.youtube.com/watch?v=Ktv_YSp1T48 -https://www.youtube.com/watch?v=1s8kzflWSuI 
-https://www.youtube.com/watch?v=Yt7Gtls0Rpc -https://www.youtube.com/watch?v=0xHg1q2dybA -https://www.youtube.com/watch?v=D9l-LOBMYTA -https://www.youtube.com/watch?v=hw7fxmn8lJk -https://www.youtube.com/watch?v=uvvAIuFptXw -https://www.youtube.com/watch?v=ds2JXnt5xXs -https://www.youtube.com/watch?v=xkWirP51bFU -https://www.youtube.com/watch?v=-jkmgcbf2AQ -https://www.youtube.com/watch?v=mH8Qrii8EOQ -https://www.youtube.com/watch?v=367pqLOIXcM -https://www.youtube.com/watch?v=3jJDsO137Wc -https://www.youtube.com/watch?v=RDGRu99HgWQ -https://www.youtube.com/watch?v=l4W1EeLdpAg -https://www.youtube.com/watch?v=NjZVHcoxgjY -https://www.youtube.com/watch?v=5UncrLkE6rA -https://www.youtube.com/watch?v=01rfz1uMDP0 -https://www.youtube.com/watch?v=RN6a6FNGbUI -https://www.youtube.com/watch?v=WZIYfMpDXCY -https://www.youtube.com/watch?v=mvrpON9NTk4 -https://www.youtube.com/watch?v=ywM_kpE6x08 -https://www.youtube.com/watch?v=ygPlWXqHsig -https://www.youtube.com/watch?v=mAcwJmv-lCc -https://www.youtube.com/watch?v=IPFFvQDYaf4 -https://www.youtube.com/watch?v=zurAzTl_h38 -https://www.youtube.com/watch?v=fiauZn74bak -https://www.youtube.com/watch?v=uuiFxk428WI -https://www.youtube.com/watch?v=HqayuXwuL3w -https://www.youtube.com/watch?v=E6beeQe7NQA -https://www.youtube.com/watch?v=tKH2QqmekxA -https://www.youtube.com/watch?v=z1T1ObFO_P0 -https://www.youtube.com/watch?v=pk_wGZjE4ZM -https://www.youtube.com/watch?v=I8AFTCfTCNU -https://www.youtube.com/watch?v=ef7YgE16fko -https://www.youtube.com/watch?v=XX9gQ5ogYM0 -https://www.youtube.com/watch?v=jfwJxQHBuYQ -https://www.youtube.com/watch?v=19Rv6AuPQ2g -https://www.youtube.com/watch?v=0tIXPDwJves -https://www.youtube.com/watch?v=3YPohsVqHU0 -https://www.youtube.com/watch?v=mfJXdvV92jI -https://www.youtube.com/watch?v=mSemBOoh2Yo -https://www.youtube.com/watch?v=0l20ILvAwt4 -https://www.youtube.com/watch?v=qEYFI_z0K3E -https://www.youtube.com/watch?v=280uzhCRR7s -https://www.youtube.com/watch?v=UNJ3S8ivbTQ -https://www.youtube.com/watch?v=FJm_yAT4yDc -https://www.youtube.com/watch?v=skpWa0r4bUI -https://www.youtube.com/watch?v=W8PJbBAcyV0 -https://www.youtube.com/watch?v=TmJS6aj3-dw -https://www.youtube.com/watch?v=k_w_TzpeZxI -https://www.youtube.com/watch?v=K_F3yqv7CJE -https://www.youtube.com/watch?v=QXmlNXcMySE -https://www.youtube.com/watch?v=A1vAAOVmbP0 -https://www.youtube.com/watch?v=PaBqH6VD55E -https://www.youtube.com/watch?v=ucAoopysUAI -https://www.youtube.com/watch?v=vjshLjye-qE -https://www.youtube.com/watch?v=XgQBNjTQ3Is -https://www.youtube.com/watch?v=pF7-2QwXHKc -https://www.youtube.com/watch?v=jctNS45TWpU -https://www.youtube.com/watch?v=PKorRRGUyHw -https://www.youtube.com/watch?v=2v8TzB8-LoI -https://www.youtube.com/watch?v=3VatPXxfjaM -https://www.youtube.com/watch?v=omeKGD5FdIA -https://www.youtube.com/watch?v=u4zV6mXmmKs -https://www.youtube.com/watch?v=YYee20a-QqI -https://www.youtube.com/watch?v=SZ9mdXqtwd8 -https://www.youtube.com/watch?v=zKYRTjUywaY -https://www.youtube.com/watch?v=-9e4GCOOmp8 -https://www.youtube.com/watch?v=6xFLANoml_Q -https://www.youtube.com/watch?v=feMq5y8UB3Q -https://www.youtube.com/watch?v=qZ6tlyBMUqE -https://www.youtube.com/watch?v=fnNqGTWMYBM -https://www.youtube.com/watch?v=Ry8Z5solATY -https://www.youtube.com/watch?v=fnbiyF0buBU -https://www.youtube.com/watch?v=9WXa1LNNe2o \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_06.txt b/airflow/inputfiles/urls.rt250_06.txt deleted file mode 100644 index d5480ab..0000000 --- a/airflow/inputfiles/urls.rt250_06.txt +++ /dev/null @@ 
-1,239 +0,0 @@ -https://www.youtube.com/watch?v=z8UZZZxFeUc -https://www.youtube.com/watch?v=JXaJXwwOJ4Y -https://www.youtube.com/watch?v=uIWXP5jcBv4 -https://www.youtube.com/watch?v=bFyGBhVyiMo -https://www.youtube.com/watch?v=QTzobwv3Yw0 -https://www.youtube.com/watch?v=8Jbm7SL7s74 -https://www.youtube.com/watch?v=jdeZXWw3Nwo -https://www.youtube.com/watch?v=_KtVSsqSl4A -https://www.youtube.com/watch?v=j37CI3fhAwI -https://www.youtube.com/watch?v=IKoQFXPCVL4 -https://www.youtube.com/watch?v=b136ue2BxBo -https://www.youtube.com/watch?v=_p2DNkef5RM -https://www.youtube.com/watch?v=2vo7ix3_jYQ -https://www.youtube.com/watch?v=FuuMOebTHGQ -https://www.youtube.com/watch?v=ZBc6NpA__FY -https://www.youtube.com/watch?v=nP4Vzfp4U4g -https://www.youtube.com/watch?v=xYW0e8Vjpb4 -https://www.youtube.com/watch?v=YMEov5dJ3Ac -https://www.youtube.com/watch?v=2DYzzjwoqNM -https://www.youtube.com/watch?v=wO89RZgChCU -https://www.youtube.com/watch?v=hK4mdtUs9ZE -https://www.youtube.com/watch?v=QdlEzZKJFqc -https://www.youtube.com/watch?v=Fu0E9CLeHhU -https://www.youtube.com/watch?v=XJ_UDNWXYas -https://www.youtube.com/watch?v=2MYqr8atWKQ -https://www.youtube.com/watch?v=GjhNPl6S3cs -https://www.youtube.com/watch?v=GffJ3Yibndo -https://www.youtube.com/watch?v=SoIAPMCaDHI -https://www.youtube.com/watch?v=2IM9mIWhB0E -https://www.youtube.com/watch?v=qSS_73nlO1s -https://www.youtube.com/watch?v=S7RktiWIF5A -https://www.youtube.com/watch?v=eRGC_P3YV2s -https://www.youtube.com/watch?v=etnv3-g0aHk -https://www.youtube.com/watch?v=shW4hoJwg5c -https://www.youtube.com/watch?v=PokWotcGy8g -https://www.youtube.com/watch?v=xzvv4FnuSok -https://www.youtube.com/watch?v=bX5b98wVpoU -https://www.youtube.com/watch?v=51i8HuB2stg -https://www.youtube.com/watch?v=G2jFSnScYKs -https://www.youtube.com/watch?v=I6ImYrILAHU -https://www.youtube.com/watch?v=VbIR9XG6EFk -https://www.youtube.com/watch?v=1j__nRqiSmg -https://www.youtube.com/watch?v=TdGi6CgQuW8 -https://www.youtube.com/watch?v=1gQ99t8InuA -https://www.youtube.com/watch?v=Y-8nqbbFUWQ -https://www.youtube.com/watch?v=P9aHdSdql94 -https://www.youtube.com/watch?v=Omb0ipW_Ojo -https://www.youtube.com/watch?v=SKgjRyfg69Y -https://www.youtube.com/watch?v=h1iIlc-bdeM -https://www.youtube.com/watch?v=Bemw7-CrC-Q -https://www.youtube.com/watch?v=kc0VX29APG8 -https://www.youtube.com/watch?v=bN7Si1lY_Oo -https://www.youtube.com/watch?v=2NrgIhlGMss -https://www.youtube.com/watch?v=9nQ_Qs8gilE -https://www.youtube.com/watch?v=sFvJOjgA4bA -https://www.youtube.com/watch?v=w1biOJ2oKQw -https://www.youtube.com/watch?v=zF5cN9P5_aU -https://www.youtube.com/watch?v=_cqBSZPGwfw -https://www.youtube.com/watch?v=2T7hTrXKQIM -https://www.youtube.com/watch?v=icaEyu5gfbI -https://www.youtube.com/watch?v=6tIMyrKyupM -https://www.youtube.com/watch?v=lM4tb6fQ_nU -https://www.youtube.com/watch?v=_go14KzQA8A -https://www.youtube.com/watch?v=QgQFc4DL_yg -https://www.youtube.com/watch?v=BixPQs8sCuc -https://www.youtube.com/watch?v=s3jUI9QawCQ -https://www.youtube.com/watch?v=h31v70v8Usw -https://www.youtube.com/watch?v=QKbHkqK1gnk -https://www.youtube.com/watch?v=zGOQhVS8q_I -https://www.youtube.com/watch?v=W-DwBR0wHD8 -https://www.youtube.com/watch?v=en4Lom0HTVQ -https://www.youtube.com/watch?v=6-TM3WHpXk8 -https://www.youtube.com/watch?v=l-a1tra_LJY -https://www.youtube.com/watch?v=xEheck5jDss -https://www.youtube.com/watch?v=pxOjDtsSaPo -https://www.youtube.com/watch?v=ESDlq4Uza68 -https://www.youtube.com/watch?v=l3599LD9ot0 -https://www.youtube.com/watch?v=jo1Gcx33xg8 
-https://www.youtube.com/watch?v=DcADJQWk9AE -https://www.youtube.com/watch?v=DV0mS4OiPv8 -https://www.youtube.com/watch?v=Lk_A8heCZUI -https://www.youtube.com/watch?v=PxMAWsZMmSQ -https://www.youtube.com/watch?v=6lZiUc4LLA8 -https://www.youtube.com/watch?v=Sa9-Is51Wn8 -https://www.youtube.com/watch?v=1RHu2Vfw3v0 -https://www.youtube.com/watch?v=aac2UFmgA-Y -https://www.youtube.com/watch?v=lrzdn6syTrM -https://www.youtube.com/watch?v=xUaZZ6Yr6KI -https://www.youtube.com/watch?v=Njm0h38ljqs -https://www.youtube.com/watch?v=npIwOKtj2yM -https://www.youtube.com/watch?v=sX45wetKJq4 -https://www.youtube.com/watch?v=E1wRBaIFY3c -https://www.youtube.com/watch?v=akmjJAfy0xM -https://www.youtube.com/watch?v=kB62dvTOFhA -https://www.youtube.com/watch?v=LX3VH1I9Qg4 -https://www.youtube.com/watch?v=wSod3-xDe90 -https://www.youtube.com/watch?v=xFHN_xIN-eU -https://www.youtube.com/watch?v=LTtc10Iom6o -https://www.youtube.com/watch?v=-bKFpzTM-MA -https://www.youtube.com/watch?v=BjTO2n_c5eQ -https://www.youtube.com/watch?v=G1dNIRpqZJg -https://www.youtube.com/watch?v=xr_Wgs0BHY8 -https://www.youtube.com/watch?v=zpvmWlkl74s -https://www.youtube.com/watch?v=-dNDoy2sA1c -https://www.youtube.com/watch?v=q55uhizppEk -https://www.youtube.com/watch?v=ms2xZYmhN7E -https://www.youtube.com/watch?v=I1mXIlzbTNQ -https://www.youtube.com/watch?v=OLgjdd4VYCU -https://www.youtube.com/watch?v=flBo_rqbRqc -https://www.youtube.com/watch?v=vgEke8PTzWo -https://www.youtube.com/watch?v=d9Npvs3YUEA -https://www.youtube.com/watch?v=nB8UexTjlts -https://www.youtube.com/watch?v=1YkCgyAttcY -https://www.youtube.com/watch?v=pNLXjgQbKVA -https://www.youtube.com/watch?v=UI9Ay3sP-Ic -https://www.youtube.com/watch?v=fAMtvFi6JBQ -https://www.youtube.com/watch?v=RSjVB0h5TD8 -https://www.youtube.com/watch?v=6PZAVZ4EZVE -https://www.youtube.com/watch?v=1enPX90IDjU -https://www.youtube.com/watch?v=PRHYrtgdz70 -https://www.youtube.com/watch?v=qkP5pjASL3o -https://www.youtube.com/watch?v=37zGf0w1Dug -https://www.youtube.com/watch?v=-2SoH9C0tbo -https://www.youtube.com/watch?v=P_1iWnR-tkA -https://www.youtube.com/watch?v=y4dQ6DTWURU -https://www.youtube.com/watch?v=4mPP8uWwxr8 -https://www.youtube.com/watch?v=7q8aLzdUXd8 -https://www.youtube.com/watch?v=CGoej1jmNbQ -https://www.youtube.com/watch?v=rHCqTxAEOOM -https://www.youtube.com/watch?v=LcGqzVfw85M -https://www.youtube.com/watch?v=iTxzkv8kEWI -https://www.youtube.com/watch?v=j1cRaYMlJmQ -https://www.youtube.com/watch?v=U_bg7CNrAZc -https://www.youtube.com/watch?v=rNUFnOO2rXs -https://www.youtube.com/watch?v=voi8bgF1Ijw -https://www.youtube.com/watch?v=3Xbyc8sbpCI -https://www.youtube.com/watch?v=W61OfSeoWBI -https://www.youtube.com/watch?v=RpcYEFBZMnY -https://www.youtube.com/watch?v=UdATWjLvZ10 -https://www.youtube.com/watch?v=1sPiqba_w-c -https://www.youtube.com/watch?v=NGLASxoC4SA -https://www.youtube.com/watch?v=LqYJRx81E6w -https://www.youtube.com/watch?v=kVQqM2w8sm4 -https://www.youtube.com/watch?v=rP4AjhcVIMQ -https://www.youtube.com/watch?v=Ptpl65Z_Q0M -https://www.youtube.com/watch?v=aVnv7Iy_QtE -https://www.youtube.com/watch?v=Kl7o4k6CfXw -https://www.youtube.com/watch?v=M6G1BAa8E9c -https://www.youtube.com/watch?v=fkNCVh4HIBA -https://www.youtube.com/watch?v=8gbRco5_2rg -https://www.youtube.com/watch?v=b2UIhsD2Cvg -https://www.youtube.com/watch?v=tqdQ6W-01FM -https://www.youtube.com/watch?v=Li4PMmYFjOg -https://www.youtube.com/watch?v=LM2_eKw2nw4 -https://www.youtube.com/watch?v=k8Z3_QVcxQA -https://www.youtube.com/watch?v=Sfo6fW-QJys -https://www.youtube.com/watch?v=c06bh8Gjtk8 
-https://www.youtube.com/watch?v=mRx56SFQ8QA -https://www.youtube.com/watch?v=6QocZNp2Djo -https://www.youtube.com/watch?v=BbhrHJH4KRI -https://www.youtube.com/watch?v=lNCy7BO37hg -https://www.youtube.com/watch?v=ZF4t_BtIARA -https://www.youtube.com/watch?v=l3A4dHyW0Hk -https://www.youtube.com/watch?v=N5w-HfJdOIE -https://www.youtube.com/watch?v=vWttLvxA-08 -https://www.youtube.com/watch?v=TEGB8sgm5S4 -https://www.youtube.com/watch?v=dOB9cKSCXZo -https://www.youtube.com/watch?v=7fYHm-70kCs -https://www.youtube.com/watch?v=6do1PrNrhks -https://www.youtube.com/watch?v=lz2hNSk2Trc -https://www.youtube.com/watch?v=awCoKx8VNx0 -https://www.youtube.com/watch?v=_IC0X1jsWQg -https://www.youtube.com/watch?v=BfWfOZ8-4Vk -https://www.youtube.com/watch?v=5JxfZDHexes -https://www.youtube.com/watch?v=QKv2dlecwE4 -https://www.youtube.com/watch?v=bbFqfVHlOQs -https://www.youtube.com/watch?v=7FwB8-UtSTY -https://www.youtube.com/watch?v=Z6UZ2Est_Rk -https://www.youtube.com/watch?v=lsXrjvmhyGc -https://www.youtube.com/watch?v=yQmff118iG0 -https://www.youtube.com/watch?v=5zZCVYX46oA -https://www.youtube.com/watch?v=hakUmmty_Ls -https://www.youtube.com/watch?v=gO7qwdewGL8 -https://www.youtube.com/watch?v=N4bChET_zcE -https://www.youtube.com/watch?v=km06-0psWps -https://www.youtube.com/watch?v=vYG3_0iwY6Q -https://www.youtube.com/watch?v=NfW7ugCXq-E -https://www.youtube.com/watch?v=-N6YZYPjyG4 -https://www.youtube.com/watch?v=MTqLhhYDQsg -https://www.youtube.com/watch?v=iNnrMwskCt0 -https://www.youtube.com/watch?v=cy1hAIofwTA -https://www.youtube.com/watch?v=ssPwz0YCqsA -https://www.youtube.com/watch?v=nPG8_5O1urE -https://www.youtube.com/watch?v=M6sy9m93Kcc -https://www.youtube.com/watch?v=69m-cyniT0Y -https://www.youtube.com/watch?v=JPy5s_vVTFs -https://www.youtube.com/watch?v=KfrQlDAog3Q -https://www.youtube.com/watch?v=x-7MZ-iGoQk -https://www.youtube.com/watch?v=H7IlCQnvU0I -https://www.youtube.com/watch?v=Yy19s2yBsVU -https://www.youtube.com/watch?v=2G5XeSGDPyc -https://www.youtube.com/watch?v=B1oYCHvADZY -https://www.youtube.com/watch?v=fSH7eaRQuJU -https://www.youtube.com/watch?v=95rnBvsZbZU -https://www.youtube.com/watch?v=pKRvuwn9GfI -https://www.youtube.com/watch?v=il0j0nQnK20 -https://www.youtube.com/watch?v=x3aishDTjeI -https://www.youtube.com/watch?v=1oGsN1mVgsM -https://www.youtube.com/watch?v=mBVYIXRAkHM -https://www.youtube.com/watch?v=6Z51tXq54zc -https://www.youtube.com/watch?v=-_gyejedS14 -https://www.youtube.com/watch?v=FF5ttCY898w -https://www.youtube.com/watch?v=xCGaVzXoBvo -https://www.youtube.com/watch?v=qt7lOLl8GAc -https://www.youtube.com/watch?v=WrjgF6hjfxY -https://www.youtube.com/watch?v=7AyxprXTsIg -https://www.youtube.com/watch?v=SyR5yRwWtNo -https://www.youtube.com/watch?v=-WIOayHCDPo -https://www.youtube.com/watch?v=y0JtoZERkhQ -https://www.youtube.com/watch?v=lqdbxTKrTS8 -https://www.youtube.com/watch?v=KUuJH5WSN_c -https://www.youtube.com/watch?v=vGUHmGWFgiA -https://www.youtube.com/watch?v=zDuvYbU6giQ -https://www.youtube.com/watch?v=D-KvhXV9qdM -https://www.youtube.com/watch?v=-NAcOMzY2qk -https://www.youtube.com/watch?v=Dysjq3qtI1c -https://www.youtube.com/watch?v=U-zJ0YVOeac -https://www.youtube.com/watch?v=50F6NBek_uE -https://www.youtube.com/watch?v=yNpIqZkaGXs -https://www.youtube.com/watch?v=CrbeAD8S8hU -https://www.youtube.com/watch?v=b4dHqUw9s98 -https://www.youtube.com/watch?v=RxRwInWoNCA -https://www.youtube.com/watch?v=QO8aek7VgOw -https://www.youtube.com/watch?v=Yk1y0z0Lz-Q -https://www.youtube.com/watch?v=fndst7rrz90 -https://www.youtube.com/watch?v=_sROrMglc7s 
-https://www.youtube.com/watch?v=NT8WIkntDBQ -https://www.youtube.com/watch?v=8xO0nrxYhtU -https://www.youtube.com/watch?v=VZJSRmgH7Ww \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_07.txt b/airflow/inputfiles/urls.rt250_07.txt deleted file mode 100644 index 6296b12..0000000 --- a/airflow/inputfiles/urls.rt250_07.txt +++ /dev/null @@ -1,241 +0,0 @@ -https://www.youtube.com/watch?v=nsejs-Vj6VA -https://www.youtube.com/watch?v=_cRazf-kuh0 -https://www.youtube.com/watch?v=p_JrAn10AsE -https://www.youtube.com/watch?v=8fDFFLoY08s -https://www.youtube.com/watch?v=TKfE2xHLcJU -https://www.youtube.com/watch?v=zY1UHAhvZNY -https://www.youtube.com/watch?v=eJZKJUvWPV0 -https://www.youtube.com/watch?v=0REjPqvjFwc -https://www.youtube.com/watch?v=oMVoj6hkt7Q -https://www.youtube.com/watch?v=gog3F-MRaBA -https://www.youtube.com/watch?v=qdDWjoy4g4o -https://www.youtube.com/watch?v=fzVtPI0WTgQ -https://www.youtube.com/watch?v=dceFJd55ZRs -https://www.youtube.com/watch?v=fAUydLncMlQ -https://www.youtube.com/watch?v=6hcoparO0AY -https://www.youtube.com/watch?v=R-OXwsnZkhg -https://www.youtube.com/watch?v=HMyZGhCNWEs -https://www.youtube.com/watch?v=sATiSokDMsE -https://www.youtube.com/watch?v=XrghP79kpjU -https://www.youtube.com/watch?v=Bgo5eBUIG70 -https://www.youtube.com/watch?v=iCA8zfIS57s -https://www.youtube.com/watch?v=H1TZCRXBus4 -https://www.youtube.com/watch?v=quMnDQBXzA8 -https://www.youtube.com/watch?v=dbkiHJyEWEU -https://www.youtube.com/watch?v=NBp4QUN77eg -https://www.youtube.com/watch?v=HZyOQzDS678 -https://www.youtube.com/watch?v=teOOvPRSuc8 -https://www.youtube.com/watch?v=rC9-woyHDKw -https://www.youtube.com/watch?v=CzDZv3upREs -https://www.youtube.com/watch?v=xkz6y4-I538 -https://www.youtube.com/watch?v=hP6-1XpmvW4 -https://www.youtube.com/watch?v=nHOMo4L4j_Y -https://www.youtube.com/watch?v=TH2UrUpWakY -https://www.youtube.com/watch?v=Qs1mQA4TRbg -https://www.youtube.com/watch?v=jn-N9NEZybs -https://www.youtube.com/watch?v=3BZsAQC6Prw -https://www.youtube.com/watch?v=i90tbQDY3eo -https://www.youtube.com/watch?v=vmyi7L6xDdU -https://www.youtube.com/watch?v=Yd7P73qLD3c -https://www.youtube.com/watch?v=zOUsPwjhph8 -https://www.youtube.com/watch?v=37yUKFtz_BI -https://www.youtube.com/watch?v=WU8iL0D2nY4 -https://www.youtube.com/watch?v=MTrIygjBiWk -https://www.youtube.com/watch?v=pNp85sYhcEs -https://www.youtube.com/watch?v=M72QlvVS39I -https://www.youtube.com/watch?v=xQy30Iikl9g -https://www.youtube.com/watch?v=TQq6CE-_zF4 -https://www.youtube.com/watch?v=bStpz3_WPuU -https://www.youtube.com/watch?v=lC51HTZjQeY -https://www.youtube.com/watch?v=_gljvpWvjdY -https://www.youtube.com/watch?v=KZUFuALvKSc -https://www.youtube.com/watch?v=-Kr4Ft2fCDc -https://www.youtube.com/watch?v=KcwYIQ8AAOs -https://www.youtube.com/watch?v=c5hjPFgf1DU -https://www.youtube.com/watch?v=w7aabeoAIns -https://www.youtube.com/watch?v=rb5NKtj2fn0 -https://www.youtube.com/watch?v=aJFnftIgpVg -https://www.youtube.com/watch?v=0poCn5r1wDc -https://www.youtube.com/watch?v=ygfFlVvBu4o -https://www.youtube.com/watch?v=MSI6bKsLH0s -https://www.youtube.com/watch?v=HbJd4DGdRXo -https://www.youtube.com/watch?v=0u2UXvR4yVI -https://www.youtube.com/watch?v=0TIqoSR1Mfk -https://www.youtube.com/watch?v=nlYkvu2-8_8 -https://www.youtube.com/watch?v=y7uOkbMqEjI -https://www.youtube.com/watch?v=V5UrsV1PU3Y -https://www.youtube.com/watch?v=kmw1YaOkfjE -https://www.youtube.com/watch?v=dOuyoV-9aHQ -https://www.youtube.com/watch?v=E0NbvW2TFLg -https://www.youtube.com/watch?v=q-jUCA5Rm28 
-https://www.youtube.com/watch?v=YrueIOPs5L0 -https://www.youtube.com/watch?v=IHkP6Syj82s -https://www.youtube.com/watch?v=iIFl1qX7CoY -https://www.youtube.com/watch?v=0cKhemqjgQ8 -https://www.youtube.com/watch?v=dcHvwaI5bAg -https://www.youtube.com/watch?v=Pezn6Ru5fi0 -https://www.youtube.com/watch?v=Wf_BnBmT5_E -https://www.youtube.com/watch?v=34Uc06b-yQ4 -https://www.youtube.com/watch?v=lnbpEfRQwyc -https://www.youtube.com/watch?v=Ilh90vut7jo -https://www.youtube.com/watch?v=CHc1Bu8A9QM -https://www.youtube.com/watch?v=73Ie5z5aBw8 -https://www.youtube.com/watch?v=k8PJC1YTelY -https://www.youtube.com/watch?v=iGZ-i6c2OGU -https://www.youtube.com/watch?v=Tj96vlZBPUs -https://www.youtube.com/watch?v=KDE2s9B1eDo -https://www.youtube.com/watch?v=d2E2x-BSKO0 -https://www.youtube.com/watch?v=Vvnq-f43v20 -https://www.youtube.com/watch?v=OP0TPyBjq7c -https://www.youtube.com/watch?v=sjZ-dbd7FQ0 -https://www.youtube.com/watch?v=Z1Z6KPzy0p4 -https://www.youtube.com/watch?v=vg0Ko4LnYh4 -https://www.youtube.com/watch?v=f0djA_P2rQ8 -https://www.youtube.com/watch?v=cC3q0aHIYCU -https://www.youtube.com/watch?v=LHcJTxccNXE -https://www.youtube.com/watch?v=JA7VdwocB6I -https://www.youtube.com/watch?v=Nhmlm15C4eA -https://www.youtube.com/watch?v=rY_suqSxWV8 -https://www.youtube.com/watch?v=pI29BFeSmuI -https://www.youtube.com/watch?v=KOgfzQxYIsI -https://www.youtube.com/watch?v=0D5Tc8Op_n4 -https://www.youtube.com/watch?v=CxcEWMHNePE -https://www.youtube.com/watch?v=obijefFzq7A -https://www.youtube.com/watch?v=WU5ULpORl3A -https://www.youtube.com/watch?v=IHZwGNq5tb4 -https://www.youtube.com/watch?v=EKFqRJUEjEA -https://www.youtube.com/watch?v=I4eb7P9atas -https://www.youtube.com/watch?v=lLxGNWraT4s -https://www.youtube.com/watch?v=zgr98z06KBA -https://www.youtube.com/watch?v=125ox0n1NeQ -https://www.youtube.com/watch?v=fH22LI57SHY -https://www.youtube.com/watch?v=sGTrqDst1Dc -https://www.youtube.com/watch?v=OBg_Li5ErIU -https://www.youtube.com/watch?v=sQIt0HHZ8fw -https://www.youtube.com/watch?v=M6zNwBAYKSM -https://www.youtube.com/watch?v=Qspv6ZH29FA -https://www.youtube.com/watch?v=WuHPgV2Yxbs -https://www.youtube.com/watch?v=hu8khi3clIY -https://www.youtube.com/watch?v=ds5tz7yWBCk -https://www.youtube.com/watch?v=A7xqGo4oeec -https://www.youtube.com/watch?v=FFJDWDhFybQ -https://www.youtube.com/watch?v=YPHA89-RDqg -https://www.youtube.com/watch?v=bUvul182gWs -https://www.youtube.com/watch?v=3gyX7S8dwq0 -https://www.youtube.com/watch?v=4wmE8_0odtY -https://www.youtube.com/watch?v=FpTyGD4M-Cs -https://www.youtube.com/watch?v=NinU97Irqi8 -https://www.youtube.com/watch?v=m3YF4PTVOWI -https://www.youtube.com/watch?v=IgytzDONMS8 -https://www.youtube.com/watch?v=ccOx041__PE -https://www.youtube.com/watch?v=Go_H4eFwDXk -https://www.youtube.com/watch?v=nw2U2V7o_cg -https://www.youtube.com/watch?v=idtYG-SAps4 -https://www.youtube.com/watch?v=O8tSaYoR_40 -https://www.youtube.com/watch?v=wFeSbJySSfA -https://www.youtube.com/watch?v=BaToTsaDkl4 -https://www.youtube.com/watch?v=Qnytk6loeJQ -https://www.youtube.com/watch?v=vr_mAzDD_fA -https://www.youtube.com/watch?v=Pc_EupD2jSA -https://www.youtube.com/watch?v=hku9k2Xvl6E -https://www.youtube.com/watch?v=tovKGVUwfcA -https://www.youtube.com/watch?v=uVW458DyeOM -https://www.youtube.com/watch?v=0Yijhh3Krb4 -https://www.youtube.com/watch?v=jLywttFVo54 -https://www.youtube.com/watch?v=LNI382U5WH0 -https://www.youtube.com/watch?v=4iGXG127GyE -https://www.youtube.com/watch?v=jEec64BueAw -https://www.youtube.com/watch?v=JQT8tGBCX3U -https://www.youtube.com/watch?v=gFqUAiDSLKc 
-https://www.youtube.com/watch?v=yO-hte4ubig -https://www.youtube.com/watch?v=OaCaeatGiHg -https://www.youtube.com/watch?v=9lKTASGr6aI -https://www.youtube.com/watch?v=_868vk4OPPA -https://www.youtube.com/watch?v=Z97QM0Yx6uk -https://www.youtube.com/watch?v=jVLhMwjkbf0 -https://www.youtube.com/watch?v=4oascqjiSJw -https://www.youtube.com/watch?v=Unf3oPkNIzw -https://www.youtube.com/watch?v=-G_-x2tOFCA -https://www.youtube.com/watch?v=pbc5kBSATt0 -https://www.youtube.com/watch?v=1MEbuepe0-I -https://www.youtube.com/watch?v=jWqjpi7hcSQ -https://www.youtube.com/watch?v=WWz-VmWIizU -https://www.youtube.com/watch?v=osNDWp5Lxs4 -https://www.youtube.com/watch?v=GRr_a8MjmMc -https://www.youtube.com/watch?v=sBgB3iMc0fU -https://www.youtube.com/watch?v=h7SGCGAwSY0 -https://www.youtube.com/watch?v=AxUVWd86vLA -https://www.youtube.com/watch?v=Wc_G3GAFyqA -https://www.youtube.com/watch?v=Cxj3ZRJ7NLY -https://www.youtube.com/watch?v=D_jP3ib6XQE -https://www.youtube.com/watch?v=ouFJwOQrPuM -https://www.youtube.com/watch?v=_xpQyzECcCY -https://www.youtube.com/watch?v=Z7k81eWYy_Y -https://www.youtube.com/watch?v=zj4X_Fyxc2o -https://www.youtube.com/watch?v=P30NDHxamLw -https://www.youtube.com/watch?v=VEa0RiU5aeU -https://www.youtube.com/watch?v=4QaomQeGxoo -https://www.youtube.com/watch?v=FSc6ZUeihNw -https://www.youtube.com/watch?v=b4Rc4ReDb0E -https://www.youtube.com/watch?v=Lpe9IxDT7Mg -https://www.youtube.com/watch?v=GOi9s_z1F1I -https://www.youtube.com/watch?v=Uo64UC9rTcE -https://www.youtube.com/watch?v=ddHwQHOtKZc -https://www.youtube.com/watch?v=S-L86KKvXCQ -https://www.youtube.com/watch?v=JjX-TRYRu4c -https://www.youtube.com/watch?v=2rrPI0swTN4 -https://www.youtube.com/watch?v=6cqdOXZ-p1k -https://www.youtube.com/watch?v=WgA9Mp5g3Y4 -https://www.youtube.com/watch?v=U0HUrZYx6ac -https://www.youtube.com/watch?v=IBcTbZEV4Fo -https://www.youtube.com/watch?v=AG8OpUezp7c -https://www.youtube.com/watch?v=ItFoZ46tqxQ -https://www.youtube.com/watch?v=GaxaPI3Gu28 -https://www.youtube.com/watch?v=lKT2fxbijRY -https://www.youtube.com/watch?v=cVA0grbN7hQ -https://www.youtube.com/watch?v=fHe2-hRyTmY -https://www.youtube.com/watch?v=U8rSmAOKTcQ -https://www.youtube.com/watch?v=aNnFRhE4j9Y -https://www.youtube.com/watch?v=sVnFzhkmtHk -https://www.youtube.com/watch?v=S3iZ3Tc-Hnc -https://www.youtube.com/watch?v=2GtgoycXGvY -https://www.youtube.com/watch?v=oWK6Udr2Nh4 -https://www.youtube.com/watch?v=kF5t6C41LEk -https://www.youtube.com/watch?v=s9Zp4qz4Vu0 -https://www.youtube.com/watch?v=9EOzw3C1BrM -https://www.youtube.com/watch?v=go17ulKugwQ -https://www.youtube.com/watch?v=f0XhiUgMPfA -https://www.youtube.com/watch?v=F73sFEJferQ -https://www.youtube.com/watch?v=OHI6BTcdMBw -https://www.youtube.com/watch?v=AichQ8Wk4ac -https://www.youtube.com/watch?v=CFx6yBhkciU -https://www.youtube.com/watch?v=VjLtkPPksUY -https://www.youtube.com/watch?v=ft07E4HZJ7Q -https://www.youtube.com/watch?v=mcF-bO_Odq4 -https://www.youtube.com/watch?v=64y2g7ahof4 -https://www.youtube.com/watch?v=aDsKWg_YM7s -https://www.youtube.com/watch?v=0fRpmAYWgJE -https://www.youtube.com/watch?v=Ddp9uGyjtt4 -https://www.youtube.com/watch?v=dx-9vznjBSg -https://www.youtube.com/watch?v=mQ7b1Q_KJfU -https://www.youtube.com/watch?v=0tJR95pgM3o -https://www.youtube.com/watch?v=iY3MxLzEQKI -https://www.youtube.com/watch?v=sSXyzOrkdfs -https://www.youtube.com/watch?v=s4RJntKPU1I -https://www.youtube.com/watch?v=zgqUFmzI54E -https://www.youtube.com/watch?v=woiJ0Pbo1b0 -https://www.youtube.com/watch?v=KgT9XrH94Nk -https://www.youtube.com/watch?v=oNxu5lt2efw 
-https://www.youtube.com/watch?v=E2l9L0m1EUA -https://www.youtube.com/watch?v=ZhJE6t9PlZM -https://www.youtube.com/watch?v=qQD13r8kMOQ -https://www.youtube.com/watch?v=oKe6LVmuan4 -https://www.youtube.com/watch?v=XJtLW-hPD7w -https://www.youtube.com/watch?v=q3vqX_pVTZs -https://www.youtube.com/watch?v=GjmR9wnd_Dw -https://www.youtube.com/watch?v=bhKcw0w8fNo -https://www.youtube.com/watch?v=2-p0IXWzGrc -https://www.youtube.com/watch?v=IQRoXoiJKQ4 -https://www.youtube.com/watch?v=gJl8gVq6Gpo -https://www.youtube.com/watch?v=oXAszTqnnYk -https://www.youtube.com/watch?v=HfihwpdJjBU \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_08.txt b/airflow/inputfiles/urls.rt250_08.txt deleted file mode 100644 index ed399bd..0000000 --- a/airflow/inputfiles/urls.rt250_08.txt +++ /dev/null @@ -1,230 +0,0 @@ -https://www.youtube.com/watch?v=r6dI7depuDc -https://www.youtube.com/watch?v=9Us8X93IpHI -https://www.youtube.com/watch?v=eP9ivGVw7kI -https://www.youtube.com/watch?v=A0deapksgiA -https://www.youtube.com/watch?v=ZLpoV5OTtZk -https://www.youtube.com/watch?v=k4jWWF4BetA -https://www.youtube.com/watch?v=35KOQ4uRIRs -https://www.youtube.com/watch?v=gJ7ROX5Y8hc -https://www.youtube.com/watch?v=tHbkzGWUZog -https://www.youtube.com/watch?v=WWd3g6tHso8 -https://www.youtube.com/watch?v=0663OUVIskw -https://www.youtube.com/watch?v=oy-JVwHPaUI -https://www.youtube.com/watch?v=y6G8S8lOUrA -https://www.youtube.com/watch?v=W78no98uQ-U -https://www.youtube.com/watch?v=BA2PP-MGGwE -https://www.youtube.com/watch?v=tTDpFJ1uuPg -https://www.youtube.com/watch?v=0BfC_7PcJB0 -https://www.youtube.com/watch?v=0h02m6TUzf8 -https://www.youtube.com/watch?v=siHWunTXQ0A -https://www.youtube.com/watch?v=BYC1R04F2Hk -https://www.youtube.com/watch?v=7w-ozZaEyP8 -https://www.youtube.com/watch?v=o7dKrpL7BpM -https://www.youtube.com/watch?v=IQyYk7FFCbw -https://www.youtube.com/watch?v=f_Gqgl6cFr0 -https://www.youtube.com/watch?v=G3QblS_YOms -https://www.youtube.com/watch?v=DoLGVksNpdM -https://www.youtube.com/watch?v=TT4yzCS4PeY -https://www.youtube.com/watch?v=RIGkw6nxLcc -https://www.youtube.com/watch?v=cWJgvi3UelE -https://www.youtube.com/watch?v=crtRlS9I3ro -https://www.youtube.com/watch?v=6dvBUPqj59c -https://www.youtube.com/watch?v=IfI6GnYGM0I -https://www.youtube.com/watch?v=QnLmgFuVg6w -https://www.youtube.com/watch?v=reouaReX7C4 -https://www.youtube.com/watch?v=s1pLvWNsh44 -https://www.youtube.com/watch?v=smmhlt3_lws -https://www.youtube.com/watch?v=QgTKDo-NbU8 -https://www.youtube.com/watch?v=rOvksymiKqs -https://www.youtube.com/watch?v=l1rVxcAzh1M -https://www.youtube.com/watch?v=_fFuQXF-t3k -https://www.youtube.com/watch?v=BH7R_yuIFKQ -https://www.youtube.com/watch?v=-s89hDTLSjs -https://www.youtube.com/watch?v=4IMfpToOWSo -https://www.youtube.com/watch?v=B0oMlBTEh1U -https://www.youtube.com/watch?v=ylNM15gPhsk -https://www.youtube.com/watch?v=M8ONaAxZ3MU -https://www.youtube.com/watch?v=V51CE9jO4O0 -https://www.youtube.com/watch?v=R1Lo5BZtlQw -https://www.youtube.com/watch?v=uqceJK_Iao4 -https://www.youtube.com/watch?v=XeSUzWSVk9o -https://www.youtube.com/watch?v=bhenb5eG--I -https://www.youtube.com/watch?v=dpTjA8MrLhI -https://www.youtube.com/watch?v=G2yTkeufjds -https://www.youtube.com/watch?v=o-R_DGU9hpM -https://www.youtube.com/watch?v=L2KUBIDfscI -https://www.youtube.com/watch?v=uLr35eZdFTA -https://www.youtube.com/watch?v=2zIvsCI88hI -https://www.youtube.com/watch?v=zOsscfwC0Mg -https://www.youtube.com/watch?v=c2kh0n2dD7Q -https://www.youtube.com/watch?v=U2nd48Sl3T4 
-https://www.youtube.com/watch?v=WD99krTKdF4 -https://www.youtube.com/watch?v=gfcFr3jruGg -https://www.youtube.com/watch?v=fsnTn1XqBic -https://www.youtube.com/watch?v=SwyLoyzuKm0 -https://www.youtube.com/watch?v=pDBToCyX1JE -https://www.youtube.com/watch?v=JNnDcvOlWJc -https://www.youtube.com/watch?v=7FVXJ9nSJhw -https://www.youtube.com/watch?v=j_ndVX1nTWo -https://www.youtube.com/watch?v=twN8USkFwaI -https://www.youtube.com/watch?v=N4P7vW2JA80 -https://www.youtube.com/watch?v=YzOTPqp0DQM -https://www.youtube.com/watch?v=aMpYQZTwIi8 -https://www.youtube.com/watch?v=H_lzWZl-VEk -https://www.youtube.com/watch?v=OXVeAoTdjHA -https://www.youtube.com/watch?v=17Oin61Mqc4 -https://www.youtube.com/watch?v=Cd0ciobBDcM -https://www.youtube.com/watch?v=rgU_5zaM0GU -https://www.youtube.com/watch?v=ll9LATC7_Ec -https://www.youtube.com/watch?v=K89lQsMwKpk -https://www.youtube.com/watch?v=mWcuuWJxLBs -https://www.youtube.com/watch?v=sc9q7JKLAmk -https://www.youtube.com/watch?v=bsu_n3Kljrw -https://www.youtube.com/watch?v=AWT5Itr4iIM -https://www.youtube.com/watch?v=TUbEiHEhahQ -https://www.youtube.com/watch?v=85CB_SgFNE8 -https://www.youtube.com/watch?v=rZeEP1suY7Q -https://www.youtube.com/watch?v=aNzVTdaHlpY -https://www.youtube.com/watch?v=QhWVwhe088c -https://www.youtube.com/watch?v=Rrjgb8ug144 -https://www.youtube.com/watch?v=ZS1VzG8pMz0 -https://www.youtube.com/watch?v=dwqX1IOBO1s -https://www.youtube.com/watch?v=gzq_ipnKzHc -https://www.youtube.com/watch?v=nrbiBE-GOVE -https://www.youtube.com/watch?v=DiPGzyMGw_A -https://www.youtube.com/watch?v=E827rlTAs3o -https://www.youtube.com/watch?v=sZ7HVucYRFM -https://www.youtube.com/watch?v=DJObJpK_wwU -https://www.youtube.com/watch?v=gXj3a_Ldagc -https://www.youtube.com/watch?v=8K5-CDvnefA -https://www.youtube.com/watch?v=AmphO8KrvZU -https://www.youtube.com/watch?v=7cDa5ZVXyRo -https://www.youtube.com/watch?v=NrFI656OE04 -https://www.youtube.com/watch?v=-dfb8v4dyts -https://www.youtube.com/watch?v=XHSL4tSuT-0 -https://www.youtube.com/watch?v=LUWic9zqq1M -https://www.youtube.com/watch?v=9X04dHWyu3Y -https://www.youtube.com/watch?v=FZmutKzxPhM -https://www.youtube.com/watch?v=Qznqxnn6YM0 -https://www.youtube.com/watch?v=CJzeNfEkEdE -https://www.youtube.com/watch?v=Rc2SS3dIFkk -https://www.youtube.com/watch?v=fYNqL3tgPj0 -https://www.youtube.com/watch?v=VC42A4FZSWw -https://www.youtube.com/watch?v=tew6A1xC5G4 -https://www.youtube.com/watch?v=06x_heN_39g -https://www.youtube.com/watch?v=cPOXcOs2cGg -https://www.youtube.com/watch?v=PyRvVKbbO68 -https://www.youtube.com/watch?v=ggOvUskvz9k -https://www.youtube.com/watch?v=BozqO_Ogy5w -https://www.youtube.com/watch?v=wmdb0RzvG7c -https://www.youtube.com/watch?v=FaxJYMPJ-pg -https://www.youtube.com/watch?v=ZpMzEGafoNw -https://www.youtube.com/watch?v=MP5VOBvP1Fg -https://www.youtube.com/watch?v=9EHHxVYRjiY -https://www.youtube.com/watch?v=0ApIbLKs-JU -https://www.youtube.com/watch?v=TWGRgM4fk4M -https://www.youtube.com/watch?v=eevPwoV7RvA -https://www.youtube.com/watch?v=brisSYFS31M -https://www.youtube.com/watch?v=SazhBlQTUhU -https://www.youtube.com/watch?v=3fscCoTP1iU -https://www.youtube.com/watch?v=pVwrZXsiCgE -https://www.youtube.com/watch?v=ztYTv43GvOg -https://www.youtube.com/watch?v=gPnyId2TCyQ -https://www.youtube.com/watch?v=RugwiGqJD60 -https://www.youtube.com/watch?v=DAdf7ikSE2M -https://www.youtube.com/watch?v=zHlWqwuktK8 -https://www.youtube.com/watch?v=t1JHMHPLJ54 -https://www.youtube.com/watch?v=iZC1L2uf-2I -https://www.youtube.com/watch?v=gnyv8uapleQ -https://www.youtube.com/watch?v=HH-oQrhhbgY 
-https://www.youtube.com/watch?v=8rOfqKXa7F4 -https://www.youtube.com/watch?v=yOGlDAJZH2M -https://www.youtube.com/watch?v=JngRtwK7gy4 -https://www.youtube.com/watch?v=zZ-oreEX2qo -https://www.youtube.com/watch?v=j690eWDthBo -https://www.youtube.com/watch?v=HBqG4BtYxRo -https://www.youtube.com/watch?v=q2NzF5hj1yw -https://www.youtube.com/watch?v=_Pnm5SJA4E0 -https://www.youtube.com/watch?v=mWKP6lmKtew -https://www.youtube.com/watch?v=ktCKwqFdaUU -https://www.youtube.com/watch?v=V4x5yOm9BFk -https://www.youtube.com/watch?v=uEIDIppWvAY -https://www.youtube.com/watch?v=R5KOBpyJmVs -https://www.youtube.com/watch?v=JFzm9o9l-4Y -https://www.youtube.com/watch?v=uf4ybmScHUQ -https://www.youtube.com/watch?v=fjLNYgfoYqs -https://www.youtube.com/watch?v=IWVscbhcsho -https://www.youtube.com/watch?v=zDSLWV9KypM -https://www.youtube.com/watch?v=vZyB77TU1zU -https://www.youtube.com/watch?v=608BY6toahw -https://www.youtube.com/watch?v=WHGPRC2K7mY -https://www.youtube.com/watch?v=3pEd0wcg_C4 -https://www.youtube.com/watch?v=AtzcGUWPwII -https://www.youtube.com/watch?v=ePUebx75iRQ -https://www.youtube.com/watch?v=IWdM9uq3ikk -https://www.youtube.com/watch?v=TeYCOtEecYU -https://www.youtube.com/watch?v=ncOWfpeYXFY -https://www.youtube.com/watch?v=x1RUjs9k-1c -https://www.youtube.com/watch?v=hS5VMbTEd1c -https://www.youtube.com/watch?v=XkMM9BQBDCQ -https://www.youtube.com/watch?v=zh-rpPHJWG4 -https://www.youtube.com/watch?v=bnVm7K8e1WY -https://www.youtube.com/watch?v=o8nHQ3JXnaY -https://www.youtube.com/watch?v=OLC4IeYEEjI -https://www.youtube.com/watch?v=2sE2osCi4JU -https://www.youtube.com/watch?v=lHFz0A8uiec -https://www.youtube.com/watch?v=H4zDTRO9Z58 -https://www.youtube.com/watch?v=IiDRZtiSqiM -https://www.youtube.com/watch?v=Ao7U1fB_yP8 -https://www.youtube.com/watch?v=qe6vAkiGXac -https://www.youtube.com/watch?v=QWsUmDesiJc -https://www.youtube.com/watch?v=ZLMHYYFGT4g -https://www.youtube.com/watch?v=dXKBgnRpfj4 -https://www.youtube.com/watch?v=CFyw49Fk_4U -https://www.youtube.com/watch?v=qd0Ksf1gO14 -https://www.youtube.com/watch?v=taUnDYQ6UI8 -https://www.youtube.com/watch?v=L7HY6r3C9T0 -https://www.youtube.com/watch?v=sTl6GtA20_M -https://www.youtube.com/watch?v=diL-9EB0TyM -https://www.youtube.com/watch?v=ymIOTE1PjZg -https://www.youtube.com/watch?v=_ZbtbsVaOPU -https://www.youtube.com/watch?v=9sGDtfUiktk -https://www.youtube.com/watch?v=vlVzSCef55I -https://www.youtube.com/watch?v=Y7tG3GrQvJ4 -https://www.youtube.com/watch?v=9JyI1FWv5yM -https://www.youtube.com/watch?v=obKqJaSnQK0 -https://www.youtube.com/watch?v=XR426H2wVgU -https://www.youtube.com/watch?v=Vu2C1pbIFWA -https://www.youtube.com/watch?v=XA4Ile920PA -https://www.youtube.com/watch?v=6qHXBy9EJvQ -https://www.youtube.com/watch?v=0WdmMfk0e6Q -https://www.youtube.com/watch?v=andVPsILYqw -https://www.youtube.com/watch?v=4Ttoi0W-wAM -https://www.youtube.com/watch?v=TID0TW7QRm0 -https://www.youtube.com/watch?v=p4L5_JfbL7w -https://www.youtube.com/watch?v=DUj29F5ZhEE -https://www.youtube.com/watch?v=9T8dkxDC42c -https://www.youtube.com/watch?v=ECCQhQTZ1SE -https://www.youtube.com/watch?v=JfrYNOJMvRg -https://www.youtube.com/watch?v=FJcrRD8yI4E -https://www.youtube.com/watch?v=N85iiQTKsXw -https://www.youtube.com/watch?v=Yyc6vE-0OgI -https://www.youtube.com/watch?v=-z9Kye8DBM0 -https://www.youtube.com/watch?v=ulSEa7ZOhWA -https://www.youtube.com/watch?v=SE52l6y7slI -https://www.youtube.com/watch?v=oErU3HKAilo -https://www.youtube.com/watch?v=NEYsomAIa1g -https://www.youtube.com/watch?v=4H5fsY5_MiA -https://www.youtube.com/watch?v=Lo5z_qGuQSI 
-https://www.youtube.com/watch?v=V2jfRLJsMCE -https://www.youtube.com/watch?v=aZxQruhKMDY -https://www.youtube.com/watch?v=ZMRcCmN25sA -https://www.youtube.com/watch?v=O-XsiSCIK7c -https://www.youtube.com/watch?v=-wl21uhUEx4 -https://www.youtube.com/watch?v=WfbrXc-EW54 -https://www.youtube.com/watch?v=M0yCTg5daZI -https://www.youtube.com/watch?v=UhT2sev4HK8 -https://www.youtube.com/watch?v=4v1AraoTz3w -https://www.youtube.com/watch?v=zTTU1MEiIbY -https://www.youtube.com/watch?v=pHb2ZMXtAiY -https://www.youtube.com/watch?v=mDUf4T5lv8E \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_11.txt b/airflow/inputfiles/urls.rt250_11.txt deleted file mode 100644 index ef26cdd..0000000 --- a/airflow/inputfiles/urls.rt250_11.txt +++ /dev/null @@ -1,237 +0,0 @@ -https://www.youtube.com/watch?v=-OYTrvsnRSQ -https://www.youtube.com/watch?v=HR-iLG6B8ds -https://www.youtube.com/watch?v=Gjtg8xAMbdU -https://www.youtube.com/watch?v=AKWX1wR4sy4 -https://www.youtube.com/watch?v=TTmAZRt8nK0 -https://www.youtube.com/watch?v=o242B-tnea8 -https://www.youtube.com/watch?v=wg7AyXZYbrs -https://www.youtube.com/watch?v=VSbWDAER9_A -https://www.youtube.com/watch?v=k20mrsFg9-Y -https://www.youtube.com/watch?v=vtkwDMx-tV8 -https://www.youtube.com/watch?v=4dvawM7D0hI -https://www.youtube.com/watch?v=eD0JzemrL20 -https://www.youtube.com/watch?v=etqVGZkJ2Bg -https://www.youtube.com/watch?v=LEactpOVdSM -https://www.youtube.com/watch?v=IAro_PZ6BQo -https://www.youtube.com/watch?v=RbW6o64ROvo -https://www.youtube.com/watch?v=RRoinZOWSS4 -https://www.youtube.com/watch?v=KQCfiswxjZI -https://www.youtube.com/watch?v=WqXw1Fpqpg8 -https://www.youtube.com/watch?v=WdueCBIyAfQ -https://www.youtube.com/watch?v=r1KdHltNgaY -https://www.youtube.com/watch?v=RryMChxFH58 -https://www.youtube.com/watch?v=A8wuUQt1Wik -https://www.youtube.com/watch?v=rCof-kGXOhk -https://www.youtube.com/watch?v=XX8ZxKFqoms -https://www.youtube.com/watch?v=T28iUbhDyZc -https://www.youtube.com/watch?v=F94negmd_Zg -https://www.youtube.com/watch?v=GjKANaIYvio -https://www.youtube.com/watch?v=XHI4mfWv0NU -https://www.youtube.com/watch?v=IKF_ZKhrxd4 -https://www.youtube.com/watch?v=al6SGQh0e0w -https://www.youtube.com/watch?v=SSCnx2HXSSw -https://www.youtube.com/watch?v=wSL896yNi_g -https://www.youtube.com/watch?v=XHugXebbTkw -https://www.youtube.com/watch?v=TtWI3cg-97I -https://www.youtube.com/watch?v=CIi5Khk0w-E -https://www.youtube.com/watch?v=5zitlGXm7i4 -https://www.youtube.com/watch?v=RTGPniQnj-4 -https://www.youtube.com/watch?v=PygTOgWy9NM -https://www.youtube.com/watch?v=08jdDCM1Jt4 -https://www.youtube.com/watch?v=s5mWxWlfmpE -https://www.youtube.com/watch?v=Oz5TobEFC_M -https://www.youtube.com/watch?v=jbdsSlo6EeE -https://www.youtube.com/watch?v=sUeJ4XDEQPA -https://www.youtube.com/watch?v=-QNIsfH1W6o -https://www.youtube.com/watch?v=khefdNkdrUA -https://www.youtube.com/watch?v=RerdE3DEPZk -https://www.youtube.com/watch?v=axSJE2GJCMY -https://www.youtube.com/watch?v=TalhxWW_AWo -https://www.youtube.com/watch?v=wlYmRMxqrKs -https://www.youtube.com/watch?v=tQtalMb-yi0 -https://www.youtube.com/watch?v=N2zw3_qroes -https://www.youtube.com/watch?v=f2ul_dsl4YM -https://www.youtube.com/watch?v=Dn-l3hv54dg -https://www.youtube.com/watch?v=Tmk-kKInF1Y -https://www.youtube.com/watch?v=dN6FD2RQXxQ -https://www.youtube.com/watch?v=BZKnqQYvwcE -https://www.youtube.com/watch?v=YB0P9HLgQuI -https://www.youtube.com/watch?v=LCP6qXwwOM4 -https://www.youtube.com/watch?v=c7xf3Ojj7KU -https://www.youtube.com/watch?v=gUg5WFE1UQg 
-https://www.youtube.com/watch?v=oG4j-rydlt0 -https://www.youtube.com/watch?v=NqKi-3E9W1Y -https://www.youtube.com/watch?v=mcFkcokKCNo -https://www.youtube.com/watch?v=U9pvsdkU3OE -https://www.youtube.com/watch?v=irL__2bmVBI -https://www.youtube.com/watch?v=aBwkS_wA9nM -https://www.youtube.com/watch?v=JBM5DEEyL-o -https://www.youtube.com/watch?v=H5rMXNG3gyc -https://www.youtube.com/watch?v=Y-03XzLAUDw -https://www.youtube.com/watch?v=xsHtpbobVkM -https://www.youtube.com/watch?v=hITnd7BSG-U -https://www.youtube.com/watch?v=LdKGVfUmpYk -https://www.youtube.com/watch?v=QtBGTQZYNyk -https://www.youtube.com/watch?v=BNF8-3G8vas -https://www.youtube.com/watch?v=B6aT-u__gJo -https://www.youtube.com/watch?v=oIGTEmHwjLg -https://www.youtube.com/watch?v=0VmIV7q34fM -https://www.youtube.com/watch?v=0dSCNkc1va4 -https://www.youtube.com/watch?v=98lsZ-wo1ek -https://www.youtube.com/watch?v=8U8nVbfNxlg -https://www.youtube.com/watch?v=9VVVNWk92d4 -https://www.youtube.com/watch?v=Ci04JrlD4us -https://www.youtube.com/watch?v=mx-AwWwdQKQ -https://www.youtube.com/watch?v=miNPNWSCQE0 -https://www.youtube.com/watch?v=FaJshsUefyQ -https://www.youtube.com/watch?v=TdvGEDxdsv4 -https://www.youtube.com/watch?v=1Hrb0g9LTsY -https://www.youtube.com/watch?v=1gERK4_AMgU -https://www.youtube.com/watch?v=6F36ChawCh8 -https://www.youtube.com/watch?v=7fCT8l5PA-0 -https://www.youtube.com/watch?v=35qUGqnd3nE -https://www.youtube.com/watch?v=SCQenAiVYBA -https://www.youtube.com/watch?v=MhGh_mCqrJU -https://www.youtube.com/watch?v=bOdjMBx60R0 -https://www.youtube.com/watch?v=N8gNk6u7nYQ -https://www.youtube.com/watch?v=Y0_-D9Ox9ng -https://www.youtube.com/watch?v=1GPTxwVbwvY -https://www.youtube.com/watch?v=Ewd0UuOEjPY -https://www.youtube.com/watch?v=Qw5oiYJFuSk -https://www.youtube.com/watch?v=55EaCTJ1pmE -https://www.youtube.com/watch?v=cuSZk8Pa8OI -https://www.youtube.com/watch?v=Xmu8YF0Bjyw -https://www.youtube.com/watch?v=ObyY3uEp7pA -https://www.youtube.com/watch?v=SB2092RkzhQ -https://www.youtube.com/watch?v=Xt-GvsySsOM -https://www.youtube.com/watch?v=FdyYRbOf0mQ -https://www.youtube.com/watch?v=9j4iaA9FcMg -https://www.youtube.com/watch?v=9Qi5sQpZGME -https://www.youtube.com/watch?v=d5qyho0ensI -https://www.youtube.com/watch?v=smlSGLoB8eI -https://www.youtube.com/watch?v=fJeDDk9Nzpk -https://www.youtube.com/watch?v=LfXLNUNqCOY -https://www.youtube.com/watch?v=ZEEY7AEA3z8 -https://www.youtube.com/watch?v=lEXGzNQtVGw -https://www.youtube.com/watch?v=MXv1fBNlMB0 -https://www.youtube.com/watch?v=7p5MNHXFTik -https://www.youtube.com/watch?v=d_4APkAy8Zk -https://www.youtube.com/watch?v=l6DpX3Bszh8 -https://www.youtube.com/watch?v=rc01sE8tfjo -https://www.youtube.com/watch?v=DY8BBLB_OuY -https://www.youtube.com/watch?v=NaJxE3R6fZU -https://www.youtube.com/watch?v=TeWVHkxx4C4 -https://www.youtube.com/watch?v=vbfYhTkX2FE -https://www.youtube.com/watch?v=TJmm555VGaU -https://www.youtube.com/watch?v=aunY-e-Jim8 -https://www.youtube.com/watch?v=HnLAm7OeJZc -https://www.youtube.com/watch?v=BTihVgB3j5U -https://www.youtube.com/watch?v=5Fh8bkdgIvM -https://www.youtube.com/watch?v=T9aj6Hc7tCI -https://www.youtube.com/watch?v=bJNjCcAhajI -https://www.youtube.com/watch?v=7dfLVKKRQ2U -https://www.youtube.com/watch?v=zXZSnUKaQt8 -https://www.youtube.com/watch?v=i6ZSMTAD1fQ -https://www.youtube.com/watch?v=koczyZ9jaGA -https://www.youtube.com/watch?v=uatoQB05WBY -https://www.youtube.com/watch?v=Xk1cUTg-1CY -https://www.youtube.com/watch?v=kGZS4pyqzOM -https://www.youtube.com/watch?v=jbyfwRF3qt8 -https://www.youtube.com/watch?v=i8VG_QT81cI 
-https://www.youtube.com/watch?v=0j8h1RiYMRA -https://www.youtube.com/watch?v=bhAej3dGUf8 -https://www.youtube.com/watch?v=KVMWvlI_Clg -https://www.youtube.com/watch?v=1zZwJRC7MRU -https://www.youtube.com/watch?v=e3nXSZAqVuM -https://www.youtube.com/watch?v=g_z_q-ylqBs -https://www.youtube.com/watch?v=TUklrg9ecVU -https://www.youtube.com/watch?v=uwwGeFOwabw -https://www.youtube.com/watch?v=zNyTvA0rMVs -https://www.youtube.com/watch?v=1xCZZGySdjY -https://www.youtube.com/watch?v=WoDEdNe_JDg -https://www.youtube.com/watch?v=-r01Nyjmltc -https://www.youtube.com/watch?v=ilN94X3Qmug -https://www.youtube.com/watch?v=3Zi2-g42lio -https://www.youtube.com/watch?v=MiLjhVl2FCY -https://www.youtube.com/watch?v=oZ9bdUqbKIU -https://www.youtube.com/watch?v=1mCjN-v4jvo -https://www.youtube.com/watch?v=oiRQ8weGi88 -https://www.youtube.com/watch?v=IVYU6O6uFJQ -https://www.youtube.com/watch?v=IGZGomDqxCE -https://www.youtube.com/watch?v=qlc7mEH_BHs -https://www.youtube.com/watch?v=GBcCT2bKgjY -https://www.youtube.com/watch?v=Gd3OZTuPeuc -https://www.youtube.com/watch?v=UQftIucKJyE -https://www.youtube.com/watch?v=viPomNeDwb4 -https://www.youtube.com/watch?v=eL3kOZqhl1Y -https://www.youtube.com/watch?v=0qo2q_lJpyc -https://www.youtube.com/watch?v=aNQS7XwI4uw -https://www.youtube.com/watch?v=lTa_0IrgL0k -https://www.youtube.com/watch?v=Nqvu51Z_uLc -https://www.youtube.com/watch?v=QEZ30dVgqyE -https://www.youtube.com/watch?v=G_4jcOwjjOI -https://www.youtube.com/watch?v=EXs553jIi-M -https://www.youtube.com/watch?v=e56_vnN12G4 -https://www.youtube.com/watch?v=uBfhO0pMiv0 -https://www.youtube.com/watch?v=ehG4FOaLqS0 -https://www.youtube.com/watch?v=tQ6qgJwN1m8 -https://www.youtube.com/watch?v=Qs8kAzVmaCA -https://www.youtube.com/watch?v=8ucB_qu1Inw -https://www.youtube.com/watch?v=OuiJhSc7r74 -https://www.youtube.com/watch?v=o4ozyDZugJc -https://www.youtube.com/watch?v=io4Ym8HqI2o -https://www.youtube.com/watch?v=bpstDSeSls4 -https://www.youtube.com/watch?v=vn9I2-twWKE -https://www.youtube.com/watch?v=-LZeoEcLXlk -https://www.youtube.com/watch?v=-I1qtCjwaBE -https://www.youtube.com/watch?v=AR3YQYvmMwE -https://www.youtube.com/watch?v=eQT4GZ_EqhQ -https://www.youtube.com/watch?v=i7voHoNZM9M -https://www.youtube.com/watch?v=Op1UJ5zciPk -https://www.youtube.com/watch?v=wMVX8tpg4nA -https://www.youtube.com/watch?v=izjTgLIYYs0 -https://www.youtube.com/watch?v=Pu1nRr-iZ6A -https://www.youtube.com/watch?v=PGex6tbXQPE -https://www.youtube.com/watch?v=H7HR796PVWg -https://www.youtube.com/watch?v=WZJccvWFoNo -https://www.youtube.com/watch?v=JLibrVW9_T4 -https://www.youtube.com/watch?v=7K1FnxElCug -https://www.youtube.com/watch?v=_iZ47vuEtaY -https://www.youtube.com/watch?v=5aafY8hGrUc -https://www.youtube.com/watch?v=3oxXxruuuuM -https://www.youtube.com/watch?v=eXRuojc_GLA -https://www.youtube.com/watch?v=d_qfc4o1RUI -https://www.youtube.com/watch?v=pvB1S3KVFhE -https://www.youtube.com/watch?v=_1X7caqbvkA -https://www.youtube.com/watch?v=XmjibdYAvLo -https://www.youtube.com/watch?v=aGsl9NGHh-E -https://www.youtube.com/watch?v=R_Z4sTgCK4k -https://www.youtube.com/watch?v=v7KHGsg6w4s -https://www.youtube.com/watch?v=aX6aGQufM3w -https://www.youtube.com/watch?v=yQZ94f7WMhc -https://www.youtube.com/watch?v=6i975zYY6eE -https://www.youtube.com/watch?v=RbbFCphAh0c -https://www.youtube.com/watch?v=wN_S8q03iOI -https://www.youtube.com/watch?v=GlhNWhIxzOE -https://www.youtube.com/watch?v=N7GlgQjNEOw -https://www.youtube.com/watch?v=Dyo7vQvXW_w -https://www.youtube.com/watch?v=1vHGsrpdRT8 -https://www.youtube.com/watch?v=hQhAIi1TT3s 
-https://www.youtube.com/watch?v=uJ5DmRIYGNo -https://www.youtube.com/watch?v=zT7ScFhOymY -https://www.youtube.com/watch?v=8SP-XcTntTI -https://www.youtube.com/watch?v=Wh8UZ0DNmmc -https://www.youtube.com/watch?v=IMo-p2hQVcc -https://www.youtube.com/watch?v=yHJ50ZRTN3E -https://www.youtube.com/watch?v=25JPZbxyfn4 -https://www.youtube.com/watch?v=BzqkagVVqN8 -https://www.youtube.com/watch?v=IBdX5dleUMg -https://www.youtube.com/watch?v=nAqV1-LO8n0 -https://www.youtube.com/watch?v=SFFe8JThkeg -https://www.youtube.com/watch?v=Kr5xy7Pqe9s -https://www.youtube.com/watch?v=r1kBEMb3wx8 -https://www.youtube.com/watch?v=Nc-iL85MTs8 -https://www.youtube.com/watch?v=ULC2AExXG4k -https://www.youtube.com/watch?v=kPoBOD3qbR8 -https://www.youtube.com/watch?v=_lZ91R7KZpQ -https://www.youtube.com/watch?v=rCpUpTozlbE \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_12.txt b/airflow/inputfiles/urls.rt250_12.txt deleted file mode 100644 index f14b1d8..0000000 --- a/airflow/inputfiles/urls.rt250_12.txt +++ /dev/null @@ -1,233 +0,0 @@ -https://www.youtube.com/watch?v=6xG55KCOw9c -https://www.youtube.com/watch?v=THIhR42o5Yo -https://www.youtube.com/watch?v=PueGS2ovb1k -https://www.youtube.com/watch?v=3ULU8ygv2uI -https://www.youtube.com/watch?v=vwmzA7puaag -https://www.youtube.com/watch?v=REdLuU2aKlE -https://www.youtube.com/watch?v=MmrsXGdjCN4 -https://www.youtube.com/watch?v=xkwPcTLBNYM -https://www.youtube.com/watch?v=-IBaWMv16RU -https://www.youtube.com/watch?v=Sq9QEsAOTkM -https://www.youtube.com/watch?v=V2EGLGgqYkI -https://www.youtube.com/watch?v=QDafrUdxXpM -https://www.youtube.com/watch?v=LOP7GVSzlcY -https://www.youtube.com/watch?v=uZoyBTh10S8 -https://www.youtube.com/watch?v=tX8jPSql8YU -https://www.youtube.com/watch?v=hiOy1Gf-3qY -https://www.youtube.com/watch?v=7piyNpPpESg -https://www.youtube.com/watch?v=zbLPgrDB3yI -https://www.youtube.com/watch?v=bIZIXRHyqoo -https://www.youtube.com/watch?v=JUv89Ehkoj0 -https://www.youtube.com/watch?v=Hi2L2OXSPMQ -https://www.youtube.com/watch?v=CiIFLJPAzx4 -https://www.youtube.com/watch?v=EFvv5hl9Cbo -https://www.youtube.com/watch?v=6SybA6YL2Dk -https://www.youtube.com/watch?v=diesIlZaYwg -https://www.youtube.com/watch?v=f9fQ-3ZKMW4 -https://www.youtube.com/watch?v=g3gIt_8pNFw -https://www.youtube.com/watch?v=WOOlnWuo5cc -https://www.youtube.com/watch?v=6saf_WUItTs -https://www.youtube.com/watch?v=yDTQvqANLwA -https://www.youtube.com/watch?v=2rTMHKBYkak -https://www.youtube.com/watch?v=tT3W6GP6cCA -https://www.youtube.com/watch?v=tz9GT0P0ryA -https://www.youtube.com/watch?v=eojre2rUMqw -https://www.youtube.com/watch?v=0Zzl3HzhYz0 -https://www.youtube.com/watch?v=vTp1auScNMk -https://www.youtube.com/watch?v=oBWZjKR9ZeQ -https://www.youtube.com/watch?v=Kk1xwtIV8vs -https://www.youtube.com/watch?v=v80YwdDGoeA -https://www.youtube.com/watch?v=RTwCoLuVEkk -https://www.youtube.com/watch?v=O_eVwq5srVs -https://www.youtube.com/watch?v=4y6sP0QP9fw -https://www.youtube.com/watch?v=8175ZvSPjtI -https://www.youtube.com/watch?v=p5uEXSimNHw -https://www.youtube.com/watch?v=tx_ezJgB-1U -https://www.youtube.com/watch?v=d61Y1IQjREI -https://www.youtube.com/watch?v=rjA6vEh2VcM -https://www.youtube.com/watch?v=M922Dq0aAMo -https://www.youtube.com/watch?v=kr__8j-30GI -https://www.youtube.com/watch?v=O9QntgWyKMw -https://www.youtube.com/watch?v=X3T-yVZnSwE -https://www.youtube.com/watch?v=cu32CcPzNG4 -https://www.youtube.com/watch?v=EFksUEbPBSg -https://www.youtube.com/watch?v=b_ZTwMM5wU0 -https://www.youtube.com/watch?v=g5QLW6LeJ3Q 
-https://www.youtube.com/watch?v=IxhhDb0Ap1g -https://www.youtube.com/watch?v=Hk0rRNw6Hao -https://www.youtube.com/watch?v=Zsm80SgJRBQ -https://www.youtube.com/watch?v=DpWBCMbE7aE -https://www.youtube.com/watch?v=6LWXnVr-9t8 -https://www.youtube.com/watch?v=8qWVLCpRShE -https://www.youtube.com/watch?v=AbHEP5JHusQ -https://www.youtube.com/watch?v=rbRn3rUhX4E -https://www.youtube.com/watch?v=8b5_qEwmUu8 -https://www.youtube.com/watch?v=gsKssuz8tPA -https://www.youtube.com/watch?v=pwEtEXQ6PVw -https://www.youtube.com/watch?v=v7rFBX9QOpE -https://www.youtube.com/watch?v=oxxRgi8a120 -https://www.youtube.com/watch?v=LRtEUdjrQqg -https://www.youtube.com/watch?v=unkTCzy1qrA -https://www.youtube.com/watch?v=Cd7912woWsE -https://www.youtube.com/watch?v=w5LMl9o3Ofs -https://www.youtube.com/watch?v=_dqILnERIXQ -https://www.youtube.com/watch?v=s9RnwZ7IaaA -https://www.youtube.com/watch?v=ftksxfcG4V0 -https://www.youtube.com/watch?v=ZWr0tbXySlg -https://www.youtube.com/watch?v=e-BCNcMv2j0 -https://www.youtube.com/watch?v=mewfu8lafy8 -https://www.youtube.com/watch?v=VWG7L3c_5J8 -https://www.youtube.com/watch?v=JXZXNFLQMHE -https://www.youtube.com/watch?v=fb3zbGCVL6M -https://www.youtube.com/watch?v=uN4p8E-AZF0 -https://www.youtube.com/watch?v=SgFtJaJoX8Y -https://www.youtube.com/watch?v=WvAs0s7DVYg -https://www.youtube.com/watch?v=t799a5XUSAU -https://www.youtube.com/watch?v=ljyY1LjK_ZE -https://www.youtube.com/watch?v=ArD55SLC62E -https://www.youtube.com/watch?v=cvpGOFUsVnI -https://www.youtube.com/watch?v=SZXg328Mzsk -https://www.youtube.com/watch?v=-BoZ1gEAC6g -https://www.youtube.com/watch?v=VQzU4Sy-cQQ -https://www.youtube.com/watch?v=73lYz3r3XZY -https://www.youtube.com/watch?v=JFrABFhjAMk -https://www.youtube.com/watch?v=sN6qndqz0KQ -https://www.youtube.com/watch?v=s0_MmWg2m8A -https://www.youtube.com/watch?v=I0GtLj2q5kQ -https://www.youtube.com/watch?v=3tJ0S7ciHRg -https://www.youtube.com/watch?v=hc_7bu1togM -https://www.youtube.com/watch?v=gFbL3pzkNEM -https://www.youtube.com/watch?v=I8R94gDqiGE -https://www.youtube.com/watch?v=TdErMRIxRqY -https://www.youtube.com/watch?v=49JqAHear9E -https://www.youtube.com/watch?v=Y6IbKjhHFSg -https://www.youtube.com/watch?v=-DluaoC73Oo -https://www.youtube.com/watch?v=6GAc1HfTQX8 -https://www.youtube.com/watch?v=zdGkKCiKYuE -https://www.youtube.com/watch?v=1oUH8LWmM0I -https://www.youtube.com/watch?v=TNf_8J8LoM0 -https://www.youtube.com/watch?v=kXcjZ2BkE2Y -https://www.youtube.com/watch?v=dgK8hbT2D3Y -https://www.youtube.com/watch?v=H1sdanVzblg -https://www.youtube.com/watch?v=Y4HSgvsz-AI -https://www.youtube.com/watch?v=YvBfHVPpBmw -https://www.youtube.com/watch?v=sSxhG5qH38Q -https://www.youtube.com/watch?v=x4zz7xAa-fM -https://www.youtube.com/watch?v=nOPm3XAlxZg -https://www.youtube.com/watch?v=7Yor7ci8noM -https://www.youtube.com/watch?v=BnJ4BDxlgSM -https://www.youtube.com/watch?v=j4otcsHfzrE -https://www.youtube.com/watch?v=lIHDioyC4Q4 -https://www.youtube.com/watch?v=e40TGdOO1JU -https://www.youtube.com/watch?v=iKnbegCExns -https://www.youtube.com/watch?v=ao1TeLrOPPQ -https://www.youtube.com/watch?v=Lx_UnKWJyLE -https://www.youtube.com/watch?v=nDtgGczwIPY -https://www.youtube.com/watch?v=0bJ2mfxtmjg -https://www.youtube.com/watch?v=cbG1GetVqyw -https://www.youtube.com/watch?v=gIONn9bp8_w -https://www.youtube.com/watch?v=vePMmBM-IVU -https://www.youtube.com/watch?v=xP9cwmJ5dog -https://www.youtube.com/watch?v=uxYeke3bLMk -https://www.youtube.com/watch?v=eCze8sLvmcE -https://www.youtube.com/watch?v=EcBx2S-x3Hs -https://www.youtube.com/watch?v=zO3rfSDuBFg 
-https://www.youtube.com/watch?v=0RtavDSBnt4 -https://www.youtube.com/watch?v=8EojjO04ahA -https://www.youtube.com/watch?v=UbWPWyl8xWQ -https://www.youtube.com/watch?v=Ocwj3RfhTwI -https://www.youtube.com/watch?v=8EOo1DTKT98 -https://www.youtube.com/watch?v=7Rh7MjD6LLQ -https://www.youtube.com/watch?v=_C3FNmY1IxI -https://www.youtube.com/watch?v=yvQXwo4w3_0 -https://www.youtube.com/watch?v=PNVWUdB653k -https://www.youtube.com/watch?v=-DWQIrKbrEc -https://www.youtube.com/watch?v=1MgfOJ9ILCo -https://www.youtube.com/watch?v=og-mP2_bjcE -https://www.youtube.com/watch?v=Gar0-Yc_gpY -https://www.youtube.com/watch?v=OAyBcaFI56I -https://www.youtube.com/watch?v=5T1AD2jE6Kg -https://www.youtube.com/watch?v=sECZvLhjvPQ -https://www.youtube.com/watch?v=f1YdQbcFe3k -https://www.youtube.com/watch?v=B00d1ZrHlgc -https://www.youtube.com/watch?v=s05OP79dB28 -https://www.youtube.com/watch?v=6e-ID8Wxbjo -https://www.youtube.com/watch?v=64Uihh-m0LE -https://www.youtube.com/watch?v=hDc6FPsH7h4 -https://www.youtube.com/watch?v=HDiJLqx3Luk -https://www.youtube.com/watch?v=wcy3iNJXAJs -https://www.youtube.com/watch?v=rV39f1xY040 -https://www.youtube.com/watch?v=A8oTnrChwoc -https://www.youtube.com/watch?v=xmjPzr1PQG8 -https://www.youtube.com/watch?v=SlYjBVi4LeM -https://www.youtube.com/watch?v=m7hq3HU1x1c -https://www.youtube.com/watch?v=Ycdfq5SVgYs -https://www.youtube.com/watch?v=Lgy5IT6V1II -https://www.youtube.com/watch?v=ivPrbp3Ef0M -https://www.youtube.com/watch?v=xS8T1nXqibQ -https://www.youtube.com/watch?v=DWXBC9Ud32Y -https://www.youtube.com/watch?v=ZQdxYJkbD6s -https://www.youtube.com/watch?v=xkB7ra_ZcCM -https://www.youtube.com/watch?v=ugCKpZwt7-Q -https://www.youtube.com/watch?v=CHXI5J4V4AE -https://www.youtube.com/watch?v=Jt7_FFHd1Uw -https://www.youtube.com/watch?v=3aKlZpFAI3w -https://www.youtube.com/watch?v=i_Y7of6rGy8 -https://www.youtube.com/watch?v=YUp3RSlqBnk -https://www.youtube.com/watch?v=zRmm0-IMV-0 -https://www.youtube.com/watch?v=9IlBbSIQQes -https://www.youtube.com/watch?v=LhaDO3MwY08 -https://www.youtube.com/watch?v=GIMPGEcvats -https://www.youtube.com/watch?v=tJjwfD-adPE -https://www.youtube.com/watch?v=tCyYgZr6CHA -https://www.youtube.com/watch?v=O2IuSn7abLQ -https://www.youtube.com/watch?v=vWAIK55GrSQ -https://www.youtube.com/watch?v=X0Pwphld4l8 -https://www.youtube.com/watch?v=Men_VTPKzRk -https://www.youtube.com/watch?v=4N1QO35qeAw -https://www.youtube.com/watch?v=mDyz1zC5mV4 -https://www.youtube.com/watch?v=IwOpp57oQa8 -https://www.youtube.com/watch?v=rpBONirrpFY -https://www.youtube.com/watch?v=1nPRqTVWcXo -https://www.youtube.com/watch?v=qflzvf0P8Go -https://www.youtube.com/watch?v=TbLnKnd55KI -https://www.youtube.com/watch?v=jA3nz1RGhoQ -https://www.youtube.com/watch?v=mqx4w9RxyC8 -https://www.youtube.com/watch?v=mqoJr3qL98Q -https://www.youtube.com/watch?v=KTxnalxOBq4 -https://www.youtube.com/watch?v=ghOcpZ_n3ck -https://www.youtube.com/watch?v=dReCYPaWB28 -https://www.youtube.com/watch?v=XY79-cpbFME -https://www.youtube.com/watch?v=jwVwgGtdoAc -https://www.youtube.com/watch?v=wpGu13Xt_w0 -https://www.youtube.com/watch?v=5NRyHwCPfgY -https://www.youtube.com/watch?v=zTYxYSTDBjM -https://www.youtube.com/watch?v=QQj4uFa05N4 -https://www.youtube.com/watch?v=1GMgjlp4Yps -https://www.youtube.com/watch?v=kUo37zZhNxE -https://www.youtube.com/watch?v=Jz5CGd0dsaU -https://www.youtube.com/watch?v=iLQS_HPWO9c -https://www.youtube.com/watch?v=_lApBJu9gUY -https://www.youtube.com/watch?v=M1XwLmDpElY -https://www.youtube.com/watch?v=JT5HaX5yVPc -https://www.youtube.com/watch?v=PmQc2_9cux8 
-https://www.youtube.com/watch?v=wQMkJ47gTto -https://www.youtube.com/watch?v=arYXHRmVUSQ -https://www.youtube.com/watch?v=hC_KKWCju34 -https://www.youtube.com/watch?v=dYi51VQyqWM -https://www.youtube.com/watch?v=tN_qiCQU8mE -https://www.youtube.com/watch?v=ilm532-pJ6k -https://www.youtube.com/watch?v=5uo1Ir6X_r8 -https://www.youtube.com/watch?v=-M5YASO4Qo8 -https://www.youtube.com/watch?v=Jg0yPEiD5uk -https://www.youtube.com/watch?v=GIgOpPFcNlU -https://www.youtube.com/watch?v=ttJtmEsjeik -https://www.youtube.com/watch?v=IuoT4lj5320 -https://www.youtube.com/watch?v=0_zqq0BXwfk -https://www.youtube.com/watch?v=sK6VuV8mgPk -https://www.youtube.com/watch?v=z0JQaxqXlic -https://www.youtube.com/watch?v=W06m5BF4bZA -https://www.youtube.com/watch?v=tGZJWiETVto -https://www.youtube.com/watch?v=09u2R0LZ1YU -https://www.youtube.com/watch?v=_OqomCppv30 -https://www.youtube.com/watch?v=wf8tUgUCPac \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_13.txt b/airflow/inputfiles/urls.rt250_13.txt deleted file mode 100644 index c92a984..0000000 --- a/airflow/inputfiles/urls.rt250_13.txt +++ /dev/null @@ -1,236 +0,0 @@ -https://www.youtube.com/watch?v=lczFvGhUfts -https://www.youtube.com/watch?v=kFx3-0pNPWc -https://www.youtube.com/watch?v=JZfuvcVU91E -https://www.youtube.com/watch?v=xT5JEhTo7Rc -https://www.youtube.com/watch?v=mTinH8GAflM -https://www.youtube.com/watch?v=3srlxHfZLm4 -https://www.youtube.com/watch?v=clv601ldGA4 -https://www.youtube.com/watch?v=bHPY6Nn9QMs -https://www.youtube.com/watch?v=9y5aeZ33w_4 -https://www.youtube.com/watch?v=yhUxI5phmO8 -https://www.youtube.com/watch?v=VbE2CU9nA1k -https://www.youtube.com/watch?v=It-PLAJt_zQ -https://www.youtube.com/watch?v=9SekDfrN-X4 -https://www.youtube.com/watch?v=T0pnbDrW1CI -https://www.youtube.com/watch?v=XBYyXyvW6tE -https://www.youtube.com/watch?v=XFep6Dhx-Fs -https://www.youtube.com/watch?v=W8iTykaPSLM -https://www.youtube.com/watch?v=z3YrrXDQDyY -https://www.youtube.com/watch?v=9KU33u28EtI -https://www.youtube.com/watch?v=kvDMgzLZR2k -https://www.youtube.com/watch?v=_xWION4vOgM -https://www.youtube.com/watch?v=70zAbdwD9yY -https://www.youtube.com/watch?v=59ZG0Hjf5Us -https://www.youtube.com/watch?v=ZnI-KMjlrgA -https://www.youtube.com/watch?v=0-HtNtzR3HY -https://www.youtube.com/watch?v=G5UwFimUH_k -https://www.youtube.com/watch?v=UQ9YjPBUgII -https://www.youtube.com/watch?v=TtO1czEGERI -https://www.youtube.com/watch?v=s_1O1UJtMtQ -https://www.youtube.com/watch?v=Cq9XGcbsNRA -https://www.youtube.com/watch?v=dWsO8_oHeAo -https://www.youtube.com/watch?v=xF8HGuSfoYE -https://www.youtube.com/watch?v=nIuZdHv6chU -https://www.youtube.com/watch?v=pfbdEHVtBz4 -https://www.youtube.com/watch?v=OJWP4kft7NI -https://www.youtube.com/watch?v=1ZRrg52B81w -https://www.youtube.com/watch?v=cPtg_qRa59w -https://www.youtube.com/watch?v=W4WZjoUiKr8 -https://www.youtube.com/watch?v=70niBZ6GLSs -https://www.youtube.com/watch?v=auTIwWeByuw -https://www.youtube.com/watch?v=VaXHkSUlEdI -https://www.youtube.com/watch?v=PbelNpfEJQc -https://www.youtube.com/watch?v=R2VIfDPxX3M -https://www.youtube.com/watch?v=aTv5h7kqPp0 -https://www.youtube.com/watch?v=A7Vh8ZemNkc -https://www.youtube.com/watch?v=zn7dx98XomI -https://www.youtube.com/watch?v=DyLVTThUXBc -https://www.youtube.com/watch?v=zdNxMHSVkFg -https://www.youtube.com/watch?v=MXp2LFunRFc -https://www.youtube.com/watch?v=gE1RoN6ChBs -https://www.youtube.com/watch?v=cFcnz9rCD-o -https://www.youtube.com/watch?v=JueaSBXghD0 -https://www.youtube.com/watch?v=03de7Gcpz_4 
-https://www.youtube.com/watch?v=Ij33PqNmbro -https://www.youtube.com/watch?v=57iQZerkh0o -https://www.youtube.com/watch?v=N1b7xZdVtRw -https://www.youtube.com/watch?v=M4gLxvcaBYU -https://www.youtube.com/watch?v=yGuH6jx7xkI -https://www.youtube.com/watch?v=AeopuNMkQt8 -https://www.youtube.com/watch?v=QKzE7lsCPw8 -https://www.youtube.com/watch?v=uZn0PIJ_k9E -https://www.youtube.com/watch?v=bxJ4-3W6LwE -https://www.youtube.com/watch?v=2MJPj6oWIKo -https://www.youtube.com/watch?v=yujj2p2dGWk -https://www.youtube.com/watch?v=-bXdzATNQKw -https://www.youtube.com/watch?v=xN3nXJX9vJs -https://www.youtube.com/watch?v=zwgm4a5OugI -https://www.youtube.com/watch?v=NBNN0lqZ8tU -https://www.youtube.com/watch?v=5e460FHKV64 -https://www.youtube.com/watch?v=hsbCYJIvq3o -https://www.youtube.com/watch?v=_qsfagfYAVs -https://www.youtube.com/watch?v=ZNl4FVK9-yQ -https://www.youtube.com/watch?v=2-TYgeGNTQk -https://www.youtube.com/watch?v=Zin6ZIfdTAY -https://www.youtube.com/watch?v=sn7XKfA4W-0 -https://www.youtube.com/watch?v=gmRYrsutXEo -https://www.youtube.com/watch?v=1jhOFw6GrLg -https://www.youtube.com/watch?v=PxzOVCQU-4U -https://www.youtube.com/watch?v=BlHNshJh9zM -https://www.youtube.com/watch?v=LVmH60yo0JI -https://www.youtube.com/watch?v=o-Tzlp_nG70 -https://www.youtube.com/watch?v=njuIKmM2xDM -https://www.youtube.com/watch?v=4zwQ88zMyio -https://www.youtube.com/watch?v=E3Wr6GWkRzA -https://www.youtube.com/watch?v=aypLuo7UXWE -https://www.youtube.com/watch?v=TANbVyW6x1k -https://www.youtube.com/watch?v=oNcdsp12pag -https://www.youtube.com/watch?v=qv7QFzRqf34 -https://www.youtube.com/watch?v=cYK9aTSRtbE -https://www.youtube.com/watch?v=7kmaoLaQlF8 -https://www.youtube.com/watch?v=st1zEAlNPic -https://www.youtube.com/watch?v=2XQnxn39yj4 -https://www.youtube.com/watch?v=gUbA8Zn8YPQ -https://www.youtube.com/watch?v=Uny3t9woExo -https://www.youtube.com/watch?v=lCaIfb8B5Mo -https://www.youtube.com/watch?v=jEkQ0RZObiI -https://www.youtube.com/watch?v=Beo6jYwuZpc -https://www.youtube.com/watch?v=KILWCT_Zzoc -https://www.youtube.com/watch?v=fb2VzDSf7S0 -https://www.youtube.com/watch?v=4a_oIbAhCgo -https://www.youtube.com/watch?v=eCLS2lWR1us -https://www.youtube.com/watch?v=MS01ipsrJCA -https://www.youtube.com/watch?v=3zqIIJhiyoo -https://www.youtube.com/watch?v=R3KOub_thmc -https://www.youtube.com/watch?v=ZQadO6LiP9s -https://www.youtube.com/watch?v=pzhhzPhwYh8 -https://www.youtube.com/watch?v=kLkNcyM0gNA -https://www.youtube.com/watch?v=yHpVRxZJyMg -https://www.youtube.com/watch?v=DZlfy7Eeeu0 -https://www.youtube.com/watch?v=b9Ao1j5bDXU -https://www.youtube.com/watch?v=vfPEVcXrO2c -https://www.youtube.com/watch?v=TK3gpctWmy8 -https://www.youtube.com/watch?v=Gn9_FWSU32Y -https://www.youtube.com/watch?v=zrSrLrBZ_eA -https://www.youtube.com/watch?v=5CS4NAkSjeM -https://www.youtube.com/watch?v=bbNkX9AWRbM -https://www.youtube.com/watch?v=03Yi0nl6N8Y -https://www.youtube.com/watch?v=moANh2cUXzw -https://www.youtube.com/watch?v=X4MNSJQ3jxk -https://www.youtube.com/watch?v=Eh7IdlNdcjk -https://www.youtube.com/watch?v=HNR4pmJ_YPs -https://www.youtube.com/watch?v=QIzmK6U41yc -https://www.youtube.com/watch?v=i7Tsz0bTzv4 -https://www.youtube.com/watch?v=U2Qz2Uz38Ds -https://www.youtube.com/watch?v=I6G_ojX3FO0 -https://www.youtube.com/watch?v=gQOsKTlMr9w -https://www.youtube.com/watch?v=LBC0msJZofU -https://www.youtube.com/watch?v=gHRicolSA8Y -https://www.youtube.com/watch?v=v8roAb048Us -https://www.youtube.com/watch?v=P_RIqP_sjZ8 -https://www.youtube.com/watch?v=e9E-fnJZObk -https://www.youtube.com/watch?v=UDRyL9EH5Bs 
-https://www.youtube.com/watch?v=EG77Z0p17Hc -https://www.youtube.com/watch?v=EW8OrhKu6ac -https://www.youtube.com/watch?v=swGrHB6VYF0 -https://www.youtube.com/watch?v=1KSp_Hr0_8g -https://www.youtube.com/watch?v=WqyHn7p90ws -https://www.youtube.com/watch?v=8rASZi0V6hI -https://www.youtube.com/watch?v=JPX0c4vmMbk -https://www.youtube.com/watch?v=DDstXkeEY5k -https://www.youtube.com/watch?v=lIWpl1rISOU -https://www.youtube.com/watch?v=w3t-AhvtwIY -https://www.youtube.com/watch?v=bCPOPg9Jxko -https://www.youtube.com/watch?v=M4CIPaK4CzU -https://www.youtube.com/watch?v=sSWYhKIPK_w -https://www.youtube.com/watch?v=DL9orOzONFM -https://www.youtube.com/watch?v=zV1mn041-MY -https://www.youtube.com/watch?v=u9fKGIZEOZc -https://www.youtube.com/watch?v=nwW_2C1VQRA -https://www.youtube.com/watch?v=xyjIpsJ9C_E -https://www.youtube.com/watch?v=zv3JJeoNWpw -https://www.youtube.com/watch?v=yFh-pd4K8kM -https://www.youtube.com/watch?v=helJykjI5Ks -https://www.youtube.com/watch?v=O5grg4BhmwA -https://www.youtube.com/watch?v=hxqqPMdeWlk -https://www.youtube.com/watch?v=aBfY4EXizl4 -https://www.youtube.com/watch?v=uXbTAyacm-o -https://www.youtube.com/watch?v=EHchyDF5mPA -https://www.youtube.com/watch?v=8hN6tFyXtMM -https://www.youtube.com/watch?v=ytaFppE2PrQ -https://www.youtube.com/watch?v=H7T79men-54 -https://www.youtube.com/watch?v=wobBUFvj6vw -https://www.youtube.com/watch?v=JCkSb2SHIY8 -https://www.youtube.com/watch?v=DbdV9vauV_8 -https://www.youtube.com/watch?v=WY7F2paiokM -https://www.youtube.com/watch?v=7_XKe5nL3j4 -https://www.youtube.com/watch?v=ImQPrZx6ZHw -https://www.youtube.com/watch?v=D4Of5F5CDLA -https://www.youtube.com/watch?v=piL1B00ekBE -https://www.youtube.com/watch?v=etOwU2jh5vo -https://www.youtube.com/watch?v=jqceDCEbuds -https://www.youtube.com/watch?v=NBJgtpZxRks -https://www.youtube.com/watch?v=Rztv9EWij4Y -https://www.youtube.com/watch?v=Mb3C2fSxLdI -https://www.youtube.com/watch?v=vK2LAKvrzPo -https://www.youtube.com/watch?v=w0JpfEd31AM -https://www.youtube.com/watch?v=mYSaT9V4324 -https://www.youtube.com/watch?v=3979ONO_DOk -https://www.youtube.com/watch?v=5li6iJy_DYs -https://www.youtube.com/watch?v=wA9RT0MBe3s -https://www.youtube.com/watch?v=NEeLktfngjQ -https://www.youtube.com/watch?v=es24xo9ju7U -https://www.youtube.com/watch?v=OaGCZ8SsKG4 -https://www.youtube.com/watch?v=lQsBzd952xk -https://www.youtube.com/watch?v=neD2PEqr-Po -https://www.youtube.com/watch?v=vHqIYvrKwks -https://www.youtube.com/watch?v=1A6qbKM5vgE -https://www.youtube.com/watch?v=zxPStAHlg4E -https://www.youtube.com/watch?v=jZCfL7fblMM -https://www.youtube.com/watch?v=TAccYcmvQ0k -https://www.youtube.com/watch?v=kzbyykGWN9U -https://www.youtube.com/watch?v=Jzj8TyuuvzQ -https://www.youtube.com/watch?v=ruQVatMNzto -https://www.youtube.com/watch?v=4tOse6iLVs0 -https://www.youtube.com/watch?v=3l_25zyd-s8 -https://www.youtube.com/watch?v=Cu0D682ezDU -https://www.youtube.com/watch?v=ZkHmWMl_rfo -https://www.youtube.com/watch?v=qcI0TSXWzVs -https://www.youtube.com/watch?v=THdAow8-nG8 -https://www.youtube.com/watch?v=53FiW6nOJfQ -https://www.youtube.com/watch?v=w52Vjf2dphk -https://www.youtube.com/watch?v=O3buBB-zkw4 -https://www.youtube.com/watch?v=qsRb4sFg7iE -https://www.youtube.com/watch?v=B36mO34Yaj8 -https://www.youtube.com/watch?v=apgTsYIOZks -https://www.youtube.com/watch?v=uONZRzmh3xc -https://www.youtube.com/watch?v=FTY3sC1ad-8 -https://www.youtube.com/watch?v=OfFoZQsS0qI -https://www.youtube.com/watch?v=SlE1uDWGMuI -https://www.youtube.com/watch?v=rDZYOsRB_DE -https://www.youtube.com/watch?v=iWUNJPVzTT0 
-https://www.youtube.com/watch?v=hfEwesszkvI -https://www.youtube.com/watch?v=Tk1zk4xikrk -https://www.youtube.com/watch?v=kbqKifCiUFo -https://www.youtube.com/watch?v=H4oCces6L-I -https://www.youtube.com/watch?v=dZc0Wi1l0L0 -https://www.youtube.com/watch?v=_DRdlJnx2Vo -https://www.youtube.com/watch?v=uoyvZlIQ_-I -https://www.youtube.com/watch?v=gCrKVixw7Tk -https://www.youtube.com/watch?v=IVk6qntdVzk -https://www.youtube.com/watch?v=ZK3n2LCqRio -https://www.youtube.com/watch?v=ZzKNrl5kyF4 -https://www.youtube.com/watch?v=Gld-GRwARlA -https://www.youtube.com/watch?v=tcqDUEhgT_g -https://www.youtube.com/watch?v=fPh0SGZHbTk -https://www.youtube.com/watch?v=rAsmd_5SfII -https://www.youtube.com/watch?v=8Dotiqbtvoo -https://www.youtube.com/watch?v=Q1bJaVlV-84 -https://www.youtube.com/watch?v=CBQ5qbYV3XM -https://www.youtube.com/watch?v=pOsgKiHqKt0 -https://www.youtube.com/watch?v=HC-e2wj7ePc -https://www.youtube.com/watch?v=JiBb2n5UEG8 -https://www.youtube.com/watch?v=INNL2cWTgmI -https://www.youtube.com/watch?v=nEiX-U8P4H4 -https://www.youtube.com/watch?v=azXdNflZL28 -https://www.youtube.com/watch?v=8GxjkvKfRh4 \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_14.txt b/airflow/inputfiles/urls.rt250_14.txt deleted file mode 100644 index 7d54fdc..0000000 --- a/airflow/inputfiles/urls.rt250_14.txt +++ /dev/null @@ -1,237 +0,0 @@ -https://www.youtube.com/watch?v=HH71IKNVVYk -https://www.youtube.com/watch?v=807yt9yhd6I -https://www.youtube.com/watch?v=El7pa-DsJ7w -https://www.youtube.com/watch?v=nIZUoHkJNH0 -https://www.youtube.com/watch?v=v8EuLwLPF1I -https://www.youtube.com/watch?v=VXvqZQBGF5A -https://www.youtube.com/watch?v=tTkxj7PJ6Ss -https://www.youtube.com/watch?v=Brraz_ZKUIA -https://www.youtube.com/watch?v=BEjJRVVD0d4 -https://www.youtube.com/watch?v=Kw52RfKqX5Q -https://www.youtube.com/watch?v=Tj1CYEYIZI8 -https://www.youtube.com/watch?v=R6sRU0cLwcg -https://www.youtube.com/watch?v=HYN-BL55NyI -https://www.youtube.com/watch?v=4xRSfFe0aA4 -https://www.youtube.com/watch?v=qcT3I4Fhod8 -https://www.youtube.com/watch?v=FhR_Fc6I-Es -https://www.youtube.com/watch?v=5kQI69iDZEI -https://www.youtube.com/watch?v=eiQUC22MT0o -https://www.youtube.com/watch?v=mVNsumhm0bk -https://www.youtube.com/watch?v=5kyt-4p0qpw -https://www.youtube.com/watch?v=kdWxc8hDPbA -https://www.youtube.com/watch?v=VK8oMBYr9AE -https://www.youtube.com/watch?v=KHIe0adNnPw -https://www.youtube.com/watch?v=WSRrChHFR2I -https://www.youtube.com/watch?v=eWyU5N5oGUw -https://www.youtube.com/watch?v=OTF26cgxS5A -https://www.youtube.com/watch?v=AyOrNze3SE4 -https://www.youtube.com/watch?v=0bShQ01DxnY -https://www.youtube.com/watch?v=jal3Rr0TPpM -https://www.youtube.com/watch?v=IvwSs1j6sL0 -https://www.youtube.com/watch?v=rbvJbvdspoU -https://www.youtube.com/watch?v=gSqq_vuW0es -https://www.youtube.com/watch?v=EY7KAbarSnk -https://www.youtube.com/watch?v=oCZ5BJlh6Jo -https://www.youtube.com/watch?v=veVrB1DNV-c -https://www.youtube.com/watch?v=Jl0MB13-zK8 -https://www.youtube.com/watch?v=qfYc2z7cvC4 -https://www.youtube.com/watch?v=J8NH9qAkys4 -https://www.youtube.com/watch?v=bBpcF-hnKz0 -https://www.youtube.com/watch?v=3JsODEZl5lo -https://www.youtube.com/watch?v=I5CFgz1qeSc -https://www.youtube.com/watch?v=LIKcI1MBnto -https://www.youtube.com/watch?v=79wzDxJu0UE -https://www.youtube.com/watch?v=gHS0Lf8TyF8 -https://www.youtube.com/watch?v=oqgrcgRkunw -https://www.youtube.com/watch?v=mWJC1eL_OR0 -https://www.youtube.com/watch?v=41cWX4W1vEI -https://www.youtube.com/watch?v=8mO3yxKHU1E 
-https://www.youtube.com/watch?v=cR2EZ9b2AYw -https://www.youtube.com/watch?v=wyPYWkVKQyw -https://www.youtube.com/watch?v=MuUsmsfwizg -https://www.youtube.com/watch?v=9Y18qV1UCRM -https://www.youtube.com/watch?v=hd9jxasd-60 -https://www.youtube.com/watch?v=WEVdhWKG2x8 -https://www.youtube.com/watch?v=csq261RPA2I -https://www.youtube.com/watch?v=8pmocmq8Z_Y -https://www.youtube.com/watch?v=OFUVP1sDMhQ -https://www.youtube.com/watch?v=YZnULU8-WiY -https://www.youtube.com/watch?v=bkUEPqEqMOc -https://www.youtube.com/watch?v=fgxaU6KA4kI -https://www.youtube.com/watch?v=6XziyFQ2qb0 -https://www.youtube.com/watch?v=71QrG4wEY_g -https://www.youtube.com/watch?v=wJXJTliyR98 -https://www.youtube.com/watch?v=lDw7qM3lCOU -https://www.youtube.com/watch?v=ONpSx6GGlTE -https://www.youtube.com/watch?v=W3tNcsdWzv4 -https://www.youtube.com/watch?v=Sq7SBO1VqKc -https://www.youtube.com/watch?v=PEmiEWUb1uo -https://www.youtube.com/watch?v=Mw1Du8gYMiE -https://www.youtube.com/watch?v=H4u0uhldLJQ -https://www.youtube.com/watch?v=fEDVDvIWfZE -https://www.youtube.com/watch?v=1tgrLHWLhBs -https://www.youtube.com/watch?v=VDgp3h8bAu8 -https://www.youtube.com/watch?v=8ZcZAL3C928 -https://www.youtube.com/watch?v=0Zpivy32UGU -https://www.youtube.com/watch?v=u-m-BBQQ9Tw -https://www.youtube.com/watch?v=cvHvX8mFTJE -https://www.youtube.com/watch?v=L-o5p75Q-cg -https://www.youtube.com/watch?v=ZfORukYHkGA -https://www.youtube.com/watch?v=t9sbedJUdUc -https://www.youtube.com/watch?v=zBoHq5-w5UY -https://www.youtube.com/watch?v=q50-whddP7Y -https://www.youtube.com/watch?v=D1PiHzL703U -https://www.youtube.com/watch?v=voQEUBq8q_A -https://www.youtube.com/watch?v=Ps2HyJCINyw -https://www.youtube.com/watch?v=kzco7k86Unw -https://www.youtube.com/watch?v=YE2zr2lV50M -https://www.youtube.com/watch?v=pU_MuwLZeAc -https://www.youtube.com/watch?v=PC5NoUQMOhA -https://www.youtube.com/watch?v=KGm_VKYfTMI -https://www.youtube.com/watch?v=duUThrCj7Lk -https://www.youtube.com/watch?v=n5UaEOcC5G4 -https://www.youtube.com/watch?v=gjEmu6h_lpE -https://www.youtube.com/watch?v=en1KyArCG6w -https://www.youtube.com/watch?v=bRPWouCfF7I -https://www.youtube.com/watch?v=lXnIwNH8vGo -https://www.youtube.com/watch?v=xmPqK9T_yEU -https://www.youtube.com/watch?v=PYUNAbge9Xw -https://www.youtube.com/watch?v=J39cA-10XcE -https://www.youtube.com/watch?v=1BJD7Rv7iS8 -https://www.youtube.com/watch?v=D6WdfQDXRsw -https://www.youtube.com/watch?v=g6-v4pwo2ik -https://www.youtube.com/watch?v=UCgHBFhO7FM -https://www.youtube.com/watch?v=ALNUXWNvEZY -https://www.youtube.com/watch?v=0Qx13_oyycg -https://www.youtube.com/watch?v=1KeCeqkwnTs -https://www.youtube.com/watch?v=JmQd29Q6-s8 -https://www.youtube.com/watch?v=5eE9eTDjII0 -https://www.youtube.com/watch?v=-kPpPTIYpeY -https://www.youtube.com/watch?v=n7IfbF48Jqo -https://www.youtube.com/watch?v=HXG-qjuDkPE -https://www.youtube.com/watch?v=77sKr6xPbW8 -https://www.youtube.com/watch?v=w9uEnYCbmno -https://www.youtube.com/watch?v=M9B2M5rGOqE -https://www.youtube.com/watch?v=g-6eEKHMT4A -https://www.youtube.com/watch?v=0k6rcvts1FM -https://www.youtube.com/watch?v=3Glq6IgKJ-g -https://www.youtube.com/watch?v=dl_kgfUid_E -https://www.youtube.com/watch?v=RpBgeBWcw4I -https://www.youtube.com/watch?v=WiyI1U6HfJ8 -https://www.youtube.com/watch?v=CvPD5zHvxpE -https://www.youtube.com/watch?v=H5aghJz-6-0 -https://www.youtube.com/watch?v=hvGwiRk9qFI -https://www.youtube.com/watch?v=kP5en-KOZak -https://www.youtube.com/watch?v=Lt9z2PZxTA8 -https://www.youtube.com/watch?v=ZAN2gg3Bf88 -https://www.youtube.com/watch?v=dfcoKxlL9zo 
-https://www.youtube.com/watch?v=6G-zWuGwXwE -https://www.youtube.com/watch?v=IFOgQYDqCDg -https://www.youtube.com/watch?v=fN1ASoQVOtc -https://www.youtube.com/watch?v=nDvvyndocg0 -https://www.youtube.com/watch?v=YS2do-ydhsE -https://www.youtube.com/watch?v=5IL47nns0R4 -https://www.youtube.com/watch?v=A3BueGvC0dU -https://www.youtube.com/watch?v=qE5_I54Onsg -https://www.youtube.com/watch?v=1QA_KBw3bpM -https://www.youtube.com/watch?v=H2ms1CuhCAQ -https://www.youtube.com/watch?v=zh7AyRCjMK8 -https://www.youtube.com/watch?v=oklOIfwaIm4 -https://www.youtube.com/watch?v=2_m2D7BlCSY -https://www.youtube.com/watch?v=AOh3iLuvEx0 -https://www.youtube.com/watch?v=qt6rxatB78k -https://www.youtube.com/watch?v=CCLgvay1AJg -https://www.youtube.com/watch?v=2CJsz7yfWVQ -https://www.youtube.com/watch?v=mXr5R4PybQ8 -https://www.youtube.com/watch?v=FPJUY3OG1j4 -https://www.youtube.com/watch?v=AxZSX649ZQM -https://www.youtube.com/watch?v=vAqqcajeLyw -https://www.youtube.com/watch?v=wT50rGqJi-A -https://www.youtube.com/watch?v=bURXdJK5yDo -https://www.youtube.com/watch?v=Ci-jKDX2Utc -https://www.youtube.com/watch?v=4jNOtUpzcsM -https://www.youtube.com/watch?v=9F1xYw3-wC0 -https://www.youtube.com/watch?v=-wgG-UKu6P4 -https://www.youtube.com/watch?v=RA0QPohN8OA -https://www.youtube.com/watch?v=swaAAvGIVkQ -https://www.youtube.com/watch?v=E2cttPQLTmU -https://www.youtube.com/watch?v=7u_HP6AjgNM -https://www.youtube.com/watch?v=-o7z8zDigKw -https://www.youtube.com/watch?v=2iDzZRY4nFg -https://www.youtube.com/watch?v=TvauDXRHRQM -https://www.youtube.com/watch?v=gm0CbvYTfyU -https://www.youtube.com/watch?v=aVH30g7M7-s -https://www.youtube.com/watch?v=SaRJpKcMpIc -https://www.youtube.com/watch?v=VME8ViM9p_E -https://www.youtube.com/watch?v=Ob1gwjj03-g -https://www.youtube.com/watch?v=Kx8QHsFjzdo -https://www.youtube.com/watch?v=k0MXRrhoGzw -https://www.youtube.com/watch?v=iQpIcy-iINQ -https://www.youtube.com/watch?v=iIegaxZamds -https://www.youtube.com/watch?v=nKgS3q8Eixw -https://www.youtube.com/watch?v=Vu53Y9-u6OQ -https://www.youtube.com/watch?v=A-1xFilVGKA -https://www.youtube.com/watch?v=NzXu-1jdv2w -https://www.youtube.com/watch?v=dN0sAUTPOo0 -https://www.youtube.com/watch?v=KhKqvIa4KPI -https://www.youtube.com/watch?v=7EQlStjqPXk -https://www.youtube.com/watch?v=GvGYuLtTpI4 -https://www.youtube.com/watch?v=socM2vLFAXw -https://www.youtube.com/watch?v=4Lf0s5sF6Ps -https://www.youtube.com/watch?v=O0SNpN9bA7M -https://www.youtube.com/watch?v=_1B1Unp8rzQ -https://www.youtube.com/watch?v=2Pxto4fr8fU -https://www.youtube.com/watch?v=-wnCY6-C_LE -https://www.youtube.com/watch?v=Jyo0daMS_FI -https://www.youtube.com/watch?v=6Mv190DEO5Q -https://www.youtube.com/watch?v=A3vqXUImxmo -https://www.youtube.com/watch?v=jTavkw3fHCU -https://www.youtube.com/watch?v=wO_IP_UBHSE -https://www.youtube.com/watch?v=eK76eyqZLuI -https://www.youtube.com/watch?v=fGq8t2S9A4Y -https://www.youtube.com/watch?v=kwgTZ7Kx1j0 -https://www.youtube.com/watch?v=QpR8PIGCI6A -https://www.youtube.com/watch?v=wP6eLrWucEU -https://www.youtube.com/watch?v=c8wcjZjYUKk -https://www.youtube.com/watch?v=3jqtAd3ms6s -https://www.youtube.com/watch?v=yQJxa2y3RMU -https://www.youtube.com/watch?v=xq46z38tXds -https://www.youtube.com/watch?v=N5xZyUxRJBM -https://www.youtube.com/watch?v=seeZtr3lYz8 -https://www.youtube.com/watch?v=DtoINgJb8NM -https://www.youtube.com/watch?v=Q1by2tU9Rp4 -https://www.youtube.com/watch?v=X7Ho48iO098 -https://www.youtube.com/watch?v=Nxs8sYQ6D4Y -https://www.youtube.com/watch?v=ePktyiVhM9Y -https://www.youtube.com/watch?v=FHAjPjwINDM 
-https://www.youtube.com/watch?v=iog13CSQsBE -https://www.youtube.com/watch?v=JC_7WZnqqYI -https://www.youtube.com/watch?v=qzar3W-wKdA -https://www.youtube.com/watch?v=JaLalK-3Ll0 -https://www.youtube.com/watch?v=L-VBIZ1lxZ8 -https://www.youtube.com/watch?v=z6LqQAbFZ5U -https://www.youtube.com/watch?v=V4sIhzXF5mw -https://www.youtube.com/watch?v=XaZskPC2CH8 -https://www.youtube.com/watch?v=N-sgaV4nP_c -https://www.youtube.com/watch?v=g4S3tejwVBQ -https://www.youtube.com/watch?v=uq7Wh078eUg -https://www.youtube.com/watch?v=zSmiH1Gbpp8 -https://www.youtube.com/watch?v=6WSy_8fjh3Y -https://www.youtube.com/watch?v=KRxe3BiApmU -https://www.youtube.com/watch?v=bO-OsMybcdY -https://www.youtube.com/watch?v=Tl9lNbchXKU -https://www.youtube.com/watch?v=MIGsaLyquMI -https://www.youtube.com/watch?v=clfJqj03TEs -https://www.youtube.com/watch?v=0Wpkb1L4pYw -https://www.youtube.com/watch?v=qtIP2ieotyk -https://www.youtube.com/watch?v=jm3t-hXP1Eo -https://www.youtube.com/watch?v=KU5QuYzEte0 -https://www.youtube.com/watch?v=r5iLqBt3TkE -https://www.youtube.com/watch?v=8vFMbZEma7w -https://www.youtube.com/watch?v=Q6SgU9w7XbY -https://www.youtube.com/watch?v=y9nZZxnsoTw -https://www.youtube.com/watch?v=y1VjxqY8BrI -https://www.youtube.com/watch?v=cVy39nbbSB4 -https://www.youtube.com/watch?v=JJDOUA_9TnU -https://www.youtube.com/watch?v=5fh0g4j0JPM -https://www.youtube.com/watch?v=4bp7st-p2Ns \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_15.txt b/airflow/inputfiles/urls.rt250_15.txt deleted file mode 100644 index 717a68b..0000000 --- a/airflow/inputfiles/urls.rt250_15.txt +++ /dev/null @@ -1,235 +0,0 @@ -https://www.youtube.com/watch?v=Vka_RHl8NBg -https://www.youtube.com/watch?v=C7mEj7LsxDM -https://www.youtube.com/watch?v=oU3xk3oaIWs -https://www.youtube.com/watch?v=Vc80-qcAYTI -https://www.youtube.com/watch?v=6jdO7Z0IA9s -https://www.youtube.com/watch?v=M624f51E6TU -https://www.youtube.com/watch?v=a2zb_vffseo -https://www.youtube.com/watch?v=kjJSsR7G-yA -https://www.youtube.com/watch?v=HTfUdUUB2sY -https://www.youtube.com/watch?v=clDTNsCoAt0 -https://www.youtube.com/watch?v=EtlQ36lNezM -https://www.youtube.com/watch?v=LvXVwrrsMjE -https://www.youtube.com/watch?v=lmC8nB3Sx0Y -https://www.youtube.com/watch?v=UgJMzy12jEk -https://www.youtube.com/watch?v=mqrZu6fiOlA -https://www.youtube.com/watch?v=cxyHJSC6qDI -https://www.youtube.com/watch?v=yJOvKMfMy9Q -https://www.youtube.com/watch?v=74eDPvsPc58 -https://www.youtube.com/watch?v=OVuJECLWkQQ -https://www.youtube.com/watch?v=V35MoqZdaKU -https://www.youtube.com/watch?v=5VVSyz8DYH8 -https://www.youtube.com/watch?v=DJz78yI9zvg -https://www.youtube.com/watch?v=Qs5ynEowGp0 -https://www.youtube.com/watch?v=PeRX98IZhJM -https://www.youtube.com/watch?v=GiVe7i3bY74 -https://www.youtube.com/watch?v=WOsmZel1tUQ -https://www.youtube.com/watch?v=6HyYFW4KAjE -https://www.youtube.com/watch?v=T40pRAlLPFI -https://www.youtube.com/watch?v=ESJ4fEsc3E8 -https://www.youtube.com/watch?v=JrmW9FXdiqg -https://www.youtube.com/watch?v=nfCY1Y2wCaE -https://www.youtube.com/watch?v=QDA8tjQeGa0 -https://www.youtube.com/watch?v=VNWFVRESr9k -https://www.youtube.com/watch?v=FwzDqhHNZq4 -https://www.youtube.com/watch?v=otevRZLVIns -https://www.youtube.com/watch?v=QbbaZPVWjZ0 -https://www.youtube.com/watch?v=NThxpgYS54k -https://www.youtube.com/watch?v=GzqvU_H3cjs -https://www.youtube.com/watch?v=aINHgBzBAsc -https://www.youtube.com/watch?v=dFLilVIP6v0 -https://www.youtube.com/watch?v=NQ_VXw5C5_g -https://www.youtube.com/watch?v=NlwOF78qdkM 
-https://www.youtube.com/watch?v=W8-8ZfmTIw8 -https://www.youtube.com/watch?v=_rSM6kDk6jU -https://www.youtube.com/watch?v=Tr38d3eLXf8 -https://www.youtube.com/watch?v=YhIXV4DWhIA -https://www.youtube.com/watch?v=-nfJpVRjRM0 -https://www.youtube.com/watch?v=kosQ_XMUtDI -https://www.youtube.com/watch?v=D7OYw3vhtlQ -https://www.youtube.com/watch?v=3Xnmt2Ymii4 -https://www.youtube.com/watch?v=bM2Lw4iDWMs -https://www.youtube.com/watch?v=lVeTpIpFTuI -https://www.youtube.com/watch?v=Bxb6m5jVs4k -https://www.youtube.com/watch?v=rh8Zo8ZCw8w -https://www.youtube.com/watch?v=7C0wdkJH4tw -https://www.youtube.com/watch?v=TOOHa3M_3_A -https://www.youtube.com/watch?v=P0gfEojLMmY -https://www.youtube.com/watch?v=OcS5i4b94fE -https://www.youtube.com/watch?v=6K_SzWye5tE -https://www.youtube.com/watch?v=pVlrX8hakyE -https://www.youtube.com/watch?v=zIrG0Wt6CcA -https://www.youtube.com/watch?v=EjXQbmbgxhQ -https://www.youtube.com/watch?v=MmvWYXp1PNM -https://www.youtube.com/watch?v=lLx3Z3c7mV8 -https://www.youtube.com/watch?v=73ykZR11zes -https://www.youtube.com/watch?v=WHYrfpgtm6U -https://www.youtube.com/watch?v=b8xrtcAyyw0 -https://www.youtube.com/watch?v=SNFeZyokO6o -https://www.youtube.com/watch?v=sre-OE94un8 -https://www.youtube.com/watch?v=q28um1cqF4Q -https://www.youtube.com/watch?v=phgtPLyP97w -https://www.youtube.com/watch?v=jlUkuRkYFZU -https://www.youtube.com/watch?v=3G3m2BTXUQw -https://www.youtube.com/watch?v=te2LYx1SVBE -https://www.youtube.com/watch?v=JACfOF38Ffk -https://www.youtube.com/watch?v=ZKb43WnjoiU -https://www.youtube.com/watch?v=Qel0DXeISN0 -https://www.youtube.com/watch?v=9FekdRZPIpQ -https://www.youtube.com/watch?v=L3E45I283ZU -https://www.youtube.com/watch?v=ZpKcW61Y3zI -https://www.youtube.com/watch?v=SiJmWpuQhNQ -https://www.youtube.com/watch?v=sYe2vx6p_T4 -https://www.youtube.com/watch?v=eJEGgi8ZkRQ -https://www.youtube.com/watch?v=V--wYltJnB4 -https://www.youtube.com/watch?v=z8EMf_9wUzc -https://www.youtube.com/watch?v=2vuVM43MRiA -https://www.youtube.com/watch?v=Sc88FJQw-8A -https://www.youtube.com/watch?v=2lB9s3y-6eA -https://www.youtube.com/watch?v=vNkYcCV7S4U -https://www.youtube.com/watch?v=F3ls751u5o0 -https://www.youtube.com/watch?v=oFo8wPllZu8 -https://www.youtube.com/watch?v=Ku9lKfjZU30 -https://www.youtube.com/watch?v=3Yq8fNNHB1Y -https://www.youtube.com/watch?v=Sk3dVf0H9C0 -https://www.youtube.com/watch?v=w99cGM_y4Og -https://www.youtube.com/watch?v=jm0Zv6ydHeo -https://www.youtube.com/watch?v=5X2g_qbEgEs -https://www.youtube.com/watch?v=MYTQew8auIY -https://www.youtube.com/watch?v=GOy_-PzLf8g -https://www.youtube.com/watch?v=YUkd_TZQZKk -https://www.youtube.com/watch?v=hzeo2pDrtLk -https://www.youtube.com/watch?v=wEabH0TyYwM -https://www.youtube.com/watch?v=SlL6VTxaCog -https://www.youtube.com/watch?v=s6ZflJj1p34 -https://www.youtube.com/watch?v=TMTHkY8hCx8 -https://www.youtube.com/watch?v=cRBj61eBjhQ -https://www.youtube.com/watch?v=DTL3G1h2SdQ -https://www.youtube.com/watch?v=NCI6-NWq6DY -https://www.youtube.com/watch?v=lIvSjlKUvCg -https://www.youtube.com/watch?v=6r7Ubc_dEQk -https://www.youtube.com/watch?v=IFdEerP9z4Q -https://www.youtube.com/watch?v=JbBGMq-wBHM -https://www.youtube.com/watch?v=RIYSRqBseLI -https://www.youtube.com/watch?v=2v_UzrgNwpo -https://www.youtube.com/watch?v=iX_vynlyrC8 -https://www.youtube.com/watch?v=ZBjsh2p80sY -https://www.youtube.com/watch?v=AK6dg9YJz70 -https://www.youtube.com/watch?v=AMAzK-IjtW0 -https://www.youtube.com/watch?v=PfaI57gHTkU -https://www.youtube.com/watch?v=Cn9asaKJkP8 -https://www.youtube.com/watch?v=GWDOcK-GHUo 
-https://www.youtube.com/watch?v=FdrM63-sL5E -https://www.youtube.com/watch?v=Ni15untE9lc -https://www.youtube.com/watch?v=ZXiNG_wc3cs -https://www.youtube.com/watch?v=a1NjiQlF_QU -https://www.youtube.com/watch?v=aKlFxKwlJCI -https://www.youtube.com/watch?v=ncyhGHY-J8s -https://www.youtube.com/watch?v=Nsl6F3KD68I -https://www.youtube.com/watch?v=y-MVI2HeAb8 -https://www.youtube.com/watch?v=lDmo3DvHSoQ -https://www.youtube.com/watch?v=_nO4GwwK7a0 -https://www.youtube.com/watch?v=JNV0Bs_Z2rY -https://www.youtube.com/watch?v=QCs8H3xMCCg -https://www.youtube.com/watch?v=xSlTzHlfJy0 -https://www.youtube.com/watch?v=2eVxtYA-1jI -https://www.youtube.com/watch?v=a9Ma4IjuWvM -https://www.youtube.com/watch?v=8vaxMejObcY -https://www.youtube.com/watch?v=SR6dOkauvIo -https://www.youtube.com/watch?v=Pudto3Xw_NY -https://www.youtube.com/watch?v=6EIjFdz_344 -https://www.youtube.com/watch?v=Q30ETlsokl0 -https://www.youtube.com/watch?v=4RJNa-Dj68I -https://www.youtube.com/watch?v=KoNzY_CeeKw -https://www.youtube.com/watch?v=7xNd2lPYR68 -https://www.youtube.com/watch?v=Bowfz2gZ2Sk -https://www.youtube.com/watch?v=X32vDZjnIWc -https://www.youtube.com/watch?v=GLP9CuTWUlk -https://www.youtube.com/watch?v=JDsMfCdDUfk -https://www.youtube.com/watch?v=VKynMQrBM7E -https://www.youtube.com/watch?v=ejB1lm7jDzw -https://www.youtube.com/watch?v=8B4Sg9xIGTQ -https://www.youtube.com/watch?v=_rB2YS9t63M -https://www.youtube.com/watch?v=KJyvaMvan9I -https://www.youtube.com/watch?v=ZcdhsUEd6TU -https://www.youtube.com/watch?v=NCPYSEYoF1c -https://www.youtube.com/watch?v=EgupaPnIeMM -https://www.youtube.com/watch?v=TqPRCGbHSjw -https://www.youtube.com/watch?v=Hi62GOTWGI8 -https://www.youtube.com/watch?v=dHcZffZjYB0 -https://www.youtube.com/watch?v=Pvnqhkywbxc -https://www.youtube.com/watch?v=UceBEWYa13w -https://www.youtube.com/watch?v=AgRH2C5yPjQ -https://www.youtube.com/watch?v=dXM5zpjqtOc -https://www.youtube.com/watch?v=a4Sb42va86Q -https://www.youtube.com/watch?v=JSZdE83PiRQ -https://www.youtube.com/watch?v=mWi5mU1bPPg -https://www.youtube.com/watch?v=ZTVH8aYSwjQ -https://www.youtube.com/watch?v=e_3WleCGJbc -https://www.youtube.com/watch?v=woMgT3A_71Q -https://www.youtube.com/watch?v=771kiAizO8g -https://www.youtube.com/watch?v=7PYkf420f9c -https://www.youtube.com/watch?v=AgE4Ke5nNoY -https://www.youtube.com/watch?v=ZD-UOCrU6Fg -https://www.youtube.com/watch?v=WnNpg4LRzHI -https://www.youtube.com/watch?v=guKqHIuxq90 -https://www.youtube.com/watch?v=WZ89NCgdvII -https://www.youtube.com/watch?v=e9r2o7MRyMQ -https://www.youtube.com/watch?v=1xdaVv0qb0I -https://www.youtube.com/watch?v=l5SYopn7vSg -https://www.youtube.com/watch?v=5xiKlm6vG5w -https://www.youtube.com/watch?v=iKAtkHSVfjU -https://www.youtube.com/watch?v=xQHmDw2ayNw -https://www.youtube.com/watch?v=PsWWUiANTfo -https://www.youtube.com/watch?v=tQIIhH7A9FE -https://www.youtube.com/watch?v=6EkM3Fvirq0 -https://www.youtube.com/watch?v=mQ-kLX_NRwU -https://www.youtube.com/watch?v=QR6WfdodfDU -https://www.youtube.com/watch?v=Xb4ZZ6T50vM -https://www.youtube.com/watch?v=E0HGtjMKljg -https://www.youtube.com/watch?v=tbtPMw1BNA4 -https://www.youtube.com/watch?v=eFJlQBkjEqw -https://www.youtube.com/watch?v=vJ4Ue81SyQw -https://www.youtube.com/watch?v=dXkf-O-ByOQ -https://www.youtube.com/watch?v=zIfZxrswlEY -https://www.youtube.com/watch?v=A5vdyzU-0zg -https://www.youtube.com/watch?v=2ajukBAGGuU -https://www.youtube.com/watch?v=WTZ4zCezHUU -https://www.youtube.com/watch?v=XWi8rXiRq9E -https://www.youtube.com/watch?v=qpMl2erxOgU -https://www.youtube.com/watch?v=VwbO60gjWoA 
-https://www.youtube.com/watch?v=7IuipODNcRE -https://www.youtube.com/watch?v=HfMHDL0SgZ0 -https://www.youtube.com/watch?v=yv5mcmNEwdU -https://www.youtube.com/watch?v=xma0nVpBjZo -https://www.youtube.com/watch?v=TKxNwoYM5ec -https://www.youtube.com/watch?v=hiKwAlTM1Ys -https://www.youtube.com/watch?v=KG-30LxX-qs -https://www.youtube.com/watch?v=_6Ms_SJmQn4 -https://www.youtube.com/watch?v=0tdXaee9o6k -https://www.youtube.com/watch?v=J9FCafzAuaE -https://www.youtube.com/watch?v=7jTrdi5t83Y -https://www.youtube.com/watch?v=WIky8XG3SP4 -https://www.youtube.com/watch?v=0iv6pKiKzL4 -https://www.youtube.com/watch?v=ZfWDeOgVSQo -https://www.youtube.com/watch?v=7QdQ7z3mFr4 -https://www.youtube.com/watch?v=JfjXBXIvd-8 -https://www.youtube.com/watch?v=YyOK4Lau-xY -https://www.youtube.com/watch?v=klgdpFMajjY -https://www.youtube.com/watch?v=I-ar6huKQ_c -https://www.youtube.com/watch?v=vkL_zAcnkI8 -https://www.youtube.com/watch?v=tgMX2SfF5lI -https://www.youtube.com/watch?v=4FrSX37DoTU -https://www.youtube.com/watch?v=ERX--LGwC10 -https://www.youtube.com/watch?v=t_2kZnBl9i0 -https://www.youtube.com/watch?v=YnhdkKajmRA -https://www.youtube.com/watch?v=7nfJO1rJYMo -https://www.youtube.com/watch?v=B4bd4HcRo0Q -https://www.youtube.com/watch?v=BOs7CVyGyGg -https://www.youtube.com/watch?v=OZyMlE-yy3U -https://www.youtube.com/watch?v=SvbS2IALKmo -https://www.youtube.com/watch?v=U-IdboWZNiA -https://www.youtube.com/watch?v=msjiKwDbsaM -https://www.youtube.com/watch?v=dmZ1pWgxZn0 -https://www.youtube.com/watch?v=utGMKcJtuPo -https://www.youtube.com/watch?v=sGseachRqQs \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_16.txt b/airflow/inputfiles/urls.rt250_16.txt deleted file mode 100644 index bac8d1f..0000000 --- a/airflow/inputfiles/urls.rt250_16.txt +++ /dev/null @@ -1,230 +0,0 @@ -https://www.youtube.com/watch?v=s_7soIjx_sk -https://www.youtube.com/watch?v=QiFpIIF7-4g -https://www.youtube.com/watch?v=EFCcOOuWWc8 -https://www.youtube.com/watch?v=ORAwIWpC3Gw -https://www.youtube.com/watch?v=nB5njVbYF_k -https://www.youtube.com/watch?v=nfz__ppKnHM -https://www.youtube.com/watch?v=9y9YPN5mclI -https://www.youtube.com/watch?v=knEbE0conCs -https://www.youtube.com/watch?v=uuoZinaXlAs -https://www.youtube.com/watch?v=HATcwbvQEac -https://www.youtube.com/watch?v=9B5bfAzqT6A -https://www.youtube.com/watch?v=Spa5Hstqe9c -https://www.youtube.com/watch?v=RX8-5eZZH-g -https://www.youtube.com/watch?v=FhNZRjWbV88 -https://www.youtube.com/watch?v=4iOXfE5mrOs -https://www.youtube.com/watch?v=v2ZKFLsJh44 -https://www.youtube.com/watch?v=Im7IqqmDmwE -https://www.youtube.com/watch?v=ldQM2aFCLAY -https://www.youtube.com/watch?v=wNqXAjJtZG0 -https://www.youtube.com/watch?v=etSP1c6XQF8 -https://www.youtube.com/watch?v=Kz7tHkNSXnU -https://www.youtube.com/watch?v=YurbIFCKqHo -https://www.youtube.com/watch?v=UspvXAlQ26o -https://www.youtube.com/watch?v=JYvEggbevBw -https://www.youtube.com/watch?v=TJpGFfXl4Rg -https://www.youtube.com/watch?v=T59N1dRRAtw -https://www.youtube.com/watch?v=sdW6jwBctac -https://www.youtube.com/watch?v=jTWH3Rb72rI -https://www.youtube.com/watch?v=OT5UFvRabSg -https://www.youtube.com/watch?v=N7uyjA7i7SI -https://www.youtube.com/watch?v=D2z8w4MNnOI -https://www.youtube.com/watch?v=PGaSqod7xJ8 -https://www.youtube.com/watch?v=W0RtLCRayZI -https://www.youtube.com/watch?v=9d_fnwE-P3g -https://www.youtube.com/watch?v=Y_nKb8qDnS4 -https://www.youtube.com/watch?v=mSqdcFxU-BE -https://www.youtube.com/watch?v=dh8voXLq30Q -https://www.youtube.com/watch?v=j34tBBPPO7k 
-https://www.youtube.com/watch?v=Z39BEKY6r2g -https://www.youtube.com/watch?v=tV48lHlCO_E -https://www.youtube.com/watch?v=9AdxIIwBwD8 -https://www.youtube.com/watch?v=sgYKkmO4Y9k -https://www.youtube.com/watch?v=g38X9lWrCvM -https://www.youtube.com/watch?v=qeZX32QWR5I -https://www.youtube.com/watch?v=t949upp1nAc -https://www.youtube.com/watch?v=FQG5G5yuhkI -https://www.youtube.com/watch?v=vqArE3Ddjp0 -https://www.youtube.com/watch?v=kWD8zGU-sHs -https://www.youtube.com/watch?v=UKpJ07-Fdco -https://www.youtube.com/watch?v=yCTY7L58M0c -https://www.youtube.com/watch?v=Eg9kwoacM_Q -https://www.youtube.com/watch?v=TYh4lXx57b4 -https://www.youtube.com/watch?v=KkQGpeabUmc -https://www.youtube.com/watch?v=alqtdG-lH20 -https://www.youtube.com/watch?v=LhUFVnK5IBU -https://www.youtube.com/watch?v=SKxio1UECBA -https://www.youtube.com/watch?v=OkZQdhgToL4 -https://www.youtube.com/watch?v=tSyp0B5KLIc -https://www.youtube.com/watch?v=X2l_P0KN4FE -https://www.youtube.com/watch?v=Rw6YwelbmPU -https://www.youtube.com/watch?v=QucIkabx540 -https://www.youtube.com/watch?v=R6a2CRSeq4I -https://www.youtube.com/watch?v=S_pCLiKX1jQ -https://www.youtube.com/watch?v=B_uOj6ZaGjU -https://www.youtube.com/watch?v=pMxjV14wEaQ -https://www.youtube.com/watch?v=hSCD7O2zZqs -https://www.youtube.com/watch?v=MT7JYGXHvsg -https://www.youtube.com/watch?v=Z7eUu3-vcXA -https://www.youtube.com/watch?v=Pk2cph6j3Qk -https://www.youtube.com/watch?v=hionali1f8Q -https://www.youtube.com/watch?v=RFkfq59Q6Jc -https://www.youtube.com/watch?v=ZIZVTRYCGWM -https://www.youtube.com/watch?v=QFMoiOAY6i4 -https://www.youtube.com/watch?v=6uRUgR7azzc -https://www.youtube.com/watch?v=76sAQCo9Ao0 -https://www.youtube.com/watch?v=vEjnuOJadpA -https://www.youtube.com/watch?v=T5qyH3fO_NQ -https://www.youtube.com/watch?v=HIHiwfUj8_Y -https://www.youtube.com/watch?v=oMnMBFHzO2A -https://www.youtube.com/watch?v=AsRDIaqn-b0 -https://www.youtube.com/watch?v=u1VSkXNGAM0 -https://www.youtube.com/watch?v=ys3q0YCAtWA -https://www.youtube.com/watch?v=zCfWIxAIo2A -https://www.youtube.com/watch?v=5hgHNqbMovk -https://www.youtube.com/watch?v=JQGubPbpwp0 -https://www.youtube.com/watch?v=np2aMq-duMA -https://www.youtube.com/watch?v=MsKvbsmxSEk -https://www.youtube.com/watch?v=xOOUaiwdY98 -https://www.youtube.com/watch?v=gxJsp3I9PvQ -https://www.youtube.com/watch?v=cpFV2AxmEeY -https://www.youtube.com/watch?v=42mSq6e5ns0 -https://www.youtube.com/watch?v=ZcMv4sje3Vw -https://www.youtube.com/watch?v=tNTzPVJoMKQ -https://www.youtube.com/watch?v=NQRZXINtEhs -https://www.youtube.com/watch?v=aTG1pNOSYro -https://www.youtube.com/watch?v=ZoTtMmX1oz8 -https://www.youtube.com/watch?v=ulUenhoi__M -https://www.youtube.com/watch?v=tT-3k4barTQ -https://www.youtube.com/watch?v=QtMoPqXYXDk -https://www.youtube.com/watch?v=HEftQMgt4rg -https://www.youtube.com/watch?v=b4r4XexQLrM -https://www.youtube.com/watch?v=1yFwBLcBFRQ -https://www.youtube.com/watch?v=WKHlvJ6x1LA -https://www.youtube.com/watch?v=xSOIjsEekWA -https://www.youtube.com/watch?v=62akyr7rzxI -https://www.youtube.com/watch?v=XIIJpCoCm4Q -https://www.youtube.com/watch?v=BbtWZWOoACk -https://www.youtube.com/watch?v=ijyJWAgGWG0 -https://www.youtube.com/watch?v=ACHMYMusnKo -https://www.youtube.com/watch?v=trsCBMyuyLI -https://www.youtube.com/watch?v=iSaF1n0cbuQ -https://www.youtube.com/watch?v=10j3GH59eL4 -https://www.youtube.com/watch?v=mIwhMVo7GJs -https://www.youtube.com/watch?v=1cB4HhI47Jg -https://www.youtube.com/watch?v=H94hOHN2rVw -https://www.youtube.com/watch?v=rGnMrpcgUjE -https://www.youtube.com/watch?v=Tg15phZM9MA 
-https://www.youtube.com/watch?v=ayPZx_IUoos -https://www.youtube.com/watch?v=hKq0g55QaM4 -https://www.youtube.com/watch?v=NrAW-afOBqM -https://www.youtube.com/watch?v=kQa5hKggcjw -https://www.youtube.com/watch?v=yEymLHO3CQ4 -https://www.youtube.com/watch?v=0NsTlre-PPA -https://www.youtube.com/watch?v=mELSxoR-zCU -https://www.youtube.com/watch?v=yEVXuFCUjXw -https://www.youtube.com/watch?v=hMFcuOFDCcg -https://www.youtube.com/watch?v=6MeAaGtWp9s -https://www.youtube.com/watch?v=jKvNdMDukcc -https://www.youtube.com/watch?v=8KS0CHgvI1A -https://www.youtube.com/watch?v=AKvX7ah_hOw -https://www.youtube.com/watch?v=GuLbIe2cF2w -https://www.youtube.com/watch?v=sY-zWrNqkOE -https://www.youtube.com/watch?v=3iC097rvS_o -https://www.youtube.com/watch?v=wGZtL-gaazM -https://www.youtube.com/watch?v=Er1FX6IMfqQ -https://www.youtube.com/watch?v=1jvJ3DMJAlg -https://www.youtube.com/watch?v=c85Fh_WDU3A -https://www.youtube.com/watch?v=wtiG7C87QX4 -https://www.youtube.com/watch?v=eACzH5r_Ma8 -https://www.youtube.com/watch?v=9ypHGzE6Di8 -https://www.youtube.com/watch?v=EvcvCsNF0-g -https://www.youtube.com/watch?v=zEp3EOVlRFE -https://www.youtube.com/watch?v=ZMVgFtRVSuQ -https://www.youtube.com/watch?v=YELUbjJS280 -https://www.youtube.com/watch?v=ViNvarsfuNQ -https://www.youtube.com/watch?v=bITtNQSvWfc -https://www.youtube.com/watch?v=bg0YLrDhXgQ -https://www.youtube.com/watch?v=TF2weikuHEo -https://www.youtube.com/watch?v=Lw55m9XNSaQ -https://www.youtube.com/watch?v=NNCTs-K7U38 -https://www.youtube.com/watch?v=lgLixSq9wS4 -https://www.youtube.com/watch?v=2ZMLHS7l5NU -https://www.youtube.com/watch?v=TVfRO7hn3bs -https://www.youtube.com/watch?v=wByimVu-hFs -https://www.youtube.com/watch?v=amk_ol9sb3M -https://www.youtube.com/watch?v=S6QvzexdgfY -https://www.youtube.com/watch?v=yFvRdCOe_Ss -https://www.youtube.com/watch?v=ghvU3NQvb-4 -https://www.youtube.com/watch?v=-R5HMIcBxNo -https://www.youtube.com/watch?v=xksdvTH0fA4 -https://www.youtube.com/watch?v=VwlX1fGTOio -https://www.youtube.com/watch?v=luTcIoSJooo -https://www.youtube.com/watch?v=zyQoz35pRN4 -https://www.youtube.com/watch?v=1ijOjx54_8k -https://www.youtube.com/watch?v=Q57suljQVtE -https://www.youtube.com/watch?v=f6Wpgyakg4Q -https://www.youtube.com/watch?v=SmZwenV25hI -https://www.youtube.com/watch?v=438S8whO7sM -https://www.youtube.com/watch?v=GJwGamIG_2o -https://www.youtube.com/watch?v=RBEjeQf3eUA -https://www.youtube.com/watch?v=0Bm0VrVWTVM -https://www.youtube.com/watch?v=ELO3tyx76R0 -https://www.youtube.com/watch?v=2RjdVnvMtZk -https://www.youtube.com/watch?v=6zyz3lHLlh0 -https://www.youtube.com/watch?v=gAjmL2hgjVA -https://www.youtube.com/watch?v=UxD-eeezwz8 -https://www.youtube.com/watch?v=FQ1Qj5tiAfU -https://www.youtube.com/watch?v=ApE2rPgtQ04 -https://www.youtube.com/watch?v=wp-d1_jUCS8 -https://www.youtube.com/watch?v=6Y22VyY5h1A -https://www.youtube.com/watch?v=CQgucWUHzS8 -https://www.youtube.com/watch?v=5_7mObtXbvc -https://www.youtube.com/watch?v=xMAkXgaG1LY -https://www.youtube.com/watch?v=haNnYV72rKw -https://www.youtube.com/watch?v=DELqUJkPXPg -https://www.youtube.com/watch?v=pX7s8Xw5YCc -https://www.youtube.com/watch?v=9vr04dqyX98 -https://www.youtube.com/watch?v=dYLPqsSQpcU -https://www.youtube.com/watch?v=ljQZiwqYqi8 -https://www.youtube.com/watch?v=JASbtU-NIYE -https://www.youtube.com/watch?v=DrTj3YdT_S8 -https://www.youtube.com/watch?v=3E_M5GC5me0 -https://www.youtube.com/watch?v=Wz74VDYVpO4 -https://www.youtube.com/watch?v=BseyjmC39x8 -https://www.youtube.com/watch?v=IHaqsz_LDBE -https://www.youtube.com/watch?v=X9y2thI8n-g 
-https://www.youtube.com/watch?v=Tvt1-4bA0ZU -https://www.youtube.com/watch?v=PX-Z916sh_Q -https://www.youtube.com/watch?v=k0pCbTE11jE -https://www.youtube.com/watch?v=Rut5u78laS8 -https://www.youtube.com/watch?v=6e9y8HP9-Qs -https://www.youtube.com/watch?v=lpfomwG5Jb0 -https://www.youtube.com/watch?v=hJVfabvgg7g -https://www.youtube.com/watch?v=ot5N-kaB86g -https://www.youtube.com/watch?v=I8W2eJ6lMv8 -https://www.youtube.com/watch?v=iK489f6qZhw -https://www.youtube.com/watch?v=7m7GdAaDUJk -https://www.youtube.com/watch?v=b9-QhEhWCMg -https://www.youtube.com/watch?v=1NPHHtZobWA -https://www.youtube.com/watch?v=md4EimJIMxU -https://www.youtube.com/watch?v=0cxc5iVzavo -https://www.youtube.com/watch?v=EShwg_-8gBQ -https://www.youtube.com/watch?v=jBNU3d1APAk -https://www.youtube.com/watch?v=X5s3aQD2vTg -https://www.youtube.com/watch?v=pg4PFn1a3w4 -https://www.youtube.com/watch?v=j25u9KdduTI -https://www.youtube.com/watch?v=1G2a1FOdBX4 -https://www.youtube.com/watch?v=cnAz3Tz6KM0 -https://www.youtube.com/watch?v=x4RzM8MpYQs -https://www.youtube.com/watch?v=1zan33JDOaU -https://www.youtube.com/watch?v=e2qGGGcaNbE -https://www.youtube.com/watch?v=KxKh-ey1anM -https://www.youtube.com/watch?v=sY33ut-4q5U -https://www.youtube.com/watch?v=Yhy_9B-W1Bc -https://www.youtube.com/watch?v=HVjLcXoWdr0 -https://www.youtube.com/watch?v=EDk4aURqdt0 -https://www.youtube.com/watch?v=f69aszkNSrA -https://www.youtube.com/watch?v=ArP-iJnxr3g -https://www.youtube.com/watch?v=cSMlgnD9Uf4 -https://www.youtube.com/watch?v=VvpHCEq3ETg diff --git a/airflow/inputfiles/urls.rt250_17.txt b/airflow/inputfiles/urls.rt250_17.txt deleted file mode 100644 index b691794..0000000 --- a/airflow/inputfiles/urls.rt250_17.txt +++ /dev/null @@ -1,235 +0,0 @@ -https://www.youtube.com/watch?v=Uh3-23diDuQ -https://www.youtube.com/watch?v=0h5fjDk4Fxk -https://www.youtube.com/watch?v=w729MixxpsY -https://www.youtube.com/watch?v=2JQv7_ijvHs -https://www.youtube.com/watch?v=lmBbDkhNtfU -https://www.youtube.com/watch?v=HdCoCj7DNlM -https://www.youtube.com/watch?v=LikXRzpPYEw -https://www.youtube.com/watch?v=tU9heYibc_4 -https://www.youtube.com/watch?v=PXbkzMxmd_Q -https://www.youtube.com/watch?v=4Wrgvzh9oF8 -https://www.youtube.com/watch?v=jBY7nHRdFuY -https://www.youtube.com/watch?v=WFxITA-cPn0 -https://www.youtube.com/watch?v=iZXtblrH3E0 -https://www.youtube.com/watch?v=21n1QM1E5Tg -https://www.youtube.com/watch?v=ore2ZKBFVIE -https://www.youtube.com/watch?v=DJPjz4TM-r8 -https://www.youtube.com/watch?v=Uz13MyjVlI0 -https://www.youtube.com/watch?v=M734Drp7DEk -https://www.youtube.com/watch?v=nG_dAIdROnA -https://www.youtube.com/watch?v=5S5i0RcG4JU -https://www.youtube.com/watch?v=nhP6k9XeJj8 -https://www.youtube.com/watch?v=T00fMHYd4xg -https://www.youtube.com/watch?v=5btdrrWIPj8 -https://www.youtube.com/watch?v=-0Dy7ZB5Tp0 -https://www.youtube.com/watch?v=cbRqV5Nczs0 -https://www.youtube.com/watch?v=wnWfEALwfrw -https://www.youtube.com/watch?v=hrMmDB735d8 -https://www.youtube.com/watch?v=-T5viJt_NeY -https://www.youtube.com/watch?v=NbifCHxb1kU -https://www.youtube.com/watch?v=xUJKjeIknXY -https://www.youtube.com/watch?v=rGUN31cV2Hg -https://www.youtube.com/watch?v=2HRQDPQkntU -https://www.youtube.com/watch?v=QuidE5bynlg -https://www.youtube.com/watch?v=OzTDb4Vslkg -https://www.youtube.com/watch?v=u-t0Dm4Jx6s -https://www.youtube.com/watch?v=QQn7WjVq35M -https://www.youtube.com/watch?v=UcsqZ0r43xQ -https://www.youtube.com/watch?v=hcIdB-l3c_U -https://www.youtube.com/watch?v=4KF3dnURKaU -https://www.youtube.com/watch?v=km_2AcH-76A 
-https://www.youtube.com/watch?v=GG1lxGKvFZo -https://www.youtube.com/watch?v=4gM4ngPf-Ug -https://www.youtube.com/watch?v=CQrUmMTaCss -https://www.youtube.com/watch?v=fgkJpaPp6Ho -https://www.youtube.com/watch?v=Sa1h5lFB2oo -https://www.youtube.com/watch?v=JPxdXpn8pU8 -https://www.youtube.com/watch?v=OiDz8w9nSC8 -https://www.youtube.com/watch?v=Ka9PvuYAdm0 -https://www.youtube.com/watch?v=_qr5dg7jIN0 -https://www.youtube.com/watch?v=_PYpZwH1Goo -https://www.youtube.com/watch?v=lk9E49BqUkQ -https://www.youtube.com/watch?v=rcreLiAZGL0 -https://www.youtube.com/watch?v=ieic-zav9Fk -https://www.youtube.com/watch?v=CwCpNXBF6js -https://www.youtube.com/watch?v=LlvC2Kj2pFo -https://www.youtube.com/watch?v=prErUmVPO3A -https://www.youtube.com/watch?v=StNr4Qo5QxI -https://www.youtube.com/watch?v=DwXPj__Wm5E -https://www.youtube.com/watch?v=z3tA2nrhIuk -https://www.youtube.com/watch?v=oKGM6yhysMw -https://www.youtube.com/watch?v=bXNjW_I_1Qo -https://www.youtube.com/watch?v=LPmoYOBnVR0 -https://www.youtube.com/watch?v=GyI9lJELbEk -https://www.youtube.com/watch?v=_C6t5mf4lbo -https://www.youtube.com/watch?v=OiGxqjZfbJc -https://www.youtube.com/watch?v=KCPW6EXa8-o -https://www.youtube.com/watch?v=W8yWWK0C8yg -https://www.youtube.com/watch?v=_obpK8FSh0M -https://www.youtube.com/watch?v=IChe3vDfRiI -https://www.youtube.com/watch?v=Bl_ph6AMXz8 -https://www.youtube.com/watch?v=xReRVPK97SE -https://www.youtube.com/watch?v=w3UZJ21mpS8 -https://www.youtube.com/watch?v=50ilwSLjqPQ -https://www.youtube.com/watch?v=k1gJ787wdR4 -https://www.youtube.com/watch?v=6onGbYzhrks -https://www.youtube.com/watch?v=JzdOP-4lSAM -https://www.youtube.com/watch?v=25ey_nTjFXM -https://www.youtube.com/watch?v=JHwghzfAkNM -https://www.youtube.com/watch?v=c2N-wMntv7o -https://www.youtube.com/watch?v=wvTkHp8yWQA -https://www.youtube.com/watch?v=ekx12gwN35o -https://www.youtube.com/watch?v=3Dmwk7vH5aE -https://www.youtube.com/watch?v=37oU7XZqHKQ -https://www.youtube.com/watch?v=p7cRbQwQLIU -https://www.youtube.com/watch?v=O5WZy3lgUwk -https://www.youtube.com/watch?v=xwQ7heyQDU0 -https://www.youtube.com/watch?v=Ioc2DzR36eg -https://www.youtube.com/watch?v=_Ws5CLQAZiY -https://www.youtube.com/watch?v=0eWGo4d061o -https://www.youtube.com/watch?v=w0gwOfOspWM -https://www.youtube.com/watch?v=kuBIKagXlD0 -https://www.youtube.com/watch?v=MxUVYfjSKUo -https://www.youtube.com/watch?v=Gm2rAs40jCU -https://www.youtube.com/watch?v=5_jbqEeWdqY -https://www.youtube.com/watch?v=_vCLPKftFsk -https://www.youtube.com/watch?v=sLzyP0g3Tz0 -https://www.youtube.com/watch?v=6_ALpSHwsyU -https://www.youtube.com/watch?v=iyChl-zsg8I -https://www.youtube.com/watch?v=n-tgxts1qTQ -https://www.youtube.com/watch?v=xwIoQ7aHASA -https://www.youtube.com/watch?v=kWDWFGZcPn0 -https://www.youtube.com/watch?v=Z3WZ8IYnx6o -https://www.youtube.com/watch?v=6grKBi186q0 -https://www.youtube.com/watch?v=yzmSXl428lo -https://www.youtube.com/watch?v=-7aDUxmsbho -https://www.youtube.com/watch?v=2E15vQEUh3Y -https://www.youtube.com/watch?v=-gTJpdOSKdE -https://www.youtube.com/watch?v=1BgHKfpGqxQ -https://www.youtube.com/watch?v=CGG8-6y82Dc -https://www.youtube.com/watch?v=OYWFQagB5SA -https://www.youtube.com/watch?v=v2ejcHqbB9M -https://www.youtube.com/watch?v=Ui8eW_bQI-c -https://www.youtube.com/watch?v=Df43IR-Y2pg -https://www.youtube.com/watch?v=8SxFtbjJLIY -https://www.youtube.com/watch?v=_CzzsdbwVvU -https://www.youtube.com/watch?v=1zQ_pVzAWmQ -https://www.youtube.com/watch?v=Zx7l88BnZq4 -https://www.youtube.com/watch?v=8D7cgnCRg9M -https://www.youtube.com/watch?v=1mmOlk_6KiY 
-https://www.youtube.com/watch?v=f9yuepxqoI8 -https://www.youtube.com/watch?v=WzwdRsbyrsE -https://www.youtube.com/watch?v=O0PK3YdZ6Gs -https://www.youtube.com/watch?v=y5sBj8J1zXE -https://www.youtube.com/watch?v=Erg0E8MkI8g -https://www.youtube.com/watch?v=V2XpuzU9qKc -https://www.youtube.com/watch?v=iO6Td9WZl8Q -https://www.youtube.com/watch?v=Ve6hXGghGDU -https://www.youtube.com/watch?v=XQkWto6o-zE -https://www.youtube.com/watch?v=jU7HHOjHIHI -https://www.youtube.com/watch?v=y8U0G8Yp5As -https://www.youtube.com/watch?v=hK3dOOeJYeQ -https://www.youtube.com/watch?v=3XwUV_IIeAY -https://www.youtube.com/watch?v=-H7oKDmsJBA -https://www.youtube.com/watch?v=ax2sbkIwsbs -https://www.youtube.com/watch?v=emBuYmGX9pA -https://www.youtube.com/watch?v=bYnSsdt-EwQ -https://www.youtube.com/watch?v=xWH-3s6ae6g -https://www.youtube.com/watch?v=bGwSw06nHlI -https://www.youtube.com/watch?v=XzfATljHtZA -https://www.youtube.com/watch?v=vHEpEVrULj8 -https://www.youtube.com/watch?v=Vw9VQqB9nZY -https://www.youtube.com/watch?v=Zo8DB-WiT8o -https://www.youtube.com/watch?v=118Qmc4PW94 -https://www.youtube.com/watch?v=mqjvTCW28wA -https://www.youtube.com/watch?v=COOUsA8sgzw -https://www.youtube.com/watch?v=nJjZWvoIXno -https://www.youtube.com/watch?v=Un-iP21XLcY -https://www.youtube.com/watch?v=ndqEvSH28sE -https://www.youtube.com/watch?v=ROTsUIJFFAI -https://www.youtube.com/watch?v=sjtU9ZJ1kl4 -https://www.youtube.com/watch?v=-l6aB5-5IGo -https://www.youtube.com/watch?v=B2zGJdhw7Qk -https://www.youtube.com/watch?v=Hq-agpSNVvk -https://www.youtube.com/watch?v=9Kmah0OdmfQ -https://www.youtube.com/watch?v=G3pd86ahuIk -https://www.youtube.com/watch?v=Z4zFfpUWFjc -https://www.youtube.com/watch?v=uCXKbn0_LYU -https://www.youtube.com/watch?v=Mm4CsHQ7jEY -https://www.youtube.com/watch?v=gzDj9vQhM3U -https://www.youtube.com/watch?v=nYBMPcWh6io -https://www.youtube.com/watch?v=sTIi-NkM_4o -https://www.youtube.com/watch?v=Mid_00T8OzY -https://www.youtube.com/watch?v=O74uCOmq-5w -https://www.youtube.com/watch?v=5czh6hr5ZCU -https://www.youtube.com/watch?v=Seu6J2umwKg -https://www.youtube.com/watch?v=aYtahgqJzGM -https://www.youtube.com/watch?v=hOGbqy4YQbc -https://www.youtube.com/watch?v=5hBzDL7E9yk -https://www.youtube.com/watch?v=cOY9GMP_fyA -https://www.youtube.com/watch?v=S7pF2ggDLM4 -https://www.youtube.com/watch?v=eTDWGdCwEsU -https://www.youtube.com/watch?v=9CyB887wkHs -https://www.youtube.com/watch?v=PnKwZK96xOQ -https://www.youtube.com/watch?v=1bnsQ6waGm8 -https://www.youtube.com/watch?v=ezJJzX7F57E -https://www.youtube.com/watch?v=abgJBju9UhE -https://www.youtube.com/watch?v=Gb_CTWcoscs -https://www.youtube.com/watch?v=XK70iPIeAnY -https://www.youtube.com/watch?v=bpROcbyQOdc -https://www.youtube.com/watch?v=Pvyi1ldhlVQ -https://www.youtube.com/watch?v=a8ABcZWicB0 -https://www.youtube.com/watch?v=STXjofgjHMo -https://www.youtube.com/watch?v=yKUAqlCUFx4 -https://www.youtube.com/watch?v=aE3IkzploxM -https://www.youtube.com/watch?v=0BNLKVNMeuo -https://www.youtube.com/watch?v=HcoKqF60cO8 -https://www.youtube.com/watch?v=kj7UFFcXVlQ -https://www.youtube.com/watch?v=E30v2guQPJg -https://www.youtube.com/watch?v=_L01csO4Nek -https://www.youtube.com/watch?v=p3uH5LA0IdM -https://www.youtube.com/watch?v=cEnSOZhGdfo -https://www.youtube.com/watch?v=P97y46gLFOk -https://www.youtube.com/watch?v=iayw8uf6DM8 -https://www.youtube.com/watch?v=hrUZrC6OplE -https://www.youtube.com/watch?v=F36GPTHUyXU -https://www.youtube.com/watch?v=QPc0595_s28 -https://www.youtube.com/watch?v=kl9hYdYQy0g -https://www.youtube.com/watch?v=_BucBOfiyLg 
-https://www.youtube.com/watch?v=PWwNGu-AaqA -https://www.youtube.com/watch?v=HQhgyMKvBSY -https://www.youtube.com/watch?v=VNCQU3dRY64 -https://www.youtube.com/watch?v=bG4ByNMW5ds -https://www.youtube.com/watch?v=L8tcS2e3rfk -https://www.youtube.com/watch?v=g39DLsLdXTE -https://www.youtube.com/watch?v=4d4aeRHTdTo -https://www.youtube.com/watch?v=RgdhZCq19ZQ -https://www.youtube.com/watch?v=I9NHi6EmkTk -https://www.youtube.com/watch?v=6CcpmLOWyMk -https://www.youtube.com/watch?v=IB7E41G2CtI -https://www.youtube.com/watch?v=6O-Aef1Gn4c -https://www.youtube.com/watch?v=uxfL1LRpbW4 -https://www.youtube.com/watch?v=FCpcyBHurFw -https://www.youtube.com/watch?v=CygnT11F_ZI -https://www.youtube.com/watch?v=cHVGidgy71o -https://www.youtube.com/watch?v=bGtQwp5ixTA -https://www.youtube.com/watch?v=voeKD_enQTo -https://www.youtube.com/watch?v=G05pwijaK3A -https://www.youtube.com/watch?v=B44UzfY3Xsc -https://www.youtube.com/watch?v=p8E-1M3hZSo -https://www.youtube.com/watch?v=pxWm1Pfzy1w -https://www.youtube.com/watch?v=PWN2vaiAdrE -https://www.youtube.com/watch?v=TRapAZa9UgQ -https://www.youtube.com/watch?v=JaopWs4FnpU -https://www.youtube.com/watch?v=Bm1xne0yK08 -https://www.youtube.com/watch?v=UrbDu6--eyY -https://www.youtube.com/watch?v=0uxmUb8_bYQ -https://www.youtube.com/watch?v=xEPT6rceiaE -https://www.youtube.com/watch?v=OcWYreMBLHE -https://www.youtube.com/watch?v=74Hh1nJJRXs -https://www.youtube.com/watch?v=kxH_bu7oWEQ -https://www.youtube.com/watch?v=xLZQHETWRCM -https://www.youtube.com/watch?v=QLNurv0bgDs -https://www.youtube.com/watch?v=Xa_JupHC-BY -https://www.youtube.com/watch?v=5AdEUJ_bA-w -https://www.youtube.com/watch?v=CumOhS7DldM \ No newline at end of file diff --git a/airflow/inputfiles/urls.rt250_18.txt b/airflow/inputfiles/urls.rt250_18.txt deleted file mode 100644 index 1370e4a..0000000 --- a/airflow/inputfiles/urls.rt250_18.txt +++ /dev/null @@ -1,233 +0,0 @@ -https://www.youtube.com/watch?v=w4rRYaRB5T0 -https://www.youtube.com/watch?v=FjvPtUvZZRE -https://www.youtube.com/watch?v=iII9kszf9-E -https://www.youtube.com/watch?v=7hyi0F6EEGg -https://www.youtube.com/watch?v=ilQMJ-kwMAU -https://www.youtube.com/watch?v=SJOP-f5pvlY -https://www.youtube.com/watch?v=LhBc8ElIf_Y -https://www.youtube.com/watch?v=w_koXhVD1uc -https://www.youtube.com/watch?v=HCoR_bmy-lA -https://www.youtube.com/watch?v=vKPezUzvTjg -https://www.youtube.com/watch?v=Gy3teXRZc3M -https://www.youtube.com/watch?v=4FqSKCGo_yQ -https://www.youtube.com/watch?v=hoY7RZV1W4Y -https://www.youtube.com/watch?v=vKyKEcq44uw -https://www.youtube.com/watch?v=g0LBa1CxhDc -https://www.youtube.com/watch?v=4Je1Q5SioSQ -https://www.youtube.com/watch?v=141sUnI0Z8o -https://www.youtube.com/watch?v=RssoSCiTvcQ -https://www.youtube.com/watch?v=gNmiJWksHV0 -https://www.youtube.com/watch?v=-sht8xDsM_4 -https://www.youtube.com/watch?v=q9npqzOrkXQ -https://www.youtube.com/watch?v=0e1tKLUdWc0 -https://www.youtube.com/watch?v=FWa3LA5diUA -https://www.youtube.com/watch?v=iANrNIqlfEU -https://www.youtube.com/watch?v=HoQgL7JmzYE -https://www.youtube.com/watch?v=m4xFa51_qTw -https://www.youtube.com/watch?v=JEtPhrfEZTc -https://www.youtube.com/watch?v=GV6PNirTB5Q -https://www.youtube.com/watch?v=ymOwuN4Icbw -https://www.youtube.com/watch?v=z-PZegRllLE -https://www.youtube.com/watch?v=_FmUv2AX36Q -https://www.youtube.com/watch?v=NytoPxQXGFk -https://www.youtube.com/watch?v=yX05hYFyiBY -https://www.youtube.com/watch?v=JcdVC9wJX9g -https://www.youtube.com/watch?v=Zm1o6Uvqt6w -https://www.youtube.com/watch?v=1_3XNdOEHB4 
-https://www.youtube.com/watch?v=iERK93l5S4U -https://www.youtube.com/watch?v=bzQGeM-Adpk -https://www.youtube.com/watch?v=hEPeQWP9gq8 -https://www.youtube.com/watch?v=kdxXsrO-_2o -https://www.youtube.com/watch?v=O0kH3wOwfyM -https://www.youtube.com/watch?v=7MT3LwhuYjo -https://www.youtube.com/watch?v=3rrT4hwjRiE -https://www.youtube.com/watch?v=h1Zx3W0OtAk -https://www.youtube.com/watch?v=Nor0KPVPImQ -https://www.youtube.com/watch?v=_eWaSkmUP-Q -https://www.youtube.com/watch?v=RYF5UYJC6Mk -https://www.youtube.com/watch?v=OUgecZ8n-dk -https://www.youtube.com/watch?v=PD-okljVoz8 -https://www.youtube.com/watch?v=t1lDjE6zGR8 -https://www.youtube.com/watch?v=QRcFSOOIR04 -https://www.youtube.com/watch?v=W3E5IOtsz68 -https://www.youtube.com/watch?v=qZlW5AeLKaA -https://www.youtube.com/watch?v=A8wb1LtJzbA -https://www.youtube.com/watch?v=-83bTbd6Vb4 -https://www.youtube.com/watch?v=gaZ6nCFyT-E -https://www.youtube.com/watch?v=JCmlbmA9seQ -https://www.youtube.com/watch?v=66zl23CVyBE -https://www.youtube.com/watch?v=enjb-u5GS0E -https://www.youtube.com/watch?v=zQ7DZ_ZKLQ0 -https://www.youtube.com/watch?v=cltTMwuk-Y0 -https://www.youtube.com/watch?v=od41NVjN6Uc -https://www.youtube.com/watch?v=6XVgwxF8bU8 -https://www.youtube.com/watch?v=KIAv3ZQ1gks -https://www.youtube.com/watch?v=4yzSoM9ZJoc -https://www.youtube.com/watch?v=mKM_Hve4PSM -https://www.youtube.com/watch?v=EabCNrEsgXY -https://www.youtube.com/watch?v=ObwEO_jK9_g -https://www.youtube.com/watch?v=Y19eeNT0hNQ -https://www.youtube.com/watch?v=OBvGnwg4WeY -https://www.youtube.com/watch?v=blwClcLpTZA -https://www.youtube.com/watch?v=e5FHL3Gl56k -https://www.youtube.com/watch?v=om0sYjy7eT8 -https://www.youtube.com/watch?v=GG1uIMTZA30 -https://www.youtube.com/watch?v=bW5lxEe9Qqg -https://www.youtube.com/watch?v=cbwxE8eZBhw -https://www.youtube.com/watch?v=kMzNX0j4NO8 -https://www.youtube.com/watch?v=q5XBM4-OVJg -https://www.youtube.com/watch?v=Ip67Pcwm-8c -https://www.youtube.com/watch?v=ietAfH6O3IE -https://www.youtube.com/watch?v=0CBUwyVhssQ -https://www.youtube.com/watch?v=WocIrnJHCks -https://www.youtube.com/watch?v=Vu2sP1rBSuQ -https://www.youtube.com/watch?v=ypxZjDVLPd8 -https://www.youtube.com/watch?v=v1O8jVvuDZE -https://www.youtube.com/watch?v=0gVYLElHyuU -https://www.youtube.com/watch?v=efx7G2vGiWc -https://www.youtube.com/watch?v=i907BKtWzKM -https://www.youtube.com/watch?v=Bru4dK_5cJk -https://www.youtube.com/watch?v=JSNrp0VfFxM -https://www.youtube.com/watch?v=cKvtjYtK_Zs -https://www.youtube.com/watch?v=xkWrdzoqOHc -https://www.youtube.com/watch?v=gU8Q4qhtWT8 -https://www.youtube.com/watch?v=uFOs71VYSXQ -https://www.youtube.com/watch?v=9kb7VxErS64 -https://www.youtube.com/watch?v=jzmHWb1uKFY -https://www.youtube.com/watch?v=g6ZEep0YEe8 -https://www.youtube.com/watch?v=ARKY-HjGuio -https://www.youtube.com/watch?v=0Yr6CwKNorQ -https://www.youtube.com/watch?v=B0tA10cAXZw -https://www.youtube.com/watch?v=WsGUy4_LiEU -https://www.youtube.com/watch?v=-ndIAbqBYgk -https://www.youtube.com/watch?v=QbKhTZKEarU -https://www.youtube.com/watch?v=HOuePkn2qeI -https://www.youtube.com/watch?v=rDajxgZA5NU -https://www.youtube.com/watch?v=_0aMs9sWoeQ -https://www.youtube.com/watch?v=fPHFbYqK7kc -https://www.youtube.com/watch?v=2_wU4yl4LAY -https://www.youtube.com/watch?v=4xuBM4deE6g -https://www.youtube.com/watch?v=LmYgj8A2Tdk -https://www.youtube.com/watch?v=1WzMTIcIsJo -https://www.youtube.com/watch?v=3qCYB7CVewo -https://www.youtube.com/watch?v=GaflSBOX4Bw -https://www.youtube.com/watch?v=WzzoHmeVfEg -https://www.youtube.com/watch?v=ggY8ruNAJaM 
-https://www.youtube.com/watch?v=8JpXJQN3Q4U -https://www.youtube.com/watch?v=zahEfgHltAQ -https://www.youtube.com/watch?v=PidO06KgJgM -https://www.youtube.com/watch?v=ahkrZU7dbwg -https://www.youtube.com/watch?v=_6Uf0Wef5lk -https://www.youtube.com/watch?v=NtakSrWF8qw -https://www.youtube.com/watch?v=KPB550JDKvs -https://www.youtube.com/watch?v=a9b8E1cFAcg -https://www.youtube.com/watch?v=lVlxJHf9Uz0 -https://www.youtube.com/watch?v=3wB7hOa6pTI -https://www.youtube.com/watch?v=4rT8sKSoHYg -https://www.youtube.com/watch?v=315qa9cu8NM -https://www.youtube.com/watch?v=eXInviPnnZA -https://www.youtube.com/watch?v=hiis-ugAa6Y -https://www.youtube.com/watch?v=vZy7LUVqK4I -https://www.youtube.com/watch?v=_Y8mWMbYcKk -https://www.youtube.com/watch?v=bIZrrGvkcaE -https://www.youtube.com/watch?v=kvgZc-q7K08 -https://www.youtube.com/watch?v=4uiqIjIzJqg -https://www.youtube.com/watch?v=g2P3KlzZ_bc -https://www.youtube.com/watch?v=i6c47ojZ1gU -https://www.youtube.com/watch?v=J-WVFLD5z4E -https://www.youtube.com/watch?v=ViKKvVgHQro -https://www.youtube.com/watch?v=n-QbtsYb3ao -https://www.youtube.com/watch?v=4buYFi4RHXM -https://www.youtube.com/watch?v=h_-2AKl9mEU -https://www.youtube.com/watch?v=RKirt5pR5s4 -https://www.youtube.com/watch?v=vQQDv11dLZo -https://www.youtube.com/watch?v=pw-v-Af0Zt4 -https://www.youtube.com/watch?v=yyAUCuj_rfU -https://www.youtube.com/watch?v=5FSqDMjwFLQ -https://www.youtube.com/watch?v=J3m8gQKj9ls -https://www.youtube.com/watch?v=FiYgnqVWeD0 -https://www.youtube.com/watch?v=35M5N22WEAw -https://www.youtube.com/watch?v=BrKHrNWU7fw -https://www.youtube.com/watch?v=CEVUU_s8Pk8 -https://www.youtube.com/watch?v=p2l0QYUNsLw -https://www.youtube.com/watch?v=hGbyVDtuKlU -https://www.youtube.com/watch?v=5iPFMlxPSTc -https://www.youtube.com/watch?v=GVlVCSWeJL4 -https://www.youtube.com/watch?v=WdVYt8PL0Po -https://www.youtube.com/watch?v=p2P_A9TqH78 -https://www.youtube.com/watch?v=LQ-Kt4arMu8 -https://www.youtube.com/watch?v=0ygsfbMtrZA -https://www.youtube.com/watch?v=5hQEI4jGEao -https://www.youtube.com/watch?v=0HG6LF7hsPk -https://www.youtube.com/watch?v=JqR_R4EvsNs -https://www.youtube.com/watch?v=YTS9r3aQ2Rw -https://www.youtube.com/watch?v=UDfjPR3XxGI -https://www.youtube.com/watch?v=eJhUqThVeTU -https://www.youtube.com/watch?v=eSFea0F4pM4 -https://www.youtube.com/watch?v=Iumi423BMsY -https://www.youtube.com/watch?v=ix_lu8DoqVM -https://www.youtube.com/watch?v=YQMsHKAftA8 -https://www.youtube.com/watch?v=YvdY-vk_qXo -https://www.youtube.com/watch?v=v5S1NeyK4zo -https://www.youtube.com/watch?v=T6eYlq568q8 -https://www.youtube.com/watch?v=LHncOi0gLVo -https://www.youtube.com/watch?v=JzNhRNPs-zs -https://www.youtube.com/watch?v=fh1cW1xt6Hs -https://www.youtube.com/watch?v=96pUYEFNtbw -https://www.youtube.com/watch?v=3iav7GPapME -https://www.youtube.com/watch?v=ifeg0nKnYAc -https://www.youtube.com/watch?v=TSOGNwPdJC0 -https://www.youtube.com/watch?v=Vdb-LAifVgw -https://www.youtube.com/watch?v=N8MzHelGeZA -https://www.youtube.com/watch?v=8lDLi6KL3NE -https://www.youtube.com/watch?v=AzesbB6B-Pw -https://www.youtube.com/watch?v=rCsByru8q80 -https://www.youtube.com/watch?v=KFCIJ6HAefo -https://www.youtube.com/watch?v=gZm2T-9vQgA -https://www.youtube.com/watch?v=urdmi5PF9tk -https://www.youtube.com/watch?v=uwdnq5BHelM -https://www.youtube.com/watch?v=Npojrebk8K4 -https://www.youtube.com/watch?v=hF8nxFrWEBM -https://www.youtube.com/watch?v=8ZP8Tmej3qI -https://www.youtube.com/watch?v=5LT6yyfblKU -https://www.youtube.com/watch?v=rwQdepyLhIc -https://www.youtube.com/watch?v=dJf0WhYGC-A 
-https://www.youtube.com/watch?v=LEADj_2kk5M -https://www.youtube.com/watch?v=rqklugKUZf0 -https://www.youtube.com/watch?v=toYKzDsIlM0 -https://www.youtube.com/watch?v=u8DoguzNFlE -https://www.youtube.com/watch?v=6A14Z3bVKxk -https://www.youtube.com/watch?v=Hx6V61dT9-c -https://www.youtube.com/watch?v=WbkkdqZtHhw -https://www.youtube.com/watch?v=FTTp__BIL6c -https://www.youtube.com/watch?v=2NIPuIbmeBg -https://www.youtube.com/watch?v=63Q5WLMF0lU -https://www.youtube.com/watch?v=KioksPhg_78 -https://www.youtube.com/watch?v=zMJ27qdfGAI -https://www.youtube.com/watch?v=q9biOLIba28 -https://www.youtube.com/watch?v=7RPWlQI1HFY -https://www.youtube.com/watch?v=Z2fPu9NX3DE -https://www.youtube.com/watch?v=_H3X_k7z7Iw -https://www.youtube.com/watch?v=0YPBfRGXejQ -https://www.youtube.com/watch?v=xUVwAIJu40g -https://www.youtube.com/watch?v=50GV67qKcSM -https://www.youtube.com/watch?v=DuVy41rot60 -https://www.youtube.com/watch?v=4GXF7gItY6g -https://www.youtube.com/watch?v=mNuPUraHMMM -https://www.youtube.com/watch?v=OE5R-dxhktY -https://www.youtube.com/watch?v=wO52Mj2dOvk -https://www.youtube.com/watch?v=T10wxcDWchE -https://www.youtube.com/watch?v=qh9FHZ1-ihg -https://www.youtube.com/watch?v=n5R7te2NUDo -https://www.youtube.com/watch?v=xMK-jrM9KKM -https://www.youtube.com/watch?v=iQsw6tnDWUw -https://www.youtube.com/watch?v=QzGwqKSJp1Q -https://www.youtube.com/watch?v=2aTk7l-SBjc -https://www.youtube.com/watch?v=pF3fZ9Uv-hQ -https://www.youtube.com/watch?v=XZeKA5WRGkY -https://www.youtube.com/watch?v=kLsZ4t2a6Ts -https://www.youtube.com/watch?v=Zw8SxU02ZIk -https://www.youtube.com/watch?v=OplKfLEIq6w -https://www.youtube.com/watch?v=cXyk40-MWZA -https://www.youtube.com/watch?v=wlMw3dhuicc -https://www.youtube.com/watch?v=wFyVmtInX04 diff --git a/airflow/inputfiles/urls.rt3700.txt b/airflow/inputfiles/urls.rt3700.txt deleted file mode 100644 index 0f6d9a5..0000000 --- a/airflow/inputfiles/urls.rt3700.txt +++ /dev/null @@ -1,3767 +0,0 @@ -https://www.youtube.com/watch?v=W_ovLaoQ5pI -https://www.youtube.com/watch?v=KnDAl7BqOq0 -https://www.youtube.com/watch?v=Ixwy1_Y1wPE -https://www.youtube.com/watch?v=oYHN2vgcpls -https://www.youtube.com/watch?v=WwYtzky4mjs -https://www.youtube.com/watch?v=dc7-aDyAeL4 -https://www.youtube.com/watch?v=XQDrjYTFVfE -https://www.youtube.com/watch?v=F0s9IJR4CN4 -https://www.youtube.com/watch?v=qrZ7RVXHdzo -https://www.youtube.com/watch?v=gwOjcuexMWU -https://www.youtube.com/watch?v=B4gVYr_9XP4 -https://www.youtube.com/watch?v=9bjUH5xViPE -https://www.youtube.com/watch?v=kbVZoqBfjCo -https://www.youtube.com/watch?v=hWXnlqOatS8 -https://www.youtube.com/watch?v=0izQuMMfIIo -https://www.youtube.com/watch?v=1x32zwHrg8s -https://www.youtube.com/watch?v=uzU1fxHS2dU -https://www.youtube.com/watch?v=qckyrafr4W4 -https://www.youtube.com/watch?v=tuvMl-Gbs1E -https://www.youtube.com/watch?v=uByH48D79KI -https://www.youtube.com/watch?v=qJ9qia29Kwk -https://www.youtube.com/watch?v=17DIjWMYBNs -https://www.youtube.com/watch?v=Ex9z0iIVluU -https://www.youtube.com/watch?v=DJP67SSAM9A -https://www.youtube.com/watch?v=OUe2oNykheg -https://www.youtube.com/watch?v=CgpuKqQl1tU -https://www.youtube.com/watch?v=4RNiyOZexag -https://www.youtube.com/watch?v=d4d8B-axCJU -https://www.youtube.com/watch?v=AMkVydmdLDU -https://www.youtube.com/watch?v=s6D1xf3VCCs -https://www.youtube.com/watch?v=ns1u61Zrzzk -https://www.youtube.com/watch?v=Ysqesg9CQ94 -https://www.youtube.com/watch?v=_VvNRtc3K0w -https://www.youtube.com/watch?v=zt-wA67nfJA -https://www.youtube.com/watch?v=bProjpkgXn8 
-https://www.youtube.com/watch?v=8Gb8ccijtxI -https://www.youtube.com/watch?v=iFJJ_7_SUns -https://www.youtube.com/watch?v=9HBlnMlGSpI -https://www.youtube.com/watch?v=ioTF6x9mhz0 -https://www.youtube.com/watch?v=k1pdrHKb3P4 -https://www.youtube.com/watch?v=wprhAP6S7MY -https://www.youtube.com/watch?v=WFKKd_o89wI -https://www.youtube.com/watch?v=w59IixVPPUs -https://www.youtube.com/watch?v=9XeYW0l7JKU -https://www.youtube.com/watch?v=HGSIM6WiIMo -https://www.youtube.com/watch?v=SGXZJ8z8QHg -https://www.youtube.com/watch?v=IsqDx71KLyo -https://www.youtube.com/watch?v=NXOHkWf_sTY -https://www.youtube.com/watch?v=ICYe8ZSy0LQ -https://www.youtube.com/watch?v=GMKd0gjPCsU -https://www.youtube.com/watch?v=fbZYCKUCJs4 -https://www.youtube.com/watch?v=br1jufv9rW0 -https://www.youtube.com/watch?v=Y_flpHI4uEw -https://www.youtube.com/watch?v=FlVTAEpf53s -https://www.youtube.com/watch?v=fN6olULFwTA -https://www.youtube.com/watch?v=EsXZIyx9fmc -https://www.youtube.com/watch?v=s-TQlPdyoiE -https://www.youtube.com/watch?v=GyczQSFPTfQ -https://www.youtube.com/watch?v=ER1qbc1pMzs -https://www.youtube.com/watch?v=ceyVCBX-wGc -https://www.youtube.com/watch?v=BcNvEGIWolk -https://www.youtube.com/watch?v=15idHcwUAfk -https://www.youtube.com/watch?v=WrC6SRasFnU -https://www.youtube.com/watch?v=lBzdu8tLe-I -https://www.youtube.com/watch?v=T7U3BOIwVNc -https://www.youtube.com/watch?v=o9wi6Tt4Z3w -https://www.youtube.com/watch?v=rDKH2dhVE_c -https://www.youtube.com/watch?v=hgzkDHhImoI -https://www.youtube.com/watch?v=3Nz1Vym36ak -https://www.youtube.com/watch?v=TCwcSAZkppc -https://www.youtube.com/watch?v=btS5Kzy_zB0 -https://www.youtube.com/watch?v=csimc-rbrhE -https://www.youtube.com/watch?v=TQymjBogMlw -https://www.youtube.com/watch?v=iMkXhj31UKQ -https://www.youtube.com/watch?v=T0Gdsnmffro -https://www.youtube.com/watch?v=2z6HSIypJ_w -https://www.youtube.com/watch?v=oX_3TTyEnKk -https://www.youtube.com/watch?v=89yqiHuBvQQ -https://www.youtube.com/watch?v=eLympVyGQQI -https://www.youtube.com/watch?v=Gy67TZyPn2Q -https://www.youtube.com/watch?v=qw8437Em-3k -https://www.youtube.com/watch?v=z7Y8KiT7bPk -https://www.youtube.com/watch?v=ojJhtn1mEGg -https://www.youtube.com/watch?v=D1Cic7Uc0ns -https://www.youtube.com/watch?v=-63519KiUqM -https://www.youtube.com/watch?v=qJfHfBR20-g -https://www.youtube.com/watch?v=QwbNv-bG9oA -https://www.youtube.com/watch?v=nwfiyKx9x18 -https://www.youtube.com/watch?v=L3aPsthTBW8 -https://www.youtube.com/watch?v=zjfCX6bpUFg -https://www.youtube.com/watch?v=2hdR_bpvjDM -https://www.youtube.com/watch?v=leoxUrT9DJg -https://www.youtube.com/watch?v=fongJ-r7Uac -https://www.youtube.com/watch?v=vas0e7e8bmI -https://www.youtube.com/watch?v=MRrUq0oqFmo -https://www.youtube.com/watch?v=lgWMCCYXqGg -https://www.youtube.com/watch?v=hwqXFvpujRs -https://www.youtube.com/watch?v=V3ZWVvrmPvw -https://www.youtube.com/watch?v=gP2QkIJaQHE -https://www.youtube.com/watch?v=7U1Q4tscmUU -https://www.youtube.com/watch?v=T5oO9HYyT_8 -https://www.youtube.com/watch?v=N7St23woljA -https://www.youtube.com/watch?v=_iWSQ3XD_eQ -https://www.youtube.com/watch?v=ev1urHanjCo -https://www.youtube.com/watch?v=ebvVkT_gHOQ -https://www.youtube.com/watch?v=IUrylOsLD6A -https://www.youtube.com/watch?v=aJ2DWpOhM98 -https://www.youtube.com/watch?v=HldN0Atn5LA -https://www.youtube.com/watch?v=f6eY71i7TfI -https://www.youtube.com/watch?v=m1A5aOGYGM8 -https://www.youtube.com/watch?v=US0in27JPv4 -https://www.youtube.com/watch?v=SWAWTrsXH5E -https://www.youtube.com/watch?v=0K6F8TJowCw -https://www.youtube.com/watch?v=O-uAmbRDCjQ 
-https://www.youtube.com/watch?v=TdqayW3Yhus -https://www.youtube.com/watch?v=mZ1Gieg2PbU -https://www.youtube.com/watch?v=D7DoRpB_p7g -https://www.youtube.com/watch?v=y2j03DYoC9k -https://www.youtube.com/watch?v=H6UwY_jvIkg -https://www.youtube.com/watch?v=jPHdMovcsno -https://www.youtube.com/watch?v=Ui9ioQhlYB8 -https://www.youtube.com/watch?v=16A9rW-bYOw -https://www.youtube.com/watch?v=0HjKLqPZlk8 -https://www.youtube.com/watch?v=KFcUjf9pJzE -https://www.youtube.com/watch?v=qslMthxSRWU -https://www.youtube.com/watch?v=Jt7haujk3sk -https://www.youtube.com/watch?v=MJK7NX0E2_4 -https://www.youtube.com/watch?v=OMdjNk3aQdk -https://www.youtube.com/watch?v=n4HCQrbYc_w -https://www.youtube.com/watch?v=3gFC-igZPr8 -https://www.youtube.com/watch?v=aqS6aduySeo -https://www.youtube.com/watch?v=ylLsucs0PRY -https://www.youtube.com/watch?v=-hIPIMAAk9E -https://www.youtube.com/watch?v=SHjD3xOkWac -https://www.youtube.com/watch?v=FxiWcpW1hOc -https://www.youtube.com/watch?v=s-wpgAK-fzg -https://www.youtube.com/watch?v=82XT7UQbF-w -https://www.youtube.com/watch?v=4bsjF-d9ODc -https://www.youtube.com/watch?v=LPfOH1_9gYU -https://www.youtube.com/watch?v=ITD6zT6SNZo -https://www.youtube.com/watch?v=mBSP343k7Xk -https://www.youtube.com/watch?v=SpXgj9PI1FI -https://www.youtube.com/watch?v=xkiqMGZEYbc -https://www.youtube.com/watch?v=ph2UXTChSsw -https://www.youtube.com/watch?v=Lq0cra_cqLc -https://www.youtube.com/watch?v=W1SS9Yt4PNI -https://www.youtube.com/watch?v=yV5g1sufBVI -https://www.youtube.com/watch?v=y7jNwdmysbo -https://www.youtube.com/watch?v=3i4Q9EgSuA8 -https://www.youtube.com/watch?v=-j7C5MfDXrA -https://www.youtube.com/watch?v=BkSglsAO7-w -https://www.youtube.com/watch?v=05dqwprWsnc -https://www.youtube.com/watch?v=b2xwjdv5nxY -https://www.youtube.com/watch?v=06RshyLtUic -https://www.youtube.com/watch?v=Mm1DH0lAtQs -https://www.youtube.com/watch?v=OfxsLW1ZUsk -https://www.youtube.com/watch?v=_AZs4CG7CbA -https://www.youtube.com/watch?v=RzZRssOgH7A -https://www.youtube.com/watch?v=Mqe8ZhqIISI -https://www.youtube.com/watch?v=6w0qYD46Afo -https://www.youtube.com/watch?v=YVtFh0283YU -https://www.youtube.com/watch?v=jCCH685ldpg -https://www.youtube.com/watch?v=Ut1_9Ma9fZg -https://www.youtube.com/watch?v=pjV_fCpJgLc -https://www.youtube.com/watch?v=a4NtRH9sZLk -https://www.youtube.com/watch?v=XjVj9wipu70 -https://www.youtube.com/watch?v=CWMhNCPMXeI -https://www.youtube.com/watch?v=MC9YYtWLadQ -https://www.youtube.com/watch?v=7yI1tP5oWQw -https://www.youtube.com/watch?v=wxh9zh-ygig -https://www.youtube.com/watch?v=Ul3JY18tiJ0 -https://www.youtube.com/watch?v=suj-r9RCMGY -https://www.youtube.com/watch?v=AbL6ZGOjc_M -https://www.youtube.com/watch?v=VGjHw351GdU -https://www.youtube.com/watch?v=32W16gzB3E8 -https://www.youtube.com/watch?v=PTujVtVZD-c -https://www.youtube.com/watch?v=SFbnmsMa_i0 -https://www.youtube.com/watch?v=X_FpFtE9mGM -https://www.youtube.com/watch?v=g6xy8KXaxDE -https://www.youtube.com/watch?v=UO6BR3rXpHs -https://www.youtube.com/watch?v=-a14SXc9ERk -https://www.youtube.com/watch?v=Hl73pJhS1Jk -https://www.youtube.com/watch?v=wXgLpByOcos -https://www.youtube.com/watch?v=FH3nQzkKc08 -https://www.youtube.com/watch?v=wFYAbEfajd0 -https://www.youtube.com/watch?v=zcyM1HzB4OY -https://www.youtube.com/watch?v=cLXZ9INHYyI -https://www.youtube.com/watch?v=Iq7Sjbcw5Ek -https://www.youtube.com/watch?v=kgX5IcFE2HE -https://www.youtube.com/watch?v=Dnq4p0BZ1zA -https://www.youtube.com/watch?v=bitMfdK4mAE -https://www.youtube.com/watch?v=HWqLp-gGOPw -https://www.youtube.com/watch?v=g6DFwFhfqSE 
-https://www.youtube.com/watch?v=81VXi1v_6Gg -https://www.youtube.com/watch?v=sCf09W7u_as -https://www.youtube.com/watch?v=MIRmwYQ0pnw -https://www.youtube.com/watch?v=dT0PJCDY-WY -https://www.youtube.com/watch?v=Tia6NFKI29c -https://www.youtube.com/watch?v=ZEH4XMI2gi0 -https://www.youtube.com/watch?v=JFC-hkuLwz8 -https://www.youtube.com/watch?v=xfjVsnGk92M -https://www.youtube.com/watch?v=aeF2hf_R2h0 -https://www.youtube.com/watch?v=D_j_k8nWY3g -https://www.youtube.com/watch?v=lvO8Dq1yORA -https://www.youtube.com/watch?v=tvovFzmiF6E -https://www.youtube.com/watch?v=KUsI9cxtJPU -https://www.youtube.com/watch?v=vgLVkrAnBLI -https://www.youtube.com/watch?v=M_ofMDC-FEQ -https://www.youtube.com/watch?v=O4gqva5ROqw -https://www.youtube.com/watch?v=DTgN-m3lAY4 -https://www.youtube.com/watch?v=bqgdyYcM4_Q -https://www.youtube.com/watch?v=hNCpvI-d6Fk -https://www.youtube.com/watch?v=va96DIxvE44 -https://www.youtube.com/watch?v=ZOwqc5DjkDk -https://www.youtube.com/watch?v=1i6UAXkjy9A -https://www.youtube.com/watch?v=gbaxWdn_Uq0 -https://www.youtube.com/watch?v=ygjE8I2k5m8 -https://www.youtube.com/watch?v=U6TUDhJ4KF4 -https://www.youtube.com/watch?v=-OSZqBAF-ck -https://www.youtube.com/watch?v=MXTnUTdBLaU -https://www.youtube.com/watch?v=38JAI9MIprU -https://www.youtube.com/watch?v=VE7TqzCQypI -https://www.youtube.com/watch?v=TdkXjlJiQq4 -https://www.youtube.com/watch?v=TTQ6N9GNeGo -https://www.youtube.com/watch?v=JnTv_K8ah0E -https://www.youtube.com/watch?v=kNI1tYegCZY -https://www.youtube.com/watch?v=Tc3A1vJf4Rg -https://www.youtube.com/watch?v=xBvINP1ddSo -https://www.youtube.com/watch?v=p-Z-cx-43eA -https://www.youtube.com/watch?v=0sqjAko-vgI -https://www.youtube.com/watch?v=W0i1_RaLrho -https://www.youtube.com/watch?v=2G3RFYBcHds -https://www.youtube.com/watch?v=Yirxj1qPBnU -https://www.youtube.com/watch?v=_e9JfXsM9ks -https://www.youtube.com/watch?v=V-UlKut8NbU -https://www.youtube.com/watch?v=C7D19AiYG4c -https://www.youtube.com/watch?v=Qj_u_rAgqDU -https://www.youtube.com/watch?v=ftObtlgqcFM -https://www.youtube.com/watch?v=wa02oKjBgvA -https://www.youtube.com/watch?v=yPjuAumM7g8 -https://www.youtube.com/watch?v=lpGq2LRqwAk -https://www.youtube.com/watch?v=jeD7tHfacHw -https://www.youtube.com/watch?v=CIxVCg71xZo -https://www.youtube.com/watch?v=gyKj3b-MbAQ -https://www.youtube.com/watch?v=yTCAjBGiUTg -https://www.youtube.com/watch?v=rNtMzj0thHg -https://www.youtube.com/watch?v=eDf4QdHkM5A -https://www.youtube.com/watch?v=KF7elMYIiyk -https://www.youtube.com/watch?v=tfoyuy5jQoc -https://www.youtube.com/watch?v=ZoXnm8dR3VU -https://www.youtube.com/watch?v=eGmo34B_OVo -https://www.youtube.com/watch?v=OR4pk7e4KbA -https://www.youtube.com/watch?v=cSVcsJ6jK4Y -https://www.youtube.com/watch?v=Ye1_bZGicWU -https://www.youtube.com/watch?v=n98hMqGK16k -https://www.youtube.com/watch?v=gXmfUJhgvAg -https://www.youtube.com/watch?v=ZHaZsBQzycY -https://www.youtube.com/watch?v=SfEQCvh6OmI -https://www.youtube.com/watch?v=xqe3MAkyGVc -https://www.youtube.com/watch?v=mc6BvRRyN8M -https://www.youtube.com/watch?v=wzPxLW7GZr0 -https://www.youtube.com/watch?v=zLJd9PBomIA -https://www.youtube.com/watch?v=DlLER38zpq4 -https://www.youtube.com/watch?v=lg9hBws5KS4 -https://www.youtube.com/watch?v=pSgRbPFNgj4 -https://www.youtube.com/watch?v=gX8tm4sP1qY -https://www.youtube.com/watch?v=-2EYqmuGLLM -https://www.youtube.com/watch?v=kKTq3Ndpu7E -https://www.youtube.com/watch?v=KvsOV5hHnq4 -https://www.youtube.com/watch?v=DPuK9pasFDA -https://www.youtube.com/watch?v=zQVSEIb4uJ4 -https://www.youtube.com/watch?v=AuupjeyKLnw 
-https://www.youtube.com/watch?v=-iDaJ1KO8A0 -https://www.youtube.com/watch?v=OheF39Zcees -https://www.youtube.com/watch?v=kqjOVTQlGrI -https://www.youtube.com/watch?v=QcLD4KdJkKA -https://www.youtube.com/watch?v=bHngc3m0Xdk -https://www.youtube.com/watch?v=Ti8ZnrOD5_0 -https://www.youtube.com/watch?v=dIUNVVnFC0U -https://www.youtube.com/watch?v=6Mc1Q7Ii55c -https://www.youtube.com/watch?v=kwxDk9nT9J4 -https://www.youtube.com/watch?v=B9WBEPkNf-w -https://www.youtube.com/watch?v=1Pt5Zrakvdg -https://www.youtube.com/watch?v=dsiu7kXFBI8 -https://www.youtube.com/watch?v=AQiIRKhgFHE -https://www.youtube.com/watch?v=geFIHaBoKaY -https://www.youtube.com/watch?v=QA4a9Db8m88 -https://www.youtube.com/watch?v=OIFU4k1f0Ec -https://www.youtube.com/watch?v=2iSBnEwWwjo -https://www.youtube.com/watch?v=VcWAOEoue1Y -https://www.youtube.com/watch?v=rdR7_4da4Js -https://www.youtube.com/watch?v=hOkvzOkipaM -https://www.youtube.com/watch?v=wLOR5mlx7VY -https://www.youtube.com/watch?v=PFcDLcK_zcY -https://www.youtube.com/watch?v=cP8Q7DIl3nI -https://www.youtube.com/watch?v=x_pJksDZSzU -https://www.youtube.com/watch?v=BeOF0c-EzIQ -https://www.youtube.com/watch?v=7PQ2uRSFd94 -https://www.youtube.com/watch?v=v_ZmsRnDmsw -https://www.youtube.com/watch?v=2Y-9Rznk8ug -https://www.youtube.com/watch?v=U6flSitpCM0 -https://www.youtube.com/watch?v=VVDf4mcyPAw -https://www.youtube.com/watch?v=m8jrjn64MVk -https://www.youtube.com/watch?v=rq8chzZeDpo -https://www.youtube.com/watch?v=e9c6Is5-XYM -https://www.youtube.com/watch?v=SxTrAm_2oT8 -https://www.youtube.com/watch?v=tSy7g2s9_eo -https://www.youtube.com/watch?v=zxs7UeUJr0s -https://www.youtube.com/watch?v=FvryEetPxrI -https://www.youtube.com/watch?v=o9qn_UHBKQ0 -https://www.youtube.com/watch?v=PBNpVOwoXLY -https://www.youtube.com/watch?v=PpMPvuSX1CY -https://www.youtube.com/watch?v=dqjASGYlWRU -https://www.youtube.com/watch?v=DGfo_K6NTwo -https://www.youtube.com/watch?v=WpUpTVFW3S4 -https://www.youtube.com/watch?v=dCgjywvszFE -https://www.youtube.com/watch?v=FuW3lMJF2zA -https://www.youtube.com/watch?v=bKaU95ceeUw -https://www.youtube.com/watch?v=Ynwqt_R3faM -https://www.youtube.com/watch?v=td70vUbqAgw -https://www.youtube.com/watch?v=9ZwKVhtzFM4 -https://www.youtube.com/watch?v=xAvwjZxkp_s -https://www.youtube.com/watch?v=FlwadWqd9jY -https://www.youtube.com/watch?v=grosmlJJpOQ -https://www.youtube.com/watch?v=8tj04EuSuR8 -https://www.youtube.com/watch?v=bCdkBP6nYrY -https://www.youtube.com/watch?v=9BbMwzKy7pY -https://www.youtube.com/watch?v=0A55FZ5R0MI -https://www.youtube.com/watch?v=S7Z5XDc5X3I -https://www.youtube.com/watch?v=sWO5gY7UbKM -https://www.youtube.com/watch?v=UrkAAASpCis -https://www.youtube.com/watch?v=iAiQBMEeeV4 -https://www.youtube.com/watch?v=XnRekopCpZ0 -https://www.youtube.com/watch?v=bnstqG8YJ-E -https://www.youtube.com/watch?v=dk5UlOS6IYI -https://www.youtube.com/watch?v=uelzEzmIhh0 -https://www.youtube.com/watch?v=gq1pUYxILOc -https://www.youtube.com/watch?v=OgXkB9S_GmA -https://www.youtube.com/watch?v=mvGD7RRehaI -https://www.youtube.com/watch?v=s3df_PR0x7Y -https://www.youtube.com/watch?v=mRDmsxKQurs -https://www.youtube.com/watch?v=bhKN_KOeWhI -https://www.youtube.com/watch?v=EStYpTS-TRU -https://www.youtube.com/watch?v=357YonN45w0 -https://www.youtube.com/watch?v=UGJfPbOpiCA -https://www.youtube.com/watch?v=1F6uYuHgOdI -https://www.youtube.com/watch?v=PvsqLRbCJlA -https://www.youtube.com/watch?v=P96IOk9mQgk -https://www.youtube.com/watch?v=M5EqG9d-3Ug -https://www.youtube.com/watch?v=R6eDBa6UjmY -https://www.youtube.com/watch?v=CZvUQEU2cvs 
-https://www.youtube.com/watch?v=giazbLbDdv0 -https://www.youtube.com/watch?v=JFoI12_47ck -https://www.youtube.com/watch?v=q5dZ396lYbk -https://www.youtube.com/watch?v=McPkR_D7zI0 -https://www.youtube.com/watch?v=774oBwazxHw -https://www.youtube.com/watch?v=-_5AipO_dfw -https://www.youtube.com/watch?v=MnlU4BPrLuk -https://www.youtube.com/watch?v=24LuuQH4hnc -https://www.youtube.com/watch?v=e4ivBc0l7Ok -https://www.youtube.com/watch?v=S4ff7HgfULA -https://www.youtube.com/watch?v=AxhJcEndmjs -https://www.youtube.com/watch?v=NdeCQFd2blY -https://www.youtube.com/watch?v=xgl4ltsE_8E -https://www.youtube.com/watch?v=in5xKqvxrAk -https://www.youtube.com/watch?v=TAU_0EpXBgQ -https://www.youtube.com/watch?v=3DbMqaactuU -https://www.youtube.com/watch?v=BWuY55TfChs -https://www.youtube.com/watch?v=41ecD9culo4 -https://www.youtube.com/watch?v=kYV8Q5UpDTw -https://www.youtube.com/watch?v=wOQlIDXHkD4 -https://www.youtube.com/watch?v=vMXVse5OuFI -https://www.youtube.com/watch?v=Fem5C3R60Sg -https://www.youtube.com/watch?v=OhySOErdxjM -https://www.youtube.com/watch?v=KC09gbct8u4 -https://www.youtube.com/watch?v=bJ4vPNkjRdE -https://www.youtube.com/watch?v=RvBrUzLugjA -https://www.youtube.com/watch?v=QuNxtlXivBk -https://www.youtube.com/watch?v=yz6OjqZfdLM -https://www.youtube.com/watch?v=CoOLkzZCcGE -https://www.youtube.com/watch?v=FecXRY-8IPw -https://www.youtube.com/watch?v=KUQBqesn-6M -https://www.youtube.com/watch?v=NAF9kveijEA -https://www.youtube.com/watch?v=hl4j6E-ICco -https://www.youtube.com/watch?v=yg5tpHvElvM -https://www.youtube.com/watch?v=ZddCBXb10hw -https://www.youtube.com/watch?v=hTKjjdN8MGQ -https://www.youtube.com/watch?v=X9hHQaYj7Lo -https://www.youtube.com/watch?v=P0B-L66ffLw -https://www.youtube.com/watch?v=5mQ4hc8Uvn8 -https://www.youtube.com/watch?v=KaPSeF592h0 -https://www.youtube.com/watch?v=xerdSyr3sSU -https://www.youtube.com/watch?v=ZW2jcFuHdhA -https://www.youtube.com/watch?v=ek3TqzF-KVE -https://www.youtube.com/watch?v=sMT8I7qIoZs -https://www.youtube.com/watch?v=hPvr-qxf52s -https://www.youtube.com/watch?v=__Ier-gibdA -https://www.youtube.com/watch?v=Wqs0Im26Bfg -https://www.youtube.com/watch?v=auk6LFmPgC8 -https://www.youtube.com/watch?v=OEX2aUVFYNI -https://www.youtube.com/watch?v=i_suF4tWuj4 -https://www.youtube.com/watch?v=Gmy9pDH26do -https://www.youtube.com/watch?v=ktXSBjJdd5Q -https://www.youtube.com/watch?v=p3kzKLus9yg -https://www.youtube.com/watch?v=tB2l4wTK4OE -https://www.youtube.com/watch?v=gm4XxHSJePc -https://www.youtube.com/watch?v=uy7z2ywGb8c -https://www.youtube.com/watch?v=OmfINsA961s -https://www.youtube.com/watch?v=8impJJlnKS8 -https://www.youtube.com/watch?v=aKCzJoP2bsY -https://www.youtube.com/watch?v=Q0jNgwJDXYk -https://www.youtube.com/watch?v=ratCJH1TN9Y -https://www.youtube.com/watch?v=kpiCo2tDedQ -https://www.youtube.com/watch?v=Jxsj2VSYp_I -https://www.youtube.com/watch?v=FeS1TqWJLqE -https://www.youtube.com/watch?v=xJJnQWo50lA -https://www.youtube.com/watch?v=FMqeu-2OCC8 -https://www.youtube.com/watch?v=wHaVTysBL9U -https://www.youtube.com/watch?v=lfmVNlorAV8 -https://www.youtube.com/watch?v=mD1d0YLwbHQ -https://www.youtube.com/watch?v=BZHKlc3N_wA -https://www.youtube.com/watch?v=7X4vxF9V9PE -https://www.youtube.com/watch?v=s_ftU_N-KAc -https://www.youtube.com/watch?v=LMXj3C2JhdA -https://www.youtube.com/watch?v=iq6sC58oSMo -https://www.youtube.com/watch?v=ZV3e4CtYltc -https://www.youtube.com/watch?v=TBB6xBg7isY -https://www.youtube.com/watch?v=majq3tuDPlg -https://www.youtube.com/watch?v=A62-iVYtkvg -https://www.youtube.com/watch?v=oH-hzXI7RzE 
-https://www.youtube.com/watch?v=OqAu24YGNKM -https://www.youtube.com/watch?v=YcgFu0urTjo -https://www.youtube.com/watch?v=L_qDQ2WALdc -https://www.youtube.com/watch?v=76nZ2RSxxik -https://www.youtube.com/watch?v=s4mnCMUrMV0 -https://www.youtube.com/watch?v=eAhZel9fdcE -https://www.youtube.com/watch?v=TXchNmKFu8I -https://www.youtube.com/watch?v=KqLLKx7jJxM -https://www.youtube.com/watch?v=wBkH4Sho9Uw -https://www.youtube.com/watch?v=3UeYut9Nm3E -https://www.youtube.com/watch?v=rNHZh5931hA -https://www.youtube.com/watch?v=fU6GFD3wNDs -https://www.youtube.com/watch?v=WAFiutRXPHU -https://www.youtube.com/watch?v=d9PoN3qbkUA -https://www.youtube.com/watch?v=jjbVZ6fPReI -https://www.youtube.com/watch?v=avHoMxrGh3c -https://www.youtube.com/watch?v=zxdhR5cBKYA -https://www.youtube.com/watch?v=XoK7nSXYmgQ -https://www.youtube.com/watch?v=ZhzwfaYrcvc -https://www.youtube.com/watch?v=-cHTdfy6CUI -https://www.youtube.com/watch?v=hJ6se5Ms3ko -https://www.youtube.com/watch?v=Zxfcj4uc0h4 -https://www.youtube.com/watch?v=yTG5zrbbxmg -https://www.youtube.com/watch?v=EH8BsC2MKNY -https://www.youtube.com/watch?v=fGXTLaO7aPo -https://www.youtube.com/watch?v=p2jo-VXkzr4 -https://www.youtube.com/watch?v=DN47veER2K0 -https://www.youtube.com/watch?v=h3dMZC3V_mA -https://www.youtube.com/watch?v=4KBB_CxKN6M -https://www.youtube.com/watch?v=nVoSg1NfPrE -https://www.youtube.com/watch?v=GHzS1ogWdMI -https://www.youtube.com/watch?v=r6Q8GLUGWY4 -https://www.youtube.com/watch?v=-t0U70j9DHY -https://www.youtube.com/watch?v=gHYAwsSXsNI -https://www.youtube.com/watch?v=XTGlxwURgJo -https://www.youtube.com/watch?v=Dj-Zrmh_a54 -https://www.youtube.com/watch?v=GNrt-iNaKvQ -https://www.youtube.com/watch?v=vrvYFPHxVMg -https://www.youtube.com/watch?v=bdlZlk0wvvo -https://www.youtube.com/watch?v=qd789Zfq5iU -https://www.youtube.com/watch?v=G4h-B9lI_vA -https://www.youtube.com/watch?v=HtJIvuVRR_s -https://www.youtube.com/watch?v=eiB4V7hSqa4 -https://www.youtube.com/watch?v=B_Ay09BH2qU -https://www.youtube.com/watch?v=jA-64XSM2s4 -https://www.youtube.com/watch?v=IzPqkrsEPSc -https://www.youtube.com/watch?v=pHJvLpyb2tA -https://www.youtube.com/watch?v=S3t3wvksx9U -https://www.youtube.com/watch?v=YdmaCruUVDM -https://www.youtube.com/watch?v=zlnoM_Le0C4 -https://www.youtube.com/watch?v=ppoa5SJDmA0 -https://www.youtube.com/watch?v=fD9Jz4GuSY4 -https://www.youtube.com/watch?v=DGzLoTmx6JQ -https://www.youtube.com/watch?v=dovFWM5KjLU -https://www.youtube.com/watch?v=ZvlXe9HEQXQ -https://www.youtube.com/watch?v=-69wodyyiVw -https://www.youtube.com/watch?v=ymd-9Mlusbg -https://www.youtube.com/watch?v=5NM8qn6Hz20 -https://www.youtube.com/watch?v=aZLq0ODPkJs -https://www.youtube.com/watch?v=E8idIfNUTGA -https://www.youtube.com/watch?v=9tLuJxoySL0 -https://www.youtube.com/watch?v=TAOo-sTxYWw -https://www.youtube.com/watch?v=6vqRtTHv2l8 -https://www.youtube.com/watch?v=u2-XFiAeZ0M -https://www.youtube.com/watch?v=GSevUSqfbKM -https://www.youtube.com/watch?v=ZAx17rgrhM4 -https://www.youtube.com/watch?v=beW0wPoKU08 -https://www.youtube.com/watch?v=ffi0bNkc7iw -https://www.youtube.com/watch?v=YN_bIXecRzk -https://www.youtube.com/watch?v=f5XA4-NaHfk -https://www.youtube.com/watch?v=41L2f8-Gp1E -https://www.youtube.com/watch?v=cyN4abf_cUQ -https://www.youtube.com/watch?v=kZwS6uracK0 -https://www.youtube.com/watch?v=ssJ2YAl-W60 -https://www.youtube.com/watch?v=Zn_ngvMUp8s -https://www.youtube.com/watch?v=of80gd4-_rU -https://www.youtube.com/watch?v=scj5d81nEWY -https://www.youtube.com/watch?v=24SMoy1JqxU -https://www.youtube.com/watch?v=SsqrVhVWBtQ 
-https://www.youtube.com/watch?v=qRuuDA3Oy1k -https://www.youtube.com/watch?v=E-II-TTGm1s -https://www.youtube.com/watch?v=a3kKG0hEbE4 -https://www.youtube.com/watch?v=UUi3KUyAJVw -https://www.youtube.com/watch?v=0f4Tl-y1SHY -https://www.youtube.com/watch?v=rNWD8g2gYlU -https://www.youtube.com/watch?v=E5KbVk6kFo8 -https://www.youtube.com/watch?v=9EQPXEvgaT0 -https://www.youtube.com/watch?v=VijmHIURpAg -https://www.youtube.com/watch?v=XZVeeC2MFps -https://www.youtube.com/watch?v=MU1izPlV7mE -https://www.youtube.com/watch?v=YzvEiKysxfI -https://www.youtube.com/watch?v=S-zswgmxRWk -https://www.youtube.com/watch?v=irR7K8QC5Mw -https://www.youtube.com/watch?v=8fboEbvBP4U -https://www.youtube.com/watch?v=Ehi60JJR6K8 -https://www.youtube.com/watch?v=unQ37i1fI3E -https://www.youtube.com/watch?v=Sl1xZIVwQzE -https://www.youtube.com/watch?v=EsgjQP8kc-4 -https://www.youtube.com/watch?v=-CS0ojb2VjA -https://www.youtube.com/watch?v=9_1qSUWAtzM -https://www.youtube.com/watch?v=H5_guRjO7qc -https://www.youtube.com/watch?v=GEi5YyVLB5M -https://www.youtube.com/watch?v=kMdZXT_6Jmo -https://www.youtube.com/watch?v=Kw1KahIAPkI -https://www.youtube.com/watch?v=sCat14cTzYA -https://www.youtube.com/watch?v=oQeBFLY3WL4 -https://www.youtube.com/watch?v=G0wp8-Il2RY -https://www.youtube.com/watch?v=PWlydfB627s -https://www.youtube.com/watch?v=aDUtimJ1GL8 -https://www.youtube.com/watch?v=vdaLx-wJ118 -https://www.youtube.com/watch?v=SggLcqRWUcU -https://www.youtube.com/watch?v=X_jKmYUcbmE -https://www.youtube.com/watch?v=DBi96HRvEug -https://www.youtube.com/watch?v=W-RrbD170uM -https://www.youtube.com/watch?v=z3JNl4ABEMU -https://www.youtube.com/watch?v=0vQu4oSMdQI -https://www.youtube.com/watch?v=s1Z1731q5e0 -https://www.youtube.com/watch?v=Et1ErNdiqXI -https://www.youtube.com/watch?v=b8HdEnMG9Mw -https://www.youtube.com/watch?v=4Jn35uhyAdU -https://www.youtube.com/watch?v=7g8nStM_XbM -https://www.youtube.com/watch?v=fNYdM_UWg_I -https://www.youtube.com/watch?v=EA2TyYTpMlQ -https://www.youtube.com/watch?v=-gqWADvlZ44 -https://www.youtube.com/watch?v=pjsMnKiyMmI -https://www.youtube.com/watch?v=-B8qoqH0i1Y -https://www.youtube.com/watch?v=nE-vyXDpjYU -https://www.youtube.com/watch?v=0Ok2fGNfbDU -https://www.youtube.com/watch?v=IXee870AMAs -https://www.youtube.com/watch?v=ye9lx_rbQ8o -https://www.youtube.com/watch?v=Ku8aWQaLIBQ -https://www.youtube.com/watch?v=junMQDztHck -https://www.youtube.com/watch?v=xngdVZtDh7I -https://www.youtube.com/watch?v=DARiF_HgHts -https://www.youtube.com/watch?v=jw7-9lEo1kc -https://www.youtube.com/watch?v=ODPGJPoga1A -https://www.youtube.com/watch?v=ZBybfxJyQuE -https://www.youtube.com/watch?v=mrPeOtu_6cU -https://www.youtube.com/watch?v=E3fxsRem3rA -https://www.youtube.com/watch?v=5Oa8Uec_eBg -https://www.youtube.com/watch?v=pcJxFJcHkuo -https://www.youtube.com/watch?v=Zl4Gbaun1pA -https://www.youtube.com/watch?v=EhKgh4N1AXc -https://www.youtube.com/watch?v=urGuSLPunlU -https://www.youtube.com/watch?v=4o0i3UFDJBA -https://www.youtube.com/watch?v=JXX3NQKvpIg -https://www.youtube.com/watch?v=EEwrQrEtivk -https://www.youtube.com/watch?v=FSxmGJBvYbU -https://www.youtube.com/watch?v=_eCnHwhXaTI -https://www.youtube.com/watch?v=xGtKzO7r0GI -https://www.youtube.com/watch?v=U5S9E6KMNns -https://www.youtube.com/watch?v=zF0yuAUI1F0 -https://www.youtube.com/watch?v=6lEE8tAk8YE -https://www.youtube.com/watch?v=dXrLLUqzlCM -https://www.youtube.com/watch?v=hOLrUvPyF68 -https://www.youtube.com/watch?v=aFwCB5VOk_c -https://www.youtube.com/watch?v=nKCjiJ5MJ9s -https://www.youtube.com/watch?v=il_9MesqVEw 
-https://www.youtube.com/watch?v=DhHrSXSJ9sQ -https://www.youtube.com/watch?v=vRshEolL8eM -https://www.youtube.com/watch?v=_UOmXO1t0ms -https://www.youtube.com/watch?v=_Lq0LKMTsTc -https://www.youtube.com/watch?v=1rZgsDAohi8 -https://www.youtube.com/watch?v=rZl1NAjtlr8 -https://www.youtube.com/watch?v=KFWegI-YGBw -https://www.youtube.com/watch?v=Nhqny-t2BoA -https://www.youtube.com/watch?v=rHf1PBmve8U -https://www.youtube.com/watch?v=Qhm3rTNuu1c -https://www.youtube.com/watch?v=_mGDcyryvuQ -https://www.youtube.com/watch?v=qo0cNivWHwI -https://www.youtube.com/watch?v=KjoN6pDVw7c -https://www.youtube.com/watch?v=hYLAZNqx9Sc -https://www.youtube.com/watch?v=Y4GEzEh4BDY -https://www.youtube.com/watch?v=SFfDx-SSDzo -https://www.youtube.com/watch?v=vt6I-SUokgs -https://www.youtube.com/watch?v=4Eqz9U1oEpE -https://www.youtube.com/watch?v=iBdn0aG6SCY -https://www.youtube.com/watch?v=5YGOwYF5zlE -https://www.youtube.com/watch?v=iqdS0qfA1iw -https://www.youtube.com/watch?v=S6SvIe3Kxa0 -https://www.youtube.com/watch?v=0JV0SuPtWwU -https://www.youtube.com/watch?v=UB-YooM-NIY -https://www.youtube.com/watch?v=4f7uOAxYQKk -https://www.youtube.com/watch?v=ODYcEncY9Z8 -https://www.youtube.com/watch?v=z1gsZhSRs_A -https://www.youtube.com/watch?v=lQHEDa6vDhk -https://www.youtube.com/watch?v=Y33t3LEoTlM -https://www.youtube.com/watch?v=SOk9ROkKPrA -https://www.youtube.com/watch?v=lxPy60KW3VY -https://www.youtube.com/watch?v=reT95LPQCoM -https://www.youtube.com/watch?v=jmrqWtANVm0 -https://www.youtube.com/watch?v=3TfciDvpMOU -https://www.youtube.com/watch?v=HW677VglUgs -https://www.youtube.com/watch?v=DtYkKDkGrqo -https://www.youtube.com/watch?v=128YFZf8DGo -https://www.youtube.com/watch?v=KujWR5rPJ1o -https://www.youtube.com/watch?v=RZ6g7zRVaOA -https://www.youtube.com/watch?v=3L_yf8TO1P0 -https://www.youtube.com/watch?v=RJuY4t_58Y0 -https://www.youtube.com/watch?v=DLTlIVuawAE -https://www.youtube.com/watch?v=QfF9mpVq_14 -https://www.youtube.com/watch?v=OqGD8MNJKnI -https://www.youtube.com/watch?v=O4BqK1cylmQ -https://www.youtube.com/watch?v=vu2FttWQKMg -https://www.youtube.com/watch?v=Yh2nT6crCiE -https://www.youtube.com/watch?v=dKbRkBvtohg -https://www.youtube.com/watch?v=D0eDZjIwAmI -https://www.youtube.com/watch?v=AD37qE7t0ck -https://www.youtube.com/watch?v=l3UU8A8JEE8 -https://www.youtube.com/watch?v=GshhMRTjwZs -https://www.youtube.com/watch?v=-R-0EKCgXNY -https://www.youtube.com/watch?v=WhXJJQzmlTQ -https://www.youtube.com/watch?v=x0Fx3YGbvrs -https://www.youtube.com/watch?v=P5T1dXkG7-I -https://www.youtube.com/watch?v=7VO6E6Nj75c -https://www.youtube.com/watch?v=CivVo4AbbVo -https://www.youtube.com/watch?v=MKrMnu22z9c -https://www.youtube.com/watch?v=2YgNc05_Z7E -https://www.youtube.com/watch?v=6HJi1cg-gBE -https://www.youtube.com/watch?v=felrJtLc3UY -https://www.youtube.com/watch?v=U8HIIF-W3zE -https://www.youtube.com/watch?v=EYwNAObexJk -https://www.youtube.com/watch?v=iq4lpHbGQ60 -https://www.youtube.com/watch?v=ANpWkCGe6Zk -https://www.youtube.com/watch?v=a7W0t-Q5cFs -https://www.youtube.com/watch?v=9oDYxrEfVzM -https://www.youtube.com/watch?v=okbFZMnxoGQ -https://www.youtube.com/watch?v=Npb88SaLOPc -https://www.youtube.com/watch?v=S6u-py6UlX4 -https://www.youtube.com/watch?v=FXmNHhFOUuM -https://www.youtube.com/watch?v=q9p7i0Jb4rg -https://www.youtube.com/watch?v=6ZaG2I7mXcM -https://www.youtube.com/watch?v=ojDYVBeK_d4 -https://www.youtube.com/watch?v=HisWu1kZcTI -https://www.youtube.com/watch?v=WXm5T0AWE04 -https://www.youtube.com/watch?v=6mI2rvHbCQE -https://www.youtube.com/watch?v=hEYARh2flvc 
-https://www.youtube.com/watch?v=MVTmbHpeQwg -https://www.youtube.com/watch?v=DN7Pp-qdKY8 -https://www.youtube.com/watch?v=nGLB3uIhvdA -https://www.youtube.com/watch?v=VDq2whjVOQ8 -https://www.youtube.com/watch?v=2uFJkQJHX7s -https://www.youtube.com/watch?v=fRJrdKVfA4E -https://www.youtube.com/watch?v=JXtbeBL7iog -https://www.youtube.com/watch?v=1HUZpi6Kx5g -https://www.youtube.com/watch?v=j-UII0WaS-w -https://www.youtube.com/watch?v=HMjlEN2YgLg -https://www.youtube.com/watch?v=9TqFfzAzbNE -https://www.youtube.com/watch?v=GZSslRRYJg8 -https://www.youtube.com/watch?v=sR123A-THRs -https://www.youtube.com/watch?v=bt3X8MJgJWo -https://www.youtube.com/watch?v=NXEmtBe3R2c -https://www.youtube.com/watch?v=Aw7KY5ryvNo -https://www.youtube.com/watch?v=iIyfFJZuxSs -https://www.youtube.com/watch?v=JR2R1yTcUyk -https://www.youtube.com/watch?v=ID2HSLcAKmE -https://www.youtube.com/watch?v=sxuixFTH4Y8 -https://www.youtube.com/watch?v=4veS9jm-utw -https://www.youtube.com/watch?v=-bNr6-8xHhE -https://www.youtube.com/watch?v=mLscN6cV89k -https://www.youtube.com/watch?v=cZlMf1khndo -https://www.youtube.com/watch?v=FXWA9qKxhXM -https://www.youtube.com/watch?v=Gen3Ng42Md4 -https://www.youtube.com/watch?v=AqYhwYiXmvM -https://www.youtube.com/watch?v=rUnmd1j4mkM -https://www.youtube.com/watch?v=b3C6X_DUwbs -https://www.youtube.com/watch?v=I7xIcND2oBs -https://www.youtube.com/watch?v=grkcZZo9nXE -https://www.youtube.com/watch?v=rF3gnarthUE -https://www.youtube.com/watch?v=ft-hzWHU9ac -https://www.youtube.com/watch?v=w4tasjBq9pI -https://www.youtube.com/watch?v=JiCFWNxWwAs -https://www.youtube.com/watch?v=DpaZOzMXHnI -https://www.youtube.com/watch?v=2OwwoTVQ7Uc -https://www.youtube.com/watch?v=MdqKCXXXRBs -https://www.youtube.com/watch?v=bZgOyRXBqMM -https://www.youtube.com/watch?v=dmAteOuRBfQ -https://www.youtube.com/watch?v=h_e-al5r4gk -https://www.youtube.com/watch?v=pi3N_wQS2n0 -https://www.youtube.com/watch?v=4SD-hsZ2Cso -https://www.youtube.com/watch?v=mtd2vwD7t5I -https://www.youtube.com/watch?v=0zj6do0S0rc -https://www.youtube.com/watch?v=sO_hQgkul_w -https://www.youtube.com/watch?v=p3nf7CuskI4 -https://www.youtube.com/watch?v=Ck-RlFjl6UA -https://www.youtube.com/watch?v=GWg_lkXHg6A -https://www.youtube.com/watch?v=_I34dV-BhaA -https://www.youtube.com/watch?v=SWpRsRAAyXE -https://www.youtube.com/watch?v=uXZqwKp255E -https://www.youtube.com/watch?v=60Tt-BZ2QF4 -https://www.youtube.com/watch?v=acBMw3OL_8k -https://www.youtube.com/watch?v=aZpKNjN-mnA -https://www.youtube.com/watch?v=eQ9LWl_NpxE -https://www.youtube.com/watch?v=QbVtkJI56x4 -https://www.youtube.com/watch?v=0S4iduG2DoY -https://www.youtube.com/watch?v=5IAD-EFqgX4 -https://www.youtube.com/watch?v=hVAanRQ6kmI -https://www.youtube.com/watch?v=GKMr408YsLI -https://www.youtube.com/watch?v=trrwQ2_mr10 -https://www.youtube.com/watch?v=EN-mjnSO8rU -https://www.youtube.com/watch?v=ssujDv_dWCQ -https://www.youtube.com/watch?v=0gZCi6hZjqc -https://www.youtube.com/watch?v=ueATjovo5TM -https://www.youtube.com/watch?v=l5nzkdMPeTM -https://www.youtube.com/watch?v=ejxUiHoZqoo -https://www.youtube.com/watch?v=jK04XF1iK3g -https://www.youtube.com/watch?v=7M33poh57dg -https://www.youtube.com/watch?v=gwfCxfvMQuQ -https://www.youtube.com/watch?v=RxX_q9ra7XM -https://www.youtube.com/watch?v=5qFfbpA6hjg -https://www.youtube.com/watch?v=PbFKX8EZGAM -https://www.youtube.com/watch?v=Lc_SsKBQqMs -https://www.youtube.com/watch?v=unsrEUNXxGQ -https://www.youtube.com/watch?v=VKLI2iMY7v4 -https://www.youtube.com/watch?v=ymgo_bReR9A -https://www.youtube.com/watch?v=DOruZP0mpNc 
-https://www.youtube.com/watch?v=TndhOZv6ejE -https://www.youtube.com/watch?v=Ex_5kLFxpHk -https://www.youtube.com/watch?v=5Oga5OYv-sU -https://www.youtube.com/watch?v=fzEcs1s1jNk -https://www.youtube.com/watch?v=kKjY7Pj4K3o -https://www.youtube.com/watch?v=-OTZxe3IZ0Y -https://www.youtube.com/watch?v=rnUxkSviq0w -https://www.youtube.com/watch?v=u9ob0AOX3DE -https://www.youtube.com/watch?v=HWplP9yPSrk -https://www.youtube.com/watch?v=bi4qdsDEQk4 -https://www.youtube.com/watch?v=K-dvZrLMPFg -https://www.youtube.com/watch?v=05OF14NhSVQ -https://www.youtube.com/watch?v=56T4a3FoWxc -https://www.youtube.com/watch?v=dJr-KpDS86M -https://www.youtube.com/watch?v=d8lGr1e7rHc -https://www.youtube.com/watch?v=DoCiNSlYEuM -https://www.youtube.com/watch?v=pHhIGaECBIU -https://www.youtube.com/watch?v=jbNxu-lkamQ -https://www.youtube.com/watch?v=Mu3klSAn8_M -https://www.youtube.com/watch?v=GaqU6VbRfjA -https://www.youtube.com/watch?v=fq4Q90CjxFw -https://www.youtube.com/watch?v=oSXLaLC6zks -https://www.youtube.com/watch?v=Y8jW2jm6Rho -https://www.youtube.com/watch?v=uRAc3_UsXs8 -https://www.youtube.com/watch?v=W1n5dvzGph0 -https://www.youtube.com/watch?v=uWhiz9or3PA -https://www.youtube.com/watch?v=_A-3gkPVmc0 -https://www.youtube.com/watch?v=huVwqWg2vM4 -https://www.youtube.com/watch?v=_P6k6AdXts4 -https://www.youtube.com/watch?v=OcMnLtgMHEY -https://www.youtube.com/watch?v=r87feQvLfLI -https://www.youtube.com/watch?v=39A9XqLgtsk -https://www.youtube.com/watch?v=f5D79JWJq20 -https://www.youtube.com/watch?v=M6znxNv8TvI -https://www.youtube.com/watch?v=9rv04oK-PSo -https://www.youtube.com/watch?v=6KWq6lNjdQ8 -https://www.youtube.com/watch?v=sZWHJvC847c -https://www.youtube.com/watch?v=Jzoc9dyaOZ8 -https://www.youtube.com/watch?v=6253hY1D0oY -https://www.youtube.com/watch?v=TpRAHZun_wo -https://www.youtube.com/watch?v=EX8y8u2nTJQ -https://www.youtube.com/watch?v=ZEbobft1Awo -https://www.youtube.com/watch?v=SFe1RqTiAYc -https://www.youtube.com/watch?v=5KM9hc1V_qw -https://www.youtube.com/watch?v=G9B-P8msN74 -https://www.youtube.com/watch?v=-3zkv6CKMfU -https://www.youtube.com/watch?v=Cckr3dhQOLI -https://www.youtube.com/watch?v=Gy2NB2ncaMU -https://www.youtube.com/watch?v=gN44tf6nx78 -https://www.youtube.com/watch?v=ShdOISVUjHw -https://www.youtube.com/watch?v=Lm5IFC9ALfk -https://www.youtube.com/watch?v=_rZYSwKpLX0 -https://www.youtube.com/watch?v=L92zNi0MNRM -https://www.youtube.com/watch?v=n0UoWuP9OiA -https://www.youtube.com/watch?v=vg335u2KZy4 -https://www.youtube.com/watch?v=HlwRYcXoM-A -https://www.youtube.com/watch?v=DyaWG0Pff6w -https://www.youtube.com/watch?v=J4t265zWn04 -https://www.youtube.com/watch?v=Z66kRbSH_uU -https://www.youtube.com/watch?v=oKuaW6z16EA -https://www.youtube.com/watch?v=vLN3Vy3BRDk -https://www.youtube.com/watch?v=pnDDrLoNjvE -https://www.youtube.com/watch?v=sfKcuh-4KXk -https://www.youtube.com/watch?v=MUV2tbTe-gk -https://www.youtube.com/watch?v=SeHGsjnTjGY -https://www.youtube.com/watch?v=4p1_h_-HEPs -https://www.youtube.com/watch?v=57yQ1qJhJe0 -https://www.youtube.com/watch?v=17HNRtQyAGU -https://www.youtube.com/watch?v=C-bvICeWw_M -https://www.youtube.com/watch?v=kQ7hd-68au4 -https://www.youtube.com/watch?v=s6o2AtE-kUI -https://www.youtube.com/watch?v=YoMMu1gBahs -https://www.youtube.com/watch?v=VNfm4RT431g -https://www.youtube.com/watch?v=rrYlQlJwkus -https://www.youtube.com/watch?v=oonEB-IPBaU -https://www.youtube.com/watch?v=Dcac2B3qvWg -https://www.youtube.com/watch?v=HZqKKzn2UFs -https://www.youtube.com/watch?v=AM-FGYVsoBc -https://www.youtube.com/watch?v=5OX7CLr4PLE 
-https://www.youtube.com/watch?v=HAWvUl5dyDk -https://www.youtube.com/watch?v=1eqqdP0K4Vg -https://www.youtube.com/watch?v=aDPG1-baku0 -https://www.youtube.com/watch?v=smJio-3-LIM -https://www.youtube.com/watch?v=sBr1clm9xdI -https://www.youtube.com/watch?v=ULzPldjmoYM -https://www.youtube.com/watch?v=7tW7QBkGQrc -https://www.youtube.com/watch?v=FXmWRe2FQU8 -https://www.youtube.com/watch?v=ZyJ6Mx_j8go -https://www.youtube.com/watch?v=8HeODJvg2cc -https://www.youtube.com/watch?v=0nQQpVvH01k -https://www.youtube.com/watch?v=7_Qw2VsZ_fE -https://www.youtube.com/watch?v=4rLiF8ROqgQ -https://www.youtube.com/watch?v=lSdrRidKmRI -https://www.youtube.com/watch?v=9BoyIFnQvlo -https://www.youtube.com/watch?v=YXxUEgD9g5E -https://www.youtube.com/watch?v=HOGUAI-kJFc -https://www.youtube.com/watch?v=67oQXgK7Vz8 -https://www.youtube.com/watch?v=co8DCZR_0s4 -https://www.youtube.com/watch?v=hb6j3L0RDaE -https://www.youtube.com/watch?v=6ZtkrtIPo4Q -https://www.youtube.com/watch?v=0LZmbG61eDY -https://www.youtube.com/watch?v=JOgjRV8K5lc -https://www.youtube.com/watch?v=LxYu4xXKfyw -https://www.youtube.com/watch?v=N8MJhB4L8v4 -https://www.youtube.com/watch?v=n4bkV1s-PPY -https://www.youtube.com/watch?v=4Wa6XJEDYH4 -https://www.youtube.com/watch?v=n4udGJAqe6c -https://www.youtube.com/watch?v=zFNTtZXUvhc -https://www.youtube.com/watch?v=FtvVf23ZDjY -https://www.youtube.com/watch?v=Aqv3MX-dHT4 -https://www.youtube.com/watch?v=4uqg6MuwNc0 -https://www.youtube.com/watch?v=f18SkV6hFvo -https://www.youtube.com/watch?v=ZAD5eCOWIS8 -https://www.youtube.com/watch?v=0CUF13oxaVE -https://www.youtube.com/watch?v=yHiVYkb5eWo -https://www.youtube.com/watch?v=vyNo6dW7XrI -https://www.youtube.com/watch?v=EwMMdVKF22I -https://www.youtube.com/watch?v=6YpC-pGDRyQ -https://www.youtube.com/watch?v=y2NixaBN6xw -https://www.youtube.com/watch?v=TCyvJv6qv10 -https://www.youtube.com/watch?v=ubsYhOEBmWI -https://www.youtube.com/watch?v=ul5Xai16hjE -https://www.youtube.com/watch?v=QKC8cerBW-s -https://www.youtube.com/watch?v=_v9sKsaDJ0s -https://www.youtube.com/watch?v=fJM0M7I4bmM -https://www.youtube.com/watch?v=dmeajYLmHOQ -https://www.youtube.com/watch?v=9DtM-zA1_0Y -https://www.youtube.com/watch?v=20NI7mGf0kE -https://www.youtube.com/watch?v=UfLuPlzcq1o -https://www.youtube.com/watch?v=7xT1b0AzI1Y -https://www.youtube.com/watch?v=SaiyvwevIno -https://www.youtube.com/watch?v=L8RX2_DVKqY -https://www.youtube.com/watch?v=K2VRYB9PSIY -https://www.youtube.com/watch?v=z1iBdDvIXKg -https://www.youtube.com/watch?v=TjRiRF8nXEo -https://www.youtube.com/watch?v=UKS0YrXoRdA -https://www.youtube.com/watch?v=did4sI6zq_A -https://www.youtube.com/watch?v=wf5-HRTKg-E -https://www.youtube.com/watch?v=nB6y1x4pbi4 -https://www.youtube.com/watch?v=9JsoIuIxopQ -https://www.youtube.com/watch?v=DsuREQMVjNY -https://www.youtube.com/watch?v=z5GgUQak-us -https://www.youtube.com/watch?v=yiVAyD9ozCo -https://www.youtube.com/watch?v=roHXzgobmnw -https://www.youtube.com/watch?v=XaOHdTuxuPM -https://www.youtube.com/watch?v=PNlmaXsixvQ -https://www.youtube.com/watch?v=PCxRTU3rBTE -https://www.youtube.com/watch?v=e3BCGTYsDOs -https://www.youtube.com/watch?v=QRv90i58W_E -https://www.youtube.com/watch?v=33PovAKeH0E -https://www.youtube.com/watch?v=FlGvWltFCmA -https://www.youtube.com/watch?v=b-ffdH8Hg08 -https://www.youtube.com/watch?v=Si0KHefiKaM -https://www.youtube.com/watch?v=2dgFiwdcorY -https://www.youtube.com/watch?v=cFVF-6pX9R0 -https://www.youtube.com/watch?v=OUkHgcG2v2M -https://www.youtube.com/watch?v=gm9LqOfnZOI -https://www.youtube.com/watch?v=_UQO2LM1WPw 
-https://www.youtube.com/watch?v=LKwG21IEWUw -https://www.youtube.com/watch?v=S4hoDHN_wQY -https://www.youtube.com/watch?v=8MV8Yy5GImE -https://www.youtube.com/watch?v=oF2HTr_XgVM -https://www.youtube.com/watch?v=CH9JaEBW83Y -https://www.youtube.com/watch?v=pxjDuq3r4iI -https://www.youtube.com/watch?v=3dUJdTxxCiE -https://www.youtube.com/watch?v=HcfxxE__Xjc -https://www.youtube.com/watch?v=GqVZLE5yxtY -https://www.youtube.com/watch?v=FfrhxtShg5s -https://www.youtube.com/watch?v=j6_gXJ54OFA -https://www.youtube.com/watch?v=UZ-qB0EKdEM -https://www.youtube.com/watch?v=SOgvMA1G7_M -https://www.youtube.com/watch?v=S5abWlkirvU -https://www.youtube.com/watch?v=3MXfmTMJ_SI -https://www.youtube.com/watch?v=hT8KURdMMi4 -https://www.youtube.com/watch?v=UoIr_9J12RY -https://www.youtube.com/watch?v=dPCNW1dFigg -https://www.youtube.com/watch?v=IWc4RArypJs -https://www.youtube.com/watch?v=ZghfaSSd3dQ -https://www.youtube.com/watch?v=H_YFW94w_FQ -https://www.youtube.com/watch?v=PJv9mk5hltM -https://www.youtube.com/watch?v=7XV_gsf8yow -https://www.youtube.com/watch?v=8NukH9vc2JI -https://www.youtube.com/watch?v=tlNi-QfnOTc -https://www.youtube.com/watch?v=fc77CExfyvA -https://www.youtube.com/watch?v=YDZPIg6K1X4 -https://www.youtube.com/watch?v=BBrIcZ8VT4E -https://www.youtube.com/watch?v=yFGpm9oLs20 -https://www.youtube.com/watch?v=G81ehUvbzrU -https://www.youtube.com/watch?v=NJARNmRC1NY -https://www.youtube.com/watch?v=LZNmPPirbFQ -https://www.youtube.com/watch?v=ZqZN7qNyQso -https://www.youtube.com/watch?v=X6bFHcU6u9w -https://www.youtube.com/watch?v=Ej4LIXsaLag -https://www.youtube.com/watch?v=7T0ntzJQAO8 -https://www.youtube.com/watch?v=VwOk7Y7n_0k -https://www.youtube.com/watch?v=m5IERpTJLaw -https://www.youtube.com/watch?v=_irlrf9Qhl0 -https://www.youtube.com/watch?v=sNeSgwgyjPA -https://www.youtube.com/watch?v=tVoFvkB2weE -https://www.youtube.com/watch?v=p4T-uoIF_6Y -https://www.youtube.com/watch?v=qNZRvUK_ogw -https://www.youtube.com/watch?v=0nHJXFiFtnk -https://www.youtube.com/watch?v=wSwPs-6WdfY -https://www.youtube.com/watch?v=ONWWw-hSbsg -https://www.youtube.com/watch?v=lxM_a7xvYGc -https://www.youtube.com/watch?v=ENz1DI-xv08 -https://www.youtube.com/watch?v=VYOpiHVztCE -https://www.youtube.com/watch?v=Sj1mr1vZ1zg -https://www.youtube.com/watch?v=fNfY53BPSmg -https://www.youtube.com/watch?v=vuuQKCbAIjk -https://www.youtube.com/watch?v=z3M-xV_oem4 -https://www.youtube.com/watch?v=pfOYokoGVb0 -https://www.youtube.com/watch?v=HejkqCQe0D8 -https://www.youtube.com/watch?v=7jQUzT2M8X0 -https://www.youtube.com/watch?v=kAA_7GG1LQI -https://www.youtube.com/watch?v=yRXo1l6gRX4 -https://www.youtube.com/watch?v=PULxfbCW1bM -https://www.youtube.com/watch?v=Ow1zVOUxQ3E -https://www.youtube.com/watch?v=09_ieUyQD8s -https://www.youtube.com/watch?v=delIGj2LDts -https://www.youtube.com/watch?v=ir4fhBd9PW8 -https://www.youtube.com/watch?v=lQ9smpWZ_dQ -https://www.youtube.com/watch?v=9XkO3mye-1w -https://www.youtube.com/watch?v=KVzjmF3sFxs -https://www.youtube.com/watch?v=0VusddXJIsI -https://www.youtube.com/watch?v=6_ssgMPsgbI -https://www.youtube.com/watch?v=VbZw9HweTeo -https://www.youtube.com/watch?v=00SImiNjHyM -https://www.youtube.com/watch?v=dO6JKS6wbAA -https://www.youtube.com/watch?v=wpqMNn1IhDw -https://www.youtube.com/watch?v=sSv3ihsOxvM -https://www.youtube.com/watch?v=6mFcIil6_z0 -https://www.youtube.com/watch?v=Sgx-2mDqSzc -https://www.youtube.com/watch?v=A8AB7TpQxEs -https://www.youtube.com/watch?v=KS7eqksUg9o -https://www.youtube.com/watch?v=QF343yGE3CY -https://www.youtube.com/watch?v=T7XOVJRUa-0 
-https://www.youtube.com/watch?v=-D0ZbmuNZR4 -https://www.youtube.com/watch?v=SZ5uMY2BnjI -https://www.youtube.com/watch?v=Wz2OGaWFY0E -https://www.youtube.com/watch?v=Wz6ofYXGc88 -https://www.youtube.com/watch?v=gbG5N1WxE3Q -https://www.youtube.com/watch?v=htET1lBbmBI -https://www.youtube.com/watch?v=rKTMv0Em8XY -https://www.youtube.com/watch?v=73tgi-8FxGI -https://www.youtube.com/watch?v=io8FePn3Z6A -https://www.youtube.com/watch?v=SptD8UmCrRM -https://www.youtube.com/watch?v=WvcDTzX0vr8 -https://www.youtube.com/watch?v=-_5rhG_EjTg -https://www.youtube.com/watch?v=1UAwlk6sRZo -https://www.youtube.com/watch?v=DCQmVzOifcg -https://www.youtube.com/watch?v=3cT4Sw_Dkhg -https://www.youtube.com/watch?v=pfdcN2FcKug -https://www.youtube.com/watch?v=JHD5Jt9J41U -https://www.youtube.com/watch?v=4WjBo5p6eTw -https://www.youtube.com/watch?v=Hl6AB6oViQs -https://www.youtube.com/watch?v=5XNQo4CC9dM -https://www.youtube.com/watch?v=Fnr1v1wYy60 -https://www.youtube.com/watch?v=2sKHlRpOMMo -https://www.youtube.com/watch?v=Ou6UEeUGIDA -https://www.youtube.com/watch?v=x6pX2rT-SqM -https://www.youtube.com/watch?v=dINr88UYHgc -https://www.youtube.com/watch?v=S3UOmaM-PpU -https://www.youtube.com/watch?v=8pXy8bh0Q0Q -https://www.youtube.com/watch?v=jhJFYfacwAc -https://www.youtube.com/watch?v=Hy67pOxlTgQ -https://www.youtube.com/watch?v=64-sAuJ8K7Y -https://www.youtube.com/watch?v=xaGWnjP7NVQ -https://www.youtube.com/watch?v=vYu3tkfaEcc -https://www.youtube.com/watch?v=6kO7vguhv2M -https://www.youtube.com/watch?v=rZjpxCSbu9w -https://www.youtube.com/watch?v=F4SZ4kNkX0c -https://www.youtube.com/watch?v=MeMR02kR3gE -https://www.youtube.com/watch?v=hm9vnD7gsCQ -https://www.youtube.com/watch?v=MaKkOahuC78 -https://www.youtube.com/watch?v=mc1FhpTls-A -https://www.youtube.com/watch?v=7XOxmV4ddNI -https://www.youtube.com/watch?v=b2o7a0D1ALs -https://www.youtube.com/watch?v=_oSutnfDUf8 -https://www.youtube.com/watch?v=taiQxc18xk0 -https://www.youtube.com/watch?v=kI6lgu9OL9I -https://www.youtube.com/watch?v=UNE-Himotz0 -https://www.youtube.com/watch?v=uD70OQniXTQ -https://www.youtube.com/watch?v=vs72Ng_jodU -https://www.youtube.com/watch?v=yanyMgdNfWU -https://www.youtube.com/watch?v=sGtTSEDc_w8 -https://www.youtube.com/watch?v=p-tCzjrh52Q -https://www.youtube.com/watch?v=PMQJ3dHi-JQ -https://www.youtube.com/watch?v=zIt66_3zYfM -https://www.youtube.com/watch?v=8mAMPRNl7ZQ -https://www.youtube.com/watch?v=ktFVOeYnwJI -https://www.youtube.com/watch?v=sKfIsBA_k60 -https://www.youtube.com/watch?v=y2axw12xLlc -https://www.youtube.com/watch?v=yDPEq8ObtXg -https://www.youtube.com/watch?v=tjJX5oB4EF0 -https://www.youtube.com/watch?v=WERFogub0MY -https://www.youtube.com/watch?v=JfN1S9Cil1I -https://www.youtube.com/watch?v=OUY9hCLQ6s8 -https://www.youtube.com/watch?v=kpg0Q0KEvyU -https://www.youtube.com/watch?v=AxgSz2gh6WE -https://www.youtube.com/watch?v=sKRDjWd4KCw -https://www.youtube.com/watch?v=mBl2wOUv5RA -https://www.youtube.com/watch?v=-i_xeg7jssk -https://www.youtube.com/watch?v=OG2kKOmey4Q -https://www.youtube.com/watch?v=mbHsmGm0F8c -https://www.youtube.com/watch?v=pn2D3Wd5Lc4 -https://www.youtube.com/watch?v=8MFvpgA6ylk -https://www.youtube.com/watch?v=BKC7rXf54-k -https://www.youtube.com/watch?v=Q7Q5ygmO3mU -https://www.youtube.com/watch?v=WwbqQ3TPeok -https://www.youtube.com/watch?v=_1GYGOaMCes -https://www.youtube.com/watch?v=jCJ-nEx2ko0 -https://www.youtube.com/watch?v=nxQ8UPOk4tc -https://www.youtube.com/watch?v=13m9TxErPRI -https://www.youtube.com/watch?v=IRcFyWNZeAA -https://www.youtube.com/watch?v=UL6kx-t_xM8 
-https://www.youtube.com/watch?v=9gtn2fmvRlA -https://www.youtube.com/watch?v=xJjpIjfEPoc -https://www.youtube.com/watch?v=-sXNKkOsMNs -https://www.youtube.com/watch?v=883w-T9wHBs -https://www.youtube.com/watch?v=a1EQBbKRfoc -https://www.youtube.com/watch?v=b7_7qpLvKpQ -https://www.youtube.com/watch?v=r2abEcxai08 -https://www.youtube.com/watch?v=VEzH8V7Tt80 -https://www.youtube.com/watch?v=-Zt0PiwL0Wo -https://www.youtube.com/watch?v=5L_hjw0y9WU -https://www.youtube.com/watch?v=oLpS-692p_Y -https://www.youtube.com/watch?v=c0IiK5jpg_I -https://www.youtube.com/watch?v=JZcpGbm4FX4 -https://www.youtube.com/watch?v=RVvUTayv2L8 -https://www.youtube.com/watch?v=mKx9CJ2B-Us -https://www.youtube.com/watch?v=6AWS08oFmO4 -https://www.youtube.com/watch?v=6VNQ91IAE1U -https://www.youtube.com/watch?v=YdITVnpEOiA -https://www.youtube.com/watch?v=z67zFn_E94g -https://www.youtube.com/watch?v=NduD3p7MugA -https://www.youtube.com/watch?v=aMPTcld2R50 -https://www.youtube.com/watch?v=4rsm7Xjv_Hw -https://www.youtube.com/watch?v=cKKi-s6xvGQ -https://www.youtube.com/watch?v=fa8yHVDha9A -https://www.youtube.com/watch?v=2ocd6oVbfSk -https://www.youtube.com/watch?v=xd6BSNRjS9s -https://www.youtube.com/watch?v=kXPkpDzTKUI -https://www.youtube.com/watch?v=TeTxAhmjZDc -https://www.youtube.com/watch?v=pIsznwhTFzA -https://www.youtube.com/watch?v=TX2duib2HvQ -https://www.youtube.com/watch?v=2lSOhcBa1cE -https://www.youtube.com/watch?v=aBHO9vEqeg0 -https://www.youtube.com/watch?v=6kNnsU01z6s -https://www.youtube.com/watch?v=AgXkWyfudCY -https://www.youtube.com/watch?v=63LvptDiyn0 -https://www.youtube.com/watch?v=mpls4RO2Sew -https://www.youtube.com/watch?v=FQQOy3gK0aM -https://www.youtube.com/watch?v=iElhQSrK_gQ -https://www.youtube.com/watch?v=06rJ3VU5XuY -https://www.youtube.com/watch?v=f1uAhp8G2iY -https://www.youtube.com/watch?v=rEolrC_dWCc -https://www.youtube.com/watch?v=XQBl29RFtRw -https://www.youtube.com/watch?v=gGmKewwq_G4 -https://www.youtube.com/watch?v=Ktv_YSp1T48 -https://www.youtube.com/watch?v=1s8kzflWSuI -https://www.youtube.com/watch?v=Yt7Gtls0Rpc -https://www.youtube.com/watch?v=0xHg1q2dybA -https://www.youtube.com/watch?v=D9l-LOBMYTA -https://www.youtube.com/watch?v=hw7fxmn8lJk -https://www.youtube.com/watch?v=uvvAIuFptXw -https://www.youtube.com/watch?v=ds2JXnt5xXs -https://www.youtube.com/watch?v=xkWirP51bFU -https://www.youtube.com/watch?v=-jkmgcbf2AQ -https://www.youtube.com/watch?v=mH8Qrii8EOQ -https://www.youtube.com/watch?v=367pqLOIXcM -https://www.youtube.com/watch?v=3jJDsO137Wc -https://www.youtube.com/watch?v=RDGRu99HgWQ -https://www.youtube.com/watch?v=l4W1EeLdpAg -https://www.youtube.com/watch?v=NjZVHcoxgjY -https://www.youtube.com/watch?v=5UncrLkE6rA -https://www.youtube.com/watch?v=01rfz1uMDP0 -https://www.youtube.com/watch?v=RN6a6FNGbUI -https://www.youtube.com/watch?v=WZIYfMpDXCY -https://www.youtube.com/watch?v=mvrpON9NTk4 -https://www.youtube.com/watch?v=ywM_kpE6x08 -https://www.youtube.com/watch?v=ygPlWXqHsig -https://www.youtube.com/watch?v=mAcwJmv-lCc -https://www.youtube.com/watch?v=IPFFvQDYaf4 -https://www.youtube.com/watch?v=zurAzTl_h38 -https://www.youtube.com/watch?v=fiauZn74bak -https://www.youtube.com/watch?v=uuiFxk428WI -https://www.youtube.com/watch?v=HqayuXwuL3w -https://www.youtube.com/watch?v=E6beeQe7NQA -https://www.youtube.com/watch?v=tKH2QqmekxA -https://www.youtube.com/watch?v=z1T1ObFO_P0 -https://www.youtube.com/watch?v=pk_wGZjE4ZM -https://www.youtube.com/watch?v=I8AFTCfTCNU -https://www.youtube.com/watch?v=ef7YgE16fko -https://www.youtube.com/watch?v=XX9gQ5ogYM0 
-https://www.youtube.com/watch?v=jfwJxQHBuYQ -https://www.youtube.com/watch?v=19Rv6AuPQ2g -https://www.youtube.com/watch?v=0tIXPDwJves -https://www.youtube.com/watch?v=3YPohsVqHU0 -https://www.youtube.com/watch?v=mfJXdvV92jI -https://www.youtube.com/watch?v=mSemBOoh2Yo -https://www.youtube.com/watch?v=0l20ILvAwt4 -https://www.youtube.com/watch?v=qEYFI_z0K3E -https://www.youtube.com/watch?v=280uzhCRR7s -https://www.youtube.com/watch?v=UNJ3S8ivbTQ -https://www.youtube.com/watch?v=FJm_yAT4yDc -https://www.youtube.com/watch?v=skpWa0r4bUI -https://www.youtube.com/watch?v=W8PJbBAcyV0 -https://www.youtube.com/watch?v=TmJS6aj3-dw -https://www.youtube.com/watch?v=k_w_TzpeZxI -https://www.youtube.com/watch?v=K_F3yqv7CJE -https://www.youtube.com/watch?v=QXmlNXcMySE -https://www.youtube.com/watch?v=A1vAAOVmbP0 -https://www.youtube.com/watch?v=PaBqH6VD55E -https://www.youtube.com/watch?v=ucAoopysUAI -https://www.youtube.com/watch?v=vjshLjye-qE -https://www.youtube.com/watch?v=XgQBNjTQ3Is -https://www.youtube.com/watch?v=pF7-2QwXHKc -https://www.youtube.com/watch?v=jctNS45TWpU -https://www.youtube.com/watch?v=PKorRRGUyHw -https://www.youtube.com/watch?v=2v8TzB8-LoI -https://www.youtube.com/watch?v=3VatPXxfjaM -https://www.youtube.com/watch?v=omeKGD5FdIA -https://www.youtube.com/watch?v=u4zV6mXmmKs -https://www.youtube.com/watch?v=YYee20a-QqI -https://www.youtube.com/watch?v=SZ9mdXqtwd8 -https://www.youtube.com/watch?v=zKYRTjUywaY -https://www.youtube.com/watch?v=-9e4GCOOmp8 -https://www.youtube.com/watch?v=6xFLANoml_Q -https://www.youtube.com/watch?v=feMq5y8UB3Q -https://www.youtube.com/watch?v=qZ6tlyBMUqE -https://www.youtube.com/watch?v=fnNqGTWMYBM -https://www.youtube.com/watch?v=Ry8Z5solATY -https://www.youtube.com/watch?v=fnbiyF0buBU -https://www.youtube.com/watch?v=9WXa1LNNe2o -https://www.youtube.com/watch?v=z8UZZZxFeUc -https://www.youtube.com/watch?v=JXaJXwwOJ4Y -https://www.youtube.com/watch?v=uIWXP5jcBv4 -https://www.youtube.com/watch?v=bFyGBhVyiMo -https://www.youtube.com/watch?v=QTzobwv3Yw0 -https://www.youtube.com/watch?v=8Jbm7SL7s74 -https://www.youtube.com/watch?v=jdeZXWw3Nwo -https://www.youtube.com/watch?v=_KtVSsqSl4A -https://www.youtube.com/watch?v=j37CI3fhAwI -https://www.youtube.com/watch?v=IKoQFXPCVL4 -https://www.youtube.com/watch?v=b136ue2BxBo -https://www.youtube.com/watch?v=_p2DNkef5RM -https://www.youtube.com/watch?v=2vo7ix3_jYQ -https://www.youtube.com/watch?v=FuuMOebTHGQ -https://www.youtube.com/watch?v=ZBc6NpA__FY -https://www.youtube.com/watch?v=nP4Vzfp4U4g -https://www.youtube.com/watch?v=xYW0e8Vjpb4 -https://www.youtube.com/watch?v=YMEov5dJ3Ac -https://www.youtube.com/watch?v=2DYzzjwoqNM -https://www.youtube.com/watch?v=wO89RZgChCU -https://www.youtube.com/watch?v=hK4mdtUs9ZE -https://www.youtube.com/watch?v=QdlEzZKJFqc -https://www.youtube.com/watch?v=Fu0E9CLeHhU -https://www.youtube.com/watch?v=XJ_UDNWXYas -https://www.youtube.com/watch?v=2MYqr8atWKQ -https://www.youtube.com/watch?v=GjhNPl6S3cs -https://www.youtube.com/watch?v=GffJ3Yibndo -https://www.youtube.com/watch?v=SoIAPMCaDHI -https://www.youtube.com/watch?v=2IM9mIWhB0E -https://www.youtube.com/watch?v=qSS_73nlO1s -https://www.youtube.com/watch?v=S7RktiWIF5A -https://www.youtube.com/watch?v=eRGC_P3YV2s -https://www.youtube.com/watch?v=etnv3-g0aHk -https://www.youtube.com/watch?v=shW4hoJwg5c -https://www.youtube.com/watch?v=PokWotcGy8g -https://www.youtube.com/watch?v=xzvv4FnuSok -https://www.youtube.com/watch?v=bX5b98wVpoU -https://www.youtube.com/watch?v=51i8HuB2stg -https://www.youtube.com/watch?v=G2jFSnScYKs 
-https://www.youtube.com/watch?v=I6ImYrILAHU -https://www.youtube.com/watch?v=VbIR9XG6EFk -https://www.youtube.com/watch?v=1j__nRqiSmg -https://www.youtube.com/watch?v=TdGi6CgQuW8 -https://www.youtube.com/watch?v=1gQ99t8InuA -https://www.youtube.com/watch?v=Y-8nqbbFUWQ -https://www.youtube.com/watch?v=P9aHdSdql94 -https://www.youtube.com/watch?v=Omb0ipW_Ojo -https://www.youtube.com/watch?v=SKgjRyfg69Y -https://www.youtube.com/watch?v=h1iIlc-bdeM -https://www.youtube.com/watch?v=Bemw7-CrC-Q -https://www.youtube.com/watch?v=kc0VX29APG8 -https://www.youtube.com/watch?v=bN7Si1lY_Oo -https://www.youtube.com/watch?v=2NrgIhlGMss -https://www.youtube.com/watch?v=9nQ_Qs8gilE -https://www.youtube.com/watch?v=sFvJOjgA4bA -https://www.youtube.com/watch?v=w1biOJ2oKQw -https://www.youtube.com/watch?v=zF5cN9P5_aU -https://www.youtube.com/watch?v=_cqBSZPGwfw -https://www.youtube.com/watch?v=2T7hTrXKQIM -https://www.youtube.com/watch?v=icaEyu5gfbI -https://www.youtube.com/watch?v=6tIMyrKyupM -https://www.youtube.com/watch?v=lM4tb6fQ_nU -https://www.youtube.com/watch?v=_go14KzQA8A -https://www.youtube.com/watch?v=QgQFc4DL_yg -https://www.youtube.com/watch?v=BixPQs8sCuc -https://www.youtube.com/watch?v=s3jUI9QawCQ -https://www.youtube.com/watch?v=h31v70v8Usw -https://www.youtube.com/watch?v=QKbHkqK1gnk -https://www.youtube.com/watch?v=zGOQhVS8q_I -https://www.youtube.com/watch?v=W-DwBR0wHD8 -https://www.youtube.com/watch?v=en4Lom0HTVQ -https://www.youtube.com/watch?v=6-TM3WHpXk8 -https://www.youtube.com/watch?v=l-a1tra_LJY -https://www.youtube.com/watch?v=xEheck5jDss -https://www.youtube.com/watch?v=pxOjDtsSaPo -https://www.youtube.com/watch?v=ESDlq4Uza68 -https://www.youtube.com/watch?v=l3599LD9ot0 -https://www.youtube.com/watch?v=jo1Gcx33xg8 -https://www.youtube.com/watch?v=DcADJQWk9AE -https://www.youtube.com/watch?v=DV0mS4OiPv8 -https://www.youtube.com/watch?v=Lk_A8heCZUI -https://www.youtube.com/watch?v=PxMAWsZMmSQ -https://www.youtube.com/watch?v=6lZiUc4LLA8 -https://www.youtube.com/watch?v=Sa9-Is51Wn8 -https://www.youtube.com/watch?v=1RHu2Vfw3v0 -https://www.youtube.com/watch?v=aac2UFmgA-Y -https://www.youtube.com/watch?v=lrzdn6syTrM -https://www.youtube.com/watch?v=xUaZZ6Yr6KI -https://www.youtube.com/watch?v=Njm0h38ljqs -https://www.youtube.com/watch?v=npIwOKtj2yM -https://www.youtube.com/watch?v=sX45wetKJq4 -https://www.youtube.com/watch?v=E1wRBaIFY3c -https://www.youtube.com/watch?v=akmjJAfy0xM -https://www.youtube.com/watch?v=kB62dvTOFhA -https://www.youtube.com/watch?v=LX3VH1I9Qg4 -https://www.youtube.com/watch?v=wSod3-xDe90 -https://www.youtube.com/watch?v=xFHN_xIN-eU -https://www.youtube.com/watch?v=LTtc10Iom6o -https://www.youtube.com/watch?v=-bKFpzTM-MA -https://www.youtube.com/watch?v=BjTO2n_c5eQ -https://www.youtube.com/watch?v=G1dNIRpqZJg -https://www.youtube.com/watch?v=xr_Wgs0BHY8 -https://www.youtube.com/watch?v=zpvmWlkl74s -https://www.youtube.com/watch?v=-dNDoy2sA1c -https://www.youtube.com/watch?v=q55uhizppEk -https://www.youtube.com/watch?v=ms2xZYmhN7E -https://www.youtube.com/watch?v=I1mXIlzbTNQ -https://www.youtube.com/watch?v=OLgjdd4VYCU -https://www.youtube.com/watch?v=flBo_rqbRqc -https://www.youtube.com/watch?v=vgEke8PTzWo -https://www.youtube.com/watch?v=d9Npvs3YUEA -https://www.youtube.com/watch?v=nB8UexTjlts -https://www.youtube.com/watch?v=1YkCgyAttcY -https://www.youtube.com/watch?v=pNLXjgQbKVA -https://www.youtube.com/watch?v=UI9Ay3sP-Ic -https://www.youtube.com/watch?v=fAMtvFi6JBQ -https://www.youtube.com/watch?v=RSjVB0h5TD8 -https://www.youtube.com/watch?v=6PZAVZ4EZVE 
-https://www.youtube.com/watch?v=1enPX90IDjU -https://www.youtube.com/watch?v=PRHYrtgdz70 -https://www.youtube.com/watch?v=qkP5pjASL3o -https://www.youtube.com/watch?v=37zGf0w1Dug -https://www.youtube.com/watch?v=-2SoH9C0tbo -https://www.youtube.com/watch?v=P_1iWnR-tkA -https://www.youtube.com/watch?v=y4dQ6DTWURU -https://www.youtube.com/watch?v=4mPP8uWwxr8 -https://www.youtube.com/watch?v=7q8aLzdUXd8 -https://www.youtube.com/watch?v=CGoej1jmNbQ -https://www.youtube.com/watch?v=rHCqTxAEOOM -https://www.youtube.com/watch?v=LcGqzVfw85M -https://www.youtube.com/watch?v=iTxzkv8kEWI -https://www.youtube.com/watch?v=j1cRaYMlJmQ -https://www.youtube.com/watch?v=U_bg7CNrAZc -https://www.youtube.com/watch?v=rNUFnOO2rXs -https://www.youtube.com/watch?v=voi8bgF1Ijw -https://www.youtube.com/watch?v=3Xbyc8sbpCI -https://www.youtube.com/watch?v=W61OfSeoWBI -https://www.youtube.com/watch?v=RpcYEFBZMnY -https://www.youtube.com/watch?v=UdATWjLvZ10 -https://www.youtube.com/watch?v=1sPiqba_w-c -https://www.youtube.com/watch?v=NGLASxoC4SA -https://www.youtube.com/watch?v=LqYJRx81E6w -https://www.youtube.com/watch?v=kVQqM2w8sm4 -https://www.youtube.com/watch?v=rP4AjhcVIMQ -https://www.youtube.com/watch?v=Ptpl65Z_Q0M -https://www.youtube.com/watch?v=aVnv7Iy_QtE -https://www.youtube.com/watch?v=Kl7o4k6CfXw -https://www.youtube.com/watch?v=M6G1BAa8E9c -https://www.youtube.com/watch?v=fkNCVh4HIBA -https://www.youtube.com/watch?v=8gbRco5_2rg -https://www.youtube.com/watch?v=b2UIhsD2Cvg -https://www.youtube.com/watch?v=tqdQ6W-01FM -https://www.youtube.com/watch?v=Li4PMmYFjOg -https://www.youtube.com/watch?v=LM2_eKw2nw4 -https://www.youtube.com/watch?v=k8Z3_QVcxQA -https://www.youtube.com/watch?v=Sfo6fW-QJys -https://www.youtube.com/watch?v=c06bh8Gjtk8 -https://www.youtube.com/watch?v=mRx56SFQ8QA -https://www.youtube.com/watch?v=6QocZNp2Djo -https://www.youtube.com/watch?v=BbhrHJH4KRI -https://www.youtube.com/watch?v=lNCy7BO37hg -https://www.youtube.com/watch?v=ZF4t_BtIARA -https://www.youtube.com/watch?v=l3A4dHyW0Hk -https://www.youtube.com/watch?v=N5w-HfJdOIE -https://www.youtube.com/watch?v=vWttLvxA-08 -https://www.youtube.com/watch?v=TEGB8sgm5S4 -https://www.youtube.com/watch?v=dOB9cKSCXZo -https://www.youtube.com/watch?v=7fYHm-70kCs -https://www.youtube.com/watch?v=6do1PrNrhks -https://www.youtube.com/watch?v=lz2hNSk2Trc -https://www.youtube.com/watch?v=awCoKx8VNx0 -https://www.youtube.com/watch?v=_IC0X1jsWQg -https://www.youtube.com/watch?v=BfWfOZ8-4Vk -https://www.youtube.com/watch?v=5JxfZDHexes -https://www.youtube.com/watch?v=QKv2dlecwE4 -https://www.youtube.com/watch?v=bbFqfVHlOQs -https://www.youtube.com/watch?v=7FwB8-UtSTY -https://www.youtube.com/watch?v=Z6UZ2Est_Rk -https://www.youtube.com/watch?v=lsXrjvmhyGc -https://www.youtube.com/watch?v=yQmff118iG0 -https://www.youtube.com/watch?v=5zZCVYX46oA -https://www.youtube.com/watch?v=hakUmmty_Ls -https://www.youtube.com/watch?v=gO7qwdewGL8 -https://www.youtube.com/watch?v=N4bChET_zcE -https://www.youtube.com/watch?v=km06-0psWps -https://www.youtube.com/watch?v=vYG3_0iwY6Q -https://www.youtube.com/watch?v=NfW7ugCXq-E -https://www.youtube.com/watch?v=-N6YZYPjyG4 -https://www.youtube.com/watch?v=MTqLhhYDQsg -https://www.youtube.com/watch?v=iNnrMwskCt0 -https://www.youtube.com/watch?v=cy1hAIofwTA -https://www.youtube.com/watch?v=ssPwz0YCqsA -https://www.youtube.com/watch?v=nPG8_5O1urE -https://www.youtube.com/watch?v=M6sy9m93Kcc -https://www.youtube.com/watch?v=69m-cyniT0Y -https://www.youtube.com/watch?v=JPy5s_vVTFs -https://www.youtube.com/watch?v=KfrQlDAog3Q 
-https://www.youtube.com/watch?v=x-7MZ-iGoQk -https://www.youtube.com/watch?v=H7IlCQnvU0I -https://www.youtube.com/watch?v=Yy19s2yBsVU -https://www.youtube.com/watch?v=2G5XeSGDPyc -https://www.youtube.com/watch?v=B1oYCHvADZY -https://www.youtube.com/watch?v=fSH7eaRQuJU -https://www.youtube.com/watch?v=95rnBvsZbZU -https://www.youtube.com/watch?v=pKRvuwn9GfI -https://www.youtube.com/watch?v=il0j0nQnK20 -https://www.youtube.com/watch?v=x3aishDTjeI -https://www.youtube.com/watch?v=1oGsN1mVgsM -https://www.youtube.com/watch?v=mBVYIXRAkHM -https://www.youtube.com/watch?v=6Z51tXq54zc -https://www.youtube.com/watch?v=-_gyejedS14 -https://www.youtube.com/watch?v=FF5ttCY898w -https://www.youtube.com/watch?v=xCGaVzXoBvo -https://www.youtube.com/watch?v=qt7lOLl8GAc -https://www.youtube.com/watch?v=WrjgF6hjfxY -https://www.youtube.com/watch?v=7AyxprXTsIg -https://www.youtube.com/watch?v=SyR5yRwWtNo -https://www.youtube.com/watch?v=-WIOayHCDPo -https://www.youtube.com/watch?v=y0JtoZERkhQ -https://www.youtube.com/watch?v=lqdbxTKrTS8 -https://www.youtube.com/watch?v=KUuJH5WSN_c -https://www.youtube.com/watch?v=vGUHmGWFgiA -https://www.youtube.com/watch?v=zDuvYbU6giQ -https://www.youtube.com/watch?v=D-KvhXV9qdM -https://www.youtube.com/watch?v=-NAcOMzY2qk -https://www.youtube.com/watch?v=Dysjq3qtI1c -https://www.youtube.com/watch?v=U-zJ0YVOeac -https://www.youtube.com/watch?v=50F6NBek_uE -https://www.youtube.com/watch?v=yNpIqZkaGXs -https://www.youtube.com/watch?v=CrbeAD8S8hU -https://www.youtube.com/watch?v=b4dHqUw9s98 -https://www.youtube.com/watch?v=RxRwInWoNCA -https://www.youtube.com/watch?v=QO8aek7VgOw -https://www.youtube.com/watch?v=Yk1y0z0Lz-Q -https://www.youtube.com/watch?v=fndst7rrz90 -https://www.youtube.com/watch?v=_sROrMglc7s -https://www.youtube.com/watch?v=NT8WIkntDBQ -https://www.youtube.com/watch?v=8xO0nrxYhtU -https://www.youtube.com/watch?v=VZJSRmgH7Ww -https://www.youtube.com/watch?v=nsejs-Vj6VA -https://www.youtube.com/watch?v=_cRazf-kuh0 -https://www.youtube.com/watch?v=p_JrAn10AsE -https://www.youtube.com/watch?v=8fDFFLoY08s -https://www.youtube.com/watch?v=TKfE2xHLcJU -https://www.youtube.com/watch?v=zY1UHAhvZNY -https://www.youtube.com/watch?v=eJZKJUvWPV0 -https://www.youtube.com/watch?v=0REjPqvjFwc -https://www.youtube.com/watch?v=oMVoj6hkt7Q -https://www.youtube.com/watch?v=gog3F-MRaBA -https://www.youtube.com/watch?v=qdDWjoy4g4o -https://www.youtube.com/watch?v=fzVtPI0WTgQ -https://www.youtube.com/watch?v=dceFJd55ZRs -https://www.youtube.com/watch?v=fAUydLncMlQ -https://www.youtube.com/watch?v=6hcoparO0AY -https://www.youtube.com/watch?v=R-OXwsnZkhg -https://www.youtube.com/watch?v=HMyZGhCNWEs -https://www.youtube.com/watch?v=sATiSokDMsE -https://www.youtube.com/watch?v=XrghP79kpjU -https://www.youtube.com/watch?v=Bgo5eBUIG70 -https://www.youtube.com/watch?v=iCA8zfIS57s -https://www.youtube.com/watch?v=H1TZCRXBus4 -https://www.youtube.com/watch?v=quMnDQBXzA8 -https://www.youtube.com/watch?v=dbkiHJyEWEU -https://www.youtube.com/watch?v=NBp4QUN77eg -https://www.youtube.com/watch?v=HZyOQzDS678 -https://www.youtube.com/watch?v=teOOvPRSuc8 -https://www.youtube.com/watch?v=rC9-woyHDKw -https://www.youtube.com/watch?v=CzDZv3upREs -https://www.youtube.com/watch?v=xkz6y4-I538 -https://www.youtube.com/watch?v=hP6-1XpmvW4 -https://www.youtube.com/watch?v=nHOMo4L4j_Y -https://www.youtube.com/watch?v=TH2UrUpWakY -https://www.youtube.com/watch?v=Qs1mQA4TRbg -https://www.youtube.com/watch?v=jn-N9NEZybs -https://www.youtube.com/watch?v=3BZsAQC6Prw -https://www.youtube.com/watch?v=i90tbQDY3eo 
-https://www.youtube.com/watch?v=vmyi7L6xDdU -https://www.youtube.com/watch?v=Yd7P73qLD3c -https://www.youtube.com/watch?v=zOUsPwjhph8 -https://www.youtube.com/watch?v=37yUKFtz_BI -https://www.youtube.com/watch?v=WU8iL0D2nY4 -https://www.youtube.com/watch?v=MTrIygjBiWk -https://www.youtube.com/watch?v=pNp85sYhcEs -https://www.youtube.com/watch?v=M72QlvVS39I -https://www.youtube.com/watch?v=xQy30Iikl9g -https://www.youtube.com/watch?v=TQq6CE-_zF4 -https://www.youtube.com/watch?v=bStpz3_WPuU -https://www.youtube.com/watch?v=lC51HTZjQeY -https://www.youtube.com/watch?v=_gljvpWvjdY -https://www.youtube.com/watch?v=KZUFuALvKSc -https://www.youtube.com/watch?v=-Kr4Ft2fCDc -https://www.youtube.com/watch?v=KcwYIQ8AAOs -https://www.youtube.com/watch?v=c5hjPFgf1DU -https://www.youtube.com/watch?v=w7aabeoAIns -https://www.youtube.com/watch?v=rb5NKtj2fn0 -https://www.youtube.com/watch?v=aJFnftIgpVg -https://www.youtube.com/watch?v=0poCn5r1wDc -https://www.youtube.com/watch?v=ygfFlVvBu4o -https://www.youtube.com/watch?v=MSI6bKsLH0s -https://www.youtube.com/watch?v=HbJd4DGdRXo -https://www.youtube.com/watch?v=0u2UXvR4yVI -https://www.youtube.com/watch?v=0TIqoSR1Mfk -https://www.youtube.com/watch?v=nlYkvu2-8_8 -https://www.youtube.com/watch?v=y7uOkbMqEjI -https://www.youtube.com/watch?v=V5UrsV1PU3Y -https://www.youtube.com/watch?v=kmw1YaOkfjE -https://www.youtube.com/watch?v=dOuyoV-9aHQ -https://www.youtube.com/watch?v=E0NbvW2TFLg -https://www.youtube.com/watch?v=q-jUCA5Rm28 -https://www.youtube.com/watch?v=YrueIOPs5L0 -https://www.youtube.com/watch?v=IHkP6Syj82s -https://www.youtube.com/watch?v=iIFl1qX7CoY -https://www.youtube.com/watch?v=0cKhemqjgQ8 -https://www.youtube.com/watch?v=dcHvwaI5bAg -https://www.youtube.com/watch?v=Pezn6Ru5fi0 -https://www.youtube.com/watch?v=Wf_BnBmT5_E -https://www.youtube.com/watch?v=34Uc06b-yQ4 -https://www.youtube.com/watch?v=lnbpEfRQwyc -https://www.youtube.com/watch?v=Ilh90vut7jo -https://www.youtube.com/watch?v=CHc1Bu8A9QM -https://www.youtube.com/watch?v=73Ie5z5aBw8 -https://www.youtube.com/watch?v=k8PJC1YTelY -https://www.youtube.com/watch?v=iGZ-i6c2OGU -https://www.youtube.com/watch?v=Tj96vlZBPUs -https://www.youtube.com/watch?v=KDE2s9B1eDo -https://www.youtube.com/watch?v=d2E2x-BSKO0 -https://www.youtube.com/watch?v=Vvnq-f43v20 -https://www.youtube.com/watch?v=OP0TPyBjq7c -https://www.youtube.com/watch?v=sjZ-dbd7FQ0 -https://www.youtube.com/watch?v=Z1Z6KPzy0p4 -https://www.youtube.com/watch?v=vg0Ko4LnYh4 -https://www.youtube.com/watch?v=f0djA_P2rQ8 -https://www.youtube.com/watch?v=cC3q0aHIYCU -https://www.youtube.com/watch?v=LHcJTxccNXE -https://www.youtube.com/watch?v=JA7VdwocB6I -https://www.youtube.com/watch?v=Nhmlm15C4eA -https://www.youtube.com/watch?v=rY_suqSxWV8 -https://www.youtube.com/watch?v=pI29BFeSmuI -https://www.youtube.com/watch?v=KOgfzQxYIsI -https://www.youtube.com/watch?v=0D5Tc8Op_n4 -https://www.youtube.com/watch?v=CxcEWMHNePE -https://www.youtube.com/watch?v=obijefFzq7A -https://www.youtube.com/watch?v=WU5ULpORl3A -https://www.youtube.com/watch?v=IHZwGNq5tb4 -https://www.youtube.com/watch?v=EKFqRJUEjEA -https://www.youtube.com/watch?v=I4eb7P9atas -https://www.youtube.com/watch?v=lLxGNWraT4s -https://www.youtube.com/watch?v=zgr98z06KBA -https://www.youtube.com/watch?v=125ox0n1NeQ -https://www.youtube.com/watch?v=fH22LI57SHY -https://www.youtube.com/watch?v=sGTrqDst1Dc -https://www.youtube.com/watch?v=OBg_Li5ErIU -https://www.youtube.com/watch?v=sQIt0HHZ8fw -https://www.youtube.com/watch?v=M6zNwBAYKSM -https://www.youtube.com/watch?v=Qspv6ZH29FA 
-https://www.youtube.com/watch?v=WuHPgV2Yxbs -https://www.youtube.com/watch?v=hu8khi3clIY -https://www.youtube.com/watch?v=ds5tz7yWBCk -https://www.youtube.com/watch?v=A7xqGo4oeec -https://www.youtube.com/watch?v=FFJDWDhFybQ -https://www.youtube.com/watch?v=YPHA89-RDqg -https://www.youtube.com/watch?v=bUvul182gWs -https://www.youtube.com/watch?v=3gyX7S8dwq0 -https://www.youtube.com/watch?v=4wmE8_0odtY -https://www.youtube.com/watch?v=FpTyGD4M-Cs -https://www.youtube.com/watch?v=NinU97Irqi8 -https://www.youtube.com/watch?v=m3YF4PTVOWI -https://www.youtube.com/watch?v=IgytzDONMS8 -https://www.youtube.com/watch?v=ccOx041__PE -https://www.youtube.com/watch?v=Go_H4eFwDXk -https://www.youtube.com/watch?v=nw2U2V7o_cg -https://www.youtube.com/watch?v=idtYG-SAps4 -https://www.youtube.com/watch?v=O8tSaYoR_40 -https://www.youtube.com/watch?v=wFeSbJySSfA -https://www.youtube.com/watch?v=BaToTsaDkl4 -https://www.youtube.com/watch?v=Qnytk6loeJQ -https://www.youtube.com/watch?v=vr_mAzDD_fA -https://www.youtube.com/watch?v=Pc_EupD2jSA -https://www.youtube.com/watch?v=hku9k2Xvl6E -https://www.youtube.com/watch?v=tovKGVUwfcA -https://www.youtube.com/watch?v=uVW458DyeOM -https://www.youtube.com/watch?v=0Yijhh3Krb4 -https://www.youtube.com/watch?v=jLywttFVo54 -https://www.youtube.com/watch?v=LNI382U5WH0 -https://www.youtube.com/watch?v=4iGXG127GyE -https://www.youtube.com/watch?v=jEec64BueAw -https://www.youtube.com/watch?v=JQT8tGBCX3U -https://www.youtube.com/watch?v=gFqUAiDSLKc -https://www.youtube.com/watch?v=yO-hte4ubig -https://www.youtube.com/watch?v=OaCaeatGiHg -https://www.youtube.com/watch?v=9lKTASGr6aI -https://www.youtube.com/watch?v=_868vk4OPPA -https://www.youtube.com/watch?v=Z97QM0Yx6uk -https://www.youtube.com/watch?v=jVLhMwjkbf0 -https://www.youtube.com/watch?v=4oascqjiSJw -https://www.youtube.com/watch?v=Unf3oPkNIzw -https://www.youtube.com/watch?v=-G_-x2tOFCA -https://www.youtube.com/watch?v=pbc5kBSATt0 -https://www.youtube.com/watch?v=1MEbuepe0-I -https://www.youtube.com/watch?v=jWqjpi7hcSQ -https://www.youtube.com/watch?v=WWz-VmWIizU -https://www.youtube.com/watch?v=osNDWp5Lxs4 -https://www.youtube.com/watch?v=GRr_a8MjmMc -https://www.youtube.com/watch?v=sBgB3iMc0fU -https://www.youtube.com/watch?v=h7SGCGAwSY0 -https://www.youtube.com/watch?v=AxUVWd86vLA -https://www.youtube.com/watch?v=Wc_G3GAFyqA -https://www.youtube.com/watch?v=Cxj3ZRJ7NLY -https://www.youtube.com/watch?v=D_jP3ib6XQE -https://www.youtube.com/watch?v=ouFJwOQrPuM -https://www.youtube.com/watch?v=_xpQyzECcCY -https://www.youtube.com/watch?v=Z7k81eWYy_Y -https://www.youtube.com/watch?v=zj4X_Fyxc2o -https://www.youtube.com/watch?v=P30NDHxamLw -https://www.youtube.com/watch?v=VEa0RiU5aeU -https://www.youtube.com/watch?v=4QaomQeGxoo -https://www.youtube.com/watch?v=FSc6ZUeihNw -https://www.youtube.com/watch?v=b4Rc4ReDb0E -https://www.youtube.com/watch?v=Lpe9IxDT7Mg -https://www.youtube.com/watch?v=GOi9s_z1F1I -https://www.youtube.com/watch?v=Uo64UC9rTcE -https://www.youtube.com/watch?v=ddHwQHOtKZc -https://www.youtube.com/watch?v=S-L86KKvXCQ -https://www.youtube.com/watch?v=JjX-TRYRu4c -https://www.youtube.com/watch?v=2rrPI0swTN4 -https://www.youtube.com/watch?v=6cqdOXZ-p1k -https://www.youtube.com/watch?v=WgA9Mp5g3Y4 -https://www.youtube.com/watch?v=U0HUrZYx6ac -https://www.youtube.com/watch?v=IBcTbZEV4Fo -https://www.youtube.com/watch?v=AG8OpUezp7c -https://www.youtube.com/watch?v=ItFoZ46tqxQ -https://www.youtube.com/watch?v=GaxaPI3Gu28 -https://www.youtube.com/watch?v=lKT2fxbijRY -https://www.youtube.com/watch?v=cVA0grbN7hQ 
-https://www.youtube.com/watch?v=fHe2-hRyTmY -https://www.youtube.com/watch?v=U8rSmAOKTcQ -https://www.youtube.com/watch?v=aNnFRhE4j9Y -https://www.youtube.com/watch?v=sVnFzhkmtHk -https://www.youtube.com/watch?v=S3iZ3Tc-Hnc -https://www.youtube.com/watch?v=2GtgoycXGvY -https://www.youtube.com/watch?v=oWK6Udr2Nh4 -https://www.youtube.com/watch?v=kF5t6C41LEk -https://www.youtube.com/watch?v=s9Zp4qz4Vu0 -https://www.youtube.com/watch?v=9EOzw3C1BrM -https://www.youtube.com/watch?v=go17ulKugwQ -https://www.youtube.com/watch?v=f0XhiUgMPfA -https://www.youtube.com/watch?v=F73sFEJferQ -https://www.youtube.com/watch?v=OHI6BTcdMBw -https://www.youtube.com/watch?v=AichQ8Wk4ac -https://www.youtube.com/watch?v=CFx6yBhkciU -https://www.youtube.com/watch?v=VjLtkPPksUY -https://www.youtube.com/watch?v=ft07E4HZJ7Q -https://www.youtube.com/watch?v=mcF-bO_Odq4 -https://www.youtube.com/watch?v=64y2g7ahof4 -https://www.youtube.com/watch?v=aDsKWg_YM7s -https://www.youtube.com/watch?v=0fRpmAYWgJE -https://www.youtube.com/watch?v=Ddp9uGyjtt4 -https://www.youtube.com/watch?v=dx-9vznjBSg -https://www.youtube.com/watch?v=mQ7b1Q_KJfU -https://www.youtube.com/watch?v=0tJR95pgM3o -https://www.youtube.com/watch?v=iY3MxLzEQKI -https://www.youtube.com/watch?v=sSXyzOrkdfs -https://www.youtube.com/watch?v=s4RJntKPU1I -https://www.youtube.com/watch?v=zgqUFmzI54E -https://www.youtube.com/watch?v=woiJ0Pbo1b0 -https://www.youtube.com/watch?v=KgT9XrH94Nk -https://www.youtube.com/watch?v=oNxu5lt2efw -https://www.youtube.com/watch?v=E2l9L0m1EUA -https://www.youtube.com/watch?v=ZhJE6t9PlZM -https://www.youtube.com/watch?v=qQD13r8kMOQ -https://www.youtube.com/watch?v=oKe6LVmuan4 -https://www.youtube.com/watch?v=XJtLW-hPD7w -https://www.youtube.com/watch?v=q3vqX_pVTZs -https://www.youtube.com/watch?v=GjmR9wnd_Dw -https://www.youtube.com/watch?v=bhKcw0w8fNo -https://www.youtube.com/watch?v=2-p0IXWzGrc -https://www.youtube.com/watch?v=IQRoXoiJKQ4 -https://www.youtube.com/watch?v=gJl8gVq6Gpo -https://www.youtube.com/watch?v=oXAszTqnnYk -https://www.youtube.com/watch?v=HfihwpdJjBU -https://www.youtube.com/watch?v=r6dI7depuDc -https://www.youtube.com/watch?v=9Us8X93IpHI -https://www.youtube.com/watch?v=eP9ivGVw7kI -https://www.youtube.com/watch?v=A0deapksgiA -https://www.youtube.com/watch?v=ZLpoV5OTtZk -https://www.youtube.com/watch?v=k4jWWF4BetA -https://www.youtube.com/watch?v=35KOQ4uRIRs -https://www.youtube.com/watch?v=gJ7ROX5Y8hc -https://www.youtube.com/watch?v=tHbkzGWUZog -https://www.youtube.com/watch?v=WWd3g6tHso8 -https://www.youtube.com/watch?v=0663OUVIskw -https://www.youtube.com/watch?v=oy-JVwHPaUI -https://www.youtube.com/watch?v=y6G8S8lOUrA -https://www.youtube.com/watch?v=W78no98uQ-U -https://www.youtube.com/watch?v=BA2PP-MGGwE -https://www.youtube.com/watch?v=tTDpFJ1uuPg -https://www.youtube.com/watch?v=0BfC_7PcJB0 -https://www.youtube.com/watch?v=0h02m6TUzf8 -https://www.youtube.com/watch?v=siHWunTXQ0A -https://www.youtube.com/watch?v=BYC1R04F2Hk -https://www.youtube.com/watch?v=7w-ozZaEyP8 -https://www.youtube.com/watch?v=o7dKrpL7BpM -https://www.youtube.com/watch?v=IQyYk7FFCbw -https://www.youtube.com/watch?v=f_Gqgl6cFr0 -https://www.youtube.com/watch?v=G3QblS_YOms -https://www.youtube.com/watch?v=DoLGVksNpdM -https://www.youtube.com/watch?v=TT4yzCS4PeY -https://www.youtube.com/watch?v=RIGkw6nxLcc -https://www.youtube.com/watch?v=cWJgvi3UelE -https://www.youtube.com/watch?v=crtRlS9I3ro -https://www.youtube.com/watch?v=6dvBUPqj59c -https://www.youtube.com/watch?v=IfI6GnYGM0I -https://www.youtube.com/watch?v=QnLmgFuVg6w 
-https://www.youtube.com/watch?v=reouaReX7C4 -https://www.youtube.com/watch?v=s1pLvWNsh44 -https://www.youtube.com/watch?v=smmhlt3_lws -https://www.youtube.com/watch?v=QgTKDo-NbU8 -https://www.youtube.com/watch?v=rOvksymiKqs -https://www.youtube.com/watch?v=l1rVxcAzh1M -https://www.youtube.com/watch?v=_fFuQXF-t3k -https://www.youtube.com/watch?v=BH7R_yuIFKQ -https://www.youtube.com/watch?v=-s89hDTLSjs -https://www.youtube.com/watch?v=4IMfpToOWSo -https://www.youtube.com/watch?v=B0oMlBTEh1U -https://www.youtube.com/watch?v=ylNM15gPhsk -https://www.youtube.com/watch?v=M8ONaAxZ3MU -https://www.youtube.com/watch?v=V51CE9jO4O0 -https://www.youtube.com/watch?v=R1Lo5BZtlQw -https://www.youtube.com/watch?v=uqceJK_Iao4 -https://www.youtube.com/watch?v=XeSUzWSVk9o -https://www.youtube.com/watch?v=bhenb5eG--I -https://www.youtube.com/watch?v=dpTjA8MrLhI -https://www.youtube.com/watch?v=G2yTkeufjds -https://www.youtube.com/watch?v=o-R_DGU9hpM -https://www.youtube.com/watch?v=L2KUBIDfscI -https://www.youtube.com/watch?v=uLr35eZdFTA -https://www.youtube.com/watch?v=2zIvsCI88hI -https://www.youtube.com/watch?v=zOsscfwC0Mg -https://www.youtube.com/watch?v=c2kh0n2dD7Q -https://www.youtube.com/watch?v=U2nd48Sl3T4 -https://www.youtube.com/watch?v=WD99krTKdF4 -https://www.youtube.com/watch?v=gfcFr3jruGg -https://www.youtube.com/watch?v=fsnTn1XqBic -https://www.youtube.com/watch?v=SwyLoyzuKm0 -https://www.youtube.com/watch?v=pDBToCyX1JE -https://www.youtube.com/watch?v=JNnDcvOlWJc -https://www.youtube.com/watch?v=7FVXJ9nSJhw -https://www.youtube.com/watch?v=j_ndVX1nTWo -https://www.youtube.com/watch?v=twN8USkFwaI -https://www.youtube.com/watch?v=N4P7vW2JA80 -https://www.youtube.com/watch?v=YzOTPqp0DQM -https://www.youtube.com/watch?v=aMpYQZTwIi8 -https://www.youtube.com/watch?v=H_lzWZl-VEk -https://www.youtube.com/watch?v=OXVeAoTdjHA -https://www.youtube.com/watch?v=17Oin61Mqc4 -https://www.youtube.com/watch?v=Cd0ciobBDcM -https://www.youtube.com/watch?v=rgU_5zaM0GU -https://www.youtube.com/watch?v=ll9LATC7_Ec -https://www.youtube.com/watch?v=K89lQsMwKpk -https://www.youtube.com/watch?v=mWcuuWJxLBs -https://www.youtube.com/watch?v=sc9q7JKLAmk -https://www.youtube.com/watch?v=bsu_n3Kljrw -https://www.youtube.com/watch?v=AWT5Itr4iIM -https://www.youtube.com/watch?v=TUbEiHEhahQ -https://www.youtube.com/watch?v=85CB_SgFNE8 -https://www.youtube.com/watch?v=rZeEP1suY7Q -https://www.youtube.com/watch?v=aNzVTdaHlpY -https://www.youtube.com/watch?v=QhWVwhe088c -https://www.youtube.com/watch?v=Rrjgb8ug144 -https://www.youtube.com/watch?v=ZS1VzG8pMz0 -https://www.youtube.com/watch?v=dwqX1IOBO1s -https://www.youtube.com/watch?v=gzq_ipnKzHc -https://www.youtube.com/watch?v=nrbiBE-GOVE -https://www.youtube.com/watch?v=DiPGzyMGw_A -https://www.youtube.com/watch?v=E827rlTAs3o -https://www.youtube.com/watch?v=sZ7HVucYRFM -https://www.youtube.com/watch?v=DJObJpK_wwU -https://www.youtube.com/watch?v=gXj3a_Ldagc -https://www.youtube.com/watch?v=8K5-CDvnefA -https://www.youtube.com/watch?v=AmphO8KrvZU -https://www.youtube.com/watch?v=7cDa5ZVXyRo -https://www.youtube.com/watch?v=NrFI656OE04 -https://www.youtube.com/watch?v=-dfb8v4dyts -https://www.youtube.com/watch?v=XHSL4tSuT-0 -https://www.youtube.com/watch?v=LUWic9zqq1M -https://www.youtube.com/watch?v=9X04dHWyu3Y -https://www.youtube.com/watch?v=FZmutKzxPhM -https://www.youtube.com/watch?v=Qznqxnn6YM0 -https://www.youtube.com/watch?v=CJzeNfEkEdE -https://www.youtube.com/watch?v=Rc2SS3dIFkk -https://www.youtube.com/watch?v=fYNqL3tgPj0 -https://www.youtube.com/watch?v=VC42A4FZSWw 
-https://www.youtube.com/watch?v=tew6A1xC5G4 -https://www.youtube.com/watch?v=06x_heN_39g -https://www.youtube.com/watch?v=cPOXcOs2cGg -https://www.youtube.com/watch?v=PyRvVKbbO68 -https://www.youtube.com/watch?v=ggOvUskvz9k -https://www.youtube.com/watch?v=BozqO_Ogy5w -https://www.youtube.com/watch?v=wmdb0RzvG7c -https://www.youtube.com/watch?v=FaxJYMPJ-pg -https://www.youtube.com/watch?v=ZpMzEGafoNw -https://www.youtube.com/watch?v=MP5VOBvP1Fg -https://www.youtube.com/watch?v=9EHHxVYRjiY -https://www.youtube.com/watch?v=0ApIbLKs-JU -https://www.youtube.com/watch?v=TWGRgM4fk4M -https://www.youtube.com/watch?v=eevPwoV7RvA -https://www.youtube.com/watch?v=brisSYFS31M -https://www.youtube.com/watch?v=SazhBlQTUhU -https://www.youtube.com/watch?v=3fscCoTP1iU -https://www.youtube.com/watch?v=pVwrZXsiCgE -https://www.youtube.com/watch?v=ztYTv43GvOg -https://www.youtube.com/watch?v=gPnyId2TCyQ -https://www.youtube.com/watch?v=RugwiGqJD60 -https://www.youtube.com/watch?v=DAdf7ikSE2M -https://www.youtube.com/watch?v=zHlWqwuktK8 -https://www.youtube.com/watch?v=t1JHMHPLJ54 -https://www.youtube.com/watch?v=iZC1L2uf-2I -https://www.youtube.com/watch?v=gnyv8uapleQ -https://www.youtube.com/watch?v=HH-oQrhhbgY -https://www.youtube.com/watch?v=8rOfqKXa7F4 -https://www.youtube.com/watch?v=yOGlDAJZH2M -https://www.youtube.com/watch?v=JngRtwK7gy4 -https://www.youtube.com/watch?v=zZ-oreEX2qo -https://www.youtube.com/watch?v=j690eWDthBo -https://www.youtube.com/watch?v=HBqG4BtYxRo -https://www.youtube.com/watch?v=q2NzF5hj1yw -https://www.youtube.com/watch?v=_Pnm5SJA4E0 -https://www.youtube.com/watch?v=mWKP6lmKtew -https://www.youtube.com/watch?v=ktCKwqFdaUU -https://www.youtube.com/watch?v=V4x5yOm9BFk -https://www.youtube.com/watch?v=uEIDIppWvAY -https://www.youtube.com/watch?v=R5KOBpyJmVs -https://www.youtube.com/watch?v=JFzm9o9l-4Y -https://www.youtube.com/watch?v=uf4ybmScHUQ -https://www.youtube.com/watch?v=fjLNYgfoYqs -https://www.youtube.com/watch?v=IWVscbhcsho -https://www.youtube.com/watch?v=zDSLWV9KypM -https://www.youtube.com/watch?v=vZyB77TU1zU -https://www.youtube.com/watch?v=608BY6toahw -https://www.youtube.com/watch?v=WHGPRC2K7mY -https://www.youtube.com/watch?v=3pEd0wcg_C4 -https://www.youtube.com/watch?v=AtzcGUWPwII -https://www.youtube.com/watch?v=ePUebx75iRQ -https://www.youtube.com/watch?v=IWdM9uq3ikk -https://www.youtube.com/watch?v=TeYCOtEecYU -https://www.youtube.com/watch?v=ncOWfpeYXFY -https://www.youtube.com/watch?v=x1RUjs9k-1c -https://www.youtube.com/watch?v=hS5VMbTEd1c -https://www.youtube.com/watch?v=XkMM9BQBDCQ -https://www.youtube.com/watch?v=zh-rpPHJWG4 -https://www.youtube.com/watch?v=bnVm7K8e1WY -https://www.youtube.com/watch?v=o8nHQ3JXnaY -https://www.youtube.com/watch?v=OLC4IeYEEjI -https://www.youtube.com/watch?v=2sE2osCi4JU -https://www.youtube.com/watch?v=lHFz0A8uiec -https://www.youtube.com/watch?v=H4zDTRO9Z58 -https://www.youtube.com/watch?v=IiDRZtiSqiM -https://www.youtube.com/watch?v=Ao7U1fB_yP8 -https://www.youtube.com/watch?v=qe6vAkiGXac -https://www.youtube.com/watch?v=QWsUmDesiJc -https://www.youtube.com/watch?v=ZLMHYYFGT4g -https://www.youtube.com/watch?v=dXKBgnRpfj4 -https://www.youtube.com/watch?v=CFyw49Fk_4U -https://www.youtube.com/watch?v=qd0Ksf1gO14 -https://www.youtube.com/watch?v=taUnDYQ6UI8 -https://www.youtube.com/watch?v=L7HY6r3C9T0 -https://www.youtube.com/watch?v=sTl6GtA20_M -https://www.youtube.com/watch?v=diL-9EB0TyM -https://www.youtube.com/watch?v=ymIOTE1PjZg -https://www.youtube.com/watch?v=_ZbtbsVaOPU -https://www.youtube.com/watch?v=9sGDtfUiktk 
-https://www.youtube.com/watch?v=vlVzSCef55I -https://www.youtube.com/watch?v=Y7tG3GrQvJ4 -https://www.youtube.com/watch?v=9JyI1FWv5yM -https://www.youtube.com/watch?v=obKqJaSnQK0 -https://www.youtube.com/watch?v=XR426H2wVgU -https://www.youtube.com/watch?v=Vu2C1pbIFWA -https://www.youtube.com/watch?v=XA4Ile920PA -https://www.youtube.com/watch?v=6qHXBy9EJvQ -https://www.youtube.com/watch?v=0WdmMfk0e6Q -https://www.youtube.com/watch?v=andVPsILYqw -https://www.youtube.com/watch?v=4Ttoi0W-wAM -https://www.youtube.com/watch?v=TID0TW7QRm0 -https://www.youtube.com/watch?v=p4L5_JfbL7w -https://www.youtube.com/watch?v=DUj29F5ZhEE -https://www.youtube.com/watch?v=9T8dkxDC42c -https://www.youtube.com/watch?v=ECCQhQTZ1SE -https://www.youtube.com/watch?v=JfrYNOJMvRg -https://www.youtube.com/watch?v=FJcrRD8yI4E -https://www.youtube.com/watch?v=N85iiQTKsXw -https://www.youtube.com/watch?v=Yyc6vE-0OgI -https://www.youtube.com/watch?v=-z9Kye8DBM0 -https://www.youtube.com/watch?v=ulSEa7ZOhWA -https://www.youtube.com/watch?v=SE52l6y7slI -https://www.youtube.com/watch?v=oErU3HKAilo -https://www.youtube.com/watch?v=NEYsomAIa1g -https://www.youtube.com/watch?v=4H5fsY5_MiA -https://www.youtube.com/watch?v=Lo5z_qGuQSI -https://www.youtube.com/watch?v=V2jfRLJsMCE -https://www.youtube.com/watch?v=aZxQruhKMDY -https://www.youtube.com/watch?v=ZMRcCmN25sA -https://www.youtube.com/watch?v=O-XsiSCIK7c -https://www.youtube.com/watch?v=-wl21uhUEx4 -https://www.youtube.com/watch?v=WfbrXc-EW54 -https://www.youtube.com/watch?v=M0yCTg5daZI -https://www.youtube.com/watch?v=UhT2sev4HK8 -https://www.youtube.com/watch?v=4v1AraoTz3w -https://www.youtube.com/watch?v=zTTU1MEiIbY -https://www.youtube.com/watch?v=pHb2ZMXtAiY -https://www.youtube.com/watch?v=mDUf4T5lv8E -https://www.youtube.com/watch?v=-OYTrvsnRSQ -https://www.youtube.com/watch?v=HR-iLG6B8ds -https://www.youtube.com/watch?v=Gjtg8xAMbdU -https://www.youtube.com/watch?v=AKWX1wR4sy4 -https://www.youtube.com/watch?v=TTmAZRt8nK0 -https://www.youtube.com/watch?v=o242B-tnea8 -https://www.youtube.com/watch?v=wg7AyXZYbrs -https://www.youtube.com/watch?v=VSbWDAER9_A -https://www.youtube.com/watch?v=k20mrsFg9-Y -https://www.youtube.com/watch?v=vtkwDMx-tV8 -https://www.youtube.com/watch?v=4dvawM7D0hI -https://www.youtube.com/watch?v=eD0JzemrL20 -https://www.youtube.com/watch?v=etqVGZkJ2Bg -https://www.youtube.com/watch?v=LEactpOVdSM -https://www.youtube.com/watch?v=IAro_PZ6BQo -https://www.youtube.com/watch?v=RbW6o64ROvo -https://www.youtube.com/watch?v=RRoinZOWSS4 -https://www.youtube.com/watch?v=KQCfiswxjZI -https://www.youtube.com/watch?v=WqXw1Fpqpg8 -https://www.youtube.com/watch?v=WdueCBIyAfQ -https://www.youtube.com/watch?v=r1KdHltNgaY -https://www.youtube.com/watch?v=RryMChxFH58 -https://www.youtube.com/watch?v=A8wuUQt1Wik -https://www.youtube.com/watch?v=rCof-kGXOhk -https://www.youtube.com/watch?v=XX8ZxKFqoms -https://www.youtube.com/watch?v=T28iUbhDyZc -https://www.youtube.com/watch?v=F94negmd_Zg -https://www.youtube.com/watch?v=GjKANaIYvio -https://www.youtube.com/watch?v=XHI4mfWv0NU -https://www.youtube.com/watch?v=IKF_ZKhrxd4 -https://www.youtube.com/watch?v=al6SGQh0e0w -https://www.youtube.com/watch?v=SSCnx2HXSSw -https://www.youtube.com/watch?v=wSL896yNi_g -https://www.youtube.com/watch?v=XHugXebbTkw -https://www.youtube.com/watch?v=TtWI3cg-97I -https://www.youtube.com/watch?v=CIi5Khk0w-E -https://www.youtube.com/watch?v=5zitlGXm7i4 -https://www.youtube.com/watch?v=RTGPniQnj-4 -https://www.youtube.com/watch?v=PygTOgWy9NM -https://www.youtube.com/watch?v=08jdDCM1Jt4 
-https://www.youtube.com/watch?v=s5mWxWlfmpE -https://www.youtube.com/watch?v=Oz5TobEFC_M -https://www.youtube.com/watch?v=jbdsSlo6EeE -https://www.youtube.com/watch?v=sUeJ4XDEQPA -https://www.youtube.com/watch?v=-QNIsfH1W6o -https://www.youtube.com/watch?v=khefdNkdrUA -https://www.youtube.com/watch?v=RerdE3DEPZk -https://www.youtube.com/watch?v=axSJE2GJCMY -https://www.youtube.com/watch?v=TalhxWW_AWo -https://www.youtube.com/watch?v=wlYmRMxqrKs -https://www.youtube.com/watch?v=tQtalMb-yi0 -https://www.youtube.com/watch?v=N2zw3_qroes -https://www.youtube.com/watch?v=f2ul_dsl4YM -https://www.youtube.com/watch?v=Dn-l3hv54dg -https://www.youtube.com/watch?v=Tmk-kKInF1Y -https://www.youtube.com/watch?v=dN6FD2RQXxQ -https://www.youtube.com/watch?v=BZKnqQYvwcE -https://www.youtube.com/watch?v=YB0P9HLgQuI -https://www.youtube.com/watch?v=LCP6qXwwOM4 -https://www.youtube.com/watch?v=c7xf3Ojj7KU -https://www.youtube.com/watch?v=gUg5WFE1UQg -https://www.youtube.com/watch?v=oG4j-rydlt0 -https://www.youtube.com/watch?v=NqKi-3E9W1Y -https://www.youtube.com/watch?v=mcFkcokKCNo -https://www.youtube.com/watch?v=U9pvsdkU3OE -https://www.youtube.com/watch?v=irL__2bmVBI -https://www.youtube.com/watch?v=aBwkS_wA9nM -https://www.youtube.com/watch?v=JBM5DEEyL-o -https://www.youtube.com/watch?v=H5rMXNG3gyc -https://www.youtube.com/watch?v=Y-03XzLAUDw -https://www.youtube.com/watch?v=xsHtpbobVkM -https://www.youtube.com/watch?v=hITnd7BSG-U -https://www.youtube.com/watch?v=LdKGVfUmpYk -https://www.youtube.com/watch?v=QtBGTQZYNyk -https://www.youtube.com/watch?v=BNF8-3G8vas -https://www.youtube.com/watch?v=B6aT-u__gJo -https://www.youtube.com/watch?v=oIGTEmHwjLg -https://www.youtube.com/watch?v=0VmIV7q34fM -https://www.youtube.com/watch?v=0dSCNkc1va4 -https://www.youtube.com/watch?v=98lsZ-wo1ek -https://www.youtube.com/watch?v=8U8nVbfNxlg -https://www.youtube.com/watch?v=9VVVNWk92d4 -https://www.youtube.com/watch?v=Ci04JrlD4us -https://www.youtube.com/watch?v=mx-AwWwdQKQ -https://www.youtube.com/watch?v=miNPNWSCQE0 -https://www.youtube.com/watch?v=FaJshsUefyQ -https://www.youtube.com/watch?v=TdvGEDxdsv4 -https://www.youtube.com/watch?v=1Hrb0g9LTsY -https://www.youtube.com/watch?v=1gERK4_AMgU -https://www.youtube.com/watch?v=6F36ChawCh8 -https://www.youtube.com/watch?v=7fCT8l5PA-0 -https://www.youtube.com/watch?v=35qUGqnd3nE -https://www.youtube.com/watch?v=SCQenAiVYBA -https://www.youtube.com/watch?v=MhGh_mCqrJU -https://www.youtube.com/watch?v=bOdjMBx60R0 -https://www.youtube.com/watch?v=N8gNk6u7nYQ -https://www.youtube.com/watch?v=Y0_-D9Ox9ng -https://www.youtube.com/watch?v=1GPTxwVbwvY -https://www.youtube.com/watch?v=Ewd0UuOEjPY -https://www.youtube.com/watch?v=Qw5oiYJFuSk -https://www.youtube.com/watch?v=55EaCTJ1pmE -https://www.youtube.com/watch?v=cuSZk8Pa8OI -https://www.youtube.com/watch?v=Xmu8YF0Bjyw -https://www.youtube.com/watch?v=ObyY3uEp7pA -https://www.youtube.com/watch?v=SB2092RkzhQ -https://www.youtube.com/watch?v=Xt-GvsySsOM -https://www.youtube.com/watch?v=FdyYRbOf0mQ -https://www.youtube.com/watch?v=9j4iaA9FcMg -https://www.youtube.com/watch?v=9Qi5sQpZGME -https://www.youtube.com/watch?v=d5qyho0ensI -https://www.youtube.com/watch?v=smlSGLoB8eI -https://www.youtube.com/watch?v=fJeDDk9Nzpk -https://www.youtube.com/watch?v=LfXLNUNqCOY -https://www.youtube.com/watch?v=ZEEY7AEA3z8 -https://www.youtube.com/watch?v=lEXGzNQtVGw -https://www.youtube.com/watch?v=MXv1fBNlMB0 -https://www.youtube.com/watch?v=7p5MNHXFTik -https://www.youtube.com/watch?v=d_4APkAy8Zk -https://www.youtube.com/watch?v=l6DpX3Bszh8 
-https://www.youtube.com/watch?v=rc01sE8tfjo -https://www.youtube.com/watch?v=DY8BBLB_OuY -https://www.youtube.com/watch?v=NaJxE3R6fZU -https://www.youtube.com/watch?v=TeWVHkxx4C4 -https://www.youtube.com/watch?v=vbfYhTkX2FE -https://www.youtube.com/watch?v=TJmm555VGaU -https://www.youtube.com/watch?v=aunY-e-Jim8 -https://www.youtube.com/watch?v=HnLAm7OeJZc -https://www.youtube.com/watch?v=BTihVgB3j5U -https://www.youtube.com/watch?v=5Fh8bkdgIvM -https://www.youtube.com/watch?v=T9aj6Hc7tCI -https://www.youtube.com/watch?v=bJNjCcAhajI -https://www.youtube.com/watch?v=7dfLVKKRQ2U -https://www.youtube.com/watch?v=zXZSnUKaQt8 -https://www.youtube.com/watch?v=i6ZSMTAD1fQ -https://www.youtube.com/watch?v=koczyZ9jaGA -https://www.youtube.com/watch?v=uatoQB05WBY -https://www.youtube.com/watch?v=Xk1cUTg-1CY -https://www.youtube.com/watch?v=kGZS4pyqzOM -https://www.youtube.com/watch?v=jbyfwRF3qt8 -https://www.youtube.com/watch?v=i8VG_QT81cI -https://www.youtube.com/watch?v=0j8h1RiYMRA -https://www.youtube.com/watch?v=bhAej3dGUf8 -https://www.youtube.com/watch?v=KVMWvlI_Clg -https://www.youtube.com/watch?v=1zZwJRC7MRU -https://www.youtube.com/watch?v=e3nXSZAqVuM -https://www.youtube.com/watch?v=g_z_q-ylqBs -https://www.youtube.com/watch?v=TUklrg9ecVU -https://www.youtube.com/watch?v=uwwGeFOwabw -https://www.youtube.com/watch?v=zNyTvA0rMVs -https://www.youtube.com/watch?v=1xCZZGySdjY -https://www.youtube.com/watch?v=WoDEdNe_JDg -https://www.youtube.com/watch?v=-r01Nyjmltc -https://www.youtube.com/watch?v=ilN94X3Qmug -https://www.youtube.com/watch?v=3Zi2-g42lio -https://www.youtube.com/watch?v=MiLjhVl2FCY -https://www.youtube.com/watch?v=oZ9bdUqbKIU -https://www.youtube.com/watch?v=1mCjN-v4jvo -https://www.youtube.com/watch?v=oiRQ8weGi88 -https://www.youtube.com/watch?v=IVYU6O6uFJQ -https://www.youtube.com/watch?v=IGZGomDqxCE -https://www.youtube.com/watch?v=qlc7mEH_BHs -https://www.youtube.com/watch?v=GBcCT2bKgjY -https://www.youtube.com/watch?v=Gd3OZTuPeuc -https://www.youtube.com/watch?v=UQftIucKJyE -https://www.youtube.com/watch?v=viPomNeDwb4 -https://www.youtube.com/watch?v=eL3kOZqhl1Y -https://www.youtube.com/watch?v=0qo2q_lJpyc -https://www.youtube.com/watch?v=aNQS7XwI4uw -https://www.youtube.com/watch?v=lTa_0IrgL0k -https://www.youtube.com/watch?v=Nqvu51Z_uLc -https://www.youtube.com/watch?v=QEZ30dVgqyE -https://www.youtube.com/watch?v=G_4jcOwjjOI -https://www.youtube.com/watch?v=EXs553jIi-M -https://www.youtube.com/watch?v=e56_vnN12G4 -https://www.youtube.com/watch?v=uBfhO0pMiv0 -https://www.youtube.com/watch?v=ehG4FOaLqS0 -https://www.youtube.com/watch?v=tQ6qgJwN1m8 -https://www.youtube.com/watch?v=Qs8kAzVmaCA -https://www.youtube.com/watch?v=8ucB_qu1Inw -https://www.youtube.com/watch?v=OuiJhSc7r74 -https://www.youtube.com/watch?v=o4ozyDZugJc -https://www.youtube.com/watch?v=io4Ym8HqI2o -https://www.youtube.com/watch?v=bpstDSeSls4 -https://www.youtube.com/watch?v=vn9I2-twWKE -https://www.youtube.com/watch?v=-LZeoEcLXlk -https://www.youtube.com/watch?v=-I1qtCjwaBE -https://www.youtube.com/watch?v=AR3YQYvmMwE -https://www.youtube.com/watch?v=eQT4GZ_EqhQ -https://www.youtube.com/watch?v=i7voHoNZM9M -https://www.youtube.com/watch?v=Op1UJ5zciPk -https://www.youtube.com/watch?v=wMVX8tpg4nA -https://www.youtube.com/watch?v=izjTgLIYYs0 -https://www.youtube.com/watch?v=Pu1nRr-iZ6A -https://www.youtube.com/watch?v=PGex6tbXQPE -https://www.youtube.com/watch?v=H7HR796PVWg -https://www.youtube.com/watch?v=WZJccvWFoNo -https://www.youtube.com/watch?v=JLibrVW9_T4 -https://www.youtube.com/watch?v=7K1FnxElCug 
-https://www.youtube.com/watch?v=_iZ47vuEtaY -https://www.youtube.com/watch?v=5aafY8hGrUc -https://www.youtube.com/watch?v=3oxXxruuuuM -https://www.youtube.com/watch?v=eXRuojc_GLA -https://www.youtube.com/watch?v=d_qfc4o1RUI -https://www.youtube.com/watch?v=pvB1S3KVFhE -https://www.youtube.com/watch?v=_1X7caqbvkA -https://www.youtube.com/watch?v=XmjibdYAvLo -https://www.youtube.com/watch?v=aGsl9NGHh-E -https://www.youtube.com/watch?v=R_Z4sTgCK4k -https://www.youtube.com/watch?v=v7KHGsg6w4s -https://www.youtube.com/watch?v=aX6aGQufM3w -https://www.youtube.com/watch?v=yQZ94f7WMhc -https://www.youtube.com/watch?v=6i975zYY6eE -https://www.youtube.com/watch?v=RbbFCphAh0c -https://www.youtube.com/watch?v=wN_S8q03iOI -https://www.youtube.com/watch?v=GlhNWhIxzOE -https://www.youtube.com/watch?v=N7GlgQjNEOw -https://www.youtube.com/watch?v=Dyo7vQvXW_w -https://www.youtube.com/watch?v=1vHGsrpdRT8 -https://www.youtube.com/watch?v=hQhAIi1TT3s -https://www.youtube.com/watch?v=uJ5DmRIYGNo -https://www.youtube.com/watch?v=zT7ScFhOymY -https://www.youtube.com/watch?v=8SP-XcTntTI -https://www.youtube.com/watch?v=Wh8UZ0DNmmc -https://www.youtube.com/watch?v=IMo-p2hQVcc -https://www.youtube.com/watch?v=yHJ50ZRTN3E -https://www.youtube.com/watch?v=25JPZbxyfn4 -https://www.youtube.com/watch?v=BzqkagVVqN8 -https://www.youtube.com/watch?v=IBdX5dleUMg -https://www.youtube.com/watch?v=nAqV1-LO8n0 -https://www.youtube.com/watch?v=SFFe8JThkeg -https://www.youtube.com/watch?v=Kr5xy7Pqe9s -https://www.youtube.com/watch?v=r1kBEMb3wx8 -https://www.youtube.com/watch?v=Nc-iL85MTs8 -https://www.youtube.com/watch?v=ULC2AExXG4k -https://www.youtube.com/watch?v=kPoBOD3qbR8 -https://www.youtube.com/watch?v=_lZ91R7KZpQ -https://www.youtube.com/watch?v=rCpUpTozlbE -https://www.youtube.com/watch?v=6xG55KCOw9c -https://www.youtube.com/watch?v=THIhR42o5Yo -https://www.youtube.com/watch?v=PueGS2ovb1k -https://www.youtube.com/watch?v=3ULU8ygv2uI -https://www.youtube.com/watch?v=vwmzA7puaag -https://www.youtube.com/watch?v=REdLuU2aKlE -https://www.youtube.com/watch?v=MmrsXGdjCN4 -https://www.youtube.com/watch?v=xkwPcTLBNYM -https://www.youtube.com/watch?v=-IBaWMv16RU -https://www.youtube.com/watch?v=Sq9QEsAOTkM -https://www.youtube.com/watch?v=V2EGLGgqYkI -https://www.youtube.com/watch?v=QDafrUdxXpM -https://www.youtube.com/watch?v=LOP7GVSzlcY -https://www.youtube.com/watch?v=uZoyBTh10S8 -https://www.youtube.com/watch?v=tX8jPSql8YU -https://www.youtube.com/watch?v=hiOy1Gf-3qY -https://www.youtube.com/watch?v=7piyNpPpESg -https://www.youtube.com/watch?v=zbLPgrDB3yI -https://www.youtube.com/watch?v=bIZIXRHyqoo -https://www.youtube.com/watch?v=JUv89Ehkoj0 -https://www.youtube.com/watch?v=Hi2L2OXSPMQ -https://www.youtube.com/watch?v=CiIFLJPAzx4 -https://www.youtube.com/watch?v=EFvv5hl9Cbo -https://www.youtube.com/watch?v=6SybA6YL2Dk -https://www.youtube.com/watch?v=diesIlZaYwg -https://www.youtube.com/watch?v=f9fQ-3ZKMW4 -https://www.youtube.com/watch?v=g3gIt_8pNFw -https://www.youtube.com/watch?v=WOOlnWuo5cc -https://www.youtube.com/watch?v=6saf_WUItTs -https://www.youtube.com/watch?v=yDTQvqANLwA -https://www.youtube.com/watch?v=2rTMHKBYkak -https://www.youtube.com/watch?v=tT3W6GP6cCA -https://www.youtube.com/watch?v=tz9GT0P0ryA -https://www.youtube.com/watch?v=eojre2rUMqw -https://www.youtube.com/watch?v=0Zzl3HzhYz0 -https://www.youtube.com/watch?v=vTp1auScNMk -https://www.youtube.com/watch?v=oBWZjKR9ZeQ -https://www.youtube.com/watch?v=Kk1xwtIV8vs -https://www.youtube.com/watch?v=v80YwdDGoeA -https://www.youtube.com/watch?v=RTwCoLuVEkk 
-https://www.youtube.com/watch?v=O_eVwq5srVs -https://www.youtube.com/watch?v=4y6sP0QP9fw -https://www.youtube.com/watch?v=8175ZvSPjtI -https://www.youtube.com/watch?v=p5uEXSimNHw -https://www.youtube.com/watch?v=tx_ezJgB-1U -https://www.youtube.com/watch?v=d61Y1IQjREI -https://www.youtube.com/watch?v=rjA6vEh2VcM -https://www.youtube.com/watch?v=M922Dq0aAMo -https://www.youtube.com/watch?v=kr__8j-30GI -https://www.youtube.com/watch?v=O9QntgWyKMw -https://www.youtube.com/watch?v=X3T-yVZnSwE -https://www.youtube.com/watch?v=cu32CcPzNG4 -https://www.youtube.com/watch?v=EFksUEbPBSg -https://www.youtube.com/watch?v=b_ZTwMM5wU0 -https://www.youtube.com/watch?v=g5QLW6LeJ3Q -https://www.youtube.com/watch?v=IxhhDb0Ap1g -https://www.youtube.com/watch?v=Hk0rRNw6Hao -https://www.youtube.com/watch?v=Zsm80SgJRBQ -https://www.youtube.com/watch?v=DpWBCMbE7aE -https://www.youtube.com/watch?v=6LWXnVr-9t8 -https://www.youtube.com/watch?v=8qWVLCpRShE -https://www.youtube.com/watch?v=AbHEP5JHusQ -https://www.youtube.com/watch?v=rbRn3rUhX4E -https://www.youtube.com/watch?v=8b5_qEwmUu8 -https://www.youtube.com/watch?v=gsKssuz8tPA -https://www.youtube.com/watch?v=pwEtEXQ6PVw -https://www.youtube.com/watch?v=v7rFBX9QOpE -https://www.youtube.com/watch?v=oxxRgi8a120 -https://www.youtube.com/watch?v=LRtEUdjrQqg -https://www.youtube.com/watch?v=unkTCzy1qrA -https://www.youtube.com/watch?v=Cd7912woWsE -https://www.youtube.com/watch?v=w5LMl9o3Ofs -https://www.youtube.com/watch?v=_dqILnERIXQ -https://www.youtube.com/watch?v=s9RnwZ7IaaA -https://www.youtube.com/watch?v=ftksxfcG4V0 -https://www.youtube.com/watch?v=ZWr0tbXySlg -https://www.youtube.com/watch?v=e-BCNcMv2j0 -https://www.youtube.com/watch?v=mewfu8lafy8 -https://www.youtube.com/watch?v=VWG7L3c_5J8 -https://www.youtube.com/watch?v=JXZXNFLQMHE -https://www.youtube.com/watch?v=fb3zbGCVL6M -https://www.youtube.com/watch?v=uN4p8E-AZF0 -https://www.youtube.com/watch?v=SgFtJaJoX8Y -https://www.youtube.com/watch?v=WvAs0s7DVYg -https://www.youtube.com/watch?v=t799a5XUSAU -https://www.youtube.com/watch?v=ljyY1LjK_ZE -https://www.youtube.com/watch?v=ArD55SLC62E -https://www.youtube.com/watch?v=cvpGOFUsVnI -https://www.youtube.com/watch?v=SZXg328Mzsk -https://www.youtube.com/watch?v=-BoZ1gEAC6g -https://www.youtube.com/watch?v=VQzU4Sy-cQQ -https://www.youtube.com/watch?v=73lYz3r3XZY -https://www.youtube.com/watch?v=JFrABFhjAMk -https://www.youtube.com/watch?v=sN6qndqz0KQ -https://www.youtube.com/watch?v=s0_MmWg2m8A -https://www.youtube.com/watch?v=I0GtLj2q5kQ -https://www.youtube.com/watch?v=3tJ0S7ciHRg -https://www.youtube.com/watch?v=hc_7bu1togM -https://www.youtube.com/watch?v=gFbL3pzkNEM -https://www.youtube.com/watch?v=I8R94gDqiGE -https://www.youtube.com/watch?v=TdErMRIxRqY -https://www.youtube.com/watch?v=49JqAHear9E -https://www.youtube.com/watch?v=Y6IbKjhHFSg -https://www.youtube.com/watch?v=-DluaoC73Oo -https://www.youtube.com/watch?v=6GAc1HfTQX8 -https://www.youtube.com/watch?v=zdGkKCiKYuE -https://www.youtube.com/watch?v=1oUH8LWmM0I -https://www.youtube.com/watch?v=TNf_8J8LoM0 -https://www.youtube.com/watch?v=kXcjZ2BkE2Y -https://www.youtube.com/watch?v=dgK8hbT2D3Y -https://www.youtube.com/watch?v=H1sdanVzblg -https://www.youtube.com/watch?v=Y4HSgvsz-AI -https://www.youtube.com/watch?v=YvBfHVPpBmw -https://www.youtube.com/watch?v=sSxhG5qH38Q -https://www.youtube.com/watch?v=x4zz7xAa-fM -https://www.youtube.com/watch?v=nOPm3XAlxZg -https://www.youtube.com/watch?v=7Yor7ci8noM -https://www.youtube.com/watch?v=BnJ4BDxlgSM -https://www.youtube.com/watch?v=j4otcsHfzrE 
-https://www.youtube.com/watch?v=lIHDioyC4Q4 -https://www.youtube.com/watch?v=e40TGdOO1JU -https://www.youtube.com/watch?v=iKnbegCExns -https://www.youtube.com/watch?v=ao1TeLrOPPQ -https://www.youtube.com/watch?v=Lx_UnKWJyLE -https://www.youtube.com/watch?v=nDtgGczwIPY -https://www.youtube.com/watch?v=0bJ2mfxtmjg -https://www.youtube.com/watch?v=cbG1GetVqyw -https://www.youtube.com/watch?v=gIONn9bp8_w -https://www.youtube.com/watch?v=vePMmBM-IVU -https://www.youtube.com/watch?v=xP9cwmJ5dog -https://www.youtube.com/watch?v=uxYeke3bLMk -https://www.youtube.com/watch?v=eCze8sLvmcE -https://www.youtube.com/watch?v=EcBx2S-x3Hs -https://www.youtube.com/watch?v=zO3rfSDuBFg -https://www.youtube.com/watch?v=0RtavDSBnt4 -https://www.youtube.com/watch?v=8EojjO04ahA -https://www.youtube.com/watch?v=UbWPWyl8xWQ -https://www.youtube.com/watch?v=Ocwj3RfhTwI -https://www.youtube.com/watch?v=8EOo1DTKT98 -https://www.youtube.com/watch?v=7Rh7MjD6LLQ -https://www.youtube.com/watch?v=_C3FNmY1IxI -https://www.youtube.com/watch?v=yvQXwo4w3_0 -https://www.youtube.com/watch?v=PNVWUdB653k -https://www.youtube.com/watch?v=-DWQIrKbrEc -https://www.youtube.com/watch?v=1MgfOJ9ILCo -https://www.youtube.com/watch?v=og-mP2_bjcE -https://www.youtube.com/watch?v=Gar0-Yc_gpY -https://www.youtube.com/watch?v=OAyBcaFI56I -https://www.youtube.com/watch?v=5T1AD2jE6Kg -https://www.youtube.com/watch?v=sECZvLhjvPQ -https://www.youtube.com/watch?v=f1YdQbcFe3k -https://www.youtube.com/watch?v=B00d1ZrHlgc -https://www.youtube.com/watch?v=s05OP79dB28 -https://www.youtube.com/watch?v=6e-ID8Wxbjo -https://www.youtube.com/watch?v=64Uihh-m0LE -https://www.youtube.com/watch?v=hDc6FPsH7h4 -https://www.youtube.com/watch?v=HDiJLqx3Luk -https://www.youtube.com/watch?v=wcy3iNJXAJs -https://www.youtube.com/watch?v=rV39f1xY040 -https://www.youtube.com/watch?v=A8oTnrChwoc -https://www.youtube.com/watch?v=xmjPzr1PQG8 -https://www.youtube.com/watch?v=SlYjBVi4LeM -https://www.youtube.com/watch?v=m7hq3HU1x1c -https://www.youtube.com/watch?v=Ycdfq5SVgYs -https://www.youtube.com/watch?v=Lgy5IT6V1II -https://www.youtube.com/watch?v=ivPrbp3Ef0M -https://www.youtube.com/watch?v=xS8T1nXqibQ -https://www.youtube.com/watch?v=DWXBC9Ud32Y -https://www.youtube.com/watch?v=ZQdxYJkbD6s -https://www.youtube.com/watch?v=xkB7ra_ZcCM -https://www.youtube.com/watch?v=ugCKpZwt7-Q -https://www.youtube.com/watch?v=CHXI5J4V4AE -https://www.youtube.com/watch?v=Jt7_FFHd1Uw -https://www.youtube.com/watch?v=3aKlZpFAI3w -https://www.youtube.com/watch?v=i_Y7of6rGy8 -https://www.youtube.com/watch?v=YUp3RSlqBnk -https://www.youtube.com/watch?v=zRmm0-IMV-0 -https://www.youtube.com/watch?v=9IlBbSIQQes -https://www.youtube.com/watch?v=LhaDO3MwY08 -https://www.youtube.com/watch?v=GIMPGEcvats -https://www.youtube.com/watch?v=tJjwfD-adPE -https://www.youtube.com/watch?v=tCyYgZr6CHA -https://www.youtube.com/watch?v=O2IuSn7abLQ -https://www.youtube.com/watch?v=vWAIK55GrSQ -https://www.youtube.com/watch?v=X0Pwphld4l8 -https://www.youtube.com/watch?v=Men_VTPKzRk -https://www.youtube.com/watch?v=4N1QO35qeAw -https://www.youtube.com/watch?v=mDyz1zC5mV4 -https://www.youtube.com/watch?v=IwOpp57oQa8 -https://www.youtube.com/watch?v=rpBONirrpFY -https://www.youtube.com/watch?v=1nPRqTVWcXo -https://www.youtube.com/watch?v=qflzvf0P8Go -https://www.youtube.com/watch?v=TbLnKnd55KI -https://www.youtube.com/watch?v=jA3nz1RGhoQ -https://www.youtube.com/watch?v=mqx4w9RxyC8 -https://www.youtube.com/watch?v=mqoJr3qL98Q -https://www.youtube.com/watch?v=KTxnalxOBq4 -https://www.youtube.com/watch?v=ghOcpZ_n3ck 
-https://www.youtube.com/watch?v=dReCYPaWB28 -https://www.youtube.com/watch?v=XY79-cpbFME -https://www.youtube.com/watch?v=jwVwgGtdoAc -https://www.youtube.com/watch?v=wpGu13Xt_w0 -https://www.youtube.com/watch?v=5NRyHwCPfgY -https://www.youtube.com/watch?v=zTYxYSTDBjM -https://www.youtube.com/watch?v=QQj4uFa05N4 -https://www.youtube.com/watch?v=1GMgjlp4Yps -https://www.youtube.com/watch?v=kUo37zZhNxE -https://www.youtube.com/watch?v=Jz5CGd0dsaU -https://www.youtube.com/watch?v=iLQS_HPWO9c -https://www.youtube.com/watch?v=_lApBJu9gUY -https://www.youtube.com/watch?v=M1XwLmDpElY -https://www.youtube.com/watch?v=JT5HaX5yVPc -https://www.youtube.com/watch?v=PmQc2_9cux8 -https://www.youtube.com/watch?v=wQMkJ47gTto -https://www.youtube.com/watch?v=arYXHRmVUSQ -https://www.youtube.com/watch?v=hC_KKWCju34 -https://www.youtube.com/watch?v=dYi51VQyqWM -https://www.youtube.com/watch?v=tN_qiCQU8mE -https://www.youtube.com/watch?v=ilm532-pJ6k -https://www.youtube.com/watch?v=5uo1Ir6X_r8 -https://www.youtube.com/watch?v=-M5YASO4Qo8 -https://www.youtube.com/watch?v=Jg0yPEiD5uk -https://www.youtube.com/watch?v=GIgOpPFcNlU -https://www.youtube.com/watch?v=ttJtmEsjeik -https://www.youtube.com/watch?v=IuoT4lj5320 -https://www.youtube.com/watch?v=0_zqq0BXwfk -https://www.youtube.com/watch?v=sK6VuV8mgPk -https://www.youtube.com/watch?v=z0JQaxqXlic -https://www.youtube.com/watch?v=W06m5BF4bZA -https://www.youtube.com/watch?v=tGZJWiETVto -https://www.youtube.com/watch?v=09u2R0LZ1YU -https://www.youtube.com/watch?v=_OqomCppv30 -https://www.youtube.com/watch?v=wf8tUgUCPac -https://www.youtube.com/watch?v=lczFvGhUfts -https://www.youtube.com/watch?v=kFx3-0pNPWc -https://www.youtube.com/watch?v=JZfuvcVU91E -https://www.youtube.com/watch?v=xT5JEhTo7Rc -https://www.youtube.com/watch?v=mTinH8GAflM -https://www.youtube.com/watch?v=3srlxHfZLm4 -https://www.youtube.com/watch?v=clv601ldGA4 -https://www.youtube.com/watch?v=bHPY6Nn9QMs -https://www.youtube.com/watch?v=9y5aeZ33w_4 -https://www.youtube.com/watch?v=yhUxI5phmO8 -https://www.youtube.com/watch?v=VbE2CU9nA1k -https://www.youtube.com/watch?v=It-PLAJt_zQ -https://www.youtube.com/watch?v=9SekDfrN-X4 -https://www.youtube.com/watch?v=T0pnbDrW1CI -https://www.youtube.com/watch?v=XBYyXyvW6tE -https://www.youtube.com/watch?v=XFep6Dhx-Fs -https://www.youtube.com/watch?v=W8iTykaPSLM -https://www.youtube.com/watch?v=z3YrrXDQDyY -https://www.youtube.com/watch?v=9KU33u28EtI -https://www.youtube.com/watch?v=kvDMgzLZR2k -https://www.youtube.com/watch?v=_xWION4vOgM -https://www.youtube.com/watch?v=70zAbdwD9yY -https://www.youtube.com/watch?v=59ZG0Hjf5Us -https://www.youtube.com/watch?v=ZnI-KMjlrgA -https://www.youtube.com/watch?v=0-HtNtzR3HY -https://www.youtube.com/watch?v=G5UwFimUH_k -https://www.youtube.com/watch?v=UQ9YjPBUgII -https://www.youtube.com/watch?v=TtO1czEGERI -https://www.youtube.com/watch?v=s_1O1UJtMtQ -https://www.youtube.com/watch?v=Cq9XGcbsNRA -https://www.youtube.com/watch?v=dWsO8_oHeAo -https://www.youtube.com/watch?v=xF8HGuSfoYE -https://www.youtube.com/watch?v=nIuZdHv6chU -https://www.youtube.com/watch?v=pfbdEHVtBz4 -https://www.youtube.com/watch?v=OJWP4kft7NI -https://www.youtube.com/watch?v=1ZRrg52B81w -https://www.youtube.com/watch?v=cPtg_qRa59w -https://www.youtube.com/watch?v=W4WZjoUiKr8 -https://www.youtube.com/watch?v=70niBZ6GLSs -https://www.youtube.com/watch?v=auTIwWeByuw -https://www.youtube.com/watch?v=VaXHkSUlEdI -https://www.youtube.com/watch?v=PbelNpfEJQc -https://www.youtube.com/watch?v=R2VIfDPxX3M -https://www.youtube.com/watch?v=aTv5h7kqPp0 
-https://www.youtube.com/watch?v=A7Vh8ZemNkc -https://www.youtube.com/watch?v=zn7dx98XomI -https://www.youtube.com/watch?v=DyLVTThUXBc -https://www.youtube.com/watch?v=zdNxMHSVkFg -https://www.youtube.com/watch?v=MXp2LFunRFc -https://www.youtube.com/watch?v=gE1RoN6ChBs -https://www.youtube.com/watch?v=cFcnz9rCD-o -https://www.youtube.com/watch?v=JueaSBXghD0 -https://www.youtube.com/watch?v=03de7Gcpz_4 -https://www.youtube.com/watch?v=Ij33PqNmbro -https://www.youtube.com/watch?v=57iQZerkh0o -https://www.youtube.com/watch?v=N1b7xZdVtRw -https://www.youtube.com/watch?v=M4gLxvcaBYU -https://www.youtube.com/watch?v=yGuH6jx7xkI -https://www.youtube.com/watch?v=AeopuNMkQt8 -https://www.youtube.com/watch?v=QKzE7lsCPw8 -https://www.youtube.com/watch?v=uZn0PIJ_k9E -https://www.youtube.com/watch?v=bxJ4-3W6LwE -https://www.youtube.com/watch?v=2MJPj6oWIKo -https://www.youtube.com/watch?v=yujj2p2dGWk -https://www.youtube.com/watch?v=-bXdzATNQKw -https://www.youtube.com/watch?v=xN3nXJX9vJs -https://www.youtube.com/watch?v=zwgm4a5OugI -https://www.youtube.com/watch?v=NBNN0lqZ8tU -https://www.youtube.com/watch?v=5e460FHKV64 -https://www.youtube.com/watch?v=hsbCYJIvq3o -https://www.youtube.com/watch?v=_qsfagfYAVs -https://www.youtube.com/watch?v=ZNl4FVK9-yQ -https://www.youtube.com/watch?v=2-TYgeGNTQk -https://www.youtube.com/watch?v=Zin6ZIfdTAY -https://www.youtube.com/watch?v=sn7XKfA4W-0 -https://www.youtube.com/watch?v=gmRYrsutXEo -https://www.youtube.com/watch?v=1jhOFw6GrLg -https://www.youtube.com/watch?v=PxzOVCQU-4U -https://www.youtube.com/watch?v=BlHNshJh9zM -https://www.youtube.com/watch?v=LVmH60yo0JI -https://www.youtube.com/watch?v=o-Tzlp_nG70 -https://www.youtube.com/watch?v=njuIKmM2xDM -https://www.youtube.com/watch?v=4zwQ88zMyio -https://www.youtube.com/watch?v=E3Wr6GWkRzA -https://www.youtube.com/watch?v=aypLuo7UXWE -https://www.youtube.com/watch?v=TANbVyW6x1k -https://www.youtube.com/watch?v=oNcdsp12pag -https://www.youtube.com/watch?v=qv7QFzRqf34 -https://www.youtube.com/watch?v=cYK9aTSRtbE -https://www.youtube.com/watch?v=7kmaoLaQlF8 -https://www.youtube.com/watch?v=st1zEAlNPic -https://www.youtube.com/watch?v=2XQnxn39yj4 -https://www.youtube.com/watch?v=gUbA8Zn8YPQ -https://www.youtube.com/watch?v=Uny3t9woExo -https://www.youtube.com/watch?v=lCaIfb8B5Mo -https://www.youtube.com/watch?v=jEkQ0RZObiI -https://www.youtube.com/watch?v=Beo6jYwuZpc -https://www.youtube.com/watch?v=KILWCT_Zzoc -https://www.youtube.com/watch?v=fb2VzDSf7S0 -https://www.youtube.com/watch?v=4a_oIbAhCgo -https://www.youtube.com/watch?v=eCLS2lWR1us -https://www.youtube.com/watch?v=MS01ipsrJCA -https://www.youtube.com/watch?v=3zqIIJhiyoo -https://www.youtube.com/watch?v=R3KOub_thmc -https://www.youtube.com/watch?v=ZQadO6LiP9s -https://www.youtube.com/watch?v=pzhhzPhwYh8 -https://www.youtube.com/watch?v=kLkNcyM0gNA -https://www.youtube.com/watch?v=yHpVRxZJyMg -https://www.youtube.com/watch?v=DZlfy7Eeeu0 -https://www.youtube.com/watch?v=b9Ao1j5bDXU -https://www.youtube.com/watch?v=vfPEVcXrO2c -https://www.youtube.com/watch?v=TK3gpctWmy8 -https://www.youtube.com/watch?v=Gn9_FWSU32Y -https://www.youtube.com/watch?v=zrSrLrBZ_eA -https://www.youtube.com/watch?v=5CS4NAkSjeM -https://www.youtube.com/watch?v=bbNkX9AWRbM -https://www.youtube.com/watch?v=03Yi0nl6N8Y -https://www.youtube.com/watch?v=moANh2cUXzw -https://www.youtube.com/watch?v=X4MNSJQ3jxk -https://www.youtube.com/watch?v=Eh7IdlNdcjk -https://www.youtube.com/watch?v=HNR4pmJ_YPs -https://www.youtube.com/watch?v=QIzmK6U41yc -https://www.youtube.com/watch?v=i7Tsz0bTzv4 
-https://www.youtube.com/watch?v=U2Qz2Uz38Ds -https://www.youtube.com/watch?v=I6G_ojX3FO0 -https://www.youtube.com/watch?v=gQOsKTlMr9w -https://www.youtube.com/watch?v=LBC0msJZofU -https://www.youtube.com/watch?v=gHRicolSA8Y -https://www.youtube.com/watch?v=v8roAb048Us -https://www.youtube.com/watch?v=P_RIqP_sjZ8 -https://www.youtube.com/watch?v=e9E-fnJZObk -https://www.youtube.com/watch?v=UDRyL9EH5Bs -https://www.youtube.com/watch?v=EG77Z0p17Hc -https://www.youtube.com/watch?v=EW8OrhKu6ac -https://www.youtube.com/watch?v=swGrHB6VYF0 -https://www.youtube.com/watch?v=1KSp_Hr0_8g -https://www.youtube.com/watch?v=WqyHn7p90ws -https://www.youtube.com/watch?v=8rASZi0V6hI -https://www.youtube.com/watch?v=JPX0c4vmMbk -https://www.youtube.com/watch?v=DDstXkeEY5k -https://www.youtube.com/watch?v=lIWpl1rISOU -https://www.youtube.com/watch?v=w3t-AhvtwIY -https://www.youtube.com/watch?v=bCPOPg9Jxko -https://www.youtube.com/watch?v=M4CIPaK4CzU -https://www.youtube.com/watch?v=sSWYhKIPK_w -https://www.youtube.com/watch?v=DL9orOzONFM -https://www.youtube.com/watch?v=zV1mn041-MY -https://www.youtube.com/watch?v=u9fKGIZEOZc -https://www.youtube.com/watch?v=nwW_2C1VQRA -https://www.youtube.com/watch?v=xyjIpsJ9C_E -https://www.youtube.com/watch?v=zv3JJeoNWpw -https://www.youtube.com/watch?v=yFh-pd4K8kM -https://www.youtube.com/watch?v=helJykjI5Ks -https://www.youtube.com/watch?v=O5grg4BhmwA -https://www.youtube.com/watch?v=hxqqPMdeWlk -https://www.youtube.com/watch?v=aBfY4EXizl4 -https://www.youtube.com/watch?v=uXbTAyacm-o -https://www.youtube.com/watch?v=EHchyDF5mPA -https://www.youtube.com/watch?v=8hN6tFyXtMM -https://www.youtube.com/watch?v=ytaFppE2PrQ -https://www.youtube.com/watch?v=H7T79men-54 -https://www.youtube.com/watch?v=wobBUFvj6vw -https://www.youtube.com/watch?v=JCkSb2SHIY8 -https://www.youtube.com/watch?v=DbdV9vauV_8 -https://www.youtube.com/watch?v=WY7F2paiokM -https://www.youtube.com/watch?v=7_XKe5nL3j4 -https://www.youtube.com/watch?v=ImQPrZx6ZHw -https://www.youtube.com/watch?v=D4Of5F5CDLA -https://www.youtube.com/watch?v=piL1B00ekBE -https://www.youtube.com/watch?v=etOwU2jh5vo -https://www.youtube.com/watch?v=jqceDCEbuds -https://www.youtube.com/watch?v=NBJgtpZxRks -https://www.youtube.com/watch?v=Rztv9EWij4Y -https://www.youtube.com/watch?v=Mb3C2fSxLdI -https://www.youtube.com/watch?v=vK2LAKvrzPo -https://www.youtube.com/watch?v=w0JpfEd31AM -https://www.youtube.com/watch?v=mYSaT9V4324 -https://www.youtube.com/watch?v=3979ONO_DOk -https://www.youtube.com/watch?v=5li6iJy_DYs -https://www.youtube.com/watch?v=wA9RT0MBe3s -https://www.youtube.com/watch?v=NEeLktfngjQ -https://www.youtube.com/watch?v=es24xo9ju7U -https://www.youtube.com/watch?v=OaGCZ8SsKG4 -https://www.youtube.com/watch?v=lQsBzd952xk -https://www.youtube.com/watch?v=neD2PEqr-Po -https://www.youtube.com/watch?v=vHqIYvrKwks -https://www.youtube.com/watch?v=1A6qbKM5vgE -https://www.youtube.com/watch?v=zxPStAHlg4E -https://www.youtube.com/watch?v=jZCfL7fblMM -https://www.youtube.com/watch?v=TAccYcmvQ0k -https://www.youtube.com/watch?v=kzbyykGWN9U -https://www.youtube.com/watch?v=Jzj8TyuuvzQ -https://www.youtube.com/watch?v=ruQVatMNzto -https://www.youtube.com/watch?v=4tOse6iLVs0 -https://www.youtube.com/watch?v=3l_25zyd-s8 -https://www.youtube.com/watch?v=Cu0D682ezDU -https://www.youtube.com/watch?v=ZkHmWMl_rfo -https://www.youtube.com/watch?v=qcI0TSXWzVs -https://www.youtube.com/watch?v=THdAow8-nG8 -https://www.youtube.com/watch?v=53FiW6nOJfQ -https://www.youtube.com/watch?v=w52Vjf2dphk -https://www.youtube.com/watch?v=O3buBB-zkw4 
-https://www.youtube.com/watch?v=qsRb4sFg7iE -https://www.youtube.com/watch?v=B36mO34Yaj8 -https://www.youtube.com/watch?v=apgTsYIOZks -https://www.youtube.com/watch?v=uONZRzmh3xc -https://www.youtube.com/watch?v=FTY3sC1ad-8 -https://www.youtube.com/watch?v=OfFoZQsS0qI -https://www.youtube.com/watch?v=SlE1uDWGMuI -https://www.youtube.com/watch?v=rDZYOsRB_DE -https://www.youtube.com/watch?v=iWUNJPVzTT0 -https://www.youtube.com/watch?v=hfEwesszkvI -https://www.youtube.com/watch?v=Tk1zk4xikrk -https://www.youtube.com/watch?v=kbqKifCiUFo -https://www.youtube.com/watch?v=H4oCces6L-I -https://www.youtube.com/watch?v=dZc0Wi1l0L0 -https://www.youtube.com/watch?v=_DRdlJnx2Vo -https://www.youtube.com/watch?v=uoyvZlIQ_-I -https://www.youtube.com/watch?v=gCrKVixw7Tk -https://www.youtube.com/watch?v=IVk6qntdVzk -https://www.youtube.com/watch?v=ZK3n2LCqRio -https://www.youtube.com/watch?v=ZzKNrl5kyF4 -https://www.youtube.com/watch?v=Gld-GRwARlA -https://www.youtube.com/watch?v=tcqDUEhgT_g -https://www.youtube.com/watch?v=fPh0SGZHbTk -https://www.youtube.com/watch?v=rAsmd_5SfII -https://www.youtube.com/watch?v=8Dotiqbtvoo -https://www.youtube.com/watch?v=Q1bJaVlV-84 -https://www.youtube.com/watch?v=CBQ5qbYV3XM -https://www.youtube.com/watch?v=pOsgKiHqKt0 -https://www.youtube.com/watch?v=HC-e2wj7ePc -https://www.youtube.com/watch?v=JiBb2n5UEG8 -https://www.youtube.com/watch?v=INNL2cWTgmI -https://www.youtube.com/watch?v=nEiX-U8P4H4 -https://www.youtube.com/watch?v=azXdNflZL28 -https://www.youtube.com/watch?v=8GxjkvKfRh4 -https://www.youtube.com/watch?v=HH71IKNVVYk -https://www.youtube.com/watch?v=807yt9yhd6I -https://www.youtube.com/watch?v=El7pa-DsJ7w -https://www.youtube.com/watch?v=nIZUoHkJNH0 -https://www.youtube.com/watch?v=v8EuLwLPF1I -https://www.youtube.com/watch?v=VXvqZQBGF5A -https://www.youtube.com/watch?v=tTkxj7PJ6Ss -https://www.youtube.com/watch?v=Brraz_ZKUIA -https://www.youtube.com/watch?v=BEjJRVVD0d4 -https://www.youtube.com/watch?v=Kw52RfKqX5Q -https://www.youtube.com/watch?v=Tj1CYEYIZI8 -https://www.youtube.com/watch?v=R6sRU0cLwcg -https://www.youtube.com/watch?v=HYN-BL55NyI -https://www.youtube.com/watch?v=4xRSfFe0aA4 -https://www.youtube.com/watch?v=qcT3I4Fhod8 -https://www.youtube.com/watch?v=FhR_Fc6I-Es -https://www.youtube.com/watch?v=5kQI69iDZEI -https://www.youtube.com/watch?v=eiQUC22MT0o -https://www.youtube.com/watch?v=mVNsumhm0bk -https://www.youtube.com/watch?v=5kyt-4p0qpw -https://www.youtube.com/watch?v=kdWxc8hDPbA -https://www.youtube.com/watch?v=VK8oMBYr9AE -https://www.youtube.com/watch?v=KHIe0adNnPw -https://www.youtube.com/watch?v=WSRrChHFR2I -https://www.youtube.com/watch?v=eWyU5N5oGUw -https://www.youtube.com/watch?v=OTF26cgxS5A -https://www.youtube.com/watch?v=AyOrNze3SE4 -https://www.youtube.com/watch?v=0bShQ01DxnY -https://www.youtube.com/watch?v=jal3Rr0TPpM -https://www.youtube.com/watch?v=IvwSs1j6sL0 -https://www.youtube.com/watch?v=rbvJbvdspoU -https://www.youtube.com/watch?v=gSqq_vuW0es -https://www.youtube.com/watch?v=EY7KAbarSnk -https://www.youtube.com/watch?v=oCZ5BJlh6Jo -https://www.youtube.com/watch?v=veVrB1DNV-c -https://www.youtube.com/watch?v=Jl0MB13-zK8 -https://www.youtube.com/watch?v=qfYc2z7cvC4 -https://www.youtube.com/watch?v=J8NH9qAkys4 -https://www.youtube.com/watch?v=bBpcF-hnKz0 -https://www.youtube.com/watch?v=3JsODEZl5lo -https://www.youtube.com/watch?v=I5CFgz1qeSc -https://www.youtube.com/watch?v=LIKcI1MBnto -https://www.youtube.com/watch?v=79wzDxJu0UE -https://www.youtube.com/watch?v=gHS0Lf8TyF8 -https://www.youtube.com/watch?v=oqgrcgRkunw 
-https://www.youtube.com/watch?v=mWJC1eL_OR0 -https://www.youtube.com/watch?v=41cWX4W1vEI -https://www.youtube.com/watch?v=8mO3yxKHU1E -https://www.youtube.com/watch?v=cR2EZ9b2AYw -https://www.youtube.com/watch?v=wyPYWkVKQyw -https://www.youtube.com/watch?v=MuUsmsfwizg -https://www.youtube.com/watch?v=9Y18qV1UCRM -https://www.youtube.com/watch?v=hd9jxasd-60 -https://www.youtube.com/watch?v=WEVdhWKG2x8 -https://www.youtube.com/watch?v=csq261RPA2I -https://www.youtube.com/watch?v=8pmocmq8Z_Y -https://www.youtube.com/watch?v=OFUVP1sDMhQ -https://www.youtube.com/watch?v=YZnULU8-WiY -https://www.youtube.com/watch?v=bkUEPqEqMOc -https://www.youtube.com/watch?v=fgxaU6KA4kI -https://www.youtube.com/watch?v=6XziyFQ2qb0 -https://www.youtube.com/watch?v=71QrG4wEY_g -https://www.youtube.com/watch?v=wJXJTliyR98 -https://www.youtube.com/watch?v=lDw7qM3lCOU -https://www.youtube.com/watch?v=ONpSx6GGlTE -https://www.youtube.com/watch?v=W3tNcsdWzv4 -https://www.youtube.com/watch?v=Sq7SBO1VqKc -https://www.youtube.com/watch?v=PEmiEWUb1uo -https://www.youtube.com/watch?v=Mw1Du8gYMiE -https://www.youtube.com/watch?v=H4u0uhldLJQ -https://www.youtube.com/watch?v=fEDVDvIWfZE -https://www.youtube.com/watch?v=1tgrLHWLhBs -https://www.youtube.com/watch?v=VDgp3h8bAu8 -https://www.youtube.com/watch?v=8ZcZAL3C928 -https://www.youtube.com/watch?v=0Zpivy32UGU -https://www.youtube.com/watch?v=u-m-BBQQ9Tw -https://www.youtube.com/watch?v=cvHvX8mFTJE -https://www.youtube.com/watch?v=L-o5p75Q-cg -https://www.youtube.com/watch?v=ZfORukYHkGA -https://www.youtube.com/watch?v=t9sbedJUdUc -https://www.youtube.com/watch?v=zBoHq5-w5UY -https://www.youtube.com/watch?v=q50-whddP7Y -https://www.youtube.com/watch?v=D1PiHzL703U -https://www.youtube.com/watch?v=voQEUBq8q_A -https://www.youtube.com/watch?v=Ps2HyJCINyw -https://www.youtube.com/watch?v=kzco7k86Unw -https://www.youtube.com/watch?v=YE2zr2lV50M -https://www.youtube.com/watch?v=pU_MuwLZeAc -https://www.youtube.com/watch?v=PC5NoUQMOhA -https://www.youtube.com/watch?v=KGm_VKYfTMI -https://www.youtube.com/watch?v=duUThrCj7Lk -https://www.youtube.com/watch?v=n5UaEOcC5G4 -https://www.youtube.com/watch?v=gjEmu6h_lpE -https://www.youtube.com/watch?v=en1KyArCG6w -https://www.youtube.com/watch?v=bRPWouCfF7I -https://www.youtube.com/watch?v=lXnIwNH8vGo -https://www.youtube.com/watch?v=xmPqK9T_yEU -https://www.youtube.com/watch?v=PYUNAbge9Xw -https://www.youtube.com/watch?v=J39cA-10XcE -https://www.youtube.com/watch?v=1BJD7Rv7iS8 -https://www.youtube.com/watch?v=D6WdfQDXRsw -https://www.youtube.com/watch?v=g6-v4pwo2ik -https://www.youtube.com/watch?v=UCgHBFhO7FM -https://www.youtube.com/watch?v=ALNUXWNvEZY -https://www.youtube.com/watch?v=0Qx13_oyycg -https://www.youtube.com/watch?v=1KeCeqkwnTs -https://www.youtube.com/watch?v=JmQd29Q6-s8 -https://www.youtube.com/watch?v=5eE9eTDjII0 -https://www.youtube.com/watch?v=-kPpPTIYpeY -https://www.youtube.com/watch?v=n7IfbF48Jqo -https://www.youtube.com/watch?v=HXG-qjuDkPE -https://www.youtube.com/watch?v=77sKr6xPbW8 -https://www.youtube.com/watch?v=w9uEnYCbmno -https://www.youtube.com/watch?v=M9B2M5rGOqE -https://www.youtube.com/watch?v=g-6eEKHMT4A -https://www.youtube.com/watch?v=0k6rcvts1FM -https://www.youtube.com/watch?v=3Glq6IgKJ-g -https://www.youtube.com/watch?v=dl_kgfUid_E -https://www.youtube.com/watch?v=RpBgeBWcw4I -https://www.youtube.com/watch?v=WiyI1U6HfJ8 -https://www.youtube.com/watch?v=CvPD5zHvxpE -https://www.youtube.com/watch?v=H5aghJz-6-0 -https://www.youtube.com/watch?v=hvGwiRk9qFI -https://www.youtube.com/watch?v=kP5en-KOZak 
-https://www.youtube.com/watch?v=Lt9z2PZxTA8 -https://www.youtube.com/watch?v=ZAN2gg3Bf88 -https://www.youtube.com/watch?v=dfcoKxlL9zo -https://www.youtube.com/watch?v=6G-zWuGwXwE -https://www.youtube.com/watch?v=IFOgQYDqCDg -https://www.youtube.com/watch?v=fN1ASoQVOtc -https://www.youtube.com/watch?v=nDvvyndocg0 -https://www.youtube.com/watch?v=YS2do-ydhsE -https://www.youtube.com/watch?v=5IL47nns0R4 -https://www.youtube.com/watch?v=A3BueGvC0dU -https://www.youtube.com/watch?v=qE5_I54Onsg -https://www.youtube.com/watch?v=1QA_KBw3bpM -https://www.youtube.com/watch?v=H2ms1CuhCAQ -https://www.youtube.com/watch?v=zh7AyRCjMK8 -https://www.youtube.com/watch?v=oklOIfwaIm4 -https://www.youtube.com/watch?v=2_m2D7BlCSY -https://www.youtube.com/watch?v=AOh3iLuvEx0 -https://www.youtube.com/watch?v=qt6rxatB78k -https://www.youtube.com/watch?v=CCLgvay1AJg -https://www.youtube.com/watch?v=2CJsz7yfWVQ -https://www.youtube.com/watch?v=mXr5R4PybQ8 -https://www.youtube.com/watch?v=FPJUY3OG1j4 -https://www.youtube.com/watch?v=AxZSX649ZQM -https://www.youtube.com/watch?v=vAqqcajeLyw -https://www.youtube.com/watch?v=wT50rGqJi-A -https://www.youtube.com/watch?v=bURXdJK5yDo -https://www.youtube.com/watch?v=Ci-jKDX2Utc -https://www.youtube.com/watch?v=4jNOtUpzcsM -https://www.youtube.com/watch?v=9F1xYw3-wC0 -https://www.youtube.com/watch?v=-wgG-UKu6P4 -https://www.youtube.com/watch?v=RA0QPohN8OA -https://www.youtube.com/watch?v=swaAAvGIVkQ -https://www.youtube.com/watch?v=E2cttPQLTmU -https://www.youtube.com/watch?v=7u_HP6AjgNM -https://www.youtube.com/watch?v=-o7z8zDigKw -https://www.youtube.com/watch?v=2iDzZRY4nFg -https://www.youtube.com/watch?v=TvauDXRHRQM -https://www.youtube.com/watch?v=gm0CbvYTfyU -https://www.youtube.com/watch?v=aVH30g7M7-s -https://www.youtube.com/watch?v=SaRJpKcMpIc -https://www.youtube.com/watch?v=VME8ViM9p_E -https://www.youtube.com/watch?v=Ob1gwjj03-g -https://www.youtube.com/watch?v=Kx8QHsFjzdo -https://www.youtube.com/watch?v=k0MXRrhoGzw -https://www.youtube.com/watch?v=iQpIcy-iINQ -https://www.youtube.com/watch?v=iIegaxZamds -https://www.youtube.com/watch?v=nKgS3q8Eixw -https://www.youtube.com/watch?v=Vu53Y9-u6OQ -https://www.youtube.com/watch?v=A-1xFilVGKA -https://www.youtube.com/watch?v=NzXu-1jdv2w -https://www.youtube.com/watch?v=dN0sAUTPOo0 -https://www.youtube.com/watch?v=KhKqvIa4KPI -https://www.youtube.com/watch?v=7EQlStjqPXk -https://www.youtube.com/watch?v=GvGYuLtTpI4 -https://www.youtube.com/watch?v=socM2vLFAXw -https://www.youtube.com/watch?v=4Lf0s5sF6Ps -https://www.youtube.com/watch?v=O0SNpN9bA7M -https://www.youtube.com/watch?v=_1B1Unp8rzQ -https://www.youtube.com/watch?v=2Pxto4fr8fU -https://www.youtube.com/watch?v=-wnCY6-C_LE -https://www.youtube.com/watch?v=Jyo0daMS_FI -https://www.youtube.com/watch?v=6Mv190DEO5Q -https://www.youtube.com/watch?v=A3vqXUImxmo -https://www.youtube.com/watch?v=jTavkw3fHCU -https://www.youtube.com/watch?v=wO_IP_UBHSE -https://www.youtube.com/watch?v=eK76eyqZLuI -https://www.youtube.com/watch?v=fGq8t2S9A4Y -https://www.youtube.com/watch?v=kwgTZ7Kx1j0 -https://www.youtube.com/watch?v=QpR8PIGCI6A -https://www.youtube.com/watch?v=wP6eLrWucEU -https://www.youtube.com/watch?v=c8wcjZjYUKk -https://www.youtube.com/watch?v=3jqtAd3ms6s -https://www.youtube.com/watch?v=yQJxa2y3RMU -https://www.youtube.com/watch?v=xq46z38tXds -https://www.youtube.com/watch?v=N5xZyUxRJBM -https://www.youtube.com/watch?v=seeZtr3lYz8 -https://www.youtube.com/watch?v=DtoINgJb8NM -https://www.youtube.com/watch?v=Q1by2tU9Rp4 -https://www.youtube.com/watch?v=X7Ho48iO098 
-https://www.youtube.com/watch?v=Nxs8sYQ6D4Y -https://www.youtube.com/watch?v=ePktyiVhM9Y -https://www.youtube.com/watch?v=FHAjPjwINDM -https://www.youtube.com/watch?v=iog13CSQsBE -https://www.youtube.com/watch?v=JC_7WZnqqYI -https://www.youtube.com/watch?v=qzar3W-wKdA -https://www.youtube.com/watch?v=JaLalK-3Ll0 -https://www.youtube.com/watch?v=L-VBIZ1lxZ8 -https://www.youtube.com/watch?v=z6LqQAbFZ5U -https://www.youtube.com/watch?v=V4sIhzXF5mw -https://www.youtube.com/watch?v=XaZskPC2CH8 -https://www.youtube.com/watch?v=N-sgaV4nP_c -https://www.youtube.com/watch?v=g4S3tejwVBQ -https://www.youtube.com/watch?v=uq7Wh078eUg -https://www.youtube.com/watch?v=zSmiH1Gbpp8 -https://www.youtube.com/watch?v=6WSy_8fjh3Y -https://www.youtube.com/watch?v=KRxe3BiApmU -https://www.youtube.com/watch?v=bO-OsMybcdY -https://www.youtube.com/watch?v=Tl9lNbchXKU -https://www.youtube.com/watch?v=MIGsaLyquMI -https://www.youtube.com/watch?v=clfJqj03TEs -https://www.youtube.com/watch?v=0Wpkb1L4pYw -https://www.youtube.com/watch?v=qtIP2ieotyk -https://www.youtube.com/watch?v=jm3t-hXP1Eo -https://www.youtube.com/watch?v=KU5QuYzEte0 -https://www.youtube.com/watch?v=r5iLqBt3TkE -https://www.youtube.com/watch?v=8vFMbZEma7w -https://www.youtube.com/watch?v=Q6SgU9w7XbY -https://www.youtube.com/watch?v=y9nZZxnsoTw -https://www.youtube.com/watch?v=y1VjxqY8BrI -https://www.youtube.com/watch?v=cVy39nbbSB4 -https://www.youtube.com/watch?v=JJDOUA_9TnU -https://www.youtube.com/watch?v=5fh0g4j0JPM -https://www.youtube.com/watch?v=4bp7st-p2Ns -https://www.youtube.com/watch?v=Vka_RHl8NBg -https://www.youtube.com/watch?v=C7mEj7LsxDM -https://www.youtube.com/watch?v=oU3xk3oaIWs -https://www.youtube.com/watch?v=Vc80-qcAYTI -https://www.youtube.com/watch?v=6jdO7Z0IA9s -https://www.youtube.com/watch?v=M624f51E6TU -https://www.youtube.com/watch?v=a2zb_vffseo -https://www.youtube.com/watch?v=kjJSsR7G-yA -https://www.youtube.com/watch?v=HTfUdUUB2sY -https://www.youtube.com/watch?v=clDTNsCoAt0 -https://www.youtube.com/watch?v=EtlQ36lNezM -https://www.youtube.com/watch?v=LvXVwrrsMjE -https://www.youtube.com/watch?v=lmC8nB3Sx0Y -https://www.youtube.com/watch?v=UgJMzy12jEk -https://www.youtube.com/watch?v=mqrZu6fiOlA -https://www.youtube.com/watch?v=cxyHJSC6qDI -https://www.youtube.com/watch?v=yJOvKMfMy9Q -https://www.youtube.com/watch?v=74eDPvsPc58 -https://www.youtube.com/watch?v=OVuJECLWkQQ -https://www.youtube.com/watch?v=V35MoqZdaKU -https://www.youtube.com/watch?v=5VVSyz8DYH8 -https://www.youtube.com/watch?v=DJz78yI9zvg -https://www.youtube.com/watch?v=Qs5ynEowGp0 -https://www.youtube.com/watch?v=PeRX98IZhJM -https://www.youtube.com/watch?v=GiVe7i3bY74 -https://www.youtube.com/watch?v=WOsmZel1tUQ -https://www.youtube.com/watch?v=6HyYFW4KAjE -https://www.youtube.com/watch?v=T40pRAlLPFI -https://www.youtube.com/watch?v=ESJ4fEsc3E8 -https://www.youtube.com/watch?v=JrmW9FXdiqg -https://www.youtube.com/watch?v=nfCY1Y2wCaE -https://www.youtube.com/watch?v=QDA8tjQeGa0 -https://www.youtube.com/watch?v=VNWFVRESr9k -https://www.youtube.com/watch?v=FwzDqhHNZq4 -https://www.youtube.com/watch?v=otevRZLVIns -https://www.youtube.com/watch?v=QbbaZPVWjZ0 -https://www.youtube.com/watch?v=NThxpgYS54k -https://www.youtube.com/watch?v=GzqvU_H3cjs -https://www.youtube.com/watch?v=aINHgBzBAsc -https://www.youtube.com/watch?v=dFLilVIP6v0 -https://www.youtube.com/watch?v=NQ_VXw5C5_g -https://www.youtube.com/watch?v=NlwOF78qdkM -https://www.youtube.com/watch?v=W8-8ZfmTIw8 -https://www.youtube.com/watch?v=_rSM6kDk6jU -https://www.youtube.com/watch?v=Tr38d3eLXf8 
-https://www.youtube.com/watch?v=YhIXV4DWhIA -https://www.youtube.com/watch?v=-nfJpVRjRM0 -https://www.youtube.com/watch?v=kosQ_XMUtDI -https://www.youtube.com/watch?v=D7OYw3vhtlQ -https://www.youtube.com/watch?v=3Xnmt2Ymii4 -https://www.youtube.com/watch?v=bM2Lw4iDWMs -https://www.youtube.com/watch?v=lVeTpIpFTuI -https://www.youtube.com/watch?v=Bxb6m5jVs4k -https://www.youtube.com/watch?v=rh8Zo8ZCw8w -https://www.youtube.com/watch?v=7C0wdkJH4tw -https://www.youtube.com/watch?v=TOOHa3M_3_A -https://www.youtube.com/watch?v=P0gfEojLMmY -https://www.youtube.com/watch?v=OcS5i4b94fE -https://www.youtube.com/watch?v=6K_SzWye5tE -https://www.youtube.com/watch?v=pVlrX8hakyE -https://www.youtube.com/watch?v=zIrG0Wt6CcA -https://www.youtube.com/watch?v=EjXQbmbgxhQ -https://www.youtube.com/watch?v=MmvWYXp1PNM -https://www.youtube.com/watch?v=lLx3Z3c7mV8 -https://www.youtube.com/watch?v=73ykZR11zes -https://www.youtube.com/watch?v=WHYrfpgtm6U -https://www.youtube.com/watch?v=b8xrtcAyyw0 -https://www.youtube.com/watch?v=SNFeZyokO6o -https://www.youtube.com/watch?v=sre-OE94un8 -https://www.youtube.com/watch?v=q28um1cqF4Q -https://www.youtube.com/watch?v=phgtPLyP97w -https://www.youtube.com/watch?v=jlUkuRkYFZU -https://www.youtube.com/watch?v=3G3m2BTXUQw -https://www.youtube.com/watch?v=te2LYx1SVBE -https://www.youtube.com/watch?v=JACfOF38Ffk -https://www.youtube.com/watch?v=ZKb43WnjoiU -https://www.youtube.com/watch?v=Qel0DXeISN0 -https://www.youtube.com/watch?v=9FekdRZPIpQ -https://www.youtube.com/watch?v=L3E45I283ZU -https://www.youtube.com/watch?v=ZpKcW61Y3zI -https://www.youtube.com/watch?v=SiJmWpuQhNQ -https://www.youtube.com/watch?v=sYe2vx6p_T4 -https://www.youtube.com/watch?v=eJEGgi8ZkRQ -https://www.youtube.com/watch?v=V--wYltJnB4 -https://www.youtube.com/watch?v=z8EMf_9wUzc -https://www.youtube.com/watch?v=2vuVM43MRiA -https://www.youtube.com/watch?v=Sc88FJQw-8A -https://www.youtube.com/watch?v=2lB9s3y-6eA -https://www.youtube.com/watch?v=vNkYcCV7S4U -https://www.youtube.com/watch?v=F3ls751u5o0 -https://www.youtube.com/watch?v=oFo8wPllZu8 -https://www.youtube.com/watch?v=Ku9lKfjZU30 -https://www.youtube.com/watch?v=3Yq8fNNHB1Y -https://www.youtube.com/watch?v=Sk3dVf0H9C0 -https://www.youtube.com/watch?v=w99cGM_y4Og -https://www.youtube.com/watch?v=jm0Zv6ydHeo -https://www.youtube.com/watch?v=5X2g_qbEgEs -https://www.youtube.com/watch?v=MYTQew8auIY -https://www.youtube.com/watch?v=GOy_-PzLf8g -https://www.youtube.com/watch?v=YUkd_TZQZKk -https://www.youtube.com/watch?v=hzeo2pDrtLk -https://www.youtube.com/watch?v=wEabH0TyYwM -https://www.youtube.com/watch?v=SlL6VTxaCog -https://www.youtube.com/watch?v=s6ZflJj1p34 -https://www.youtube.com/watch?v=TMTHkY8hCx8 -https://www.youtube.com/watch?v=cRBj61eBjhQ -https://www.youtube.com/watch?v=DTL3G1h2SdQ -https://www.youtube.com/watch?v=NCI6-NWq6DY -https://www.youtube.com/watch?v=lIvSjlKUvCg -https://www.youtube.com/watch?v=6r7Ubc_dEQk -https://www.youtube.com/watch?v=IFdEerP9z4Q -https://www.youtube.com/watch?v=JbBGMq-wBHM -https://www.youtube.com/watch?v=RIYSRqBseLI -https://www.youtube.com/watch?v=2v_UzrgNwpo -https://www.youtube.com/watch?v=iX_vynlyrC8 -https://www.youtube.com/watch?v=ZBjsh2p80sY -https://www.youtube.com/watch?v=AK6dg9YJz70 -https://www.youtube.com/watch?v=AMAzK-IjtW0 -https://www.youtube.com/watch?v=PfaI57gHTkU -https://www.youtube.com/watch?v=Cn9asaKJkP8 -https://www.youtube.com/watch?v=GWDOcK-GHUo -https://www.youtube.com/watch?v=FdrM63-sL5E -https://www.youtube.com/watch?v=Ni15untE9lc -https://www.youtube.com/watch?v=ZXiNG_wc3cs 
-https://www.youtube.com/watch?v=a1NjiQlF_QU -https://www.youtube.com/watch?v=aKlFxKwlJCI -https://www.youtube.com/watch?v=ncyhGHY-J8s -https://www.youtube.com/watch?v=Nsl6F3KD68I -https://www.youtube.com/watch?v=y-MVI2HeAb8 -https://www.youtube.com/watch?v=lDmo3DvHSoQ -https://www.youtube.com/watch?v=_nO4GwwK7a0 -https://www.youtube.com/watch?v=JNV0Bs_Z2rY -https://www.youtube.com/watch?v=QCs8H3xMCCg -https://www.youtube.com/watch?v=xSlTzHlfJy0 -https://www.youtube.com/watch?v=2eVxtYA-1jI -https://www.youtube.com/watch?v=a9Ma4IjuWvM -https://www.youtube.com/watch?v=8vaxMejObcY -https://www.youtube.com/watch?v=SR6dOkauvIo -https://www.youtube.com/watch?v=Pudto3Xw_NY -https://www.youtube.com/watch?v=6EIjFdz_344 -https://www.youtube.com/watch?v=Q30ETlsokl0 -https://www.youtube.com/watch?v=4RJNa-Dj68I -https://www.youtube.com/watch?v=KoNzY_CeeKw -https://www.youtube.com/watch?v=7xNd2lPYR68 -https://www.youtube.com/watch?v=Bowfz2gZ2Sk -https://www.youtube.com/watch?v=X32vDZjnIWc -https://www.youtube.com/watch?v=GLP9CuTWUlk -https://www.youtube.com/watch?v=JDsMfCdDUfk -https://www.youtube.com/watch?v=VKynMQrBM7E -https://www.youtube.com/watch?v=ejB1lm7jDzw -https://www.youtube.com/watch?v=8B4Sg9xIGTQ -https://www.youtube.com/watch?v=_rB2YS9t63M -https://www.youtube.com/watch?v=KJyvaMvan9I -https://www.youtube.com/watch?v=ZcdhsUEd6TU -https://www.youtube.com/watch?v=NCPYSEYoF1c -https://www.youtube.com/watch?v=EgupaPnIeMM -https://www.youtube.com/watch?v=TqPRCGbHSjw -https://www.youtube.com/watch?v=Hi62GOTWGI8 -https://www.youtube.com/watch?v=dHcZffZjYB0 -https://www.youtube.com/watch?v=Pvnqhkywbxc -https://www.youtube.com/watch?v=UceBEWYa13w -https://www.youtube.com/watch?v=AgRH2C5yPjQ -https://www.youtube.com/watch?v=dXM5zpjqtOc -https://www.youtube.com/watch?v=a4Sb42va86Q -https://www.youtube.com/watch?v=JSZdE83PiRQ -https://www.youtube.com/watch?v=mWi5mU1bPPg -https://www.youtube.com/watch?v=ZTVH8aYSwjQ -https://www.youtube.com/watch?v=e_3WleCGJbc -https://www.youtube.com/watch?v=woMgT3A_71Q -https://www.youtube.com/watch?v=771kiAizO8g -https://www.youtube.com/watch?v=7PYkf420f9c -https://www.youtube.com/watch?v=AgE4Ke5nNoY -https://www.youtube.com/watch?v=ZD-UOCrU6Fg -https://www.youtube.com/watch?v=WnNpg4LRzHI -https://www.youtube.com/watch?v=guKqHIuxq90 -https://www.youtube.com/watch?v=WZ89NCgdvII -https://www.youtube.com/watch?v=e9r2o7MRyMQ -https://www.youtube.com/watch?v=1xdaVv0qb0I -https://www.youtube.com/watch?v=l5SYopn7vSg -https://www.youtube.com/watch?v=5xiKlm6vG5w -https://www.youtube.com/watch?v=iKAtkHSVfjU -https://www.youtube.com/watch?v=xQHmDw2ayNw -https://www.youtube.com/watch?v=PsWWUiANTfo -https://www.youtube.com/watch?v=tQIIhH7A9FE -https://www.youtube.com/watch?v=6EkM3Fvirq0 -https://www.youtube.com/watch?v=mQ-kLX_NRwU -https://www.youtube.com/watch?v=QR6WfdodfDU -https://www.youtube.com/watch?v=Xb4ZZ6T50vM -https://www.youtube.com/watch?v=E0HGtjMKljg -https://www.youtube.com/watch?v=tbtPMw1BNA4 -https://www.youtube.com/watch?v=eFJlQBkjEqw -https://www.youtube.com/watch?v=vJ4Ue81SyQw -https://www.youtube.com/watch?v=dXkf-O-ByOQ -https://www.youtube.com/watch?v=zIfZxrswlEY -https://www.youtube.com/watch?v=A5vdyzU-0zg -https://www.youtube.com/watch?v=2ajukBAGGuU -https://www.youtube.com/watch?v=WTZ4zCezHUU -https://www.youtube.com/watch?v=XWi8rXiRq9E -https://www.youtube.com/watch?v=qpMl2erxOgU -https://www.youtube.com/watch?v=VwbO60gjWoA -https://www.youtube.com/watch?v=7IuipODNcRE -https://www.youtube.com/watch?v=HfMHDL0SgZ0 -https://www.youtube.com/watch?v=yv5mcmNEwdU 
-https://www.youtube.com/watch?v=xma0nVpBjZo -https://www.youtube.com/watch?v=TKxNwoYM5ec -https://www.youtube.com/watch?v=hiKwAlTM1Ys -https://www.youtube.com/watch?v=KG-30LxX-qs -https://www.youtube.com/watch?v=_6Ms_SJmQn4 -https://www.youtube.com/watch?v=0tdXaee9o6k -https://www.youtube.com/watch?v=J9FCafzAuaE -https://www.youtube.com/watch?v=7jTrdi5t83Y -https://www.youtube.com/watch?v=WIky8XG3SP4 -https://www.youtube.com/watch?v=0iv6pKiKzL4 -https://www.youtube.com/watch?v=ZfWDeOgVSQo -https://www.youtube.com/watch?v=7QdQ7z3mFr4 -https://www.youtube.com/watch?v=JfjXBXIvd-8 -https://www.youtube.com/watch?v=YyOK4Lau-xY -https://www.youtube.com/watch?v=klgdpFMajjY -https://www.youtube.com/watch?v=I-ar6huKQ_c -https://www.youtube.com/watch?v=vkL_zAcnkI8 -https://www.youtube.com/watch?v=tgMX2SfF5lI -https://www.youtube.com/watch?v=4FrSX37DoTU -https://www.youtube.com/watch?v=ERX--LGwC10 -https://www.youtube.com/watch?v=t_2kZnBl9i0 -https://www.youtube.com/watch?v=YnhdkKajmRA -https://www.youtube.com/watch?v=7nfJO1rJYMo -https://www.youtube.com/watch?v=B4bd4HcRo0Q -https://www.youtube.com/watch?v=BOs7CVyGyGg -https://www.youtube.com/watch?v=OZyMlE-yy3U -https://www.youtube.com/watch?v=SvbS2IALKmo -https://www.youtube.com/watch?v=U-IdboWZNiA -https://www.youtube.com/watch?v=msjiKwDbsaM -https://www.youtube.com/watch?v=dmZ1pWgxZn0 -https://www.youtube.com/watch?v=utGMKcJtuPo -https://www.youtube.com/watch?v=sGseachRqQs -https://www.youtube.com/watch?v=s_7soIjx_sk -https://www.youtube.com/watch?v=QiFpIIF7-4g -https://www.youtube.com/watch?v=EFCcOOuWWc8 -https://www.youtube.com/watch?v=ORAwIWpC3Gw -https://www.youtube.com/watch?v=nB5njVbYF_k -https://www.youtube.com/watch?v=nfz__ppKnHM -https://www.youtube.com/watch?v=9y9YPN5mclI -https://www.youtube.com/watch?v=knEbE0conCs -https://www.youtube.com/watch?v=uuoZinaXlAs -https://www.youtube.com/watch?v=HATcwbvQEac -https://www.youtube.com/watch?v=9B5bfAzqT6A -https://www.youtube.com/watch?v=Spa5Hstqe9c -https://www.youtube.com/watch?v=RX8-5eZZH-g -https://www.youtube.com/watch?v=FhNZRjWbV88 -https://www.youtube.com/watch?v=4iOXfE5mrOs -https://www.youtube.com/watch?v=v2ZKFLsJh44 -https://www.youtube.com/watch?v=Im7IqqmDmwE -https://www.youtube.com/watch?v=ldQM2aFCLAY -https://www.youtube.com/watch?v=wNqXAjJtZG0 -https://www.youtube.com/watch?v=etSP1c6XQF8 -https://www.youtube.com/watch?v=Kz7tHkNSXnU -https://www.youtube.com/watch?v=YurbIFCKqHo -https://www.youtube.com/watch?v=UspvXAlQ26o -https://www.youtube.com/watch?v=JYvEggbevBw -https://www.youtube.com/watch?v=TJpGFfXl4Rg -https://www.youtube.com/watch?v=T59N1dRRAtw -https://www.youtube.com/watch?v=sdW6jwBctac -https://www.youtube.com/watch?v=jTWH3Rb72rI -https://www.youtube.com/watch?v=OT5UFvRabSg -https://www.youtube.com/watch?v=N7uyjA7i7SI -https://www.youtube.com/watch?v=D2z8w4MNnOI -https://www.youtube.com/watch?v=PGaSqod7xJ8 -https://www.youtube.com/watch?v=W0RtLCRayZI -https://www.youtube.com/watch?v=9d_fnwE-P3g -https://www.youtube.com/watch?v=Y_nKb8qDnS4 -https://www.youtube.com/watch?v=mSqdcFxU-BE -https://www.youtube.com/watch?v=dh8voXLq30Q -https://www.youtube.com/watch?v=j34tBBPPO7k -https://www.youtube.com/watch?v=Z39BEKY6r2g -https://www.youtube.com/watch?v=tV48lHlCO_E -https://www.youtube.com/watch?v=9AdxIIwBwD8 -https://www.youtube.com/watch?v=sgYKkmO4Y9k -https://www.youtube.com/watch?v=g38X9lWrCvM -https://www.youtube.com/watch?v=qeZX32QWR5I -https://www.youtube.com/watch?v=t949upp1nAc -https://www.youtube.com/watch?v=FQG5G5yuhkI -https://www.youtube.com/watch?v=vqArE3Ddjp0 
-https://www.youtube.com/watch?v=kWD8zGU-sHs -https://www.youtube.com/watch?v=UKpJ07-Fdco -https://www.youtube.com/watch?v=yCTY7L58M0c -https://www.youtube.com/watch?v=Eg9kwoacM_Q -https://www.youtube.com/watch?v=TYh4lXx57b4 -https://www.youtube.com/watch?v=KkQGpeabUmc -https://www.youtube.com/watch?v=alqtdG-lH20 -https://www.youtube.com/watch?v=LhUFVnK5IBU -https://www.youtube.com/watch?v=SKxio1UECBA -https://www.youtube.com/watch?v=OkZQdhgToL4 -https://www.youtube.com/watch?v=tSyp0B5KLIc -https://www.youtube.com/watch?v=X2l_P0KN4FE -https://www.youtube.com/watch?v=Rw6YwelbmPU -https://www.youtube.com/watch?v=QucIkabx540 -https://www.youtube.com/watch?v=R6a2CRSeq4I -https://www.youtube.com/watch?v=S_pCLiKX1jQ -https://www.youtube.com/watch?v=B_uOj6ZaGjU -https://www.youtube.com/watch?v=pMxjV14wEaQ -https://www.youtube.com/watch?v=hSCD7O2zZqs -https://www.youtube.com/watch?v=MT7JYGXHvsg -https://www.youtube.com/watch?v=Z7eUu3-vcXA -https://www.youtube.com/watch?v=Pk2cph6j3Qk -https://www.youtube.com/watch?v=hionali1f8Q -https://www.youtube.com/watch?v=RFkfq59Q6Jc -https://www.youtube.com/watch?v=ZIZVTRYCGWM -https://www.youtube.com/watch?v=QFMoiOAY6i4 -https://www.youtube.com/watch?v=6uRUgR7azzc -https://www.youtube.com/watch?v=76sAQCo9Ao0 -https://www.youtube.com/watch?v=vEjnuOJadpA -https://www.youtube.com/watch?v=T5qyH3fO_NQ -https://www.youtube.com/watch?v=HIHiwfUj8_Y -https://www.youtube.com/watch?v=oMnMBFHzO2A -https://www.youtube.com/watch?v=AsRDIaqn-b0 -https://www.youtube.com/watch?v=u1VSkXNGAM0 -https://www.youtube.com/watch?v=ys3q0YCAtWA -https://www.youtube.com/watch?v=zCfWIxAIo2A -https://www.youtube.com/watch?v=5hgHNqbMovk -https://www.youtube.com/watch?v=JQGubPbpwp0 -https://www.youtube.com/watch?v=np2aMq-duMA -https://www.youtube.com/watch?v=MsKvbsmxSEk -https://www.youtube.com/watch?v=xOOUaiwdY98 -https://www.youtube.com/watch?v=gxJsp3I9PvQ -https://www.youtube.com/watch?v=cpFV2AxmEeY -https://www.youtube.com/watch?v=42mSq6e5ns0 -https://www.youtube.com/watch?v=ZcMv4sje3Vw -https://www.youtube.com/watch?v=tNTzPVJoMKQ -https://www.youtube.com/watch?v=NQRZXINtEhs -https://www.youtube.com/watch?v=aTG1pNOSYro -https://www.youtube.com/watch?v=ZoTtMmX1oz8 -https://www.youtube.com/watch?v=ulUenhoi__M -https://www.youtube.com/watch?v=tT-3k4barTQ -https://www.youtube.com/watch?v=QtMoPqXYXDk -https://www.youtube.com/watch?v=HEftQMgt4rg -https://www.youtube.com/watch?v=b4r4XexQLrM -https://www.youtube.com/watch?v=1yFwBLcBFRQ -https://www.youtube.com/watch?v=WKHlvJ6x1LA -https://www.youtube.com/watch?v=xSOIjsEekWA -https://www.youtube.com/watch?v=62akyr7rzxI -https://www.youtube.com/watch?v=XIIJpCoCm4Q -https://www.youtube.com/watch?v=BbtWZWOoACk -https://www.youtube.com/watch?v=ijyJWAgGWG0 -https://www.youtube.com/watch?v=ACHMYMusnKo -https://www.youtube.com/watch?v=trsCBMyuyLI -https://www.youtube.com/watch?v=iSaF1n0cbuQ -https://www.youtube.com/watch?v=10j3GH59eL4 -https://www.youtube.com/watch?v=mIwhMVo7GJs -https://www.youtube.com/watch?v=1cB4HhI47Jg -https://www.youtube.com/watch?v=H94hOHN2rVw -https://www.youtube.com/watch?v=rGnMrpcgUjE -https://www.youtube.com/watch?v=Tg15phZM9MA -https://www.youtube.com/watch?v=ayPZx_IUoos -https://www.youtube.com/watch?v=hKq0g55QaM4 -https://www.youtube.com/watch?v=NrAW-afOBqM -https://www.youtube.com/watch?v=kQa5hKggcjw -https://www.youtube.com/watch?v=yEymLHO3CQ4 -https://www.youtube.com/watch?v=0NsTlre-PPA -https://www.youtube.com/watch?v=mELSxoR-zCU -https://www.youtube.com/watch?v=yEVXuFCUjXw -https://www.youtube.com/watch?v=hMFcuOFDCcg 
-https://www.youtube.com/watch?v=6MeAaGtWp9s -https://www.youtube.com/watch?v=jKvNdMDukcc -https://www.youtube.com/watch?v=8KS0CHgvI1A -https://www.youtube.com/watch?v=AKvX7ah_hOw -https://www.youtube.com/watch?v=GuLbIe2cF2w -https://www.youtube.com/watch?v=sY-zWrNqkOE -https://www.youtube.com/watch?v=3iC097rvS_o -https://www.youtube.com/watch?v=wGZtL-gaazM -https://www.youtube.com/watch?v=Er1FX6IMfqQ -https://www.youtube.com/watch?v=1jvJ3DMJAlg -https://www.youtube.com/watch?v=c85Fh_WDU3A -https://www.youtube.com/watch?v=wtiG7C87QX4 -https://www.youtube.com/watch?v=eACzH5r_Ma8 -https://www.youtube.com/watch?v=9ypHGzE6Di8 -https://www.youtube.com/watch?v=EvcvCsNF0-g -https://www.youtube.com/watch?v=zEp3EOVlRFE -https://www.youtube.com/watch?v=ZMVgFtRVSuQ -https://www.youtube.com/watch?v=YELUbjJS280 -https://www.youtube.com/watch?v=ViNvarsfuNQ -https://www.youtube.com/watch?v=bITtNQSvWfc -https://www.youtube.com/watch?v=bg0YLrDhXgQ -https://www.youtube.com/watch?v=TF2weikuHEo -https://www.youtube.com/watch?v=Lw55m9XNSaQ -https://www.youtube.com/watch?v=NNCTs-K7U38 -https://www.youtube.com/watch?v=lgLixSq9wS4 -https://www.youtube.com/watch?v=2ZMLHS7l5NU -https://www.youtube.com/watch?v=TVfRO7hn3bs -https://www.youtube.com/watch?v=wByimVu-hFs -https://www.youtube.com/watch?v=amk_ol9sb3M -https://www.youtube.com/watch?v=S6QvzexdgfY -https://www.youtube.com/watch?v=yFvRdCOe_Ss -https://www.youtube.com/watch?v=ghvU3NQvb-4 -https://www.youtube.com/watch?v=-R5HMIcBxNo -https://www.youtube.com/watch?v=xksdvTH0fA4 -https://www.youtube.com/watch?v=VwlX1fGTOio -https://www.youtube.com/watch?v=luTcIoSJooo -https://www.youtube.com/watch?v=zyQoz35pRN4 -https://www.youtube.com/watch?v=1ijOjx54_8k -https://www.youtube.com/watch?v=Q57suljQVtE -https://www.youtube.com/watch?v=f6Wpgyakg4Q -https://www.youtube.com/watch?v=SmZwenV25hI -https://www.youtube.com/watch?v=438S8whO7sM -https://www.youtube.com/watch?v=GJwGamIG_2o -https://www.youtube.com/watch?v=RBEjeQf3eUA -https://www.youtube.com/watch?v=0Bm0VrVWTVM -https://www.youtube.com/watch?v=ELO3tyx76R0 -https://www.youtube.com/watch?v=2RjdVnvMtZk -https://www.youtube.com/watch?v=6zyz3lHLlh0 -https://www.youtube.com/watch?v=gAjmL2hgjVA -https://www.youtube.com/watch?v=UxD-eeezwz8 -https://www.youtube.com/watch?v=FQ1Qj5tiAfU -https://www.youtube.com/watch?v=ApE2rPgtQ04 -https://www.youtube.com/watch?v=wp-d1_jUCS8 -https://www.youtube.com/watch?v=6Y22VyY5h1A -https://www.youtube.com/watch?v=CQgucWUHzS8 -https://www.youtube.com/watch?v=5_7mObtXbvc -https://www.youtube.com/watch?v=xMAkXgaG1LY -https://www.youtube.com/watch?v=haNnYV72rKw -https://www.youtube.com/watch?v=DELqUJkPXPg -https://www.youtube.com/watch?v=pX7s8Xw5YCc -https://www.youtube.com/watch?v=9vr04dqyX98 -https://www.youtube.com/watch?v=dYLPqsSQpcU -https://www.youtube.com/watch?v=ljQZiwqYqi8 -https://www.youtube.com/watch?v=JASbtU-NIYE -https://www.youtube.com/watch?v=DrTj3YdT_S8 -https://www.youtube.com/watch?v=3E_M5GC5me0 -https://www.youtube.com/watch?v=Wz74VDYVpO4 -https://www.youtube.com/watch?v=BseyjmC39x8 -https://www.youtube.com/watch?v=IHaqsz_LDBE -https://www.youtube.com/watch?v=X9y2thI8n-g -https://www.youtube.com/watch?v=Tvt1-4bA0ZU -https://www.youtube.com/watch?v=PX-Z916sh_Q -https://www.youtube.com/watch?v=k0pCbTE11jE -https://www.youtube.com/watch?v=Rut5u78laS8 -https://www.youtube.com/watch?v=6e9y8HP9-Qs -https://www.youtube.com/watch?v=lpfomwG5Jb0 -https://www.youtube.com/watch?v=hJVfabvgg7g -https://www.youtube.com/watch?v=ot5N-kaB86g -https://www.youtube.com/watch?v=I8W2eJ6lMv8 
-https://www.youtube.com/watch?v=iK489f6qZhw -https://www.youtube.com/watch?v=7m7GdAaDUJk -https://www.youtube.com/watch?v=b9-QhEhWCMg -https://www.youtube.com/watch?v=1NPHHtZobWA -https://www.youtube.com/watch?v=md4EimJIMxU -https://www.youtube.com/watch?v=0cxc5iVzavo -https://www.youtube.com/watch?v=EShwg_-8gBQ -https://www.youtube.com/watch?v=jBNU3d1APAk -https://www.youtube.com/watch?v=X5s3aQD2vTg -https://www.youtube.com/watch?v=pg4PFn1a3w4 -https://www.youtube.com/watch?v=j25u9KdduTI -https://www.youtube.com/watch?v=1G2a1FOdBX4 -https://www.youtube.com/watch?v=cnAz3Tz6KM0 -https://www.youtube.com/watch?v=x4RzM8MpYQs -https://www.youtube.com/watch?v=1zan33JDOaU -https://www.youtube.com/watch?v=e2qGGGcaNbE -https://www.youtube.com/watch?v=KxKh-ey1anM -https://www.youtube.com/watch?v=sY33ut-4q5U -https://www.youtube.com/watch?v=Yhy_9B-W1Bc -https://www.youtube.com/watch?v=HVjLcXoWdr0 -https://www.youtube.com/watch?v=EDk4aURqdt0 -https://www.youtube.com/watch?v=f69aszkNSrA -https://www.youtube.com/watch?v=ArP-iJnxr3g -https://www.youtube.com/watch?v=cSMlgnD9Uf4 -https://www.youtube.com/watch?v=VvpHCEq3ETg -https://www.youtube.com/watch?v=Uh3-23diDuQ -https://www.youtube.com/watch?v=0h5fjDk4Fxk -https://www.youtube.com/watch?v=w729MixxpsY -https://www.youtube.com/watch?v=2JQv7_ijvHs -https://www.youtube.com/watch?v=lmBbDkhNtfU -https://www.youtube.com/watch?v=HdCoCj7DNlM -https://www.youtube.com/watch?v=LikXRzpPYEw -https://www.youtube.com/watch?v=tU9heYibc_4 -https://www.youtube.com/watch?v=PXbkzMxmd_Q -https://www.youtube.com/watch?v=4Wrgvzh9oF8 -https://www.youtube.com/watch?v=jBY7nHRdFuY -https://www.youtube.com/watch?v=WFxITA-cPn0 -https://www.youtube.com/watch?v=iZXtblrH3E0 -https://www.youtube.com/watch?v=21n1QM1E5Tg -https://www.youtube.com/watch?v=ore2ZKBFVIE -https://www.youtube.com/watch?v=DJPjz4TM-r8 -https://www.youtube.com/watch?v=Uz13MyjVlI0 -https://www.youtube.com/watch?v=M734Drp7DEk -https://www.youtube.com/watch?v=nG_dAIdROnA -https://www.youtube.com/watch?v=5S5i0RcG4JU -https://www.youtube.com/watch?v=nhP6k9XeJj8 -https://www.youtube.com/watch?v=T00fMHYd4xg -https://www.youtube.com/watch?v=5btdrrWIPj8 -https://www.youtube.com/watch?v=-0Dy7ZB5Tp0 -https://www.youtube.com/watch?v=cbRqV5Nczs0 -https://www.youtube.com/watch?v=wnWfEALwfrw -https://www.youtube.com/watch?v=hrMmDB735d8 -https://www.youtube.com/watch?v=-T5viJt_NeY -https://www.youtube.com/watch?v=NbifCHxb1kU -https://www.youtube.com/watch?v=xUJKjeIknXY -https://www.youtube.com/watch?v=rGUN31cV2Hg -https://www.youtube.com/watch?v=2HRQDPQkntU -https://www.youtube.com/watch?v=QuidE5bynlg -https://www.youtube.com/watch?v=OzTDb4Vslkg -https://www.youtube.com/watch?v=u-t0Dm4Jx6s -https://www.youtube.com/watch?v=QQn7WjVq35M -https://www.youtube.com/watch?v=UcsqZ0r43xQ -https://www.youtube.com/watch?v=hcIdB-l3c_U -https://www.youtube.com/watch?v=4KF3dnURKaU -https://www.youtube.com/watch?v=km_2AcH-76A -https://www.youtube.com/watch?v=GG1lxGKvFZo -https://www.youtube.com/watch?v=4gM4ngPf-Ug -https://www.youtube.com/watch?v=CQrUmMTaCss -https://www.youtube.com/watch?v=fgkJpaPp6Ho -https://www.youtube.com/watch?v=Sa1h5lFB2oo -https://www.youtube.com/watch?v=JPxdXpn8pU8 -https://www.youtube.com/watch?v=OiDz8w9nSC8 -https://www.youtube.com/watch?v=Ka9PvuYAdm0 -https://www.youtube.com/watch?v=_qr5dg7jIN0 -https://www.youtube.com/watch?v=_PYpZwH1Goo -https://www.youtube.com/watch?v=lk9E49BqUkQ -https://www.youtube.com/watch?v=rcreLiAZGL0 -https://www.youtube.com/watch?v=ieic-zav9Fk -https://www.youtube.com/watch?v=CwCpNXBF6js 
-https://www.youtube.com/watch?v=LlvC2Kj2pFo -https://www.youtube.com/watch?v=prErUmVPO3A -https://www.youtube.com/watch?v=StNr4Qo5QxI -https://www.youtube.com/watch?v=DwXPj__Wm5E -https://www.youtube.com/watch?v=z3tA2nrhIuk -https://www.youtube.com/watch?v=oKGM6yhysMw -https://www.youtube.com/watch?v=bXNjW_I_1Qo -https://www.youtube.com/watch?v=LPmoYOBnVR0 -https://www.youtube.com/watch?v=GyI9lJELbEk -https://www.youtube.com/watch?v=_C6t5mf4lbo -https://www.youtube.com/watch?v=OiGxqjZfbJc -https://www.youtube.com/watch?v=KCPW6EXa8-o -https://www.youtube.com/watch?v=W8yWWK0C8yg -https://www.youtube.com/watch?v=_obpK8FSh0M -https://www.youtube.com/watch?v=IChe3vDfRiI -https://www.youtube.com/watch?v=Bl_ph6AMXz8 -https://www.youtube.com/watch?v=xReRVPK97SE -https://www.youtube.com/watch?v=w3UZJ21mpS8 -https://www.youtube.com/watch?v=50ilwSLjqPQ -https://www.youtube.com/watch?v=k1gJ787wdR4 -https://www.youtube.com/watch?v=6onGbYzhrks -https://www.youtube.com/watch?v=JzdOP-4lSAM -https://www.youtube.com/watch?v=25ey_nTjFXM -https://www.youtube.com/watch?v=JHwghzfAkNM -https://www.youtube.com/watch?v=c2N-wMntv7o -https://www.youtube.com/watch?v=wvTkHp8yWQA -https://www.youtube.com/watch?v=ekx12gwN35o -https://www.youtube.com/watch?v=3Dmwk7vH5aE -https://www.youtube.com/watch?v=37oU7XZqHKQ -https://www.youtube.com/watch?v=p7cRbQwQLIU -https://www.youtube.com/watch?v=O5WZy3lgUwk -https://www.youtube.com/watch?v=xwQ7heyQDU0 -https://www.youtube.com/watch?v=Ioc2DzR36eg -https://www.youtube.com/watch?v=_Ws5CLQAZiY -https://www.youtube.com/watch?v=0eWGo4d061o -https://www.youtube.com/watch?v=w0gwOfOspWM -https://www.youtube.com/watch?v=kuBIKagXlD0 -https://www.youtube.com/watch?v=MxUVYfjSKUo -https://www.youtube.com/watch?v=Gm2rAs40jCU -https://www.youtube.com/watch?v=5_jbqEeWdqY -https://www.youtube.com/watch?v=_vCLPKftFsk -https://www.youtube.com/watch?v=sLzyP0g3Tz0 -https://www.youtube.com/watch?v=6_ALpSHwsyU -https://www.youtube.com/watch?v=iyChl-zsg8I -https://www.youtube.com/watch?v=n-tgxts1qTQ -https://www.youtube.com/watch?v=xwIoQ7aHASA -https://www.youtube.com/watch?v=kWDWFGZcPn0 -https://www.youtube.com/watch?v=Z3WZ8IYnx6o -https://www.youtube.com/watch?v=6grKBi186q0 -https://www.youtube.com/watch?v=yzmSXl428lo -https://www.youtube.com/watch?v=-7aDUxmsbho -https://www.youtube.com/watch?v=2E15vQEUh3Y -https://www.youtube.com/watch?v=-gTJpdOSKdE -https://www.youtube.com/watch?v=1BgHKfpGqxQ -https://www.youtube.com/watch?v=CGG8-6y82Dc -https://www.youtube.com/watch?v=OYWFQagB5SA -https://www.youtube.com/watch?v=v2ejcHqbB9M -https://www.youtube.com/watch?v=Ui8eW_bQI-c -https://www.youtube.com/watch?v=Df43IR-Y2pg -https://www.youtube.com/watch?v=8SxFtbjJLIY -https://www.youtube.com/watch?v=_CzzsdbwVvU -https://www.youtube.com/watch?v=1zQ_pVzAWmQ -https://www.youtube.com/watch?v=Zx7l88BnZq4 -https://www.youtube.com/watch?v=8D7cgnCRg9M -https://www.youtube.com/watch?v=1mmOlk_6KiY -https://www.youtube.com/watch?v=f9yuepxqoI8 -https://www.youtube.com/watch?v=WzwdRsbyrsE -https://www.youtube.com/watch?v=O0PK3YdZ6Gs -https://www.youtube.com/watch?v=y5sBj8J1zXE -https://www.youtube.com/watch?v=Erg0E8MkI8g -https://www.youtube.com/watch?v=V2XpuzU9qKc -https://www.youtube.com/watch?v=iO6Td9WZl8Q -https://www.youtube.com/watch?v=Ve6hXGghGDU -https://www.youtube.com/watch?v=XQkWto6o-zE -https://www.youtube.com/watch?v=jU7HHOjHIHI -https://www.youtube.com/watch?v=y8U0G8Yp5As -https://www.youtube.com/watch?v=hK3dOOeJYeQ -https://www.youtube.com/watch?v=3XwUV_IIeAY -https://www.youtube.com/watch?v=-H7oKDmsJBA 
-https://www.youtube.com/watch?v=ax2sbkIwsbs -https://www.youtube.com/watch?v=emBuYmGX9pA -https://www.youtube.com/watch?v=bYnSsdt-EwQ -https://www.youtube.com/watch?v=xWH-3s6ae6g -https://www.youtube.com/watch?v=bGwSw06nHlI -https://www.youtube.com/watch?v=XzfATljHtZA -https://www.youtube.com/watch?v=vHEpEVrULj8 -https://www.youtube.com/watch?v=Vw9VQqB9nZY -https://www.youtube.com/watch?v=Zo8DB-WiT8o -https://www.youtube.com/watch?v=118Qmc4PW94 -https://www.youtube.com/watch?v=mqjvTCW28wA -https://www.youtube.com/watch?v=COOUsA8sgzw -https://www.youtube.com/watch?v=nJjZWvoIXno -https://www.youtube.com/watch?v=Un-iP21XLcY -https://www.youtube.com/watch?v=ndqEvSH28sE -https://www.youtube.com/watch?v=ROTsUIJFFAI -https://www.youtube.com/watch?v=sjtU9ZJ1kl4 -https://www.youtube.com/watch?v=-l6aB5-5IGo -https://www.youtube.com/watch?v=B2zGJdhw7Qk -https://www.youtube.com/watch?v=Hq-agpSNVvk -https://www.youtube.com/watch?v=9Kmah0OdmfQ -https://www.youtube.com/watch?v=G3pd86ahuIk -https://www.youtube.com/watch?v=Z4zFfpUWFjc -https://www.youtube.com/watch?v=uCXKbn0_LYU -https://www.youtube.com/watch?v=Mm4CsHQ7jEY -https://www.youtube.com/watch?v=gzDj9vQhM3U -https://www.youtube.com/watch?v=nYBMPcWh6io -https://www.youtube.com/watch?v=sTIi-NkM_4o -https://www.youtube.com/watch?v=Mid_00T8OzY -https://www.youtube.com/watch?v=O74uCOmq-5w -https://www.youtube.com/watch?v=5czh6hr5ZCU -https://www.youtube.com/watch?v=Seu6J2umwKg -https://www.youtube.com/watch?v=aYtahgqJzGM -https://www.youtube.com/watch?v=hOGbqy4YQbc -https://www.youtube.com/watch?v=5hBzDL7E9yk -https://www.youtube.com/watch?v=cOY9GMP_fyA -https://www.youtube.com/watch?v=S7pF2ggDLM4 -https://www.youtube.com/watch?v=eTDWGdCwEsU -https://www.youtube.com/watch?v=9CyB887wkHs -https://www.youtube.com/watch?v=PnKwZK96xOQ -https://www.youtube.com/watch?v=1bnsQ6waGm8 -https://www.youtube.com/watch?v=ezJJzX7F57E -https://www.youtube.com/watch?v=abgJBju9UhE -https://www.youtube.com/watch?v=Gb_CTWcoscs -https://www.youtube.com/watch?v=XK70iPIeAnY -https://www.youtube.com/watch?v=bpROcbyQOdc -https://www.youtube.com/watch?v=Pvyi1ldhlVQ -https://www.youtube.com/watch?v=a8ABcZWicB0 -https://www.youtube.com/watch?v=STXjofgjHMo -https://www.youtube.com/watch?v=yKUAqlCUFx4 -https://www.youtube.com/watch?v=aE3IkzploxM -https://www.youtube.com/watch?v=0BNLKVNMeuo -https://www.youtube.com/watch?v=HcoKqF60cO8 -https://www.youtube.com/watch?v=kj7UFFcXVlQ -https://www.youtube.com/watch?v=E30v2guQPJg -https://www.youtube.com/watch?v=_L01csO4Nek -https://www.youtube.com/watch?v=p3uH5LA0IdM -https://www.youtube.com/watch?v=cEnSOZhGdfo -https://www.youtube.com/watch?v=P97y46gLFOk -https://www.youtube.com/watch?v=iayw8uf6DM8 -https://www.youtube.com/watch?v=hrUZrC6OplE -https://www.youtube.com/watch?v=F36GPTHUyXU -https://www.youtube.com/watch?v=QPc0595_s28 -https://www.youtube.com/watch?v=kl9hYdYQy0g -https://www.youtube.com/watch?v=_BucBOfiyLg -https://www.youtube.com/watch?v=PWwNGu-AaqA -https://www.youtube.com/watch?v=HQhgyMKvBSY -https://www.youtube.com/watch?v=VNCQU3dRY64 -https://www.youtube.com/watch?v=bG4ByNMW5ds -https://www.youtube.com/watch?v=L8tcS2e3rfk -https://www.youtube.com/watch?v=g39DLsLdXTE -https://www.youtube.com/watch?v=4d4aeRHTdTo -https://www.youtube.com/watch?v=RgdhZCq19ZQ -https://www.youtube.com/watch?v=I9NHi6EmkTk -https://www.youtube.com/watch?v=6CcpmLOWyMk -https://www.youtube.com/watch?v=IB7E41G2CtI -https://www.youtube.com/watch?v=6O-Aef1Gn4c -https://www.youtube.com/watch?v=uxfL1LRpbW4 -https://www.youtube.com/watch?v=FCpcyBHurFw 
-https://www.youtube.com/watch?v=CygnT11F_ZI -https://www.youtube.com/watch?v=cHVGidgy71o -https://www.youtube.com/watch?v=bGtQwp5ixTA -https://www.youtube.com/watch?v=voeKD_enQTo -https://www.youtube.com/watch?v=G05pwijaK3A -https://www.youtube.com/watch?v=B44UzfY3Xsc -https://www.youtube.com/watch?v=p8E-1M3hZSo -https://www.youtube.com/watch?v=pxWm1Pfzy1w -https://www.youtube.com/watch?v=PWN2vaiAdrE -https://www.youtube.com/watch?v=TRapAZa9UgQ -https://www.youtube.com/watch?v=JaopWs4FnpU -https://www.youtube.com/watch?v=Bm1xne0yK08 -https://www.youtube.com/watch?v=UrbDu6--eyY -https://www.youtube.com/watch?v=0uxmUb8_bYQ -https://www.youtube.com/watch?v=xEPT6rceiaE -https://www.youtube.com/watch?v=OcWYreMBLHE -https://www.youtube.com/watch?v=74Hh1nJJRXs -https://www.youtube.com/watch?v=kxH_bu7oWEQ -https://www.youtube.com/watch?v=xLZQHETWRCM -https://www.youtube.com/watch?v=QLNurv0bgDs -https://www.youtube.com/watch?v=Xa_JupHC-BY -https://www.youtube.com/watch?v=5AdEUJ_bA-w -https://www.youtube.com/watch?v=CumOhS7DldM -https://www.youtube.com/watch?v=w4rRYaRB5T0 -https://www.youtube.com/watch?v=FjvPtUvZZRE -https://www.youtube.com/watch?v=iII9kszf9-E -https://www.youtube.com/watch?v=7hyi0F6EEGg -https://www.youtube.com/watch?v=ilQMJ-kwMAU -https://www.youtube.com/watch?v=SJOP-f5pvlY -https://www.youtube.com/watch?v=LhBc8ElIf_Y -https://www.youtube.com/watch?v=w_koXhVD1uc -https://www.youtube.com/watch?v=HCoR_bmy-lA -https://www.youtube.com/watch?v=vKPezUzvTjg -https://www.youtube.com/watch?v=Gy3teXRZc3M -https://www.youtube.com/watch?v=4FqSKCGo_yQ -https://www.youtube.com/watch?v=hoY7RZV1W4Y -https://www.youtube.com/watch?v=vKyKEcq44uw -https://www.youtube.com/watch?v=g0LBa1CxhDc -https://www.youtube.com/watch?v=4Je1Q5SioSQ -https://www.youtube.com/watch?v=141sUnI0Z8o -https://www.youtube.com/watch?v=RssoSCiTvcQ -https://www.youtube.com/watch?v=gNmiJWksHV0 -https://www.youtube.com/watch?v=-sht8xDsM_4 -https://www.youtube.com/watch?v=q9npqzOrkXQ -https://www.youtube.com/watch?v=0e1tKLUdWc0 -https://www.youtube.com/watch?v=FWa3LA5diUA -https://www.youtube.com/watch?v=iANrNIqlfEU -https://www.youtube.com/watch?v=HoQgL7JmzYE -https://www.youtube.com/watch?v=m4xFa51_qTw -https://www.youtube.com/watch?v=JEtPhrfEZTc -https://www.youtube.com/watch?v=GV6PNirTB5Q -https://www.youtube.com/watch?v=ymOwuN4Icbw -https://www.youtube.com/watch?v=z-PZegRllLE -https://www.youtube.com/watch?v=_FmUv2AX36Q -https://www.youtube.com/watch?v=NytoPxQXGFk -https://www.youtube.com/watch?v=yX05hYFyiBY -https://www.youtube.com/watch?v=JcdVC9wJX9g -https://www.youtube.com/watch?v=Zm1o6Uvqt6w -https://www.youtube.com/watch?v=1_3XNdOEHB4 -https://www.youtube.com/watch?v=iERK93l5S4U -https://www.youtube.com/watch?v=bzQGeM-Adpk -https://www.youtube.com/watch?v=hEPeQWP9gq8 -https://www.youtube.com/watch?v=kdxXsrO-_2o -https://www.youtube.com/watch?v=O0kH3wOwfyM -https://www.youtube.com/watch?v=7MT3LwhuYjo -https://www.youtube.com/watch?v=3rrT4hwjRiE -https://www.youtube.com/watch?v=h1Zx3W0OtAk -https://www.youtube.com/watch?v=Nor0KPVPImQ -https://www.youtube.com/watch?v=_eWaSkmUP-Q -https://www.youtube.com/watch?v=RYF5UYJC6Mk -https://www.youtube.com/watch?v=OUgecZ8n-dk -https://www.youtube.com/watch?v=PD-okljVoz8 -https://www.youtube.com/watch?v=t1lDjE6zGR8 -https://www.youtube.com/watch?v=QRcFSOOIR04 -https://www.youtube.com/watch?v=W3E5IOtsz68 -https://www.youtube.com/watch?v=qZlW5AeLKaA -https://www.youtube.com/watch?v=A8wb1LtJzbA -https://www.youtube.com/watch?v=-83bTbd6Vb4 -https://www.youtube.com/watch?v=gaZ6nCFyT-E 
-https://www.youtube.com/watch?v=JCmlbmA9seQ -https://www.youtube.com/watch?v=66zl23CVyBE -https://www.youtube.com/watch?v=enjb-u5GS0E -https://www.youtube.com/watch?v=zQ7DZ_ZKLQ0 -https://www.youtube.com/watch?v=cltTMwuk-Y0 -https://www.youtube.com/watch?v=od41NVjN6Uc -https://www.youtube.com/watch?v=6XVgwxF8bU8 -https://www.youtube.com/watch?v=KIAv3ZQ1gks -https://www.youtube.com/watch?v=4yzSoM9ZJoc -https://www.youtube.com/watch?v=mKM_Hve4PSM -https://www.youtube.com/watch?v=EabCNrEsgXY -https://www.youtube.com/watch?v=ObwEO_jK9_g -https://www.youtube.com/watch?v=Y19eeNT0hNQ -https://www.youtube.com/watch?v=OBvGnwg4WeY -https://www.youtube.com/watch?v=blwClcLpTZA -https://www.youtube.com/watch?v=e5FHL3Gl56k -https://www.youtube.com/watch?v=om0sYjy7eT8 -https://www.youtube.com/watch?v=GG1uIMTZA30 -https://www.youtube.com/watch?v=bW5lxEe9Qqg -https://www.youtube.com/watch?v=cbwxE8eZBhw -https://www.youtube.com/watch?v=kMzNX0j4NO8 -https://www.youtube.com/watch?v=q5XBM4-OVJg -https://www.youtube.com/watch?v=Ip67Pcwm-8c -https://www.youtube.com/watch?v=ietAfH6O3IE -https://www.youtube.com/watch?v=0CBUwyVhssQ -https://www.youtube.com/watch?v=WocIrnJHCks -https://www.youtube.com/watch?v=Vu2sP1rBSuQ -https://www.youtube.com/watch?v=ypxZjDVLPd8 -https://www.youtube.com/watch?v=v1O8jVvuDZE -https://www.youtube.com/watch?v=0gVYLElHyuU -https://www.youtube.com/watch?v=efx7G2vGiWc -https://www.youtube.com/watch?v=i907BKtWzKM -https://www.youtube.com/watch?v=Bru4dK_5cJk -https://www.youtube.com/watch?v=JSNrp0VfFxM -https://www.youtube.com/watch?v=cKvtjYtK_Zs -https://www.youtube.com/watch?v=xkWrdzoqOHc -https://www.youtube.com/watch?v=gU8Q4qhtWT8 -https://www.youtube.com/watch?v=uFOs71VYSXQ -https://www.youtube.com/watch?v=9kb7VxErS64 -https://www.youtube.com/watch?v=jzmHWb1uKFY -https://www.youtube.com/watch?v=g6ZEep0YEe8 -https://www.youtube.com/watch?v=ARKY-HjGuio -https://www.youtube.com/watch?v=0Yr6CwKNorQ -https://www.youtube.com/watch?v=B0tA10cAXZw -https://www.youtube.com/watch?v=WsGUy4_LiEU -https://www.youtube.com/watch?v=-ndIAbqBYgk -https://www.youtube.com/watch?v=QbKhTZKEarU -https://www.youtube.com/watch?v=HOuePkn2qeI -https://www.youtube.com/watch?v=rDajxgZA5NU -https://www.youtube.com/watch?v=_0aMs9sWoeQ -https://www.youtube.com/watch?v=fPHFbYqK7kc -https://www.youtube.com/watch?v=2_wU4yl4LAY -https://www.youtube.com/watch?v=4xuBM4deE6g -https://www.youtube.com/watch?v=LmYgj8A2Tdk -https://www.youtube.com/watch?v=1WzMTIcIsJo -https://www.youtube.com/watch?v=3qCYB7CVewo -https://www.youtube.com/watch?v=GaflSBOX4Bw -https://www.youtube.com/watch?v=WzzoHmeVfEg -https://www.youtube.com/watch?v=ggY8ruNAJaM -https://www.youtube.com/watch?v=8JpXJQN3Q4U -https://www.youtube.com/watch?v=zahEfgHltAQ -https://www.youtube.com/watch?v=PidO06KgJgM -https://www.youtube.com/watch?v=ahkrZU7dbwg -https://www.youtube.com/watch?v=_6Uf0Wef5lk -https://www.youtube.com/watch?v=NtakSrWF8qw -https://www.youtube.com/watch?v=KPB550JDKvs -https://www.youtube.com/watch?v=a9b8E1cFAcg -https://www.youtube.com/watch?v=lVlxJHf9Uz0 -https://www.youtube.com/watch?v=3wB7hOa6pTI -https://www.youtube.com/watch?v=4rT8sKSoHYg -https://www.youtube.com/watch?v=315qa9cu8NM -https://www.youtube.com/watch?v=eXInviPnnZA -https://www.youtube.com/watch?v=hiis-ugAa6Y -https://www.youtube.com/watch?v=vZy7LUVqK4I -https://www.youtube.com/watch?v=_Y8mWMbYcKk -https://www.youtube.com/watch?v=bIZrrGvkcaE -https://www.youtube.com/watch?v=kvgZc-q7K08 -https://www.youtube.com/watch?v=4uiqIjIzJqg -https://www.youtube.com/watch?v=g2P3KlzZ_bc 
-https://www.youtube.com/watch?v=i6c47ojZ1gU -https://www.youtube.com/watch?v=J-WVFLD5z4E -https://www.youtube.com/watch?v=ViKKvVgHQro -https://www.youtube.com/watch?v=n-QbtsYb3ao -https://www.youtube.com/watch?v=4buYFi4RHXM -https://www.youtube.com/watch?v=h_-2AKl9mEU -https://www.youtube.com/watch?v=RKirt5pR5s4 -https://www.youtube.com/watch?v=vQQDv11dLZo -https://www.youtube.com/watch?v=pw-v-Af0Zt4 -https://www.youtube.com/watch?v=yyAUCuj_rfU -https://www.youtube.com/watch?v=5FSqDMjwFLQ -https://www.youtube.com/watch?v=J3m8gQKj9ls -https://www.youtube.com/watch?v=FiYgnqVWeD0 -https://www.youtube.com/watch?v=35M5N22WEAw -https://www.youtube.com/watch?v=BrKHrNWU7fw -https://www.youtube.com/watch?v=CEVUU_s8Pk8 -https://www.youtube.com/watch?v=p2l0QYUNsLw -https://www.youtube.com/watch?v=hGbyVDtuKlU -https://www.youtube.com/watch?v=5iPFMlxPSTc -https://www.youtube.com/watch?v=GVlVCSWeJL4 -https://www.youtube.com/watch?v=WdVYt8PL0Po -https://www.youtube.com/watch?v=p2P_A9TqH78 -https://www.youtube.com/watch?v=LQ-Kt4arMu8 -https://www.youtube.com/watch?v=0ygsfbMtrZA -https://www.youtube.com/watch?v=5hQEI4jGEao -https://www.youtube.com/watch?v=0HG6LF7hsPk -https://www.youtube.com/watch?v=JqR_R4EvsNs -https://www.youtube.com/watch?v=YTS9r3aQ2Rw -https://www.youtube.com/watch?v=UDfjPR3XxGI -https://www.youtube.com/watch?v=eJhUqThVeTU -https://www.youtube.com/watch?v=eSFea0F4pM4 -https://www.youtube.com/watch?v=Iumi423BMsY -https://www.youtube.com/watch?v=ix_lu8DoqVM -https://www.youtube.com/watch?v=YQMsHKAftA8 -https://www.youtube.com/watch?v=YvdY-vk_qXo -https://www.youtube.com/watch?v=v5S1NeyK4zo -https://www.youtube.com/watch?v=T6eYlq568q8 -https://www.youtube.com/watch?v=LHncOi0gLVo -https://www.youtube.com/watch?v=JzNhRNPs-zs -https://www.youtube.com/watch?v=fh1cW1xt6Hs -https://www.youtube.com/watch?v=96pUYEFNtbw -https://www.youtube.com/watch?v=3iav7GPapME -https://www.youtube.com/watch?v=ifeg0nKnYAc -https://www.youtube.com/watch?v=TSOGNwPdJC0 -https://www.youtube.com/watch?v=Vdb-LAifVgw -https://www.youtube.com/watch?v=N8MzHelGeZA -https://www.youtube.com/watch?v=8lDLi6KL3NE -https://www.youtube.com/watch?v=AzesbB6B-Pw -https://www.youtube.com/watch?v=rCsByru8q80 -https://www.youtube.com/watch?v=KFCIJ6HAefo -https://www.youtube.com/watch?v=gZm2T-9vQgA -https://www.youtube.com/watch?v=urdmi5PF9tk -https://www.youtube.com/watch?v=uwdnq5BHelM -https://www.youtube.com/watch?v=Npojrebk8K4 -https://www.youtube.com/watch?v=hF8nxFrWEBM -https://www.youtube.com/watch?v=8ZP8Tmej3qI -https://www.youtube.com/watch?v=5LT6yyfblKU -https://www.youtube.com/watch?v=rwQdepyLhIc -https://www.youtube.com/watch?v=dJf0WhYGC-A -https://www.youtube.com/watch?v=LEADj_2kk5M -https://www.youtube.com/watch?v=rqklugKUZf0 -https://www.youtube.com/watch?v=toYKzDsIlM0 -https://www.youtube.com/watch?v=u8DoguzNFlE -https://www.youtube.com/watch?v=6A14Z3bVKxk -https://www.youtube.com/watch?v=Hx6V61dT9-c -https://www.youtube.com/watch?v=WbkkdqZtHhw -https://www.youtube.com/watch?v=FTTp__BIL6c -https://www.youtube.com/watch?v=2NIPuIbmeBg -https://www.youtube.com/watch?v=63Q5WLMF0lU -https://www.youtube.com/watch?v=KioksPhg_78 -https://www.youtube.com/watch?v=zMJ27qdfGAI -https://www.youtube.com/watch?v=q9biOLIba28 -https://www.youtube.com/watch?v=7RPWlQI1HFY -https://www.youtube.com/watch?v=Z2fPu9NX3DE -https://www.youtube.com/watch?v=_H3X_k7z7Iw -https://www.youtube.com/watch?v=0YPBfRGXejQ -https://www.youtube.com/watch?v=xUVwAIJu40g -https://www.youtube.com/watch?v=50GV67qKcSM -https://www.youtube.com/watch?v=DuVy41rot60 
-https://www.youtube.com/watch?v=4GXF7gItY6g -https://www.youtube.com/watch?v=mNuPUraHMMM -https://www.youtube.com/watch?v=OE5R-dxhktY -https://www.youtube.com/watch?v=wO52Mj2dOvk -https://www.youtube.com/watch?v=T10wxcDWchE -https://www.youtube.com/watch?v=qh9FHZ1-ihg -https://www.youtube.com/watch?v=n5R7te2NUDo -https://www.youtube.com/watch?v=xMK-jrM9KKM -https://www.youtube.com/watch?v=iQsw6tnDWUw -https://www.youtube.com/watch?v=QzGwqKSJp1Q -https://www.youtube.com/watch?v=2aTk7l-SBjc -https://www.youtube.com/watch?v=pF3fZ9Uv-hQ -https://www.youtube.com/watch?v=XZeKA5WRGkY -https://www.youtube.com/watch?v=kLsZ4t2a6Ts -https://www.youtube.com/watch?v=Zw8SxU02ZIk -https://www.youtube.com/watch?v=OplKfLEIq6w -https://www.youtube.com/watch?v=cXyk40-MWZA -https://www.youtube.com/watch?v=wlMw3dhuicc -https://www.youtube.com/watch?v=wFyVmtInX04 diff --git a/airflow/inputfiles/urls.sky28.json b/airflow/inputfiles/urls.sky28.json deleted file mode 100644 index 84e4259..0000000 --- a/airflow/inputfiles/urls.sky28.json +++ /dev/null @@ -1,30 +0,0 @@ -[ -"https://www.youtube.com/watch?v=lKrVuufVMXA", -"https://www.youtube.com/watch?v=ISqDcqGdow0", -"https://www.youtube.com/watch?v=srG-WnQdZq8", -"https://www.youtube.com/watch?v=HP-KB6XFqgs", -"https://www.youtube.com/watch?v=1e13SIh51wk", -"https://www.youtube.com/watch?v=VTKG48FjSxs", -"https://www.youtube.com/watch?v=onEWAyPRm6E", -"https://www.youtube.com/watch?v=7RdrGwpZzMo", -"https://www.youtube.com/watch?v=M5uu93_AhXg", -"https://www.youtube.com/watch?v=xnkvCBfTfok", -"https://www.youtube.com/watch?v=oE9hGZyFN8E", -"https://www.youtube.com/watch?v=7LofBMRP6U4", -"https://www.youtube.com/watch?v=EDE8tyroJEE", -"https://www.youtube.com/watch?v=oLwsWGi0sUc", -"https://www.youtube.com/watch?v=a6dvhHPyFIw", -"https://www.youtube.com/watch?v=4jds773UlWE", -"https://www.youtube.com/watch?v=B6dXxqiSBSM", -"https://www.youtube.com/watch?v=9EbS6w3RSG0", -"https://www.youtube.com/watch?v=LyKONGzUANU", -"https://www.youtube.com/watch?v=sGW5kfpR6Wo", -"https://www.youtube.com/watch?v=pa4-JninkUQ", -"https://www.youtube.com/watch?v=DxXMFBWarjY", -"https://www.youtube.com/watch?v=PYQjfpCEWvc", -"https://www.youtube.com/watch?v=_jlNCjI9jiQ", -"https://www.youtube.com/watch?v=BxEC11QS3sQ", -"https://www.youtube.com/watch?v=6-qbWRzVbGA", -"https://www.youtube.com/watch?v=p3lCQvZBv_k", -"https://www.youtube.com/watch?v=67YA1CHpGrM" -] \ No newline at end of file diff --git a/airflow/inputfiles/urls.sky3.json b/airflow/inputfiles/urls.sky3.json deleted file mode 100644 index a4f7e34..0000000 --- a/airflow/inputfiles/urls.sky3.json +++ /dev/null @@ -1,5 +0,0 @@ -[ -"https://www.youtube.com/watch?v=uxiLE2Kv7wc", -"https://www.youtube.com/watch?v=Q7R0epGFnRI", -"https://www.youtube.com/watch?v=4mEmsJXKroE" -] \ No newline at end of file diff --git a/airflow/inputfiles/urls.tq46.json b/airflow/inputfiles/urls.tq46.json deleted file mode 100644 index 16d099d..0000000 --- a/airflow/inputfiles/urls.tq46.json +++ /dev/null @@ -1,48 +0,0 @@ -[ -"https://www.youtube.com/watch?v=l700b4BpFAA", -"https://www.youtube.com/watch?v=G_JAVwwWyUM", -"https://www.youtube.com/watch?v=2LGz9nUw-XI", -"https://www.youtube.com/watch?v=7dK6a8LWAWw", -"https://www.youtube.com/watch?v=lKSZnZggcto", -"https://www.youtube.com/watch?v=Zy0ZFAMqm7U", -"https://www.youtube.com/watch?v=7UunWMHBrEE", -"https://www.youtube.com/watch?v=LPdbLCX3N-4", -"https://www.youtube.com/watch?v=-lJ5DVbkVw4", -"https://www.youtube.com/watch?v=QrRRS0RzELs", -"https://www.youtube.com/watch?v=XSty74mE1iE", 
-"https://www.youtube.com/watch?v=orijdeDOk5g", -"https://www.youtube.com/watch?v=27YVRo9VUE8", -"https://www.youtube.com/watch?v=p-JNgLI_8nA", -"https://www.youtube.com/watch?v=gkekjIJB_Nw", -"https://www.youtube.com/watch?v=V8QFCgOfkgw", -"https://www.youtube.com/watch?v=_GVVEsxZ_Mo", -"https://www.youtube.com/watch?v=7_zMqxK4gZE", -"https://www.youtube.com/watch?v=cwuJCb316yQ", -"https://www.youtube.com/watch?v=TIGxtvVVHak", -"https://www.youtube.com/watch?v=KhcicW2keWY", -"https://www.youtube.com/watch?v=miUJ85pFCPE", -"https://www.youtube.com/watch?v=97L4qVfSwv4", -"https://www.youtube.com/watch?v=Wk38hWQfz24", -"https://www.youtube.com/watch?v=iIU-NVWkTDE", -"https://www.youtube.com/watch?v=l89VaRof8ug", -"https://www.youtube.com/watch?v=IIkjS5MpQVM", -"https://www.youtube.com/watch?v=9XxPGKkOs0o", -"https://www.youtube.com/watch?v=_dlpve9GPZM", -"https://www.youtube.com/watch?v=He_3MjAuZNQ", -"https://www.youtube.com/watch?v=FnPEHn2NHT4", -"https://www.youtube.com/watch?v=HuSjI7HFkzo", -"https://www.youtube.com/watch?v=pBZSgVJHacs", -"https://www.youtube.com/watch?v=OgsG082zDGo", -"https://www.youtube.com/watch?v=_4sxhmPsryY", -"https://www.youtube.com/watch?v=kqU6B5rIEnI", -"https://www.youtube.com/watch?v=BEYn_ILHmBE", -"https://www.youtube.com/watch?v=qy9Zr3HV9V4", -"https://www.youtube.com/watch?v=7I1VvJZbG-M", -"https://www.youtube.com/watch?v=WOa-HA3MoVQ", -"https://www.youtube.com/watch?v=uaHI-WHwivc", -"https://www.youtube.com/watch?v=9ku8r8uZ9EQ", -"https://www.youtube.com/watch?v=XAyaDcLxwHQ", -"https://www.youtube.com/watch?v=zpc-hJGSNBc", -"https://www.youtube.com/watch?v=AGbG62y1DyE", -"https://www.youtube.com/watch?v=7rmyabL60oA" -] \ No newline at end of file diff --git a/airflow/plugins/__init__.py b/airflow/plugins/__init__.py deleted file mode 100644 index 8643640..0000000 --- a/airflow/plugins/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Airflow plugins initialization. 
-""" -import os -import logging - -# Set the custom secrets masker -os.environ['AIRFLOW__LOGGING__SECRETS_MASKER_CLASS'] = 'custom_secrets_masker.CustomSecretsMasker' - -# Apply Thrift patches -try: - from patch_thrift_exceptions import patch_thrift_exceptions - patch_thrift_exceptions() -except Exception as e: - logging.error(f"Error applying Thrift exception patches: {e}") - -logger = logging.getLogger(__name__) -logger.info("Airflow custom configuration applied") diff --git a/airflow/plugins/ytdlp_plugin.py b/airflow/plugins/ytdlp_plugin.py deleted file mode 100644 index 6929713..0000000 --- a/airflow/plugins/ytdlp_plugin.py +++ /dev/null @@ -1,56 +0,0 @@ -from airflow.plugins_manager import AirflowPlugin -from airflow.hooks.base import BaseHook -from airflow.configuration import conf -import uuid -import backoff - -class YTDLPHook(BaseHook): - def __init__(self, conn_id='ytdlp_default'): - super().__init__() - self.conn_id = conn_id - self.connection = self.get_connection(conn_id) - self.timeout = conf.getint('ytdlp', 'timeout', fallback=120) - self.max_retries = conf.getint('ytdlp', 'max_retries', fallback=3) - - @backoff.on_exception(backoff.expo, - Exception, - max_tries=3, - max_time=300) - def start_service(self, host, port, service_id, work_dir): - """Start token service as a long-running process""" - import subprocess - import os - from pathlib import Path - - # Get script path relative to Airflow home - airflow_home = os.getenv('AIRFLOW_HOME', '') - script_path = Path(airflow_home).parent / 'ytdlp_ops_server.py' - - # Ensure work directory exists - os.makedirs(work_dir, exist_ok=True) - - # Start service process - cmd = [ - 'python', str(script_path), - '--port', str(port), - '--host', host, - '--service-id', service_id, - '--context-dir', work_dir, - '--script-dir', str(Path(airflow_home) / 'dags' / 'scripts') - ] - - self.log.info(f"Starting token service: {' '.join(cmd)}") - - # Start process detached - docker_cmd = [ - 'docker-compose', '-f', 'docker-compose.yaml', - 'up', '-d', '--build', 'ytdlp-service' - ] - subprocess.run(docker_cmd, check=True) - - self.log.info(f"Token service started on {host}:{port}") - return True - -class YTDLPPlugin(AirflowPlugin): - name = 'ytdlp_plugin' - hooks = [YTDLPHook] diff --git a/airflow/roles/airflow-master/tasks/main.yml b/airflow/roles/airflow-master/tasks/main.yml deleted file mode 100644 index 479c8d7..0000000 --- a/airflow/roles/airflow-master/tasks/main.yml +++ /dev/null @@ -1,331 +0,0 @@ ---- -- name: Check if Airflow master deployment directory exists - stat: - path: "{{ airflow_master_dir }}" - register: master_dir_stat - -- name: Ensure Airflow master deployment directory exists - file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - when: not master_dir_stat.stat.exists - -- name: Ensure Airflow master configs directory exists - file: - path: "{{ airflow_master_dir }}/configs" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure Airflow master config directory exists - file: - path: "{{ airflow_master_dir }}/config" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure Airflow operational directories exist with correct permissions - file: - path: "{{ airflow_master_dir }}/{{ item }}" - state: directory - owner: "{{ airflow_uid }}" - group: "{{ deploy_group }}" - mode: '0775' - become: yes - loop: - 
- "dags" - - "logs" - - "plugins" - - "downloadfiles" - - "addfiles" - - "inputfiles" - -- name: Check if source directories exist - stat: - path: "../{{ item }}" - register: source_dirs - loop: - - "airflow/plugins" - - "airflow/addfiles" - - "airflow/bgutil-ytdlp-pot-provider" - -- name: "Log: Syncing Airflow core files" - debug: - msg: "Syncing DAGs, configs, and Python source code to the master node." - -- name: Sync Airflow master files - synchronize: - src: "../{{ item }}" - dest: "{{ airflow_master_dir }}/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "airflow/Dockerfile" - - "airflow/Dockerfile.caddy" - - "airflow/.dockerignore" - - "airflow/dags" - - "airflow/inputfiles" - - "setup.py" - - "yt_ops_services" - - "thrift_model" - - "VERSION" - - "airflow/update-yt-dlp.sh" - - "get_info_json_client.py" - - "proxy_manager_client.py" - - "utils" - -- name: Copy custom Python config files to master - copy: - src: "../airflow/config/{{ item }}" - dest: "{{ airflow_master_dir }}/config/{{ item }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - loop: - - "custom_task_hooks.py" - - "airflow_local_settings.py" - -- name: Ensure any existing airflow.cfg directory is removed - file: - path: "{{ airflow_master_dir }}/config/airflow.cfg" - state: absent - become: yes - ignore_errors: yes - -- name: Copy airflow.cfg to master - copy: - src: "../airflow/airflow.cfg" - dest: "{{ airflow_master_dir }}/config/airflow.cfg" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - -- name: Sync Airflow master config files - synchronize: - src: "../airflow/configs/{{ item }}" - dest: "{{ airflow_master_dir }}/configs/" - archive: yes - recursive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "nginx.conf" - - "Caddyfile" - -- name: Sync optional directories if they exist - synchronize: - src: "../{{ item.item }}/" - dest: "{{ airflow_master_dir }}/{{ item.item | basename }}/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: "{{ source_dirs.results }}" - when: item.stat.exists - -- name: Sync pangramia thrift files - synchronize: - src: "../thrift_model/gen_py/pangramia/" - dest: "{{ airflow_master_dir }}/pangramia/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - -- name: Template docker-compose file for master - template: - src: "{{ playbook_dir }}/../airflow/configs/docker-compose-master.yaml.j2" - dest: "{{ airflow_master_dir }}/configs/docker-compose-master.yaml" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Template Redis connection file - template: - src: "../airflow/config/redis_default_conn.json.j2" - dest: "{{ airflow_master_dir }}/config/redis_default_conn.json" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Template Minio connection file for master - template: - src: "../airflow/config/minio_default_conn.json.j2" - dest: "{{ airflow_master_dir }}/config/minio_default_conn.json" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Ensure config directory is group-writable for Airflow initialization - file: - path: "{{ airflow_master_dir }}/config" - state: directory - mode: 
'0775' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Ensure airflow.cfg is group-writable for Airflow initialization - file: - path: "{{ airflow_master_dir }}/config/airflow.cfg" - state: file - mode: '0664' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Create symlink for docker-compose.yaml - file: - src: "{{ airflow_master_dir }}/configs/docker-compose-master.yaml" - dest: "{{ airflow_master_dir }}/docker-compose.yaml" - state: link - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - force: yes - follow: no - -- name: Ensure correct permissions for build context - file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - recurse: yes - become: yes - -- name: Ensure postgres-data directory exists on master and has correct permissions - file: - path: "{{ airflow_master_dir }}/postgres-data" - state: directory - owner: "999" # UID for the 'postgres' user in the official postgres image - group: "999" # GID for the 'postgres' group in the official postgres image - mode: '0700' - become: yes - -- name: Set group-writable and setgid permissions on master logs directory contents - shell: | - find {{ airflow_master_dir }}/logs -type d -exec chmod g+rws {} + - find {{ airflow_master_dir }}/logs -type f -exec chmod g+rw {} + - become: yes - -- name: Verify Dockerfile exists in build directory - stat: - path: "{{ airflow_master_dir }}/Dockerfile" - register: dockerfile_stat - -- name: Fail if Dockerfile is missing - fail: - msg: "Dockerfile not found in {{ airflow_master_dir }}. Cannot build image." - when: not dockerfile_stat.stat.exists - -- name: "Log: Building Airflow Docker image" - debug: - msg: "Building the main Airflow Docker image ({{ airflow_image_name }}) locally on the master node. This may take a few minutes." - -- name: Build Airflow master image - community.docker.docker_image: - name: "{{ airflow_image_name }}" - build: - path: "{{ airflow_master_dir }}" - dockerfile: "Dockerfile" # Explicitly specify the Dockerfile name - source: build - force_source: true - when: not fast_deploy | default(false) - -- name: "Log: Preparing assets for Caddy image" - debug: - msg: "Extracting static assets from the Airflow image to build the Caddy reverse proxy." 
- when: not fast_deploy | default(false) - -- name: Prepare Caddy asset extraction directory - file: - path: "{{ airflow_master_dir }}/caddy_build_assets" - state: "{{ item }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - loop: - - absent - - directory - become: yes - when: not fast_deploy | default(false) - -- name: Ensure subdirectories exist with correct permissions - file: - path: "{{ airflow_master_dir }}/caddy_build_assets/{{ item }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - loop: - - "appbuilder" - - "dist" - become: yes - when: not fast_deploy | default(false) - -- name: Extract static assets from Airflow image for Caddy build - shell: | - set -e - CONTAINER_ID=$(docker create {{ airflow_image_name }}) - # Dynamically find paths inside the container - APPBUILDER_PATH=$(docker run --rm --entrypoint "" {{ airflow_image_name }} python -c 'import os, flask_appbuilder; print(os.path.join(os.path.dirname(flask_appbuilder.__file__), "static", "appbuilder"))') - AIRFLOW_DIST_PATH=$(docker run --rm --entrypoint "" {{ airflow_image_name }} python -c 'import os, airflow; print(os.path.join(os.path.dirname(airflow.__file__), "www/static/dist"))') - # Copy assets from container to host - docker cp "${CONTAINER_ID}:${APPBUILDER_PATH}/." "./caddy_build_assets/appbuilder" - docker cp "${CONTAINER_ID}:${AIRFLOW_DIST_PATH}/." "./caddy_build_assets/dist" - docker rm -f $CONTAINER_ID - # Pre-compress assets - find ./caddy_build_assets/appbuilder -type f -print0 | xargs -0 gzip -k -9 - find ./caddy_build_assets/dist -type f -print0 | xargs -0 gzip -k -9 - args: - chdir: "{{ airflow_master_dir }}" - executable: /bin/bash - become: yes - register: asset_extraction - changed_when: asset_extraction.rc == 0 - when: not fast_deploy | default(false) - -- name: "Log: Building Caddy reverse proxy image" - debug: - msg: "Building the Caddy image (pangramia/ytdlp-ops-caddy:latest) to serve static assets." - -- name: Build Caddy image - community.docker.docker_image: - name: "pangramia/ytdlp-ops-caddy:latest" - build: - path: "{{ airflow_master_dir }}" - dockerfile: "Dockerfile.caddy" - source: build - force_source: true - when: not fast_deploy | default(false) - -- name: "Log: Starting Airflow services" - debug: - msg: "Starting Airflow core services (webserver, scheduler, etc.) on the master node using docker-compose." - -- name: Start Airflow master service - community.docker.docker_compose_v2: - project_src: "{{ airflow_master_dir }}" - files: - - "configs/docker-compose-master.yaml" - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/airflow/update-yt-dlp.sh b/airflow/update-yt-dlp.sh deleted file mode 100644 index aed3451..0000000 --- a/airflow/update-yt-dlp.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -# update-yt-dlp.sh – run by the worker container before every DAG execution -set -e -echo "[$(date)] Updating yt-dlp to latest nightly master..." 
-python3 -m pip install -U --pre "yt-dlp[default]" --upgrade-strategy eager --force-reinstall --no-cache-dir -echo "[$(date)] yt-dlp updated to:" -yt-dlp --version diff --git a/ansible/.gitignore b/ansible/.gitignore index b0ac3ed..c487941 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -1 +1,4 @@ +inventory.ini +group_vars/all/generated_vars.yml +host_vars/ .aider* diff --git a/ansible/.vault_pass b/ansible/.vault_pass deleted file mode 100644 index 89a0cd2..0000000 --- a/ansible/.vault_pass +++ /dev/null @@ -1 +0,0 @@ -ytdlp-ops diff --git a/ansible/MIGRATION.md b/ansible/MIGRATION.md deleted file mode 100644 index 78f995b..0000000 --- a/ansible/MIGRATION.md +++ /dev/null @@ -1,9 +0,0 @@ -# Migration Notes - -This document tracks the process of migrating the Ansible deployment. - -## Guiding Principles - -- No changes to business logic or core functionality are permitted during this phase. -- The focus is solely on resolving file path issues, dependency errors, and structural inconsistencies resulting from the migration of a subset of files. -- All changes should be aimed at making the existing playbooks runnable in the new environment. diff --git a/ansible/README-yt.md b/ansible/README-yt.md deleted file mode 100644 index 0695db7..0000000 --- a/ansible/README-yt.md +++ /dev/null @@ -1,120 +0,0 @@ -# Ansible-driven YT-DLP / Airflow Cluster – Quick-Start & Cheat-Sheet - -> One playbook = one command to **deploy**, **update**, **restart**, or **re-configure** the entire cluster. - ---- - -## 0. Prerequisites (run once on the **tower** server) - -``` - ---- - -## 1. Ansible Vault Setup (run once on your **local machine**) - -This project uses Ansible Vault to encrypt sensitive data like passwords and API keys. To run the playbooks, you need to provide the vault password. The recommended way is to create a file named `.vault_pass` in the root of the project directory. - -1. **Create the Vault Password File:** - From the project's root directory (e.g., `/opt/yt-ops-services`), create the file. The file should contain only your vault password on a single line. - - ```bash - # Replace 'your_secret_password_here' with your actual vault password - echo "your_secret_password_here" > .vault_pass - ``` - -2. **Secure the File:** - It's good practice to restrict permissions on this file so only you can read it. - - ```bash - chmod 600 .vault_pass - ``` - -The `ansible.cfg` file is configured to automatically look for this `.vault_pass` file in the project root. - ---- - -## 1.5. Cluster & Inventory Management - -The Ansible inventory (`ansible/inventory.ini`), host-specific variables (`ansible/host_vars/`), and the master `docker-compose.yaml` are dynamically generated from a central cluster definition file (e.g., `cluster.yml`). - -**Whenever you add, remove, or change the IP of a node in your `cluster.yml`, you must re-run the generator script.** - -1. **Install Script Dependencies (run once):** - The generator script requires `PyYAML` and `Jinja2`. Install them using pip: - ```bash - pip3 install PyYAML Jinja2 - ``` - -2. **Edit Your Cluster Definition:** - Modify your `cluster.yml` file (located in the project root) to define your master and worker nodes. - -3. 
**Run the Generator Script:** - From the project's root directory, run the following command to update all generated files: - - ```bash - # Make sure the script is executable first: chmod +x tools/generate-inventory.py - ./tools/generate-inventory.py cluster.yml - ``` - -This ensures that Ansible has the correct host information and that the master node's Docker Compose configuration includes the correct `extra_hosts` for log fetching from workers. - ---- - -## 2. Setup and Basic Usage - -### Running Ansible Commands - -**IMPORTANT:** All `ansible-playbook` commands should be run from within the `ansible/` directory. This allows Ansible to automatically find the `ansible.cfg` and `inventory.ini` files. - -```bash -cd ansible -ansible-playbook .yml -``` - -The `ansible.cfg` file is configured to automatically use the `.vault_pass` file located in the project root (one level above `ansible/`). This means you **do not** need to manually specify `--vault-password-file ../.vault_pass` in your commands. Ensure your `.vault_pass` file is located in the project root. - -If you run `ansible-playbook` from the project root instead of the `ansible/` directory, you will see warnings about the inventory not being parsed, because Ansible does not automatically find `ansible/ansible.cfg`. - ---- - -## 3. Deployment Scenarios - -### Full Cluster Deployment - -To deploy or update the entire cluster (master and all workers), run the main playbook. This will build/pull images and restart all services. - -```bash -# Run from inside the ansible/ directory -ansible-playbook playbook-full.yml -``` - -### Targeted & Fast Deployments - -For faster development cycles, you can deploy changes to specific parts of the cluster without rebuilding or re-pulling Docker images. - -#### Updating Only the Master Node (Fast Deploy) - -To sync configuration, code, and restart services on the master node *without* rebuilding the Airflow image or pulling the `ytdlp-ops-server` image, use the `fast_deploy` flag with the master playbook. This is ideal for pushing changes to DAGs, Python code, or config files. - -```bash -# Run from inside the ansible/ directory -ansible-playbook playbook-master.yml --extra-vars "fast_deploy=true" -``` - -#### Updating Only a Specific Worker Node (Fast Deploy) - -Similarly, you can update a single worker node. Replace `dl001` with the hostname of the worker you want to target from your `inventory.ini`. - -```bash -# Run from inside the ansible/ directory -ansible-playbook playbook-worker.yml --limit dl001 --extra-vars "fast_deploy=true" -``` - -#### Updating Only DAGs and Configs - -If you have only changed DAGs or configuration files and don't need to restart any services, you can run a much faster playbook that only syncs the `dags/` and `config/` directories. - -```bash -# Run from inside the ansible/ directory -ansible-playbook playbook-dags.yml -``` diff --git a/ansible/README.md b/ansible/README.md deleted file mode 100644 index 8a00cae..0000000 --- a/ansible/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# Ansible Deployment for YT-DLP Cluster - -This document provides an overview of the Ansible playbooks used to deploy and manage the YT-DLP Airflow cluster. - -## Main Playbooks - -These are the primary entry points for cluster management. - -- `playbook-full-with-proxies.yml`: **(Recommended Entry Point)** Deploys shadowsocks proxies and then the entire application stack. -- `playbook-full.yml`: Deploys the entire application stack (master and workers) without touching proxies. 
-- `playbook-master.yml`: Deploys/updates only the Airflow master node. -- `playbook-worker.yml`: Deploys/updates all Airflow worker nodes. -- `playbook-proxies.yml`: Deploys/updates only the shadowsocks proxy services on all nodes. - -## Component & Utility Playbooks - -These playbooks are used for more specific tasks or are called by the main playbooks. - -### Core Deployment Logic -- `roles/airflow-master/tasks/main.yml`: Contains all tasks for setting up the Airflow master services. -- `roles/airflow-worker/tasks/main.yml`: Contains all tasks for setting up the Airflow worker services. -- `roles/ytdlp-master/tasks/main.yml`: Contains tasks for setting up the YT-DLP management services on the master. -- `roles/ytdlp-worker/tasks/main.yml`: Contains tasks for setting up YT-DLP, Camoufox, and other worker-specific services. - -### Utility & Maintenance -- `playbook-dags.yml`: Quickly syncs only the `dags/` and `config/` directories to all nodes. -- `playbook-hook.yml`: Syncs Airflow custom hooks and restarts relevant services. -- `playbook-sync-local.yml`: Syncs local development files (e.g., `ytops_client`, `pangramia`) to workers. -- `playbooks/pause_worker.yml`: Pauses a worker by creating a lock file, preventing it from taking new tasks. -- `playbooks/resume_worker.yml`: Resumes a paused worker by removing the lock file. -- `playbooks/playbook-bgutils-start.yml`: Starts the `bgutil-provider` container. -- `playbooks/playbook-bgutils-stop.yml`: Stops the `bgutil-provider` container. -- `playbook-update-s3-vars.yml`: Updates the `s3_delivery_connection` in Airflow. -- `playbook-update-regression-script.yml`: Updates the `regression.py` script on the master. - -### Deprecated -- `playbook-dl.yml`: Older worker deployment logic. Superseded by `playbook-worker.yml`. -- `playbook-depricated.dl.yml`: Older worker deployment logic. Superseded by `playbook-worker.yml`. - -## Current Goal: Disable Camoufox & Enable Aria2 - -The current objective is to modify the worker deployment (`playbook-worker.yml` and its role `roles/ytdlp-worker/tasks/main.yml`) to: -1. **Disable Camoufox**: Prevent the build, configuration generation, and startup of all `camoufox` services. -2. **Enable Aria2**: Ensure the `aria2-pro` service is built and started correctly on worker nodes. - -The `playbook-worker.yml` has already been updated to build the `aria2-pro` image. The next steps will involve modifying `roles/ytdlp-worker/tasks/main.yml` to remove the Camoufox-related tasks. diff --git a/ansible/configs/etc/sysctl.d/99-redis-overcommit.conf b/ansible/configs/etc/sysctl.d/99-redis-overcommit.conf deleted file mode 100644 index 288a67a..0000000 --- a/ansible/configs/etc/sysctl.d/99-redis-overcommit.conf +++ /dev/null @@ -1,2 +0,0 @@ -# Enable memory overcommit for Redis to prevent background save failures -vm.overcommit_memory = 1 diff --git a/ansible/group_vars/all/generated_vars.yml b/ansible/group_vars/all/generated_vars.yml deleted file mode 100644 index 14b5a76..0000000 --- a/ansible/group_vars/all/generated_vars.yml +++ /dev/null @@ -1,43 +0,0 @@ ---- -# This file is auto-generated by tools/generate-inventory.py -# Do not edit – your changes will be overwritten. 
-airflow_image_name: pangramia/ytdlp-ops-airflow:latest -airflow_master_dir: /srv/airflow_master -airflow_uid: 1003 -airflow_worker_dir: /srv/airflow_dl_worker -ansible_user: alex_p -camoufox_base_port: 9070 -camoufox_base_vnc_port: 5901 -deploy_group: ytdl -dir_permissions: '0755' -docker_network_name: airflow_proxynet -dockerhub_user: pangramia -envoy_admin_port: 9901 -envoy_port: 9080 -external_access_ips: [] -file_permissions: '0644' -host_timezone: Europe/Moscow -management_service_port: 9091 -master_host_ip: 89.253.221.173 -postgres_port: 5432 -redis_port: 52909 -rsync_default_opts: -- --no-owner -- --no-group -- --no-times -- --copy-links -- --copy-unsafe-links -- --exclude=.git* -- --exclude=__pycache__ -- --exclude=*.pyc -- --exclude=*.log -- --exclude=.DS_Store -shadowsocks_cipher_method: aes-256-gcm -shadowsocks_fast_open: true -shadowsocks_image: ghcr.io/shadowsocks/sslocal-rust:v1.22.0 -shadowsocks_local_address: 0.0.0.0 -shadowsocks_mode: tcp_and_udp -shadowsocks_timeout: 20 -ssh_user: alex_p -ytdlp_base_port: 9090 -ytdlp_ops_image: pangramia/ytdlp-ops-server:3.10.1-exp diff --git a/ansible/host_vars/af-green.yml b/ansible/host_vars/af-green.yml deleted file mode 100644 index dbd71d6..0000000 --- a/ansible/host_vars/af-green.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -# Variables for af-green -master_host_ip: 89.253.221.173 -redis_port: 52909 -shadowsocks_proxies: - sslocal-rust-1087: - server: "91.103.252.51" - server_port: 8388 - local_port: 1087 - vault_password_key: "vault_ss_password_1" - sslocal-rust-1086: - server: "62.60.178.45" - server_port: 8388 - local_port: 1086 - vault_password_key: "vault_ss_password_2" - sslocal-rust-1081: - server: "79.137.207.43" - server_port: 8388 - local_port: 1081 - vault_password_key: "vault_ss_password_2" -worker_proxies: - - "socks5://sslocal-rust-1087:1087" diff --git a/ansible/host_vars/dl001.yml b/ansible/host_vars/dl001.yml deleted file mode 100644 index c09444a..0000000 --- a/ansible/host_vars/dl001.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -# Variables for dl001 -master_host_ip: 89.253.221.173 -redis_port: 52909 -shadowsocks_proxies: - sslocal-rust-1087: - server: "91.103.252.51" - server_port: 8388 - local_port: 1087 - vault_password_key: "vault_ss_password_1" - sslocal-rust-1086: - server: "62.60.178.45" - server_port: 8388 - local_port: 1086 - vault_password_key: "vault_ss_password_2" - sslocal-rust-1081: - server: "79.137.207.43" - server_port: 8388 - local_port: 1081 - vault_password_key: "vault_ss_password_2" -worker_proxies: - - "socks5://sslocal-rust-1087:1087" diff --git a/ansible/host_vars/dl003.yml b/ansible/host_vars/dl003.yml deleted file mode 100644 index 0133cb1..0000000 --- a/ansible/host_vars/dl003.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -# Variables for dl003 -master_host_ip: 89.253.221.173 -redis_port: 52909 -shadowsocks_proxies: - sslocal-rust-1087: - server: "91.103.252.51" - server_port: 8388 - local_port: 1087 - vault_password_key: "vault_ss_password_1" - sslocal-rust-1086: - server: "62.60.178.45" - server_port: 8388 - local_port: 1086 - vault_password_key: "vault_ss_password_2" - sslocal-rust-1081: - server: "79.137.207.43" - server_port: 8388 - local_port: 1081 - vault_password_key: "vault_ss_password_2" -worker_proxies: - - "socks5://sslocal-rust-1087:1087" diff --git a/ansible/inventory.ini b/ansible/inventory.ini deleted file mode 100644 index 02fec51..0000000 --- a/ansible/inventory.ini +++ /dev/null @@ -1,10 +0,0 @@ -# This file is auto-generated by tools/generate-inventory.py -# Do not edit – your 
changes will be overwritten. -# Edit cluster.yml and re-run the generator instead. - -[airflow_master] -af-green ansible_host=89.253.221.173 ansible_port=22 - -[airflow_workers] -dl003 ansible_host=62.60.245.103 -dl001 ansible_host=109.107.189.106 diff --git a/ansible/playbook-base-system.yml b/ansible/playbook-base-system.yml new file mode 100644 index 0000000..2e6c7d7 --- /dev/null +++ b/ansible/playbook-base-system.yml @@ -0,0 +1,151 @@ +--- +- name: "BASE-SYSTEM: Common Setup for All Nodes" + hosts: all + gather_facts: yes + vars_files: + - "group_vars/all/generated_vars.stress.yml" # Assumes generate-inventory.py was run with cluster.stress.yml + - "group_vars/all/vault.yml" + pre_tasks: + - name: Announce base system setup + ansible.builtin.debug: + msg: "Starting base system setup on {{ inventory_hostname }}" + + tasks: + # 1. Install System Essentials + - name: Install NTP for time synchronization + ansible.builtin.apt: + name: ntp + state: present + become: yes + + - name: Ensure NTP service is started and enabled + ansible.builtin.service: + name: ntp + state: started + enabled: yes + become: yes + + - name: Install pipx + ansible.builtin.apt: + name: pipx + state: present + become: yes + + - name: Install Glances for system monitoring + ansible.builtin.command: pipx install glances[all] + args: + creates: "{{ ansible_env.HOME }}/.local/bin/glances" + become: yes + become_user: "{{ ansible_user }}" + + - name: Install base system packages for tools + ansible.builtin.apt: + name: + - unzip + - wget + - xz-utils + - build-essential + - python3-pip + state: present + update_cache: yes + become: yes + + # 2. Secure the Host + - name: Copy secure sshd_config + ansible.builtin.copy: + src: "configs/etc/ssh/sshd_config" + dest: "/etc/ssh/sshd_config" + owner: root + group: root + mode: '0644' + become: yes + notify: Restart sshd + + - name: Include Fail2ban role + ansible.builtin.include_role: + name: fail2ban + + # 3. Manage Hostname Resolution + - name: Update /etc/hosts file for cluster name resolution + ansible.builtin.lineinfile: + path: /etc/hosts + regexp: '.* {{ item }}$' + line: "{{ hostvars[item].ansible_host }} {{ item }}" + state: present + loop: "{{ groups['all'] }}" + become: yes + + # 4. Install Docker + - name: Install Docker + block: + - name: Check if Docker is already installed + ansible.builtin.stat: + path: /usr/bin/docker + register: docker_binary + + - name: Install Docker if not present + block: + - name: Add Docker's official GPG key + ansible.builtin.apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + - name: Set up the Docker repository + ansible.builtin.apt_repository: + repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_lsb.codename }} stable" + state: present + + - name: Install prerequisites for Docker + ansible.builtin.apt: + name: + - apt-transport-https + - ca-certificates + - curl + - software-properties-common + state: present + update_cache: yes + + - name: Install Docker Engine and Docker Compose + ansible.builtin.apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-compose-plugin + - python3-docker + state: present + update_cache: yes + when: not docker_binary.stat.exists + become: yes + + # 5. 
Configure Docker Service & User + - name: Ensure Docker service is started and enabled + ansible.builtin.service: + name: docker + state: started + enabled: yes + become: yes + + - name: Add deploy user to the docker group + ansible.builtin.user: + name: "{{ ansible_user }}" + groups: docker + append: yes + become: yes + + - name: Reset SSH connection to apply group changes + ansible.builtin.meta: reset_connection + + # 6. Create Shared Docker Network + - name: Ensure shared Docker network exists + community.docker.docker_network: + name: "{{ docker_network_name }}" + driver: bridge + become: yes + + handlers: + - name: Restart sshd + ansible.builtin.service: + name: ssh + state: restarted + become: yes diff --git a/ansible/playbook-dags.yml b/ansible/playbook-dags.yml deleted file mode 100644 index 80b09c2..0000000 --- a/ansible/playbook-dags.yml +++ /dev/null @@ -1,63 +0,0 @@ ---- -- name: Deploy Airflow DAGs to Master - hosts: airflow_master - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: Sync DAGs to MASTER server - ansible.posix.synchronize: - src: "../airflow/dags/" - dest: /srv/airflow_master/dags/ - archive: yes - delete: no - rsync_path: "sudo rsync" - rsync_opts: - - "--exclude=__pycache__/" - - "--exclude=*.pyc" - - - name: Sync Config to MASTER server - ansible.posix.synchronize: - src: "../airflow/config/{{ item }}" - dest: /srv/airflow_master/config/ - archive: yes - rsync_path: "sudo rsync" - rsync_opts: - - "--exclude=__pycache__/" - - "--exclude=*.pyc" - loop: - - "airflow.cfg" - - "custom_task_hooks.py" - -- name: Deploy Airflow DAGs to DL Workers - hosts: airflow_workers - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: Sync DAGs to WORKER server - ansible.posix.synchronize: - src: "../airflow/dags/" - dest: /srv/airflow_dl_worker/dags/ - archive: yes - delete: no - rsync_path: "sudo rsync" - rsync_opts: - - "--exclude=__pycache__/" - - "--exclude=*.pyc" - - - name: Sync Config to WORKER server - ansible.posix.synchronize: - src: "../airflow/config/{{ item }}" - dest: /srv/airflow_dl_worker/config/ - archive: yes - rsync_path: "sudo rsync" - rsync_opts: - - "--exclude=__pycache__/" - - "--exclude=*.pyc" - loop: - - "airflow.cfg" - - "custom_task_hooks.py" - diff --git a/ansible/playbook-depricated.dl.yml b/ansible/playbook-depricated.dl.yml deleted file mode 100644 index 9a7ff3b..0000000 --- a/ansible/playbook-depricated.dl.yml +++ /dev/null @@ -1,96 +0,0 @@ ---- -- name: Deploy Airflow DL Worker Stack - hosts: airflow_workers - vars_files: - - "{{ inventory_dir }}/group_vars/all/vault.yml" - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - pre_tasks: - - name: Announce fast deploy mode if enabled - debug: - msg: "🚀 FAST DEPLOY MODE ENABLED: Skipping Docker image builds and pulls. 
🚀" - when: fast_deploy | default(false) - run_once: true - tasks: - - - name: Ensure worker directory exists - file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - - - name: Template .env file for worker - template: - src: templates/.env.j2 - dest: "{{ airflow_worker_dir }}/.env" - mode: '0600' - vars: - service_role: "worker" - - - name: Template docker-compose file for Airflow worker - template: - src: ../airflow/configs/docker-compose-dl.yaml.j2 - dest: "{{ airflow_worker_dir }}/configs/docker-compose-dl.yaml" - mode: '0644' - - - name: Ensure configs directory exists for config generator - file: - path: "{{ airflow_worker_dir }}/configs" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - - - name: Sync config generator script - ansible.posix.synchronize: - src: ../airflow/generate_envoy_config.py - dest: "{{ airflow_worker_dir }}/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync config generator templates - ansible.posix.synchronize: - src: ../airflow/configs/{{ item }} - dest: "{{ airflow_worker_dir }}/configs/" - archive: yes - rsync_path: "sudo rsync" - loop: - - docker-compose.config-generate.yaml - - envoy.yaml.j2 - - docker-compose.camoufox.yaml.j2 - - docker-compose-ytdlp-ops.yaml.j2 - - - name: Build Airflow worker image from local Dockerfile - community.docker.docker_image: - name: "{{ airflow_image_name }}" - build: - path: "{{ airflow_worker_dir }}" - dockerfile: "Dockerfile" - source: build - force_source: true - when: not fast_deploy | default(false) - - - - name: Pull ytdlp-ops-server image only - community.docker.docker_image: - name: "{{ ytdlp_ops_image }}" - source: pull - when: not fast_deploy | default(false) - - - name: Generate dynamic configs (camoufox + envoy) - shell: - cmd: "docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator" - chdir: "{{ airflow_worker_dir }}" - - - name: Start worker services - community.docker.docker_compose_v2: - project_src: "{{ airflow_worker_dir }}" - files: - - configs/docker-compose-dl.yaml - - configs/docker-compose-ytdlp-ops.yaml - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/ansible/playbook-dl.yml b/ansible/playbook-dl.yml deleted file mode 100644 index 1a81ebf..0000000 --- a/ansible/playbook-dl.yml +++ /dev/null @@ -1,130 +0,0 @@ ---- -- name: Deploy Airflow DL Worker Stack - hosts: airflow_workers - vars_files: - - "{{ inventory_dir }}/group_vars/all/vault.yml" - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - pre_tasks: - - name: Announce fast deploy mode if enabled - debug: - msg: "🚀 FAST DEPLOY MODE ENABLED: Skipping Docker image builds and pulls. 
🚀" - when: fast_deploy | default(false) - run_once: true - - - name: Install python3-pip - ansible.builtin.apt: - name: python3-pip - state: present - become: yes - - - name: Install required python packages for ytops-client on host - ansible.builtin.pip: - name: - - thrift - - aria2p - - PyYAML - state: present - extra_args: --break-system-packages - become: yes - tasks: - - - name: Ensure worker directory exists - file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - - - name: Template .env file for worker - template: - src: templates/.env.j2 - dest: "{{ airflow_worker_dir }}/.env" - mode: '0600' - vars: - service_role: "worker" - - - name: Template docker-compose file for Airflow worker - template: - src: ../airflow/configs/docker-compose-dl.yaml.j2 - dest: "{{ airflow_worker_dir }}/configs/docker-compose-dl.yaml" - mode: '0644' - - - name: Ensure configs directory exists for config generator - file: - path: "{{ airflow_worker_dir }}/configs" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - - - name: Sync config generator script - ansible.posix.synchronize: - src: ../airflow/generate_envoy_config.py - dest: "{{ airflow_worker_dir }}/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync config generator templates - ansible.posix.synchronize: - src: ../airflow/configs/{{ item }} - dest: "{{ airflow_worker_dir }}/configs/" - archive: yes - rsync_path: "sudo rsync" - loop: - - docker-compose.config-generate.yaml - - envoy.yaml.j2 - - docker-compose.camoufox.yaml.j2 - - docker-compose-ytdlp-ops.yaml.j2 - - - name: Build Airflow worker image from local Dockerfile - community.docker.docker_image: - name: "{{ airflow_image_name }}" - build: - path: "{{ airflow_worker_dir }}" - dockerfile: "Dockerfile" - source: build - force_source: true - when: not fast_deploy | default(false) - - - name: Pull ytdlp-ops-server image only - community.docker.docker_image: - name: "{{ ytdlp_ops_image }}" - source: pull - when: not fast_deploy | default(false) - - - name: Generate dynamic configs (camoufox + envoy) - shell: - cmd: "docker compose --project-directory . 
-f configs/docker-compose.config-generate.yaml run --rm config-generator" - chdir: "{{ airflow_worker_dir }}" - - - name: Start worker services - community.docker.docker_compose_v2: - project_src: "{{ airflow_worker_dir }}" - files: - - configs/docker-compose-dl.yaml - - configs/docker-compose-ytdlp-ops.yaml - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" - - - name: Stop camoufox services (deprecated) - community.docker.docker_compose_v2: - project_src: "{{ airflow_worker_dir }}" - files: - - configs/docker-compose.camoufox.yaml - state: absent - ignore_errors: true - - - name: Update Airflow variable with S3 worker hostnames - ansible.builtin.command: > - docker compose exec -T airflow-scheduler - airflow variables set s3_worker_hostnames - '{{ groups["airflow_workers"] | map("regex_replace", "\\..*", "") | list | to_json }}' - args: - chdir: "{{ airflow_master_dir }}" - become: yes - delegate_to: "{{ groups['airflow_master'][0] }}" - run_once: true diff --git a/ansible/playbook-docker-services-setup.yml b/ansible/playbook-docker-services-setup.yml new file mode 100644 index 0000000..45227fe --- /dev/null +++ b/ansible/playbook-docker-services-setup.yml @@ -0,0 +1,189 @@ +--- +# This playbook orchestrates the entire setup for the stress test environment. +# It is composed of multiple plays and imported playbooks to ensure modularity and correct execution order. + +# ------------------------------------------------------------------------------------------------- +# PHASE 1: Base System Configuration +# Ensures all nodes have the necessary base packages, user configurations, and Docker installed. +# ------------------------------------------------------------------------------------------------- +- name: "PHASE 1.1: Import base system setup playbook" + import_playbook: playbook-base-system.yml + +- name: "PHASE 1.2: Import proxy deployment playbook" + import_playbook: playbook-proxies.yml + +# ------------------------------------------------------------------------------------------------- +# PHASE 2: Application Directory and Code Setup +# Creates the necessary directory structure and syncs the application code. +# ------------------------------------------------------------------------------------------------- +- name: "PHASE 2.1: Create Base Directories" + hosts: all + gather_facts: no + vars_files: + - "group_vars/all/generated_vars.stress.yml" + - "group_vars/all/vault.yml" + tasks: + - name: Define base directory for node + ansible.builtin.set_fact: + base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}" + + - name: Ensure base directories and subdirectories exist + ansible.builtin.file: + path: "{{ base_dir }}/{{ item }}" + state: directory + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0755' + recurse: yes + loop: + - "" # for the base directory itself + - "inputfiles" + - "run/docker_mount/fetched_info_jsons" + become: yes + +- name: "PHASE 2.2: Import playbook to install Python dependencies" + import_playbook: playbook-stress-install-deps.yml + +- name: "PHASE 2.3: Import playbook to sync local code" + import_playbook: playbook-stress-sync-code.yml + +# ------------------------------------------------------------------------------------------------- +# PHASE 3: Environment and Service Configuration +# Generates the .env file and starts the role-specific services on master and workers. 
+# ------------------------------------------------------------------------------------------------- +- name: "PHASE 3.1: Import playbook to generate .env file" + import_playbook: playbook-stress-generate-env.yml + +- name: "PHASE 3.2: Master Node Services Setup" + hosts: airflow_master + gather_facts: no + vars_files: + - "group_vars/all/generated_vars.stress.yml" + - "group_vars/all/vault.yml" + tasks: + - name: Configure system performance and kernel settings + ansible.builtin.copy: + src: "configs/etc/sysctl.d/99-system-limits.conf" + dest: "/etc/sysctl.d/99-system-limits.conf" + owner: root + group: root + mode: '0644' + become: yes + register: sysctl_config_copy + + - name: Apply sysctl settings + ansible.builtin.command: sysctl --system + become: yes + when: sysctl_config_copy.changed + + - name: Ensure MinIO data directory exists + ansible.builtin.file: + path: "{{ airflow_master_dir }}/minio-data" + state: directory + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0755' + become: yes + + - name: Template Docker Compose file for master services + ansible.builtin.template: + src: templates/docker-compose.stress-master.j2 + dest: "{{ airflow_master_dir }}/docker-compose.stress.yml" + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0644' + become: yes + + - name: Start master services (Redis, MinIO) + community.docker.docker_compose_v2: + project_src: "{{ airflow_master_dir }}" + files: + - docker-compose.stress.yml + state: present + remove_orphans: true + become: yes + + - name: Wait for MinIO service to be ready + ansible.builtin.wait_for: + host: "{{ hostvars[inventory_hostname].ansible_host }}" + port: 9000 + delay: 5 + timeout: 60 + delegate_to: localhost + + - name: Download MinIO Client (mc) if not present + ansible.builtin.command: + cmd: wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc + creates: /usr/local/bin/mc + become: yes + + - name: Ensure MinIO Client (mc) is executable + ansible.builtin.file: + path: /usr/local/bin/mc + mode: '0755' + become: yes + + - name: Configure mc alias for local MinIO + ansible.builtin.command: > + mc alias set local http://localhost:9000 {{ vault_s3_access_key_id }} {{ vault_s3_secret_access_key }} + become: yes + become_user: "{{ ansible_user }}" + changed_when: false + environment: + HOME: "/home/{{ ansible_user }}" + + - name: Ensure S3 buckets exist in MinIO using mc + ansible.builtin.command: > + mc mb local/{{ item }} + loop: + - "stress-inputs" + - "stress-jsons" + become: yes + become_user: "{{ ansible_user }}" + register: mc_mb_result + failed_when: > + mc_mb_result.rc != 0 and + "already exists" not in mc_mb_result.stderr + changed_when: mc_mb_result.rc == 0 + environment: + HOME: "/home/{{ ansible_user }}" + +- name: "PHASE 3.3: Shared Storage Setup (s3fs)" + hosts: airflow_master:airflow_workers + gather_facts: no + vars_files: + - "group_vars/all/generated_vars.stress.yml" + - "group_vars/all/vault.yml" + tasks: + - name: Define base directory for node + ansible.builtin.set_fact: + base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}" + + - name: Mount S3 buckets via s3fs + block: + - name: Install s3fs for mounting S3 buckets + ansible.builtin.apt: + name: s3fs + state: present + become: yes + + - name: Configure s3fs credentials + ansible.builtin.copy: + content: "{{ vault_s3_access_key_id }}:{{ vault_s3_secret_access_key }}" + dest: "/home/{{ ansible_user }}/.passwd-s3fs" + owner: "{{ ansible_user }}" + 
group: "{{ deploy_group }}" + mode: '0600' + become: yes + + - name: Mount S3 buckets for stress testing + ansible.posix.mount: + src: "s3fs#{{ item.bucket }}" + path: "{{ item.path }}" + fstype: fuse + opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['airflow_master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs" + state: mounted + loop: + - { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' } + - { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' } + become: yes diff --git a/ansible/playbook-full-with-proxies.yml b/ansible/playbook-full-with-proxies.yml deleted file mode 100644 index de6555a..0000000 --- a/ansible/playbook-full-with-proxies.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- name: Deploy entire cluster with proxies - import_playbook: playbook-proxies.yml - -- name: Deploy application stack - import_playbook: playbook-full.yml diff --git a/ansible/playbook-full.yml b/ansible/playbook-full.yml deleted file mode 100644 index 613d487..0000000 --- a/ansible/playbook-full.yml +++ /dev/null @@ -1,205 +0,0 @@ ---- -- name: Deploy entire Airflow cluster - hosts: all - gather_facts: true - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - pre_tasks: - - name: Announce fast deploy mode if enabled - debug: - msg: "🚀 FAST DEPLOY MODE ENABLED: Skipping Docker image builds and pulls. 🚀" - when: fast_deploy | default(false) - run_once: true - - - name: Check if Docker is already installed - ansible.builtin.stat: - path: /usr/bin/docker - register: docker_binary - - - name: Install Docker if not present - block: - - name: Add Docker's official GPG key - ansible.builtin.apt_key: - url: https://download.docker.com/linux/ubuntu/gpg - state: present - - - name: Find and remove any existing Docker repository files to avoid conflicts - block: - - name: Find legacy docker repository files - ansible.builtin.find: - paths: /etc/apt/sources.list.d/ - patterns: '*.list' - contains: 'deb .*download.docker.com' - register: legacy_docker_repo_files - - - name: Remove legacy docker repository files - ansible.builtin.file: - path: "{{ item.path }}" - state: absent - loop: "{{ legacy_docker_repo_files.files }}" - - - name: Set up the Docker repository - ansible.builtin.apt_repository: - repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_lsb.codename }} stable" - state: present - - - name: Install prerequisites for Docker - ansible.builtin.apt: - name: - - apt-transport-https - - ca-certificates - - curl - - software-properties-common - - vim - - python3-pip - - iputils-ping - - traceroute - - fail2ban - - conntrack - - tcpdump - state: present - update_cache: yes - - - name: Install Docker Engine and Docker Compose - ansible.builtin.apt: - name: - - docker-ce - - docker-ce-cli - - containerd.io - - docker-compose-plugin - - python3-docker - state: present - update_cache: yes - when: not docker_binary.stat.exists - become: yes - tasks: - - - name: Install pipx - ansible.builtin.apt: - name: pipx - state: present - become: yes - - - name: Install Glances for system monitoring - ansible.builtin.command: pipx install glances[all] - args: - creates: "{{ ansible_env.HOME }}/.local/bin/glances" - become: yes - become_user: "{{ ansible_user }}" - - - name: Ensure Docker service is started and enabled - ansible.builtin.service: - name: docker - state: started - enabled: yes - become: yes - - - name: Add deploy 
user to the docker group - ansible.builtin.user: - name: "{{ ansible_user }}" - groups: docker - append: yes - become: yes - - - name: Reset SSH connection to apply group changes - ansible.builtin.meta: reset_connection - - - - name: Ensure shared Docker network exists - community.docker.docker_network: - name: airflow_proxynet - driver: bridge - - -- name: Deploy master - import_playbook: playbook-master.yml - when: inventory_hostname in groups['airflow_master'] - -- name: Deploy workers - import_playbook: playbook-worker.yml - when: inventory_hostname in groups['airflow_workers'] - -- name: Deploy and Reload Airflow Task Hook - hosts: all - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: Ensure config directory exists on MASTER server - when: inventory_hostname in groups['airflow_master'] - ansible.builtin.file: - path: "{{ airflow_master_dir }}/config" - state: directory - owner: "{{ ansible_user }}" - group: "{{ ansible_user }}" - mode: '0755' - become: yes - - - name: Ensure config directory exists on WORKER server - when: inventory_hostname in groups['airflow_workers'] - ansible.builtin.file: - path: "{{ airflow_worker_dir }}/config" - state: directory - owner: "{{ ansible_user }}" - group: "{{ ansible_user }}" - mode: '0755' - become: yes - - - name: Sync custom_task_hooks.py to MASTER server - when: inventory_hostname in groups['airflow_master'] - synchronize: - src: "../airflow/config/custom_task_hooks.py" - dest: "{{ airflow_master_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync airflow_local_settings.py to MASTER server - when: inventory_hostname in groups['airflow_master'] - synchronize: - src: "../airflow/config/airflow_local_settings.py" - dest: "{{ airflow_master_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync custom_task_hooks.py to WORKER server - when: inventory_hostname in groups['airflow_workers'] - synchronize: - src: "../airflow/config/custom_task_hooks.py" - dest: "{{ airflow_worker_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync airflow_local_settings.py to WORKER server - when: inventory_hostname in groups['airflow_workers'] - synchronize: - src: "../airflow/config/airflow_local_settings.py" - dest: "{{ airflow_worker_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Restart Airflow services on MASTER to apply hook - when: inventory_hostname in groups['airflow_master'] - ansible.builtin.command: - cmd: "docker compose restart airflow-scheduler airflow-webserver airflow-master-worker airflow-triggerer" - chdir: "{{ airflow_master_dir }}" - become: yes - - - name: Restart Airflow worker on WORKER to apply hook - when: inventory_hostname in groups['airflow_workers'] - ansible.builtin.command: - cmd: "docker compose restart airflow-worker-dl airflow-worker-auth airflow-worker-mgmt airflow-worker-s3" - chdir: "{{ airflow_worker_dir }}" - become: yes - - - name: Update Airflow variable with S3 worker hostnames - ansible.builtin.command: > - docker compose exec -T airflow-scheduler - airflow variables set s3_worker_hostnames - '{{ groups["airflow_workers"] | map("regex_replace", "\\..*", "") | list | to_json }}' - args: - chdir: "{{ airflow_master_dir }}" - become: yes - when: inventory_hostname in groups['airflow_master'] - run_once: true diff --git a/ansible/playbook-hook.yml b/ansible/playbook-hook.yml deleted file mode 100644 index d69a7d9..0000000 --- 
a/ansible/playbook-hook.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- -- name: Deploy and Reload Airflow Task Hook - hosts: all - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: Ensure config directory exists on MASTER server - when: inventory_hostname in groups['airflow_master'] - ansible.builtin.file: - path: "{{ airflow_master_dir }}/config" - state: directory - owner: "{{ ansible_user }}" - group: "{{ ansible_user }}" - mode: '0755' - become: yes - - - name: Ensure config directory exists on WORKER server - when: inventory_hostname in groups['airflow_workers'] - ansible.builtin.file: - path: "{{ airflow_worker_dir }}/config" - state: directory - owner: "{{ ansible_user }}" - group: "{{ ansible_user }}" - mode: '0755' - become: yes - - - name: Sync custom_task_hooks.py to MASTER server - when: inventory_hostname in groups['airflow_master'] - synchronize: - src: "../airflow/config/custom_task_hooks.py" - dest: "{{ airflow_master_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync airflow_local_settings.py to MASTER server - when: inventory_hostname in groups['airflow_master'] - synchronize: - src: "../airflow/config/airflow_local_settings.py" - dest: "{{ airflow_master_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync custom_task_hooks.py to WORKER server - when: inventory_hostname in groups['airflow_workers'] - synchronize: - src: "../airflow/config/custom_task_hooks.py" - dest: "{{ airflow_worker_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Sync airflow_local_settings.py to WORKER server - when: inventory_hostname in groups['airflow_workers'] - synchronize: - src: "../airflow/config/airflow_local_settings.py" - dest: "{{ airflow_worker_dir }}/config/" - archive: yes - rsync_path: "sudo rsync" - - - name: Restart Airflow services on MASTER - when: inventory_hostname in groups['airflow_master'] - ansible.builtin.command: - cmd: "docker compose restart airflow-scheduler airflow-webserver airflow-master-worker airflow-triggerer" - chdir: "{{ airflow_master_dir }}" - become: yes - - - name: Restart Airflow worker on WORKER - when: inventory_hostname in groups['airflow_workers'] - ansible.builtin.command: - cmd: "docker compose restart airflow-worker-dl airflow-worker-auth airflow-worker-mgmt airflow-worker-s3" - chdir: "{{ airflow_worker_dir }}" - become: yes diff --git a/ansible/playbook-install-local.yml b/ansible/playbook-install-local.yml deleted file mode 100644 index 87b4b62..0000000 --- a/ansible/playbook-install-local.yml +++ /dev/null @@ -1,44 +0,0 @@ ---- -- name: Install Local Development Packages - hosts: airflow_workers, airflow_master - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - tasks: - - name: Ensure python3-pip is installed - ansible.builtin.apt: - name: python3-pip - state: present - update_cache: yes - become: yes - - - name: Upgrade pip to the latest version (for systems without PEP 668) - ansible.builtin.command: python3 -m pip install --upgrade pip - register: pip_upgrade_old_systems - changed_when: "'Requirement already satisfied' not in pip_upgrade_old_systems.stdout" - failed_when: false # This task will fail on newer systems, which is expected. 
- become: yes - become_user: "{{ ansible_user }}" - - - name: Upgrade pip to the latest version (for systems with PEP 668) - ansible.builtin.command: python3 -m pip install --upgrade pip --break-system-packages - when: pip_upgrade_old_systems.rc != 0 and 'externally-managed-environment' in pip_upgrade_old_systems.stderr - changed_when: "'Requirement already satisfied' not in pip_upgrade_new_systems.stdout" - register: pip_upgrade_new_systems - become: yes - become_user: "{{ ansible_user }}" - - - name: Install or upgrade yt-dlp to the latest nightly version - ansible.builtin.command: python3 -m pip install -U --pre "yt-dlp[default]" --break-system-packages - register: ytdlp_install - changed_when: "'Requirement already satisfied' not in ytdlp_install.stdout" - become: yes - become_user: "{{ ansible_user }}" - - - name: Install requests library - ansible.builtin.command: python3 -m pip install requests==2.31.0 --break-system-packages - register: requests_install - changed_when: "'Requirement already satisfied' not in requests_install.stdout" - become: yes - become_user: "{{ ansible_user }}" diff --git a/ansible/playbook-master.yml b/ansible/playbook-master.yml deleted file mode 100644 index c13ba9d..0000000 --- a/ansible/playbook-master.yml +++ /dev/null @@ -1,355 +0,0 @@ ---- -- name: Deploy Airflow Master - hosts: airflow_master - gather_facts: yes - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - vars: - envoy_port: 9980 - envoy_admin_port: 9981 - pre_tasks: - - name: Announce master deployment - debug: - msg: "Starting deployment for Airflow Master: {{ inventory_hostname }} ({{ ansible_host }})" - - - name: Install python3-pip - ansible.builtin.apt: - name: python3-pip - state: present - become: yes - - - name: Install required python packages - ansible.builtin.pip: - name: - - thrift - - aria2p - - PyYAML - state: present - become: yes - - - name: Configure Redis memory overcommit setting - copy: - src: "configs/etc/sysctl.d/99-redis-overcommit.conf" - dest: "/etc/sysctl.d/99-redis-overcommit.conf" - owner: root - group: root - mode: '0644' - become: yes - register: redis_sysctl_config_copy - - - name: Configure system limits - copy: - src: "configs/etc/sysctl.d/99-system-limits.conf" - dest: "/etc/sysctl.d/99-system-limits.conf" - owner: root - group: root - mode: '0644' - become: yes - register: limits_sysctl_config_copy - - - name: Apply sysctl settings for Redis - command: sysctl --system - become: yes - when: redis_sysctl_config_copy.changed - - - name: Apply sysctl settings for system limits - command: sysctl --system - become: yes - when: limits_sysctl_config_copy.changed - - - name: Configure system timezone - # Ensures all services and logs on this node use a consistent timezone. 
- community.general.timezone: - name: "{{ host_timezone }}" - become: yes - - - name: Install NTP for time synchronization - ansible.builtin.apt: - name: ntp - state: present - become: yes - - - name: Ensure NTP service is started and enabled - ansible.builtin.service: - name: ntp - state: started - enabled: yes - become: yes - - - name: Set deploy_group to a valid single group name - set_fact: - deploy_group: "ytdl" - - - name: Ensure deploy group exists - group: - name: "{{ deploy_group }}" - state: present - become: yes - - - name: Ensure deploy user exists - user: - name: "{{ ansible_user }}" - group: "{{ deploy_group }}" - state: present - become: yes - - - name: Validate deploy_group variable - ansible.builtin.assert: - that: - - deploy_group is defined - - deploy_group is string - - "',' not in deploy_group" - - "' ' not in deploy_group" - fail_msg: "The 'deploy_group' variable ('{{ deploy_group }}') must be a single, valid group name. It should not contain commas or spaces." - - - name: Check for swapfile - stat: - path: /swapfile - register: swap_file - become: yes - - - name: Create 8GB swapfile - command: fallocate -l 8G /swapfile - when: not swap_file.stat.exists - become: yes - - - name: Set swapfile permissions - file: - path: /swapfile - mode: '0600' - when: not swap_file.stat.exists - become: yes - - - name: Make swap - command: mkswap /swapfile - when: not swap_file.stat.exists - become: yes - - - name: Check current swap status - command: swapon --show - register: swap_status - changed_when: false - become: yes - - - name: Enable swap - command: swapon /swapfile - when: "'/swapfile' not in swap_status.stdout" - become: yes - - - name: Add swapfile to fstab - lineinfile: - path: /etc/fstab - regexp: '^/swapfile' - line: '/swapfile none swap sw 0 0' - state: present - become: yes - - - name: Get GID of the deploy group - getent: - database: group - key: "{{ deploy_group }}" - register: deploy_group_info - become: yes - - - name: Set deploy_group_gid fact - set_fact: - deploy_group_gid: "{{ deploy_group_info.ansible_facts.getent_group[deploy_group][1] }}" - when: deploy_group_info.ansible_facts.getent_group is defined and deploy_group in deploy_group_info.ansible_facts.getent_group - - - name: Ensure deploy_group_gid is set to a valid value - set_fact: - deploy_group_gid: "0" - when: deploy_group_gid is not defined or deploy_group_gid == "" - - - name: Ensure master directory exists - ansible.builtin.file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - - - name: Ensure runtime data directories exist with correct ownership - ansible.builtin.file: - path: "{{ airflow_master_dir }}/{{ item }}" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0775' - recurse: yes - loop: - - "downloadfiles" - - "inputfiles" - - "dumps" - become: yes - - - name: Sync python packages to master for build context - ansible.posix.synchronize: - src: "../{{ item }}/" - dest: "{{ airflow_master_dir }}/{{ item }}/" - rsync_opts: - - "--delete" - - "--exclude=.DS_Store" - - "--exclude=__pycache__" - - "--exclude='*.pyc'" - recursive: yes - perms: yes - loop: - - "thrift_model" - - "pangramia" - - "ytops_client" - - "yt_ops_services" - become: yes - become_user: "{{ ansible_user }}" - - - name: Ensure bin directory exists on master for build context - ansible.builtin.file: - path: "{{ airflow_master_dir }}/bin" - state: directory - mode: '0755' - become: yes - 
become_user: "{{ ansible_user }}" - - - name: Sync root files and client utilities to master for build context - ansible.posix.synchronize: - src: "../{{ item }}" - dest: "{{ airflow_master_dir }}/{{ item }}" - perms: yes - loop: - - "setup.py" - - "VERSION" - - "bin/ytops-client" - become: yes - become_user: "{{ ansible_user }}" - - - name: Ensure Airflow project directory is writable by the container user (UID 50000) - ansible.builtin.file: - path: "{{ airflow_master_dir }}" - owner: 50000 - group: 50000 - become: yes - - - name: Ensure Airflow subdirectories are writable by the container user (UID 50000) - ansible.builtin.file: - path: "{{ item }}" - owner: 50000 - group: 50000 - recurse: yes - state: directory - loop: - - "{{ airflow_master_dir }}/dags" - - "{{ airflow_master_dir }}/logs" - - "{{ airflow_master_dir }}/plugins" - - "{{ airflow_master_dir }}/config" - become: yes - - tasks: - - name: Install pipx - ansible.builtin.apt: - name: pipx - state: present - become: yes - - - name: Install Glances for system monitoring - ansible.builtin.command: pipx install glances[all] - args: - creates: "{{ ansible_env.HOME }}/.local/bin/glances" - become: yes - become_user: "{{ ansible_user }}" - - # Include Docker health check - - name: Include Docker health check tasks - include_tasks: tasks/docker_health_check.yml - - - name: Generate Docker Compose configurations - ansible.builtin.command: > - docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator - args: - chdir: "{{ airflow_master_dir }}" - become: yes - become_user: "{{ ansible_user }}" - register: config_generator_result - changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout" - - - name: Show config generator output - ansible.builtin.debug: - var: config_generator_result.stdout_lines - when: config_generator_result.changed - - roles: - - ytdlp-master - - airflow-master - - post_tasks: - - name: Wait for airflow-scheduler to be running before proceeding - ansible.builtin.command: docker compose ps --filter "status=running" --services - args: - chdir: "{{ airflow_master_dir }}" - register: running_services - until: "'airflow-scheduler' in running_services.stdout_lines" - retries: 30 - delay: 10 - changed_when: false - become: yes - become_user: "{{ ansible_user }}" - - - name: Delete existing Airflow redis_default connection to ensure an idempotent update - ansible.builtin.command: > - docker compose exec -T airflow-scheduler - airflow connections delete redis_default - args: - chdir: "{{ airflow_master_dir }}" - register: delete_redis_conn - retries: 5 - delay: 10 - until: delete_redis_conn.rc == 0 or 'not found' in delete_redis_conn.stderr - changed_when: "'was deleted successfully' in delete_redis_conn.stdout" - failed_when: - - delete_redis_conn.rc != 0 - - "'not found' not in delete_redis_conn.stderr" - become: yes - become_user: "{{ ansible_user }}" - - - name: Add Airflow redis_default connection - ansible.builtin.command: > - docker compose exec -T airflow-scheduler - airflow connections add redis_default - --conn-uri 'redis://:{{ vault_redis_password }}@{{ ansible_host }}:{{ redis_port }}/{{ redis_db_celery_broker | default(1) }}' - args: - chdir: "{{ airflow_master_dir }}" - register: add_redis_conn - retries: 5 - delay: 10 - until: add_redis_conn.rc == 0 - changed_when: "'was successfully added' in add_redis_conn.stdout" - become: yes - become_user: "{{ ansible_user }}" - - - name: Update S3 delivery connection - 
ansible.builtin.import_playbook: playbook-update-s3-vars.yml - - # - name: Include camoufox verification tasks - # include_tasks: tasks/verify_camoufox.yml - # when: not fast_deploy | default(false) - - - name: Run regression test - command: > - docker exec -i airflow-regression-runner python3 /opt/airflow/dags/scripts/regression.py - --client "{{ regression_client | default('mweb') }}" - --workers {{ regression_workers | default(4) }} - --workers-per-bunch {{ regression_workers_per_bunch | default(4) }} - --run-time-min {{ regression_run_time_min | default(120) }} - --input-file "{{ regression_input_file | default('/opt/airflow/inputfiles/video_ids.csv') }}" - --progress-interval-min {{ regression_progress_interval_min | default(2) }} - --report-file "{{ regression_report_file | default('/opt/airflow/downloadfiles/regression_report.csv') }}" - {% if regression_cleanup | default(true) %}--cleanup{% endif %} - register: regression_test_result - changed_when: false - when: run_regression_test | default(false) - - - name: Display regression test output - debug: - var: regression_test_result.stdout_lines - when: run_regression_test | default(false) diff --git a/ansible/playbook-proxies.yml b/ansible/playbook-proxies.yml index 794fcf0..61b6594 100644 --- a/ansible/playbook-proxies.yml +++ b/ansible/playbook-proxies.yml @@ -2,86 +2,6 @@ - name: Deploy Shadowsocks-Rust Proxy Configurations hosts: all gather_facts: yes - pre_tasks: - - name: Check if Docker is already installed - ansible.builtin.stat: - path: /usr/bin/docker - register: docker_binary - - - name: Install Docker if not present - block: - - name: Add Docker's official GPG key - ansible.builtin.apt_key: - url: https://download.docker.com/linux/ubuntu/gpg - state: present - - - name: Find and remove any existing Docker repository files to avoid conflicts - block: - - name: Find legacy docker repository files - ansible.builtin.find: - paths: /etc/apt/sources.list.d/ - patterns: '*.list' - contains: 'deb .*download.docker.com' - register: legacy_docker_repo_files - - - name: Remove legacy docker repository files - ansible.builtin.file: - path: "{{ item.path }}" - state: absent - loop: "{{ legacy_docker_repo_files.files }}" - - - name: Set up the Docker repository - ansible.builtin.apt_repository: - repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_lsb.codename }} stable" - state: present - - - name: Install prerequisites for Docker - ansible.builtin.apt: - name: - - apt-transport-https - - ca-certificates - - curl - - software-properties-common - - vim - - python3-pip - state: present - update_cache: yes - - - name: Install Docker Engine and Docker Compose - ansible.builtin.apt: - name: - - docker-ce - - docker-ce-cli - - containerd.io - - docker-compose-plugin - - python3-docker - state: present - update_cache: yes - when: not docker_binary.stat.exists - become: yes - - - name: Ensure Docker service is started and enabled - ansible.builtin.service: - name: docker - state: started - enabled: yes - become: yes - - - name: Add deploy user to the docker group - ansible.builtin.user: - name: "{{ ansible_user }}" - groups: docker - append: yes - become: yes - - - name: Reset SSH connection to apply group changes - ansible.builtin.meta: reset_connection - - - name: Ensure shared Docker network exists - community.docker.docker_network: - name: "{{ docker_network_name }}" - driver: bridge - become: yes tasks: - name: Deploy Shadowsocks-Rust proxy services block: @@ -135,6 +55,14 @@ path: 
/srv/shadowsocks-rust/docker-compose.yaml state: absent + - name: Force stop and remove known proxy containers to prevent conflicts + community.docker.docker_container: + name: "{{ item.key }}" + state: absent + loop: "{{ shadowsocks_proxies | dict2items }}" + loop_control: + label: "{{ item.key }}" + - name: Stop and remove any existing Shadowsocks-Rust proxy services community.docker.docker_compose_v2: project_src: "/srv/shadowsocks-rust" diff --git a/ansible/playbook-stress-generate-env.yml b/ansible/playbook-stress-generate-env.yml new file mode 100644 index 0000000..bf69fdb --- /dev/null +++ b/ansible/playbook-stress-generate-env.yml @@ -0,0 +1,32 @@ +--- +- name: "STRESS-SETUP: Create .env file" + hosts: all + gather_facts: no + vars_files: + - "group_vars/all/generated_vars.stress.yml" + - "group_vars/all/vault.yml" + tasks: + - name: Define base directory for node + ansible.builtin.set_fact: + base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}" + + - name: Create .env file for stress test environment + ansible.builtin.template: + src: templates/.env.stress.j2 + dest: "{{ base_dir }}/.env" + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0644' + become: yes + + - name: Ensure REDIS_PORT is set in .env file + ansible.builtin.lineinfile: + path: "{{ base_dir }}/.env" + line: "REDIS_PORT={{ redis_port }}" + regexp: "^REDIS_PORT=" + state: present + create: yes + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0644' + become: yes diff --git a/ansible/playbook-stress-install-deps.yml b/ansible/playbook-stress-install-deps.yml new file mode 100644 index 0000000..ca14455 --- /dev/null +++ b/ansible/playbook-stress-install-deps.yml @@ -0,0 +1,42 @@ +--- +- name: "STRESS-SETUP: Install Python Dependencies" + hosts: all + gather_facts: yes + vars_files: + - "group_vars/all/generated_vars.stress.yml" + - "group_vars/all/vault.yml" + pre_tasks: + - name: Ensure python3-pip is installed + block: + - name: Install prerequisites for managing repositories + ansible.builtin.apt: + name: software-properties-common + state: present + update_cache: yes + + - name: Ensure universe repository is enabled + ansible.builtin.apt_repository: + repo: "deb http://archive.ubuntu.com/ubuntu {{ ansible_lsb.codename }} universe" + state: present + update_cache: yes + + - name: Install python3-pip + ansible.builtin.apt: + name: python3-pip + state: present + become: yes + + tasks: + - name: Install required Python packages + ansible.builtin.pip: + name: + - python-dotenv + - aria2p + - tabulate + - redis + - PyYAML + - aiothrift + - PySocks + state: present + extra_args: --break-system-packages + become: yes diff --git a/ansible/playbook-stress-sync-code.yml b/ansible/playbook-stress-sync-code.yml new file mode 100644 index 0000000..8ddf940 --- /dev/null +++ b/ansible/playbook-stress-sync-code.yml @@ -0,0 +1,58 @@ +--- +- name: "STRESS-SETUP: Sync Local Code" + hosts: all + gather_facts: no + vars_files: + - "group_vars/all/generated_vars.stress.yml" + - "group_vars/all/vault.yml" + tasks: + - name: Define base directory for node + ansible.builtin.set_fact: + base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}" + + - name: Ensure base directory exists for code sync + ansible.builtin.file: + path: "{{ base_dir }}" + state: directory + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0755' + become: yes + + - name: Sync python packages and 
directories for stress testing + ansible.posix.synchronize: + src: "../ytops_client-source/{{ item }}/" + dest: "{{ base_dir }}/{{ item }}/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + loop: + - "thrift_model" + - "pangramia" + - "ytops_client" + - "policies" + - "bin" + - "yt_ops_services" + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync client utility scripts and configs + ansible.posix.synchronize: + src: "../ytops_client-source/{{ item }}" + dest: "{{ base_dir }}/{{ item }}" + perms: yes + loop: + - "cli.auth.config" + - "cli.download.config" + - "package_client.py" + - "setup.py" + - "VERSION" + - "VERSION.client" + - "thrift_exceptions_patch.py" + - "ytdlp.json" + become: yes + become_user: "{{ ansible_user }}" diff --git a/ansible/playbook-sync-local.yml b/ansible/playbook-sync-local.yml deleted file mode 100644 index 7b15bfe..0000000 --- a/ansible/playbook-sync-local.yml +++ /dev/null @@ -1,101 +0,0 @@ ---- -- name: Sync Local Development Files to Workers and Master - hosts: airflow_workers, airflow_master - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - vars: - sync_dir: "{{ airflow_worker_dir if 'airflow_workers' in group_names else airflow_master_dir }}" - - pre_tasks: - - name: Announce local sync - debug: - msg: "Syncing local dev files to {{ inventory_hostname }} at {{ sync_dir }}" - - tasks: - - name: Sync thrift_model directory - ansible.posix.synchronize: - src: ../thrift_model/ - dest: "{{ sync_dir }}/thrift_model/" - rsync_opts: - - "--delete" - - "--exclude=.DS_Store" - - "--exclude=__pycache__" - - "--exclude='*.pyc'" - recursive: yes - perms: yes - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync pangramia package - ansible.posix.synchronize: - src: ../pangramia/ - dest: "{{ sync_dir }}/pangramia/" - rsync_opts: - - "--delete" - - "--exclude=.DS_Store" - - "--exclude=__pycache__" - - "--exclude='*.pyc'" - recursive: yes - perms: yes - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync ytops_client directory - ansible.posix.synchronize: - src: ../ytops_client/ - dest: "{{ sync_dir }}/ytops_client/" - rsync_opts: - - "--delete" - - "--exclude=.DS_Store" - - "--exclude=__pycache__" - - "--exclude='*.pyc'" - recursive: yes - perms: yes - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync policies directory - ansible.posix.synchronize: - src: ../policies/ - dest: "{{ sync_dir }}/policies/" - rsync_opts: - - "--delete" - - "--exclude=.DS_Store" - - "--exclude=__pycache__" - - "--exclude='*.pyc'" - recursive: yes - perms: yes - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync ytdlp.json - ansible.posix.synchronize: - src: ../ytdlp.json - dest: "{{ sync_dir }}/ytdlp.json" - perms: yes - become: yes - become_user: "{{ ansible_user }}" - - - name: Ensure bin directory exists for client utilities - ansible.builtin.file: - path: "{{ sync_dir }}/bin" - state: directory - mode: '0755' - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync client utility scripts - ansible.posix.synchronize: - src: "../{{ item }}" - dest: "{{ sync_dir }}/{{ item }}" - perms: yes - loop: - - "cli.config" - - "package_client.py" - - "setup.py" - - "bin/ytops-client" - - "bin/build-yt-dlp-image" - - "VERSION.client" - become: yes - become_user: "{{ ansible_user }}" diff --git a/ansible/playbook-update-regression-script.yml b/ansible/playbook-update-regression-script.yml 
deleted file mode 100644 index 5fd5ead..0000000 --- a/ansible/playbook-update-regression-script.yml +++ /dev/null @@ -1,27 +0,0 @@ ---- -- name: Update Regression Test Script - hosts: airflow_master - gather_facts: no - vars: - # This should be the root directory of your project on the master host. - # It's set as a variable so you can override it if needed, e.g., - # ansible-playbook ... -e "project_dir=/path/to/your/project" - project_dir: "/srv/airflow_master" - - tasks: - - name: Copy latest regression.py script to the master host - copy: - src: ../airflow/dags/scripts/regression.py - dest: "{{ project_dir }}/dags/scripts/regression.py" - owner: "{{ ansible_user }}" - group: "ytdl" # Assuming the same deploy group as the main playbook - mode: '0644' - become: yes - notify: - - Announce completion - - handlers: - - name: Announce completion - listen: "Announce completion" - debug: - msg: "Regression script has been updated on {{ inventory_hostname }}. You can now run it using 'docker exec'." diff --git a/ansible/playbook-update-s3-vars.yml b/ansible/playbook-update-s3-vars.yml deleted file mode 100644 index da20807..0000000 --- a/ansible/playbook-update-s3-vars.yml +++ /dev/null @@ -1,59 +0,0 @@ ---- -- name: Update S3 Connection Variable - hosts: airflow_master - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: Delete existing s3_delivery_connection to ensure an idempotent update - ansible.builtin.command: > - docker compose exec -T airflow-scheduler - airflow connections delete s3_delivery_connection - args: - chdir: "{{ airflow_master_dir }}" - register: delete_s3_conn - retries: 5 - delay: 10 - until: delete_s3_conn.rc == 0 or 'Did not find a connection' in delete_s3_conn.stderr - changed_when: "'was deleted successfully' in delete_s3_conn.stdout" - failed_when: - - delete_s3_conn.rc != 0 - - "'Did not find a connection' not in delete_s3_conn.stderr" - become: yes - become_user: "{{ ansible_user }}" - - - name: Add/Update s3_delivery_connection - ansible.builtin.command: - argv: - - docker - - compose - - exec - - -T - - airflow-scheduler - - airflow - - connections - - add - - s3_delivery_connection - - --conn-type - - aws - - --conn-login - - "{{ vault_s3_delivery_access_key_id }}" - - --conn-password - - "{{ vault_s3_delivery_secret_access_key }}" - - --conn-host - - "{{ vault_s3_delivery_endpoint }}" - - --conn-extra - - "{{ s3_extra_dict | to_json }}" - chdir: "{{ airflow_master_dir }}" - vars: - s3_extra_dict: - bucket: "{{ vault_s3_delivery_bucket }}" - region_name: "{{ vault_s3_delivery_aws_region }}" - register: add_s3_conn - retries: 5 - delay: 10 - until: add_s3_conn.rc == 0 - changed_when: "'was successfully added' in add_s3_conn.stdout" - become: yes - become_user: "{{ ansible_user }}" diff --git a/ansible/playbook-worker.yml b/ansible/playbook-worker.yml deleted file mode 100644 index cb26110..0000000 --- a/ansible/playbook-worker.yml +++ /dev/null @@ -1,422 +0,0 @@ ---- -- name: Deploy Airflow Workers - hosts: airflow_workers - gather_facts: yes - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - pre_tasks: - - name: Announce worker deployment - debug: - msg: "Starting deployment for Airflow Worker: {{ inventory_hostname }} ({{ ansible_user }}@{{ ansible_host }})" - - - name: Configure system timezone - # Ensures all services and logs on this node use a consistent timezone. 
- community.general.timezone: - name: "{{ host_timezone }}" - become: yes - - - name: Install NTP for time synchronization - ansible.builtin.apt: - name: ntp - state: present - become: yes - - - name: Ensure NTP service is started and enabled - ansible.builtin.service: - name: ntp - state: started - enabled: yes - become: yes - - - name: Set deploy_group to a valid single group name - set_fact: - deploy_group: "ytdl" - - - name: Ensure deploy group exists - group: - name: "{{ deploy_group }}" - state: present - become: yes - - - name: Ensure deploy user exists - user: - name: "{{ ansible_user }}" - group: "{{ deploy_group }}" - state: present - become: yes - - - name: Validate deploy_group variable - ansible.builtin.assert: - that: - - deploy_group is defined - - deploy_group is string - - "',' not in deploy_group" - - "' ' not in deploy_group" - fail_msg: "The 'deploy_group' variable ('{{ deploy_group }}') must be a single, valid group name. It should not contain commas or spaces." - - - name: Check for swapfile - stat: - path: /swapfile - register: swap_file - become: yes - - - name: Create 8GB swapfile - command: fallocate -l 8G /swapfile - when: not swap_file.stat.exists - become: yes - - - name: Set swapfile permissions - file: - path: /swapfile - mode: '0600' - when: not swap_file.stat.exists - become: yes - - - name: Make swap - command: mkswap /swapfile - when: not swap_file.stat.exists - become: yes - - - name: Check current swap status - command: swapon --show - register: swap_status - changed_when: false - become: yes - - - name: Enable swap - command: swapon /swapfile - when: "'/swapfile' not in swap_status.stdout" - become: yes - - - name: Add swapfile to fstab - lineinfile: - path: /etc/fstab - regexp: '^/swapfile' - line: '/swapfile none swap sw 0 0' - state: present - become: yes - - - name: Get GID of the deploy group - getent: - database: group - key: "{{ deploy_group }}" - register: deploy_group_info - become: yes - - - name: Set deploy_group_gid fact - set_fact: - deploy_group_gid: "{{ deploy_group_info.ansible_facts.getent_group[deploy_group][1] }}" - when: deploy_group_info.ansible_facts.getent_group is defined and deploy_group in deploy_group_info.ansible_facts.getent_group - - - name: Ensure deploy_group_gid is set to a valid value - set_fact: - deploy_group_gid: "0" - when: deploy_group_gid is not defined or deploy_group_gid == "" - - - name: Configure system limits - copy: - src: "configs/etc/sysctl.d/99-system-limits.conf" - dest: "/etc/sysctl.d/99-system-limits.conf" - owner: root - group: root - mode: '0644' - become: yes - register: limits_sysctl_config_copy - - - name: Apply sysctl settings for system limits - command: sysctl --system - become: yes - when: limits_sysctl_config_copy.changed - - - name: Create logs directory structure relative to deployment - file: - path: "./logs/yt-dlp-ops/communication_logs" - state: directory - mode: '0755' - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - become: yes - - - name: Ensure worker directory exists - ansible.builtin.file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - - - name: Ensure runtime data directories exist with correct ownership - ansible.builtin.file: - path: "{{ airflow_worker_dir }}/{{ item }}" - state: directory - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0775' - recurse: yes - loop: - - "downloadfiles" - - "downloadfiles/videos" - - 
"downloadfiles/videos/in-progress" - - "downloadfiles/videos/ready" - - "inputfiles" - - "dumps" - become: yes - - - name: Create .dockerignore on worker to exclude runtime data from build context - ansible.builtin.copy: - dest: "{{ airflow_worker_dir }}/.dockerignore" - content: | - # Exclude build artifacts and virtual environments - __pycache__/ - *.pyc - *.pyo - .venv/ - venv/ - - # Exclude sensitive information - .env - .vault_pass - - # Exclude local development and OS-specific files - .DS_Store - .idea/ - *.swp - - # Exclude large directories with runtime data that should not be in the image - logs/ - downloadfiles/ - addfiles/ - *downloads/ - postgres-data/ - redis-data/ - minio-data/ - owner: "{{ ansible_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - - - name: Sync python packages to worker for build context - ansible.posix.synchronize: - src: "../{{ item }}/" - dest: "{{ airflow_worker_dir }}/{{ item }}/" - rsync_opts: - - "--delete" - - "--exclude=.DS_Store" - - "--exclude=__pycache__" - - "--exclude='*.pyc'" - recursive: yes - perms: yes - loop: - - "thrift_model" - - "pangramia" - - "ytops_client" - - "yt_ops_services" - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync aria2-pro-docker to worker for build context - ansible.posix.synchronize: - src: "../airflow/aria2-pro-docker/" - dest: "{{ airflow_worker_dir }}/aria2-pro-docker/" - rsync_opts: - - "--delete" - recursive: yes - perms: yes - become: yes - become_user: "{{ ansible_user }}" - - - name: Ensure bin directory exists on worker for build context - ansible.builtin.file: - path: "{{ airflow_worker_dir }}/bin" - state: directory - mode: '0755' - become: yes - become_user: "{{ ansible_user }}" - - - name: Sync root files and client utilities to worker for build context - ansible.posix.synchronize: - src: "../{{ item }}" - dest: "{{ airflow_worker_dir }}/{{ item }}" - perms: yes - loop: - - "setup.py" - - "VERSION" - - "bin/ytops-client" - become: yes - become_user: "{{ ansible_user }}" - - - name: Ensure Airflow project directory is writable by the container user (UID 50000) - ansible.builtin.file: - path: "{{ airflow_worker_dir }}" - owner: 50000 - group: 50000 - become: yes - - - name: Ensure Airflow subdirectories are writable by the container user (UID 50000) - ansible.builtin.file: - path: "{{ item }}" - owner: 50000 - group: 50000 - recurse: yes - state: directory - loop: - - "{{ airflow_worker_dir }}/dags" - - "{{ airflow_worker_dir }}/logs" - - "{{ airflow_worker_dir }}/plugins" - - "{{ airflow_worker_dir }}/config" - become: yes - - tasks: - - name: Install pipx - ansible.builtin.apt: - name: pipx - state: present - become: yes - - - name: Install Glances for system monitoring - ansible.builtin.command: pipx install glances[all] - args: - creates: "{{ ansible_env.HOME }}/.local/bin/glances" - become: yes - become_user: "{{ ansible_user }}" - - - name: Install base system packages for tools - ansible.builtin.apt: - name: - - unzip - - wget - - xz-utils - state: present - update_cache: yes - become: yes - - - name: Install required Python packages - ansible.builtin.pip: - name: - - python-dotenv - - aria2p - - tabulate - - redis - - PyYAML - - aiothrift - - PySocks - state: present - extra_args: --break-system-packages - become: yes - - - name: Install pinned Python packages - ansible.builtin.pip: - name: - - brotli==1.1.0 - - certifi==2025.10.05 - - curl-cffi==0.13.0 - - mutagen==1.47.0 - - pycryptodomex==3.23.0 - - secretstorage==3.4.0 - - urllib3==2.5.0 - - 
websockets==15.0.1 - state: present - extra_args: --break-system-packages - become: yes - - - name: Upgrade yt-dlp and bgutil provider - ansible.builtin.shell: | - set -e - python3 -m pip install -U --pre "yt-dlp[default,curl-cffi]" --break-system-packages - python3 -m pip install --no-cache-dir -U bgutil-ytdlp-pot-provider --break-system-packages - args: - warn: false - become: yes - changed_when: true - - - name: Check for FFmpeg - stat: - path: /usr/local/bin/ffmpeg - register: ffmpeg_binary - become: yes - - - name: Install FFmpeg - when: not ffmpeg_binary.stat.exists - become: yes - block: - - name: Create ffmpeg directory - ansible.builtin.file: - path: /opt/ffmpeg - state: directory - mode: '0755' - - - name: Download and unarchive FFmpeg - ansible.builtin.unarchive: - src: "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" - dest: /opt/ffmpeg - remote_src: yes - extra_opts: [--strip-components=1] - - - name: Symlink ffmpeg and ffprobe - ansible.builtin.file: - src: "/opt/ffmpeg/bin/{{ item }}" - dest: "/usr/local/bin/{{ item }}" - state: link - force: yes - loop: - - ffmpeg - - ffprobe - - - name: Check for Deno - stat: - path: /usr/local/bin/deno - register: deno_binary - become: yes - - - name: Install Deno - when: not deno_binary.stat.exists - become: yes - block: - - name: Download and unarchive Deno - ansible.builtin.unarchive: - src: https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip - dest: /usr/local/bin/ - remote_src: yes - mode: '0755' - - - name: Check if ytops_client requirements.txt exists - stat: - path: "{{ airflow_worker_dir }}/ytops_client/requirements.txt" - register: ytops_client_reqs - become: yes - become_user: "{{ ansible_user }}" - - - name: Install dependencies from ytops_client/requirements.txt - ansible.builtin.pip: - requirements: "{{ airflow_worker_dir }}/ytops_client/requirements.txt" - state: present - extra_args: --break-system-packages - when: ytops_client_reqs.stat.exists - become: yes - - # Include Docker health check - - name: Include Docker health check tasks - include_tasks: tasks/docker_health_check.yml - - - - name: Pull pre-built Docker images for ytdlp-ops services - ansible.builtin.command: > - docker compose --project-directory . 
-f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable - args: - chdir: "{{ airflow_worker_dir }}" - become: yes - become_user: "{{ ansible_user }}" - register: docker_pull_result - retries: 3 - delay: 10 - changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout" - - - name: Show docker pull output - ansible.builtin.debug: - var: docker_pull_result.stdout_lines - when: docker_pull_result.changed - - roles: - - ytdlp-worker diff --git a/ansible/playbook-ytdlp-master-only.yml b/ansible/playbook-ytdlp-master-only.yml deleted file mode 100644 index 18e75ca..0000000 --- a/ansible/playbook-ytdlp-master-only.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- -- name: Deploy YTDLP Master Services (Management Role Only) - hosts: airflow_master - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: Announce ytdlp-master-only deployment - debug: - msg: "Starting deployment for YTDLP Master services on: {{ inventory_hostname }}" - - - name: Start/Redeploy ytdlp-ops services without camoufox - community.docker.docker_compose_v2: - project_src: "{{ airflow_master_dir }}" - files: - - configs/docker-compose-ytdlp-ops.yaml - state: present - remove_orphans: true - recreate: always - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" - become: yes diff --git a/ansible/playbooks/pause_worker.yml b/ansible/playbooks/pause_worker.yml deleted file mode 100644 index 27dca8a..0000000 --- a/ansible/playbooks/pause_worker.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- -- hosts: airflow_workers - gather_facts: no - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: "Create lock file to pause worker" - file: - path: "{{ airflow_worker_dir }}/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile" - state: touch - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes diff --git a/ansible/playbooks/playbook-bgutils-start.yml b/ansible/playbooks/playbook-bgutils-start.yml deleted file mode 100644 index ca83531..0000000 --- a/ansible/playbooks/playbook-bgutils-start.yml +++ /dev/null @@ -1,19 +0,0 @@ ---- -- name: Start bgutil-provider service - hosts: all # Use --limit to target specific hosts, e.g., --limit management - become: true - gather_facts: false - vars: - container_name: "bgutil-provider" - - tasks: - - name: "Ensure {{ container_name }} container is started" - community.docker.docker_container: - name: "{{ container_name }}" - state: started - register: container_status - - - name: "Display container status" - ansible.builtin.debug: - msg: "{{ container_name }} was started." - when: container_status.changed diff --git a/ansible/playbooks/playbook-bgutils-stop.yml b/ansible/playbooks/playbook-bgutils-stop.yml deleted file mode 100644 index 156f107..0000000 --- a/ansible/playbooks/playbook-bgutils-stop.yml +++ /dev/null @@ -1,19 +0,0 @@ ---- -- name: Stop bgutil-provider service - hosts: all # Use --limit to target specific hosts, e.g., --limit management - become: true - gather_facts: false - vars: - container_name: "bgutil-provider" - - tasks: - - name: "Ensure {{ container_name }} container is stopped" - community.docker.docker_container: - name: "{{ container_name }}" - state: stopped - register: container_status - - - name: "Display container status" - ansible.builtin.debug: - msg: "{{ container_name }} was stopped." 
- when: container_status.changed diff --git a/ansible/playbooks/restart_worker.yml b/ansible/playbooks/restart_worker.yml deleted file mode 100644 index 4d1e03b..0000000 --- a/ansible/playbooks/restart_worker.yml +++ /dev/null @@ -1,47 +0,0 @@ ---- -- name: Restart and Update ytdlp-ops Worker - hosts: all:!af-green - vars: - # This should be the root directory of your project on the target worker machine. - project_dir: "{{ '/srv/airflow_master' if inventory_hostname == 'af-green' else '/srv/airflow_dl_worker' }}" - # This is the path to your compose file, relative to the project_dir. - compose_file: "configs/docker-compose-ytdlp-ops.yaml" - # The specific image to pull for updates. - service_image: "pangramia/ytdlp-ops-server:4.0.1" - - tasks: - - name: "Ensure project directory exists" - ansible.builtin.file: - path: "{{ project_dir }}" - state: directory - mode: '0755' - become: yes - - - - name: "Pull the latest image for the ytdlp-ops service" - community.docker.docker_image: - name: "{{ service_image }}" - source: pull - tags: - - pull - - - name: "Take down the ytdlp-ops services" - community.docker.docker_compose_v2: - project_src: "{{ project_dir }}" - files: - - "{{ compose_file }}" - state: absent - remove_volumes: true - tags: - - down - - - name: "Bring up the ytdlp-ops services" - community.docker.docker_compose_v2: - project_src: "{{ project_dir }}" - files: - - "{{ compose_file }}" - state: present - recreate: always # Corresponds to --force-recreate - build: never - tags: - - up diff --git a/ansible/playbooks/resume_worker.yml b/ansible/playbooks/resume_worker.yml deleted file mode 100644 index 0ac778f..0000000 --- a/ansible/playbooks/resume_worker.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -- hosts: airflow_workers - gather_facts: yes - vars_files: - - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - - "{{ inventory_dir }}/group_vars/all/vault.yml" - tasks: - - name: "Archive lock file to resume worker" - command: > - mv {{ airflow_worker_dir }}/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile - {{ airflow_worker_dir }}/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile.removed-{{ ansible_date_time.year }}{{ '%02d' | format(ansible_date_time.month | int) }}{{ '%02d' | format(ansible_date_time.day | int) }}-{{ '%02d' | format(ansible_date_time.hour | int) }}{{ '%02d' | format(ansible_date_time.minute | int) }} - args: - removes: "{{ airflow_worker_dir }}/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile" - become: yes diff --git a/ansible/roles/airflow-master/tasks/main.yml b/ansible/roles/airflow-master/tasks/main.yml deleted file mode 100644 index f991224..0000000 --- a/ansible/roles/airflow-master/tasks/main.yml +++ /dev/null @@ -1,333 +0,0 @@ ---- -- name: Check if Airflow master deployment directory exists - stat: - path: "{{ airflow_master_dir }}" - register: master_dir_stat - -- name: Ensure Airflow master deployment directory exists - file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - when: not master_dir_stat.stat.exists - -- name: Ensure Airflow master configs directory exists - file: - path: "{{ airflow_master_dir }}/configs" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure Airflow master config directory exists - file: - path: "{{ airflow_master_dir }}/config" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure Airflow operational 
directories exist with correct permissions - file: - path: "{{ airflow_master_dir }}/{{ item }}" - state: directory - owner: "{{ airflow_uid }}" - group: "{{ deploy_group }}" - mode: '0775' - become: yes - loop: - - "dags" - - "logs" - - "plugins" - - "downloadfiles" - - "addfiles" - - "inputfiles" - -- name: Check if source directories exist - stat: - path: "../{{ item }}" - register: source_dirs - loop: - - "airflow/plugins" - - "airflow/addfiles" - - "airflow/bgutil-ytdlp-pot-provider" - -- name: "Log: Syncing Airflow core files" - debug: - msg: "Syncing DAGs, configs, and Python source code to the master node." - -- name: Sync Airflow master files - synchronize: - src: "../{{ item }}" - dest: "{{ airflow_master_dir }}/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "airflow/Dockerfile" - - "airflow/Dockerfile.caddy" - - "airflow/.dockerignore" - - "airflow/dags" - - "airflow/inputfiles" - - "setup.py" - - "yt_ops_services" - - "thrift_model" - - "VERSION" - - "airflow/update-yt-dlp.sh" - - "proxy_manager_client.py" - - "utils" - -- name: Copy custom Python config files to master - copy: - src: "../airflow/config/{{ item }}" - dest: "{{ airflow_master_dir }}/config/{{ item }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - loop: - - "custom_task_hooks.py" - - "airflow_local_settings.py" - -- name: Ensure any existing airflow.cfg directory is removed - file: - path: "{{ airflow_master_dir }}/config/airflow.cfg" - state: absent - become: yes - ignore_errors: yes - -- name: Copy airflow.cfg to master - copy: - src: "../airflow/airflow.cfg" - dest: "{{ airflow_master_dir }}/config/airflow.cfg" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - -- name: Sync Airflow master config files - synchronize: - src: "../airflow/configs/{{ item }}" - dest: "{{ airflow_master_dir }}/configs/" - archive: yes - recursive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "nginx.conf" - - "Caddyfile" - -- name: Sync optional directories if they exist - synchronize: - src: "../{{ item.item }}/" - dest: "{{ airflow_master_dir }}/{{ item.item | basename }}/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: "{{ source_dirs.results }}" - when: item.stat.exists - -- name: Sync pangramia thrift files - synchronize: - src: "../thrift_model/gen_py/pangramia/" - dest: "{{ airflow_master_dir }}/pangramia/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - -- name: Template docker-compose file for master - template: - src: "{{ playbook_dir }}/../airflow/configs/docker-compose-master.yaml.j2" - dest: "{{ airflow_master_dir }}/configs/docker-compose-master.yaml" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Template Redis connection file - template: - src: "../airflow/config/redis_default_conn.json.j2" - dest: "{{ airflow_master_dir }}/config/redis_default_conn.json" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Template Minio connection file for master - template: - src: "../airflow/config/minio_default_conn.json.j2" - dest: "{{ airflow_master_dir }}/config/minio_default_conn.json" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ 
deploy_group }}" - become: yes - -- name: Ensure config directory is group-writable for Airflow initialization - file: - path: "{{ airflow_master_dir }}/config" - state: directory - mode: '0775' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Ensure airflow.cfg is group-writable for Airflow initialization - file: - path: "{{ airflow_master_dir }}/config/airflow.cfg" - state: file - mode: '0664' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Create symlink for docker-compose.yaml - file: - src: "{{ airflow_master_dir }}/configs/docker-compose-master.yaml" - dest: "{{ airflow_master_dir }}/docker-compose.yaml" - state: link - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - force: yes - follow: no - -- name: Ensure correct permissions for build context - file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - recurse: yes - become: yes - -- name: Ensure postgres-data directory exists on master and has correct permissions - file: - path: "{{ airflow_master_dir }}/postgres-data" - state: directory - owner: "999" # UID for the 'postgres' user in the official postgres image - group: "999" # GID for the 'postgres' group in the official postgres image - mode: '0700' - recurse: yes - become: yes - -- name: Set proper ownership and permissions on master logs directory contents - shell: | - chown -R {{ airflow_uid }}:{{ deploy_group }} {{ airflow_master_dir }}/logs - find {{ airflow_master_dir }}/logs -type d -exec chmod g+rws {} + - find {{ airflow_master_dir }}/logs -type f -exec chmod g+rw {} + - become: yes - -- name: Verify Dockerfile exists in build directory - stat: - path: "{{ airflow_master_dir }}/Dockerfile" - register: dockerfile_stat - -- name: Fail if Dockerfile is missing - fail: - msg: "Dockerfile not found in {{ airflow_master_dir }}. Cannot build image." - when: not dockerfile_stat.stat.exists - -- name: "Log: Building Airflow Docker image" - debug: - msg: "Building the main Airflow Docker image ({{ airflow_image_name }}) locally on the master node. This may take a few minutes." - -- name: Build Airflow master image - community.docker.docker_image: - name: "{{ airflow_image_name }}" - build: - path: "{{ airflow_master_dir }}" - dockerfile: "Dockerfile" # Explicitly specify the Dockerfile name - source: build - force_source: true - when: not fast_deploy | default(false) - -- name: "Log: Preparing assets for Caddy image" - debug: - msg: "Extracting static assets from the Airflow image to build the Caddy reverse proxy." 
- when: not fast_deploy | default(false) - -- name: Prepare Caddy asset extraction directory - file: - path: "{{ airflow_master_dir }}/caddy_build_assets" - state: "{{ item }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - loop: - - absent - - directory - become: yes - when: not fast_deploy | default(false) - -- name: Ensure subdirectories exist with correct permissions - file: - path: "{{ airflow_master_dir }}/caddy_build_assets/{{ item }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - loop: - - "appbuilder" - - "dist" - become: yes - when: not fast_deploy | default(false) - -- name: Extract static assets from Airflow image for Caddy build - shell: | - set -e - CONTAINER_ID=$(docker create {{ airflow_image_name }}) - # Dynamically find paths inside the container - APPBUILDER_PATH=$(docker run --rm --entrypoint "" {{ airflow_image_name }} python -c 'import os, flask_appbuilder; print(os.path.join(os.path.dirname(flask_appbuilder.__file__), "static", "appbuilder"))') - AIRFLOW_DIST_PATH=$(docker run --rm --entrypoint "" {{ airflow_image_name }} python -c 'import os, airflow; print(os.path.join(os.path.dirname(airflow.__file__), "www/static/dist"))') - # Copy assets from container to host - docker cp "${CONTAINER_ID}:${APPBUILDER_PATH}/." "./caddy_build_assets/appbuilder" - docker cp "${CONTAINER_ID}:${AIRFLOW_DIST_PATH}/." "./caddy_build_assets/dist" - docker rm -f $CONTAINER_ID - # Pre-compress assets - find ./caddy_build_assets/appbuilder -type f -print0 | xargs -0 gzip -k -9 - find ./caddy_build_assets/dist -type f -print0 | xargs -0 gzip -k -9 - args: - chdir: "{{ airflow_master_dir }}" - executable: /bin/bash - become: yes - register: asset_extraction - changed_when: asset_extraction.rc == 0 - when: not fast_deploy | default(false) - -- name: "Log: Building Caddy reverse proxy image" - debug: - msg: "Building the Caddy image (pangramia/ytdlp-ops-caddy:latest) to serve static assets." - -- name: Build Caddy image - community.docker.docker_image: - name: "pangramia/ytdlp-ops-caddy:latest" - build: - path: "{{ airflow_master_dir }}" - dockerfile: "Dockerfile.caddy" - source: build - force_source: true - when: not fast_deploy | default(false) - -- name: "Log: Starting Airflow services" - debug: - msg: "Starting Airflow core services (webserver, scheduler, etc.) on the master node using docker-compose." 
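# For reference, the start task below is roughly equivalent to running the following
# on the master host (a sketch only; it assumes the Docker Compose v2 CLI and the
# default airflow_master_dir of /srv/airflow_master from the sample cluster files):
#
#   cd /srv/airflow_master
#   docker compose -f configs/docker-compose-master.yaml \
#                  -f configs/docker-compose-ytdlp-ops.yaml up -d --remove-orphans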
- -- name: Start Airflow master service - community.docker.docker_compose_v2: - project_src: "{{ airflow_master_dir }}" - files: - - "configs/docker-compose-master.yaml" - - "configs/docker-compose-ytdlp-ops.yaml" - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/ansible/roles/airflow-worker/tasks/main.yml b/ansible/roles/airflow-worker/tasks/main.yml deleted file mode 100644 index 9e7904c..0000000 --- a/ansible/roles/airflow-worker/tasks/main.yml +++ /dev/null @@ -1,244 +0,0 @@ ---- -- name: Check if Airflow worker deployment directory exists - stat: - path: "{{ airflow_worker_dir }}" - register: worker_dir_stat - -- name: Ensure Airflow worker deployment directory exists - file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - when: not worker_dir_stat.stat.exists - -- name: Ensure Airflow worker configs directory exists - file: - path: "{{ airflow_worker_dir }}/configs" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure Airflow worker config directory exists - file: - path: "{{ airflow_worker_dir }}/config" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure Airflow operational directories exist with correct permissions - file: - path: "{{ airflow_worker_dir }}/{{ item }}" - state: directory - owner: "{{ airflow_uid }}" - group: "{{ deploy_group }}" - mode: '0775' - become: yes - loop: - - "dags" - - "logs" - - "plugins" - - "downloadfiles" - - "addfiles" - - "inputfiles" - -- name: "Log: Syncing Airflow core files" - debug: - msg: "Syncing DAGs, configs, and Python source code to the worker node." 
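# The synchronize tasks below delegate to rsync; a rough manual equivalent for one
# item (a sketch only; the option list is shortened, paths assume the sample cluster
# files where airflow_worker_dir is /srv/airflow_dl_worker, and <worker-host> is a
# placeholder):
#
#   rsync -a --delete --rsync-path="sudo rsync" \
#         --no-owner --no-group --no-times --copy-links --copy-unsafe-links \
#         --exclude='.git*' --exclude='__pycache__' --exclude='*.pyc' \
#         airflow/dags <worker-host>:/srv/airflow_dl_worker/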
- -- name: Sync Airflow worker files - synchronize: - src: "../{{ item }}" - dest: "{{ airflow_worker_dir }}/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "airflow/Dockerfile" - - "airflow/.dockerignore" - - "airflow/dags" - - "airflow/inputfiles" - - "setup.py" - - "yt_ops_services" - - "thrift_model" - - "VERSION" - - "airflow/update-yt-dlp.sh" - - "proxy_manager_client.py" - - "utils" - -- name: Copy custom Python config files to worker - copy: - src: "../airflow/config/{{ item }}" - dest: "{{ airflow_worker_dir }}/config/{{ item }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - loop: - - "custom_task_hooks.py" - - "airflow_local_settings.py" - -- name: Ensure any existing airflow.cfg directory is removed - file: - path: "{{ airflow_worker_dir }}/config/airflow.cfg" - state: absent - become: yes - ignore_errors: yes - -- name: Copy airflow.cfg to worker - copy: - src: "../airflow/airflow.cfg" - dest: "{{ airflow_worker_dir }}/config/airflow.cfg" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - -- name: Check if source directories exist - stat: - path: "../{{ item }}" - register: source_dirs - loop: - - "airflow/plugins" - - "airflow/addfiles" - - "airflow/bgutil-ytdlp-pot-provider" - -- name: Sync optional directories if they exist - synchronize: - src: "../{{ item.item }}/" - dest: "{{ airflow_worker_dir }}/{{ item.item | basename }}/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: "{{ source_dirs.results }}" - when: item.stat.exists - -- name: Sync pangramia thrift files - synchronize: - src: "../thrift_model/gen_py/pangramia/" - dest: "{{ airflow_worker_dir }}/pangramia/" - archive: yes - recursive: yes - delete: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - -- name: Ensure config directory is group-writable for Airflow initialization - file: - path: "{{ airflow_worker_dir }}/config" - state: directory - mode: '0775' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Ensure airflow.cfg is group-writable for Airflow initialization - file: - path: "{{ airflow_worker_dir }}/config/airflow.cfg" - state: file - mode: '0664' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Template docker-compose file for worker - template: - src: "{{ playbook_dir }}/../airflow/configs/docker-compose-dl.yaml.j2" - dest: "{{ airflow_worker_dir }}/configs/docker-compose-dl.yaml" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Create .env file for Airflow worker service - template: - src: "../../templates/.env.j2" - dest: "{{ airflow_worker_dir }}/.env" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - vars: - service_role: "worker" - server_identity: "ytdlp-ops-service-worker-{{ inventory_hostname }}" - -- name: Template Minio connection file for worker - template: - src: "../airflow/config/minio_default_conn.json.j2" - dest: "{{ airflow_worker_dir }}/config/minio_default_conn.json" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Create symlink for docker-compose.yaml - file: - src: "{{ airflow_worker_dir }}/configs/docker-compose-dl.yaml" - dest: "{{ airflow_worker_dir }}/docker-compose.yaml" - state: 
link - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - follow: no - -- name: Ensure correct permissions for build context - file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - recurse: yes - become: yes - -- name: Set proper ownership and permissions on worker logs directory contents - shell: | - chown -R {{ airflow_uid }}:{{ deploy_group }} {{ airflow_worker_dir }}/logs - find {{ airflow_worker_dir }}/logs -type d -exec chmod g+rws {} + - find {{ airflow_worker_dir }}/logs -type f -exec chmod g+rw {} + - become: yes - -- name: Verify Dockerfile exists in build directory - stat: - path: "{{ airflow_worker_dir }}/Dockerfile" - register: dockerfile_stat - -- name: Fail if Dockerfile is missing - fail: - msg: "Dockerfile not found in {{ airflow_worker_dir }}. Cannot build image." - when: not dockerfile_stat.stat.exists - -- name: "Log: Building Airflow Docker image" - debug: - msg: "Building the main Airflow Docker image ({{ airflow_image_name }}) locally on the worker node. This may take a few minutes." - -- name: Build Airflow worker image - community.docker.docker_image: - name: "{{ airflow_image_name }}" - build: - path: "{{ airflow_worker_dir }}" - dockerfile: "Dockerfile" - source: build - force_source: true - when: not fast_deploy | default(false) - -- name: "Log: Starting Airflow services" - debug: - msg: "Starting Airflow worker services (celery worker) on the node using docker-compose." - -- name: Start Airflow worker service - community.docker.docker_compose_v2: - project_src: "{{ airflow_worker_dir }}" - files: - - "configs/docker-compose-dl.yaml" - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/ansible/roles/fail2ban/tasks/main.yml b/ansible/roles/fail2ban/tasks/main.yml index fd2b4fc..97888ab 100644 --- a/ansible/roles/fail2ban/tasks/main.yml +++ b/ansible/roles/fail2ban/tasks/main.yml @@ -1,4 +1,11 @@ --- +- name: Ensure log directory exists + ansible.builtin.file: + path: /var/log + state: directory + mode: '0755' + become: yes + - name: Install fail2ban ansible.builtin.apt: name: fail2ban diff --git a/ansible/roles/shadowsocks-deploy/tasks/main.yml b/ansible/roles/shadowsocks-deploy/tasks/main.yml deleted file mode 100644 index 180ebb2..0000000 --- a/ansible/roles/shadowsocks-deploy/tasks/main.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -- name: Set shadowsocks base directory fact - set_fact: - shadowsocks_dir: "/srv/shadowsocks-rust" - -- name: Ensure shadowsocks base directory exists - file: - path: "{{ shadowsocks_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure proxy configuration directories exist - file: - path: "{{ shadowsocks_dir }}/config_ssp_{{ item.value.local_port }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - loop: "{{ shadowsocks_proxies | default({}) | dict2items }}" - loop_control: - label: "{{ item.key }}" - become: yes - when: shadowsocks_proxies is defined - -- name: Template proxy configuration files - template: - src: "config.json.j2" - dest: "{{ shadowsocks_dir }}/config_ssp_{{ item.value.local_port }}/config.json" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - loop: "{{ shadowsocks_proxies | default({}) | dict2items }}" - loop_control: - label: "{{ item.key }}" - become: yes - when: shadowsocks_proxies is defined - -- name: Template docker-compose file for 
proxies - template: - src: "docker-compose.proxies.yaml.j2" - dest: "{{ shadowsocks_dir }}/docker-compose.proxies.yaml" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0644' - become: yes - when: shadowsocks_proxies is defined - -- name: Create symlink for docker-compose.yaml - file: - src: "{{ shadowsocks_dir }}/docker-compose.proxies.yaml" - dest: "{{ shadowsocks_dir }}/docker-compose.yaml" - state: link - force: yes - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - when: shadowsocks_proxies is defined diff --git a/ansible/roles/shadowsocks-deploy/templates/config.json.j2 b/ansible/roles/shadowsocks-deploy/templates/config.json.j2 deleted file mode 100644 index 846b84d..0000000 --- a/ansible/roles/shadowsocks-deploy/templates/config.json.j2 +++ /dev/null @@ -1,11 +0,0 @@ -{ - "server": "{{ item.value.server }}", - "server_port": {{ item.value.server_port }}, - "password": "{{ lookup('vars', item.value.vault_password_key) }}", - "local_address": "0.0.0.0", - "local_port": {{ item.value.local_port }}, - "timeout": 20, - "method": "aes-256-gcm", - "fast_open": true, - "mode": "tcp_and_udp" -} diff --git a/ansible/roles/shadowsocks-deploy/templates/docker-compose.proxies.yaml.j2 b/ansible/roles/shadowsocks-deploy/templates/docker-compose.proxies.yaml.j2 deleted file mode 100644 index ca51296..0000000 --- a/ansible/roles/shadowsocks-deploy/templates/docker-compose.proxies.yaml.j2 +++ /dev/null @@ -1,22 +0,0 @@ -# This file is managed by Ansible. -name: "shadowsocks-proxies" -services: -{% for name, config in shadowsocks_proxies.items() %} - {{ name }}: - image: ghcr.io/shadowsocks/sslocal-rust:v1.22.0 - container_name: {{ name }} - restart: always - ports: - - "127.0.0.1:{{ config.local_port }}:{{ config.local_port }}/tcp" - - "127.0.0.1:{{ config.local_port }}:{{ config.local_port }}/udp" - volumes: - - /srv/shadowsocks-rust/config_ssp_{{ config.local_port }}/config.json:/etc/shadowsocks-rust/config.json:ro - networks: - - default - - airflow_proxynet -{% endfor %} - -networks: - airflow_proxynet: - name: airflow_proxynet - external: true diff --git a/ansible/roles/ytdlp-master/tasks/main.yml b/ansible/roles/ytdlp-master/tasks/main.yml deleted file mode 100644 index 689ddd4..0000000 --- a/ansible/roles/ytdlp-master/tasks/main.yml +++ /dev/null @@ -1,142 +0,0 @@ ---- -- name: Check if YT-DLP master deployment directory exists - stat: - path: "{{ airflow_master_dir }}" - register: master_dir_stat - -- name: Ensure YT-DLP master deployment directory exists - file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - when: not master_dir_stat.stat.exists - -- name: Ensure YT-DLP master configs directory exists - file: - path: "{{ airflow_master_dir }}/configs" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: "Log: Syncing YT-DLP service files" - debug: - msg: "Syncing YT-DLP service components (config generator, envoy/camoufox templates) to the master node." 
- -- name: Sync YT-DLP config generator to master - synchronize: - src: "../airflow/generate_envoy_config.py" - dest: "{{ airflow_master_dir }}/" - archive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - -- name: Sync YT-DLP config files to master - synchronize: - src: "../airflow/configs/{{ item }}" - dest: "{{ airflow_master_dir }}/configs/" - archive: yes - recursive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "docker-compose-ytdlp-ops.yaml.j2" - - "docker-compose.config-generate.yaml" - - "envoy.yaml.j2" - -- name: Create .env file for YT-DLP master service - template: - src: "../../templates/.env.j2" - dest: "{{ airflow_master_dir }}/.env" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - vars: - service_role: "management" - server_identity: "ytdlp-ops-service-mgmt" - -- name: Create symlink for .env in configs directory for manual docker-compose commands - file: - src: "../.env" - dest: "{{ airflow_master_dir }}/configs/.env" - state: link - force: yes - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Template docker-compose file for YT-DLP master service - template: - src: "../airflow/configs/docker-compose-ytdlp-ops.yaml.j2" - dest: "{{ airflow_master_dir }}/configs/docker-compose-ytdlp-ops.yaml" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - vars: - service_role: "management" - -- name: "Log: Generating YT-DLP service configurations" - debug: - msg: "Running the configuration generator script inside a temporary Docker container. This creates docker-compose and envoy files based on .env variables." - -- name: Ensure envoy.yaml is removed before generation - file: - path: "{{ airflow_master_dir }}/envoy.yaml" - state: absent - become: yes - -- name: Create placeholder envoy.yaml to prevent Docker from creating a directory - file: - path: "{{ airflow_master_dir }}/envoy.yaml" - state: touch - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0664' - become: yes - -- name: Generate YT-DLP service configurations - shell: - cmd: "docker compose --project-directory {{ airflow_master_dir }} -f configs/docker-compose.config-generate.yaml run --rm config-generator" - chdir: "{{ airflow_master_dir }}" - become: yes - become_user: "{{ ssh_user }}" - -- name: Ensure correct permissions for build context after generation - file: - path: "{{ airflow_master_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - recurse: yes - become: yes - - -- name: Check for shadowsocks-rust proxy compose file - stat: - path: "/srv/shadowsocks-rust/docker-compose.proxies.yaml" - register: proxy_compose_file - -- name: "Log: Starting YT-DLP management service" - debug: - msg: "Starting the YT-DLP management service on the master node. This service handles account and proxy management." 
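# Once the service defined below is up, it can be spot-checked from the master host
# with the bundled client (a sketch only; the port follows envoy_port from the sample
# cluster files and the identity follows the .env template for the management role;
# both may differ per deployment):
#
#   python3 proxy_manager_client.py --host 127.0.0.1 --port 9080 \
#       list --server-identity ytdlp-ops-service-mgmt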
- -- name: Log in to Docker Hub to pull private images - community.docker.docker_login: - username: "{{ dockerhub_user }}" - password: "{{ vault_dockerhub_password }}" - when: vault_dockerhub_password is defined and vault_dockerhub_password | length > 0 - -- name: Start YT-DLP master service - community.docker.docker_compose_v2: - project_src: "{{ airflow_master_dir }}" - files: - - "configs/docker-compose-ytdlp-ops.yaml" - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/ansible/roles/ytdlp-worker/defaults/main.yml b/ansible/roles/ytdlp-worker/defaults/main.yml deleted file mode 100644 index 1d467f3..0000000 --- a/ansible/roles/ytdlp-worker/defaults/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -# defaults file for ytdlp-worker -camoufox_base_port: 10000 diff --git a/ansible/roles/ytdlp-worker/tasks/main.yml b/ansible/roles/ytdlp-worker/tasks/main.yml deleted file mode 100644 index 26a5979..0000000 --- a/ansible/roles/ytdlp-worker/tasks/main.yml +++ /dev/null @@ -1,280 +0,0 @@ ---- -- name: Ensure worker is not paused on deploy (remove .lock file) - file: - path: "{{ airflow_worker_dir }}/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile" - state: absent - become: yes - -- name: Clean up old renamed lock files (older than 7 days) - ansible.builtin.find: - paths: "{{ airflow_worker_dir }}/inputfiles" - patterns: "AIRFLOW.PREVENT_URL_PULL.lockfile.removed-*" - age: "7d" - use_regex: false - register: old_lock_files - become: yes - -- name: Remove found old lock files - ansible.builtin.file: - path: "{{ item.path }}" - state: absent - loop: "{{ old_lock_files.files }}" - become: yes - when: old_lock_files.files | length > 0 - -- name: Ensure YT-DLP worker inputfiles directory exists - file: - path: "{{ airflow_worker_dir }}/inputfiles" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: Ensure YT-DLP worker logs directory exists - file: - path: "{{ airflow_worker_dir }}/logs" - state: directory - owner: "{{ airflow_uid }}" - group: "{{ deploy_group }}" - mode: '0775' - become: yes - -- name: Check if YT-DLP worker deployment directory exists - stat: - path: "{{ airflow_worker_dir }}" - register: worker_dir_stat - -- name: Ensure YT-DLP worker deployment directory exists - file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - when: not worker_dir_stat.stat.exists - -- name: Ensure YT-DLP worker configs directory exists - file: - path: "{{ airflow_worker_dir }}/configs" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0755' - become: yes - -- name: "Log: Syncing YT-DLP service files" - debug: - msg: "Syncing YT-DLP service components (config generator, envoy templates) to the worker node." 
- -- name: Sync YT-DLP config generator to worker - synchronize: - src: "../airflow/generate_envoy_config.py" - dest: "{{ airflow_worker_dir }}/" - archive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - -- name: Sync YT-DLP config files to worker - synchronize: - src: "../airflow/configs/{{ item }}" - dest: "{{ airflow_worker_dir }}/configs/" - archive: yes - recursive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "docker-compose-ytdlp-ops.yaml.j2" - - "docker-compose.config-generate.yaml" - - "envoy.yaml.j2" - -- name: Sync Airflow build context to worker - synchronize: - src: "../{{ item }}" - dest: "{{ airflow_worker_dir }}/" - archive: yes - recursive: yes - rsync_path: "sudo rsync" - rsync_opts: "{{ rsync_default_opts }}" - loop: - - "airflow/Dockerfile" - - "setup.py" - - "VERSION" - - "yt_ops_services" - - "thrift_model" - - "pangramia" - -- name: Create .env file for YT-DLP worker service - template: - src: "../../templates/.env.j2" - dest: "{{ airflow_worker_dir }}/.env" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - vars: - service_role: "worker" - server_identity: "ytdlp-ops-service-worker-{{ inventory_hostname }}" - -- name: Create symlink for .env in configs directory for manual docker-compose commands - file: - src: "../.env" - dest: "{{ airflow_worker_dir }}/configs/.env" - state: link - force: yes - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: Log in to Docker Hub to pull private images - community.docker.docker_login: - username: "{{ dockerhub_user }}" - password: "{{ vault_dockerhub_password }}" - when: vault_dockerhub_password is defined and vault_dockerhub_password | length > 0 - -- name: "Log: Generating YT-DLP service configurations" - debug: - msg: "Running the configuration generator script inside a temporary Docker container. This creates docker-compose, envoy, and camoufox files based on .env variables." 
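# Judging by the cleanup task below, the generator run is expected to (re)create
# envoy.yaml, configs/docker-compose.camoufox.yaml and configs/camoufox_endpoints.json
# from the .env values. The placeholder touch of envoy.yaml keeps Docker from
# auto-creating a directory at that path while the real file does not yet exist.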
- -- name: Ensure previously generated config files are removed before generation - file: - path: "{{ item }}" - state: absent - loop: - - "{{ airflow_worker_dir }}/envoy.yaml" - - "{{ airflow_worker_dir }}/configs/docker-compose.camoufox.yaml" - - "{{ airflow_worker_dir }}/configs/camoufox_endpoints.json" - become: yes - -- name: Create placeholder envoy.yaml to prevent Docker from creating a directory - file: - path: "{{ airflow_worker_dir }}/envoy.yaml" - state: touch - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - mode: '0664' - become: yes - -- name: Generate YT-DLP service configurations - shell: - cmd: "docker compose --project-directory {{ airflow_worker_dir }} -f configs/docker-compose.config-generate.yaml run --rm config-generator" - chdir: "{{ airflow_worker_dir }}" - become: yes - become_user: "{{ ssh_user }}" - -- name: Clean up old root docker-compose files to prevent conflicts - ansible.builtin.file: - path: "{{ airflow_worker_dir }}/{{ item }}" - state: absent - loop: - - "docker-compose.yml" - - "docker-compose.yaml" - - "docker-compose.override.yml" - - "docker-compose.airflow.yml" - become: yes - -- name: Template docker-compose file for Airflow worker - template: - src: "{{ playbook_dir }}/../airflow/configs/docker-compose-dl.yaml.j2" - dest: "{{ airflow_worker_dir }}/configs/docker-compose.airflow.yml" - mode: "{{ file_permissions }}" - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - -- name: "Log: Building Airflow image" - debug: - msg: "Building the Airflow image locally. This image contains all dependencies for running DAGs." - -- name: Build Airflow image from local Dockerfile - community.docker.docker_image: - name: "pangramia/ytdlp-ops-airflow:latest" - build: - path: "{{ airflow_worker_dir }}" - dockerfile: "Dockerfile" - source: build - force_source: true - when: not fast_deploy | default(false) - -- name: "Log: Building aria2-pro image" - debug: - msg: "Building the aria2-pro image locally. This image provides the download manager." - when: not fast_deploy | default(false) - -- name: Build aria2-pro image from docker-compose - ansible.builtin.command: > - docker compose -f configs/docker-compose.airflow.yml build aria2-pro - args: - chdir: "{{ airflow_worker_dir }}" - become: yes - become_user: "{{ ansible_user }}" - register: docker_build_result - changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout" - when: not fast_deploy | default(false) - -# - name: "Log: Building Camoufox (remote browser) image" -# debug: -# msg: "Building the Camoufox image locally. This image provides remote-controlled Firefox browsers for token generation." -# -# - name: Build Camoufox image from local Dockerfile -# community.docker.docker_image: -# name: "camoufox:latest" -# build: -# path: "{{ airflow_worker_dir }}/camoufox" -# source: build -# force_source: true -# when: not fast_deploy | default(false) - -- name: Ensure correct permissions for build context after generation - file: - path: "{{ airflow_worker_dir }}" - state: directory - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - recurse: yes - become: yes - -- name: Check for shadowsocks-rust proxy compose file - stat: - path: "/srv/shadowsocks-rust/docker-compose.proxies.yaml" - register: proxy_compose_file - -- name: "Log: Stopping worker services before start" - debug: - msg: "Stopping all worker services to ensure a clean start." 
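# Rough manual equivalent of the stop/start cycle below, useful when debugging a
# single worker by hand (a sketch only; assumes airflow_worker_dir is
# /srv/airflow_dl_worker as in the sample cluster files):
#
#   cd /srv/airflow_dl_worker
#   docker compose -f configs/docker-compose-ytdlp-ops.yaml \
#                  -f configs/docker-compose.airflow.yml down -v
#   docker compose -f configs/docker-compose-ytdlp-ops.yaml \
#                  -f configs/docker-compose.airflow.yml up -d --remove-orphans --force-recreate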
- -- name: Stop all worker services - community.docker.docker_compose_v2: - project_src: "{{ airflow_worker_dir }}" - files: - - "configs/docker-compose-ytdlp-ops.yaml" - - "configs/docker-compose.airflow.yml" - state: absent - remove_volumes: true # Corresponds to docker compose down -v - -- name: Forcefully remove project-specific Docker volumes to fix corruption issues - ansible.builtin.shell: "docker volume ls -q --filter 'label=com.docker.compose.project=ytdlp-ops-worker' | xargs -r docker volume rm --force" - become: yes - register: removed_volumes - changed_when: removed_volumes.stdout | length > 0 - failed_when: false - -- name: "Log: Starting all worker services" - debug: - msg: "Starting all worker services: ytdlp-ops, and airflow-worker." - -- name: Start all worker services - community.docker.docker_compose_v2: - project_src: "{{ airflow_worker_dir }}" - files: - - "configs/docker-compose-ytdlp-ops.yaml" - - "configs/docker-compose.airflow.yml" - state: present - remove_orphans: true - pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" - recreate: always # Corresponds to --force-recreate - -# - name: Include camoufox verification tasks -# include_tasks: ../../../tasks/verify_camoufox.yml -# when: not fast_deploy | default(false) diff --git a/ansible/scripts/verify_camoufox_services.py b/ansible/scripts/verify_camoufox_services.py deleted file mode 100644 index 4eade42..0000000 --- a/ansible/scripts/verify_camoufox_services.py +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env python3 -""" -Script to verify that all camoufox services are running and accessible. -This script should be run after deployment to ensure the cluster is healthy. -""" - -import subprocess -import sys -import json -import time -import logging -from typing import List, Dict, Tuple - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -def run_docker_command(cmd: List[str]) -> Tuple[int, str, str]: - """Run a docker command and return (returncode, stdout, stderr)""" - try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - return result.returncode, result.stdout.strip(), result.stderr.strip() - except subprocess.TimeoutExpired: - logger.error(f"Command timed out: {' '.join(cmd)}") - return 1, "", "Command timed out" - except Exception as e: - logger.error(f"Error running command: {' '.join(cmd)} - {e}") - return 1, "", str(e) - -def get_docker_compose_services(project_path: str) -> List[Dict]: - """Get list of services from docker-compose""" - # Try different ways to get services since the project naming might vary - possible_commands = [ - ["docker", "compose", "-p", "ytdlp-ops", "ps", "--format", "json"], - ["docker", "compose", "-p", "ytdlp-ops-camoufox", "ps", "--format", "json"], - ["docker", "compose", "--project-directory", project_path, "ps", "--format", "json"], - ["docker", "compose", "ps", "--format", "json"] - ] - - for cmd in possible_commands: - returncode, stdout, stderr = run_docker_command(cmd) - if returncode == 0 and stdout: - try: - # Handle both single JSON object and JSON array - if stdout.startswith('['): - services = json.loads(stdout) - else: - # Multiple JSON objects, one per line - services = [] - for line in stdout.split('\n'): - if line.strip(): - services.append(json.loads(line)) - if services: - return services - except json.JSONDecodeError as e: - logger.debug(f"Failed to parse docker-compose output with command {' '.join(cmd)}: 
{e}") - continue - - # If all commands failed, try to get all containers and filter for camoufox - logger.info("Falling back to direct container inspection") - returncode, stdout, stderr = run_docker_command(["docker", "ps", "--format", "json"]) - if returncode == 0 and stdout: - try: - containers = [] - for line in stdout.split('\n'): - if line.strip(): - containers.append(json.loads(line)) - - # Filter for camoufox containers - camoufox_containers = [c for c in containers if 'camoufox' in c.get('Names', '')] - return camoufox_containers - except json.JSONDecodeError: - pass - - logger.error("Failed to get docker-compose services with all methods") - return [] - -def check_service_health(service_name: str, port: int = 12345) -> bool: - """Check if a service is responding on its expected port""" - # For camoufox services, we can check if they're running and have network connectivity - # Since they're WebSocket services, we'll just verify they're running for now - cmd = ["docker", "inspect", service_name] - returncode, stdout, stderr = run_docker_command(cmd) - - if returncode != 0: - logger.error(f"Failed to inspect service {service_name}: {stderr}") - return False - - try: - service_info = json.loads(stdout) - if service_info and len(service_info) > 0: - state = service_info[0].get('State', {}) - running = state.get('Running', False) - health = state.get('Health', {}).get('Status', 'unknown') - - if running: - logger.info(f"Service {service_name} is running (health: {health})") - return True - else: - logger.error(f"Service {service_name} is not running") - return False - except json.JSONDecodeError as e: - logger.error(f"Failed to parse docker inspect output for {service_name}: {e}") - return False - -def verify_camoufox_services(project_path: str = "/srv/airflow_dl_worker") -> bool: - """Main function to verify all camoufox services""" - logger.info("Starting camoufox service verification...") - - # Get all services - services = get_docker_compose_services(project_path) - - if not services: - logger.warning("No services found through docker-compose. Checking for running camoufox containers directly...") - # Try to find camoufox containers directly - cmd = ["docker", "ps", "--filter", "name=camoufox", "--format", "json"] - returncode, stdout, stderr = run_docker_command(cmd) - - if returncode == 0 and stdout: - try: - camoufox_containers = [] - for line in stdout.split('\n'): - if line.strip(): - camoufox_containers.append(json.loads(line)) - services = camoufox_containers - except json.JSONDecodeError: - services = [] - - if not services: - logger.error("No camoufox services or containers found.") - # Check if we're on a worker node by looking for camoufox config - import os - if os.path.exists(f"{project_path}/configs/docker-compose.camoufox.yaml"): - logger.info("Camoufox config exists but no services running. This might indicate a startup issue.") - return False - else: - logger.info("No camoufox config found. 
This might be a master node.") - return True - - logger.info(f"Found {len(services)} camoufox service(s) or container(s)") - - # Check each service - all_healthy = True - camoufox_services_found = 0 - - for service in services: - # Different docker output formats have different field names - service_name = (service.get('Name') or - service.get('Names') or - service.get('name') or - service.get('Service', 'unknown')) - - # If we're dealing with container output, Names might be a string - if isinstance(service_name, str): - service_names = [service_name] - else: - service_names = service_name if isinstance(service_name, list) else [str(service_name)] - - # Check if any of the service names contain 'camoufox' - is_camoufox_service = any('camoufox' in name.lower() for name in service_names) - - if not is_camoufox_service: - continue - - camoufox_services_found += 1 - logger.info(f"Checking service: {service_names[0] if service_names else 'unknown'}") - - # Use the first service name for health check - name_to_check = service_names[0] if service_names else 'unknown' - - # Check if service is running - if not check_service_health(name_to_check): - all_healthy = False - continue - - # Check service status from docker output - service_status = (service.get('State') or - service.get('status') or - service.get('Status') or - 'unknown') - service_health = (service.get('Health') or - service.get('health') or - 'unknown') - - logger.info(f"Service {name_to_check} - Status: {service_status}, Health: {service_health}") - - if service_status not in ['running', 'Running']: - logger.error(f"Service {name_to_check} is not running (status: {service_status})") - all_healthy = False - elif service_health not in ['healthy', 'unknown', '']: # unknown or empty is OK for services without healthcheck - logger.warning(f"Service {name_to_check} health is {service_health}") - - if camoufox_services_found == 0: - logger.warning("No camoufox services found in the service list") - return False - - logger.info(f"Successfully verified {camoufox_services_found} camoufox service(s)") - return all_healthy - -def main(): - """Main entry point""" - logger.info("Camoufox Service Verification Script") - logger.info("=" * 40) - - # Try to detect project path - import os - project_paths = [ - "/srv/airflow_dl_worker", # Worker node - "/srv/airflow_master", # Master node - "/app", # Container path - "." 
# Current directory - ] - - project_path = None - for path in project_paths: - if os.path.exists(path): - project_path = path - break - - if not project_path: - logger.error("Could not determine project path") - return 1 - - logger.info(f"Using project path: {project_path}") - - try: - success = verify_camoufox_services(project_path) - if success: - logger.info("✅ All camoufox services verification PASSED") - return 0 - else: - logger.error("❌ Camoufox services verification FAILED") - return 1 - except Exception as e: - logger.error(f"Unexpected error during verification: {e}", exc_info=True) - return 1 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/ansible/tasks/verify_camoufox.yml b/ansible/tasks/verify_camoufox.yml deleted file mode 100644 index b574cf9..0000000 --- a/ansible/tasks/verify_camoufox.yml +++ /dev/null @@ -1,38 +0,0 @@ ---- -- name: Copy camoufox verification script to worker - copy: - src: scripts/verify_camoufox_services.py - dest: "{{ airflow_worker_dir }}/verify_camoufox_services.py" - mode: '0755' - owner: "{{ ssh_user }}" - group: "{{ deploy_group }}" - become: yes - when: inventory_hostname in groups['airflow_workers'] - -- name: Run camoufox service verification - command: python3 verify_camoufox_services.py - args: - chdir: "{{ airflow_worker_dir }}" - environment: - PATH: "{{ ansible_env.PATH }}:/usr/local/bin" - register: verification_result - become: yes - become_user: "{{ ssh_user }}" - when: inventory_hostname in groups['airflow_workers'] - ignore_errors: yes - -- name: Check verification results - debug: - msg: | - Camoufox verification {{ 'PASSED' if verification_result.rc == 0 else 'FAILED' }} - Output: {{ verification_result.stdout }} - Errors: {{ verification_result.stderr }} - when: inventory_hostname in groups['airflow_workers'] and verification_result is defined - -- name: Fail deployment if camoufox verification failed - fail: - msg: "Camoufox service verification failed. Check service status and network connectivity." - when: > - inventory_hostname in groups['airflow_workers'] and - verification_result is defined and - verification_result.rc != 0 diff --git a/ansible/templates/.env.j2 b/ansible/templates/.env.j2 deleted file mode 100644 index e7ab80c..0000000 --- a/ansible/templates/.env.j2 +++ /dev/null @@ -1,67 +0,0 @@ -# This file is managed by Ansible. Do not edit manually. - -# --- Common Settings --- -HOSTNAME="{{ inventory_hostname }}" -COMPOSE_PROJECT_NAME="ytdlp-ops-{{ service_role | default('all-in-one') }}" -TZ="{{ host_timezone }}" -service_role={{ service_role | default('all-in-one') }} - -# --- Docker Image Settings --- -YTDLP_OPS_IMAGE="{{ ytdlp_ops_image }}" -AIRFLOW_IMAGE_NAME="{{ airflow_image_name }}" - -# --- Network Settings --- -ENVOY_PORT={{ envoy_port }} -ENVOY_ADMIN_PORT={{ envoy_admin_port }} -YTDLP_BASE_PORT={{ ytdlp_base_port }} -YTDLP_WORKERS={{ ytdlp_workers | default(3) }} -MANAGEMENT_SERVICE_PORT={{ management_service_port }} -REDIS_PORT={{ redis_port }} -POSTGRES_PORT={{ postgres_port }} - -# --- Security Settings --- -REDIS_PASSWORD="{{ vault_redis_password }}" -POSTGRES_PASSWORD="{{ vault_postgres_password }}" -AIRFLOW_ADMIN_PASSWORD="{{ vault_airflow_admin_password }}" -FLOWER_PASSWORD="{{ vault_flower_password }}" - -# --- User and Group IDs --- -AIRFLOW_UID={{ airflow_uid | default(1003) }} -AIRFLOW_GID={{ deploy_group_gid | default(1001) }} - -# --- S3 Logging Configuration (for Airflow integration) --- -# Optional: for appending service logs to Airflow's S3 logs. 
-# These should match the 'minio_default' connection configured in Airflow. -S3_ENDPOINT_URL="{{ s3_endpoint_url | default('') }}" -S3_ACCESS_KEY_ID="{{ vault_s3_access_key_id | default('') }}" -S3_SECRET_ACCESS_KEY="{{ vault_s3_secret_access_key | default('') }}" -S3_REGION_NAME="{{ s3_region_name | default('us-east-1') }}" - -# --- Master-specific settings --- -{% if 'master' in service_role or 'management' in service_role %} -MASTER_HOST_IP={{ hostvars[groups['airflow_master'][0]].ansible_host }} -# Camoufox is not used on master, but the config generator expects the variable. -CAMOUFOX_PROXIES= - -{% endif %} - -# --- Worker-specific settings --- -{% if 'worker' in service_role %} -AIRFLOW_PROJ_DIR={{ airflow_worker_dir }} -MASTER_HOST_IP={{ hostvars[groups['airflow_master'][0]].ansible_host }} - -# --- Envoy & Worker Configuration --- -ENVOY_BACKEND_ADDRESS=ytdlp-ops-service -YTDLP_TIMEOUT=600 - -# --- Camoufox (Browser) Configuration --- -CAMOUFOX_PROXIES="{{ (worker_proxies | default([])) | join(',') }}" -VNC_PASSWORD="{{ vault_vnc_password }}" -CAMOUFOX_BASE_VNC_PORT={{ camoufox_base_vnc_port | default(5901) }} -CAMOUFOX_PORT={{ camoufox_base_port | default(9070) }} - -# --- Account Manager Configuration --- -ACCOUNT_ACTIVE_DURATION_MIN={{ account_active_duration_min | default(7) }} -ACCOUNT_COOLDOWN_DURATION_MIN={{ account_cooldown_duration_min | default(30) }} - -{% endif %} diff --git a/ansible/templates/.env.stress.j2 b/ansible/templates/.env.stress.j2 new file mode 100644 index 0000000..39af3d8 --- /dev/null +++ b/ansible/templates/.env.stress.j2 @@ -0,0 +1,9 @@ +# This file is managed by Ansible for the stress test environment. +# --- Network Settings --- +REDIS_HOST={{ hostvars[groups['airflow_master'][0]].ansible_host }} +REDIS_PASSWORD={{ vault_redis_password }} + +# --- Account Manager Configuration --- +ACCOUNT_ACTIVE_DURATION_MIN=7 +ACCOUNT_COOLDOWN_DURATION_MIN=30 +STRESS_POLICY_INBOX_QUEUE=dev_stress_inbox diff --git a/ansible/templates/docker-compose.stress-master.j2 b/ansible/templates/docker-compose.stress-master.j2 new file mode 100644 index 0000000..812bcc3 --- /dev/null +++ b/ansible/templates/docker-compose.stress-master.j2 @@ -0,0 +1,43 @@ +# Template for stress test master services +name: "stress-services" +services: + redis: + image: redis:7-alpine + container_name: stress-redis + restart: always + ports: + - "{{ redis_port }}:6379" + command: redis-server --requirepass {{ vault_redis_password }} + networks: + - {{ docker_network_name }} + + minio: + image: minio/minio:latest + container_name: stress-minio + restart: always + ports: + - "9000:9000" # API Port + - "9001:9001" # Console Port + volumes: + - ./minio-data:/data + environment: + MINIO_ROOT_USER: "{{ vault_s3_access_key_id }}" + MINIO_ROOT_PASSWORD: "{{ vault_s3_secret_access_key }}" + command: server /data --console-address ":9001" + networks: + - {{ docker_network_name }} + + bgutil-provider: + image: brainicism/bgutil-ytdlp-pot-provider + container_name: bgutil-provider + init: true + ports: + - "4416:4416" + restart: unless-stopped + networks: + - {{ docker_network_name }} + +networks: + {{ docker_network_name }}: + name: {{ docker_network_name }} + external: true diff --git a/cli.config b/cli.config deleted file mode 100644 index 7bb7f64..0000000 --- a/cli.config +++ /dev/null @@ -1,17 +0,0 @@ -# This is a yt-dlp configuration file. -# It contains one command-line option per line. 
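# A file like this is normally passed to yt-dlp explicitly, for example
# (the URL is only a placeholder):
#   yt-dlp --config-locations ./cli.config "https://www.youtube.com/watch?v=..."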
- -#--no-progress ---format-sort "res,ext:mp4:m4a" ---recode-video mp4 ---no-playlist ---no-overwrites ---continue ---output "%(extractor)s - %(title)s.%(ext)s" ---no-mtime ---verbose -#--simulate -# Performance options -#--no-resize-buffer -#--buffer-size 4M -#--concurrent-fragments 8 diff --git a/cluster.green.yml b/cluster.green.yml deleted file mode 100644 index 8d4f516..0000000 --- a/cluster.green.yml +++ /dev/null @@ -1,106 +0,0 @@ -global_vars: - # Docker image versions - ytdlp_ops_image: "pangramia/ytdlp-ops-server:3.10.1-exp" - airflow_image_name: "pangramia/ytdlp-ops-airflow:latest" - - # Default ports - postgres_port: 5432 - ytdlp_base_port: 9090 - envoy_port: 9080 - envoy_admin_port: 9901 - management_service_port: 9091 - camoufox_base_port: 9070 - camoufox_base_vnc_port: 5901 - - # Default UID - airflow_uid: 1003 - - # Default directories - airflow_master_dir: "/srv/airflow_master" - airflow_worker_dir: "/srv/airflow_dl_worker" - - # Docker network name - docker_network_name: "airflow_proxynet" - - # Default usernames - ssh_user: "alex_p" - ansible_user: "alex_p" - - # Default group - deploy_group: "ytdl" - - # Default file permissions - dir_permissions: "0755" - file_permissions: "0644" - - # Default rsync options - rsync_default_opts: - - "--no-owner" - - "--no-group" - - "--no-times" - - "--copy-links" - - "--copy-unsafe-links" - - "--exclude=.git*" - - "--exclude=__pycache__" - - "--exclude=*.pyc" - - "--exclude=*.log" - - "--exclude=.DS_Store" - - # Docker-Hub credentials - dockerhub_user: "pangramia" - - # Host timezone - host_timezone: "Europe/Moscow" - - # Shadowsocks cipher method - shadowsocks_cipher_method: "aes-256-gcm" - - # Shadowsocks image - shadowsocks_image: "ghcr.io/shadowsocks/sslocal-rust:v1.22.0" - - # Shadowsocks config options - shadowsocks_local_address: "0.0.0.0" - shadowsocks_timeout: 20 - shadowsocks_fast_open: true - shadowsocks_mode: "tcp_and_udp" - -# Global list of all available proxies to be deployed everywhere. -# The key (e.g., 'sslocal-rust-1087') becomes the service name. 
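# Each entry below is rendered into its own sslocal-rust container that listens on
# 127.0.0.1:<local_port>; nodes then reference it in their "proxies" lists as
# "socks5://<entry-name>:<local_port>", e.g. sslocal-rust-1087 becomes
# "socks5://sslocal-rust-1087:1087" as used by the master and workers further down.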
-shadowsocks_proxies: - - sslocal-rust-1087: - server: 91.103.252.51 - server_port: 8388 - local_port: 1087 - vault_password_key: vault_ss_password_1 - - sslocal-rust-1086: - server: 62.60.178.45 - server_port: 8388 - local_port: 1086 - vault_password_key: vault_ss_password_2 - - - sslocal-rust-1081: - server: 79.137.207.43 - server_port: 8388 - local_port: 1081 - vault_password_key: vault_ss_password_2 - -master: - af-green: - ip: 89.253.221.173 - port: 22 - proxies: - - "socks5://sslocal-rust-1087:1087" - -workers: - dl003: - ip: 62.60.245.103 - proxies: - - "socks5://sslocal-rust-1087:1087" - - dl001: - ip: 109.107.189.106 - proxies: - - "socks5://sslocal-rust-1087:1087" diff --git a/cluster.test.yml b/cluster.test.yml deleted file mode 100644 index 69db96a..0000000 --- a/cluster.test.yml +++ /dev/null @@ -1,101 +0,0 @@ -global_vars: - # Docker image versions - ytdlp_ops_image: "pangramia/ytdlp-ops-server:latest" - airflow_image_name: "pangramia/ytdlp-ops-airflow:latest" - - # Default ports - postgres_port: 5432 - ytdlp_base_port: 9090 - envoy_port: 9080 - envoy_admin_port: 9901 - management_service_port: 9091 - camoufox_base_vnc_port: 5901 - - # Default UID - airflow_uid: 1003 - - # Default directories - airflow_master_dir: "/srv/airflow_master" - airflow_worker_dir: "/srv/airflow_dl_worker" - - # Docker network name - docker_network_name: "airflow_proxynet" - - # Default usernames - ssh_user: "alex_p" - ansible_user: "alex_p" - - # Default group - deploy_group: "ytdl" - - # Default file permissions - dir_permissions: "0755" - file_permissions: "0644" - - # Default rsync options - rsync_default_opts: - - "--no-owner" - - "--no-group" - - "--no-times" - - "--copy-links" - - "--copy-unsafe-links" - - "--exclude=.git*" - - "--exclude=__pycache__" - - "--exclude=*.pyc" - - "--exclude=*.log" - - "--exclude=.DS_Store" - - # Docker-Hub credentials - dockerhub_user: "pangramia" - - # Host timezone - host_timezone: "Europe/Moscow" - - # Shadowsocks cipher method - shadowsocks_cipher_method: "aes-256-gcm" - - # Shadowsocks image - shadowsocks_image: "ghcr.io/shadowsocks/sslocal-rust:v1.22.0" - - # Shadowsocks config options - shadowsocks_local_address: "0.0.0.0" - shadowsocks_timeout: 20 - shadowsocks_fast_open: true - shadowsocks_mode: "tcp_and_udp" - -# Global list of all available proxies to be deployed everywhere. -# The key (e.g., 'sslocal-rust-1087') becomes the service name. -shadowsocks_proxies: - - sslocal-rust-1087: - server: 91.103.252.51 - server_port: 8388 - local_port: 1087 - vault_password_key: vault_ss_password_1 - - sslocal-rust-1086: - server: 62.60.178.45 - server_port: 8388 - local_port: 1086 - vault_password_key: vault_ss_password_2 - - - sslocal-rust-1081: - server: 79.137.207.43 - server_port: 8388 - local_port: 1081 - vault_password_key: vault_ss_password_2 - -master: - af-test: - ip: 89.253.223.97 - port: 22 - proxies: - - "socks5://sslocal-rust-1086:1086" - - "socks5://sslocal-rust-1081:1081" -workers: - dl002: - ip: 62.60.178.54 - proxies: - - "socks5://sslocal-rust-1081:1081" - - "socks5://sslocal-rust-1086:1086" diff --git a/deploy.sh b/deploy.sh deleted file mode 100644 index 4a728fb..0000000 --- a/deploy.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -e - -echo "Generating inventory..." -./tools/generate-inventory.py cluster.test.yml - -echo "Deploying full cluster..." 
-cd ansible -ansible-playbook playbook-full.yml diff --git a/pangramia/__pycache__/__init__.cpython-39.pyc b/pangramia/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index e5bdb0c..0000000 Binary files a/pangramia/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc b/pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 32c1ca9..0000000 Binary files a/pangramia/yt/exceptions/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc b/pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc deleted file mode 100644 index 87cefa9..0000000 Binary files a/pangramia/yt/exceptions/__pycache__/ttypes.cpython-39.pyc and /dev/null differ diff --git a/playbooks/playbook-bgutils-start.yml b/playbooks/playbook-bgutils-start.yml deleted file mode 100644 index e69de29..0000000 diff --git a/playbooks/playbook-bgutils-stop.yml b/playbooks/playbook-bgutils-stop.yml deleted file mode 100644 index e69de29..0000000 diff --git a/proxy_manager_client.py b/proxy_manager_client.py deleted file mode 100644 index 19b57e0..0000000 --- a/proxy_manager_client.py +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env python3 -""" -Client script to manage proxies in the YTTokenOpService. - -This script allows you to list, ban, unban, and reset proxies that are managed -by a ytdlp-ops-server instance via Redis. -""" - -import argparse -import sys -import os -import logging -from pathlib import Path -import datetime - -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - - - -try: - from thrift.transport import TTransport - from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException - from pangramia.yt.common.constants import ErrorCode - from tabulate import tabulate - from yt_ops_services.client_utils import get_thrift_client, format_timestamp -except ImportError as e: - print(f"Error importing required modules: {e}") - print("Please ensure you have installed dependencies by running: pip install -e .") - sys.exit(1) - - - - -def main(): - parser = argparse.ArgumentParser( - description="Manage proxies for the YTDLP Operations Server.\n\n" - "This script allows you to list, ban, unban, and reset proxies that are managed\n" - "by a ytdlp-ops-server instance via Redis. It provides a command-line interface\n" - "to interact with the proxy management features of the server.", - epilog="Usage examples:\n" - " # List statuses for a specific server identity\n" - " python proxy_manager_client.py list --server-identity ytdlp-ops-airflow-service\n\n" - " # Ban a proxy for a specific server\n" - " python proxy_manager_client.py ban --server-identity ytdlp-ops-airflow-service --proxy-url socks5://proxy.example.com:1080\n\n" - " # Unban a proxy\n" - " python proxy_manager_client.py unban --server-identity ytdlp-ops-airflow-service --proxy-url socks5://proxy.example.com:1080\n\n" - " # Reset all proxies for a server to ACTIVE\n" - " python proxy_manager_client.py reset --server-identity ytdlp-ops-airflow-service", - formatter_class=argparse.RawTextHelpFormatter - ) - parser.add_argument('--host', default=os.getenv('YTDLP_HOST', '127.0.0.1'), help="Server host (default: 127.0.0.1 or YTDLP_HOST env). 
Using 127.0.0.1 avoids harmless connection errors when the local Envoy proxy only listens on IPv4.") - parser.add_argument('--port', type=int, default=int(os.getenv('YTDLP_PORT', '9080')), help='Server port (default: 9080 or YTDLP_PORT env)') - - subparsers = parser.add_subparsers(dest='command', required=True, help='Available commands') - - # List command - list_parser = subparsers.add_parser( - 'list', - help='List proxy statuses for a given server identity.', - description="List the status of all proxies associated with a specific server identity.\n" - "The status includes:\n" - "- Server: The server identity.\n" - "- Proxy URL: The URL of the proxy.\n" - "- Status: ACTIVE or BANNED.\n" - "- Success: Count of successful uses.\n" - "- Failures: Count of failed uses.\n" - "- Last Success: Timestamp of the last successful use.\n" - "- Last Failure: Timestamp of the last failed use.", - formatter_class=argparse.RawTextHelpFormatter - ) - list_parser.add_argument('--server-identity', type=str, help='The identity of the server to query. If not provided, shows status for the connected server instance.') - - # Ban command - ban_parser = subparsers.add_parser( - 'ban', - help='Ban a specific proxy for a server.', - description="Manually set a proxy's status to BANNED for a specific server identity.\n" - "A banned proxy will not be used for future requests by that server instance\n" - "until it is unbanned or reset.", - formatter_class=argparse.RawTextHelpFormatter - ) - ban_parser.add_argument('--server-identity', type=str, required=True, help='The identity of the server where the proxy should be banned.') - ban_parser.add_argument('--proxy-url', type=str, required=True, help="The full URL of the proxy to ban (e.g., 'socks5://host:port').") - - # Unban command - unban_parser = subparsers.add_parser( - 'unban', - help='Unban a specific proxy for a server.', - description="Manually set a proxy's status to ACTIVE for a specific server identity.\n" - "This will allow the server instance to use the proxy for future requests.", - formatter_class=argparse.RawTextHelpFormatter - ) - unban_parser.add_argument('--server-identity', type=str, required=True, help='The identity of the server where the proxy should be unbanned.') - unban_parser.add_argument('--proxy-url', type=str, required=True, help="The full URL of the proxy to unban (e.g., 'socks5://host:port').") - - # Reset command - reset_parser = subparsers.add_parser( - 'reset', - help='Reset all proxy statuses for a server to ACTIVE.', - description="Reset the status of all proxies associated with a specific server identity to ACTIVE.\n" - "This is useful for clearing all bans and making all configured proxies available again.", - formatter_class=argparse.RawTextHelpFormatter - ) - reset_parser.add_argument('--server-identity', type=str, required=True, help='The identity of the server whose proxies should be reset.') - - args = parser.parse_args() - - client, transport = None, None - try: - client, transport = get_thrift_client(args.host, args.port) - - if args.command == 'list': - logger.info(f"Getting proxy statuses for server: {args.server_identity or 'local server'}") - statuses = client.getProxyStatus(args.server_identity) - if not statuses: - print("\nThe server reported no proxy statuses.") - print("This can happen if no proxies are configured, or if all configured proxies failed their initial health check on server startup.\n") - else: - # Determine which proxy is next in rotation for each server identity - next_proxies = {s.serverIdentity: 
s.proxyUrl for s in statuses if '(next)' in s.status} - - status_list = [] - for s in statuses: - is_next = next_proxies.get(s.serverIdentity) == s.proxyUrl - status_list.append({ - "Server": s.serverIdentity, - "Proxy URL": f"{s.proxyUrl} ->" if is_next else s.proxyUrl, - "Status": s.status.replace(" (next)", ""), - "Success": s.successCount, - "Failures": s.failureCount, - "Last Success": format_timestamp(s.lastSuccessTimestamp), - "Last Failure": format_timestamp(s.lastFailureTimestamp), - }) - print("\n--- Proxy Statuses ---") - print(tabulate(status_list, headers="keys", tablefmt="grid")) - print("----------------------\n") - - elif args.command == 'ban': - logger.info(f"Banning proxy '{args.proxy_url}' for server '{args.server_identity}'...") - success = client.banProxy(args.proxy_url, args.server_identity) - if success: - print(f"Successfully banned proxy '{args.proxy_url}' for server '{args.server_identity}'.") - else: - print("Failed to ban proxy. Check server logs for details.") - sys.exit(1) - - elif args.command == 'unban': - logger.info(f"Unbanning proxy '{args.proxy_url}' for server '{args.server_identity}'...") - success = client.unbanProxy(args.proxy_url, args.server_identity) - if success: - print(f"Successfully unbanned proxy '{args.proxy_url}' for server '{args.server_identity}'.") - else: - print("Failed to unban proxy. Check server logs for details.") - sys.exit(1) - - elif args.command == 'reset': - logger.info(f"Resetting all proxy statuses for server '{args.server_identity}'...") - success = client.resetAllProxyStatuses(args.server_identity) - if success: - print(f"Successfully reset all proxy statuses for server '{args.server_identity}'.") - else: - print("Failed to reset all proxy statuses. Check server logs for details.") - sys.exit(1) - - except (PBServiceException, PBUserException) as e: - if hasattr(e, 'errorCode') and e.errorCode == ErrorCode.NOT_IMPLEMENTED: - logger.error(f"Action '{args.command}' is not implemented by the server. It may be running in the wrong service mode.") - print(f"Error: The server does not support the action '{args.command}'.") - print("Please check that the server is running in 'all-in-one' or 'management' mode.") - else: - logger.error(f"Thrift error performing action '{args.command}': {e.message}", exc_info=True) - print(f"Error: {e.message}") - sys.exit(1) - except TTransport.TTransportException as e: - # The logger.error is not needed here because TSocket already logs connection errors. - print(f"Error: Connection to server at {args.host}:{args.port} failed. 
Is the server running?") - print(f"Details: {e}") - sys.exit(1) - except Exception as e: - logger.error(f"An unexpected error occurred: {e}", exc_info=True) - print(f"An unexpected error occurred: {e}") - sys.exit(1) - finally: - if transport and transport.isOpen(): - transport.close() - logger.info("Thrift connection closed.") - - -if __name__ == "__main__": - main() diff --git a/tools/dl_workers_list.yml b/tools/dl_workers_list.yml deleted file mode 100644 index 726447c..0000000 --- a/tools/dl_workers_list.yml +++ /dev/null @@ -1,4 +0,0 @@ -# This file is now auto-generated by tools/generate-inventory.py -# Do not edit – put overrides in cluster.yml instead - -dl-worker-001: 109.107.189.106 diff --git a/tools/generate-inventory.py b/tools/generate-inventory.py index 8f527e3..c1d9da1 100755 --- a/tools/generate-inventory.py +++ b/tools/generate-inventory.py @@ -20,7 +20,7 @@ def generate_inventory(cluster_config, inventory_path): # Master group f.write("[airflow_master]\n") - for hostname, config in cluster_config['master'].items(): + for hostname, config in cluster_config.get('master', {}).items(): line = f"{hostname} ansible_host={config['ip']}" if 'port' in config: line += f" ansible_port={config['port']}" @@ -28,9 +28,9 @@ def generate_inventory(cluster_config, inventory_path): f.write("\n") - # Workers group + # Workers group (handles case where workers are not defined) f.write("[airflow_workers]\n") - for hostname, config in cluster_config['workers'].items(): + for hostname, config in cluster_config.get('workers', {}).items(): line = f"{hostname} ansible_host={config['ip']}" if 'port' in config: line += f" ansible_port={config['port']}" @@ -41,14 +41,17 @@ def generate_host_vars(cluster_config, host_vars_dir): # Create host_vars directory if it doesn't exist os.makedirs(host_vars_dir, exist_ok=True) - # Get master IP for Redis configuration from the new structure - master_ip = list(cluster_config['master'].values())[0]['ip'] + master_nodes = cluster_config.get('master', {}) + if not master_nodes: + print("Error: 'master' section is missing or empty in cluster config. Cannot proceed.") + sys.exit(1) + master_ip = list(master_nodes.values())[0]['ip'] # Get global proxy definitions shadowsocks_proxies = cluster_config.get('shadowsocks_proxies', {}) # Combine master and worker nodes for processing - all_nodes = {**cluster_config['master'], **cluster_config['workers']} + all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})} for hostname, config in all_nodes.items(): host_vars_file = os.path.join(host_vars_dir, f"{hostname}.yml") @@ -91,8 +94,11 @@ def generate_group_vars(cluster_config, group_vars_path): global_vars = cluster_config.get('global_vars', {}) external_ips = cluster_config.get('external_access_ips', []) - # Get master IP for Redis configuration - master_ip = list(cluster_config['master'].values())[0]['ip'] + master_nodes = cluster_config.get('master', {}) + if not master_nodes: + print("Error: 'master' section is missing or empty in cluster config. Cannot proceed.") + sys.exit(1) + master_ip = list(master_nodes.values())[0]['ip'] # Combine master and worker nodes to create a hostvars-like structure all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})} diff --git a/tools/sync-to-jump.sh b/tools/sync-to-jump.sh deleted file mode 100755 index 82e355b..0000000 --- a/tools/sync-to-jump.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# -# Syncs the project directory to a remote "jump" host for deployment orchestration. 
-# -# This script is designed to be run from the root of the project directory. -# It syncs essential project files like source code, DAGs, and Ansible playbooks, -# while excluding generated files, local data, logs, and other non-essential files -# to ensure a clean copy is deployed. - -set -e # Exit immediately if a command exits with a non-zero status. -set -u # Treat unset variables as an error. - -# --- Configuration --- -# IMPORTANT: Update these variables to match your environment. -# -# The remote host to sync to (e.g., user@hostname) -REMOTE_HOST="alex_p@af-jump" -# The destination path on the remote host -REMOTE_PATH="/home/alex_p/yt-ops-services" -# The root directory of the project on the local machine. -SOURCE_DIR="." - -# --- rsync command --- -echo ">>> Syncing project from '$SOURCE_DIR' to '$REMOTE_HOST:$REMOTE_PATH'..." - -# Use an array for exclude options for clarity and to handle spaces correctly. -# This list is based on an analysis of the project structure and generated artifacts. -EXCLUDE_OPTS=( - "--exclude=.git" - "--exclude=__pycache__" - "--exclude='*.pyc'" - "--exclude='*.log'" - "--exclude=.DS_Store" - "--exclude=.vault_pass" - "--exclude=.env" - "--exclude=ansible/inventory.ini" - "--exclude=ansible/host_vars/" - "--exclude=ansible/group_vars/all/generated_vars.yml" - "--exclude=postgres-data/" - "--exclude=redis-data/" - "--exclude=minio-data/" - "--exclude=logs/" - "--exclude=downloadfiles/" - "--exclude=addfiles/" - "--exclude=token_generator/node_modules/" - # Exclude files generated on remote hosts by Ansible/config-generator - "--exclude=airflow/configs/envoy.yaml" - "--exclude=airflow/configs/docker-compose.camoufox.yaml" - "--exclude=airflow/configs/camoufox_endpoints.json" - "--exclude=cluster*.yml" - # Exclude local development notes - "--exclude=TODO-*.md" - # Exclude user-specific tools - "--exclude=*aider*" -) - -# The rsync command: -# -a: archive mode (recursive, preserves permissions, etc.) -# -v: verbose -# -z: compress file data during the transfer -# --delete: delete extraneous files from the destination directory -# --partial: keep partially transferred files -# --progress: show progress during transfer -rsync -avz --delete --partial --progress \ - "${EXCLUDE_OPTS[@]}" \ - "$SOURCE_DIR/" \ - "$REMOTE_HOST:$REMOTE_PATH/" - -echo ">>> Sync complete." 
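The tools/generate-inventory.py changes above make the 'workers' section of the cluster definition optional while still requiring a non-empty 'master' section. As a rough illustration only (the hostname, address, and port below are placeholders, not values taken from this repository), a minimal cluster file accepted by the updated generator could look like:

master:
  af-master-01:            # hypothetical hostname
    ip: 203.0.113.10       # placeholder address; emitted as ansible_host
    port: 22               # optional; emitted as ansible_port when present
workers: {}                # may be left empty, or the key omitted entirely
shadowsocks_proxies: {}    # optional global proxy definitions

With such a file the generated inventory simply contains an empty [airflow_workers] group, and a missing or empty 'master' section now exits early with an explicit error message instead of an unhandled traceback.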
diff --git a/yt_ops_services/__pycache__/__init__.cpython-39.pyc b/yt_ops_services/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 441a915..0000000 Binary files a/yt_ops_services/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/yt_ops_services/__pycache__/client_utils.cpython-39.pyc b/yt_ops_services/__pycache__/client_utils.cpython-39.pyc deleted file mode 100644 index d980504..0000000 Binary files a/yt_ops_services/__pycache__/client_utils.cpython-39.pyc and /dev/null differ diff --git a/yt_ops_services/__pycache__/version.cpython-39.pyc b/yt_ops_services/__pycache__/version.cpython-39.pyc deleted file mode 100644 index 1abbdff..0000000 Binary files a/yt_ops_services/__pycache__/version.cpython-39.pyc and /dev/null differ diff --git a/VERSION.client b/ytops_client-source/VERSION similarity index 100% rename from VERSION.client rename to ytops_client-source/VERSION diff --git a/ytops_client-source/VERSION.client b/ytops_client-source/VERSION.client new file mode 100644 index 0000000..6d7de6e --- /dev/null +++ b/ytops_client-source/VERSION.client @@ -0,0 +1 @@ +1.0.2 diff --git a/bin/build-yt-dlp-image b/ytops_client-source/bin/build-yt-dlp-image similarity index 100% rename from bin/build-yt-dlp-image rename to ytops_client-source/bin/build-yt-dlp-image diff --git a/ytops_client-source/bin/install-goytdlp.sh b/ytops_client-source/bin/install-goytdlp.sh new file mode 100755 index 0000000..23380bf --- /dev/null +++ b/ytops_client-source/bin/install-goytdlp.sh @@ -0,0 +1,74 @@ +#!/bin/sh +set -e + +# Determine the project root directory based on the script's location +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +GO_YTDLP_SRC_DIR="$PROJECT_ROOT/go-ytdlp" +YTDLP_CLI_DIR="$PROJECT_ROOT/ytops_client/go_ytdlp_cli" +INSTALL_PATH="/usr/local/bin/go-ytdlp" + +echo "Building go-ytdlp CLI tool..." + +if ! command -v go >/dev/null 2>&1; then + echo "Go is not installed. Please install Go first." >&2 + echo "See https://golang.org/doc/install" >&2 + exit 1 +fi + +# First, ensure the go-ytdlp library is built +if [ ! -d "$GO_YTDLP_SRC_DIR" ]; then + echo "go-ytdlp source directory not found at $GO_YTDLP_SRC_DIR" >&2 + echo "Please ensure the go-ytdlp source code is present at that location." >&2 + exit 1 +fi + +echo "Found go-ytdlp source at $GO_YTDLP_SRC_DIR" +cd "$GO_YTDLP_SRC_DIR" + +echo "Ensuring go-ytdlp dependencies are up to date..." +go mod tidy + +echo "Generating Go source files with 'make'..." +if ! command -v make >/dev/null 2>&1; then + echo "'make' is not installed. Please install it to run the code generation step." >&2 + exit 1 +fi +make + +echo "Building go-ytdlp CLI wrapper..." +cd "$YTDLP_CLI_DIR" +go mod tidy +go build -o go-ytdlp . + +BUILT_BINARY_PATH="$YTDLP_CLI_DIR/go-ytdlp" + +if [ ! -f "$BUILT_BINARY_PATH" ]; then + echo "Failed to build binary at $BUILT_BINARY_PATH" >&2 + exit 1 +fi + +echo "Creating symlink at $INSTALL_PATH..." +# Remove existing symlink or file if it exists +if [ -e "$INSTALL_PATH" ]; then + echo "Removing existing file/symlink at $INSTALL_PATH" + if [ -w "$(dirname "$INSTALL_PATH")" ]; then + rm -f "$INSTALL_PATH" + else + sudo rm -f "$INSTALL_PATH" + fi +fi + +# Create the symlink +if [ -w "$(dirname "$INSTALL_PATH")" ]; then + ln -s "$BUILT_BINARY_PATH" "$INSTALL_PATH" +else + echo "Warning: $(dirname "$INSTALL_PATH") is not writable. Attempting with sudo." + sudo ln -s "$BUILT_BINARY_PATH" "$INSTALL_PATH" +fi + +echo +echo "go-ytdlp linked successfully." 
+echo "You can now use the 'go-ytdlp' command." +echo "Binary location: $BUILT_BINARY_PATH" diff --git a/ytops_client-source/bin/setup-profiles-from-policy b/ytops_client-source/bin/setup-profiles-from-policy new file mode 100644 index 0000000..4b7e4ed --- /dev/null +++ b/ytops_client-source/bin/setup-profiles-from-policy @@ -0,0 +1,22 @@ +#!/bin/bash +# +# Sets up profiles for a simulation run based on a policy file. +# +# Usage: ./bin/setup-profiles-from-policy [--preserve] +# --preserve: Pass --preserve-profiles to not delete existing profiles. +# +set -e + +EXTRA_ARGS="" +if [[ "$1" == "--preserve" ]]; then + EXTRA_ARGS="--preserve-profiles" +fi + +echo "--- Setting up profiles from default simulation policy ---" + +# This script now wraps the new 'simulation setup' command. +# It passes --preserve-profiles if provided, and ensures .env is used. +bin/ytops-client simulation setup --env-file .env $EXTRA_ARGS + +echo "--- Profile setup complete. ---" +echo "You can now run the authentication simulation with: bin/ytops-client simulation auth" diff --git a/bin/ytops-client b/ytops_client-source/bin/ytops-client similarity index 100% rename from bin/ytops-client rename to ytops_client-source/bin/ytops-client diff --git a/cli.auth.config b/ytops_client-source/cli.auth.config similarity index 100% rename from cli.auth.config rename to ytops_client-source/cli.auth.config diff --git a/cli.download.config b/ytops_client-source/cli.download.config similarity index 100% rename from cli.download.config rename to ytops_client-source/cli.download.config diff --git a/package_client.py b/ytops_client-source/package_client.py similarity index 100% rename from package_client.py rename to ytops_client-source/package_client.py diff --git a/pangramia/__init__.py b/ytops_client-source/pangramia/__init__.py similarity index 100% rename from pangramia/__init__.py rename to ytops_client-source/pangramia/__init__.py diff --git a/pangramia/base_service/BaseService-remote b/ytops_client-source/pangramia/base_service/BaseService-remote similarity index 100% rename from pangramia/base_service/BaseService-remote rename to ytops_client-source/pangramia/base_service/BaseService-remote diff --git a/pangramia/base_service/BaseService.py b/ytops_client-source/pangramia/base_service/BaseService.py similarity index 100% rename from pangramia/base_service/BaseService.py rename to ytops_client-source/pangramia/base_service/BaseService.py diff --git a/pangramia/base_service/__init__.py b/ytops_client-source/pangramia/base_service/__init__.py similarity index 100% rename from pangramia/base_service/__init__.py rename to ytops_client-source/pangramia/base_service/__init__.py diff --git a/pangramia/base_service/constants.py b/ytops_client-source/pangramia/base_service/constants.py similarity index 100% rename from pangramia/base_service/constants.py rename to ytops_client-source/pangramia/base_service/constants.py diff --git a/pangramia/base_service/ttypes.py b/ytops_client-source/pangramia/base_service/ttypes.py similarity index 100% rename from pangramia/base_service/ttypes.py rename to ytops_client-source/pangramia/base_service/ttypes.py diff --git a/pangramia/yt/__init__.py b/ytops_client-source/pangramia/yt/__init__.py similarity index 100% rename from pangramia/yt/__init__.py rename to ytops_client-source/pangramia/yt/__init__.py diff --git a/pangramia/yt/common/__init__.py b/ytops_client-source/pangramia/yt/common/__init__.py similarity index 100% rename from pangramia/yt/common/__init__.py rename to 
ytops_client-source/pangramia/yt/common/__init__.py diff --git a/pangramia/yt/common/constants.py b/ytops_client-source/pangramia/yt/common/constants.py similarity index 100% rename from pangramia/yt/common/constants.py rename to ytops_client-source/pangramia/yt/common/constants.py diff --git a/pangramia/yt/common/ttypes.py b/ytops_client-source/pangramia/yt/common/ttypes.py similarity index 100% rename from pangramia/yt/common/ttypes.py rename to ytops_client-source/pangramia/yt/common/ttypes.py diff --git a/pangramia/yt/exceptions/__init__.py b/ytops_client-source/pangramia/yt/exceptions/__init__.py similarity index 100% rename from pangramia/yt/exceptions/__init__.py rename to ytops_client-source/pangramia/yt/exceptions/__init__.py diff --git a/pangramia/yt/exceptions/constants.py b/ytops_client-source/pangramia/yt/exceptions/constants.py similarity index 100% rename from pangramia/yt/exceptions/constants.py rename to ytops_client-source/pangramia/yt/exceptions/constants.py diff --git a/pangramia/yt/exceptions/ttypes.py b/ytops_client-source/pangramia/yt/exceptions/ttypes.py similarity index 100% rename from pangramia/yt/exceptions/ttypes.py rename to ytops_client-source/pangramia/yt/exceptions/ttypes.py diff --git a/pangramia/yt/management/YTManagementService-remote b/ytops_client-source/pangramia/yt/management/YTManagementService-remote similarity index 100% rename from pangramia/yt/management/YTManagementService-remote rename to ytops_client-source/pangramia/yt/management/YTManagementService-remote diff --git a/pangramia/yt/management/YTManagementService.py b/ytops_client-source/pangramia/yt/management/YTManagementService.py similarity index 100% rename from pangramia/yt/management/YTManagementService.py rename to ytops_client-source/pangramia/yt/management/YTManagementService.py diff --git a/pangramia/yt/management/__init__.py b/ytops_client-source/pangramia/yt/management/__init__.py similarity index 100% rename from pangramia/yt/management/__init__.py rename to ytops_client-source/pangramia/yt/management/__init__.py diff --git a/pangramia/yt/management/constants.py b/ytops_client-source/pangramia/yt/management/constants.py similarity index 100% rename from pangramia/yt/management/constants.py rename to ytops_client-source/pangramia/yt/management/constants.py diff --git a/pangramia/yt/management/ttypes.py b/ytops_client-source/pangramia/yt/management/ttypes.py similarity index 100% rename from pangramia/yt/management/ttypes.py rename to ytops_client-source/pangramia/yt/management/ttypes.py diff --git a/pangramia/yt/tokens_ops/YTTokenOpService-remote b/ytops_client-source/pangramia/yt/tokens_ops/YTTokenOpService-remote similarity index 100% rename from pangramia/yt/tokens_ops/YTTokenOpService-remote rename to ytops_client-source/pangramia/yt/tokens_ops/YTTokenOpService-remote diff --git a/pangramia/yt/tokens_ops/YTTokenOpService.py b/ytops_client-source/pangramia/yt/tokens_ops/YTTokenOpService.py similarity index 100% rename from pangramia/yt/tokens_ops/YTTokenOpService.py rename to ytops_client-source/pangramia/yt/tokens_ops/YTTokenOpService.py diff --git a/pangramia/yt/tokens_ops/__init__.py b/ytops_client-source/pangramia/yt/tokens_ops/__init__.py similarity index 100% rename from pangramia/yt/tokens_ops/__init__.py rename to ytops_client-source/pangramia/yt/tokens_ops/__init__.py diff --git a/pangramia/yt/tokens_ops/constants.py b/ytops_client-source/pangramia/yt/tokens_ops/constants.py similarity index 100% rename from pangramia/yt/tokens_ops/constants.py rename to 
ytops_client-source/pangramia/yt/tokens_ops/constants.py diff --git a/pangramia/yt/tokens_ops/ttypes.py b/ytops_client-source/pangramia/yt/tokens_ops/ttypes.py similarity index 100% rename from pangramia/yt/tokens_ops/ttypes.py rename to ytops_client-source/pangramia/yt/tokens_ops/ttypes.py diff --git a/policies/10_direct_docker_auth_simulation.yaml b/ytops_client-source/policies/10_direct_docker_auth_simulation.yaml similarity index 100% rename from policies/10_direct_docker_auth_simulation.yaml rename to ytops_client-source/policies/10_direct_docker_auth_simulation.yaml diff --git a/policies/11_direct_docker_download_simulation.yaml b/ytops_client-source/policies/11_direct_docker_download_simulation.yaml similarity index 100% rename from policies/11_direct_docker_download_simulation.yaml rename to ytops_client-source/policies/11_direct_docker_download_simulation.yaml diff --git a/policies/6_profile_setup_policy.yaml b/ytops_client-source/policies/6_profile_setup_policy.yaml similarity index 96% rename from policies/6_profile_setup_policy.yaml rename to ytops_client-source/policies/6_profile_setup_policy.yaml index 56b47f9..79e1d22 100644 --- a/policies/6_profile_setup_policy.yaml +++ b/ytops_client-source/policies/6_profile_setup_policy.yaml @@ -14,7 +14,7 @@ auth_profile_setup: pools: - prefix: "user1" proxy: "sslocal-rust-1092:1092" - count: 1 + count: 4 # --- Profile setup for the DOWNLOAD simulation --- download_profile_setup: @@ -23,5 +23,5 @@ download_profile_setup: pools: - prefix: "user1" proxy: "sslocal-rust-1092:1092" - count: 1 + count: 4 diff --git a/policies/8_unified_simulation_enforcer.yaml b/ytops_client-source/policies/8_unified_simulation_enforcer.yaml similarity index 95% rename from policies/8_unified_simulation_enforcer.yaml rename to ytops_client-source/policies/8_unified_simulation_enforcer.yaml index 2c1b9c6..6066733 100644 --- a/policies/8_unified_simulation_enforcer.yaml +++ b/ytops_client-source/policies/8_unified_simulation_enforcer.yaml @@ -83,9 +83,10 @@ auth_policy_enforcer_config: # This should be longer than the docker container timeout (15m). unlock_stale_locks_after_seconds: 960 - # No post-task cooldown for auth simulation profiles. When a task is finished, - # the profile is immediately returned to the ACTIVE state. - unlock_cooldown_seconds: 0 + # A short post-task cooldown for auth simulation profiles. When a batch is finished, + # the profile is put into COOLDOWN briefly. This prevents a worker from immediately + # re-locking the same profile, giving the policy enforcer a window to perform rotation. + unlock_cooldown_seconds: 1 # Cross-simulation synchronization cross_simulation_sync: diff --git a/ytops_client-source/policies/queue_auth_simulation.yaml b/ytops_client-source/policies/queue_auth_simulation.yaml new file mode 100644 index 0000000..9f910ea --- /dev/null +++ b/ytops_client-source/policies/queue_auth_simulation.yaml @@ -0,0 +1,62 @@ +# Policy: Queue-based Authentication Simulation +# +# This policy simulates a continuous stream of authentication requests +# by pulling URLs from a Redis queue, processing them with yt-ops-server, +# and pushing results to appropriate result queues. +# +name: queue_auth_simulation + +settings: + mode: fetch_only + orchestration_mode: queue_auth + profile_mode: from_pool_with_lock + # Directory to save info.json files (optional). + # For distributed operation across multiple machines, this MUST be a shared location + # like an S3 bucket path. The underlying code must be adapted to handle S3 paths. 
+ save_info_json_dir: "run/docker_mount/fetched_info_jsons/" #"s3://your-shared-bucket/stress_test/info_jsons/" + + + dummy_simulation_settings: + # Simulate auth processing time between 5 and 10 seconds for each URL. + auth_min_seconds: 5 + auth_max_seconds: 10 + # You can also control simulated failure rates here. + auth_failure_rate: 0.0 # 0% failure rate + auth_skipped_failure_rate: 0.0 # 0% skipped rate + +execution_control: + workers: 1 + # How long a worker should pause if it cannot find an available profile or task. + worker_polling_interval_seconds: 1 + # Run until conditions + run_until: + # Run for this many minutes (0 = unlimited) + minutes: 0 + # Process this many requests (0 = unlimited) + requests: 0 + + + + +info_json_generation_policy: + profile_prefix: "user1" + client: "ytdlp" + # Extra arguments to pass to the get-info command + extra_args: "--verbose" + +queue_policy: + # Set to false to use legacy, unprefixed queue names (e.g., 'queue2_auth_inbox'). + # Set to true (or omit) to use environment-prefixed names (e.g., 'sim_auth_queue2_auth_inbox'). + use_env_prefix: false + + # If specified, create download tasks for these formats + # Can be "all", a specific format ID, or a list of format IDs + formats_to_download: "140-dashy/140-dashy-0/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy" + + # How many tasks a worker should pull from the queue at once. + # The worker will lock one profile to process the entire batch. + batch_size: 25 + +simulation_parameters: + auth_env: "sim_auth" + download_env: "sim_download" diff --git a/ytops_client-source/policies/queue_download_simulation.yaml b/ytops_client-source/policies/queue_download_simulation.yaml new file mode 100644 index 0000000..608b795 --- /dev/null +++ b/ytops_client-source/policies/queue_download_simulation.yaml @@ -0,0 +1,64 @@ +# Policy: Queue-based Download Simulation +# +# This policy simulates a continuous stream of download requests +# by pulling tasks from a Redis queue, downloading the specified formats, +# and pushing results to appropriate result queues. +# +name: queue_download_simulation + +settings: + mode: download_only + orchestration_mode: queue_download + profile_mode: from_pool_with_lock + + dummy_simulation_settings: + download_min_seconds: 5 + download_max_seconds: 8 + download_failure_rate: 0.0 + download_skipped_failure_rate: 0.0 + + +execution_control: + workers: 4 + # How long a worker should pause if it cannot find an available profile or task. + worker_polling_interval_seconds: 1 + # Run until conditions + run_until: + # Run for this many minutes (0 = unlimited) + minutes: 0 + # Process this many requests (0 = unlimited) + requests: 0 + +download_policy: + profile_prefix: "user1" + # Default cooldown in seconds if not specified by the enforcer in Redis. + # The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy) + # will always take precedence. This is a fallback. + # Can be an integer (e.g., 1) or a range (e.g., [1, 3]). + default_unlock_cooldown_seconds: 1 + # Directory to save downloaded files + output_dir: "downloaded_media/queue_downloads" + # Extra arguments to pass to the download command + extra_args: "--verbose" + + # After a download task is successfully processed, rename the source info.json + # to prevent re-processing. This is safe if you generate one download task per info.json. 
+ rename_source_info_json_on_success: true + + # --- Airflow Integration --- + # If true, move downloaded media and info.json to a timestamped, video-id-based + # directory structure that the Airflow DAGs can process. + output_to_airflow_ready_dir: true + airflow_ready_dir_base_path: "downloadfiles/videos/ready" + +queue_policy: + # Set to false to use legacy, unprefixed queue names (e.g., 'queue2_dl_inbox'). + # Set to true (or omit) to use environment-prefixed names (e.g., 'sim_download_queue2_dl_inbox'). + use_env_prefix: false + + # How many tasks to process in a batch. For downloads, this should be 1, + # as each worker locks a profile for a single download task. + batch_size: 1 + +simulation_parameters: + download_env: "sim_download" diff --git a/ytops_client-source/policies/queue_full_stack_simulation.yaml b/ytops_client-source/policies/queue_full_stack_simulation.yaml new file mode 100644 index 0000000..44e1926 --- /dev/null +++ b/ytops_client-source/policies/queue_full_stack_simulation.yaml @@ -0,0 +1,78 @@ +# Policy: Queue-based Full Stack Simulation +# +# This policy simulates a complete workflow by running both authentication +# and download workers simultaneously, processing tasks from Redis queues. +# +name: queue_full_stack_simulation + +settings: + mode: full_stack + orchestration_mode: queue_full_stack + profile_mode: from_pool_with_lock + # Directory to save info.json files (optional) + save_info_json_dir: "run/queue_auth_results" + + dummy_simulation_settings: + auth_min_seconds: 0.75 + auth_max_seconds: 1.5 + auth_failure_rate: 0.0 + auth_skipped_failure_rate: 0.0 + download_min_seconds: 5 + download_max_seconds: 8 + download_failure_rate: 0.0 + download_skipped_failure_rate: 0.0 + +execution_control: + # Number of workers for each stage + auth_workers: 2 + download_workers: 4 + # How long a worker should pause if it cannot find an available profile or task. + worker_polling_interval_seconds: 1 + # Run until conditions + run_until: + # Run for this many minutes (0 = unlimited) + minutes: 0 + # Process this many requests (0 = unlimited) + requests: 0 + +info_json_generation_policy: + profile_prefix: "user1" + client: "ytdlp" + # Extra arguments to pass to the get-info command + extra_args: "--verbose" + +download_policy: + profile_prefix: "user1" + # Default cooldown in seconds if not specified by the enforcer in Redis. + default_unlock_cooldown_seconds: 1 + # Directory to save downloaded files + output_dir: "downloaded_media/queue_downloads" + # Extra arguments to pass to the download command + extra_args: "--verbose" + + # After a download task is successfully processed, rename the source info.json + # to prevent re-processing. This is safe if you generate one download task per info.json. 
+ rename_source_info_json_on_success: true + +queue_policy: + # Redis connection settings (can be overridden by CLI args) + redis_host: "localhost" + redis_port: 6379 + redis_password: "" + redis_db: 0 + + # If specified, create download tasks for these formats + # Can be "all", a specific format ID, or a list of format IDs + formats_to_download: "140-dashy/140-dashy-0/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy" + + # How many tasks to process in a batch (for batch operations) + batch_size: 10 + + # Queue management options + requeue_failed_tasks: true + requeue_batch_size: 50 + requeue_interval_seconds: 300 + +simulation_parameters: + auth_env: "sim_auth" + download_env: "sim_download" diff --git a/setup.py b/ytops_client-source/setup.py similarity index 100% rename from setup.py rename to ytops_client-source/setup.py diff --git a/thrift_exceptions_patch.py b/ytops_client-source/thrift_exceptions_patch.py similarity index 100% rename from thrift_exceptions_patch.py rename to ytops_client-source/thrift_exceptions_patch.py diff --git a/thrift_model/.gitignore b/ytops_client-source/thrift_model/.gitignore similarity index 100% rename from thrift_model/.gitignore rename to ytops_client-source/thrift_model/.gitignore diff --git a/thrift_model/__init__.py b/ytops_client-source/thrift_model/__init__.py similarity index 100% rename from thrift_model/__init__.py rename to ytops_client-source/thrift_model/__init__.py diff --git a/thrift_model/data/common.thrift b/ytops_client-source/thrift_model/data/common.thrift similarity index 100% rename from thrift_model/data/common.thrift rename to ytops_client-source/thrift_model/data/common.thrift diff --git a/thrift_model/data/exceptions.thrift b/ytops_client-source/thrift_model/data/exceptions.thrift similarity index 100% rename from thrift_model/data/exceptions.thrift rename to ytops_client-source/thrift_model/data/exceptions.thrift diff --git a/thrift_model/gen_py/__init__.py b/ytops_client-source/thrift_model/gen_py/__init__.py similarity index 100% rename from thrift_model/gen_py/__init__.py rename to ytops_client-source/thrift_model/gen_py/__init__.py diff --git a/thrift_model/gen_py/pangramia/__init__.py b/ytops_client-source/thrift_model/gen_py/pangramia/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/__init__.py diff --git a/thrift_model/gen_py/pangramia/base_service/BaseService-remote b/ytops_client-source/thrift_model/gen_py/pangramia/base_service/BaseService-remote similarity index 100% rename from thrift_model/gen_py/pangramia/base_service/BaseService-remote rename to ytops_client-source/thrift_model/gen_py/pangramia/base_service/BaseService-remote diff --git a/thrift_model/gen_py/pangramia/base_service/BaseService.py b/ytops_client-source/thrift_model/gen_py/pangramia/base_service/BaseService.py similarity index 100% rename from thrift_model/gen_py/pangramia/base_service/BaseService.py rename to ytops_client-source/thrift_model/gen_py/pangramia/base_service/BaseService.py diff --git a/thrift_model/gen_py/pangramia/base_service/__init__.py b/ytops_client-source/thrift_model/gen_py/pangramia/base_service/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/base_service/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/base_service/__init__.py diff --git a/thrift_model/gen_py/pangramia/base_service/constants.py 
b/ytops_client-source/thrift_model/gen_py/pangramia/base_service/constants.py similarity index 100% rename from thrift_model/gen_py/pangramia/base_service/constants.py rename to ytops_client-source/thrift_model/gen_py/pangramia/base_service/constants.py diff --git a/thrift_model/gen_py/pangramia/base_service/ttypes.py b/ytops_client-source/thrift_model/gen_py/pangramia/base_service/ttypes.py similarity index 100% rename from thrift_model/gen_py/pangramia/base_service/ttypes.py rename to ytops_client-source/thrift_model/gen_py/pangramia/base_service/ttypes.py diff --git a/thrift_model/gen_py/pangramia/yt/__init__.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/__init__.py diff --git a/thrift_model/gen_py/pangramia/yt/common/__init__.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/common/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/common/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/common/__init__.py diff --git a/thrift_model/gen_py/pangramia/yt/common/constants.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/common/constants.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/common/constants.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/common/constants.py diff --git a/thrift_model/gen_py/pangramia/yt/common/ttypes.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/common/ttypes.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/common/ttypes.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/common/ttypes.py diff --git a/thrift_model/gen_py/pangramia/yt/exceptions/__init__.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/exceptions/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/exceptions/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/exceptions/__init__.py diff --git a/thrift_model/gen_py/pangramia/yt/exceptions/constants.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/exceptions/constants.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/exceptions/constants.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/exceptions/constants.py diff --git a/thrift_model/gen_py/pangramia/yt/exceptions/ttypes.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/exceptions/ttypes.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/exceptions/ttypes.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/exceptions/ttypes.py diff --git a/thrift_model/gen_py/pangramia/yt/management/YTManagementService-remote b/ytops_client-source/thrift_model/gen_py/pangramia/yt/management/YTManagementService-remote similarity index 100% rename from thrift_model/gen_py/pangramia/yt/management/YTManagementService-remote rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/management/YTManagementService-remote diff --git a/thrift_model/gen_py/pangramia/yt/management/YTManagementService.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/management/YTManagementService.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/management/YTManagementService.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/management/YTManagementService.py diff --git a/thrift_model/gen_py/pangramia/yt/management/__init__.py 
b/ytops_client-source/thrift_model/gen_py/pangramia/yt/management/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/management/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/management/__init__.py diff --git a/thrift_model/gen_py/pangramia/yt/management/constants.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/management/constants.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/management/constants.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/management/constants.py diff --git a/thrift_model/gen_py/pangramia/yt/management/ttypes.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/management/ttypes.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/management/ttypes.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/management/ttypes.py diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote b/ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote similarity index 100% rename from thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService-remote diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/YTTokenOpService.py diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/__init__.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/__init__.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/tokens_ops/__init__.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/__init__.py diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/constants.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/constants.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/tokens_ops/constants.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/constants.py diff --git a/thrift_model/gen_py/pangramia/yt/tokens_ops/ttypes.py b/ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/ttypes.py similarity index 100% rename from thrift_model/gen_py/pangramia/yt/tokens_ops/ttypes.py rename to ytops_client-source/thrift_model/gen_py/pangramia/yt/tokens_ops/ttypes.py diff --git a/thrift_model/pom.xml b/ytops_client-source/thrift_model/pom.xml similarity index 100% rename from thrift_model/pom.xml rename to ytops_client-source/thrift_model/pom.xml diff --git a/thrift_model/services/base_service.thrift b/ytops_client-source/thrift_model/services/base_service.thrift similarity index 100% rename from thrift_model/services/base_service.thrift rename to ytops_client-source/thrift_model/services/base_service.thrift diff --git a/thrift_model/services/yt_admin_ops.thrift b/ytops_client-source/thrift_model/services/yt_admin_ops.thrift similarity index 100% rename from thrift_model/services/yt_admin_ops.thrift rename to ytops_client-source/thrift_model/services/yt_admin_ops.thrift diff --git a/thrift_model/services/yt_management.thrift b/ytops_client-source/thrift_model/services/yt_management.thrift similarity index 100% rename from thrift_model/services/yt_management.thrift rename to 
ytops_client-source/thrift_model/services/yt_management.thrift diff --git a/thrift_model/services/yt_tokens_ops.thrift b/ytops_client-source/thrift_model/services/yt_tokens_ops.thrift similarity index 100% rename from thrift_model/services/yt_tokens_ops.thrift rename to ytops_client-source/thrift_model/services/yt_tokens_ops.thrift diff --git a/yt_ops_services/__init__.py b/ytops_client-source/yt_ops_services/__init__.py similarity index 100% rename from yt_ops_services/__init__.py rename to ytops_client-source/yt_ops_services/__init__.py diff --git a/yt_ops_services/client_utils.py b/ytops_client-source/yt_ops_services/client_utils.py similarity index 100% rename from yt_ops_services/client_utils.py rename to ytops_client-source/yt_ops_services/client_utils.py diff --git a/yt_ops_services/version.py b/ytops_client-source/yt_ops_services/version.py similarity index 100% rename from yt_ops_services/version.py rename to ytops_client-source/yt_ops_services/version.py diff --git a/ytops_client-source/ytdlp.json b/ytops_client-source/ytdlp.json new file mode 100644 index 0000000..414e5a6 --- /dev/null +++ b/ytops_client-source/ytdlp.json @@ -0,0 +1,59 @@ +{ + + "ytops": { + "force_renew": [], + "session_params": { + "visitor_rotation_threshold": 0 + } + }, + + "ytdlp_params": { + "debug_printtraffic": true, + "write_pages": false, + "verbose": true, + "no_color": true, + "ignoreerrors": true, + "noresizebuffer": true, + "buffersize": "4M", + "concurrent_fragments": 8, + "socket_timeout": 60, + "outtmpl": { + "default": "%(id)s.f%(format_id)s.%(ext)s" + }, + "restrictfilenames": true, + "updatetime": false, + "noplaylist": true, + "match_filter": "!is_live", + "writeinfojson": true, + "skip_download": true, + "allow_playlist_files": false, + "clean_infojson": true, + "getcomments": false, + "writesubtitles": false, + "writethumbnail": false, + "sleep_interval_requests": 0.75, + "parse_metadata": [ + ":(?P)" + ], + "extractor_args": { + "youtube": { + "player_client": ["tv_simply"], + "formats": ["duplicate"], + "jsc_trace": ["true"], + "pot_trace": ["true"], + "skip": ["translated_subs", "hls"] + }, + "youtubepot-bgutilhttp": { + "base_url": ["http://172.17.0.1:4416"] + } + }, + "noprogress": true, + "format_sort": [ + "res", + "ext:mp4:m4a" + ], + "remuxvideo": "mp4", + "nooverwrites": true, + "continuedl": true + } +} diff --git a/ytops_client/__init__.py b/ytops_client-source/ytops_client/__init__.py similarity index 100% rename from ytops_client/__init__.py rename to ytops_client-source/ytops_client/__init__.py diff --git a/ytops_client/check_expiry_tool.py b/ytops_client-source/ytops_client/check_expiry_tool.py similarity index 100% rename from ytops_client/check_expiry_tool.py rename to ytops_client-source/ytops_client/check_expiry_tool.py diff --git a/ytops_client/check_log_pattern_tool.py b/ytops_client-source/ytops_client/check_log_pattern_tool.py similarity index 100% rename from ytops_client/check_log_pattern_tool.py rename to ytops_client-source/ytops_client/check_log_pattern_tool.py diff --git a/ytops_client/cli.py b/ytops_client-source/ytops_client/cli.py similarity index 97% rename from ytops_client/cli.py rename to ytops_client-source/ytops_client/cli.py index 0b72f96..9ac244f 100644 --- a/ytops_client/cli.py +++ b/ytops_client-source/ytops_client/cli.py @@ -42,6 +42,7 @@ from .locking_download_emulator_tool import add_locking_download_emulator_parser from .task_generator_tool import add_task_generator_parser, main_task_generator from .yt_dlp_dummy_tool import 
add_yt_dlp_dummy_parser, main_yt_dlp_dummy from .check_log_pattern_tool import add_check_log_pattern_parser, main_check_log_pattern +from .queue_manager_tool import add_queue_manager_parser, main_queue_manager def main(): @@ -106,6 +107,7 @@ def main(): add_task_generator_parser(subparsers) add_yt_dlp_dummy_parser(subparsers) add_check_log_pattern_parser(subparsers) + add_queue_manager_parser(subparsers) args = parser.parse_args() @@ -158,6 +160,8 @@ def main(): return main_yt_dlp_dummy(args) elif args.command == 'check-log-pattern': return main_check_log_pattern(args) + elif args.command == 'queue': + return main_queue_manager(args) # This path should not be reachable if a command is required or handled above. parser.print_help() diff --git a/ytops_client/config_tool.py b/ytops_client-source/ytops_client/config_tool.py similarity index 100% rename from ytops_client/config_tool.py rename to ytops_client-source/ytops_client/config_tool.py diff --git a/ytops_client/cookie_tool.py b/ytops_client-source/ytops_client/cookie_tool.py similarity index 100% rename from ytops_client/cookie_tool.py rename to ytops_client-source/ytops_client/cookie_tool.py diff --git a/ytops_client/download_aria_tool.py b/ytops_client-source/ytops_client/download_aria_tool.py similarity index 100% rename from ytops_client/download_aria_tool.py rename to ytops_client-source/ytops_client/download_aria_tool.py diff --git a/ytops_client/download_emulator_tool.py b/ytops_client-source/ytops_client/download_emulator_tool.py similarity index 100% rename from ytops_client/download_emulator_tool.py rename to ytops_client-source/ytops_client/download_emulator_tool.py diff --git a/ytops_client/download_native_py_tool.py b/ytops_client-source/ytops_client/download_native_py_tool.py similarity index 100% rename from ytops_client/download_native_py_tool.py rename to ytops_client-source/ytops_client/download_native_py_tool.py diff --git a/ytops_client/download_tool.py b/ytops_client-source/ytops_client/download_tool.py similarity index 100% rename from ytops_client/download_tool.py rename to ytops_client-source/ytops_client/download_tool.py diff --git a/ytops_client/downloader.py b/ytops_client-source/ytops_client/downloader.py similarity index 100% rename from ytops_client/downloader.py rename to ytops_client-source/ytops_client/downloader.py diff --git a/ytops_client/get_info_tool.py b/ytops_client-source/ytops_client/get_info_tool.py similarity index 100% rename from ytops_client/get_info_tool.py rename to ytops_client-source/ytops_client/get_info_tool.py diff --git a/ytops_client/go_ytdlp_cli/go-ytdlp b/ytops_client-source/ytops_client/go_ytdlp_cli/go-ytdlp similarity index 100% rename from ytops_client/go_ytdlp_cli/go-ytdlp rename to ytops_client-source/ytops_client/go_ytdlp_cli/go-ytdlp diff --git a/ytops_client/go_ytdlp_cli/go.mod b/ytops_client-source/ytops_client/go_ytdlp_cli/go.mod similarity index 100% rename from ytops_client/go_ytdlp_cli/go.mod rename to ytops_client-source/ytops_client/go_ytdlp_cli/go.mod diff --git a/ytops_client/go_ytdlp_cli/go.sum b/ytops_client-source/ytops_client/go_ytdlp_cli/go.sum similarity index 100% rename from ytops_client/go_ytdlp_cli/go.sum rename to ytops_client-source/ytops_client/go_ytdlp_cli/go.sum diff --git a/ytops_client/go_ytdlp_cli/main.go b/ytops_client-source/ytops_client/go_ytdlp_cli/main.go similarity index 100% rename from ytops_client/go_ytdlp_cli/main.go rename to ytops_client-source/ytops_client/go_ytdlp_cli/main.go diff --git a/ytops_client/list_formats_tool.py 
b/ytops_client-source/ytops_client/list_formats_tool.py similarity index 100% rename from ytops_client/list_formats_tool.py rename to ytops_client-source/ytops_client/list_formats_tool.py diff --git a/ytops_client/locking_download_emulator_tool.py b/ytops_client-source/ytops_client/locking_download_emulator_tool.py similarity index 100% rename from ytops_client/locking_download_emulator_tool.py rename to ytops_client-source/ytops_client/locking_download_emulator_tool.py diff --git a/ytops_client/manage_tool.py b/ytops_client-source/ytops_client/manage_tool.py similarity index 100% rename from ytops_client/manage_tool.py rename to ytops_client-source/ytops_client/manage_tool.py diff --git a/ytops_client/policy_enforcer_tool.py b/ytops_client-source/ytops_client/policy_enforcer_tool.py similarity index 97% rename from ytops_client/policy_enforcer_tool.py rename to ytops_client-source/ytops_client/policy_enforcer_tool.py index 4b517ee..4391c35 100644 --- a/ytops_client/policy_enforcer_tool.py +++ b/ytops_client-source/ytops_client/policy_enforcer_tool.py @@ -194,10 +194,41 @@ class PolicyEnforcer: live_active_counts[group_name] = count # --- End group logic setup --- + # --- New logic: Identify groups with waiting profiles --- + groups_with_waiting_profiles = {} + if profile_groups: + for group in profile_groups: + group_name = group.get('name') + if not group_name: continue + + defer_activation = group.get('defer_activation_if_any_waiting', False) + if not defer_activation: continue + + profiles_in_group = group_to_profiles_map.get(group_name, []) + waiting_profile = next( + (p for p_name, p in all_profiles_map.items() + if p_name in profiles_in_group and p.get('rest_reason') == 'waiting_downloads'), + None + ) + if waiting_profile: + groups_with_waiting_profiles[group_name] = waiting_profile['name'] + # --- End new logic --- + unique_proxies = sorted(list(set(p['proxy'] for p in profiles_to_check if p.get('proxy')))) proxy_states = self.manager.get_proxy_states(unique_proxies) for profile in profiles_to_check: + profile_name = profile['name'] + group_name = profile_to_group_map.get(profile_name) + + # --- New logic: Defer activation if group has a waiting profile --- + if group_name in groups_with_waiting_profiles: + waiting_profile_name = groups_with_waiting_profiles[group_name] + if profile_name != waiting_profile_name: + logger.debug(f"Profile '{profile_name}' activation deferred because profile '{waiting_profile_name}' in group '{group_name}' is waiting for downloads.") + continue + # --- End new logic --- + # --- New logic for waiting_downloads --- if profile.get('rest_reason') == 'waiting_downloads': profile_name = profile['name'] @@ -257,23 +288,6 @@ class PolicyEnforcer: if not group_policy: continue # Should not happen if maps are built correctly - # --- New check: Defer activation if another profile in the group is waiting for downloads --- - defer_activation = group_policy.get('defer_activation_if_any_waiting', False) - if defer_activation: - profiles_in_group = group_to_profiles_map.get(group_name, []) - # Find if any profile in the group is currently in the waiting state. - # This check is crucial to ensure strict sequential processing. 
- waiting_profile = next( - (p for p_name, p in all_profiles_map.items() - if p_name in profiles_in_group and p.get('rest_reason') == 'waiting_downloads'), - None - ) - - if waiting_profile and waiting_profile['name'] != profile_name: - logger.debug(f"Profile '{profile_name}' rest ended, but profile '{waiting_profile['name']}' in group '{group_name}' is waiting for downloads. Deferring activation.") - continue # Do not activate, another is waiting. - # --- End new check --- - max_active = group_policy.get('max_active_profiles', 1) # Check if the group is already at its capacity for active profiles. diff --git a/ytops_client/profile_allocator_tool.py b/ytops_client-source/ytops_client/profile_allocator_tool.py similarity index 100% rename from ytops_client/profile_allocator_tool.py rename to ytops_client-source/ytops_client/profile_allocator_tool.py diff --git a/ytops_client/profile_manager_tool.py b/ytops_client-source/ytops_client/profile_manager_tool.py similarity index 100% rename from ytops_client/profile_manager_tool.py rename to ytops_client-source/ytops_client/profile_manager_tool.py diff --git a/ytops_client/profile_setup_tool.py b/ytops_client-source/ytops_client/profile_setup_tool.py similarity index 100% rename from ytops_client/profile_setup_tool.py rename to ytops_client-source/ytops_client/profile_setup_tool.py diff --git a/ytops_client-source/ytops_client/queue_manager_tool.py b/ytops_client-source/ytops_client/queue_manager_tool.py new file mode 100644 index 0000000..6bd422a --- /dev/null +++ b/ytops_client-source/ytops_client/queue_manager_tool.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +""" +Redis Queue Management CLI Tool for yt-ops-client. +""" + +import argparse +import json +import logging +import os +import sys +from typing import Optional + +import redis + +try: + from dotenv import load_dotenv +except ImportError: + load_dotenv = None + +try: + from tabulate import tabulate +except ImportError: + print("'tabulate' library not found. 
Please install it with: pip install tabulate", file=sys.stderr) + tabulate = None + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +class QueueManager: + """Manages Redis lists (queues).""" + + def __init__(self, redis_host='localhost', redis_port=6379, redis_password=None): + """Initialize Redis connection.""" + logger.info(f"Attempting to connect to Redis at {redis_host}:{redis_port}...") + try: + self.redis = redis.Redis( + host=redis_host, + port=redis_port, + password=redis_password, + decode_responses=True, + socket_connect_timeout=5, + socket_timeout=5 + ) + self.redis.ping() + logger.info(f"Successfully connected to Redis.") + except redis.exceptions.ConnectionError as e: + logger.error(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}") + sys.exit(1) + + def list_queues(self, pattern: str): + """Lists queues matching a pattern and their sizes.""" + queues = [] + for key in self.redis.scan_iter(match=pattern): + key_type = self.redis.type(key) + if key_type == 'list': + size = self.redis.llen(key) + queues.append({'name': key, 'size': size}) + return queues + + def peek(self, queue_name: str, count: int): + """Returns the top `count` items from a queue without removing them.""" + return self.redis.lrange(queue_name, 0, count - 1) + + def count(self, queue_name: str) -> int: + """Returns the number of items in a queue.""" + return self.redis.llen(queue_name) + + def populate(self, queue_name: str, file_path: str) -> int: + """Populates a queue from a file (text with one item per line, or JSON with an array of strings).""" + count = 0 + + if file_path.lower().endswith('.json'): + logger.info("Detected JSON file. Attempting to parse as an array of strings.") + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if not isinstance(data, list): + logger.error("JSON file must contain a list/array.") + return 0 + + items_to_add = [str(item).strip() for item in data if str(item).strip()] + + pipe = self.redis.pipeline() + for item in items_to_add: + pipe.rpush(queue_name, item) + count += 1 + if count % 1000 == 0: + pipe.execute() + logger.info(f"Pushed {count} items...") + pipe.execute() + + except (IOError, json.JSONDecodeError) as e: + logger.error(f"Failed to read or parse JSON file '{file_path}': {e}") + return 0 + else: + logger.info("Reading items from text file (one per line).") + try: + with open(file_path, 'r', encoding='utf-8') as f: + pipe = self.redis.pipeline() + for line in f: + item = line.strip() + if item: + pipe.rpush(queue_name, item) + count += 1 + if count % 1000 == 0: + pipe.execute() + logger.info(f"Pushed {count} items...") + pipe.execute() + except IOError as e: + logger.error(f"Failed to read file '{file_path}': {e}") + return 0 + + logger.info(f"Finished. 
Pushed a total of {count} items to '{queue_name}'.") + return count + + def clear(self, queue_name: str, dump_path: Optional[str] = None) -> int: + """Clears a queue, optionally dumping its contents to a file.""" + size = self.redis.llen(queue_name) + if size == 0: + logger.info(f"Queue '{queue_name}' is already empty.") + return 0 + + if dump_path: + logger.info(f"Dumping {size} items from '{queue_name}' to '{dump_path}'...") + with open(dump_path, 'w') as f: + # Use lpop to be memory efficient for very large queues + while True: + item = self.redis.lpop(queue_name) + if item is None: + break + f.write(item + '\n') + logger.info("Dump complete.") + # After lpop, the queue is already empty. + return size + + deleted_count = self.redis.delete(queue_name) + if deleted_count > 0: + logger.info(f"Cleared queue '{queue_name}' ({size} items).") + + return size + + +def add_queue_manager_parser(subparsers): + """Adds the parser for the 'queue' command.""" + parser = subparsers.add_parser( + 'queue', + description='Manage Redis queues.', + formatter_class=argparse.RawTextHelpFormatter, + help='Manage Redis queues.' + ) + + # Common arguments for all queue manager subcommands + common_parser = argparse.ArgumentParser(add_help=False) + common_parser.add_argument('--env-file', help='Path to a .env file to load environment variables from.') + common_parser.add_argument('--env', default='dev', help="Environment name for queue prefixes (e.g., 'stg', 'prod'). Defaults to 'dev'.") + common_parser.add_argument('--redis-host', default=None, help='Redis host. Defaults to REDIS_HOST or MASTER_HOST_IP env var, or localhost.') + common_parser.add_argument('--redis-port', type=int, default=None, help='Redis port. Defaults to REDIS_PORT env var, or 6379.') + common_parser.add_argument('--redis-password', default=None, help='Redis password. Defaults to REDIS_PASSWORD env var.') + common_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging') + + subparsers = parser.add_subparsers(dest='queue_command', help='Command to execute', required=True) + + # List command + list_parser = subparsers.add_parser('list', help='List queues and their sizes.', parents=[common_parser]) + list_parser.add_argument('--pattern', default='*queue*', help="Pattern to search for queue keys (default: '*queue*')") + + # Peek command + peek_parser = subparsers.add_parser('peek', help='View items in a queue without removing them.', parents=[common_parser]) + peek_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '_stress_inbox'.") + peek_parser.add_argument('--count', type=int, default=10, help='Number of items to show (default: 10)') + + # Populate command + populate_parser = subparsers.add_parser('populate', help='Populate a queue from a file (one item per line).', parents=[common_parser]) + populate_parser.add_argument('file_path', help='Path to the file containing items to add.') + populate_parser.add_argument('--queue-name', help="Name of the queue to populate. Defaults to '_stress_inbox'.") + + # Clear command + clear_parser = subparsers.add_parser('clear', help='Clear a queue, optionally dumping its contents.', parents=[common_parser]) + clear_parser.add_argument('queue_name', nargs='?', help="Name of the queue to clear. 
Defaults to '_stress_inbox'.") + clear_parser.add_argument('--dump-to', help='File path to dump queue contents before clearing.') + clear_parser.add_argument('--confirm', action='store_true', help='Confirm this destructive action (required).') + + return parser + + +def main_queue_manager(args): + """Main dispatcher for 'queue' command.""" + if load_dotenv: + was_loaded = load_dotenv(args.env_file) + if was_loaded: + print(f"Loaded environment variables from {args.env_file or '.env file'}", file=sys.stderr) + elif args.env_file: + print(f"ERROR: The specified --env-file was not found: {args.env_file}", file=sys.stderr) + return 1 + + if args.redis_host is None: + args.redis_host = os.getenv('REDIS_HOST', os.getenv('MASTER_HOST_IP', 'localhost')) + if args.redis_port is None: + args.redis_port = int(os.getenv('REDIS_PORT', 6379)) + if args.redis_password is None: + args.redis_password = os.getenv('REDIS_PASSWORD') + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + + manager = QueueManager( + redis_host=args.redis_host, + redis_port=args.redis_port, + redis_password=args.redis_password + ) + + # For commands that operate on a single queue, set a default name based on the environment if not provided. + is_single_queue_command = args.queue_command in ['peek', 'populate', 'clear'] + if is_single_queue_command: + # `populate` uses an option (--queue-name), while `peek` and `clear` use a positional argument. + # We check for `queue_name` attribute and if it's falsy (None or empty string). + if not getattr(args, 'queue_name', None): + default_queue_name = f"{args.env}_stress_inbox" + args.queue_name = default_queue_name + print(f"INFO: No queue name specified, defaulting to '{default_queue_name}' based on --env='{args.env}'.", file=sys.stderr) + + if args.queue_command == 'list': + queues = manager.list_queues(args.pattern) + if not queues: + print(f"No queues found matching pattern '{args.pattern}'.") + return 0 + if tabulate: + print(tabulate(queues, headers='keys', tablefmt='grid')) + else: + for q in queues: + print(f"{q['name']}: {q['size']}") + return 0 + + elif args.queue_command == 'peek': + size = manager.count(args.queue_name) + items = manager.peek(args.queue_name, args.count) + print(f"Queue '{args.queue_name}' has {size} items. 
Showing top {len(items)}:") + for i, item in enumerate(items): + print(f"{i+1: >3}: {item}") + return 0 + + elif args.queue_command == 'populate': + if not os.path.exists(args.file_path): + print(f"Error: File not found at '{args.file_path}'", file=sys.stderr) + return 1 + manager.populate(args.queue_name, args.file_path) + return 0 + + elif args.queue_command == 'clear': + if not args.confirm: + print("Error: --confirm flag is required for this destructive action.", file=sys.stderr) + return 1 + manager.clear(args.queue_name, args.dump_to) + return 0 + + return 1 # Should not be reached diff --git a/ytops_client/request_params_help.py b/ytops_client-source/ytops_client/request_params_help.py similarity index 100% rename from ytops_client/request_params_help.py rename to ytops_client-source/ytops_client/request_params_help.py diff --git a/ytops_client/requirements.txt b/ytops_client-source/ytops_client/requirements.txt similarity index 100% rename from ytops_client/requirements.txt rename to ytops_client-source/ytops_client/requirements.txt diff --git a/ytops_client/simulation_tool.py b/ytops_client-source/ytops_client/simulation_tool.py similarity index 100% rename from ytops_client/simulation_tool.py rename to ytops_client-source/ytops_client/simulation_tool.py diff --git a/ytops_client/stress_formats_tool.py b/ytops_client-source/ytops_client/stress_formats_tool.py similarity index 100% rename from ytops_client/stress_formats_tool.py rename to ytops_client-source/ytops_client/stress_formats_tool.py diff --git a/ytops_client/stress_policy/__init__.py b/ytops_client-source/ytops_client/stress_policy/__init__.py similarity index 100% rename from ytops_client/stress_policy/__init__.py rename to ytops_client-source/ytops_client/stress_policy/__init__.py diff --git a/ytops_client/stress_policy/arg_parser.py b/ytops_client-source/ytops_client/stress_policy/arg_parser.py similarity index 97% rename from ytops_client/stress_policy/arg_parser.py rename to ytops_client-source/ytops_client/stress_policy/arg_parser.py index fbd7159..71e4351 100644 --- a/ytops_client/stress_policy/arg_parser.py +++ b/ytops_client-source/ytops_client/stress_policy/arg_parser.py @@ -182,9 +182,11 @@ Overridable Policy Parameters via --set: parser.add_argument('--print-downloader-log', action='store_true', help='Stream the live stdout/stderr from the download subprocess to the console.') parser.add_argument('--dry-run', action='store_true', help='Print the effective policy and exit without running the test.') parser.add_argument('--dummy', action='store_true', help='Simulate auth and download without running external commands. Used to test profile management logic.\nDummy behavior (e.g., failure rates, durations) can be configured in the policy file under settings.dummy_simulation_settings.') + parser.add_argument('--dummy-batch', action='store_true', help="[Dummy Mode] Simulate batch modes ('direct_batch_cli', 'direct_docker_cli') by creating dummy info.json files without running yt-dlp. Updates profile counters for each simulated URL.") parser.add_argument('--dummy-auth-failure-rate', type=float, default=0.0, help='[Dummy Mode] The probability (0.0 to 1.0) of a simulated auth request failing fatally.') parser.add_argument('--dummy-auth-skipped-failure-rate', type=float, default=0.0, help='[Dummy Mode] The probability (0.0 to 1.0) of a simulated auth request having a tolerated failure (e.g., 429).') parser.add_argument('--disable-log-writing', action='store_true', help='Disable writing state, stats, and log files. 
By default, files are created for each run.') + parser.add_argument('--requeue-failed', action='store_true', help='[Queue Modes] Requeue all tasks from the failure queues back into the inbox before starting.') # Add a group for download-specific utilities download_util_group = parser.add_argument_group('Download Mode Utilities') @@ -208,6 +210,7 @@ Overridable Policy Parameters via --set: redis_group.add_argument('--redis-host', default=None, help='Redis host. Defaults to REDIS_HOST or MASTER_HOST_IP env var, or localhost.') redis_group.add_argument('--redis-port', type=int, default=None, help='Redis port. Defaults to REDIS_PORT env var, or 6379.') redis_group.add_argument('--redis-password', default=None, help='Redis password. Defaults to REDIS_PASSWORD env var.') + redis_group.add_argument('--redis-db', type=int, default=None, help='Redis DB number. Defaults to REDIS_DB env var, or 0.') redis_group.add_argument('--env', default=None, help="Default environment name for Redis key prefix (e.g., 'stg', 'prod'). Used if --auth-env or --download-env are not specified. Overrides policy file setting.") redis_group.add_argument('--auth-env', help="Override the environment for the Auth simulation. Overrides --env.") redis_group.add_argument('--download-env', help="Override the environment for the Download simulation. Overrides --env.") diff --git a/ytops_client/stress_policy/process_runners.py b/ytops_client-source/ytops_client/stress_policy/process_runners.py similarity index 100% rename from ytops_client/stress_policy/process_runners.py rename to ytops_client-source/ytops_client/stress_policy/process_runners.py diff --git a/ytops_client-source/ytops_client/stress_policy/queue_provider.py b/ytops_client-source/ytops_client/stress_policy/queue_provider.py new file mode 100644 index 0000000..85da619 --- /dev/null +++ b/ytops_client-source/ytops_client/stress_policy/queue_provider.py @@ -0,0 +1,470 @@ +""" +Queue provider for stress policy tool. + +This module provides interfaces and implementations for queue operations, +supporting both authentication and download workflows. 
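+
+Illustrative usage sketch (assumes a reachable Redis using the constructor
+defaults; the URL is a placeholder):
+
+    provider = RedisQueueProvider(env_prefix="dev")
+    provider.add_task(provider.AUTH_INBOX, {"url": "https://www.youtube.com/watch?v=example"})
+    task = provider.get_task(provider.AUTH_INBOX)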
+""" + +import json +import logging +import time +from abc import ABC, abstractmethod +from typing import Dict, List, Optional, Any, Tuple, Union + +import redis + +logger = logging.getLogger(__name__) + + +class QueueProvider(ABC): + """Abstract base class for queue operations.""" + + @abstractmethod + def get_task(self, queue_name: str) -> Optional[Dict]: + """Get a task from the specified queue.""" + pass + + @abstractmethod + def get_tasks_batch(self, queue_name: str, batch_size: int) -> List[Dict]: + """Get a batch of tasks from the specified queue.""" + pass + + @abstractmethod + def report_success(self, queue_name: str, task_id: str, result: Dict) -> bool: + """Report a successful task completion.""" + pass + + @abstractmethod + def report_failure(self, queue_name: str, task_id: str, error: Dict) -> bool: + """Report a task failure.""" + pass + + @abstractmethod + def report_skipped(self, queue_name: str, task_id: str, reason: Dict) -> bool: + """Report a task that was skipped.""" + pass + + @abstractmethod + def mark_in_progress(self, queue_name: str, task_id: str, worker_id: str) -> bool: + """Mark a task as in progress.""" + pass + + @abstractmethod + def remove_in_progress(self, queue_name: str, task_id: str) -> bool: + """Remove a task from the in-progress tracking.""" + pass + + @abstractmethod + def get_queue_length(self, queue_name: str) -> int: + """Get the current length of a queue.""" + pass + + @abstractmethod + def add_task(self, queue_name: str, task: Dict) -> bool: + """Add a task to a queue.""" + pass + + @abstractmethod + def add_tasks_batch(self, queue_name: str, tasks: List[Dict]) -> int: + """Add a batch of tasks to a queue. Returns number of tasks added.""" + pass + + +class RedisQueueProvider(QueueProvider): + """Redis implementation of the QueueProvider interface.""" + + def __init__(self, redis_host: str = "localhost", redis_port: int = 6379, + redis_password: Optional[str] = None, redis_db: int = 0, + env_prefix: Optional[str] = None): + """Initialize the Redis queue provider.""" + prefix = f"{env_prefix}_" if env_prefix else "" + + # Queue name constants + # Authentication stage + self.AUTH_INBOX = f"{prefix}queue2_auth_inbox" + self.AUTH_RESULT = f"{prefix}queue2_auth_result" + self.AUTH_FAIL = f"{prefix}queue2_auth_fail" + self.AUTH_SKIPPED = f"{prefix}queue2_auth_skipped" + self.AUTH_PROGRESS = f"{prefix}queue2_auth_progress" + + # Download stage + self.DL_TASKS = f"{prefix}queue2_dl_inbox" + self.DL_RESULT = f"{prefix}queue2_dl_result" + self.DL_FAIL = f"{prefix}queue2_dl_fail" + self.DL_SKIPPED = f"{prefix}queue2_dl_skipped" + self.DL_PROGRESS = f"{prefix}queue2_dl_progress" + + self.redis_client = redis.Redis( + host=redis_host, + port=redis_port, + password=redis_password, + db=redis_db, + decode_responses=True + ) + self._validate_connection() + + def _validate_connection(self) -> None: + """Validate the Redis connection.""" + try: + self.redis_client.ping() + logger.info("Successfully connected to Redis") + except redis.ConnectionError as e: + logger.error(f"Failed to connect to Redis: {e}") + raise + + def get_task(self, queue_name: str) -> Optional[Dict]: + """Get a task from the specified queue. + + For LIST type queues, this pops an item. + For HASH type queues, this just reads an item without removing it. 
+ """ + try: + queue_type = self._get_queue_type(queue_name) + + if queue_type == "list": + # BRPOP with a timeout of 1 second + result = self.redis_client.brpop(queue_name, timeout=1) + if result: + _, task_data = result + try: + # Assume it's a JSON object, which is the standard format. + return json.loads(task_data) + except json.JSONDecodeError: + # If it fails, check if it's the auth inbox queue and a plain string. + # This provides backward compatibility with queues populated with raw URLs. + if queue_name == self.AUTH_INBOX and isinstance(task_data, str): + logger.debug(f"Task from '{queue_name}' is a plain string. Wrapping it in a task dictionary.") + return {"url": task_data} + else: + # If it's not the auth inbox or not a string, log and re-raise. + logger.error(f"Failed to decode JSON task from queue '{queue_name}': {task_data}") + raise + return None + + elif queue_type == "hash": + # For hash queues, we just get a random key + keys = self.redis_client.hkeys(queue_name) + if not keys: + return None + + # Get a random key + import random + key = random.choice(keys) + value = self.redis_client.hget(queue_name, key) + + if value: + task = json.loads(value) + task["id"] = key # Add the key as id + return task + + return None + + else: + logger.warning(f"Unsupported queue type for {queue_name}: {queue_type}") + return None + + except Exception as e: + logger.error(f"Error getting task from {queue_name}: {e}") + return None + + def get_tasks_batch(self, queue_name: str, batch_size: int) -> List[Dict]: + """Get a batch of tasks from the specified queue.""" + tasks = [] + try: + queue_type = self._get_queue_type(queue_name) + + if queue_type == "list": + # Use pipeline for efficiency + pipe = self.redis_client.pipeline() + for _ in range(batch_size): + pipe.rpop(queue_name) + results = pipe.execute() + + for result in results: + if result: + try: + tasks.append(json.loads(result)) + except json.JSONDecodeError: + if queue_name == self.AUTH_INBOX and isinstance(result, str): + tasks.append({"url": result}) + else: + logger.error(f"Failed to decode JSON task from batch in queue '{queue_name}': {result}") + # In batch mode, we skip the malformed item and continue. 
+ + elif queue_type == "hash": + # For hash queues, get multiple random keys + keys = self.redis_client.hkeys(queue_name) + if not keys: + return [] + + # Get random keys up to batch_size + import random + selected_keys = random.sample(keys, min(batch_size, len(keys))) + + # Use pipeline for efficiency + pipe = self.redis_client.pipeline() + for key in selected_keys: + pipe.hget(queue_name, key) + results = pipe.execute() + + for i, result in enumerate(results): + if result: + task = json.loads(result) + task["id"] = selected_keys[i] # Add the key as id + tasks.append(task) + + else: + logger.warning(f"Unsupported queue type for batch operations: {queue_name}") + + except Exception as e: + logger.error(f"Error getting tasks batch from {queue_name}: {e}") + + return tasks + + def report_success(self, queue_name: str, task_id: str, result: Dict) -> bool: + """Report a successful task completion.""" + try: + # Ensure task_id is included in the result + result["task_id"] = task_id + result["timestamp"] = time.time() + + # Store in the success hash + self.redis_client.hset(queue_name, task_id, json.dumps(result)) + return True + except Exception as e: + logger.error(f"Error reporting success to {queue_name}: {e}") + return False + + def report_failure(self, queue_name: str, task_id: str, error: Dict) -> bool: + """Report a task failure.""" + try: + # Ensure task_id is included in the error + error["task_id"] = task_id + error["timestamp"] = time.time() + + # Store in the failure hash + self.redis_client.hset(queue_name, task_id, json.dumps(error)) + return True + except Exception as e: + logger.error(f"Error reporting failure to {queue_name}: {e}") + return False + + def report_skipped(self, queue_name: str, task_id: str, reason: Dict) -> bool: + """Report a task that was skipped.""" + try: + # Ensure task_id is included in the reason + reason["task_id"] = task_id + reason["timestamp"] = time.time() + + # Store in the skipped hash + self.redis_client.hset(queue_name, task_id, json.dumps(reason)) + return True + except Exception as e: + logger.error(f"Error reporting skipped to {queue_name}: {e}") + return False + + def mark_in_progress(self, queue_name: str, task_id: str, worker_id: str) -> bool: + """Mark a task as in progress.""" + try: + progress_data = { + "task_id": task_id, + "worker_id": worker_id, + "start_time": time.time() + } + + # Store in the progress hash + self.redis_client.hset(queue_name, task_id, json.dumps(progress_data)) + return True + except Exception as e: + logger.error(f"Error marking task in progress in {queue_name}: {e}") + return False + + def remove_in_progress(self, queue_name: str, task_id: str) -> bool: + """Remove a task from the in-progress tracking.""" + try: + # Remove from the progress hash + self.redis_client.hdel(queue_name, task_id) + return True + except Exception as e: + logger.error(f"Error removing task from progress in {queue_name}: {e}") + return False + + def get_queue_length(self, queue_name: str) -> int: + """Get the current length of a queue.""" + try: + queue_type = self._get_queue_type(queue_name) + + if queue_type == "list": + return self.redis_client.llen(queue_name) + elif queue_type == "hash": + return self.redis_client.hlen(queue_name) + else: + logger.warning(f"Unsupported queue type for {queue_name}: {queue_type}") + return 0 + except Exception as e: + logger.error(f"Error getting queue length for {queue_name}: {e}") + return 0 + + def add_task(self, queue_name: str, task: Dict) -> bool: + """Add a task to a queue.""" + try: + 
queue_type = self._get_queue_type(queue_name) + + if queue_type == "list": + # For list queues, we push to the left (LPUSH) + self.redis_client.lpush(queue_name, json.dumps(task)) + return True + + elif queue_type == "hash": + # For hash queues, we need a task_id + task_id = task.get("id") or task.get("task_id") + if not task_id: + logger.error(f"Cannot add task to hash queue {queue_name} without an id") + return False + + self.redis_client.hset(queue_name, task_id, json.dumps(task)) + return True + + else: + logger.warning(f"Unsupported queue type for {queue_name}: {queue_type}") + return False + + except Exception as e: + logger.error(f"Error adding task to {queue_name}: {e}") + return False + + def add_tasks_batch(self, queue_name: str, tasks: List[Dict]) -> int: + """Add a batch of tasks to a queue. Returns number of tasks added.""" + if not tasks: + return 0 + + try: + queue_type = self._get_queue_type(queue_name) + + if queue_type == "list": + # Use pipeline for efficiency + pipe = self.redis_client.pipeline() + for task in tasks: + pipe.lpush(queue_name, json.dumps(task)) + results = pipe.execute() + return len([r for r in results if r]) + + elif queue_type == "hash": + # Use pipeline for efficiency + pipe = self.redis_client.pipeline() + added_count = 0 + + for task in tasks: + task_id = task.get("id") or task.get("task_id") + if task_id: + pipe.hset(queue_name, task_id, json.dumps(task)) + added_count += 1 + else: + logger.warning(f"Skipping task without id for hash queue {queue_name}") + + pipe.execute() + return added_count + + else: + logger.warning(f"Unsupported queue type for batch operations: {queue_name}") + return 0 + + except Exception as e: + logger.error(f"Error adding tasks batch to {queue_name}: {e}") + return 0 + + def _get_queue_type(self, queue_name: str) -> str: + """Determine the Redis data type of a queue.""" + try: + queue_type = self.redis_client.type(queue_name) + if not queue_type or queue_type == "none": + # Queue doesn't exist yet, infer type from name + if queue_name.endswith(("_inbox", "_tasks")): + return "list" + else: + return "hash" + return queue_type + except Exception as e: + logger.error(f"Error determining queue type for {queue_name}: {e}") + return "unknown" + + def requeue_failed_tasks(self, source_queue: str, target_queue: str, + batch_size: int = 100) -> int: + """Requeue failed tasks from a failure queue to an inbox queue.""" + try: + # Get failed tasks + failed_tasks = self.get_tasks_batch(source_queue, batch_size) + if not failed_tasks: + return 0 + + # Prepare tasks for requeuing + requeued_count = 0 + requeue_tasks = [] + + for task in failed_tasks: + # Extract the original URL or task data + url = task.get("url") + if url: + # For auth failures, we just need the URL + requeue_tasks.append({"url": url}) + requeued_count += 1 + else: + # For download failures, we need the original task data + original_task = task.get("original_task") + if original_task: + requeue_tasks.append(original_task) + requeued_count += 1 + + # Add tasks to target queue + if requeue_tasks: + self.add_tasks_batch(target_queue, requeue_tasks) + + # Remove from source queue + pipe = self.redis_client.pipeline() + for task in failed_tasks: + task_id = task.get("id") or task.get("task_id") + if task_id: + pipe.hdel(source_queue, task_id) + pipe.execute() + + return requeued_count + + except Exception as e: + logger.error(f"Error requeuing failed tasks from {source_queue} to {target_queue}: {e}") + return 0 + + def get_queue_stats(self) -> Dict[str, Dict[str, int]]: + 
"""Get statistics for all queues.""" + stats = {} + + # Authentication queues + auth_queues = { + "auth_inbox": self.AUTH_INBOX, + "auth_result": self.AUTH_RESULT, + "auth_fail": self.AUTH_FAIL, + "auth_skipped": self.AUTH_SKIPPED, + "auth_progress": self.AUTH_PROGRESS + } + + # Download queues + dl_queues = { + "dl_tasks": self.DL_TASKS, + "dl_result": self.DL_RESULT, + "dl_fail": self.DL_FAIL, + "dl_skipped": self.DL_SKIPPED, + "dl_progress": self.DL_PROGRESS + } + + # Get stats for auth queues + auth_stats = {} + for name, queue in auth_queues.items(): + auth_stats[name] = self.get_queue_length(queue) + stats["auth"] = auth_stats + + # Get stats for download queues + dl_stats = {} + for name, queue in dl_queues.items(): + dl_stats[name] = self.get_queue_length(queue) + stats["download"] = dl_stats + + return stats diff --git a/ytops_client-source/ytops_client/stress_policy/queue_workers.py b/ytops_client-source/ytops_client/stress_policy/queue_workers.py new file mode 100644 index 0000000..1a5857f --- /dev/null +++ b/ytops_client-source/ytops_client/stress_policy/queue_workers.py @@ -0,0 +1,579 @@ +""" +Queue-based worker functions for the stress policy tool. + +This module contains worker functions that process tasks from Redis queues +instead of files, supporting both authentication and download workflows. +""" + +import json +import logging +import os +import random +import re +import shlex +import sys +import tempfile +import shutil +import threading +import time +from copy import deepcopy +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional, Any, Tuple, Union + +from . import utils as sp_utils +from .process_runners import run_command, run_docker_container, get_worker_id +from .workers import get_auth_manager +from .queue_provider import RedisQueueProvider + +logger = logging.getLogger(__name__) + + +def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manager_instance, running_processes, process_lock): + """Worker function for processing authentication tasks from a queue in batches.""" + owner_id = f"queue-auth-worker-{worker_id}" + settings = policy.get('settings', {}) + exec_control = policy.get('execution_control', {}) + gen_policy = policy.get('info_json_generation_policy', {}) + queue_policy = policy.get('queue_policy', {}) + + profile_prefix = gen_policy.get('profile_prefix') + if not profile_prefix: + logger.error(f"[Worker {worker_id}] Queue auth mode requires 'info_json_generation_policy.profile_prefix'. Worker exiting.") + return [] + + save_dir = settings.get('save_info_json_dir') + if not save_dir and queue_policy.get('formats_to_download'): + save_dir = os.path.join('run', 'stress_policy', 'info_jsons') + logger.info(f"[Worker {worker_id}] 'formats_to_download' is set and 'save_info_json_dir' is not, defaulting to '{save_dir}'") + + if save_dir: + os.makedirs(save_dir, exist_ok=True) + logger.info(f"[Worker {worker_id}] Will save info.json files to '{save_dir}'") + + batch_size = queue_policy.get('batch_size', 1) + logger.info(f"[Worker {worker_id}] Auth worker configured to process tasks in batches of {batch_size}.") + task_counter = 0 + + while not state_manager.shutdown_event.is_set(): + locked_profile = None + tasks = [] + + try: + # 1. 
Lock a profile FIRST + locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) + if not locked_profile: + polling_interval = exec_control.get('worker_polling_interval_seconds', 1) + logger.debug(f"[Worker {worker_id}] No profiles available to lock. Sleeping for {polling_interval}s.") + time.sleep(polling_interval) + continue + + profile_name = locked_profile['name'] + proxy_url = locked_profile['proxy'] + + # 2. Get a batch of tasks from the queue + tasks = state_manager.get_auth_tasks_batch(batch_size) + if not tasks: + polling_interval = exec_control.get('worker_polling_interval_seconds', 1) + logger.debug(f"[Worker {worker_id}] No tasks available for profile '{profile_name}'. Unlocking and sleeping for {polling_interval}s.") + # Unlock immediately since we have no work to do. + # No cooldown is applied here to make the profile available again quickly. + profile_manager_instance.unlock_profile(profile_name, owner=owner_id) + locked_profile = None # To prevent double-unlock in finally + time.sleep(polling_interval) + continue + + logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' to process a batch of {len(tasks)} tasks.") + + # 3. Process each task in the batch + for task in tasks: + if state_manager.shutdown_event.is_set(): + logger.info(f"[Worker {worker_id}] Shutdown requested, stopping batch processing for profile '{profile_name}'.") + break + + temp_task_dir = None + task_id = None + url = None + try: + temp_task_dir = tempfile.mkdtemp(prefix=f"queue-auth-{worker_id}-") + task_id = task.get('id') or task.get('task_id') + if not task_id: + task_id = f"task_{worker_id}_{task_counter}" + task_counter += 1 + task['task_id'] = task_id + + url = task.get('url') + if not url: + logger.error(f"[Worker {worker_id}] Task {task_id} has no URL. 
Skipping.") + state_manager.report_auth_skipped(task_id, {"error": "No URL in task", "task": task}) + continue + + logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task {task_id}: {url}") + state_manager.mark_auth_in_progress(task_id, owner_id) + + # --- Main processing logic for a single task --- + success, info_data, stderr, retcode = False, None, "", 0 + if args.dummy or args.dummy_batch: + logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Simulating auth for {url}") + dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {}) + min_seconds = dummy_settings.get('auth_min_seconds', 0.1) + max_seconds = dummy_settings.get('auth_max_seconds', 0.5) + failure_rate = args.dummy_auth_failure_rate or dummy_settings.get('auth_failure_rate', 0.0) + skipped_rate = args.dummy_auth_skipped_failure_rate or dummy_settings.get('auth_skipped_failure_rate', 0.0) + time.sleep(random.uniform(min_seconds, max_seconds)) + + rand_val = random.random() + if rand_val < skipped_rate: + stderr = "Dummy skipped failure" + elif rand_val < (skipped_rate + failure_rate): + stderr = "Dummy fatal failure" + else: + success = True + video_id = sp_utils.get_video_id(url) or f"dummy_{random.randint(1000, 9999)}" + info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True, 'formats': [{'format_id': '18'}, {'format_id': '140'}]} + else: + client, req_params = state_manager.get_client_for_request(profile_name, gen_policy) + cmd = [ + sys.executable, '-m', 'ytops_client.cli', 'get-info', + '--client', client, '--profile', profile_name + ] + if proxy_url: + cmd.extend(['--proxy', proxy_url]) + if req_params: + cmd.extend(['--request-params', json.dumps(req_params)]) + extra_args = gen_policy.get('extra_args') + if extra_args: + cmd.extend(shlex.split(extra_args)) + + # The URL must be the last positional argument + cmd.append(url) + + logger.info(f"[Worker {worker_id}] Running command: {' '.join(shlex.quote(s) for s in cmd)}") + retcode, stdout, stderr = run_command( + cmd, running_processes, process_lock, stream_output=args.verbose, + stream_prefix=f"[Worker {worker_id} | get-info] " + ) + success = (retcode == 0) + if success: + info_json_path = next((line.strip() for line in stdout.strip().split('\n') if line.endswith('.json') and os.path.exists(line.strip())), None) + if info_json_path: + try: + with open(info_json_path, 'r', encoding='utf-8') as f: + info_data = json.load(f) + except (IOError, json.JSONDecodeError) as e: + logger.error(f"[Worker {worker_id}] Failed to read/parse info.json from get-info: {e}") + success = False + stderr += f"\nFailed to read/parse info.json: {e}" + else: + logger.error(f"[Worker {worker_id}] Command succeeded but no info.json path found in output.") + success = False + stderr += "\nNo info.json path in output" + + # --- Result processing for a single task --- + if success and info_data: + try: + auth_env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '') + info_data['_ytops_metadata'] = { + 'profile_name': profile_name, 'proxy_url': proxy_url, + 'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(), + 'task_id': task_id, 'url': url, 'auth_env': auth_env_name + } + + final_path = None + if save_dir: + video_id = info_data.get('id', 'unknown') + sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy' + new_name = f"{video_id}-{profile_name}-{sanitized_proxy}.json" + final_path = os.path.join(save_dir, new_name) + with open(final_path, 'w', encoding='utf-8') as f: + 
json.dump(info_data, f, indent=2) + logger.info(f"[Worker {worker_id}] [{profile_name}] Saved info.json to '{final_path}'") + else: + # This case means auth-only (no downloads) and no save_dir specified. + # The info.json is not persisted. + logger.debug(f"[Worker {worker_id}] [{profile_name}] Auth-only task succeeded. No save_dir, so info.json is not saved.") + + profile_manager_instance.record_activity(profile_name, 'success') + + formats_to_download = queue_policy.get('formats_to_download') + download_tasks = [] + if formats_to_download: + task_formats = [] + if formats_to_download == 'all': + task_formats = [f['format_id'] for f in info_data.get('formats', [])] + elif isinstance(formats_to_download, list): + task_formats = formats_to_download + else: + task_formats = [str(formats_to_download)] + + for format_id in task_formats: + download_tasks.append({ + 'info_json_path': final_path, 'format_id': format_id, + 'video_id': info_data.get('id'), 'url': url, + 'auth_profile_name': profile_name, 'proxy_url': proxy_url, + 'auth_env': auth_env_name, + 'original_task': task + }) + + if download_tasks: + added_count = state_manager.add_download_tasks_batch(download_tasks) + logger.info(f"[Worker {worker_id}] [{profile_name}] Added {added_count} download tasks to queue") + profile_manager_instance.increment_pending_downloads(profile_name, count=added_count) + + state_manager.report_auth_success(task_id, { + "url": url, "video_id": info_data.get('id'), "profile_name": profile_name, + "proxy_url": proxy_url, "info_json_path": final_path, + "download_tasks_created": len(download_tasks) + }) + except Exception as e: + logger.error(f"[Worker {worker_id}] [{profile_name}] Error processing successful auth result: {e}", exc_info=True) + profile_manager_instance.record_activity(profile_name, 'failure') + state_manager.report_auth_failure(task_id, {"error": f"Error processing info.json: {str(e)}", "url": url}) + else: + is_bot_error = "Sign in to confirm you're not a bot" in stderr + is_timeout_error = "Read timed out" in stderr + is_unavailable = "This video is unavailable" in stderr or "Video unavailable" in stderr + is_private = "This video is private" in stderr or "Private video" in stderr + is_deleted = "This video has been removed" in stderr + is_dummy_skipped = "Dummy skipped failure" in stderr + + if is_unavailable or is_private or is_deleted or is_dummy_skipped: + reason = "Video unavailable" if is_unavailable else "Private video" if is_private else "Video removed" if is_deleted else "Dummy skipped" + logger.warning(f"[Worker {worker_id}] [{profile_name}] Auth skipped for {url}: {reason}") + profile_manager_instance.record_activity(profile_name, 'tolerated_error') + state_manager.report_auth_skipped(task_id, {"url": url, "reason": reason, "stderr": stderr}) + else: + error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else "Dummy fatal failure" if "Dummy fatal failure" in stderr else f"Exit code {retcode}" + logger.error(f"[Worker {worker_id}] [{profile_name}] Authentication failed ({error_type}): {url}") + profile_manager_instance.record_activity(profile_name, 'failure') + state_manager.report_auth_failure(task_id, {"url": url, "error_type": error_type, "stderr": stderr, "exit_code": retcode}) + + except Exception as e: + logger.error(f"[Worker {worker_id}] [{profile_name}] Unexpected error processing task {task_id}: {e}", exc_info=True) + if task_id: + state_manager.report_auth_failure(task_id, {"error": f"Unexpected error: {str(e)}", "url": url or "unknown"}) + 
profile_manager_instance.record_activity(profile_name, 'failure') + finally: + if temp_task_dir and os.path.exists(temp_task_dir): + shutil.rmtree(temp_task_dir) + if task_id: + state_manager.remove_auth_in_progress(task_id) + + except Exception as e: + logger.error(f"[Worker {worker_id}] Unexpected error in outer worker loop: {e}", exc_info=True) + if locked_profile: + profile_manager_instance.record_activity(locked_profile['name'], 'failure') + + finally: + if locked_profile: + cooldown = None + cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds') + if cooldown_config: + try: + val = json.loads(cooldown_config) + if isinstance(val, list) and len(val) == 2 and val[0] < val[1]: + cooldown = random.randint(val[0], val[1]) + elif isinstance(val, int): + cooldown = val + except (json.JSONDecodeError, TypeError): + if cooldown_config.isdigit(): + cooldown = int(cooldown_config) + + if cooldown: + logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.") + + profile_manager_instance.unlock_profile( + locked_profile['name'], + owner=owner_id, + rest_for_seconds=cooldown + ) + + logger.info(f"[Worker {worker_id}] Queue auth worker exiting.") + return [] + + +def run_queue_download_worker(worker_id, policy, state_manager, args, profile_manager_instance, running_processes, process_lock): + """Worker function for processing download tasks from a queue.""" + owner_id = f"queue-dl-worker-{worker_id}" + settings = policy.get('settings', {}) + exec_control = policy.get('execution_control', {}) + d_policy = policy.get('download_policy', {}) + queue_policy = policy.get('queue_policy', {}) + + profile_prefix = d_policy.get('profile_prefix') + if not profile_prefix: + logger.error(f"[Worker {worker_id}] Queue download mode requires 'download_policy.profile_prefix'. Worker exiting.") + return [] + + output_dir = d_policy.get('output_dir') + if output_dir: + os.makedirs(output_dir, exist_ok=True) + logger.info(f"[Worker {worker_id}] Will save downloads to '{output_dir}'") + + task_counter = 0 + + while not state_manager.shutdown_event.is_set(): + locked_profile = None + temp_task_dir = None + task = None + auth_profile_name, auth_env = None, None + + try: + task = state_manager.get_download_task() + if not task: + polling_interval = exec_control.get('worker_polling_interval_seconds', 1) + logger.debug(f"[Worker {worker_id}] No download tasks available in queue. Sleeping for {polling_interval}s.") + time.sleep(polling_interval) + continue + + task_id = task.get('id') or task.get('task_id') + if not task_id: + task_id = f"dl_task_{worker_id}_{task_counter}" + task_counter += 1 + task['task_id'] = task_id + + info_json_path = task.get('info_json_path') + format_id = task.get('format_id') + video_id = task.get('video_id') + url = task.get('url') + + # Get auth metadata for decrementing counter later. + # Primary source is the task itself. Fallback to info.json for older tasks. 
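+            # Tasks created by run_queue_auth_worker carry 'auth_profile_name' and 'auth_env' directly;
+            # the '_ytops_metadata' block inside the saved info.json covers tasks queued before those
+            # fields were embedded in the task payload.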
+ auth_profile_name = task.get('auth_profile_name') + auth_env = task.get('auth_env') + + if (not auth_profile_name or not auth_env): + if info_json_path and os.path.exists(info_json_path): + try: + with open(info_json_path, 'r', encoding='utf-8') as f: + info_data = json.load(f) + metadata = info_data.get('_ytops_metadata', {}) + auth_profile_name = auth_profile_name or metadata.get('profile_name') + auth_env = auth_env or metadata.get('auth_env') + logger.debug(f"Read auth metadata from info.json fallback: profile={auth_profile_name}, env={auth_env}") + except (IOError, json.JSONDecodeError) as e: + logger.warning(f"[Worker {worker_id}] Could not read info.json to get auth metadata: {e}") + else: + logger.warning(f"[Worker {worker_id}] Task missing auth metadata and info.json is missing or not specified. Pending downloads counter will not be decremented.") + + # In dummy mode, the info.json file doesn't need to exist locally. + if not (args.dummy or args.dummy_batch): + if not info_json_path or not os.path.exists(info_json_path): + logger.error(f"[Worker {worker_id}] Task {task_id} has invalid info_json_path: {info_json_path}. Skipping.") + state_manager.report_download_skipped(task_id, {"error": "Invalid info_json_path", "task": task}) + # The finally block will handle decrementing the pending downloads counter. + continue + + if not format_id: + logger.error(f"[Worker {worker_id}] Task {task_id} has no format_id. Skipping.") + state_manager.report_download_skipped(task_id, {"error": "No format_id in task", "task": task}) + continue + + + logger.info(f"[Worker {worker_id}] Processing download task {task_id}: {video_id or url} format {format_id}") + state_manager.mark_download_in_progress(task_id, owner_id) + + specific_profile = task.get('auth_profile_name') or task.get('profile_name') # 'profile_name' for backward compatibility + if specific_profile: + locked_profile = profile_manager_instance.lock_profile(owner=owner_id, specific_profile_name=specific_profile) + if not locked_profile: + logger.warning(f"[Worker {worker_id}] Could not lock specific profile '{specific_profile}'. Trying any profile with prefix.") + locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) + else: + locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) + + if not locked_profile: + polling_interval = exec_control.get('worker_polling_interval_seconds', 1) + logger.warning(f"[Worker {worker_id}] No profiles available for task {task_id}. Re-queueing and sleeping for {polling_interval}s.") + # Re-queue the task by adding it back to the inbox. + state_manager.add_download_tasks_batch([task]) + # The 'in_progress' marker for this attempt will be cleaned up by the finally block. 
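+                # The inbox is a FIFO list (tasks are LPUSHed and workers BRPOP), so the re-queued
+                # task lands at the back of the queue rather than being retried immediately.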
+ time.sleep(polling_interval) + continue + + profile_name = locked_profile['name'] + proxy_url = locked_profile['proxy'] + logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' with proxy '{proxy_url}'") + + temp_task_dir = tempfile.mkdtemp(prefix=f"queue-dl-{worker_id}-") + success = False + downloaded_filepath = None + stderr = "" + + if args.dummy or args.dummy_batch: + logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Simulating download for {video_id or url} format {format_id}") + dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {}) + min_seconds = dummy_settings.get('download_min_seconds', 1.0) + max_seconds = dummy_settings.get('download_max_seconds', 3.0) + failure_rate = dummy_settings.get('download_failure_rate', 0.0) + skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0) + time.sleep(random.uniform(min_seconds, max_seconds)) + + rand_val = random.random() + if rand_val < skipped_rate: + stderr = "Dummy skipped failure" + elif rand_val < (skipped_rate + failure_rate): + stderr = "Dummy fatal failure" + else: + success = True + downloaded_filepath = f"/dev/null/{video_id}.mp4" + # In dummy mode, the info.json is a read-only artifact, just like in + # the non-dummy Airflow path. It is not renamed or deleted to avoid + # race conditions between workers processing different formats for the + # same video. The files can be cleaned up manually between runs. + else: + cmd = [ + sys.executable, '-m', 'ytops_client.cli', 'download', 'py', + '--load-info-json', info_json_path, + '-f', format_id + ] + if proxy_url: + cmd.extend(['--proxy', proxy_url]) + if output_dir: + cmd.extend(['--output-dir', output_dir]) + extra_args = d_policy.get('extra_args') + if extra_args: + cmd.extend(shlex.split(extra_args)) + + logger.info(f"[Worker {worker_id}] Running command: {' '.join(shlex.quote(s) for s in cmd)}") + retcode, stdout, stderr = run_command( + cmd, running_processes, process_lock, stream_output=args.verbose, + stream_prefix=f"[Worker {worker_id} | download] " + ) + success = (retcode == 0) + if success: + for line in stdout.strip().split('\n'): + if os.path.exists(line.strip()): + downloaded_filepath = line.strip() + break + + if success: + # --- Airflow Integration --- + if d_policy.get('output_to_airflow_ready_dir'): + base_path = d_policy.get('airflow_ready_dir_base_path') + if not base_path: + logger.error(f"[Worker {worker_id}] 'output_to_airflow_ready_dir' is true but 'airflow_ready_dir_base_path' is not set. Skipping Airflow output.") + else: + try: + # Create a unique, timestamped directory for the video + ts = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S_%f') + final_dir = os.path.join(base_path, ts + '_' + (video_id or 'unknown_video')) + os.makedirs(final_dir, exist_ok=True) + + # Copy info.json to avoid a race condition where multiple download workers + # for different formats try to move the same source file. 
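+                                # The destination directory is unique per task (microsecond timestamp plus
+                                # video id), so concurrent workers handling different formats of the same
+                                # video should not collide on destination paths.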
+ if info_json_path and os.path.exists(info_json_path): + final_info_json_path = os.path.join(final_dir, os.path.basename(info_json_path)) + shutil.copy(info_json_path, final_info_json_path) + logger.info(f"[Worker {worker_id}] Copied info.json to Airflow-ready dir: {final_info_json_path}") + # Update the path for reporting to point to the new copy + info_json_path = final_info_json_path + + # Move downloaded file (if not in dummy mode) + if downloaded_filepath and os.path.exists(downloaded_filepath): + final_media_path = os.path.join(final_dir, os.path.basename(downloaded_filepath)) + shutil.move(downloaded_filepath, final_media_path) + logger.info(f"[Worker {worker_id}] Moved media file to Airflow-ready dir: {final_media_path}") + # Update the path for reporting + downloaded_filepath = final_media_path + + except Exception as e: + logger.error(f"[Worker {worker_id}] Failed to move files to Airflow-ready directory: {e}", exc_info=True) + + profile_manager_instance.record_activity(profile_name, 'download_success') + state_manager.report_download_success(task_id, { + "video_id": video_id, "url": url, "format_id": format_id, + "profile_name": profile_name, "proxy_url": proxy_url, + "downloaded_filepath": downloaded_filepath, "info_json_path": info_json_path + }) + logger.info(f"[Worker {worker_id}] Download successful: {video_id or url} format {format_id}") + + # --- Rename source info.json to mark as processed --- + if d_policy.get('rename_source_info_json_on_success'): + # Use the original path from the task, as the `info_json_path` variable + # may have been updated by the Airflow logic to point to a copy. + source_path_to_rename = task.get('info_json_path') + if source_path_to_rename and os.path.exists(source_path_to_rename): + try: + processed_path = source_path_to_rename + ".processed" + shutil.move(source_path_to_rename, processed_path) + logger.info(f"[Worker {worker_id}] Renamed source info.json to '{processed_path}'") + except Exception as e: + logger.warning(f"[Worker {worker_id}] Could not rename source info.json '{source_path_to_rename}': {e}") + else: + is_bot_error = "Sign in to confirm you're not a bot" in stderr + is_timeout_error = "Read timed out" in stderr + is_unavailable = "This video is unavailable" in stderr or "Video unavailable" in stderr + is_format_error = "requested format not available" in stderr + + if is_unavailable or is_format_error: + logger.warning(f"[Worker {worker_id}] Download skipped: {video_id or url} format {format_id}") + profile_manager_instance.record_activity(profile_name, 'tolerated_error') + state_manager.report_download_skipped(task_id, { + "video_id": video_id, "url": url, "format_id": format_id, + "reason": "Video unavailable" if is_unavailable else "Format not available", "stderr": stderr + }) + else: + error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else f"Exit code {retcode}" + logger.error(f"[Worker {worker_id}] Download failed ({error_type}): {video_id or url} format {format_id}") + profile_manager_instance.record_activity(profile_name, 'download_error') + state_manager.report_download_failure(task_id, { + "video_id": video_id, "url": url, "format_id": format_id, + "error_type": error_type, "stderr": stderr, "exit_code": retcode, + "original_task": task + }) + + # Decrement pending downloads counter on the original auth profile, regardless of outcome + if auth_profile_name and auth_env: + auth_manager = get_auth_manager(profile_manager_instance, auth_env) + if auth_manager: + 
auth_manager.decrement_pending_downloads(auth_profile_name) + else: + logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.") + elif task: # Only warn if we had a task but couldn't get metadata + logger.warning(f"Could not find auth profile name and/or auth_env in info.json metadata. Pending downloads counter will not be decremented.") + + except Exception as e: + logger.error(f"[Worker {worker_id}] Unexpected error: {e}", exc_info=True) + if task and task_id: + state_manager.report_download_failure(task_id, { + "video_id": video_id if 'video_id' in locals() else "unknown", + "url": url if 'url' in locals() else "unknown", + "format_id": format_id if 'format_id' in locals() else "unknown", + "error": f"Unexpected error: {str(e)}", "original_task": task + }) + if locked_profile: + profile_manager_instance.record_activity(locked_profile['name'], 'download_error') + + finally: + if locked_profile: + cooldown = None + cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds') + if cooldown_config: + try: + val = json.loads(cooldown_config) + if isinstance(val, list) and len(val) == 2 and val[0] < val[1]: + cooldown = random.randint(val[0], val[1]) + elif isinstance(val, int): + cooldown = val + except (json.JSONDecodeError, TypeError): + if cooldown_config.isdigit(): + cooldown = int(cooldown_config) + + if cooldown: + logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.") + + profile_manager_instance.unlock_profile( + locked_profile['name'], + owner=owner_id, + rest_for_seconds=cooldown + ) + + if temp_task_dir and os.path.exists(temp_task_dir): + shutil.rmtree(temp_task_dir) + + if task and task_id: + state_manager.remove_download_in_progress(task_id) + + logger.info(f"[Worker {worker_id}] Queue download worker exiting.") + return [] diff --git a/ytops_client/stress_policy/state_manager.py b/ytops_client-source/ytops_client/stress_policy/state_manager.py similarity index 81% rename from ytops_client/stress_policy/state_manager.py rename to ytops_client-source/ytops_client/stress_policy/state_manager.py index 6605b69..af139c7 100644 --- a/ytops_client/stress_policy/state_manager.py +++ b/ytops_client-source/ytops_client/stress_policy/state_manager.py @@ -7,15 +7,18 @@ import threading import time from datetime import datetime from pathlib import Path +from typing import Dict, List, Optional, Any, Tuple, Union from . 
import utils as sp_utils +from .queue_provider import QueueProvider, RedisQueueProvider logger = logging.getLogger(__name__) class StateManager: """Tracks statistics, manages rate limits, and persists state across runs.""" - def __init__(self, policy_name, disable_log_writing=False, shutdown_event=None): + def __init__(self, policy_name, disable_log_writing=False, shutdown_event=None, + queue_provider: Optional[QueueProvider] = None): self.disable_log_writing = disable_log_writing self.state_file_path = Path(f"{policy_name}_state.json") self.stats_file_path = Path(f"{policy_name}_stats.jsonl") @@ -42,8 +45,24 @@ class StateManager: 'successful_batches': 0, 'failed_batches': 0, 'total_videos_processed': 0, + # For queue modes + 'queue_stats': { + 'auth': { + 'total_processed': 0, + 'successful': 0, + 'failed': 0, + 'skipped': 0 + }, + 'download': { + 'total_processed': 0, + 'successful': 0, + 'failed': 0, + 'skipped': 0 + } + } } self.stats_file_handle = None + self.queue_provider = queue_provider self._load_state() self.print_historical_summary() self._open_stats_log() @@ -792,3 +811,211 @@ class StateManager: logger.info(f" - {proxy}: {count} attempts (avg this run: {rate_per_hour:.2f}/hour)") logger.info("--------------------") + # --- Queue-specific methods --- + + def initialize_queue_provider(self, redis_host: str, redis_port: int, + redis_password: Optional[str] = None, redis_db: int = 0, + env_prefix: Optional[str] = None): + """Initialize the queue provider if not already set.""" + if not self.queue_provider: + self.queue_provider = RedisQueueProvider( + redis_host=redis_host, + redis_port=redis_port, + redis_password=redis_password, + redis_db=redis_db, + env_prefix=env_prefix + ) + logger.info(f"Initialized Redis queue provider with prefix: '{env_prefix}'") + return self.queue_provider + + def get_auth_task(self) -> Optional[Dict]: + """Get an authentication task from the queue.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return None + + task = self.queue_provider.get_task(self.queue_provider.AUTH_INBOX) + if task: + with self.lock: + self.state['queue_stats']['auth']['total_processed'] += 1 + return task + + def get_auth_tasks_batch(self, batch_size: int) -> List[Dict]: + """Get a batch of authentication tasks from the queue.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return [] + + tasks = self.queue_provider.get_tasks_batch(self.queue_provider.AUTH_INBOX, batch_size) + if tasks: + with self.lock: + self.state['queue_stats']['auth']['total_processed'] += len(tasks) + return tasks + + def report_auth_success(self, task_id: str, result: Dict) -> bool: + """Report a successful authentication task.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + success = self.queue_provider.report_success(self.queue_provider.AUTH_RESULT, task_id, result) + if success: + with self.lock: + self.state['queue_stats']['auth']['successful'] += 1 + return success + + def report_auth_failure(self, task_id: str, error: Dict) -> bool: + """Report an authentication task failure.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + success = self.queue_provider.report_failure(self.queue_provider.AUTH_FAIL, task_id, error) + if success: + with self.lock: + self.state['queue_stats']['auth']['failed'] += 1 + return success + + def report_auth_skipped(self, task_id: str, reason: Dict) -> bool: + """Report an authentication task that 
was skipped.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + success = self.queue_provider.report_skipped(self.queue_provider.AUTH_SKIPPED, task_id, reason) + if success: + with self.lock: + self.state['queue_stats']['auth']['skipped'] += 1 + return success + + def mark_auth_in_progress(self, task_id: str, worker_id: str) -> bool: + """Mark an authentication task as in progress.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + return self.queue_provider.mark_in_progress(self.queue_provider.AUTH_PROGRESS, task_id, worker_id) + + def remove_auth_in_progress(self, task_id: str) -> bool: + """Remove an authentication task from the in-progress tracking.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + return self.queue_provider.remove_in_progress(self.queue_provider.AUTH_PROGRESS, task_id) + + def add_download_task(self, task: Dict) -> bool: + """Add a download task to the queue.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + return self.queue_provider.add_task(self.queue_provider.DL_TASKS, task) + + def add_download_tasks_batch(self, tasks: List[Dict]) -> int: + """Add a batch of download tasks to the queue.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return 0 + + return self.queue_provider.add_tasks_batch(self.queue_provider.DL_TASKS, tasks) + + def get_download_task(self) -> Optional[Dict]: + """Get a download task from the queue.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return None + + task = self.queue_provider.get_task(self.queue_provider.DL_TASKS) + if task: + with self.lock: + self.state['queue_stats']['download']['total_processed'] += 1 + return task + + def get_download_tasks_batch(self, batch_size: int) -> List[Dict]: + """Get a batch of download tasks from the queue.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return [] + + tasks = self.queue_provider.get_tasks_batch(self.queue_provider.DL_TASKS, batch_size) + if tasks: + with self.lock: + self.state['queue_stats']['download']['total_processed'] += len(tasks) + return tasks + + def report_download_success(self, task_id: str, result: Dict) -> bool: + """Report a successful download task.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + success = self.queue_provider.report_success(self.queue_provider.DL_RESULT, task_id, result) + if success: + with self.lock: + self.state['queue_stats']['download']['successful'] += 1 + return success + + def report_download_failure(self, task_id: str, error: Dict) -> bool: + """Report a download task failure.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + success = self.queue_provider.report_failure(self.queue_provider.DL_FAIL, task_id, error) + if success: + with self.lock: + self.state['queue_stats']['download']['failed'] += 1 + return success + + def report_download_skipped(self, task_id: str, reason: Dict) -> bool: + """Report a download task that was skipped.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + success = self.queue_provider.report_skipped(self.queue_provider.DL_SKIPPED, task_id, reason) + if success: + with self.lock: + self.state['queue_stats']['download']['skipped'] += 1 + return success + + def 
mark_download_in_progress(self, task_id: str, worker_id: str) -> bool: + """Mark a download task as in progress.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + return self.queue_provider.mark_in_progress(self.queue_provider.DL_PROGRESS, task_id, worker_id) + + def remove_download_in_progress(self, task_id: str) -> bool: + """Remove a download task from the in-progress tracking.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return False + + return self.queue_provider.remove_in_progress(self.queue_provider.DL_PROGRESS, task_id) + + def requeue_failed_auth_tasks(self, batch_size: int = 100) -> int: + """Requeue failed authentication tasks.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return 0 + + return self.queue_provider.requeue_failed_tasks( + self.queue_provider.AUTH_FAIL, + self.queue_provider.AUTH_INBOX, + batch_size + ) + + def requeue_failed_download_tasks(self, batch_size: int = 100) -> int: + """Requeue failed download tasks.""" + if not self.queue_provider: + logger.error("Queue provider not initialized") + return 0 + + return self.queue_provider.requeue_failed_tasks( + self.queue_provider.DL_FAIL, + self.queue_provider.DL_TASKS, + batch_size + ) diff --git a/ytops_client/stress_policy/utils.py b/ytops_client-source/ytops_client/stress_policy/utils.py similarity index 100% rename from ytops_client/stress_policy/utils.py rename to ytops_client-source/ytops_client/stress_policy/utils.py diff --git a/ytops_client/stress_policy/workers.py b/ytops_client-source/ytops_client/stress_policy/workers.py similarity index 93% rename from ytops_client/stress_policy/workers.py rename to ytops_client-source/ytops_client/stress_policy/workers.py index 42cb176..261787f 100644 --- a/ytops_client/stress_policy/workers.py +++ b/ytops_client-source/ytops_client/stress_policy/workers.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) _auth_manager_cache = {} _auth_manager_lock = threading.Lock() -def _get_auth_manager(current_manager, auth_env: str): +def get_auth_manager(current_manager, auth_env: str): """ Gets a ProfileManager instance for a specific auth simulation environment. It uses the auth_env provided from the info.json metadata. @@ -1171,99 +1171,61 @@ def run_direct_batch_worker(worker_id, policy, state_manager, args, profile_mana if args.verbose and '--verbose' not in cmd: cmd.append('--verbose') - if args.dummy: - # In dummy mode, we replace the real yt-dlp command with our dummy script. - # The dummy script will handle Redis interactions (checking for bans, recording activity). - - # For logging, construct what the real command would have been - log_cmd = list(cmd) # cmd has most args now + if args.dummy_batch: + # In dummy batch mode, we simulate the entire batch process directly. 
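+            # No yt-dlp subprocess is spawned in this branch: each URL's outcome is drawn from
+            # dummy_simulation_settings, profile counters are updated via record_activity, and
+            # successful URLs get a minimal info.json written directly to save_dir.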
+ log_cmd = list(cmd) log_cmd.extend(['-o', os.path.join('temp_dir', output_template_str)]) - logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Would run real command: {' '.join(shlex.quote(s) for s in log_cmd)}") - logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: With environment for real command: {custom_env}") + logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: Simulating batch of {len(url_batch)} URLs.") + logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: Would run real command: {' '.join(shlex.quote(s) for s in log_cmd)}") + logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: With environment: {custom_env}") - cmd = [ - sys.executable, '-m', 'ytops_client.cli', - 'yt-dlp-dummy' - ] + dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {}) + auth_failure_rate = dummy_settings.get('auth_failure_rate', 0.0) + auth_skipped_rate = dummy_settings.get('auth_skipped_failure_rate', 0.0) + min_seconds = dummy_settings.get('auth_min_seconds', 0.1) + max_seconds = dummy_settings.get('auth_max_seconds', 0.5) - # The orchestrator is still responsible for managing temp directories and post-processing. - with tempfile.TemporaryDirectory(prefix=f"ytdlp-dummy-batch-{worker_id}-") as temp_output_dir: - output_template = os.path.join(temp_output_dir, output_template_str) - cmd.extend(['--batch-file', temp_batch_file]) - cmd.extend(['-o', output_template]) - if args.verbose: - cmd.append('--verbose') + for url in url_batch: + time.sleep(random.uniform(min_seconds, max_seconds)) + video_id = sp_utils.get_video_id(url) or f"dummy_{random.randint(1000, 9999)}" - # Pass failure rates and Redis connection info to the dummy script via environment - dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {}) - auth_failure_rate = dummy_settings.get('auth_failure_rate', 0.0) - auth_skipped_rate = dummy_settings.get('auth_skipped_failure_rate', 0.0) - custom_env['YTDLP_DUMMY_FAILURE_RATE'] = auth_failure_rate - custom_env['YTDLP_DUMMY_SKIPPED_FAILURE_RATE'] = auth_skipped_rate - custom_env['REDIS_HOST'] = profile_manager_instance.redis.connection_pool.connection_kwargs.get('host') - custom_env['REDIS_PORT'] = profile_manager_instance.redis.connection_pool.connection_kwargs.get('port') - redis_password = profile_manager_instance.redis.connection_pool.connection_kwargs.get('password') - if redis_password: - custom_env['REDIS_PASSWORD'] = redis_password - - logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Running dummy yt-dlp script with updated environment: {custom_env}") - retcode, stdout, stderr = run_command( - cmd, running_processes, process_lock, env=custom_env, stream_output=args.verbose, - stream_prefix=f"[Worker {worker_id} | yt-dlp-dummy] " - ) - - # --- Post-processing is the same as in non-dummy mode --- - processed_files = list(Path(temp_output_dir).glob('*.json')) - - for temp_path in processed_files: + rand_val = random.random() + if rand_val < auth_skipped_rate: + logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating tolerated failure for {video_id}.") + profile_manager_instance.record_activity(profile_name, 'tolerated_error') + elif rand_val < (auth_skipped_rate + auth_failure_rate): + logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal failure for {video_id}.") + profile_manager_instance.record_activity(profile_name, 'failure') + else: + # Success - create dummy info.json + 
+                profile_manager_instance.record_activity(profile_name, 'success')
                 files_created += 1
-                video_id = "unknown"
+                info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True}
+                env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
+                info_data['_ytops_metadata'] = {
+                    'profile_name': profile_name, 'proxy_url': proxy_url,
+                    'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
+                    'auth_env': env_name
+                }
+
+                final_path = Path(save_dir) / f"{video_id}.info.json"
+                rename_template = direct_policy.get('rename_file_template')
+                if rename_template:
+                    sanitized_proxy = re.sub(r'[:/]', '_', proxy_url)
+                    new_name = rename_template.format(video_id=video_id, profile_name=profile_name, proxy=sanitized_proxy)
+                    final_path = Path(save_dir) / new_name
+
                 try:
-                    # The orchestrator injects its own metadata after the fact.
-                    with open(temp_path, 'r+', encoding='utf-8') as f:
-                        info_data = json.load(f)
-                        video_id = info_data.get('id', 'unknown')
-                        env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
-                        info_data['_ytops_metadata'] = {
-                            'profile_name': profile_name,
-                            'proxy_url': proxy_url,
-                            'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
-                            'auth_env': env_name
-                        }
-                        f.seek(0)
+                    with open(final_path, 'w', encoding='utf-8') as f:
                         json.dump(info_data, f, indent=2)
-                        f.truncate()
+                    logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY: Created dummy info.json: '{final_path}'")
+                except IOError as e:
+                    logger.error(f"[Worker {worker_id}] [{profile_name}] DUMMY: Failed to write dummy info.json: {e}")
 
-                    final_path = Path(save_dir) / temp_path.name
-                    rename_template = direct_policy.get('rename_file_template')
-                    if rename_template:
-                        sanitized_proxy = re.sub(r'[:/]', '_', proxy_url)
-                        new_name = rename_template.format(
-                            video_id=video_id, profile_name=profile_name, proxy=sanitized_proxy
-                        )
-                        final_path = Path(save_dir) / new_name
-
-                    shutil.move(str(temp_path), str(final_path))
-                    logger.info(f"[Worker {worker_id}] Post-processed and moved info.json to '{final_path}'")
-                except (IOError, json.JSONDecodeError, OSError) as e:
-                    logger.error(f"[Worker {worker_id}] DUMMY MODE: Error post-processing '{temp_path.name}' (video: {video_id}): {e}")
-
-            # The orchestrator still determines overall batch success and logs its own event.
-            # It does NOT call record_activity, as the dummy script did that per-URL.
-            success = (retcode == 0 and files_created > 0)
-
-            if not success:
-                reason = f"exit code was {retcode}" if retcode != 0 else f"0 files created"
-                logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Marking batch as FAILED. Reason: {reason}.")
-
-            # Record batch stats
-            state_manager.record_batch_result(success, len(url_batch), profile_name=profile_name)
-
-            event_details = f"Dummy batch completed. Files created: {files_created}/{len(url_batch)}."
-            if not success and stderr:
-                event_details += f" Stderr: {stderr.strip().splitlines()[-1]}"
-            event = { 'type': 'fetch_batch', 'profile': profile_name, 'proxy_url': proxy_url, 'success': success, 'details': event_details, 'video_count': len(url_batch) }
-            state_manager.log_event(event)
+        success = (files_created > 0)
+        state_manager.record_batch_result(success, len(url_batch), profile_name=profile_name)
+        event = { 'type': 'fetch_batch', 'profile': profile_name, 'proxy_url': proxy_url, 'success': success, 'details': f"Dummy batch completed. Files created: {files_created}/{len(url_batch)}.", 'video_count': len(url_batch) }
+        state_manager.log_event(event)
     else:
         with tempfile.TemporaryDirectory(prefix=f"ytdlp-batch-{worker_id}-") as temp_output_dir:
@@ -1803,18 +1765,60 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
             return False
 
-        retcode, stdout, stderr, stop_reason = run_docker_container(
-            image_name=image_name,
-            command=command,
-            volumes=volumes,
-            stream_prefix=f"[Worker {worker_id} | docker-ytdlp] ",
-            network_name=network_name,
-            log_callback=log_parser_callback,
-            profile_manager=profile_manager_instance,
-            profile_name=profile_name,
-            environment=environment,
-            log_command_override=log_command_override
-        )
+        if args.dummy_batch:
+            logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: Simulating Docker batch of {len(url_batch)} URLs.")
+            logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: Would run docker command: {' '.join(shlex.quote(s) for s in command)}")
+            logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: With environment: {environment}")
+
+            dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
+            auth_failure_rate = dummy_settings.get('auth_failure_rate', 0.0)
+            auth_skipped_rate = dummy_settings.get('auth_skipped_failure_rate', 0.0)
+            min_seconds = dummy_settings.get('auth_min_seconds', 0.1)
+            max_seconds = dummy_settings.get('auth_max_seconds', 0.5)
+
+            for url in url_batch:
+                time.sleep(random.uniform(min_seconds, max_seconds))
+                video_id = sp_utils.get_video_id(url) or f"dummy_{random.randint(1000, 9999)}"
+
+                rand_val = random.random()
+                if rand_val < auth_skipped_rate:
+                    logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating tolerated failure for {video_id}.")
+                    profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+                    live_tolerated_count += 1
+                elif rand_val < (auth_skipped_rate + auth_failure_rate):
+                    logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal failure for {video_id}.")
+                    profile_manager_instance.record_activity(profile_name, 'failure')
+                    live_failure_count += 1
+                else:
+                    # Success - create dummy info.json
+                    profile_manager_instance.record_activity(profile_name, 'success')
+                    live_success_count += 1
+                    info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True}
+
+                    # Create a dummy file in the temp task dir for post-processing to find
+                    dummy_file_path = Path(temp_task_dir_host) / f"{video_id}.info.json"
+                    try:
+                        with open(dummy_file_path, 'w', encoding='utf-8') as f:
+                            json.dump(info_data, f)
+                    except IOError as e:
+                        logger.error(f"[Worker {worker_id}] [{profile_name}] DUMMY: Failed to write dummy info.json for post-processing: {e}")
+
+            retcode = 0
+            stdout, stderr, stop_reason = "", "", None
+
+        else:
+            retcode, stdout, stderr, stop_reason = run_docker_container(
+                image_name=image_name,
+                command=command,
+                volumes=volumes,
+                stream_prefix=f"[Worker {worker_id} | docker-ytdlp] ",
+                network_name=network_name,
+                log_callback=log_parser_callback,
+                profile_manager=profile_manager_instance,
+                profile_name=profile_name,
+                environment=environment,
+                log_command_override=log_command_override
+            )
 
         # 5. Post-process results
         logger.info(f"[Worker {worker_id}] [{profile_name}] Docker container finished. Post-processing results...")
@@ -1999,6 +2003,45 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
                 logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path_host.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.")
                 continue  # Skip to finally block to unlock profile
 
+            if args.dummy or args.dummy_batch:
+                logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DOCKER DOWNLOAD ==========")
+                logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path_host.name}")
+
+                dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
+                min_seconds = dummy_settings.get('download_min_seconds', 1.0)
+                max_seconds = dummy_settings.get('download_max_seconds', 3.0)
+                failure_rate = dummy_settings.get('download_failure_rate', 0.0)
+                skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)
+
+                time.sleep(random.uniform(min_seconds, max_seconds))
+
+                rand_val = random.random()
+                should_fail_skipped = rand_val < skipped_rate
+                should_fail_fatal = not should_fail_skipped and rand_val < (skipped_rate + failure_rate)
+
+                if should_fail_skipped:
+                    logger.warning(f"[Worker {worker_id}] DUMMY: Simulating skipped download failure.")
+                    profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+                elif should_fail_fatal:
+                    logger.warning(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure.")
+                    profile_manager_instance.record_activity(profile_name, 'download_error')
+                else:
+                    logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success.")
+                    profile_manager_instance.record_activity(profile_name, 'download')
+
+                logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD ==========")
+
+                # In dummy mode, we just rename the file to processed and continue to the finally block.
+                try:
+                    base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
+                    processed_path = Path(f"{base_path_str}.processed")
+                    claimed_task_path_host.rename(processed_path)
+                    logger.debug(f"DUMMY MODE: Renamed processed task file to '{processed_path.name}'.")
+                except (OSError, IndexError) as e:
+                    logger.error(f"DUMMY MODE: Failed to rename processed task file '{claimed_task_path_host}': {e}")
+
+                continue  # Skip to finally block
+
             # --- Check for URL expiration before running Docker ---
             if d_policy.get('check_url_expiration', True):
                 # Heuristic: check the first available format URL
@@ -2496,7 +2539,7 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
             logger.error(f"[Worker {worker_id}] Failed to create cache directory '{profile_cache_dir}': {e}")
 
         logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task '{claimed_task_path.name}'...")
-        if args.dummy:
+        if args.dummy or args.dummy_batch:
            logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DIRECT DOWNLOAD ==========")
            logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path.name}")
            logger.info(f"[Worker {worker_id}] Would run command: {' '.join(shlex.quote(s) for s in cmd)}")
@@ -2516,8 +2559,6 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
 
            if should_fail_skipped:
                logger.warning(f"[Worker {worker_id}] DUMMY: Simulating skipped download failure.")
-               # A skipped/tolerated failure in yt-dlp usually results in exit code 0.
-               # The orchestrator will see this as a success but the stderr can be used for context.
               retcode = 0
               stderr = "Dummy skipped failure"
           elif should_fail_fatal:
diff --git a/ytops_client/stress_policy_tool.py b/ytops_client-source/ytops_client/stress_policy_tool.py
similarity index 81%
rename from ytops_client/stress_policy_tool.py
rename to ytops_client-source/ytops_client/stress_policy_tool.py
index 1741298..ef8a751 100644
--- a/ytops_client/stress_policy_tool.py
+++ b/ytops_client-source/ytops_client/stress_policy_tool.py
@@ -110,6 +110,10 @@ from .stress_policy.workers import (
     run_direct_docker_worker, find_task_and_lock_profile,
     run_direct_docker_download_worker, run_direct_download_worker
 )
+from .stress_policy.queue_workers import (
+    run_queue_auth_worker, run_queue_download_worker
+)
+from .stress_policy.queue_provider import RedisQueueProvider
 from .stress_policy.arg_parser import add_stress_policy_parser
 
 # Add a global event for graceful shutdown
@@ -733,6 +737,270 @@ def main_stress_policy(args):
         state_manager.print_summary(policy)
         state_manager.close()
         return 0
+
+    # --- Queue Auth Orchestration Mode ---
+    elif orchestration_mode == 'queue_auth':
+        logger.info("--- Queue Auth Orchestration Mode Enabled ---")
+        if mode != 'fetch_only' or settings.get('profile_mode') != 'from_pool_with_lock':
+            logger.error("Orchestration mode 'queue_auth' is only compatible with 'fetch_only' mode and 'from_pool_with_lock' profile mode.")
+            return 1
+
+        auth_manager = profile_managers.get('auth')
+        if not auth_manager:
+            logger.error("Queue auth mode requires an auth profile manager.")
+            return 1
+
+        # Initialize queue provider
+        queue_policy = policy.get('queue_policy', {})
+        redis_host = args.redis_host or os.getenv('REDIS_HOST') or queue_policy.get('redis_host') or 'localhost'
+        redis_port = args.redis_port if args.redis_port is not None else (int(os.getenv('REDIS_PORT')) if os.getenv('REDIS_PORT') else (queue_policy.get('redis_port') or 6379))
+        redis_password = args.redis_password or os.getenv('REDIS_PASSWORD') or queue_policy.get('redis_password')
+        redis_db = args.redis_db if args.redis_db is not None else (int(os.getenv('REDIS_DB')) if os.getenv('REDIS_DB') else (queue_policy.get('redis_db') or 0))
+
+        # Extract env from manager's key prefix, unless disabled by policy
+        use_env_prefix = queue_policy.get('use_env_prefix', True)
+        env_prefix = None
+        if use_env_prefix:
+            env_prefix = auth_manager.key_prefix.removesuffix('_profile_mgmt_')
+
+        state_manager.initialize_queue_provider(
+            redis_host=redis_host,
+            redis_port=redis_port,
+            redis_password=redis_password,
+            redis_db=redis_db,
+            env_prefix=env_prefix
+        )
+
+        # Create save directory if specified
+        save_dir = settings.get('save_info_json_dir')
+        if save_dir:
+            try:
+                os.makedirs(save_dir, exist_ok=True)
+                logger.info(f"Created save directory for info.json files: {save_dir}")
+            except OSError as e:
+                logger.error(f"Failed to create save directory '{save_dir}': {e}")
+                return 1
+
+        # Requeue failed tasks if requested
+        if args.requeue_failed:
+            requeued = state_manager.requeue_failed_auth_tasks(
+                batch_size=queue_policy.get('requeue_batch_size', 100)
+            )
+            logger.info(f"Requeued {requeued} failed authentication tasks.")
+
+        sp_utils.display_effective_policy(policy, policy_name, sources=[])
+        if args.dry_run: return 0
+
+        workers = exec_control.get('workers', 1)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
+            futures = [
+                executor.submit(run_queue_auth_worker, i, policy, state_manager, args, auth_manager, running_processes, process_lock)
+                for i in range(workers)
+            ]
+            shutdown_event.wait()
logger.info("Shutdown signal received, waiting for queue auth workers to finish...") + concurrent.futures.wait(futures) + + state_manager.print_summary(policy) + state_manager.close() + return 0 + + # --- Queue Download Orchestration Mode --- + elif orchestration_mode == 'queue_download': + logger.info("--- Queue Download Orchestration Mode Enabled ---") + if mode != 'download_only' or settings.get('profile_mode') != 'from_pool_with_lock': + logger.error("Orchestration mode 'queue_download' is only compatible with 'download_only' mode and 'from_pool_with_lock' profile mode.") + return 1 + + download_manager = profile_managers.get('download') + if not download_manager: + logger.error("Queue download mode requires a download profile manager.") + return 1 + + # Initialize queue provider + queue_policy = policy.get('queue_policy', {}) + redis_host = args.redis_host or os.getenv('REDIS_HOST') or queue_policy.get('redis_host') or 'localhost' + redis_port = args.redis_port if args.redis_port is not None else (int(os.getenv('REDIS_PORT')) if os.getenv('REDIS_PORT') else (queue_policy.get('redis_port') or 6379)) + redis_password = args.redis_password or os.getenv('REDIS_PASSWORD') or queue_policy.get('redis_password') + redis_db = args.redis_db if args.redis_db is not None else (int(os.getenv('REDIS_DB')) if os.getenv('REDIS_DB') else (queue_policy.get('redis_db') or 0)) + + # Extract env from manager's key prefix, unless disabled by policy + use_env_prefix = queue_policy.get('use_env_prefix', True) + env_prefix = None + if use_env_prefix: + env_prefix = download_manager.key_prefix.removesuffix('_profile_mgmt_') + + state_manager.initialize_queue_provider( + redis_host=redis_host, + redis_port=redis_port, + redis_password=redis_password, + redis_db=redis_db, + env_prefix=env_prefix + ) + + # Create output directory if specified + output_dir = d_policy.get('output_dir') + if output_dir: + try: + os.makedirs(output_dir, exist_ok=True) + logger.info(f"Created output directory for downloads: {output_dir}") + except OSError as e: + logger.error(f"Failed to create output directory '{output_dir}': {e}") + return 1 + + # Requeue failed tasks if requested + if args.requeue_failed: + requeued = state_manager.requeue_failed_download_tasks( + batch_size=queue_policy.get('requeue_batch_size', 100) + ) + logger.info(f"Requeued {requeued} failed download tasks.") + + sp_utils.display_effective_policy(policy, policy_name, sources=[]) + if args.dry_run: return 0 + + workers = exec_control.get('workers', 1) + with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor: + futures = [ + executor.submit(run_queue_download_worker, i, policy, state_manager, args, download_manager, running_processes, process_lock) + for i in range(workers) + ] + shutdown_event.wait() + logger.info("Shutdown signal received, waiting for queue download workers to finish...") + concurrent.futures.wait(futures) + + state_manager.print_summary(policy) + state_manager.close() + return 0 + + # --- Queue Full Stack Orchestration Mode --- + elif orchestration_mode == 'queue_full_stack': + logger.info("--- Queue Full Stack Orchestration Mode Enabled ---") + if mode != 'full_stack' or settings.get('profile_mode') != 'from_pool_with_lock': + logger.error("Orchestration mode 'queue_full_stack' is only compatible with 'full_stack' mode and 'from_pool_with_lock' profile mode.") + return 1 + + auth_manager = profile_managers.get('auth') + if not auth_manager: + logger.error("Queue full stack mode requires an auth profile manager.") + 
+            return 1
+
+        download_manager = profile_managers.get('download')
+        if not download_manager:
+            logger.error("Queue full stack mode requires a download profile manager.")
+            return 1
+
+        # Initialize queue provider
+        queue_policy = policy.get('queue_policy', {})
+        redis_host = args.redis_host or os.getenv('REDIS_HOST') or queue_policy.get('redis_host') or 'localhost'
+        redis_port = args.redis_port if args.redis_port is not None else (int(os.getenv('REDIS_PORT')) if os.getenv('REDIS_PORT') else (queue_policy.get('redis_port') or 6379))
+        redis_password = args.redis_password or os.getenv('REDIS_PASSWORD') or queue_policy.get('redis_password')
+        redis_db = args.redis_db if args.redis_db is not None else (int(os.getenv('REDIS_DB')) if os.getenv('REDIS_DB') else (queue_policy.get('redis_db') or 0))
+
+        # Extract env from auth manager's key prefix, unless disabled by policy
+        use_env_prefix = queue_policy.get('use_env_prefix', True)
+        env_prefix = None
+        if use_env_prefix:
+            auth_prefix = auth_manager.key_prefix.removesuffix('_profile_mgmt_')
+            download_prefix = download_manager.key_prefix.removesuffix('_profile_mgmt_')
+            if auth_prefix != download_prefix:
+                logger.warning(f"Auth environment ('{auth_prefix}') and Download environment ('{download_prefix}') are different.")
+                logger.warning(f"Using '{auth_prefix}' as the prefix for all shared Redis queues.")
+            env_prefix = auth_prefix
+
+        state_manager.initialize_queue_provider(
+            redis_host=redis_host,
+            redis_port=redis_port,
+            redis_password=redis_password,
+            redis_db=redis_db,
+            env_prefix=env_prefix
+        )
+
+        # Create directories if specified
+        save_dir = settings.get('save_info_json_dir')
+        if save_dir:
+            try:
+                os.makedirs(save_dir, exist_ok=True)
+                logger.info(f"Created save directory for info.json files: {save_dir}")
+            except OSError as e:
+                logger.error(f"Failed to create save directory '{save_dir}': {e}")
+                return 1
+
+        output_dir = d_policy.get('output_dir')
+        if output_dir:
+            try:
+                os.makedirs(output_dir, exist_ok=True)
+                logger.info(f"Created output directory for downloads: {output_dir}")
+            except OSError as e:
+                logger.error(f"Failed to create output directory '{output_dir}': {e}")
+                return 1
+
+        # Requeue failed tasks if requested
+        if args.requeue_failed:
+            requeued_auth = state_manager.requeue_failed_auth_tasks(
+                batch_size=queue_policy.get('requeue_batch_size', 100)
+            )
+            requeued_dl = state_manager.requeue_failed_download_tasks(
+                batch_size=queue_policy.get('requeue_batch_size', 100)
+            )
+            logger.info(f"Requeued {requeued_auth} failed authentication tasks and {requeued_dl} failed download tasks.")
+
+        sp_utils.display_effective_policy(policy, policy_name, sources=[])
+        if args.dry_run: return 0
+
+        # Start both auth and download workers
+        auth_workers = exec_control.get('auth_workers', 1)
+        download_workers = exec_control.get('download_workers', 2)
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=auth_workers + download_workers) as executor:
+            # Start auth workers
+            auth_futures = [
+                executor.submit(run_queue_auth_worker, i, policy, state_manager, args, auth_manager, running_processes, process_lock)
+                for i in range(auth_workers)
+            ]
+
+            # Start download workers
+            dl_futures = [
+                executor.submit(run_queue_download_worker, i + auth_workers, policy, state_manager, args, download_manager, running_processes, process_lock)
+                for i in range(download_workers)
+            ]
+
+            # Start requeue task if configured
+            requeue_interval = queue_policy.get('requeue_interval_seconds')
+            requeue_enabled = queue_policy.get('requeue_failed_tasks', False)
+
+            if requeue_enabled and requeue_interval:
+                def requeue_task():
+                    while not shutdown_event.is_set():
+                        time.sleep(requeue_interval)
+                        if shutdown_event.is_set():
+                            break
+
+                        try:
+                            requeued_auth = state_manager.requeue_failed_auth_tasks(
+                                batch_size=queue_policy.get('requeue_batch_size', 100)
+                            )
+                            requeued_dl = state_manager.requeue_failed_download_tasks(
+                                batch_size=queue_policy.get('requeue_batch_size', 100)
+                            )
+
+                            if requeued_auth > 0 or requeued_dl > 0:
+                                logger.info(f"Auto-requeued {requeued_auth} failed auth tasks and {requeued_dl} failed download tasks.")
+                        except Exception as e:
+                            logger.error(f"Error in auto-requeue task: {e}")
+
+                requeue_future = executor.submit(requeue_task)
+                all_futures = auth_futures + dl_futures + [requeue_future]
+            else:
+                all_futures = auth_futures + dl_futures
+
+            # Wait for shutdown signal
+            shutdown_event.wait()
+            logger.info("Shutdown signal received, waiting for queue workers to finish...")
+            concurrent.futures.wait(all_futures)
+
+        state_manager.print_summary(policy)
+        state_manager.close()
+        return 0
 
     # --- Default (Task-First) Orchestration Mode ---
     sources = []  # This will be a list of URLs or Path objects
diff --git a/ytops_client/task_generator_tool.py b/ytops_client-source/ytops_client/task_generator_tool.py
similarity index 100%
rename from ytops_client/task_generator_tool.py
rename to ytops_client-source/ytops_client/task_generator_tool.py
diff --git a/ytops_client/youtube-dl/Dockerfile b/ytops_client-source/ytops_client/youtube-dl/Dockerfile
similarity index 100%
rename from ytops_client/youtube-dl/Dockerfile
rename to ytops_client-source/ytops_client/youtube-dl/Dockerfile
diff --git a/ytops_client/youtube-dl/README.md b/ytops_client-source/ytops_client/youtube-dl/README.md
similarity index 100%
rename from ytops_client/youtube-dl/README.md
rename to ytops_client-source/ytops_client/youtube-dl/README.md
diff --git a/ytops_client/youtube-dl/release-versions/latest.txt b/ytops_client-source/ytops_client/youtube-dl/release-versions/latest.txt
similarity index 100%
rename from ytops_client/youtube-dl/release-versions/latest.txt
rename to ytops_client-source/ytops_client/youtube-dl/release-versions/latest.txt
diff --git a/ytops_client/yt_dlp_dummy_tool.py b/ytops_client-source/ytops_client/yt_dlp_dummy_tool.py
similarity index 100%
rename from ytops_client/yt_dlp_dummy_tool.py
rename to ytops_client-source/ytops_client/yt_dlp_dummy_tool.py