# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2024 rl <rl@rlmbp>
#
# Distributed under terms of the MIT license.

"""
DAG to orchestrate ytdlp_ops_worker_per_url DAG runs based on a defined policy.
Workers pull their own URLs from a Redis queue; this DAG only ignites them in
controlled bunches.
"""

from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.operators.python import PythonOperator
from airflow.models.param import Param
from airflow.models.variable import Variable
from airflow.utils.dates import days_ago
from airflow.api.common.trigger_dag import trigger_dag
from airflow.models.dag import DagModel

from datetime import timedelta

import logging
import time

# Configure logging
logger = logging.getLogger(__name__)

# Default settings
DEFAULT_QUEUE_NAME = 'video_queue'
DEFAULT_REDIS_CONN_ID = 'redis_default'
DEFAULT_TOTAL_WORKERS = 3
DEFAULT_WORKERS_PER_BUNCH = 1
DEFAULT_WORKER_DELAY_S = 5
DEFAULT_BUNCH_DELAY_S = 20

# Variable.get returns a string when the Variable is set in the metastore,
# so cast to int so the integer 'service_port' Param below validates.
DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="16.162.82.212")
DEFAULT_YT_AUTH_SERVICE_PORT = int(Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080))

# --- Main Orchestration Callable ---

def orchestrate_workers_ignition_callable(**context):
    """
    Main orchestration logic. Triggers a specified number of worker DAGs
    to initiate self-sustaining processing loops.
    """
    params = context['params']
    logger.info("Starting worker ignition sequence.")

    # Refuse to ignite while the worker DAG is paused: triggered runs would
    # only queue up without executing.
    worker_dag_id = 'ytdlp_ops_worker_per_url'
    dag_model = DagModel.get_dagmodel(worker_dag_id)
    if dag_model and dag_model.is_paused:
        raise AirflowException(f"Worker DAG '{worker_dag_id}' is paused. Cannot start worker loops.")

    total_workers = int(params['total_workers'])
    workers_per_bunch = int(params['workers_per_bunch'])
    worker_delay = int(params['delay_between_workers_s'])
    bunch_delay = int(params['delay_between_bunches_s'])

    # Create a list of worker numbers to trigger, split into bunches
    worker_indices = list(range(total_workers))
    bunches = [worker_indices[i:i + workers_per_bunch] for i in range(0, len(worker_indices), workers_per_bunch)]
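    # Worked example (illustrative): total_workers=5, workers_per_bunch=2
    # gives worker_indices=[0, 1, 2, 3, 4] and bunches=[[0, 1], [2, 3], [4]].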

    # Get and parse worker hosts (which are used as queue names)
    worker_hosts_str = params.get('worker_hosts', 'celery@dl002')
    worker_hosts = [h.strip() for h in worker_hosts_str.split(',') if h.strip()]
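    # e.g. (illustrative) 'celery@dl002, celery@dl003' -> ['celery@dl002', 'celery@dl003']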
    if not worker_hosts:
        raise AirflowException("The 'worker_hosts' parameter cannot be empty.")

    logger.info(f"Plan: Starting {total_workers} total workers in {len(bunches)} bunches, distributing across hosts (queues): {worker_hosts}")

    dag_run_id = context['dag_run'].run_id
    total_triggered = 0
    for i, bunch in enumerate(bunches):
        logger.info(f"--- Igniting Bunch {i+1}/{len(bunches)} (contains {len(bunch)} worker(s)) ---")
        for j, _ in enumerate(bunch):
            # Create a unique run_id for each worker loop starter
            run_id = f"ignited_{dag_run_id}_{total_triggered}"

            # Pass all orchestrator params to the worker so it has the full context for its loop.
            conf_to_pass = dict(params)
            # The worker pulls its own URL, so we don't pass one.
            conf_to_pass.pop('url', None)

            # Assign host/queue in a round-robin fashion
            queue_for_worker = worker_hosts[total_triggered % len(worker_hosts)]
            conf_to_pass['queue'] = queue_for_worker
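            # Worked example (illustrative): with worker_hosts=['celery@dl002', 'celery@dl003'],
            # loops 0, 2, 4, ... land on dl002 and loops 1, 3, 5, ... land on dl003.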

            logger.info(f"Igniting worker {j+1}/{len(bunch)} in bunch {i+1} (loop {total_triggered + 1}/{total_workers}) on host (queue) '{queue_for_worker}' (Run ID: {run_id})")
            logger.debug(f"Full conf for worker loop {run_id}: {conf_to_pass}")
            trigger_dag(
                dag_id=worker_dag_id,
                run_id=run_id,
                conf=conf_to_pass,
                replace_microseconds=False
            )
            total_triggered += 1

            # Delay between workers in a bunch
            if j < len(bunch) - 1:
                logger.info(f"Waiting {worker_delay}s before next worker in bunch...")
                time.sleep(worker_delay)

        # Delay between bunches
        if i < len(bunches) - 1:
            logger.info(f"--- Bunch {i+1} ignited. Waiting {bunch_delay}s before next bunch... ---")
            time.sleep(bunch_delay)

    logger.info(f"--- Ignition sequence complete. Total worker loops started: {total_triggered}. ---")

# =============================================================================
# DAG Definition
# =============================================================================

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    'start_date': days_ago(1),
}
with DAG(
    dag_id='ytdlp_ops_orchestrator',
    default_args=default_args,
    schedule_interval=None,  # This DAG runs only when triggered.
    max_active_runs=1,  # Only one ignition process should run at a time.
    catchup=False,
    description='Ignition system for ytdlp_ops_worker_per_url DAGs. Starts self-sustaining worker loops.',
    doc_md="""
### YT-DLP Worker Ignition System

This DAG acts as an "ignition system" that starts one or more self-sustaining worker loops.
It does **not** process URLs itself. Its only job is to trigger a specified number of `ytdlp_ops_worker_per_url` DAG runs.

#### How it Works:

1. **Manual Trigger:** You manually trigger this DAG with parameters defining how many worker loops to start (`total_workers`) and in what configuration (`workers_per_bunch`, delays); an example trigger `conf` is shown below.
2. **Ignition:** The orchestrator triggers the initial set of worker DAGs in a "fire-and-forget" manner, passing all of its configuration parameters to them.
3. **Completion:** Once all initial workers have been triggered, the orchestrator's job is complete.

The workers then take over, each running its own continuous processing loop.
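
#### Example Trigger Conf

A minimal example `conf` (values are illustrative; omitted parameters keep their defaults):

```json
{
    "total_workers": 4,
    "workers_per_bunch": 2,
    "delay_between_workers_s": 5,
    "delay_between_bunches_s": 20,
    "worker_hosts": "celery@dl002,celery@dl003"
}
```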
""",
    tags=['ytdlp', 'mgmt', 'master'],
    params={
        # --- Ignition Control Parameters ---
        'total_workers': Param(DEFAULT_TOTAL_WORKERS, type="integer", description="Total number of worker loops to start."),
        'workers_per_bunch': Param(DEFAULT_WORKERS_PER_BUNCH, type="integer", description="Number of workers to start in each bunch."),
        'delay_between_workers_s': Param(DEFAULT_WORKER_DELAY_S, type="integer", description="Delay in seconds between starting each worker within a bunch."),
        'delay_between_bunches_s': Param(DEFAULT_BUNCH_DELAY_S, type="integer", description="Delay in seconds between starting each bunch."),

        # --- Worker Passthrough Parameters ---
        'worker_hosts': Param('celery@dl002', type="string", title="[Worker Param] Worker Hosts", description="Comma-separated list of Celery worker hostnames (e.g. 'celery@dl002') to distribute workers across. The hostnames double as queue names, and workers are assigned to them round-robin."),
        'on_bannable_failure': Param(
            'retry_with_new_account',
            type="string",
            enum=['stop_loop', 'retry_with_new_account'],
            title="[Worker Param] On Bannable Failure Policy",
            description="Policy for a worker when a bannable error occurs. "
                        "'stop_loop': Ban the account, mark the URL as failed, and stop the worker's loop. "
                        "'retry_with_new_account': Ban the failed account and retry ONCE with a new account. If the retry also fails, ban the second account and the proxy, then stop."
        ),
        'queue_name': Param(DEFAULT_QUEUE_NAME, type="string", description="[Worker Param] Base name for Redis queues."),
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="[Worker Param] Airflow Redis connection ID."),
        'clients': Param('mweb,ios,android', type="string", description="[Worker Param] Comma-separated list of clients for token generation."),
        'account_pool': Param('ytdlp_account', type="string", description="[Worker Param] Account pool prefix or comma-separated list."),
        'account_pool_size': Param(10, type=["integer", "null"], description="[Worker Param] If 'account_pool' is a prefix, the number of accounts to generate (e.g. 10 yields 'prefix_01' through 'prefix_10'). Required when using a prefix."),
        'service_ip': Param(DEFAULT_YT_AUTH_SERVICE_IP, type="string", description="[Worker Param] IP of the ytdlp-ops-server. Defaults to the Airflow Variable YT_AUTH_SERVICE_IP, falling back to a hardcoded value."),
        'service_port': Param(DEFAULT_YT_AUTH_SERVICE_PORT, type="integer", description="[Worker Param] Port of the Envoy load balancer. Defaults to the Airflow Variable YT_AUTH_SERVICE_PORT, falling back to a hardcoded value."),
        'machine_id': Param("ytdlp-ops-airflow-service", type="string", description="[Worker Param] Identifier for the client machine."),
        'auto_create_new_accounts_on_exhaustion': Param(True, type="boolean", description="[Worker Param] If True and all accounts in a prefix-based pool are exhausted, create a new one automatically."),
        'retrigger_delay_on_empty_s': Param(60, type="integer", description="[Worker Param] Delay in seconds before a worker re-triggers itself when the queue is empty. Set to -1 to stop the loop."),
    }
) as dag:

    orchestrate_task = PythonOperator(
        task_id='start_worker_loops',
        python_callable=orchestrate_workers_ignition_callable,
    )
    orchestrate_task.doc_md = """
### Start Worker Loops

This is the main task that executes the ignition policy.

- It triggers `ytdlp_ops_worker_per_url` DAGs according to the batch settings.
- It passes all of its parameters down to the workers, which use them to run their continuous loops.
"""