yt-dlp-dags/airflow/dags/ytdlp_ops_v02_dispatcher_auth.py

# -*- coding: utf-8 -*-
"""
DAG to dispatch work to ytdlp_ops_worker_per_url_auth DAGs.
It pulls a URL from Redis and triggers an auth worker with a pinned queue.
"""
from __future__ import annotations
import logging
import os
import socket
from datetime import timedelta
from airflow.decorators import task
from airflow.exceptions import AirflowSkipException
from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.api.common.trigger_dag import trigger_dag
from airflow.utils.dates import days_ago
from utils.redis_utils import _get_redis_client
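# NOTE: _get_redis_client lives in utils/redis_utils.py and is not shown here.
# It is assumed to return a redis-py client built from the named Airflow
# connection (e.g. via the Redis provider's RedisHook); treat those details as
# an assumption, not part of this file.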
logger = logging.getLogger(__name__)

DEFAULT_QUEUE_NAME = 'queue2_auth'
DEFAULT_REDIS_CONN_ID = 'redis_default'


@task(queue='queue-auth')
def dispatch_url_to_auth_worker(**context):
    """
    Pulls one URL from Redis, determines the current worker's dedicated queue,
    and triggers the auth worker DAG to process the URL on that specific queue.
    """
    ti = context['task_instance']
    logger.info(f"Auth Dispatcher task '{ti.task_id}' running on queue '{ti.queue}'.")

    # --- Check for worker pause lock file ---
    lock_file_path = '/opt/airflow/inputfiles/AIRFLOW.PREVENT_URL_PULL.lockfile'
    hostname = socket.gethostname()
    if os.path.exists(lock_file_path):
        logger.info(f"Worker '{hostname}' is paused. Lock file found at '{lock_file_path}'. Skipping URL pull.")
        raise AirflowSkipException(f"Worker '{hostname}' is paused.")
    else:
        logger.info(f"Worker '{hostname}' is active (no lock file found at '{lock_file_path}'). Proceeding to pull URL.")

    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_name = params['queue_name']
    inbox_queue = f"{queue_name}_inbox"

    logger.info(f"Attempting to pull one URL from Redis queue '{inbox_queue}'...")
    client = _get_redis_client(redis_conn_id)
    url_bytes = client.lpop(inbox_queue)
    if not url_bytes:
        logger.info("Redis auth inbox queue is empty. No work to dispatch. Skipping task.")
        raise AirflowSkipException("Redis auth inbox queue is empty. No work to dispatch.")
    url_to_process = url_bytes.decode('utf-8')
    logger.info(f"Pulled URL '{url_to_process}' from the queue.")

    # Determine the worker-specific queue for affinity (hostname was resolved above).
    worker_queue = f"queue-auth-{hostname}"
    logger.info(f"Running on worker '{hostname}'. Dispatching job to its dedicated queue '{worker_queue}'.")

    conf_to_pass = {**params, 'url_to_process': url_to_process, 'worker_queue': worker_queue}
    run_id = f"worker_run_auth_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}"
    logger.info(f"Triggering 'ytdlp_ops_v02_worker_per_url_auth' with run_id '{run_id}'")
    trigger_dag(
        dag_id='ytdlp_ops_v02_worker_per_url_auth',
        run_id=run_id,
        conf=conf_to_pass,
        replace_microseconds=False,
    )
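

# ---------------------------------------------------------------------------
# For reference only (an assumption, not the project's actual worker code):
# the triggered DAG can read everything this dispatcher passes via `conf`,
# roughly like so inside one of its tasks:
#
#     conf = context['dag_run'].conf or {}
#     url = conf.get('url_to_process')
#     pinned_queue = conf.get('worker_queue')
#
# How ytdlp_ops_v02_worker_per_url_auth actually pins its tasks to
# `worker_queue` is defined in that DAG and is not shown here.
# ---------------------------------------------------------------------------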


with DAG(
    dag_id='ytdlp_ops_v02_dispatcher_auth',
    default_args={'owner': 'airflow', 'retries': 0},
    schedule=None,
    start_date=days_ago(1),
    catchup=False,
    tags=['ytdlp', 'worker', 'dispatcher', 'auth'],
    is_paused_upon_creation=True,
    doc_md="""
### YT-DLP Auth URL Dispatcher
This DAG dispatches a single URL to an auth worker with a pinned queue.
It pulls from the `queue2_auth_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_auth` DAG.
""",
    render_template_as_native_obj=True,
    params={
        'queue_name': Param(
            DEFAULT_QUEUE_NAME,
            type='string',
            title='Queue Name',
            description='The base name of the Redis queue to pull URLs from.',
        ),
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type='string', title='Redis Connection ID'),
    },
) as dag:
    dispatch_url_to_auth_worker()
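

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the DAG): seeding the inbox list that this
# dispatcher drains. The dispatcher pops with LPOP, so pushing with RPUSH
# yields FIFO ordering. The connection settings and example URL below are
# assumptions for local testing, not project configuration. The dispatcher
# itself has schedule=None and is typically triggered manually, e.g.:
#
#   airflow dags trigger ytdlp_ops_v02_dispatcher_auth \
#       --conf '{"queue_name": "queue2_auth", "redis_conn_id": "redis_default"}'
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import redis  # assumes the redis-py package is installed

    client = redis.Redis(host="localhost", port=6379, db=0)
    inbox = f"{DEFAULT_QUEUE_NAME}_inbox"
    client.rpush(inbox, "https://www.youtube.com/watch?v=example")
    print(f"{client.llen(inbox)} URL(s) waiting in '{inbox}'")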