# -*- coding: utf-8 -*-
"""
DAG to dispatch download jobs to ytdlp_ops_worker_per_url_dl DAGs.
It pulls a job payload from Redis and triggers a download worker.
"""
from __future__ import annotations

import logging
import os
import socket
from datetime import timedelta

from airflow.decorators import task
from airflow.exceptions import AirflowSkipException
from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.api.common.trigger_dag import trigger_dag
from airflow.utils.dates import days_ago

from utils.redis_utils import _get_redis_client

logger = logging.getLogger(__name__)

DEFAULT_QUEUE_NAME = 'queue2_dl'
DEFAULT_REDIS_CONN_ID = 'redis_default'

# DAG that is triggered once per dispatched job.
_WORKER_DAG_ID = 'ytdlp_ops_v02_worker_per_url_dl'


@task(queue='queue-dl')
def dispatch_job_to_dl_worker(**context):
    """
    Pulls one job payload from Redis, determines the current worker's dedicated
    queue, and triggers the download worker DAG to process the job on that
    specific queue.

    The payload is popped from the head of the ``<queue_name>_inbox`` Redis
    list, where ``queue_name`` comes from the DAG params. The triggered run
    receives all DAG params plus two extra ``conf`` keys:

    * ``job_data``     - the popped payload as text.
    * ``worker_queue`` - ``queue-dl-<hostname>`` of the host running this task.
      (Presumably a worker on this host consumes that queue; confirm against
      the worker deployment.)

    If triggering the worker DAG fails, the payload is pushed back to the head
    of the inbox list before the error is re-raised, so the job is not lost.

    Args:
        **context: Airflow task context. Reads ``task_instance``, ``params``,
            ``dag_run`` and ``ts_nodash``.

    Raises:
        AirflowSkipException: If the inbox list yields no payload.
    """
    ti = context['task_instance']
    logger.info("Download Dispatcher task '%s' running on queue '%s'.", ti.task_id, ti.queue)

    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_name = params['queue_name']
    inbox_queue = f"{queue_name}_inbox"

    logger.info("Attempting to pull one job from Redis queue '%s'...", inbox_queue)
    client = _get_redis_client(redis_conn_id)
    job_payload = client.lpop(inbox_queue)

    if not job_payload:
        logger.info("Redis download inbox queue is empty. No work to dispatch. Skipping task.")
        raise AirflowSkipException("Redis download inbox queue is empty. No work to dispatch.")

    # A Redis client created with decode_responses=True returns str instead of
    # bytes; accept both rather than failing on a missing .decode().
    if isinstance(job_payload, bytes):
        job_data_str = job_payload.decode('utf-8')
    else:
        job_data_str = job_payload
    logger.info("Pulled job from the queue.")

    # Determine the worker-specific queue for affinity
    hostname = socket.gethostname()
    worker_queue = f"queue-dl-{hostname}"
    logger.info(
        "Running on worker '%s'. Dispatching job to its dedicated queue '%s'.",
        hostname,
        worker_queue,
    )

    conf_to_pass = {**params, 'job_data': job_data_str, 'worker_queue': worker_queue}

    run_id = f"worker_run_dl_{context['dag_run'].run_id}_{context['ts_nodash']}_q_{worker_queue}"

    logger.info("Triggering '%s' with run_id '%s'", _WORKER_DAG_ID, run_id)
    try:
        trigger_dag(
            dag_id=_WORKER_DAG_ID,
            run_id=run_id,
            conf=conf_to_pass,
            replace_microseconds=False,
        )
    except Exception:
        # The payload has already been removed from Redis and this DAG does not
        # retry, so put it back at the head of the list it was popped from.
        logger.exception(
            "Failed to trigger '%s'; returning job to Redis queue '%s'.",
            _WORKER_DAG_ID,
            inbox_queue,
        )
        try:
            client.lpush(inbox_queue, job_payload)
        except Exception:
            logger.exception(
                "Could not return job to Redis queue '%s'; the job payload is lost.",
                inbox_queue,
            )
        raise


with DAG(
    dag_id='ytdlp_ops_v02_dispatcher_dl',
    default_args={'owner': 'airflow', 'retries': 0},
    schedule=None,
    start_date=days_ago(1),
    catchup=False,
    tags=['ytdlp', 'worker', 'dispatcher', 'download'],
    is_paused_upon_creation=True,
    doc_md="""
    ### YT-DLP Download Job Dispatcher

    This DAG dispatches a single download job to a download worker with a pinned queue.
    It pulls a JSON payload from the `queue2_dl_inbox` Redis queue and triggers the `ytdlp_ops_v02_worker_per_url_dl` DAG.
    """,
    render_template_as_native_obj=True,
    params={
        'queue_name': Param(
            DEFAULT_QUEUE_NAME,
            type='string',
            title='Queue Name',
            description='The base name of the Redis queue to pull job payloads from.',
        ),
        'redis_conn_id': Param(
            DEFAULT_REDIS_CONN_ID,
            type='string',
            title='Redis Connection ID',
        ),
    },
) as dag:
    dispatch_job_to_dl_worker()