yt-dlp-dags/airflow/dags/ytdlp_ops_v02_dispatcher_dl.py

80 lines
2.9 KiB
Python

# -*- coding: utf-8 -*-
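"""
DAG to dispatch download workers by triggering ytdlp_ops_v02_worker_per_url_dl DAG runs.
It does not pull a job payload from Redis itself; it only triggers a download worker,
which is responsible for locking a profile and finding its own task.
"""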
from __future__ import annotations
import logging
import os
import socket
from datetime import timedelta
from airflow.decorators import task
from airflow.exceptions import AirflowSkipException
from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.api.common.trigger_dag import trigger_dag
from airflow.utils.dates import days_ago
from utils.redis_utils import _get_redis_client
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Default value of the DAG's 'queue_name' param.
DEFAULT_QUEUE_NAME = 'queue2_dl'
# Default value of the DAG's 'redis_conn_id' param.
DEFAULT_REDIS_CONN_ID = 'redis_default'
@task(queue='queue-dl')
def dispatch_job_to_dl_worker(**context):
    """
    Start one 'profile-first' v2 download worker DAG run.

    No job is selected here. The DAG params are forwarded as the run conf
    (minus any 'job_data' entry) together with a 'worker_queue' value derived
    from this machine's hostname. Per the original author's notes, the
    triggered worker is the one that locks a profile and picks a matching task.
    """
    task_instance = context['task_instance']
    logger.info(f"Download Dispatcher task '{task_instance.task_id}' running on queue '{task_instance.queue}'.")

    dag_params = context['params']

    # Host-specific queue name, passed to the worker for affinity with this machine.
    host = socket.gethostname()
    worker_queue = f"queue-dl-{host}"
    logger.info(f"Running on worker '{host}'. Dispatching a new profile-first worker instance to its dedicated queue '{worker_queue}'.")

    # Forward every orchestrator param except job_data (the worker finds its own job).
    # dict() reads values through keys()/__getitem__, the same way a ** spread does.
    worker_conf = dict(dag_params)
    worker_conf.pop('job_data', None)
    worker_conf['worker_queue'] = worker_queue

    target_dag_id = 'ytdlp_ops_v02_worker_per_url_dl'
    parent_run_id = context['dag_run'].run_id
    worker_run_id = f"worker_run_dl_{parent_run_id}_{context['ts_nodash']}_q_{worker_queue}"
    logger.info(f"Triggering '{target_dag_id}' with run_id '{worker_run_id}'")

    trigger_dag(
        dag_id=target_dag_id,
        run_id=worker_run_id,
        conf=worker_conf,
        replace_microseconds=False,
    )
# Dispatcher DAG definition: no schedule (schedule=None), created paused.
# NOTE(review): `days_ago` is deprecated in newer Airflow 2.x releases — consider a
# fixed start_date; confirm against the deployed Airflow version.
with DAG(
    dag_id='ytdlp_ops_v02_dispatcher_dl',
    tags=['ytdlp', 'worker', 'dispatcher', 'download'],
    doc_md="""
### YT-DLP v2 Download Worker Dispatcher (Profile-First)
This DAG dispatches a single "profile-first" download worker.
It does **not** pull a job from a queue. Instead, it triggers the `ytdlp_ops_v02_worker_per_url_dl` DAG,
which is responsible for locking an available download profile and then finding a matching task
from the `queue_dl_format_tasks` Redis list.
""",
    # These params are forwarded to the triggered worker as its run conf.
    params={
        'queue_name': Param(
            DEFAULT_QUEUE_NAME,
            type='string',
            title='Queue Name',
            description='The base name of the Redis queue to pull job payloads from.',
        ),
        'redis_conn_id': Param(
            DEFAULT_REDIS_CONN_ID,
            type='string',
            title='Redis Connection ID',
        ),
    },
    default_args={'owner': 'airflow', 'retries': 0},
    start_date=days_ago(1),
    schedule=None,
    catchup=False,
    is_paused_upon_creation=True,
    render_template_as_native_obj=True,
) as dag:
    # The DAG consists of this single dispatch task.
    dispatch_job_to_dl_worker()