80 lines
2.9 KiB
Python
80 lines
2.9 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
DAG to dispatch download workers by triggering the ytdlp_ops_v02_worker_per_url_dl DAG.
|
|
It does not pull a job payload from Redis itself; it only triggers a download worker, which finds its own job.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
import logging
|
|
import os
|
|
import socket
|
|
from datetime import timedelta
|
|
|
|
from airflow.decorators import task
|
|
from airflow.exceptions import AirflowSkipException
|
|
from airflow.models.dag import DAG
|
|
from airflow.models.param import Param
|
|
from airflow.api.common.trigger_dag import trigger_dag
|
|
from airflow.utils.dates import days_ago
|
|
|
|
from utils.redis_utils import _get_redis_client
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Default value of the 'queue_name' DAG param declared further down in this file.
DEFAULT_QUEUE_NAME = 'queue2_dl'
|
|
# Default value of the 'redis_conn_id' DAG param declared further down in this file.
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
|
|
|
@task(queue='queue-dl')
def dispatch_job_to_dl_worker(**context):
    """
    Start one 'profile-first' v2 download worker on this host's dedicated queue.

    The dispatcher does not select a job. It forwards the DAG params (without
    any 'job_data' entry) together with a host-specific 'worker_queue' value as
    the conf of a new 'ytdlp_ops_v02_worker_per_url_dl' run; locking a profile
    and finding a task is left to that worker.

    Args:
        **context: Airflow task context. The keys read here are
            'task_instance', 'params', 'dag_run' and 'ts_nodash'.
    """
    target_dag_id = 'ytdlp_ops_v02_worker_per_url_dl'

    task_instance = context['task_instance']
    logger.info(f"Download Dispatcher task '{task_instance.task_id}' running on queue '{task_instance.queue}'.")
    dag_params = context['params']

    # Worker affinity: the queue name is derived from the host executing this task.
    host = socket.gethostname()
    worker_queue = f"queue-dl-{host}"
    logger.info(f"Running on worker '{host}'. Dispatching a new profile-first worker instance to its dedicated queue '{worker_queue}'.")

    # Forward every orchestrator param plus the queue; job_data is dropped
    # because the worker finds its own job.
    conf_to_pass = dict(dag_params)
    conf_to_pass['worker_queue'] = worker_queue
    conf_to_pass.pop('job_data', None)

    # The child run id combines the parent run id, the timestamp and the queue.
    parent_run_id = context['dag_run'].run_id
    run_id = f"worker_run_dl_{parent_run_id}_{context['ts_nodash']}_q_{worker_queue}"

    logger.info(f"Triggering '{target_dag_id}' with run_id '{run_id}'")
    trigger_dag(
        dag_id=target_dag_id,
        run_id=run_id,
        conf=conf_to_pass,
        replace_microseconds=False,
    )
|
|
|
|
# Dispatcher DAG: no schedule (schedule=None), created paused, and made of a
# single task that triggers the download worker DAG.
with DAG(
    dag_id='ytdlp_ops_v02_dispatcher_dl',
    default_args={
        'owner': 'airflow',
        'retries': 0,
    },
    schedule=None,
    start_date=days_ago(1),
    catchup=False,
    tags=[
        'ytdlp',
        'worker',
        'dispatcher',
        'download',
    ],
    is_paused_upon_creation=True,
    doc_md="""
### YT-DLP v2 Download Worker Dispatcher (Profile-First)

This DAG dispatches a single "profile-first" download worker.
It does **not** pull a job from a queue. Instead, it triggers the `ytdlp_ops_v02_worker_per_url_dl` DAG,
which is responsible for locking an available download profile and then finding a matching task
from the `queue_dl_format_tasks` Redis list.
""",
    render_template_as_native_obj=True,
    params={
        # Both params are forwarded unchanged to the triggered worker as part of its conf.
        'queue_name': Param(
            DEFAULT_QUEUE_NAME,
            type='string',
            title='Queue Name',
            description='The base name of the Redis queue to pull job payloads from.',
        ),
        'redis_conn_id': Param(
            DEFAULT_REDIS_CONN_ID,
            type='string',
            title='Redis Connection ID',
        ),
    },
) as dag:
    dispatch_job_to_dl_worker()
|