yt-dlp-dags/airflow/config/custom_task_hooks.py

59 lines
3.0 KiB
Python

# Version: 2025-09-22-08
__version__ = "2025-09-22-08"
# This file contains custom hooks for the Airflow environment.
import logging
from airflow import settings
from airflow.configuration import conf
logger = logging.getLogger(__name__)
def _queue_from_run_id(run_id):
    """Return the worker queue embedded in *run_id*, or None if there is none.

    The dispatcher embeds the queue in the run_id like
    ``..._q_queue-dl-worker-hostname``; the text following the final ``_q_``
    separator is the candidate queue name.
    """
    # A non-string run_id cannot carry the marker. Report "no queue" rather
    # than letting a TypeError/AttributeError escape from the hook.
    if not isinstance(run_id, str) or '_q_' not in run_id:
        return None
    candidate = run_id.split('_q_')[-1]
    # Check for valid v1 (dl) or v2 (auth/dl) queue prefixes.
    if candidate.startswith(('queue-dl-', 'queue-auth-')):
        return candidate
    return None


def task_instance_mutation_hook(ti):
    """Pin a worker-DAG task instance to the queue encoded in its run_id.

    This hook modifies the task instance queue at runtime for worker pinning.
    It relies exclusively on parsing the queue from the run_id, which is
    expected to be set by the dispatcher DAG; no database lookup is made,
    which avoids database race conditions.

    Behaviour:
      * Task instances whose ``dag_id`` does not contain ``'worker_per_url'``
        are left untouched.
      * If ``run_id`` is empty or unset, nothing is changed; the hook may be
        called again once it is populated.
      * If the run_id carries a queue starting with ``'queue-dl-'`` or
        ``'queue-auth-'``, ``ti.queue`` is set to that queue.
      * Otherwise ``ti.queue`` falls back to ``'queue-auth'`` when ``'auth'``
        appears in the dag_id, else ``'queue-dl'``, and a warning is logged.

    Args:
        ti: The task instance to mutate. Reads ``dag_id``, ``task_id`` and
            ``run_id``; may overwrite ``queue``.

    Returns:
        None. The task instance is modified in place.
    """
    # Lazy %-style arguments: this runs for every task instance, so the
    # message should only be formatted when the log level is enabled.
    logger.debug(
        "MUTATION HOOK: Running for dag '%s', task '%s'.",
        ti.dag_id, ti.task_id,
    )

    # This hook targets only worker DAGs, which follow a naming convention.
    if 'worker_per_url' not in ti.dag_id:
        return

    # If the run_id isn't populated yet, just return. The hook may be called again.
    if not ti.run_id:
        logger.debug(
            "MUTATION HOOK: run_id not yet available for task '%s'. Skipping this invocation.",
            ti.task_id,
        )
        return

    logger.debug(
        "MUTATION HOOK: Matched DAG '%s'. Attempting to pin task '%s' for run_id '%s'.",
        ti.dag_id, ti.task_id, ti.run_id,
    )

    worker_queue = _queue_from_run_id(ti.run_id)
    if worker_queue:
        logger.debug(
            "MUTATION HOOK: Pinning task '%s' (run_id: %s) to queue '%s' from run_id.",
            ti.task_id, ti.run_id, worker_queue,
        )
        ti.queue = worker_queue
        return

    # If the queue is not found, it's a critical failure in the dispatching logic.
    # We fall back to the default queue but log it as a high-severity warning.
    fallback_queue = 'queue-auth' if 'auth' in ti.dag_id else 'queue-dl'
    logger.warning(
        "MUTATION HOOK: Could not find worker queue in run_id '%s'. Falling back to '%s'. Pinning will fail.",
        ti.run_id, fallback_queue,
    )
    ti.queue = fallback_queue
# --- Hook Registration ---
# The hook registers itself at import time. This covers environments where this
# file is loaded directly as the local settings file via
# AIRFLOW__CORE__LOCAL_SETTINGS_PATH. Registration is skipped when the
# configured executor name starts with "debug" (DebugExecutor).
try:
    executor_name = conf.get('core', 'executor')
    using_debug_executor = executor_name.lower().startswith('debug')
    if using_debug_executor:
        logger.info("Skipping self-registration of task_instance_mutation_hook due to DebugExecutor.")
    else:
        settings.task_instance_mutation_hook = task_instance_mutation_hook
        logger.info(f"Successfully self-registered task_instance_mutation_hook (Version: {__version__}) for worker pinning.")
except Exception as e:
    # Best effort: a failure here must not break loading of the settings
    # module, so it is only logged (with traceback).
    logger.warning(f"Could not self-register custom_task_hooks: {e}. Worker pinning may not function.", exc_info=True)