yt-dlp-dags/dags/ytdlp_mgmt_queue_check_status.py

# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2024 rl <rl@rlmbp>
#
# Distributed under terms of the MIT license.
"""
Airflow DAG for manually checking the status (type and size) of a specific Redis key used by YTDLP queues.
"""
from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.models.param import Param
from airflow.operators.python import PythonOperator
from airflow.providers.redis.hooks.redis import RedisHook
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta, timezone
import logging
import json
import redis # Import redis exceptions if needed
# Configure logging
logger = logging.getLogger(__name__)
# Default settings
DEFAULT_REDIS_CONN_ID = 'redis_default'
DEFAULT_QUEUE_BASE_NAME = 'video_queue'
DEFAULT_MAX_ITEMS_TO_LIST = 25
# Import utility functions
from utils.redis_utils import _get_redis_client
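
# The helper above lives in utils/redis_utils.py and is not shown here. For
# reference, a minimal sketch of what it is assumed to do: resolve the Airflow
# connection and return a raw-bytes redis-py client (the callable below decodes
# byte values itself), e.g.:
#
#     def _get_redis_client(redis_conn_id):
#         return RedisHook(redis_conn_id=redis_conn_id).get_conn()
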
# --- Python Callable for Check and List Task ---
def check_and_list_queue_callable(**context):
    """Checks the type and size of a Redis key and lists its recent contents."""
    params = context['params']
    redis_conn_id = params['redis_conn_id']
    # queue_suffix is passed via the PythonOperator's op_kwargs, which are merged into the task context
    queue_suffix = context['queue_suffix']
    queue_name = params.get('queue_name', DEFAULT_QUEUE_BASE_NAME)
    queue_to_check = f"{queue_name}{queue_suffix}"
    max_items = int(params.get('max_items_to_list', DEFAULT_MAX_ITEMS_TO_LIST))

    logger.info(f"--- Checking Status and Contents of Redis Key: '{queue_to_check}' ---")
    logger.info(f"Using connection '{redis_conn_id}', listing up to {max_items} items.")
    try:
        redis_client = _get_redis_client(redis_conn_id)
        key_type_bytes = redis_client.type(queue_to_check)
        key_type = key_type_bytes.decode('utf-8')

        if key_type == 'list':
            list_length = redis_client.llen(queue_to_check)
            logger.info(f"Redis key '{queue_to_check}' is a LIST with {list_length} items.")
            if list_length > 0:
                items_to_fetch = min(max_items, list_length)
                # lrange with negative indices reads from the tail of the list,
                # which holds the most recently RPUSHed (i.e. newest) items
                contents_bytes = redis_client.lrange(queue_to_check, -items_to_fetch, -1)
                contents = [item.decode('utf-8') for item in contents_bytes]
                contents.reverse()  # Show most recent first
                logger.info(f"--- Showing most recent {len(contents)} of {list_length} items ---")
                for i, item in enumerate(contents):
                    logger.info(f"  [recent_{i}]: {item}")
                if list_length > len(contents):
                    logger.info(f"  ... ({list_length - len(contents)} older items not shown)")
                logger.info("--- End of List Contents ---")
        elif key_type == 'hash':
            hash_size = redis_client.hlen(queue_to_check)
            logger.info(f"Redis key '{queue_to_check}' is a HASH with {hash_size} fields.")
            if hash_size > 0:
                logger.info(f"--- Showing a sample of up to {max_items} fields ---")
                item_count = 0
                # hscan_iter iterates the hash incrementally without blocking Redis;
                # count is only a hint to the server, so the cap is enforced below
                for field_bytes, value_bytes in redis_client.hscan_iter(queue_to_check, count=max_items):
                    if item_count >= max_items:
                        logger.info(f"  ... (stopped listing after {max_items} items of {hash_size})")
                        break
                    field = field_bytes.decode('utf-8')
                    value = value_bytes.decode('utf-8')
                    # Try to pretty-print if the value is JSON
                    try:
                        parsed_value = json.loads(value)
                        # Check for a timestamp so the field's age can be reported
                        timestamp = parsed_value.get('end_time') or parsed_value.get('start_time')
                        age_str = ""
                        if timestamp:
                            age_seconds = (datetime.now(timezone.utc) - datetime.fromtimestamp(timestamp, timezone.utc)).total_seconds()
                            age_str = f" (age: {timedelta(seconds=age_seconds)})"
                        pretty_value = json.dumps(parsed_value, indent=2)
                        logger.info(f"  Field '{field}'{age_str}:\n{pretty_value}")
                    # AttributeError covers valid JSON that is not an object (no .get())
                    except (json.JSONDecodeError, TypeError, AttributeError):
                        logger.info(f"  Field '{field}': {value}")
                    item_count += 1
                logger.info("--- End of Hash Contents ---")
        elif key_type == 'none':
            logger.info(f"Redis key '{queue_to_check}' does not exist.")
        else:
            logger.info(f"Redis key '{queue_to_check}' is of type '{key_type}'. Listing contents for this type is not implemented.")
    except Exception as e:
        logger.error(f"Failed to check/list contents of Redis key '{queue_to_check}': {e}", exc_info=True)
        raise AirflowException(f"Failed to process Redis key: {e}") from e
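
# Example: once deployed, this DAG can be triggered from the Airflow CLI with
# overridden params (a sketch; adjust the conf values to match your deployment):
#
#     airflow dags trigger ytdlp_mgmt_queues_check_status \
#         --conf '{"queue_name": "video_queue", "max_items_to_list": 10}'
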
# --- DAG Definition ---
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,  # No retries for a manual check/list operation
    'start_date': days_ago(1),
}
with DAG(
    dag_id='ytdlp_mgmt_queues_check_status',
    default_args=default_args,
    schedule_interval=None,  # Manually triggered
    catchup=False,
    description='Manually check the status and recent items of all YTDLP Redis queues for a given base name.',
    tags=['ytdlp', 'queue', 'management', 'redis', 'manual', 'status', 'list'],
    params={
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="Airflow Redis connection ID."),
        'queue_name': Param(
            DEFAULT_QUEUE_BASE_NAME,
            type="string",
            description="Base name for the Redis queues (e.g., 'video_queue')."
        ),
        'max_items_to_list': Param(DEFAULT_MAX_ITEMS_TO_LIST, type="integer", description="Maximum number of recent items/fields to list from each queue."),
    },
) as dag:
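    # The four tasks below have no dependencies on each other and run in
    # parallel on every trigger; each one inspects one conventional suffix
    # of the queue set (_inbox, _progress, _result, _fail).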
    check_inbox_queue = PythonOperator(
        task_id='check_inbox_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_inbox'},
    )
    check_inbox_queue.doc_md = """
### Check Inbox Queue (`_inbox`)
Checks the status and lists the most recent URLs waiting to be processed.
The full queue name is `{{ params.queue_name }}_inbox`.
"""

    check_progress_queue = PythonOperator(
        task_id='check_progress_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_progress'},
    )
    check_progress_queue.doc_md = """
### Check Progress Queue (`_progress`)
Checks the status and lists a sample of URLs currently being processed.
The full queue name is `{{ params.queue_name }}_progress`.
"""

    check_result_queue = PythonOperator(
        task_id='check_result_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_result'},
    )
    check_result_queue.doc_md = """
### Check Result Queue (`_result`)
Checks the status and lists a sample of successfully processed URLs.
The full queue name is `{{ params.queue_name }}_result`.
"""

    check_fail_queue = PythonOperator(
        task_id='check_fail_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_fail'},
    )
    check_fail_queue.doc_md = """
### Check Fail Queue (`_fail`)
Checks the status and lists a sample of failed URLs.
The full queue name is `{{ params.queue_name }}_fail`.
"""