# yt-dlp-dags/dags/ytdlp_mgmt_queue_list_contents.py (168 lines, 7.8 KiB, Python)

# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2024 rl <rl@rlmbp>
#
# Distributed under terms of the MIT license.
"""
Airflow DAG for manually listing the contents of a specific Redis key used by YTDLP queues.
"""
from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.models.param import Param
from airflow.operators.python import PythonOperator
from airflow.providers.redis.hooks.redis import RedisHook
from airflow.utils.dates import days_ago
from datetime import timedelta
import logging
import json
import redis # Import redis exceptions if needed
# Configure logging: module-level logger, used by the helper and the task callable below.
logger = logging.getLogger(__name__)
# Default settings — each one is overridable via the DAG `params` at trigger time.
DEFAULT_REDIS_CONN_ID = 'redis_default'  # Airflow connection ID for the Redis server.
# Default to a common inbox pattern, user should override with the specific key
DEFAULT_QUEUE_TO_LIST = 'video_queue_inbox'
DEFAULT_MAX_ITEMS = 10 # Limit number of items listed by default
# --- Helper Function ---
def _get_redis_client(redis_conn_id):
    """Create and verify a Redis client via an Airflow RedisHook.

    Args:
        redis_conn_id: Airflow connection ID pointing at the Redis server.

    Returns:
        A connected Redis client (verified with PING).

    Raises:
        AirflowException: If authentication fails or the connection cannot
            be established, with the original exception chained as the cause.
    """
    try:
        hook = RedisHook(redis_conn_id=redis_conn_id)
        # decode_responses=True removed as it's not supported by get_conn in some
        # environments; responses are bytes and callers decode manually.
        client = hook.get_conn()
        client.ping()  # Fail fast if the server is unreachable.
        logger.info(f"Successfully connected to Redis using connection '{redis_conn_id}'.")
        return client
    except redis.exceptions.AuthenticationError as e:
        logger.error(f"Redis authentication failed for connection '{redis_conn_id}'. Check password.")
        # Chain the cause so the task log keeps the original traceback.
        raise AirflowException(f"Redis authentication failed for '{redis_conn_id}'.") from e
    except Exception as e:
        logger.error(f"Failed to get Redis client for connection '{redis_conn_id}': {e}")
        raise AirflowException(f"Redis connection failed for '{redis_conn_id}': {e}") from e
# --- Python Callable for List Contents Task ---
def _log_list_contents(redis_client, queue_to_list, max_items):
    """Log the most recently pushed entries of the Redis list `queue_to_list`."""
    list_length = redis_client.llen(queue_to_list)
    # Clamp to [0, list_length]. Without the lower bound, max_items == 0 would
    # make lrange(key, -0, -1) equal lrange(key, 0, -1) and dump the WHOLE list.
    items_to_fetch = max(0, min(max_items, list_length))
    if items_to_fetch > 0:
        # lrange with negative indices reads from the end of the list, which is
        # where rpush appends — so these are the most recently added items.
        contents_bytes = redis_client.lrange(queue_to_list, -items_to_fetch, -1)
    else:
        contents_bytes = []
    contents = [item.decode('utf-8') for item in contents_bytes]
    # Reverse so the absolute most recent item is printed first.
    contents.reverse()
    logger.info(f"--- Contents of Redis List '{queue_to_list}' (showing most recent {len(contents)} of {list_length}) ---")
    for i, item in enumerate(contents):
        # The index here is just for display, 0 is the most recent
        logger.info(f" [recent_{i}]: {item}")
    if list_length > len(contents):
        logger.info(f" ... ({list_length - len(contents)} older items not shown)")
    logger.info(f"--- End of List Contents ---")


def _log_hash_contents(redis_client, queue_to_list, max_items):
    """Log up to `max_items` field/value pairs of the Redis hash `queue_to_list`."""
    hash_size = redis_client.hlen(queue_to_list)
    # HGETALL can be risky for large hashes. Consider HSCAN for production.
    # For manual inspection, HGETALL is often acceptable; warn on large hashes.
    if hash_size > max_items * 2:  # Heuristic: avoid huge HGETALL
        logger.warning(f"Hash '{queue_to_list}' has {hash_size} fields, which is large. Listing might be slow or incomplete. Consider using redis-cli HSCAN.")
    # hgetall returns dict of bytes keys and bytes values, decode them.
    contents_bytes = redis_client.hgetall(queue_to_list)
    contents = {k.decode('utf-8'): v.decode('utf-8') for k, v in contents_bytes.items()}
    logger.info(f"--- Contents of Redis Hash '{queue_to_list}' ({len(contents)} fields) ---")
    item_count = 0
    for key, value in contents.items():
        if item_count >= max_items:
            logger.info(f" ... (stopped listing after {max_items} items of {hash_size})")
            break
        # Attempt to pretty-print if value is JSON; fall back to the raw string.
        try:
            parsed_value = json.loads(value)
            pretty_value = json.dumps(parsed_value, indent=2)
            logger.info(f" '{key}':\n{pretty_value}")
        except json.JSONDecodeError:
            logger.info(f" '{key}': {value}")
        item_count += 1
    logger.info(f"--- End of Hash Contents ---")


def list_contents_callable(**context):
    """List the contents of the Redis key named by the `queue_to_list` param.

    Dispatches on the key's Redis type: list and hash keys are logged via the
    helpers above; a missing key ('none') and unsupported types are reported.

    Raises:
        ValueError: If the `queue_to_list` param is empty.
        AirflowException: If any Redis operation fails, with the original
            exception chained as the cause.
    """
    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_to_list = params['queue_to_list']
    max_items = params.get('max_items', DEFAULT_MAX_ITEMS)
    if not queue_to_list:
        raise ValueError("Parameter 'queue_to_list' cannot be empty.")
    logger.info(f"Attempting to list contents of Redis key '{queue_to_list}' (max: {max_items}) using connection '{redis_conn_id}'.")
    try:
        redis_client = _get_redis_client(redis_conn_id)
        # TYPE returns bytes (e.g. b'list'); decode before comparing.
        key_type = redis_client.type(queue_to_list).decode('utf-8')
        if key_type == 'list':
            _log_list_contents(redis_client, queue_to_list, max_items)
        elif key_type == 'hash':
            _log_hash_contents(redis_client, queue_to_list, max_items)
        elif key_type == 'none':
            logger.info(f"Redis key '{queue_to_list}' does not exist.")
        else:
            logger.info(f"Redis key '{queue_to_list}' is of type '{key_type}'. Listing contents for this type is not implemented.")
    except Exception as e:
        logger.error(f"Failed to list contents of Redis key '{queue_to_list}': {e}", exc_info=True)
        raise AirflowException(f"Failed to list Redis key contents: {e}") from e
# --- DAG Definition ---
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,  # No retries for manual list operation
    'start_date': days_ago(1),
}

with DAG(
    dag_id='ytdlp_mgmt_queue_list_contents',
    default_args=default_args,
    schedule_interval=None,  # Manually triggered
    catchup=False,
    description='Manually list the contents of a specific YTDLP Redis queue/key (list or hash).',
    tags=['ytdlp', 'queue', 'management', 'redis', 'manual', 'list'],
    params={
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="Airflow Redis connection ID."),
        'queue_to_list': Param(
            DEFAULT_QUEUE_TO_LIST,
            type="string",
            description="Exact name of the Redis key (list/hash) to list contents for (e.g., 'video_queue_inbox_account_xyz', 'video_queue_progress', etc.)."
        ),
        'max_items': Param(DEFAULT_MAX_ITEMS, type="integer", description="Maximum number of items/fields to list. For lists, shows the most recent items."),
    }
) as dag:
    list_contents_task = PythonOperator(
        task_id='list_specified_queue_contents',
        python_callable=list_contents_callable,
        # Params are implicitly passed via context['params']
    )
    # NOTE: doc corrected — the callable fetches the TAIL of a list (most
    # recently rpushed items, newest first), not the first max_items.
    list_contents_task.doc_md = """
    ### List Specified Queue/Key Contents Task
    Lists the contents of the Redis key specified by `queue_to_list`.
    - For **Lists** (e.g., `_inbox`), shows the most recent `max_items` entries (newest first).
    - For **Hashes** (e.g., `_progress`, `_result`, `_fail`), shows up to `max_items` key-value pairs. Attempts to pretty-print JSON values.
    - Logs a warning for very large hashes.
    *Trigger this task manually via the UI.*
    """