# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2024 rl
#
# Distributed under terms of the MIT license.

"""
Airflow DAG for manually listing the contents of a specific Redis key
used by YTDLP queues.
"""

from airflow import DAG
from airflow.exceptions import AirflowException
from airflow.models.param import Param
from airflow.operators.python import PythonOperator
from airflow.providers.redis.hooks.redis import RedisHook
from airflow.utils.dates import days_ago
from datetime import timedelta
import logging
import json
import redis  # Imported for its exception types (redis.exceptions.*)

# Configure logging
logger = logging.getLogger(__name__)

# Default settings
DEFAULT_REDIS_CONN_ID = 'redis_default'
# Default to a common inbox pattern, user should override with the specific key
DEFAULT_QUEUE_TO_LIST = 'video_queue_inbox'
DEFAULT_MAX_ITEMS = 100  # Limit number of items listed by default


# --- Helper Function ---
def _get_redis_client(redis_conn_id):
    """Get a connected Redis client via RedisHook.

    Args:
        redis_conn_id: Airflow connection ID pointing at the Redis instance.

    Returns:
        A live redis client. Responses are raw ``bytes`` (``decode_responses``
        is deliberately not enabled — see comment below); callers decode.

    Raises:
        AirflowException: if authentication fails or the server cannot be
            reached / pinged.
    """
    try:
        hook = RedisHook(redis_conn_id=redis_conn_id)
        # decode_responses=True removed as it's not supported by get_conn in
        # some environments. We will decode manually where needed.
        client = hook.get_conn()
        client.ping()  # Fail fast if the server is unreachable.
        logger.info("Successfully connected to Redis using connection '%s'.", redis_conn_id)
        return client
    except redis.exceptions.AuthenticationError:
        logger.error("Redis authentication failed for connection '%s'. Check password.", redis_conn_id)
        raise AirflowException(f"Redis authentication failed for '{redis_conn_id}'.")
    except Exception as e:
        logger.error("Failed to get Redis client for connection '%s': %s", redis_conn_id, e)
        raise AirflowException(f"Redis connection failed for '{redis_conn_id}': {e}")


# --- Python Callable for List Contents Task ---
def list_contents_callable(**context):
    """List the contents of the specified Redis key (list or hash).

    Reads its configuration from ``context['params']``:
      - ``redis_conn_id``: Airflow Redis connection ID.
      - ``queue_to_list``: exact Redis key name to inspect.
      - ``max_items``: maximum number of items/fields to log.

    Raises:
        ValueError: if ``queue_to_list`` is empty.
        AirflowException: on any Redis connection or command failure.
    """
    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_to_list = params['queue_to_list']
    max_items = params.get('max_items', DEFAULT_MAX_ITEMS)

    if not queue_to_list:
        raise ValueError("Parameter 'queue_to_list' cannot be empty.")

    # BUGFIX: clamp max_items at zero. The original code computed
    # lrange(key, 0, max_items - 1) unconditionally, so max_items == 0
    # yielded lrange(key, 0, -1), which returns the ENTIRE list in Redis
    # (negative end index means "count from the tail").
    max_items = max(0, int(max_items))

    logger.info(
        "Attempting to list contents of Redis key '%s' (max: %s) using connection '%s'.",
        queue_to_list, max_items, redis_conn_id,
    )

    try:
        redis_client = _get_redis_client(redis_conn_id)
        key_type_bytes = redis_client.type(queue_to_list)
        key_type = key_type_bytes.decode('utf-8')  # Decode type

        if key_type == 'list':
            list_length = redis_client.llen(queue_to_list)
            # Get range, respecting max_items (0 to max_items-1)
            items_to_fetch = min(max_items, list_length)
            if items_to_fetch > 0:
                # lrange returns list of bytes, decode each item.
                # LRANGE's end index is inclusive, hence items_to_fetch - 1.
                contents_bytes = redis_client.lrange(queue_to_list, 0, items_to_fetch - 1)
                contents = [item.decode('utf-8') for item in contents_bytes]
            else:
                # Nothing to fetch; avoid lrange(key, 0, -1) (= whole list).
                contents = []
            logger.info(
                "--- Contents of Redis List '%s' (showing first %d of %d) ---",
                queue_to_list, len(contents), list_length,
            )
            for i, item in enumerate(contents):
                logger.info(" [%d]: %s", i, item)  # item is now a string
            if list_length > len(contents):
                logger.info(" ... (%d more items not shown)", list_length - len(contents))
            logger.info("--- End of List Contents ---")
            # Optionally push contents to XCom if small enough
            # context['task_instance'].xcom_push(key='list_contents', value=contents)

        elif key_type == 'hash':
            hash_size = redis_client.hlen(queue_to_list)
            # HGETALL can be risky for large hashes. Consider HSCAN for production.
            # For manual inspection, HGETALL is often acceptable.
            if hash_size > max_items * 2:  # Heuristic: avoid huge HGETALL
                logger.warning(
                    "Hash '%s' has %d fields, which is large. Listing might be slow or "
                    "incomplete. Consider using redis-cli HSCAN.",
                    queue_to_list, hash_size,
                )
                # Optionally implement HSCAN here for large hashes
            # hgetall returns dict of bytes keys and bytes values, decode them
            contents_bytes = redis_client.hgetall(queue_to_list)
            contents = {k.decode('utf-8'): v.decode('utf-8') for k, v in contents_bytes.items()}
            logger.info(
                "--- Contents of Redis Hash '%s' (%d fields) ---",
                queue_to_list, len(contents),
            )
            item_count = 0
            for key, value in contents.items():  # key and value are now strings
                if item_count >= max_items:
                    logger.info(" ... (stopped listing after %d items of %d)", max_items, hash_size)
                    break
                # Attempt to pretty-print if value is JSON
                try:
                    parsed_value = json.loads(value)
                    pretty_value = json.dumps(parsed_value, indent=2)
                    logger.info(" '%s':\n%s", key, pretty_value)
                except json.JSONDecodeError:
                    logger.info(" '%s': %s", key, value)  # Print as string if not JSON
                item_count += 1
            logger.info("--- End of Hash Contents ---")
            # Optionally push contents to XCom if small enough
            # context['task_instance'].xcom_push(key='hash_contents', value=contents)

        elif key_type == 'none':
            logger.info("Redis key '%s' does not exist.", queue_to_list)
        else:
            logger.info(
                "Redis key '%s' is of type '%s'. Listing contents for this type is not implemented.",
                queue_to_list, key_type,
            )

    except Exception as e:
        logger.error("Failed to list contents of Redis key '%s': %s", queue_to_list, e, exc_info=True)
        raise AirflowException(f"Failed to list Redis key contents: {e}")


# --- DAG Definition ---
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,  # No retries for manual list operation
    'start_date': days_ago(1)
}

with DAG(
    dag_id='ytdlp_mgmt_queue_list_contents',
    default_args=default_args,
    schedule_interval=None,  # Manually triggered
    catchup=False,
    description='Manually list the contents of a specific YTDLP Redis queue/key (list or hash).',
    tags=['ytdlp', 'queue', 'management', 'redis', 'manual', 'list'],
    params={
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="Airflow Redis connection ID."),
        'queue_to_list': Param(
            DEFAULT_QUEUE_TO_LIST,
            type="string",
            description="Exact name of the Redis key (list/hash) to list contents for (e.g., 'video_queue_inbox_account_xyz', 'video_queue_progress', etc.)."
        ),
        'max_items': Param(DEFAULT_MAX_ITEMS, type="integer", description="Maximum number of items/fields to list from the key."),
    }
) as dag:

    list_contents_task = PythonOperator(
        task_id='list_specified_queue_contents',
        python_callable=list_contents_callable,
        # Params are implicitly passed via context['params']
    )

    list_contents_task.doc_md = """
    ### List Specified Queue/Key Contents Task
    Lists the contents of the Redis key specified by `queue_to_list`.
    - For **Lists** (e.g., `_inbox`), shows the first `max_items`.
    - For **Hashes** (e.g., `_progress`, `_result`, `_fail`), shows up to `max_items` key-value pairs. Attempts to pretty-print JSON values.
    - Logs a warning for very large hashes.
    *Trigger this task manually via the UI.*
    """