# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2024 rl <rl@rlmbp>
#
# Distributed under terms of the MIT license.

"""
Airflow DAG for manually listing the contents of a specific Redis key used by YTDLP queues.
"""
|
|
|
|
from airflow import DAG
|
|
from airflow.exceptions import AirflowException
|
|
from airflow.models.param import Param
|
|
from airflow.operators.python import PythonOperator
|
|
from airflow.providers.redis.hooks.redis import RedisHook
|
|
from airflow.utils.dates import days_ago
|
|
from datetime import timedelta
|
|
import logging
|
|
import json
|
|
import redis # Import redis exceptions if needed
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Default settings
|
|
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
|
# Default to a common inbox pattern, user should override with the specific key
|
|
DEFAULT_QUEUE_TO_LIST = 'video_queue_inbox'
|
|
DEFAULT_MAX_ITEMS = 100 # Limit number of items listed by default
|
|
|
|
# --- Helper Function ---

def _get_redis_client(redis_conn_id):
    """Create and verify a Redis client via Airflow's RedisHook.

    Args:
        redis_conn_id: Airflow connection ID pointing at the Redis instance.

    Returns:
        A live redis client whose connectivity has been verified with PING.

    Raises:
        AirflowException: if authentication fails or the connection cannot
            be established (original exception attached as ``__cause__``).
    """
    try:
        hook = RedisHook(redis_conn_id=redis_conn_id)
        # decode_responses=True is deliberately NOT passed to get_conn() —
        # it's not supported in some environments, so callers decode manually.
        client = hook.get_conn()
        # Fail fast with a clear connection error rather than surfacing a
        # confusing failure on the first real command.
        client.ping()
        # Lazy %-style args: message is only formatted if the level is enabled.
        logger.info("Successfully connected to Redis using connection '%s'.", redis_conn_id)
        return client
    except redis.exceptions.AuthenticationError as e:
        logger.error("Redis authentication failed for connection '%s'. Check password.", redis_conn_id)
        # Chain the cause so the original traceback is preserved in logs.
        raise AirflowException(f"Redis authentication failed for '{redis_conn_id}'.") from e
    except Exception as e:
        logger.error("Failed to get Redis client for connection '%s': %s", redis_conn_id, e)
        raise AirflowException(f"Redis connection failed for '{redis_conn_id}': {e}") from e
|
|
|
|
# --- Python Callable for List Contents Task ---

def _as_str(value):
    """Decode a redis-py return value to str.

    redis-py returns bytes by default, but str when the connection was made
    with decode_responses=True (which varies by environment here) — handle both.
    """
    return value.decode('utf-8') if isinstance(value, bytes) else value


def _log_list_contents(redis_client, queue_to_list, max_items):
    """Log up to ``max_items`` leading entries of the Redis list ``queue_to_list``."""
    list_length = redis_client.llen(queue_to_list)
    items_to_fetch = min(max_items, list_length)
    # LRANGE end index is inclusive, hence items_to_fetch - 1.
    contents = [_as_str(item) for item in redis_client.lrange(queue_to_list, 0, items_to_fetch - 1)]
    logger.info(f"--- Contents of Redis List '{queue_to_list}' (showing first {len(contents)} of {list_length}) ---")
    for i, item in enumerate(contents):
        logger.info(f" [{i}]: {item}")
    if list_length > len(contents):
        logger.info(f" ... ({list_length - len(contents)} more items not shown)")
    logger.info(f"--- End of List Contents ---")


def _log_hash_contents(redis_client, queue_to_list, max_items):
    """Log up to ``max_items`` field/value pairs of the Redis hash
    ``queue_to_list``, pretty-printing values that parse as JSON."""
    hash_size = redis_client.hlen(queue_to_list)
    # HGETALL can be risky for large hashes. Consider HSCAN for production;
    # for manual inspection, HGETALL is often acceptable, so only warn.
    if hash_size > max_items * 2:  # Heuristic: avoid huge HGETALL
        logger.warning(f"Hash '{queue_to_list}' has {hash_size} fields, which is large. Listing might be slow or incomplete. Consider using redis-cli HSCAN.")
    contents = {_as_str(k): _as_str(v) for k, v in redis_client.hgetall(queue_to_list).items()}
    logger.info(f"--- Contents of Redis Hash '{queue_to_list}' ({len(contents)} fields) ---")
    item_count = 0
    for key, value in contents.items():
        if item_count >= max_items:
            logger.info(f" ... (stopped listing after {max_items} items of {hash_size})")
            break
        # Attempt to pretty-print if value is JSON; fall back to raw string.
        try:
            pretty_value = json.dumps(json.loads(value), indent=2)
            logger.info(f" '{key}':\n{pretty_value}")
        except json.JSONDecodeError:
            logger.info(f" '{key}': {value}")  # Print as string if not JSON
        item_count += 1
    logger.info(f"--- End of Hash Contents ---")


def list_contents_callable(**context):
    """Lists the contents of the specified Redis key (list or hash).

    DAG params read from ``context['params']``:
        redis_conn_id: Airflow connection ID for Redis.
        queue_to_list: exact Redis key name to inspect (required, non-empty).
        max_items: cap on the number of items/fields logged.

    Raises:
        ValueError: if 'queue_to_list' is empty.
        AirflowException: on any Redis or listing failure.
    """
    params = context['params']
    redis_conn_id = params['redis_conn_id']
    queue_to_list = params['queue_to_list']
    max_items = params.get('max_items', DEFAULT_MAX_ITEMS)

    if not queue_to_list:
        raise ValueError("Parameter 'queue_to_list' cannot be empty.")

    logger.info(f"Attempting to list contents of Redis key '{queue_to_list}' (max: {max_items}) using connection '{redis_conn_id}'.")
    try:
        redis_client = _get_redis_client(redis_conn_id)
        key_type = _as_str(redis_client.type(queue_to_list))

        # Dispatch on key type; only list and hash listings are implemented.
        if key_type == 'list':
            _log_list_contents(redis_client, queue_to_list, max_items)
        elif key_type == 'hash':
            _log_hash_contents(redis_client, queue_to_list, max_items)
        elif key_type == 'none':
            logger.info(f"Redis key '{queue_to_list}' does not exist.")
        else:
            logger.info(f"Redis key '{queue_to_list}' is of type '{key_type}'. Listing contents for this type is not implemented.")
    except Exception as e:
        logger.error(f"Failed to list contents of Redis key '{queue_to_list}': {e}", exc_info=True)
        # Chain the cause so the original traceback survives the re-raise.
        raise AirflowException(f"Failed to list Redis key contents: {e}") from e
|
|
|
|
# --- DAG Definition ---

# Shared task defaults for this DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,  # No retries for manual list operation
    # NOTE(review): days_ago() is deprecated in Airflow 2.x in favour of a
    # static pendulum datetime — confirm target Airflow version before changing.
    'start_date': days_ago(1)
}


# Manually-triggered inspection DAG: lists the contents of one Redis key.
with DAG(
    dag_id='ytdlp_mgmt_queue_list_contents',
    default_args=default_args,
    schedule_interval=None,  # Manually triggered
    catchup=False,
    description='Manually list the contents of a specific YTDLP Redis queue/key (list or hash).',
    tags=['ytdlp', 'queue', 'management', 'redis', 'manual', 'list'],
    # Runtime-overridable params, surfaced in the "Trigger DAG w/ config" UI.
    params={
        'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="Airflow Redis connection ID."),
        'queue_to_list': Param(
            DEFAULT_QUEUE_TO_LIST,
            type="string",
            description="Exact name of the Redis key (list/hash) to list contents for (e.g., 'video_queue_inbox_account_xyz', 'video_queue_progress', etc.)."
        ),
        'max_items': Param(DEFAULT_MAX_ITEMS, type="integer", description="Maximum number of items/fields to list from the key."),
    }
) as dag:

    # Single task: runs list_contents_callable with the params above.
    list_contents_task = PythonOperator(
        task_id='list_specified_queue_contents',
        python_callable=list_contents_callable,
        # Params are implicitly passed via context['params']
    )

    # Markdown shown in the task's "Task Docs" panel in the Airflow UI.
    list_contents_task.doc_md = """
### List Specified Queue/Key Contents Task
Lists the contents of the Redis key specified by `queue_to_list`.
- For **Lists** (e.g., `_inbox`), shows the first `max_items`.
- For **Hashes** (e.g., `_progress`, `_result`, `_fail`), shows up to `max_items` key-value pairs. Attempts to pretty-print JSON values.
- Logs a warning for very large hashes.

*Trigger this task manually via the UI.*
"""
|