180 lines
7.6 KiB
Python
180 lines
7.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
# vim:fenc=utf-8
|
|
#
|
|
# Copyright © 2024 rl <rl@rlmbp>
|
|
#
|
|
# Distributed under terms of the MIT license.
|
|
|
|
"""
|
|
Airflow DAG for manually checking the status (type and size) of a specific Redis key used by YTDLP queues.
|
|
"""
|
|
|
|
from airflow import DAG
|
|
from airflow.exceptions import AirflowException
|
|
from airflow.models.param import Param
|
|
from airflow.operators.python import PythonOperator
|
|
from airflow.providers.redis.hooks.redis import RedisHook
|
|
from airflow.utils.dates import days_ago
|
|
from datetime import datetime, timedelta, timezone
|
|
import logging
|
|
import json
|
|
import redis # Import redis exceptions if needed
|
|
|
|
# Module-level logger shared by every callable in this file.
logger: logging.Logger = logging.getLogger(__name__)

# Fallback values, used both as DAG param defaults and when a param is missing.
DEFAULT_REDIS_CONN_ID: str = 'redis_default'
DEFAULT_QUEUE_BASE_NAME: str = 'video_queue'
DEFAULT_MAX_ITEMS_TO_LIST: int = 25
|
|
|
|
# Import utility functions
|
|
from utils.redis_utils import _get_redis_client
|
|
|
|
# --- Python Callable for Check and List Task ---
|
|
|
|
def _to_text(raw):
    """Return a Redis reply as ``str``, decoding UTF-8 bytes when necessary.

    Accepts both ``bytes`` replies and already-decoded ``str`` replies, so the
    caller works regardless of how the Redis client was configured. Undecodable
    bytes are replaced rather than raising, because this is a diagnostic listing.
    """
    if isinstance(raw, (bytes, bytearray)):
        return raw.decode('utf-8', errors='replace')
    return str(raw)


def _format_age(parsed_value):
    """Return an ``" (age: ...)"`` suffix for a parsed hash value, or ``""``.

    The age is derived from the ``end_time`` (preferred) or ``start_time`` key
    of a JSON object, interpreted as a Unix timestamp. Anything that is not an
    object, has no timestamp, or carries an unusable timestamp yields ``""``.
    """
    if not isinstance(parsed_value, dict):
        return ""
    timestamp = parsed_value.get('end_time') or parsed_value.get('start_time')
    if not timestamp:
        return ""
    try:
        recorded_at = datetime.fromtimestamp(timestamp, timezone.utc)
    except (TypeError, ValueError, OverflowError, OSError):
        # Non-numeric or out-of-range timestamp: show the entry without an age.
        return ""
    return f" (age: {datetime.now(timezone.utc) - recorded_at})"


def _log_list_contents(redis_client, queue_to_check, max_items):
    """Log the length of a Redis list and up to *max_items* items from its tail."""
    list_length = redis_client.llen(queue_to_check)
    logger.info(f"Redis key '{queue_to_check}' is a LIST with {list_length} items.")
    if list_length <= 0:
        return
    if max_items == 0:
        # lrange(key, -0, -1) would return the ENTIRE list, so skip the fetch.
        logger.info(" (max_items_to_list is 0; contents not listed)")
        return

    items_to_fetch = min(max_items, list_length)
    # lrange with negative indices gets items from the end (most recent for rpush)
    contents = [
        _to_text(item)
        for item in redis_client.lrange(queue_to_check, -items_to_fetch, -1)
    ]
    contents.reverse()  # Show most recent first

    logger.info(f"--- Showing most recent {len(contents)} of {list_length} items ---")
    for i, item in enumerate(contents):
        logger.info(f" [recent_{i}]: {item}")
    if list_length > len(contents):
        logger.info(f" ... ({list_length - len(contents)} older items not shown)")
    logger.info(f"--- End of List Contents ---")


def _log_hash_contents(redis_client, queue_to_check, max_items):
    """Log the size of a Redis hash and a sample of up to *max_items* fields."""
    hash_size = redis_client.hlen(queue_to_check)
    logger.info(f"Redis key '{queue_to_check}' is a HASH with {hash_size} fields.")
    if hash_size <= 0:
        return
    if max_items == 0:
        # A COUNT of 0 is not a valid scan hint, so do not start the scan at all.
        logger.info(" (max_items_to_list is 0; contents not listed)")
        return

    logger.info(f"--- Showing a sample of up to {max_items} fields ---")
    # hscan_iter iterates incrementally; count is only a per-batch hint, so the
    # explicit cap below is what actually bounds the output.
    scan = redis_client.hscan_iter(queue_to_check, count=max_items)
    for item_count, (raw_field, raw_value) in enumerate(scan):
        if item_count >= max_items:
            logger.info(f" ... (stopped listing after {max_items} items of {hash_size})")
            break
        field = _to_text(raw_field)
        value = _to_text(raw_value)
        # Try to pretty-print if value is JSON
        try:
            parsed_value = json.loads(value)
        except (json.JSONDecodeError, TypeError):
            logger.info(f" Field '{field}': {value}")
            continue
        age_str = _format_age(parsed_value)
        pretty_value = json.dumps(parsed_value, indent=2)
        logger.info(f" Field '{field}'{age_str}:\n{pretty_value}")
    logger.info(f"--- End of Hash Contents ---")


def check_and_list_queue_callable(**context):
    """Check the type and size of a Redis key and log its recent contents.

    Reads from the task context:
        params['redis_conn_id']: connection ID handed to ``_get_redis_client``.
        params['queue_name']: base queue name (defaults to DEFAULT_QUEUE_BASE_NAME).
        params['max_items_to_list']: cap on listed items (defaults to
            DEFAULT_MAX_ITEMS_TO_LIST); negative values are treated as 0.
        queue_suffix: suffix appended to the base name, supplied via ``op_kwargs``.

    Lists are shown newest-last-pushed first; hashes are sampled; other key
    types are only reported by type. Nothing is returned - all output is logged.

    Raises:
        AirflowException: if talking to Redis or listing the key fails. The
            original exception is attached as the cause.
    """
    params = context['params']
    redis_conn_id = params['redis_conn_id']
    # queue_suffix is passed from the PythonOperator's op_kwargs, which are available in the context
    queue_suffix = context['queue_suffix']
    queue_name = params.get('queue_name', DEFAULT_QUEUE_BASE_NAME)
    queue_to_check = f"{queue_name}{queue_suffix}"
    # Clamp to zero: a negative cap would turn into a nonsensical lrange slice.
    max_items = max(0, int(params.get('max_items_to_list', DEFAULT_MAX_ITEMS_TO_LIST)))

    logger.info(f"--- Checking Status and Contents of Redis Key: '{queue_to_check}' ---")
    logger.info(f"Using connection '{redis_conn_id}', listing up to {max_items} items.")

    try:
        redis_client = _get_redis_client(redis_conn_id)
        key_type = _to_text(redis_client.type(queue_to_check))

        if key_type == 'list':
            _log_list_contents(redis_client, queue_to_check, max_items)
        elif key_type == 'hash':
            _log_hash_contents(redis_client, queue_to_check, max_items)
        elif key_type == 'none':
            logger.info(f"Redis key '{queue_to_check}' does not exist.")
        else:
            logger.info(f"Redis key '{queue_to_check}' is of type '{key_type}'. Listing contents for this type is not implemented.")

    except Exception as e:
        logger.error(f"Failed to check/list contents of Redis key '{queue_to_check}': {e}", exc_info=True)
        raise AirflowException(f"Failed to process Redis key: {e}") from e
|
|
|
|
# --- DAG Definition ---
|
|
# Defaults applied to every task in the DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,  # No retries for a manual check/list operation
    # Midnight UTC one day ago. Computed with the standard library instead of
    # the deprecated ``airflow.utils.dates.days_ago`` helper; the value is the same.
    'start_date': (
        datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
        - timedelta(days=1)
    ),
}
|
|
|
|
# Trigger-time parameters; every task reads them through context['params'].
_QUEUE_CHECK_PARAMS = {
    'redis_conn_id': Param(
        DEFAULT_REDIS_CONN_ID,
        type="string",
        description="Airflow Redis connection ID.",
    ),
    'queue_name': Param(
        DEFAULT_QUEUE_BASE_NAME,
        type="string",
        description="Base name for the Redis queues (e.g., 'video_queue').",
    ),
    'max_items_to_list': Param(
        DEFAULT_MAX_ITEMS_TO_LIST,
        type="integer",
        description="Maximum number of recent items/fields to list from each queue.",
    ),
}

with DAG(
    dag_id='ytdlp_mgmt_queues_check_status',
    default_args=default_args,
    schedule_interval=None,  # Manually triggered
    catchup=False,
    description='Manually check the status and recent items of all YTDLP Redis queues for a given base name.',
    tags=['ytdlp', 'queue', 'management', 'redis', 'manual', 'status', 'list'],
    params=_QUEUE_CHECK_PARAMS,
) as dag:

    # One independent task per queue suffix; all four share the same callable
    # and differ only in the suffix passed through op_kwargs.
    check_inbox_queue = PythonOperator(
        task_id='check_inbox_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_inbox'},
        doc_md=(
            "\n"
            "### Check Inbox Queue (`_inbox`)\n"
            "Checks the status and lists the most recent URLs waiting to be processed.\n"
            "The full queue name is `{{ params.queue_name }}_inbox`.\n"
        ),
    )

    check_progress_queue = PythonOperator(
        task_id='check_progress_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_progress'},
        doc_md=(
            "\n"
            "### Check Progress Queue (`_progress`)\n"
            "Checks the status and lists a sample of URLs currently being processed.\n"
            "The full queue name is `{{ params.queue_name }}_progress`.\n"
        ),
    )

    check_result_queue = PythonOperator(
        task_id='check_result_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_result'},
        doc_md=(
            "\n"
            "### Check Result Queue (`_result`)\n"
            "Checks the status and lists a sample of successfully processed URLs.\n"
            "The full queue name is `{{ params.queue_name }}_result`.\n"
        ),
    )

    check_fail_queue = PythonOperator(
        task_id='check_fail_queue',
        python_callable=check_and_list_queue_callable,
        op_kwargs={'queue_suffix': '_fail'},
        doc_md=(
            "\n"
            "### Check Fail Queue (`_fail`)\n"
            "Checks the status and lists a sample of failed URLs.\n"
            "The full queue name is `{{ params.queue_name }}_fail`.\n"
        ),
    )
|