# yt-dlp-dags/ytops_client/locking_download_emulator_tool.py

#!/usr/bin/env python3
"""
Standalone worker tool for the distributed download simulation.
This tool is responsible for the "lock-execute-unlock" workflow for a single
download task based on an info.json file. It's designed to be called by an
orchestrator like `stress_policy_tool.py`.
"""

import argparse
import json
import logging
import os
import sys
import time
from copy import deepcopy

try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

# Add the parent of this script's directory to sys.path (the entry is not removed
# afterwards) so that the absolute `ytops_client.*` imports below resolve.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(script_dir, '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from ytops_client.profile_manager_tool import ProfileManager
from ytops_client.stress_policy import utils as sp_utils
from ytops_client.stress_policy.state_manager import StateManager
from ytops_client.stress_policy.utils import load_policy, apply_overrides
from ytops_client.stress_policy.workers import _run_download_logic
from ytops_client.stress_policy_tool import shutdown_event

# Logging setup, executed at import time: configure the root logger at INFO
# (the entry point below switches it to DEBUG when --verbose is given) with a
# "timestamp - logger name - level - message" record layout.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


def add_locking_download_emulator_parser(subparsers):
    """Register the 'download-emulator' command and its 'lock-and-run' action.

    Args:
        subparsers: The object returned by ``ArgumentParser.add_subparsers()``
            on the parent parser; the new command is attached to it.
    """
    emulator_parser = subparsers.add_parser(
        'download-emulator',
        help='(Internal) Standalone download worker.',
        description='Internal tool to run a single download task with profile locking. Not intended for direct user invocation.',
    )

    # Internal tool: exactly one action is expected, so the sub-command is mandatory.
    actions = emulator_parser.add_subparsers(
        dest='download_emulator_command',
        help='Action to perform',
        required=True,
    )
    lock_and_run = actions.add_parser(
        'lock-and-run',
        help='Lock a profile, run a download, and unlock it.',
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Task definition.
    lock_and_run.add_argument('--policy-file', required=True, help='Path to the YAML policy file.')
    lock_and_run.add_argument('--info-json-path', required=True, help='Path to the info.json file to process.')
    lock_and_run.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value'.")

    # Redis connection settings, handed down by the orchestrator.
    redis_options = (
        ('--env-file', {'help': 'Path to a .env file.'}),
        ('--redis-host', {'help': 'Redis host.'}),
        ('--redis-port', {'type': int, 'help': 'Redis port.'}),
        ('--redis-password', {'help': 'Redis password.'}),
        ('--env', {'help': "Environment name for Redis key prefix."}),
        ('--key-prefix', {'help': 'Explicit key prefix for Redis.'}),
    )
    connection_group = lock_and_run.add_argument_group('Redis Connection')
    for flag, options in redis_options:
        connection_group.add_argument(flag, **options)

    lock_and_run.add_argument('--verbose', action='store_true', help='Enable verbose logging.')


# Seconds to wait after each failed lock attempt; the last value is reused for
# every further attempt.
_LOCK_BACKOFF_SECONDS = (3, 5, 9, 20, 50)


def _sleep_unless_shutdown(duration, poll_interval=1.0):
    """Sleep for up to ``duration`` seconds, returning early once shutdown is requested.

    The wait is split into slices of at most ``poll_interval`` seconds so that
    ``shutdown_event`` is re-checked regularly instead of only after the full
    back-off period has elapsed.
    """
    deadline = time.monotonic() + duration
    while not shutdown_event.is_set():
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))


def _unlock_profile(manager, profile_name, owner_id):
    """Unlock ``profile_name``, applying 'unlock_cooldown_seconds' when it can be read.

    Reading the cooldown is best-effort: any error while fetching or parsing it
    is logged and the profile is unlocked without a cooldown, so a failed
    config lookup can never leave the profile locked.
    """
    cooldown_seconds = None
    try:
        cooldown_str = manager.get_config('unlock_cooldown_seconds')
        if cooldown_str and cooldown_str.isdigit():
            cooldown_seconds = int(cooldown_str)
    except Exception as e:
        logger.warning(f"Could not read 'unlock_cooldown_seconds' ({e}); unlocking without a cooldown.")

    if cooldown_seconds and cooldown_seconds > 0:
        logger.info(f"Unlocking profile '{profile_name}' with a {cooldown_seconds}s cooldown.")
        manager.unlock_profile(profile_name, owner=owner_id, rest_for_seconds=cooldown_seconds)
    else:
        logger.info(f"Unlocking profile '{profile_name}'.")
        manager.unlock_profile(profile_name, owner=owner_id)


def main_locking_download_emulator(args):
    """Main logic for the 'download-emulator' tool.

    Loads the policy, locks a profile matching 'download_policy.profile_prefix',
    runs the download logic for ``args.info_json_path`` through the locked
    profile's proxy, records the overall outcome and finally unlocks the profile.

    Args:
        args: Parsed CLI namespace. Reads ``verbose``, ``policy_file``, ``set``,
            ``env_file``, ``redis_host``, ``redis_port``, ``redis_password``,
            ``key_prefix``, ``env`` and ``info_json_path``; the namespace is
            also passed through to ``_run_download_logic``.

    Returns:
        0 when the download logic returned a non-empty result list in which
        every entry reports success; 1 for every other outcome (configuration
        errors, shutdown before a lock was obtained, unreadable info.json,
        failed downloads, unexpected exceptions).

    Raises:
        Any exception raised by ``unlock_profile`` during cleanup propagates to
        the caller.
    """
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # --- Load Policy ---
    policy = load_policy(args.policy_file)
    if not policy:
        return 1
    policy = apply_overrides(policy, args.set)

    # --- Load .env if specified ---
    if args.env_file:
        if load_dotenv is None:
            # python-dotenv is optional; make the skipped file visible instead of
            # silently falling back to the process environment.
            logger.warning(f"--env-file was given but python-dotenv is not installed; ignoring {args.env_file}")
        elif load_dotenv(args.env_file):
            logger.info(f"Loaded environment variables from {args.env_file}")
        else:
            logger.error(f"Specified --env-file was not found: {args.env_file}")
            return 1

    # --- Setup ProfileManager ---
    # Explicit CLI values win; otherwise fall back to the environment, then defaults.
    redis_host = args.redis_host or os.getenv('REDIS_HOST', 'localhost')
    redis_port = args.redis_port or int(os.getenv('REDIS_PORT', 6379))
    redis_password = args.redis_password or os.getenv('REDIS_PASSWORD')

    if args.key_prefix:
        key_prefix = args.key_prefix
    elif args.env:
        key_prefix = f"{args.env}_profile_mgmt_"
    else:
        logger.error("Must provide --env or --key-prefix for Redis connection.")
        return 1

    manager = ProfileManager(
        redis_host=redis_host,
        redis_port=redis_port,
        redis_password=redis_password,
        key_prefix=key_prefix
    )

    download_policy = policy.get('download_policy', {})
    profile_prefix = download_policy.get('profile_prefix')
    if not profile_prefix:
        logger.error("Policy file must specify 'download_policy.profile_prefix'.")
        return 1

    # --- Main Lock-Execute-Unlock Logic ---
    owner_id = f"dl-emulator-{os.getpid()}"
    locked_profile = None
    lock_attempts = 0

    try:
        # --- 1. Lock a profile (with wait & backoff) ---
        while not shutdown_event.is_set():
            locked_profile = manager.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
            if locked_profile:
                logger.info(f"Locked profile '{locked_profile['name']}' with proxy '{locked_profile['proxy']}'.")
                break

            # Simplified wait logic from stress_policy_tool
            sleep_duration = _LOCK_BACKOFF_SECONDS[min(lock_attempts, len(_LOCK_BACKOFF_SECONDS) - 1)]
            logger.info(f"No download profiles available. Waiting {sleep_duration}s... (attempt {lock_attempts + 1})")
            _sleep_unless_shutdown(sleep_duration)
            lock_attempts += 1

        if not locked_profile:
            logger.warning("Could not lock a profile; shutting down.")
            return 1

        # --- 2. Read info.json ---
        try:
            with open(args.info_json_path, 'r', encoding='utf-8') as f:
                info_json_content = f.read()
        except OSError as e:
            logger.error(f"Could not read info.json file '{args.info_json_path}': {e}")
            return 1

        # --- 3. Execute download logic ---
        # The locked profile's proxy MUST be used for the download.
        # Work on a copy so the loaded policy itself is left untouched.
        local_policy = deepcopy(policy)
        local_policy.setdefault('download_policy', {})['proxy'] = locked_profile['proxy']

        # The StateManager is used by _run_download_logic for rate limiting and cooldowns,
        # but for this standalone worker, we don't need its persistence features.
        # We disable log writing to prevent creating state files.
        dummy_state_manager = StateManager(policy_name="locking_emulator_run", disable_log_writing=True)

        results = _run_download_logic(
            source=args.info_json_path,
            info_json_content=info_json_content,
            policy=local_policy,
            state_manager=dummy_state_manager,
            args=args, # Pass orchestrator args through
            profile_name=locked_profile['name'],
            profile_manager_instance=manager
        )

        # --- 4. Record overall task activity ---
        # Note: Download-specific activity ('download'/'download_error') is recorded
        # inside _run_download_logic -> process_info_json_cycle.
        # An empty/None result list counts as a failure.
        download_success = all(r.get('success') for r in results) if results else False
        activity_type = 'success' if download_success else 'failure'
        manager.record_activity(locked_profile['name'], activity_type)

        logger.info(f"Finished processing '{sp_utils.get_display_name(args.info_json_path)}' with profile '{locked_profile['name']}'. Overall success: {download_success}")

        return 0 if download_success else 1

    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}", exc_info=True)
        return 1
    finally:
        # --- 5. Unlock the profile ---
        # Runs on every exit path above (including the early returns) once a
        # profile has been locked.
        if locked_profile:
            _unlock_profile(manager, locked_profile['name'], owner_id)

if __name__ == '__main__':
    # Direct execution: build a minimal top-level parser around the shared
    # sub-command definition. The full CLI registers the same sub-command via
    # `add_locking_download_emulator_parser`.
    cli = argparse.ArgumentParser()
    add_locking_download_emulator_parser(cli.add_subparsers(dest='command'))
    parsed = cli.parse_args()

    # The attribute only exists when the 'download-emulator' command was chosen;
    # without it there is nothing to run, so just show the usage.
    if not hasattr(parsed, 'download_emulator_command'):
        cli.print_help()
    else:
        sys.exit(main_locking_download_emulator(parsed))