yt-dlp-dags/ytops_client/locking_download_emulator_tool.py

210 lines
8.7 KiB
Python

#!/usr/bin/env python3
"""
Standalone worker tool for the distributed download simulation.
This tool is responsible for the "lock-execute-unlock" workflow for a single
download task based on an info.json file. It's designed to be called by an
orchestrator like `stress_policy_tool.py`.
"""
import argparse
import json
import logging
import os
import sys
import time
from copy import deepcopy
try:
from dotenv import load_dotenv
except ImportError:
load_dotenv = None
# Add the project root to sys.path (if not already present) so the `ytops_client` package can be imported when this file is run directly as a script.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(script_dir, '..'))
if project_root not in sys.path:
sys.path.insert(0, project_root)
from ytops_client.profile_manager_tool import ProfileManager
from ytops_client.stress_policy import utils as sp_utils
from ytops_client.stress_policy.state_manager import StateManager
from ytops_client.stress_policy.utils import load_policy, apply_overrides
from ytops_client.stress_policy.workers import _run_download_logic
from ytops_client.stress_policy_tool import shutdown_event
# Root logging setup: INFO level; each record shows timestamp, logger name,
# level and message.
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(__name__)
def add_locking_download_emulator_parser(subparsers):
    """Register the internal 'download-emulator' command on a parent CLI.

    The command exposes a single mandatory action, 'lock-and-run', which takes
    the policy file, the info.json path, optional policy overrides and the
    Redis connection options.

    Args:
        subparsers: The object returned by ``ArgumentParser.add_subparsers()``
            on the parent parser; the new command is attached to it.
    """
    emulator_parser = subparsers.add_parser(
        'download-emulator',
        help='(Internal) Standalone download worker.',
        description='Internal tool to run a single download task with profile locking. Not intended for direct user invocation.',
    )

    # Internal tool: exactly one action is expected, and it is mandatory.
    actions = emulator_parser.add_subparsers(
        dest='download_emulator_command',
        help='Action to perform',
        required=True,
    )
    lock_and_run = actions.add_parser(
        'lock-and-run',
        help='Lock a profile, run a download, and unlock it.',
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Task inputs.
    lock_and_run.add_argument('--policy-file', required=True, help='Path to the YAML policy file.')
    lock_and_run.add_argument('--info-json-path', required=True, help='Path to the info.json file to process.')
    lock_and_run.add_argument(
        '--set',
        action='append',
        default=[],
        help="Override a policy setting using 'key.subkey=value'.",
    )

    # Redis connection options, passed in by the orchestrator. Registered from
    # a table so the flag list is easy to scan; order matches the help output.
    redis_group = lock_and_run.add_argument_group('Redis Connection')
    redis_flags = (
        ('--env-file', {'help': 'Path to a .env file.'}),
        ('--redis-host', {'help': 'Redis host.'}),
        ('--redis-port', {'type': int, 'help': 'Redis port.'}),
        ('--redis-password', {'help': 'Redis password.'}),
        ('--env', {'help': "Environment name for Redis key prefix."}),
        ('--key-prefix', {'help': 'Explicit key prefix for Redis.'}),
    )
    for flag, options in redis_flags:
        redis_group.add_argument(flag, **options)

    lock_and_run.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
def _read_unlock_cooldown(manager):
    """Return the configured unlock cooldown in whole seconds, or 0 if unusable.

    Any failure to read or parse 'unlock_cooldown_seconds' yields 0 so that the
    caller can still release the profile lock.
    """
    try:
        raw_value = manager.get_config('unlock_cooldown_seconds')
    except Exception as e:
        logger.warning(f"Could not read 'unlock_cooldown_seconds'; unlocking without cooldown: {e}")
        return 0
    if raw_value is None:
        return 0
    if isinstance(raw_value, bytes):
        raw_value = raw_value.decode('utf-8', errors='replace')
    try:
        seconds = int(str(raw_value).strip())
    except ValueError:
        return 0
    return max(seconds, 0)


def main_locking_download_emulator(args):
    """Run one lock -> download -> unlock cycle for a single info.json file.

    Steps: load the policy (with ``--set`` overrides), optionally load a .env
    file, connect a ProfileManager, wait until a profile matching
    ``download_policy.profile_prefix`` can be locked, run the download logic
    through that profile's proxy, record the overall outcome, and finally
    unlock the profile (with the configured cooldown, if any).

    Args:
        args: Parsed CLI namespace from the 'lock-and-run' parser. Reads
            ``verbose``, ``policy_file``, ``set``, ``env_file``, ``redis_host``,
            ``redis_port``, ``redis_password``, ``key_prefix``, ``env`` and
            ``info_json_path``; the namespace is also passed through to
            ``_run_download_logic``.

    Returns:
        int: 0 when the download produced results and all of them report
        success; 1 for any configuration error, failure to lock a profile,
        unreadable info.json, failed download, or unexpected exception.

    Raises:
        Exception: Whatever ``manager.unlock_profile`` raises while releasing
            the lock propagates to the caller.
    """
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # --- Load Policy ---
    policy = load_policy(args.policy_file)
    if not policy:
        return 1
    policy = apply_overrides(policy, args.set)

    # --- Load .env if specified ---
    if args.env_file:
        if load_dotenv is None:
            # python-dotenv is an optional dependency; say so instead of
            # silently ignoring the user's --env-file.
            logger.warning(f"python-dotenv is not installed; ignoring --env-file: {args.env_file}")
        elif load_dotenv(args.env_file):
            logger.info(f"Loaded environment variables from {args.env_file}")
        else:
            logger.error(f"Specified --env-file was not found: {args.env_file}")
            return 1

    # --- Setup ProfileManager ---
    # CLI values win; environment variables (possibly from the .env file) are
    # the fallback.
    redis_host = args.redis_host or os.getenv('REDIS_HOST', 'localhost')
    try:
        redis_port = args.redis_port or int(os.getenv('REDIS_PORT', 6379))
    except ValueError:
        logger.error(f"Invalid REDIS_PORT environment value: {os.getenv('REDIS_PORT')!r}")
        return 1
    redis_password = args.redis_password or os.getenv('REDIS_PASSWORD')

    if args.key_prefix:
        key_prefix = args.key_prefix
    elif args.env:
        key_prefix = f"{args.env}_profile_mgmt_"
    else:
        logger.error("Must provide --env or --key-prefix for Redis connection.")
        return 1

    manager = ProfileManager(
        redis_host=redis_host,
        redis_port=redis_port,
        redis_password=redis_password,
        key_prefix=key_prefix
    )

    download_policy = policy.get('download_policy', {})
    profile_prefix = download_policy.get('profile_prefix')
    if not profile_prefix:
        logger.error("Policy file must specify 'download_policy.profile_prefix'.")
        return 1

    # --- Main Lock-Execute-Unlock Logic ---
    owner_id = f"dl-emulator-{os.getpid()}"
    locked_profile = None
    lock_attempts = 0
    # Simplified wait logic from stress_policy_tool: the delay grows per attempt
    # and stays at the last value once the table is exhausted.
    backoff_seconds = (3, 5, 9, 20, 50)

    try:
        # --- 1. Lock a profile (with wait & backoff) ---
        while not shutdown_event.is_set():
            locked_profile = manager.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
            if locked_profile:
                logger.info(f"Locked profile '{locked_profile['name']}' with proxy '{locked_profile['proxy']}'.")
                break

            sleep_duration = backoff_seconds[min(lock_attempts, len(backoff_seconds) - 1)]
            logger.info(f"No download profiles available. Waiting {sleep_duration}s... (attempt {lock_attempts + 1})")
            time.sleep(sleep_duration)
            lock_attempts += 1

        if not locked_profile:
            # The loop only ends without a profile when shutdown was requested.
            logger.warning("Could not lock a profile; shutting down.")
            return 1

        # --- 2. Read info.json ---
        try:
            with open(args.info_json_path, 'r', encoding='utf-8') as f:
                info_json_content = f.read()
        except OSError as e:
            logger.error(f"Could not read info.json file '{args.info_json_path}': {e}")
            return 1

        # --- 3. Execute download logic ---
        # The locked profile's proxy MUST be used for the download. Work on a
        # copy so the loaded policy itself is left untouched.
        local_policy = deepcopy(policy)
        local_policy.setdefault('download_policy', {})['proxy'] = locked_profile['proxy']

        # The StateManager is used by _run_download_logic for rate limiting and cooldowns,
        # but for this standalone worker, we don't need its persistence features.
        # We disable log writing to prevent creating state files.
        dummy_state_manager = StateManager(policy_name="locking_emulator_run", disable_log_writing=True)

        results = _run_download_logic(
            source=args.info_json_path,
            info_json_content=info_json_content,
            policy=local_policy,
            state_manager=dummy_state_manager,
            args=args,  # Pass orchestrator args through
            profile_name=locked_profile['name'],
            profile_manager_instance=manager
        )

        # --- 4. Record overall task activity ---
        # Note: Download-specific activity ('download'/'download_error') is recorded
        # inside _run_download_logic -> process_info_json_cycle.
        # An empty/None result set counts as a failure.
        download_success = bool(results) and all(r.get('success') for r in results)
        activity_type = 'success' if download_success else 'failure'
        manager.record_activity(locked_profile['name'], activity_type)

        logger.info(f"Finished processing '{sp_utils.get_display_name(args.info_json_path)}' with profile '{locked_profile['name']}'. Overall success: {download_success}")
        return 0 if download_success else 1
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}", exc_info=True)
        return 1
    finally:
        # --- 5. Unlock the profile ---
        # The cooldown lookup is fail-safe: a config read/parse problem must
        # never prevent the lock from being released.
        if locked_profile:
            cooldown_seconds = _read_unlock_cooldown(manager)
            if cooldown_seconds > 0:
                logger.info(f"Unlocking profile '{locked_profile['name']}' with a {cooldown_seconds}s cooldown.")
                manager.unlock_profile(locked_profile['name'], owner=owner_id, rest_for_seconds=cooldown_seconds)
            else:
                logger.info(f"Unlocking profile '{locked_profile['name']}'.")
                manager.unlock_profile(locked_profile['name'], owner=owner_id)
if __name__ == '__main__':
    # Direct-execution entry point. The full CLI normally wires this command in
    # via `add_locking_download_emulator_parser`; here a minimal top-level
    # parser is built just to host that one command.
    cli_parser = argparse.ArgumentParser()
    command_subparsers = cli_parser.add_subparsers(dest='command')
    add_locking_download_emulator_parser(command_subparsers)
    cli_args = cli_parser.parse_args()

    # The attribute only exists when the 'download-emulator' command was chosen.
    if not hasattr(cli_args, 'download_emulator_command'):
        cli_parser.print_help()
    else:
        exit_code = main_locking_download_emulator(cli_args)
        sys.exit(exit_code)