# yt-dlp-dags/ytops_client/yt_dlp_dummy_tool.py

#!/usr/bin/env python3
"""
(Internal) A dummy yt-dlp script that simulates Redis interactions for testing.
"""

import argparse
import json
import logging
import os
import random
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

# Add project root to path to import ProfileManager and other utils
script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(os.path.join(script_dir, '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from ytops_client.profile_manager_tool import ProfileManager
from ytops_client.stress_policy import utils as sp_utils

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def add_yt_dlp_dummy_parser(subparsers):
    """Register the 'yt-dlp-dummy' sub-command on *subparsers*.

    The sub-command accepts two required options, ``--batch-file`` and
    ``-o/--output`` (stored as ``output_template``), plus the optional
    ``--proxy`` and ``--verbose``.

    Args:
        subparsers: The object returned by ``ArgumentParser.add_subparsers()``.

    Returns:
        None. The new sub-parser is attached to *subparsers* as a side effect.
    """
    dummy_cmd = subparsers.add_parser(
        'yt-dlp-dummy',
        help='(Internal) Dummy yt-dlp for simulation.',
        description='(Internal) A dummy yt-dlp script that simulates Redis interactions for testing.',
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Subset of yt-dlp's own options that the orchestrator passes.
    # Each entry is (flags, add_argument keyword arguments); the order is the
    # order in which the options are declared and shown in --help.
    option_specs = (
        (
            ('--batch-file',),
            {'required': True, 'help': 'File containing URLs to process.'},
        ),
        (
            ('-o', '--output'),
            {
                'dest': 'output_template',
                'required': True,
                'help': 'Output template for info.json files.',
            },
        ),
        (
            ('--proxy',),
            {'help': 'Proxy URL to use (for logging purposes).'},
        ),
        (
            ('--verbose',),
            {'action': 'store_true', 'help': 'Enable verbose logging.'},
        ),
    )
    for flags, options in option_specs:
        dummy_cmd.add_argument(*flags, **options)

    # NOTE(review): any other yt-dlp arguments sent by the orchestrator are
    # expected to be ignored; that presumably relies on the caller using
    # parse_known_args() -- confirm against the dispatcher.


# Format IDs listed in every simulated info.json.
_DUMMY_FORMAT_IDS = (
    '18', '140', '299-dashy', '298-dashy', '137-dashy',
    '136-dashy', '135-dashy', '134-dashy', '133-dashy',
)


def _build_dummy_info(video_id):
    """Return the minimal info.json payload written for a simulated success."""
    return {
        'id': video_id,
        'formats': [
            {'format_id': f_id, 'url': f'http://dummy.url/{video_id}/{f_id}'}
            for f_id in _DUMMY_FORMAT_IDS
        ],
    }


def _resolve_info_json_path(output_template, video_id):
    """Expand '%(id)s' in *output_template* and ensure a '.info.json' suffix.

    This is a simplified version of yt-dlp's output template handling: only
    the ``%(id)s`` placeholder is substituted.
    """
    output_path_str = output_template.replace('%(id)s', video_id)
    # Real yt-dlp adds .info.json when --write-info-json is used, so we do too.
    if not output_path_str.endswith('.info.json'):
        output_path_str += '.info.json'
    return Path(output_path_str)


def main_yt_dlp_dummy(args):
    """Main logic for the 'yt-dlp-dummy' tool.

    Reads URLs from ``args.batch_file`` and, for each one, randomly simulates
    a hard failure, a tolerated failure or a success. Every outcome is
    recorded through ``ProfileManager.record_activity``; a success also
    writes a dummy info.json derived from ``args.output_template``.

    Environment variables read:
        YTDLP_PROFILE_NAME: profile to record activity for (required).
        YTDLP_SIM_MODE: prefix used to build the Redis key prefix (required).
        YTDLP_DROP_BATCH_ON_BANNED_PROFILE_EVEN_EXTERNALLY_MANAGED: when
            ``'1'``, the profile state is checked before every URL and the
            batch stops if it is banned.
        YTDLP_DUMMY_FAILURE_RATE: probability of a hard failure (default 0.0).
        YTDLP_DUMMY_SKIPPED_FAILURE_RATE: probability of a tolerated failure
            (default 0.0).
        REDIS_HOST / REDIS_PORT / REDIS_PASSWORD: Redis connection settings.

    Args:
        args: Parsed arguments providing ``verbose``, ``batch_file`` and
            ``output_template``.

    Returns:
        1 if a required environment variable is missing, the batch file
        cannot be read, or the profile is found banned; 0 otherwise. Hard
        failures during the batch do not change the exit code.
    """
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # --- Read configuration from environment variables ---
    profile_name = os.environ.get('YTDLP_PROFILE_NAME')
    sim_mode_env = os.environ.get('YTDLP_SIM_MODE')
    drop_on_banned = os.environ.get('YTDLP_DROP_BATCH_ON_BANNED_PROFILE_EVEN_EXTERNALLY_MANAGED') == '1'

    failure_rate = float(os.environ.get('YTDLP_DUMMY_FAILURE_RATE', 0.0))
    tolerated_rate = float(os.environ.get('YTDLP_DUMMY_SKIPPED_FAILURE_RATE', 0.0))

    # These env vars are set by the orchestrator for Redis connection
    redis_host = os.getenv('REDIS_HOST', 'localhost')
    redis_port = int(os.getenv('REDIS_PORT', 6379))
    redis_password = os.getenv('REDIS_PASSWORD')

    if not profile_name or not sim_mode_env:
        logger.error("Missing required environment variables: YTDLP_PROFILE_NAME and YTDLP_SIM_MODE")
        return 1

    # --- Connect to Redis ---
    key_prefix = f"{sim_mode_env}_profile_mgmt_"
    manager = ProfileManager(
        redis_host=redis_host, redis_port=redis_port,
        redis_password=redis_password, key_prefix=key_prefix
    )

    # --- Read URLs from batch file (blank lines are skipped) ---
    try:
        with open(args.batch_file, 'r', encoding='utf-8') as f:
            urls = [line.strip() for line in f if line.strip()]
    except OSError as e:
        logger.error(f"Failed to read batch file '{args.batch_file}': {e}")
        return 1

    logger.info(f"Dummy yt-dlp starting batch for profile '{profile_name}'. Processing {len(urls)} URLs.")

    files_created = 0
    hard_failures = 0

    for url in urls:
        time.sleep(random.uniform(0.1, 0.3))  # Simulate work per URL

        # 1. Check if profile has been banned externally
        if drop_on_banned:
            profile_data = manager.get_profile(profile_name)
            if profile_data and profile_data.get('state') == manager.STATE_BANNED:
                logger.warning(f"Profile '{profile_name}' is BANNED. Stopping batch as per policy.")
                return 1

        # 2. Simulate success/failure and record activity in Redis.
        # One random draw selects the outcome: [0, failure_rate) is a hard
        # failure, the next tolerated_rate-wide band is a tolerated failure,
        # anything above is a success.
        rand_val = random.random()
        video_id = sp_utils.get_video_id(url)

        if rand_val < failure_rate:
            logger.warning(f"Simulating HARD failure for URL '{video_id}'.")
            logger.info(f"Recording 'failure' for profile '{profile_name}' in Redis.")
            manager.record_activity(profile_name, 'failure')
            hard_failures += 1
            continue

        if rand_val < (failure_rate + tolerated_rate):
            logger.warning(f"Simulating TOLERATED failure for URL '{video_id}'.")
            logger.info(f"Recording 'tolerated_error' for profile '{profile_name}' in Redis.")
            manager.record_activity(profile_name, 'tolerated_error')
            continue

        # Success
        logger.info(f"Simulating SUCCESS for URL '{video_id}'. Recording 'success' for profile '{profile_name}' in Redis.")
        manager.record_activity(profile_name, 'success')

        # 3. Create dummy info.json file
        output_path = _resolve_info_json_path(args.output_template, video_id)
        try:
            # Directory creation lives inside the try so that an unwritable
            # output location is logged and counted like any other write
            # failure instead of aborting the whole batch.
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(_build_dummy_info(video_id), f, indent=2)
        except OSError as e:
            logger.error(f"Failed to write dummy info.json to '{output_path}': {e}")
            hard_failures += 1
        else:
            logger.debug(f"Created dummy info.json: {output_path}")
            files_created += 1

    logger.info(f"Dummy yt-dlp finished batch. Created {files_created} files. Had {hard_failures} hard failures.")

    # yt-dlp exits 0 with --ignore-errors. Our dummy does the same.
    # The orchestrator judges batch success based on files_created.
    return 0