#!/usr/bin/env python3
|
|
"""
|
|
Policy-driven stress-testing orchestrator for video format downloads.
|
|
"""
|
|
|
|
import argparse
|
|
import collections
|
|
import collections.abc
|
|
import concurrent.futures
|
|
import json
|
|
import logging
|
|
import os
|
|
import random
|
|
import re
|
|
import shlex
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from copy import deepcopy
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
print("PyYAML is not installed. Please install it with: pip install PyYAML", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Add a global event for graceful shutdown
|
|
shutdown_event = threading.Event()
|
|
|
|
# Globals for tracking and terminating subprocesses on shutdown
|
|
running_processes = set()
|
|
process_lock = threading.Lock()
|
|
|
|
# Globals for assigning a stable ID to each worker thread
|
|
worker_id_map = {}
|
|
worker_id_counter = 0
|
|
worker_id_lock = threading.Lock()
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger('stress_policy_tool')
|
|
|
|
|
|
def get_worker_id():
|
|
"""Assigns a stable, sequential ID to each worker thread."""
|
|
global worker_id_counter
|
|
thread_id = threading.get_ident()
|
|
with worker_id_lock:
|
|
if thread_id not in worker_id_map:
|
|
worker_id_map[thread_id] = worker_id_counter
|
|
worker_id_counter += 1
|
|
return worker_id_map[thread_id]
|
|
|
|
|
|
def get_video_id(url: str) -> str:
|
|
"""Extracts a YouTube video ID from a URL."""
|
|
match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
|
|
if match:
|
|
return match.group(1)
|
|
match = re.search(r"youtu\.be\/([0-9A-Za-z_-]{11})", url)
|
|
if match:
|
|
return match.group(1)
|
|
if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
|
|
return url
|
|
return "unknown_video_id"
|
|
|
|
|
|
def get_display_name(path_or_url):
|
|
"""Returns a clean name for logging, either a filename or a video ID."""
|
|
if isinstance(path_or_url, Path):
|
|
return path_or_url.name
|
|
|
|
path_str = str(path_or_url)
|
|
video_id = get_video_id(path_str)
|
|
if video_id != "unknown_video_id":
|
|
return video_id
|
|
|
|
return Path(path_str).name
|
|
|
|
|
|
def format_size(b):
|
|
"""Format size in bytes to human-readable string."""
|
|
if b is None:
|
|
return 'N/A'
|
|
if b < 1024:
|
|
return f"{b}B"
|
|
elif b < 1024**2:
|
|
return f"{b/1024:.2f}KiB"
|
|
elif b < 1024**3:
|
|
return f"{b/1024**2:.2f}MiB"
|
|
else:
|
|
return f"{b/1024**3:.2f}GiB"
|
|
|
|
|
|
def flatten_dict(d, parent_key='', sep='.'):
|
|
"""Flattens a nested dictionary."""
|
|
items = {}
|
|
for k, v in d.items():
|
|
new_key = parent_key + sep + k if parent_key else k
|
|
if isinstance(v, collections.abc.MutableMapping):
|
|
items.update(flatten_dict(v, new_key, sep=sep))
|
|
else:
|
|
items[new_key] = v
|
|
return items
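# A minimal sketch of the flattening (the policy fragment and values are hypothetical):
#   >>> flatten_dict({'download_policy': {'formats': '140',
#   ...                                   'rate_limits': {'per_ip': {'max_requests': 5}}}})
#   {'download_policy.formats': '140', 'download_policy.rate_limits.per_ip.max_requests': 5}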
|
|
|
|
|
|
def print_policy_overrides(policy):
|
|
"""Prints all policy values as a single-line of --set arguments."""
|
|
# We don't want to include the 'name' key in the overrides.
|
|
policy_copy = deepcopy(policy)
|
|
policy_copy.pop('name', None)
|
|
|
|
flat_policy = flatten_dict(policy_copy)
|
|
|
|
set_args = []
|
|
for key, value in sorted(flat_policy.items()):
|
|
if value is None:
|
|
value_str = 'null'
|
|
elif isinstance(value, bool):
|
|
value_str = str(value).lower()
|
|
elif isinstance(value, (list, dict)):
|
|
# Use compact JSON for lists/dicts
|
|
value_str = json.dumps(value, separators=(',', ':'))
|
|
else:
|
|
value_str = str(value)
|
|
|
|
# Use shlex.quote to handle spaces and special characters safely
|
|
set_args.append(f"--set {shlex.quote(f'{key}={value_str}')}")
|
|
|
|
print(' '.join(set_args))
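# Example of the emitted line for a hypothetical policy
# {'name': 'x', 'download_policy': {'formats': '140,299'}, 'execution_control': {'workers': 4}}:
#   --set download_policy.formats=140,299 --set execution_control.workers=4
# ('name' is dropped; values containing spaces or shell metacharacters are quoted by shlex.quote.)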
|
|
|
|
|
|
def get_profile_from_filename(path, regex_pattern):
|
|
"""Extracts a profile name from a filename using a regex."""
|
|
if not regex_pattern:
|
|
return None
|
|
match = re.search(regex_pattern, path.name)
|
|
if match:
|
|
# Assume the first capturing group is the profile name
|
|
if match.groups():
|
|
return match.group(1)
|
|
return None
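# Sketch with a hypothetical regex and filename: with regex_pattern r'^([a-z0-9]+)__'
# (first capturing group = profile), 'alice__dQw4w9WgXcQ.info.json' yields 'alice'.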
|
|
|
|
|
|
class StateManager:
|
|
"""Tracks statistics, manages rate limits, and persists state across runs."""
|
|
def __init__(self, policy_name, disable_log_writing=False):
|
|
self.disable_log_writing = disable_log_writing
|
|
self.state_file_path = Path(f"{policy_name}_state.json")
|
|
self.stats_file_path = Path(f"{policy_name}_stats.jsonl")
|
|
self.lock = threading.RLock()
|
|
self.start_time = time.time()
|
|
self.events = []
|
|
self.state = {
|
|
'global_request_count': 0,
|
|
'rate_limit_trackers': {}, # e.g., {'per_ip': [ts1, ts2], 'profile_foo': [ts3, ts4]}
|
|
'profile_request_counts': {}, # for client rotation
|
|
'profile_last_refresh_time': {}, # for client rotation
|
|
'proxy_last_finish_time': {}, # for per-proxy sleep
|
|
'processed_files': [], # For continuous download_only mode
|
|
# For dynamic profile cooldown strategy
|
|
'profile_cooldown_counts': {},
|
|
'profile_cooldown_sleep_until': {},
|
|
'profile_pool_size': 0,
|
|
'profile_run_suffix': None,
|
|
'worker_profile_generations': {}
|
|
}
|
|
self.stats_file_handle = None
|
|
self._load_state()
|
|
self.print_historical_summary()
|
|
self._open_stats_log()
|
|
|
|
def _load_state(self):
|
|
if self.disable_log_writing:
|
|
logger.info("Log writing is disabled. State will not be loaded from disk.")
|
|
return
|
|
if not self.state_file_path.exists():
|
|
logger.info(f"State file not found at '{self.state_file_path}', starting fresh.")
|
|
return
|
|
try:
|
|
with open(self.state_file_path, 'r', encoding='utf-8') as f:
|
|
self.state = json.load(f)
|
|
# Ensure keys exist
|
|
self.state.setdefault('global_request_count', 0)
|
|
self.state.setdefault('rate_limit_trackers', {})
|
|
self.state.setdefault('profile_request_counts', {})
|
|
self.state.setdefault('profile_last_refresh_time', {})
|
|
self.state.setdefault('proxy_last_finish_time', {})
|
|
self.state.setdefault('processed_files', [])
|
|
# For dynamic profile cooldown strategy
|
|
self.state.setdefault('profile_cooldown_counts', {})
|
|
self.state.setdefault('profile_cooldown_sleep_until', {})
|
|
self.state.setdefault('profile_pool_size', 0)
|
|
self.state.setdefault('profile_run_suffix', None)
|
|
self.state.setdefault('worker_profile_generations', {})
|
|
logger.info(f"Loaded state from {self.state_file_path}")
|
|
except (IOError, json.JSONDecodeError) as e:
|
|
logger.error(f"Could not load or parse state file {self.state_file_path}: {e}. Starting fresh.")
|
|
|
|
def _save_state(self):
|
|
if self.disable_log_writing:
|
|
return
|
|
with self.lock:
|
|
try:
|
|
with open(self.state_file_path, 'w', encoding='utf-8') as f:
|
|
json.dump(self.state, f, indent=2)
|
|
logger.info(f"Saved state to {self.state_file_path}")
|
|
except IOError as e:
|
|
logger.error(f"Could not save state to {self.state_file_path}: {e}")
|
|
|
|
def _open_stats_log(self):
|
|
if self.disable_log_writing:
|
|
return
|
|
try:
|
|
self.stats_file_handle = open(self.stats_file_path, 'a', encoding='utf-8')
|
|
except IOError as e:
|
|
logger.error(f"Could not open stats file {self.stats_file_path}: {e}")
|
|
|
|
def close(self):
|
|
"""Saves state and closes file handles."""
|
|
self._save_state()
|
|
if self.stats_file_handle:
|
|
self.stats_file_handle.close()
|
|
self.stats_file_handle = None
|
|
|
|
def mark_file_as_processed(self, file_path):
|
|
"""Adds a file path to the list of processed files in the state."""
|
|
with self.lock:
|
|
# Using a list and checking for existence is fine for moderate numbers of files.
|
|
# A set isn't JSON serializable.
|
|
processed = self.state.setdefault('processed_files', [])
|
|
file_str = str(file_path)
|
|
if file_str not in processed:
|
|
processed.append(file_str)
|
|
|
|
def get_processed_files(self):
|
|
"""Returns a set of file paths that have been processed."""
|
|
with self.lock:
|
|
return set(self.state.get('processed_files', []))
|
|
|
|
def print_historical_summary(self):
|
|
"""Prints a summary based on the state loaded from disk, before new events."""
|
|
with self.lock:
|
|
now = time.time()
|
|
rate_trackers = self.state.get('rate_limit_trackers', {})
|
|
total_requests = self.state.get('global_request_count', 0)
|
|
|
|
if not rate_trackers and not total_requests:
|
|
logger.info("No historical data found in state file.")
|
|
return
|
|
|
|
logger.info("\n--- Summary From Previous Runs ---")
|
|
logger.info(f"Total info.json requests (all previous runs): {total_requests}")
|
|
|
|
if rate_trackers:
|
|
for key, timestamps in sorted(rate_trackers.items()):
|
|
# Time windows in seconds
|
|
windows = {
|
|
'last 10 min': 600,
|
|
'last 60 min': 3600,
|
|
'last 6 hours': 21600,
|
|
'last 24 hours': 86400
|
|
}
|
|
|
|
rates_str_parts = []
|
|
for name, seconds in windows.items():
|
|
count = sum(1 for ts in timestamps if now - ts <= seconds)
|
|
# Calculate rate in requests per minute
|
|
rate_rpm = (count / seconds) * 60 if seconds > 0 else 0
|
|
rates_str_parts.append(f"{count} req in {name} ({rate_rpm:.2f} rpm)")
|
|
|
|
logger.info(f"Tracker '{key}': " + ", ".join(rates_str_parts))
|
|
logger.info("------------------------------------")
|
|
|
|
def log_event(self, event_data):
|
|
with self.lock:
|
|
event_data['timestamp'] = datetime.now().isoformat()
|
|
self.events.append(event_data)
|
|
if self.stats_file_handle:
|
|
self.stats_file_handle.write(json.dumps(event_data) + '\n')
|
|
self.stats_file_handle.flush()
|
|
|
|
def get_request_count(self):
|
|
with self.lock:
|
|
return self.state.get('global_request_count', 0)
|
|
|
|
def increment_request_count(self):
|
|
with self.lock:
|
|
self.state['global_request_count'] = self.state.get('global_request_count', 0) + 1
|
|
|
|
def check_cumulative_error_rate(self, max_errors, per_minutes, error_type=None):
|
|
"""
|
|
Checks if a cumulative error rate has been exceeded.
|
|
If error_type is None, checks for any failure.
|
|
Returns the number of errors found if the threshold is met, otherwise 0.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
window_seconds = per_minutes * 60
|
|
|
|
if error_type:
|
|
recent_errors = [
|
|
e for e in self.events
|
|
if e.get('error_type') == error_type and (now - datetime.fromisoformat(e['timestamp']).timestamp()) <= window_seconds
|
|
]
|
|
else: # Generic failure check
|
|
recent_errors = [
|
|
e for e in self.events
|
|
if not e.get('success') and (now - datetime.fromisoformat(e['timestamp']).timestamp()) <= window_seconds
|
|
]
|
|
|
|
if len(recent_errors) >= max_errors:
|
|
return len(recent_errors)
|
|
return 0
|
|
|
|
def check_quality_degradation_rate(self, max_triggers, per_minutes):
|
|
"""
|
|
Checks if the quality degradation trigger rate has been exceeded.
|
|
Returns the number of triggers found if the threshold is met, otherwise 0.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
window_seconds = per_minutes * 60
|
|
|
|
recent_triggers = [
|
|
e for e in self.events
|
|
if e.get('quality_degradation_trigger') and (now - datetime.fromisoformat(e['timestamp']).timestamp()) <= window_seconds
|
|
]
|
|
|
|
if len(recent_triggers) >= max_triggers:
|
|
return len(recent_triggers)
|
|
return 0
|
|
|
|
def check_and_update_rate_limit(self, profile_name, policy):
|
|
"""
|
|
Checks if a request is allowed based on policy rate limits.
|
|
If allowed, updates the internal state. Returns True if allowed, False otherwise.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
gen_policy = policy.get('info_json_generation_policy', {})
|
|
rate_limits = gen_policy.get('rate_limits', {})
|
|
|
|
# Check per-IP limit
|
|
ip_limit = rate_limits.get('per_ip')
|
|
if ip_limit:
|
|
tracker_key = 'per_ip'
|
|
max_req = ip_limit.get('max_requests')
|
|
period_min = ip_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
# Filter out old timestamps
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning("Per-IP rate limit reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# Check per-profile limit
|
|
profile_limit = rate_limits.get('per_profile')
|
|
if profile_limit and profile_name:
|
|
tracker_key = f"profile_{profile_name}"
|
|
max_req = profile_limit.get('max_requests')
|
|
period_min = profile_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning(f"Per-profile rate limit for '{profile_name}' reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# If all checks pass, record the new request timestamp for all relevant trackers
|
|
if ip_limit and ip_limit.get('max_requests'):
|
|
self.state['rate_limit_trackers'].setdefault('per_ip', []).append(now)
|
|
if profile_limit and profile_limit.get('max_requests') and profile_name:
|
|
self.state['rate_limit_trackers'].setdefault(f"profile_{profile_name}", []).append(now)
|
|
|
|
return True
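# Policy fragment consumed above (a sketch; key names follow the lookups in this method,
# the numbers are hypothetical):
#
#   info_json_generation_policy:
#     rate_limits:
#       per_ip:      {max_requests: 120, per_minutes: 60}
#       per_profile: {max_requests: 20,  per_minutes: 60}
#
# A request is allowed only if every configured sliding-window counter is below its limit;
# the current timestamp is then appended to each enabled tracker.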
|
|
|
|
def get_client_for_request(self, profile_name, gen_policy):
|
|
"""
|
|
Determines which client to use based on the client_rotation_policy.
|
|
Returns a tuple: (client_name, request_params_dict).
|
|
"""
|
|
with self.lock:
|
|
rotation_policy = gen_policy.get('client_rotation_policy')
|
|
|
|
# If no rotation policy, use the simple 'client' key.
|
|
if not rotation_policy:
|
|
client = gen_policy.get('client')
|
|
logger.info(f"Using client '{client}' for profile '{profile_name}'.")
|
|
req_params = gen_policy.get('request_params')
|
|
return client, req_params
|
|
|
|
# --- Rotation logic ---
|
|
now = time.time()
|
|
major_client = rotation_policy.get('major_client')
|
|
refresh_client = rotation_policy.get('refresh_client')
|
|
refresh_every = rotation_policy.get('refresh_every', {})
|
|
|
|
if not refresh_client or not refresh_every:
|
|
return major_client, rotation_policy.get('major_client_params')
|
|
|
|
should_refresh = False
|
|
|
|
# Check time-based refresh
|
|
refresh_minutes = refresh_every.get('minutes')
|
|
last_refresh_time = self.state['profile_last_refresh_time'].get(profile_name, 0)
|
|
if refresh_minutes and (now - last_refresh_time) > (refresh_minutes * 60):
|
|
should_refresh = True
|
|
|
|
# Check request-count-based refresh
|
|
refresh_requests = refresh_every.get('requests')
|
|
request_count = self.state['profile_request_counts'].get(profile_name, 0)
|
|
if refresh_requests and request_count >= refresh_requests:
|
|
should_refresh = True
|
|
|
|
if should_refresh:
|
|
logger.info(f"Profile '{profile_name}' is due for a refresh. Using refresh client '{refresh_client}'.")
|
|
self.state['profile_last_refresh_time'][profile_name] = now
|
|
self.state['profile_request_counts'][profile_name] = 0 # Reset counter
|
|
return refresh_client, rotation_policy.get('refresh_client_params')
|
|
else:
|
|
# Not refreshing, so increment request count for this profile
|
|
self.state['profile_request_counts'][profile_name] = request_count + 1
|
|
return major_client, rotation_policy.get('major_client_params')
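# Rotation policy sketch (key names follow the lookups above; client names and numbers
# are hypothetical):
#
#   info_json_generation_policy:
#     client_rotation_policy:
#       major_client: web
#       refresh_client: tv
#       refresh_every: {minutes: 30, requests: 10}
#
# Requests use major_client until either refresh_every threshold is hit; that request
# then uses refresh_client and the per-profile request counter resets to zero.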
|
|
|
|
def get_next_available_profile(self, policy):
|
|
"""
|
|
Finds or creates an available profile based on the dynamic cooldown policy.
|
|
Returns a profile name, or None if no profile is available.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
settings = policy.get('settings', {})
|
|
pm_policy = settings.get('profile_management')
|
|
|
|
if not pm_policy:
|
|
return None
|
|
|
|
prefix = pm_policy.get('prefix')
|
|
if not prefix:
|
|
logger.error("Profile management policy requires 'prefix'.")
|
|
return None
|
|
|
|
# Determine and persist the suffix for this run to ensure profile names are stable
|
|
run_suffix = self.state.get('profile_run_suffix')
|
|
if not run_suffix:
|
|
suffix_config = pm_policy.get('suffix')
|
|
if suffix_config == 'auto':
|
|
run_suffix = datetime.now().strftime('%Y%m%d%H%M')
|
|
else:
|
|
run_suffix = suffix_config or ''
|
|
self.state['profile_run_suffix'] = run_suffix
|
|
|
|
# Initialize pool size from policy if not already in state
|
|
if self.state.get('profile_pool_size', 0) == 0:
|
|
self.state['profile_pool_size'] = pm_policy.get('initial_pool_size', 1)
|
|
|
|
max_reqs = pm_policy.get('max_requests_per_profile')
|
|
sleep_mins = pm_policy.get('sleep_minutes_on_exhaustion')
|
|
|
|
# Loop until a profile is found or we decide we can't find one
|
|
while True:
|
|
# Try to find an existing, available profile
|
|
for i in range(self.state['profile_pool_size']):
|
|
profile_name = f"{prefix}_{run_suffix}_{i}" if run_suffix else f"{prefix}_{i}"
|
|
|
|
# Check if sleeping
|
|
sleep_until = self.state['profile_cooldown_sleep_until'].get(profile_name, 0)
|
|
if now < sleep_until:
|
|
continue # Still sleeping
|
|
|
|
# Check if it needs to be put to sleep
|
|
req_count = self.state['profile_cooldown_counts'].get(profile_name, 0)
|
|
if max_reqs and req_count >= max_reqs:
|
|
sleep_duration_seconds = (sleep_mins or 0) * 60
|
|
self.state['profile_cooldown_sleep_until'][profile_name] = now + sleep_duration_seconds
|
|
self.state['profile_cooldown_counts'][profile_name] = 0 # Reset count for next time
|
|
logger.info(f"Profile '{profile_name}' reached request limit ({req_count}/{max_reqs}). Putting to sleep for {sleep_mins} minutes.")
|
|
continue # Now sleeping, try next profile
|
|
|
|
# This profile is available
|
|
logger.info(f"Selected available profile '{profile_name}' (request count: {req_count}/{max_reqs if max_reqs else 'unlimited'}).")
|
|
return profile_name
|
|
|
|
# If we get here, no existing profile was available
|
|
if pm_policy.get('auto_expand_pool'):
|
|
new_profile_index = self.state['profile_pool_size']
|
|
self.state['profile_pool_size'] += 1
|
|
profile_name = f"{prefix}_{run_suffix}_{new_profile_index}" if run_suffix else f"{prefix}_{new_profile_index}"
|
|
logger.info(f"Profile pool exhausted. Expanding pool to size {self.state['profile_pool_size']}. New profile: '{profile_name}'")
|
|
return profile_name
|
|
else:
|
|
# No available profiles and pool expansion is disabled
|
|
return None
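# profile_management sketch for the dynamic cooldown strategy (values hypothetical):
#
#   settings:
#     profile_management:
#       prefix: stress
#       suffix: auto                     # 'auto' -> timestamp suffix, or a fixed string
#       initial_pool_size: 4
#       max_requests_per_profile: 50
#       sleep_minutes_on_exhaustion: 30
#       auto_expand_pool: true
#
# With suffix 'auto' the run suffix is a timestamp, so profiles are named e.g.
# stress_202501011200_0 .. stress_202501011200_3, and the pool grows by one whenever
# every existing profile is asleep and auto_expand_pool is enabled.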
|
|
|
|
def get_or_rotate_worker_profile(self, worker_id, policy):
|
|
"""
|
|
Gets the current profile for a worker, rotating to a new generation if the lifetime limit is met.
|
|
This is used by the 'per_worker_with_rotation' profile mode.
|
|
"""
|
|
with self.lock:
|
|
pm_policy = policy.get('settings', {}).get('profile_management', {})
|
|
if not pm_policy:
|
|
logger.error("Profile mode 'per_worker_with_rotation' requires 'settings.profile_management' configuration in the policy.")
|
|
return f"error_profile_{worker_id}"
|
|
|
|
prefix = pm_policy.get('prefix')
|
|
if not prefix:
|
|
logger.error("Profile management for 'per_worker_with_rotation' requires a 'prefix'.")
|
|
return f"error_profile_{worker_id}"
|
|
|
|
max_reqs = pm_policy.get('max_requests_per_profile')
|
|
|
|
generations = self.state.setdefault('worker_profile_generations', {})
|
|
# worker_id is an int, but JSON keys must be strings
|
|
worker_id_str = str(worker_id)
|
|
current_gen = generations.get(worker_id_str, 0)
|
|
|
|
profile_name = f"{prefix}_{worker_id}_{current_gen}"
|
|
|
|
if not max_reqs: # No lifetime limit defined, so never rotate.
|
|
return profile_name
|
|
|
|
req_count = self.state.get('profile_cooldown_counts', {}).get(profile_name, 0)
|
|
|
|
if req_count >= max_reqs:
|
|
logger.info(f"Profile '{profile_name}' reached lifetime request limit ({req_count}/{max_reqs}). Rotating to new generation for worker {worker_id}.")
|
|
new_gen = current_gen + 1
|
|
generations[worker_id_str] = new_gen
|
|
# The request counts for the old profile are implicitly left behind.
|
|
# The new profile will start with a count of 0.
|
|
profile_name = f"{prefix}_{worker_id}_{new_gen}"
|
|
|
|
return profile_name
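# Naming sketch for 'per_worker_with_rotation' (prefix hypothetical): worker 3 starts on
# 'stress_3_0'; once that profile has made max_requests_per_profile requests it rotates to
# 'stress_3_1', and so on. Without max_requests_per_profile the generation never advances.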
|
|
|
|
def record_profile_request(self, profile_name):
|
|
"""Increments the request counter for a profile for the cooldown policy."""
|
|
with self.lock:
|
|
if not profile_name:
|
|
return
|
|
counts = self.state.setdefault('profile_cooldown_counts', {})
|
|
counts[profile_name] = counts.get(profile_name, 0) + 1
|
|
|
|
def record_proxy_usage(self, proxy_url):
|
|
"""Records a request timestamp for a given proxy URL for statistical purposes."""
|
|
if not proxy_url:
|
|
return
|
|
with self.lock:
|
|
now = time.time()
|
|
# Use a prefix to avoid collisions with profile names or other keys
|
|
tracker_key = f"proxy_{proxy_url}"
|
|
self.state['rate_limit_trackers'].setdefault(tracker_key, []).append(now)
|
|
|
|
def check_and_update_download_rate_limit(self, proxy_url, policy):
|
|
"""Checks download rate limits. Returns True if allowed, False otherwise."""
|
|
with self.lock:
|
|
now = time.time()
|
|
d_policy = policy.get('download_policy', {})
|
|
rate_limits = d_policy.get('rate_limits', {})
|
|
|
|
# Check per-IP limit
|
|
ip_limit = rate_limits.get('per_ip')
|
|
if ip_limit:
|
|
tracker_key = 'download_per_ip' # Use a distinct key
|
|
max_req = ip_limit.get('max_requests')
|
|
period_min = ip_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning("Per-IP download rate limit reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# Check per-proxy limit
|
|
proxy_limit = rate_limits.get('per_proxy')
|
|
if proxy_limit and proxy_url:
|
|
tracker_key = f"download_proxy_{proxy_url}"
|
|
max_req = proxy_limit.get('max_requests')
|
|
period_min = proxy_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning(f"Per-proxy download rate limit for '{proxy_url}' reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# If all checks pass, record the new request timestamp for all relevant trackers
|
|
if ip_limit and ip_limit.get('max_requests'):
|
|
self.state['rate_limit_trackers'].setdefault('download_per_ip', []).append(now)
|
|
if proxy_limit and proxy_limit.get('max_requests') and proxy_url:
|
|
self.state['rate_limit_trackers'].setdefault(f"download_proxy_{proxy_url}", []).append(now)
|
|
|
|
return True
|
|
|
|
def wait_for_proxy_cooldown(self, proxy_url, policy):
|
|
"""If a per-proxy sleep is defined, wait until the cooldown period has passed."""
|
|
with self.lock:
|
|
d_policy = policy.get('download_policy', {})
|
|
sleep_duration = d_policy.get('sleep_per_proxy_seconds', 0)
|
|
if not proxy_url or sleep_duration <= 0:
|
|
return
|
|
|
|
last_finish = self.state.setdefault('proxy_last_finish_time', {}).get(proxy_url, 0)
|
|
elapsed = time.time() - last_finish
|
|
|
|
if elapsed < sleep_duration:
|
|
time_to_sleep = sleep_duration - elapsed
|
|
logger.info(f"Proxy '{proxy_url}' was used recently. Sleeping for {time_to_sleep:.2f}s.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + time_to_sleep
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
logger.info("Shutdown requested during proxy cooldown sleep.")
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
def update_proxy_finish_time(self, proxy_url):
|
|
"""Updates the last finish time for a proxy."""
|
|
with self.lock:
|
|
if not proxy_url:
|
|
return
|
|
self.state.setdefault('proxy_last_finish_time', {})[proxy_url] = time.time()
|
|
|
|
def print_summary(self, policy=None):
|
|
"""Print a summary of the test run."""
|
|
with self.lock:
|
|
# --- Cumulative Stats from State ---
|
|
now = time.time()
|
|
rate_trackers = self.state.get('rate_limit_trackers', {})
|
|
if rate_trackers:
|
|
logger.info("\n--- Cumulative Rate Summary (All Runs, updated at end of run) ---")
|
|
logger.info("This shows the total number of requests/downloads over various time windows, including previous runs.")
|
|
|
|
fetch_trackers = {k: v for k, v in rate_trackers.items() if not k.startswith('download_')}
|
|
download_trackers = {k: v for k, v in rate_trackers.items() if k.startswith('download_')}
|
|
|
|
def print_tracker_stats(trackers, tracker_type):
|
|
if not trackers:
|
|
logger.info(f"No historical {tracker_type} trackers found.")
|
|
return
|
|
|
|
logger.info(f"Historical {tracker_type} Trackers:")
|
|
for key, timestamps in sorted(trackers.items()):
|
|
windows = {
|
|
'last 10 min': 600, 'last 60 min': 3600,
|
|
'last 6 hours': 21600, 'last 24 hours': 86400
|
|
}
|
|
rates_str_parts = []
|
|
for name, seconds in windows.items():
|
|
count = sum(1 for ts in timestamps if now - ts <= seconds)
|
|
rate_rpm = (count / seconds) * 60 if seconds > 0 else 0
|
|
rates_str_parts.append(f"{count} in {name} ({rate_rpm:.2f}/min)")
|
|
|
|
# Clean up key for display
|
|
display_key = key.replace('download_', '').replace('per_ip', 'all_proxies/ips')
|
|
logger.info(f" - Tracker '{display_key}': " + ", ".join(rates_str_parts))
|
|
|
|
print_tracker_stats(fetch_trackers, "Fetch Request")
|
|
print_tracker_stats(download_trackers, "Download Attempt")
|
|
|
|
if not self.events:
|
|
logger.info("\nNo new events were recorded in this session.")
|
|
return
|
|
|
|
duration = time.time() - self.start_time
|
|
fetch_events = [e for e in self.events if e.get('type') == 'fetch']
|
|
download_events = [e for e in self.events if e.get('type') != 'fetch']
|
|
|
|
logger.info("\n--- Test Summary (This Run) ---")
|
|
logger.info(f"Total duration: {duration:.2f} seconds")
|
|
logger.info(f"Total info.json requests (cumulative): {self.get_request_count()}")
|
|
|
|
if policy:
|
|
logger.info("\n--- Test Configuration ---")
|
|
settings = policy.get('settings', {})
|
|
d_policy = policy.get('download_policy', {})
|
|
|
|
if settings.get('urls_file'):
|
|
logger.info(f"URL source file: {settings['urls_file']}")
|
|
if settings.get('info_json_dir'):
|
|
logger.info(f"Info.json source dir: {settings['info_json_dir']}")
|
|
|
|
if d_policy:
|
|
logger.info(f"Download formats: {d_policy.get('formats', 'N/A')}")
|
|
if d_policy.get('downloader'):
|
|
logger.info(f"Downloader: {d_policy.get('downloader')}")
|
|
if d_policy.get('downloader_args'):
|
|
logger.info(f"Downloader args: {d_policy.get('downloader_args')}")
|
|
if d_policy.get('pause_before_download_seconds'):
|
|
logger.info(f"Pause before download: {d_policy.get('pause_before_download_seconds')}s")
|
|
if d_policy.get('sleep_between_formats'):
|
|
sleep_cfg = d_policy.get('sleep_between_formats')
|
|
logger.info(f"Sleep between formats: {sleep_cfg.get('min_seconds', 0)}-{sleep_cfg.get('max_seconds', 0)}s")
|
|
|
|
if fetch_events:
|
|
total_fetches = len(fetch_events)
|
|
successful_fetches = sum(1 for e in fetch_events if e['success'])
|
|
cancelled_fetches = sum(1 for e in fetch_events if e.get('error_type') == 'Cancelled')
|
|
failed_fetches = total_fetches - successful_fetches - cancelled_fetches
|
|
|
|
logger.info("\n--- Fetch Summary (This Run) ---")
|
|
logger.info(f"Total info.json fetch attempts: {total_fetches}")
|
|
logger.info(f" - Successful: {successful_fetches}")
|
|
logger.info(f" - Failed: {failed_fetches}")
|
|
if cancelled_fetches > 0:
|
|
logger.info(f" - Cancelled: {cancelled_fetches}")
|
|
|
|
completed_fetches = successful_fetches + failed_fetches
|
|
if completed_fetches > 0:
|
|
success_rate = (successful_fetches / completed_fetches) * 100
|
|
logger.info(f"Success rate (of completed): {success_rate:.2f}%")
|
|
elif total_fetches > 0:
|
|
logger.info("Success rate: N/A (no tasks completed)")
|
|
|
|
if duration > 1 and total_fetches > 0:
|
|
rpm = (total_fetches / duration) * 60
|
|
logger.info(f"Actual fetch rate: {rpm:.2f} requests/minute")
|
|
|
|
if failed_fetches > 0:
|
|
error_counts = collections.Counter(
|
|
e.get('error_type', 'Unknown')
|
|
for e in fetch_events if not e['success'] and e.get('error_type') != 'Cancelled'
|
|
)
|
|
logger.info("Failure breakdown:")
|
|
for error_type, count in sorted(error_counts.items()):
|
|
logger.info(f" - {error_type}: {count}")
|
|
|
|
profile_counts = collections.Counter(e.get('profile') for e in fetch_events if e.get('profile'))
|
|
if profile_counts:
|
|
logger.info("Requests per profile:")
|
|
for profile, count in sorted(profile_counts.items()):
|
|
logger.info(f" - {profile}: {count}")
|
|
|
|
proxy_counts = collections.Counter(e.get('proxy_url') for e in fetch_events if e.get('proxy_url'))
|
|
if proxy_counts:
|
|
logger.info("Requests per proxy:")
|
|
for proxy, count in sorted(proxy_counts.items()):
|
|
logger.info(f" - {proxy}: {count}")
|
|
|
|
if download_events:
|
|
total_attempts = len(download_events)
|
|
successes = sum(1 for e in download_events if e['success'])
|
|
cancelled = sum(1 for e in download_events if e.get('error_type') == 'Cancelled')
|
|
failures = total_attempts - successes - cancelled
|
|
|
|
# --- Profile Association for Download Events ---
|
|
download_profiles = [e.get('profile') for e in download_events]
|
|
|
|
# For download_only mode, we might need to fall back to regex extraction
|
|
# if the profile wasn't passed down (e.g., no profile grouping).
|
|
profile_regex = None
|
|
if policy:
|
|
settings = policy.get('settings', {})
|
|
if settings.get('mode') == 'download_only':
|
|
profile_regex = settings.get('profile_extraction_regex')
|
|
|
|
if profile_regex:
|
|
for i, e in enumerate(download_events):
|
|
if not download_profiles[i]: # If profile wasn't set in the event
|
|
path = Path(e.get('path', ''))
|
|
match = re.search(profile_regex, path.name)
|
|
if match and match.groups():
|
|
download_profiles[i] = match.group(1)
|
|
|
|
# Replace any remaining Nones with 'unknown_profile'
|
|
download_profiles = [p or 'unknown_profile' for p in download_profiles]
|
|
|
|
num_profiles_used = len(set(p for p in download_profiles if p != 'unknown_profile'))
|
|
|
|
logger.info("\n--- Download Summary (This Run) ---")
|
|
if policy:
|
|
workers = policy.get('execution_control', {}).get('workers', 'N/A')
|
|
logger.info(f"Workers configured: {workers}")
|
|
|
|
logger.info(f"Profiles utilized for downloads: {num_profiles_used}")
|
|
logger.info(f"Total download attempts: {total_attempts}")
|
|
logger.info(f" - Successful: {successes}")
|
|
logger.info(f" - Failed: {failures}")
|
|
if cancelled > 0:
|
|
logger.info(f" - Cancelled: {cancelled}")
|
|
|
|
completed_downloads = successes + failures
|
|
if completed_downloads > 0:
|
|
success_rate = (successes / completed_downloads) * 100
|
|
logger.info(f"Success rate (of completed): {success_rate:.2f}%")
|
|
elif total_attempts > 0:
|
|
logger.info("Success rate: N/A (no tasks completed)")
|
|
|
|
duration_hours = duration / 3600.0
|
|
if duration > 1 and total_attempts > 0:
|
|
dpm = (total_attempts / duration) * 60
|
|
logger.info(f"Actual overall download rate: {dpm:.2f} attempts/minute")
|
|
|
|
total_bytes = sum(e.get('downloaded_bytes', 0) for e in download_events if e['success'])
|
|
if total_bytes > 0:
|
|
logger.info(f"Total data downloaded: {format_size(total_bytes)}")
|
|
|
|
if failures > 0:
|
|
error_counts = collections.Counter(
|
|
e.get('error_type', 'Unknown')
|
|
for e in download_events if not e['success'] and e.get('error_type') != 'Cancelled'
|
|
)
|
|
logger.info("Failure breakdown:")
|
|
for error_type, count in sorted(error_counts.items()):
|
|
logger.info(f" - {error_type}: {count}")
|
|
|
|
# Add profile to each download event for easier counting
|
|
for i, e in enumerate(download_events):
|
|
e['profile'] = download_profiles[i]
|
|
|
|
profile_counts = collections.Counter(e.get('profile') for e in download_events if e.get('profile'))
|
|
if profile_counts:
|
|
logger.info("Downloads per profile:")
|
|
for profile, count in sorted(profile_counts.items()):
|
|
rate_per_hour = (count / duration_hours) if duration_hours > 0 else 0
|
|
logger.info(f" - {profile}: {count} attempts (avg this run: {rate_per_hour:.2f}/hour)")
|
|
|
|
proxy_counts = collections.Counter(e.get('proxy_url') for e in download_events if e.get('proxy_url'))
|
|
if proxy_counts:
|
|
logger.info("Downloads per proxy:")
|
|
for proxy, count in sorted(proxy_counts.items()):
|
|
rate_per_hour = (count / duration_hours) if duration_hours > 0 else 0
|
|
logger.info(f" - {proxy}: {count} attempts (avg this run: {rate_per_hour:.2f}/hour)")
|
|
|
|
logger.info("--------------------")
|
|
|
|
|
|
def _run_download_logic(source, info_json_content, policy, state_manager, profile_name=None):
|
|
"""Shared download logic for a single info.json."""
|
|
proxy_url = None
|
|
if info_json_content:
|
|
try:
|
|
info_data = json.loads(info_json_content)
|
|
proxy_url = info_data.get('_proxy_url')
|
|
except (json.JSONDecodeError, AttributeError):
|
|
logger.warning(f"[{get_display_name(source)}] Could not parse info.json to get proxy for download controls.")
|
|
|
|
if not state_manager.check_and_update_download_rate_limit(proxy_url, policy):
|
|
return []
|
|
|
|
state_manager.wait_for_proxy_cooldown(proxy_url, policy)
|
|
results = process_info_json_cycle(source, info_json_content, policy, state_manager, proxy_url=proxy_url, profile_name=profile_name)
|
|
state_manager.update_proxy_finish_time(proxy_url)
|
|
return results
|
|
|
|
|
|
def process_profile_task(profile_name, file_list, policy, state_manager, cycle_num):
|
|
"""Worker task for a profile, processing its files sequentially."""
|
|
logger.info(f"Worker {get_worker_id()} starting task for profile '{profile_name}' with {len(file_list)} files.")
|
|
all_results = []
|
|
for i, file_path in enumerate(file_list):
|
|
if shutdown_event.is_set():
|
|
logger.info(f"Shutdown requested, stopping task for profile '{profile_name}'.")
|
|
break
|
|
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"[{get_display_name(file_path)}] Could not read info.json file: {e}")
|
|
continue # Skip this file
|
|
|
|
results_for_file = _run_download_logic(file_path, info_json_content, policy, state_manager, profile_name=profile_name)
|
|
all_results.extend(results_for_file)
|
|
|
|
# Check for stop conditions after processing each file
|
|
should_stop_profile = False
|
|
for result in results_for_file:
|
|
if not result['success']:
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
if s_conditions.get('on_failure') or \
|
|
(s_conditions.get('on_http_403') and result['error_type'] == 'HTTP 403') or \
|
|
(s_conditions.get('on_timeout') and result['error_type'] == 'Timeout'):
|
|
logger.info(f"Stopping further processing for profile '{profile_name}' due to failure.")
|
|
should_stop_profile = True
|
|
break
|
|
if should_stop_profile:
|
|
break
|
|
|
|
# Apply sleep between tasks for this profile
|
|
if i < len(file_list) - 1:
|
|
exec_control = policy.get('execution_control', {})
|
|
sleep_cfg = exec_control.get('sleep_between_tasks', {})
|
|
sleep_min = sleep_cfg.get('min_seconds', 0)
|
|
|
|
if sleep_min > 0:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
sleep_duration = random.uniform(sleep_min, sleep_max) if sleep_max > sleep_min else sleep_min
|
|
|
|
logger.debug(f"Profile '{profile_name}' sleeping for {sleep_duration:.2f}s before next file.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
return all_results
|
|
|
|
|
|
def run_command(cmd, input_data=None, binary_stdout=False):
|
|
"""
|
|
Runs a command, captures its output, and returns status.
|
|
If binary_stdout is True, stdout is returned as bytes. Otherwise, both are decoded strings.
|
|
"""
|
|
logger.debug(f"Running command: {' '.join(cmd)}")
|
|
process = None
|
|
try:
|
|
# Always open in binary mode to handle both cases. We will decode later.
|
|
process = subprocess.Popen(
|
|
cmd,
|
|
stdin=subprocess.PIPE if input_data else None,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
preexec_fn=os.setsid # Start in a new process group to isolate from terminal signals
|
|
)
|
|
with process_lock:
|
|
running_processes.add(process)
|
|
|
|
stdout_capture = []
|
|
stderr_capture = []
|
|
|
|
def read_pipe(pipe, capture_list, display_pipe=None):
|
|
"""Reads a pipe line by line (as bytes), appending to a list and optionally displaying."""
|
|
for line in iter(pipe.readline, b''):
|
|
capture_list.append(line)
|
|
if display_pipe:
|
|
# Decode for display
|
|
display_line = line.decode('utf-8', errors='replace')
|
|
display_pipe.write(display_line)
|
|
display_pipe.flush()
|
|
|
|
# We must read stdout and stderr in parallel to prevent deadlocks.
|
|
stdout_thread = threading.Thread(target=read_pipe, args=(process.stdout, stdout_capture))
|
|
# Display stderr in real-time as it often contains progress info.
|
|
stderr_thread = threading.Thread(target=read_pipe, args=(process.stderr, stderr_capture, sys.stderr))
|
|
|
|
stdout_thread.start()
|
|
stderr_thread.start()
|
|
|
|
# Handle stdin after starting to read outputs to avoid deadlocks.
|
|
if input_data:
|
|
try:
|
|
process.stdin.write(input_data.encode('utf-8'))
|
|
process.stdin.close()
|
|
except (IOError, BrokenPipeError):
|
|
# This can happen if the process exits quickly or doesn't read stdin.
|
|
logger.debug(f"Could not write to stdin for command: {' '.join(cmd)}. Process may have already exited.")
|
|
|
|
# Wait for the process to finish and for all output to be read.
|
|
retcode = process.wait()
|
|
stdout_thread.join()
|
|
stderr_thread.join()
|
|
|
|
stdout_bytes = b"".join(stdout_capture)
|
|
stderr_bytes = b"".join(stderr_capture)
|
|
|
|
stdout = stdout_bytes if binary_stdout else stdout_bytes.decode('utf-8', errors='replace')
|
|
stderr = stderr_bytes.decode('utf-8', errors='replace')
|
|
|
|
return retcode, stdout, stderr
|
|
|
|
except FileNotFoundError:
|
|
logger.error(f"Command not found: {cmd[0]}. Make sure it's in your PATH.")
|
|
return -1, "", f"Command not found: {cmd[0]}"
|
|
except Exception as e:
|
|
logger.error(f"An error occurred while running command: {' '.join(cmd)}. Error: {e}")
|
|
return -1, "", str(e)
|
|
finally:
|
|
if process:
|
|
with process_lock:
|
|
running_processes.discard(process)
|
|
|
|
|
|
def run_download_worker(info_json_path, info_json_content, format_to_download, policy, profile_name=None):
|
|
"""
|
|
Performs a single download attempt. Designed to be run in a worker thread.
|
|
"""
|
|
download_policy = policy.get('download_policy', {})
|
|
settings = policy.get('settings', {})
|
|
downloader = download_policy.get('downloader')
|
|
|
|
# Get script command from settings, with fallback to download_policy for old format.
|
|
script_cmd_str = settings.get('download_script')
|
|
if not script_cmd_str:
|
|
script_cmd_str = download_policy.get('script')
|
|
|
|
if script_cmd_str:
|
|
download_cmd = shlex.split(script_cmd_str)
|
|
elif downloader == 'aria2c_rpc':
|
|
download_cmd = [sys.executable, '-m', 'ytops_client.cli', 'download', 'aria-rpc']
|
|
elif downloader == 'native-cli':
|
|
download_cmd = [sys.executable, '-m', 'ytops_client.cli', 'download', 'cli']
|
|
else:
|
|
# Default to the new native-py downloader if downloader is 'native-py' or not specified.
|
|
download_cmd = [sys.executable, '-m', 'ytops_client.cli', 'download', 'py']
|
|
|
|
download_cmd.extend(['-f', format_to_download])
|
|
|
|
if downloader == 'aria2c_rpc':
|
|
if download_policy.get('aria_host'):
|
|
download_cmd.extend(['--aria-host', str(download_policy['aria_host'])])
|
|
if download_policy.get('aria_port'):
|
|
download_cmd.extend(['--aria-port', str(download_policy['aria_port'])])
|
|
if download_policy.get('aria_secret'):
|
|
download_cmd.extend(['--aria-secret', str(download_policy['aria_secret'])])
|
|
if download_policy.get('output_dir'):
|
|
download_cmd.extend(['--output-dir', str(download_policy['output_dir'])])
|
|
if download_policy.get('aria_remote_dir'):
|
|
download_cmd.extend(['--remote-dir', str(download_policy['aria_remote_dir'])])
|
|
if download_policy.get('aria_fragments_dir'):
|
|
download_cmd.extend(['--fragments-dir', str(download_policy['aria_fragments_dir'])])
|
|
# For stress testing, waiting is the desired default to get a success/fail result.
|
|
# Allow disabling it by explicitly setting aria_wait: false in the policy.
|
|
if download_policy.get('aria_wait', True):
|
|
download_cmd.append('--wait')
|
|
|
|
if download_policy.get('auto_merge_fragments'):
|
|
download_cmd.append('--auto-merge-fragments')
|
|
if download_policy.get('remove_fragments_after_merge'):
|
|
download_cmd.append('--remove-fragments-after-merge')
|
|
if download_policy.get('cleanup'):
|
|
download_cmd.append('--cleanup')
|
|
if download_policy.get('purge_on_complete'):
|
|
download_cmd.append('--purge-on-complete')
|
|
|
|
downloader_args = download_policy.get('downloader_args')
|
|
proxy = download_policy.get('proxy')
|
|
if proxy:
|
|
# Note: proxy_rename is not supported for aria2c_rpc mode.
|
|
proxy_arg = f"--all-proxy {shlex.quote(str(proxy))}"
|
|
if downloader_args:
|
|
downloader_args = f"{downloader_args} {proxy_arg}"
|
|
else:
|
|
downloader_args = proxy_arg
|
|
|
|
if downloader_args:
|
|
# For aria2c_rpc, the downloader_args value is passed directly to the script's --downloader-args option.
|
|
download_cmd.extend(['--downloader-args', downloader_args])
|
|
elif downloader == 'native-cli':
|
|
# This is the logic for the legacy download_tool.py (yt-dlp CLI wrapper).
|
|
pause_seconds = download_policy.get('pause_before_download_seconds')
|
|
if pause_seconds and isinstance(pause_seconds, (int, float)) and pause_seconds > 0:
|
|
download_cmd.extend(['--pause', str(pause_seconds)])
|
|
|
|
if download_policy.get('continue_downloads'):
|
|
download_cmd.append('--download-continue')
|
|
|
|
# Add proxy if specified directly in the policy
|
|
proxy = download_policy.get('proxy')
|
|
if proxy:
|
|
download_cmd.extend(['--proxy', str(proxy)])
|
|
|
|
proxy_rename = download_policy.get('proxy_rename')
|
|
if proxy_rename:
|
|
download_cmd.extend(['--proxy-rename', str(proxy_rename)])
|
|
|
|
extra_args = download_policy.get('extra_args')
|
|
if extra_args:
|
|
download_cmd.extend(shlex.split(extra_args))
|
|
|
|
# Note: 'downloader' here refers to yt-dlp's internal downloader, not our script.
|
|
# The policy key 'external_downloader' is clearer, but we support 'downloader' for backward compatibility.
|
|
ext_downloader = download_policy.get('external_downloader') or download_policy.get('downloader')
|
|
if ext_downloader and ext_downloader not in ['native-cli', 'native-py', 'aria2c_rpc']:
|
|
download_cmd.extend(['--downloader', str(ext_downloader)])
|
|
|
|
downloader_args = download_policy.get('downloader_args')
|
|
if downloader_args:
|
|
download_cmd.extend(['--downloader-args', str(downloader_args)])
|
|
|
|
if download_policy.get('merge_output_format'):
|
|
download_cmd.extend(['--merge-output-format', str(download_policy['merge_output_format'])])
|
|
|
|
if download_policy.get('cleanup'):
|
|
download_cmd.append('--cleanup')
|
|
else:
|
|
# This is the default logic for the new native-py downloader.
|
|
if download_policy.get('output_to_buffer'):
|
|
download_cmd.append('--output-buffer')
|
|
else:
|
|
# --output-dir is only relevant if not outputting to buffer.
|
|
if download_policy.get('output_dir'):
|
|
download_cmd.extend(['--output-dir', str(download_policy['output_dir'])])
|
|
|
|
if download_policy.get('temp_path'):
|
|
download_cmd.extend(['--temp-path', str(download_policy['temp_path'])])
|
|
if download_policy.get('continue_downloads'):
|
|
download_cmd.append('--download-continue')
|
|
|
|
pause_seconds = download_policy.get('pause_before_download_seconds')
|
|
if pause_seconds and isinstance(pause_seconds, (int, float)) and pause_seconds > 0:
|
|
download_cmd.extend(['--pause', str(pause_seconds)])
|
|
|
|
proxy = download_policy.get('proxy')
|
|
if proxy:
|
|
download_cmd.extend(['--proxy', str(proxy)])
|
|
|
|
proxy_rename = download_policy.get('proxy_rename')
|
|
if proxy_rename:
|
|
download_cmd.extend(['--proxy-rename', str(proxy_rename)])
|
|
|
|
# The 'extra_args' from the policy are for the download script itself, not for yt-dlp.
|
|
# We need to split them and add them to the command.
|
|
extra_args = download_policy.get('extra_args')
|
|
if extra_args:
|
|
download_cmd.extend(shlex.split(extra_args))
|
|
|
|
# Pass through downloader settings for yt-dlp to use
|
|
# e.g. to tell yt-dlp to use aria2c as its backend
|
|
ext_downloader = download_policy.get('external_downloader')
|
|
if ext_downloader:
|
|
download_cmd.extend(['--downloader', str(ext_downloader)])
|
|
|
|
downloader_args = download_policy.get('downloader_args')
|
|
if downloader_args:
|
|
download_cmd.extend(['--downloader-args', str(downloader_args)])
|
|
|
|
worker_id = get_worker_id()
|
|
display_name = get_display_name(info_json_path)
|
|
profile_log_part = f" [Profile: {profile_name}]" if profile_name else ""
|
|
log_prefix = f"[Worker {worker_id}]{profile_log_part} [{display_name} @ {format_to_download}]"
|
|
logger.info(f"{log_prefix} Kicking off download process...")
|
|
|
|
temp_info_file_path = None
|
|
try:
|
|
if isinstance(info_json_path, Path) and info_json_path.exists():
|
|
# The info.json is already in a file, pass its path directly.
|
|
download_cmd.extend(['--load-info-json', str(info_json_path)])
|
|
else:
|
|
# The info.json content is in memory, so write it to a temporary file.
|
|
import tempfile
|
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as temp_f:
|
|
temp_f.write(info_json_content)
|
|
temp_info_file_path = temp_f.name
|
|
download_cmd.extend(['--load-info-json', temp_info_file_path])
|
|
|
|
cmd_str_for_log = ' '.join(shlex.quote(s) for s in download_cmd)
|
|
logger.info(f"{log_prefix} Running download command: {cmd_str_for_log}")
|
|
output_to_buffer = download_policy.get('output_to_buffer', False)
|
|
retcode, stdout, stderr = run_command(download_cmd, binary_stdout=output_to_buffer)
|
|
finally:
|
|
if temp_info_file_path and os.path.exists(temp_info_file_path):
|
|
os.unlink(temp_info_file_path)
|
|
|
|
is_403_error = "HTTP Error 403" in stderr
|
|
is_timeout_error = "Read timed out" in stderr
|
|
output_to_buffer = download_policy.get('output_to_buffer', False)
|
|
|
|
result = {
|
|
'type': 'download',
|
|
'path': str(info_json_path),
|
|
'format': format_to_download,
|
|
'success': retcode == 0,
|
|
'error_type': None,
|
|
'details': '',
|
|
'downloaded_bytes': 0,
|
|
'profile': profile_name
|
|
}
|
|
|
|
if retcode == 0:
|
|
details_str = "OK"
|
|
size_in_bytes = 0
|
|
if output_to_buffer:
|
|
# The most accurate size is the length of the stdout buffer.
|
|
size_in_bytes = len(stdout) # stdout is bytes
|
|
details_str += f" (Buffered {format_size(size_in_bytes)})"
|
|
else:
|
|
size_match = re.search(r'\[download\]\s+100%\s+of\s+~?([0-9.]+)(B|KiB|MiB|GiB)', stderr)
|
|
if size_match:
|
|
value = float(size_match.group(1))
|
|
unit = size_match.group(2)
|
|
multipliers = {"B": 1, "KiB": 1024, "MiB": 1024**2, "GiB": 1024**3}
|
|
size_in_bytes = int(value * multipliers.get(unit, 1))
|
|
details_str += f" ({size_match.group(1)}{unit})"
|
|
|
|
result['downloaded_bytes'] = size_in_bytes
|
|
result['details'] = details_str
|
|
else:
|
|
# Check both stdout and stderr for error messages, as logging might be directed to stdout.
|
|
# stdout may be bytes when output_to_buffer is set; decode before scanning for error lines.
full_output = f"{stdout.decode('utf-8', errors='replace') if isinstance(stdout, bytes) else stdout}\n{stderr}"
|
|
error_lines = [line for line in full_output.strip().split('\n') if 'ERROR:' in line]
|
|
result['details'] = error_lines[-1].strip() if error_lines else "Unknown error"
|
|
|
|
if is_403_error:
|
|
result['error_type'] = 'HTTP 403'
|
|
elif is_timeout_error:
|
|
result['error_type'] = 'Timeout'
|
|
else:
|
|
result['error_type'] = f'Exit Code {retcode}'
|
|
|
|
return result
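# With no 'download_script' and no explicit 'downloader', the assembled command looks
# roughly like this (format, paths and proxy are illustrative, not defaults):
#   <python> -m ytops_client.cli download py -f 140 \
#       --output-dir downloads --proxy socks5://127.0.0.1:1080 \
#       --load-info-json /tmp/xxxx.json
# Only options present in download_policy are appended; --load-info-json is added last,
# pointing either at the original file or at a temporary copy of the in-memory content.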
|
|
|
|
|
|
def process_info_json_cycle(path, content, policy, state_manager, proxy_url=None, profile_name=None):
|
|
"""
|
|
Processes one info.json file for one cycle, downloading selected formats.
|
|
"""
|
|
results = []
|
|
display_name = get_display_name(path)
|
|
d_policy = policy.get('download_policy', {})
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
format_selection = d_policy.get('formats', '')
|
|
|
|
try:
|
|
info_data = json.loads(content)
|
|
available_formats = [f['format_id'] for f in info_data.get('formats', [])]
|
|
if not available_formats:
|
|
logger.warning(f"[{display_name}] No formats found in info.json. Skipping.")
|
|
return []
|
|
|
|
formats_to_test = []
|
|
if format_selection == 'all':
|
|
formats_to_test = available_formats
|
|
elif format_selection.startswith('random:'):
|
|
percent = float(format_selection.split(':')[1].rstrip('%'))
|
|
count = max(1, int(len(available_formats) * (percent / 100.0)))
|
|
formats_to_test = random.sample(available_formats, k=count)
|
|
elif format_selection.startswith('random_from:'):
|
|
choices = [f.strip() for f in format_selection.split(':', 1)[1].split(',')]
|
|
valid_choices = [f for f in choices if f in available_formats]
|
|
if valid_choices:
|
|
formats_to_test = [random.choice(valid_choices)]
|
|
else:
|
|
requested_formats = [f.strip() for f in format_selection.split(',') if f.strip()]
|
|
formats_to_test = []
|
|
for req_fmt in requested_formats:
|
|
# If it's a complex selector with slashes, don't try to validate it against available formats.
|
|
if '/' in req_fmt:
|
|
formats_to_test.append(req_fmt)
|
|
continue
|
|
|
|
# Check for exact match first
|
|
if req_fmt in available_formats:
|
|
formats_to_test.append(req_fmt)
|
|
continue
|
|
|
|
# If no exact match, check for formats that start with this ID + '-'
|
|
# e.g., req_fmt '140' should match '140-0'
|
|
prefix_match = f"{req_fmt}-"
|
|
first_match = next((af for af in available_formats if af.startswith(prefix_match)), None)
|
|
|
|
if first_match:
|
|
logger.info(f"[{display_name}] Requested format '{req_fmt}' not found. Using first available match: '{first_match}'.")
|
|
formats_to_test.append(first_match)
|
|
else:
|
|
# This could be a complex selector like 'bestvideo' or '299/298', so keep it.
|
|
if req_fmt not in available_formats:
|
|
logger.warning(f"[{display_name}] Requested format '{req_fmt}' not found in available formats.")
|
|
formats_to_test.append(req_fmt)
|
|
|
|
except json.JSONDecodeError:
|
|
logger.error(f"[{display_name}] Failed to parse info.json. Skipping.")
|
|
return []
|
|
|
|
for i, format_id in enumerate(formats_to_test):
|
|
if shutdown_event.is_set():
|
|
logger.info(f"Shutdown requested, stopping further format tests for {display_name}.")
|
|
break
|
|
|
|
# Check if the format URL is expired before attempting to download
|
|
format_details = next((f for f in info_data.get('formats', []) if f.get('format_id') == format_id), None)
|
|
if format_details and 'url' in format_details:
|
|
parsed_url = urlparse(format_details['url'])
|
|
query_params = parse_qs(parsed_url.query)
|
|
expire_ts_str = query_params.get('expire', [None])[0]
|
|
if expire_ts_str and expire_ts_str.isdigit():
|
|
expire_ts = int(expire_ts_str)
|
|
if expire_ts < time.time():
|
|
logger.warning(f"[{display_name}] Skipping format '{format_id}' because its URL is expired.")
|
|
result = {
|
|
'type': 'download', 'path': str(path), 'format': format_id,
|
|
'success': True, 'error_type': 'Skipped',
|
|
'details': 'Download URL is expired', 'downloaded_bytes': 0
|
|
}
|
|
if proxy_url:
|
|
result['proxy_url'] = proxy_url
|
|
state_manager.log_event(result)
|
|
results.append(result)
|
|
continue # Move to the next format
|
|
|
|
result = run_download_worker(path, content, format_id, policy, profile_name=profile_name)
|
|
if proxy_url:
|
|
result['proxy_url'] = proxy_url
|
|
state_manager.log_event(result)
|
|
results.append(result)
|
|
|
|
worker_id = get_worker_id()
|
|
status = "SUCCESS" if result['success'] else f"FAILURE ({result['error_type']})"
|
|
profile_log_part = f" [Profile: {profile_name}]" if profile_name else ""
|
|
logger.info(f"[Worker {worker_id}]{profile_log_part} Result for {display_name} (format {format_id}): {status} - {result.get('details', 'OK')}")
|
|
|
|
if not result['success']:
|
|
if s_conditions.get('on_failure') or \
|
|
(s_conditions.get('on_http_403') and result['error_type'] == 'HTTP 403') or \
|
|
(s_conditions.get('on_timeout') and result['error_type'] == 'Timeout'):
|
|
logger.info(f"Stopping further format tests for {display_name} in this cycle due to failure.")
|
|
break
|
|
|
|
sleep_cfg = d_policy.get('sleep_between_formats', {})
|
|
sleep_min = sleep_cfg.get('min_seconds', 0)
|
|
if sleep_min > 0 and i < len(formats_to_test) - 1:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
if sleep_max > sleep_min:
|
|
sleep_duration = random.uniform(sleep_min, sleep_max)
|
|
else:
|
|
sleep_duration = sleep_min
|
|
|
|
logger.debug(f"Sleeping for {sleep_duration:.2f}s between formats for {display_name}.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
return results
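# Accepted values for download_policy.formats, as parsed above (format IDs illustrative):
#   all                   -> try every format_id present in the info.json
#   random:25%            -> a random 25% sample of the available format_ids (at least one)
#   random_from:140,299   -> pick one format at random from the listed IDs
#   140,299/298,bestvideo -> comma-separated list; '/' selectors and unmatched names are
#                            passed through to the downloader, and '140' may resolve to a
#                            prefixed variant such as '140-0' when present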
|
|
|
|
|
|
def update_dict(d, u):
|
|
"""Recursively update a dictionary."""
|
|
for k, v in u.items():
|
|
if isinstance(v, collections.abc.Mapping):
|
|
d[k] = update_dict(d.get(k, {}), v)
|
|
else:
|
|
d[k] = v
|
|
return d
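# Merge sketch: nested mappings are merged key by key rather than replaced
# (values hypothetical):
#   >>> update_dict({'download_policy': {'formats': 'all', 'proxy': None}},
#   ...             {'download_policy': {'formats': '140'}})
#   {'download_policy': {'formats': '140', 'proxy': None}}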
|
|
|
|
|
|
def load_policy(policy_file, policy_name=None):
|
|
"""Load a policy from a YAML file."""
|
|
try:
|
|
with open(policy_file, 'r', encoding='utf-8') as f:
|
|
# If a policy name is given, look for that specific document
|
|
if policy_name:
|
|
docs = list(yaml.safe_load_all(f))
|
|
for doc in docs:
|
|
if isinstance(doc, dict) and doc.get('name') == policy_name:
|
|
return doc
|
|
raise ValueError(f"Policy '{policy_name}' not found in {policy_file}")
|
|
# Otherwise, load the first document
|
|
return yaml.safe_load(f)
|
|
except (IOError, yaml.YAMLError, ValueError) as e:
|
|
logger.error(f"Failed to load policy file {policy_file}: {e}")
|
|
sys.exit(1)
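# Policies file sketch: one or more YAML documents separated by '---', selected by their
# top-level 'name' when a policy name is requested, otherwise the first document is used.
# All field values below are hypothetical.
#
#   name: smoke_test
#   settings: {mode: download_only, info_json_dir: ./info}
#   ---
#   name: soak_test
#   execution_control: {workers: 4, run_until: {minutes: 120}}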
|
|
|
|
|
|
def apply_overrides(policy, overrides):
|
|
"""Apply command-line overrides to the policy."""
|
|
for override in overrides:
|
|
try:
|
|
key, value = override.split('=', 1)
|
|
keys = key.split('.')
|
|
|
|
# Try to parse as JSON/YAML if it looks like a list or dict, otherwise treat as scalar
|
|
if (value.startswith('[') and value.endswith(']')) or \
|
|
(value.startswith('{') and value.endswith('}')):
|
|
try:
|
|
value = yaml.safe_load(value)
|
|
except yaml.YAMLError:
|
|
logger.warning(f"Could not parse override value '{value}' as YAML. Treating as a string.")
|
|
else:
|
|
# Try to auto-convert scalar value type
|
|
if value.lower() == 'true':
|
|
value = True
|
|
elif value.lower() == 'false':
|
|
value = False
|
|
elif value.lower() == 'null':
|
|
value = None
|
|
else:
|
|
try:
|
|
value = int(value)
|
|
except ValueError:
|
|
try:
|
|
value = float(value)
|
|
except ValueError:
|
|
pass # Keep as string
|
|
|
|
d = policy
|
|
for k in keys[:-1]:
|
|
d = d.setdefault(k, {})
|
|
d[keys[-1]] = value
|
|
except ValueError:
|
|
logger.error(f"Invalid override format: '{override}'. Use 'key.subkey=value'.")
|
|
sys.exit(1)
|
|
return policy
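# Override sketch: each override string is split on the first '=', the dotted key is
# walked into the policy, and the value is coerced (true/false/null, int, float, or
# YAML for values wrapped in [] / {}). Hypothetical examples:
#   execution_control.workers=8                 -> int 8
#   download_policy.continue_downloads=true     -> bool True
#   download_policy.formats=[140,299]           -> list [140, 299]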
|
|
|
|
|
|
def display_effective_policy(policy, name, sources=None, profile_names=None, original_workers_setting=None):
|
|
"""Prints a human-readable summary of the effective policy."""
|
|
logger.info(f"--- Effective Policy: {name} ---")
|
|
settings = policy.get('settings', {})
|
|
exec_control = policy.get('execution_control', {})
|
|
|
|
logger.info(f"Mode: {settings.get('mode', 'full_stack')}")
|
|
if profile_names:
|
|
num_profiles = len(profile_names)
|
|
logger.info(f"Profiles found: {num_profiles}")
|
|
if num_profiles > 0:
|
|
# Sort profiles for consistent display, show top 10
|
|
sorted_profiles = sorted(profile_names)
|
|
profiles_to_show = sorted_profiles[:10]
|
|
logger.info(f" (e.g., {', '.join(profiles_to_show)}{'...' if num_profiles > 10 else ''})")
|
|
|
|
workers_display = str(exec_control.get('workers', 1))
|
|
if original_workers_setting == 'auto':
|
|
workers_display = f"auto (calculated: {workers_display})"
|
|
logger.info(f"Workers: {workers_display}")
|
|
|
|
sleep_cfg = exec_control.get('sleep_between_tasks', {})
|
|
sleep_min = sleep_cfg.get('min_seconds')
|
|
if sleep_min is not None:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
if sleep_max > sleep_min:
|
|
logger.info(f"Sleep between tasks (per worker): {sleep_min}-{sleep_max}s (random)")
|
|
else:
|
|
logger.info(f"Sleep between tasks (per worker): {sleep_min}s")
|
|
|
|
run_until = exec_control.get('run_until', {})
|
|
run_conditions = []
|
|
if 'minutes' in run_until:
|
|
run_conditions.append(f"for {run_until['minutes']} minutes")
|
|
if 'requests' in run_until:
|
|
run_conditions.append(f"until {run_until['requests']} total requests")
|
|
if 'cycles' in run_until:
|
|
run_conditions.append(f"for {run_until['cycles']} cycles")
|
|
|
|
if run_conditions:
|
|
logger.info(f"Run condition: Stop after running {' or '.join(run_conditions)}.")
|
|
if 'minutes' in run_until and 'cycles' not in run_until:
|
|
logger.info("Will continuously cycle through sources until time limit is reached.")
|
|
else:
|
|
logger.warning("WARNING: No 'run_until' condition is set. This test will run forever unless stopped manually.")
|
|
logger.info("Run condition: No stop condition defined, will run indefinitely (until Ctrl+C).")
|
|
|
|
# --- Rate Calculation ---
|
|
if sources:
|
|
workers = exec_control.get('workers', 1)
|
|
num_sources = len(profile_names) if profile_names else len(sources)
|
|
|
|
min_sleep = sleep_cfg.get('min_seconds', 0)
|
|
max_sleep = sleep_cfg.get('max_seconds') or min_sleep
|
|
avg_sleep_per_task = (min_sleep + max_sleep) / 2
|
|
|
|
# Assume an average task duration. This is a major assumption.
|
|
mode = settings.get('mode', 'full_stack')
|
|
assumptions = exec_control.get('assumptions', {})
|
|
|
|
assumed_fetch_duration = 0
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
assumed_fetch_duration = assumptions.get('fetch_task_duration', 12 if mode == 'full_stack' else 3)
|
|
|
|
assumed_download_duration = 0
|
|
if mode in ['full_stack', 'download_only']:
|
|
# This assumes the total time to download all formats for a single source.
|
|
assumed_download_duration = assumptions.get('download_task_duration', 60)
|
|
|
|
total_assumed_task_duration = assumed_fetch_duration + assumed_download_duration
|
|
|
|
if workers > 0 and total_assumed_task_duration > 0:
|
|
total_time_per_task = total_assumed_task_duration + avg_sleep_per_task
|
|
tasks_per_minute_per_worker = 60 / total_time_per_task
|
|
total_tasks_per_minute = tasks_per_minute_per_worker * workers
|
|
|
|
logger.info("--- Rate Estimation ---")
|
|
logger.info(f"Source count: {num_sources}")
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
logger.info(f"Est. fetch time per source: {assumed_fetch_duration}s (override via execution_control.assumptions.fetch_task_duration)")
|
|
if mode in ['full_stack', 'download_only']:
|
|
logger.info(f"Est. download time per source: {assumed_download_duration}s (override via execution_control.assumptions.download_task_duration)")
|
|
logger.info(" (Note: This assumes total time for all formats per source)")
|
|
|
|
logger.info(f"Est. sleep per task: {avg_sleep_per_task:.1f}s")
|
|
logger.info(f"==> Expected task rate: ~{total_tasks_per_minute:.2f} tasks/minute ({workers} workers * {tasks_per_minute_per_worker:.2f} tasks/min/worker)")
|
|
|
|
target_rate_cfg = exec_control.get('target_rate', {})
|
|
target_reqs = target_rate_cfg.get('requests')
|
|
target_mins = target_rate_cfg.get('per_minutes')
|
|
if target_reqs and target_mins:
|
|
target_rpm = target_reqs / target_mins
|
|
logger.info(f"Target rate: {target_rpm:.2f} tasks/minute")
|
|
if total_tasks_per_minute < target_rpm * 0.8:
|
|
logger.warning("Warning: Expected rate is significantly lower than target rate.")
|
|
logger.warning("Consider increasing workers, reducing sleep, or checking task performance.")
|
|
|
|
logger.info("---------------------------------")
|
|
time.sleep(2) # Give user time to read
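    # Worked example of the estimate above (all numbers hypothetical): in
    # full_stack mode with the default assumptions (12s fetch + 60s download)
    # and a 5s average sleep, one task occupies ~77s, so a single worker runs
    # ~60/77 = 0.78 tasks/minute and 4 workers ~3.1 tasks/minute.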
|
|
|
|
|
|
def add_stress_policy_parser(subparsers):
|
|
"""Add the parser for the 'stress-policy' command."""
|
|
parser = subparsers.add_parser(
|
|
'stress-policy',
|
|
description="The primary, policy-driven stress-testing orchestrator.\nIt runs complex, multi-stage stress tests based on a YAML policy file.\nUse '--list-policies' to see available pre-configured scenarios.\n\nModes supported:\n- full_stack: Generate info.json and then download from it.\n- fetch_only: Only generate info.json files.\n- download_only: Only download from existing info.json files.",
|
|
formatter_class=argparse.RawTextHelpFormatter,
|
|
help='Run advanced, policy-driven stress tests (recommended).',
|
|
epilog="""
|
|
Examples:
|
|
|
|
1. Fetch info.jsons for a TV client with a single profile and a rate limit:
|
|
ytops-client stress-policy --policy policies/1_fetch_only_policies.yaml \\
|
|
--policy-name tv_downgraded_single_profile \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set execution_control.run_until.minutes=30
|
|
# This runs a 'fetch_only' test using the 'tv_downgraded' client. It uses a single,
|
|
# static profile for all requests and enforces a safety limit of 450 requests per hour.
|
|
|
|
2. Fetch info.jsons for an Android client using cookies for authentication:
|
|
ytops-client stress-policy --policy policies/1_fetch_only_policies.yaml \\
|
|
--policy-name android_sdkless_with_cookies \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set info_json_generation_policy.request_params.cookies_file_path=/path/to/my_cookies.txt
|
|
# This demonstrates an authenticated 'fetch_only' test. It passes the path to a
|
|
# Netscape cookie file, which the server will use for the requests.
|
|
|
|
3. Download from a folder of info.jsons, grouped by profile, with auto-workers:
|
|
ytops-client stress-policy --policy policies/2_download_only_policies.yaml \\
|
|
--policy-name basic_profile_aware_download \\
|
|
--set settings.info_json_dir=/path/to/my/infojsons
|
|
# This runs a 'download_only' test. It scans a directory, extracts profile names from
|
|
# the filenames (e.g., 'tv_user_1' from '...-VIDEOID-tv_user_1.json'), and groups
|
|
# them. 'workers=auto' sets the number of workers to the number of unique profiles found.
|
|
|
|
4. Full-stack test with multiple workers and profile rotation:
|
|
ytops-client stress-policy --policy policies/3_full_stack_policies.yaml \\
|
|
--policy-name tv_simply_profile_rotation \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set execution_control.workers=4 \\
|
|
--set settings.profile_management.max_requests_per_profile=500
|
|
# This runs a 'full_stack' test with 4 parallel workers. Each worker gets a unique
|
|
# profile (e.g., tv_simply_user_0_0, tv_simply_user_1_0, etc.). After a profile is
|
|
# used 500 times, it is retired, and a new "generation" is created (e.g., tv_simply_user_0_1).
|
|
|
|
5. Full-stack authenticated test with a pool of profiles and corresponding cookie files:
|
|
ytops-client stress-policy --policy policies/3_full_stack_policies.yaml \\
|
|
--policy-name mweb_multi_profile_with_cookies \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set settings.profile_management.cookie_files='["/path/c1.txt","/path/c2.txt"]'
|
|
# This runs a 'full_stack' test using a pool of profiles (e.g., mweb_user_0, mweb_user_1).
|
|
# It uses the 'cookie_files' list to assign a specific cookie file to each profile in the
|
|
# pool, enabling multi-account authenticated testing. Note the JSON/YAML list format for the override.
|
|
|
|
6. Full-stack test submitting downloads to an aria2c RPC server:
|
|
ytops-client stress-policy --policy policies/3_full_stack_policies.yaml \\
|
|
--policy-name tv_simply_profile_rotation_aria2c_rpc \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set download_policy.aria_host=192.168.1.100 \\
|
|
--set download_policy.aria_port=6801
|
|
# This runs a test where downloads are not performed by the worker itself, but are
|
|
# sent to a remote aria2c daemon. The policy specifies 'downloader: aria2c_rpc'
|
|
# and provides connection details. This is useful for offloading download traffic.
|
|
|
|
--------------------------------------------------------------------------------
|
|
Overridable Policy Parameters via --set:
|
|
|
|
Key Description
|
|
-------------------------------------- ------------------------------------------------
|
|
[settings]
|
|
settings.mode Test mode: 'full_stack', 'fetch_only', or 'download_only'.
|
|
settings.urls_file Path to file with URLs/video IDs.
|
|
settings.info_json_dir Path to directory with existing info.json files.
|
|
settings.profile_extraction_regex For 'download_only' mode, a regex to extract profile names from info.json filenames. The first capture group is used as the profile name. E.g., '.*-(.*?).json'. This enables profile-aware sequential downloading.
|
|
settings.info_json_dir_sample_percent Randomly sample this %% of files from the directory (for 'once' scan mode).
|
|
settings.directory_scan_mode For 'download_only': 'once' (default) or 'continuous' to watch for new files.
|
|
settings.mark_processed_files For 'continuous' scan mode: if true, rename processed files to '*.<timestamp>.processed' to avoid reprocessing.
|
|
settings.max_files_per_cycle For 'continuous' scan mode: max new files to process per cycle.
|
|
settings.sleep_if_no_new_files_seconds For 'continuous' scan mode: seconds to sleep if no new files are found (default: 10).
|
|
settings.profile_prefix (Legacy) Prefix for profile names (e.g., 'test_user').
|
|
settings.profile_pool (Legacy) Size of the profile pool.
|
|
settings.profile_mode Profile strategy. 'per_request' (legacy), 'per_worker' (legacy), or 'per_worker_with_rotation' (requires profile_management).
|
|
settings.info_json_script Command to run the info.json generation script (e.g., 'bin/ytops-client get-info').
|
|
settings.save_info_json_dir If set, save all successfully generated info.json files to this directory.
|
|
|
|
[settings.profile_management] (New, preferred method for profile control)
|
|
profile_management.prefix Prefix for profile names (e.g., 'dyn_user').
|
|
profile_management.suffix Suffix for profile names. Set to 'auto' for a timestamp, or provide a string.
|
|
profile_management.initial_pool_size The number of profiles to start with.
|
|
profile_management.auto_expand_pool If true, create new profiles when the initial pool is exhausted (all sleeping).
|
|
profile_management.max_requests_per_profile Max requests a profile can make before it must 'sleep'.
|
|
profile_management.sleep_minutes_on_exhaustion How many minutes a profile 'sleeps' after hitting its request limit.
|
|
profile_management.cookie_files A list of paths to cookie files. Used to assign a unique cookie file to each profile in a pool.
|
|
|
|
[execution_control]
|
|
execution_control.workers Number of parallel worker threads. Set to "auto" to calculate from target_rate or number of profiles.
|
|
execution_control.auto_workers_max The maximum number of workers to use when 'workers' is 'auto' in profile-aware download mode (default: 8).
|
|
execution_control.target_rate.requests Target requests for 'auto' workers calculation.
|
|
execution_control.target_rate.per_minutes Period in minutes for target_rate.
|
|
execution_control.run_until.minutes Stop test after N minutes. Will continuously cycle through sources.
|
|
execution_control.run_until.cycles Stop test after N cycles. A cycle is one full pass through all sources.
|
|
execution_control.run_until.requests Stop test after N total info.json requests (cumulative across runs).
|
|
execution_control.sleep_between_tasks.min_seconds Min sleep time between tasks, per worker.
|
|
|
|
[info_json_generation_policy]
|
|
info_json_generation_policy.client Client to use (e.g., 'mweb', 'tv_camoufox').
|
|
info_json_generation_policy.auth_host Host for the auth/Thrift service.
|
|
info_json_generation_policy.auth_port Port for the auth/Thrift service.
|
|
info_json_generation_policy.assigned_proxy_url A specific proxy to use for a request, overriding the server's proxy pool.
|
|
info_json_generation_policy.proxy_rename Regex substitution for the assigned proxy URL (e.g., 's/old/new/').
|
|
info_json_generation_policy.command_template A full command template for the info.json script. Overrides other keys.
|
|
info_json_generation_policy.rate_limits.per_ip.max_requests Max requests for the given time period from one IP.
|
|
info_json_generation_policy.rate_limits.per_ip.per_minutes Time period in minutes for the per_ip rate limit.
|
|
info_json_generation_policy.rate_limits.per_profile.max_requests Max requests for a single profile in a time period.
|
|
info_json_generation_policy.rate_limits.per_profile.per_minutes Time period in minutes for the per_profile rate limit.
|
|
info_json_generation_policy.client_rotation_policy.major_client The primary client to use for most requests.
|
|
info_json_generation_policy.client_rotation_policy.refresh_client The client to use periodically to refresh context.
|
|
info_json_generation_policy.client_rotation_policy.refresh_every.requests Trigger refresh client after N requests for a profile.
|
|
|
|
[download_policy]
|
|
download_policy.formats Formats to download (e.g., '18,140', 'random:50%%').
|
|
download_policy.downloader Orchestrator script to use: 'native-py' (default, Python lib), 'native-cli' (legacy CLI wrapper), or 'aria2c_rpc'.
|
|
download_policy.external_downloader For 'native-py' or default, the backend yt-dlp should use (e.g., 'aria2c', 'native').
|
|
download_policy.downloader_args Arguments for the external_downloader. For yt-dlp, e.g., 'aria2c:-x 8'.
|
|
download_policy.merge_output_format Container to merge to (e.g., 'mkv'). Defaults to 'mp4' via cli.config.
|
|
download_policy.temp_path For 'native-py', path to a directory for temporary files (e.g., a RAM disk like /dev/shm).
|
|
download_policy.output_to_buffer For 'native-py', download to an in-memory buffer and pipe to stdout instead of saving to a file (true/false). Best for single-file formats.
|
|
download_policy.proxy Proxy for direct downloads (e.g., "socks5://127.0.0.1:1080").
|
|
download_policy.proxy_rename Regex substitution for the proxy URL (e.g., 's/old/new/').
|
|
download_policy.pause_before_download_seconds Pause for N seconds before starting each download attempt.
|
|
download_policy.continue_downloads Enable download continuation (true/false).
|
|
download_policy.cleanup After success: for native downloaders, rename and truncate file to 0 bytes; for 'aria2c_rpc', remove file(s) from filesystem.
|
|
download_policy.extra_args A string of extra arguments for the download script (e.g., "--limit-rate 5M").
|
|
download_policy.sleep_per_proxy_seconds Cooldown in seconds between downloads on the same proxy.
|
|
download_policy.rate_limits.per_proxy.max_requests Max downloads for a single proxy in a time period.
|
|
download_policy.rate_limits.per_proxy.per_minutes Time period in minutes for the per_proxy download rate limit.
|
|
# For downloader: 'aria2c_rpc'
|
|
download_policy.aria_host Hostname of the aria2c RPC server.
|
|
download_policy.aria_port Port of the aria2c RPC server.
|
|
download_policy.aria_secret Secret token for the aria2c RPC server.
|
|
download_policy.aria_wait Wait for aria2c downloads to complete (true/false).
|
|
download_policy.cleanup Remove downloaded file(s) from the filesystem on success. Requires script access to the download directory.
|
|
download_policy.purge_on_complete On success, purge ALL completed/failed downloads from aria2c history. Use as a workaround for older aria2c versions where targeted removal fails.
|
|
download_policy.output_dir Output directory for downloads.
|
|
download_policy.aria_remote_dir The absolute download path on the remote aria2c host.
|
|
download_policy.aria_fragments_dir The local path to find fragments for merging (if different from output_dir).
|
|
download_policy.auto_merge_fragments For fragmented downloads, automatically merge parts after download (true/false). Requires aria_wait=true.
|
|
download_policy.remove_fragments_after_merge For fragmented downloads, delete fragment files after a successful merge (true/false). Requires auto_merge_fragments=true.
|
|
|
|
[stop_conditions]
|
|
stop_conditions.on_failure Stop on any download failure (true/false).
|
|
stop_conditions.on_http_403 Stop on any HTTP 403 error (true/false).
|
|
stop_conditions.on_error_rate.max_errors Stop test if more than N errors (of any type) occur within the time period.
|
|
stop_conditions.on_error_rate.per_minutes Time period in minutes for the error rate calculation.
|
|
stop_conditions.on_cumulative_403.max_errors Stop test if more than N HTTP 403 errors occur within the time period.
|
|
stop_conditions.on_cumulative_403.per_minutes Time period in minutes for the cumulative 403 calculation.
|
|
stop_conditions.on_quality_degradation.trigger_if_missing_formats A format ID or comma-separated list of IDs. Triggers if any are missing.
|
|
stop_conditions.on_quality_degradation.max_triggers Stop test if quality degradation is detected N times.
|
|
stop_conditions.on_quality_degradation.per_minutes Time period in minutes for the quality degradation calculation.
|
|
--------------------------------------------------------------------------------
|
|
"""
|
|
)
|
|
parser.add_argument('--policy', help='Path to the YAML policy file. Required unless --list-policies is used.')
|
|
parser.add_argument('--policy-name', help='Name of the policy to run from a multi-policy file (if it contains "---" separators).')
|
|
parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
|
|
    parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single line of --set arguments, then exit.')
|
|
parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
|
|
|
|
# Add a group for aria2c-specific overrides for clarity in --help
|
|
aria_group = parser.add_argument_group('Aria2c RPC Downloader Overrides', 'Shortcuts for common --set options for the aria2c_rpc downloader.')
|
|
aria_group.add_argument('--auto-merge-fragments', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.auto_merge_fragments.')
|
|
aria_group.add_argument('--remove-fragments-after-merge', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.remove_fragments_after_merge.')
|
|
aria_group.add_argument('--fragments-dir', help='Shortcut for --set download_policy.aria_fragments_dir=PATH.')
|
|
aria_group.add_argument('--remote-dir', help='Shortcut for --set download_policy.aria_remote_dir=PATH.')
|
|
aria_group.add_argument('--cleanup', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.cleanup.')
|
|
|
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for the orchestrator and underlying scripts.')
|
|
parser.add_argument('--dry-run', action='store_true', help='Print the effective policy and exit without running the test.')
|
|
parser.add_argument('--disable-log-writing', action='store_true', help='Disable writing state, stats, and log files. By default, files are created for each run.')
|
|
return parser
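
# Illustrative wiring into a top-level CLI (the dispatcher below is hypothetical
# and not part of this module):
#
#   parser = argparse.ArgumentParser(prog='ytops-client')
#   subparsers = parser.add_subparsers(dest='command')
#   add_stress_policy_parser(subparsers)
#   args = parser.parse_args()
#   if args.command == 'stress-policy':
#       sys.exit(main_stress_policy(args))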
|
|
|
|
|
|
def list_policies():
|
|
"""Scans the policies directory and prints a list of available policies."""
|
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
project_root = os.path.abspath(os.path.join(script_dir, '..'))
|
|
policies_dir = os.path.join(project_root, 'policies')
|
|
|
|
if not os.path.isdir(policies_dir):
|
|
print(f"Error: Policies directory not found at '{policies_dir}'", file=sys.stderr)
|
|
return 1
|
|
|
|
print("Available Policies:")
|
|
print("=" * 20)
|
|
|
|
policy_files = sorted(Path(policies_dir).glob('*.yaml'))
|
|
if not policy_files:
|
|
print("No policy files (.yaml) found.")
|
|
return 0
|
|
|
|
for policy_file in policy_files:
|
|
print(f"\n--- File: {policy_file.relative_to(project_root)} ---")
|
|
try:
|
|
with open(policy_file, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
# Split into documents. The separator is a line that is exactly '---'.
|
|
documents = re.split(r'^\-\-\-$', content, flags=re.MULTILINE)
|
|
|
|
found_any_in_file = False
|
|
for doc in documents:
|
|
doc = doc.strip()
|
|
if not doc:
|
|
continue
|
|
|
|
lines = doc.split('\n')
|
|
policy_name = None
|
|
description_lines = []
|
|
|
|
# Find name and description
|
|
for i, line in enumerate(lines):
|
|
if line.strip().startswith('name:'):
|
|
policy_name = line.split(':', 1)[1].strip()
|
|
|
|
# Look backwards for comments
|
|
j = i - 1
|
|
current_desc_block = []
|
|
while j >= 0 and lines[j].strip().startswith('#'):
|
|
comment = lines[j].strip().lstrip('#').strip()
|
|
current_desc_block.insert(0, comment)
|
|
j -= 1
|
|
|
|
if current_desc_block:
|
|
description_lines = current_desc_block
|
|
break
|
|
|
|
if policy_name:
|
|
found_any_in_file = True
|
|
print(f" - Name: {policy_name}")
|
|
if description_lines:
|
|
# Heuristic to clean up "Policy: " prefix
|
|
if description_lines[0].lower().startswith('policy:'):
|
|
description_lines[0] = description_lines[0][len('policy:'):].strip()
|
|
|
|
print(f" Description: {description_lines[0]}")
|
|
for desc_line in description_lines[1:]:
|
|
print(f" {desc_line}")
|
|
else:
|
|
print(" Description: (No description found)")
|
|
|
|
relative_path = policy_file.relative_to(project_root)
|
|
print(f" Usage: --policy {relative_path} --policy-name {policy_name}")
|
|
|
|
if not found_any_in_file:
|
|
print(" (No named policies found in this file)")
|
|
|
|
except Exception as e:
|
|
print(f" Error parsing {policy_file.name}: {e}")
|
|
|
|
return 0
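
# The description heuristic above collects consecutive '#' comment lines placed
# directly above a 'name:' key. An illustrative policy header such as:
#
#   # Policy: Fetch-only smoke test
#   # Uses a single static profile.
#   name: fetch_smoke_test
#
# would be listed as:
#   - Name: fetch_smoke_test
#     Description: Fetch-only smoke test
#                  Uses a single static profile.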
|
|
|
|
|
|
def main_stress_policy(args):
|
|
"""Main logic for the 'stress-policy' command."""
|
|
if args.list_policies:
|
|
return list_policies()
|
|
|
|
if not args.policy:
|
|
print("Error: --policy is required unless using --list-policies.", file=sys.stderr)
|
|
return 1
|
|
|
|
# Handle --show-overrides early, as it doesn't run the test.
|
|
if args.show_overrides:
|
|
policy = load_policy(args.policy, args.policy_name)
|
|
if not policy:
|
|
return 1 # load_policy prints its own error
|
|
print_policy_overrides(policy)
|
|
return 0
|
|
|
|
policy = load_policy(args.policy, args.policy_name)
|
|
policy = apply_overrides(policy, args.set)
|
|
|
|
# Apply direct CLI overrides after --set, so they have final precedence.
|
|
if args.auto_merge_fragments is not None:
|
|
policy.setdefault('download_policy', {})['auto_merge_fragments'] = args.auto_merge_fragments
|
|
if args.remove_fragments_after_merge is not None:
|
|
policy.setdefault('download_policy', {})['remove_fragments_after_merge'] = args.remove_fragments_after_merge
|
|
if args.fragments_dir is not None:
|
|
policy.setdefault('download_policy', {})['aria_fragments_dir'] = args.fragments_dir
|
|
if args.remote_dir is not None:
|
|
policy.setdefault('download_policy', {})['aria_remote_dir'] = args.remote_dir
|
|
if args.cleanup is not None:
|
|
policy.setdefault('download_policy', {})['cleanup'] = args.cleanup
|
|
|
|
policy_name = policy.get('name', args.policy_name or Path(args.policy).stem)
|
|
|
|
# --- Logging Setup ---
|
|
log_level = logging.DEBUG if args.verbose else logging.INFO
|
|
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' if args.verbose else '%(asctime)s - %(message)s'
|
|
date_format = None if args.verbose else '%H:%M:%S'
|
|
|
|
root_logger = logging.getLogger()
|
|
root_logger.setLevel(log_level)
|
|
|
|
# Remove any existing handlers to avoid duplicate logs
|
|
for handler in root_logger.handlers[:]:
|
|
root_logger.removeHandler(handler)
|
|
|
|
# Add console handler
|
|
console_handler = logging.StreamHandler(sys.stdout)
|
|
console_handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
|
|
root_logger.addHandler(console_handler)
|
|
|
|
if not args.disable_log_writing:
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
log_filename = f"stress-policy-{timestamp}-{policy_name}.log"
|
|
try:
|
|
file_handler = logging.FileHandler(log_filename, encoding='utf-8')
|
|
file_handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
|
|
root_logger.addHandler(file_handler)
|
|
# Use print because logger is just being set up.
|
|
print(f"Logging to file: {log_filename}", file=sys.stderr)
|
|
except IOError as e:
|
|
print(f"Error: Could not open log file {log_filename}: {e}", file=sys.stderr)
|
|
|
|
state_manager = StateManager(policy_name, disable_log_writing=args.disable_log_writing)
|
|
|
|
# --- Graceful shutdown handler ---
|
|
def shutdown_handler(signum, frame):
|
|
if not shutdown_event.is_set():
|
|
logger.info(f"\nSignal {signum} received, shutting down gracefully...")
|
|
shutdown_event.set()
|
|
|
|
# Save state immediately to prevent loss on interrupt.
|
|
logger.info("Attempting to save state before shutdown...")
|
|
state_manager.close()
|
|
|
|
# Kill running subprocesses to unblock workers
|
|
with process_lock:
|
|
if running_processes:
|
|
logger.info(f"Terminating {len(running_processes)} running subprocess(es)...")
|
|
for p in running_processes:
|
|
try:
|
|
# Kill the entire process group to ensure child processes (like yt-dlp) are terminated.
|
|
os.killpg(os.getpgid(p.pid), signal.SIGKILL)
|
|
except (ProcessLookupError, PermissionError):
|
|
pass # Process already finished or we lack permissions
|
|
logger.info("Subprocesses terminated. Waiting for workers to finish. Press Ctrl+C again to force exit.")
|
|
else:
|
|
logger.info("Second signal received, forcing exit.")
|
|
# Use os._exit for a hard exit that doesn't run cleanup handlers,
|
|
# which can deadlock if locks are held.
|
|
os._exit(1)
|
|
|
|
signal.signal(signal.SIGINT, shutdown_handler)
|
|
signal.signal(signal.SIGTERM, shutdown_handler)
|
|
|
|
settings = policy.get('settings', {})
|
|
|
|
# --- Load sources based on mode ---
|
|
mode = settings.get('mode', 'full_stack')
|
|
sources = [] # This will be a list of URLs or Path objects
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
urls_file = settings.get('urls_file')
|
|
if not urls_file:
|
|
logger.error("Policy mode requires 'settings.urls_file'.")
|
|
return 1
|
|
try:
|
|
with open(urls_file, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
try:
|
|
data = json.loads(content)
|
|
if isinstance(data, list) and all(isinstance(item, str) for item in data):
|
|
sources = data
|
|
logger.info(f"Loaded {len(sources)} URLs/IDs from JSON array in {urls_file}.")
|
|
else:
|
|
logger.error(f"URL file '{urls_file}' is valid JSON but not an array of strings.")
|
|
return 1
|
|
except json.JSONDecodeError:
|
|
sources = [line.strip() for line in content.splitlines() if line.strip()]
|
|
logger.info(f"Loaded {len(sources)} URLs/IDs from text file {urls_file}.")
|
|
except IOError as e:
|
|
logger.error(f"Failed to read urls_file {urls_file}: {e}")
|
|
return 1
|
|
|
|
# Clean up URLs/IDs which might have extra quotes, commas, or brackets from copy-pasting
|
|
cleaned_sources = []
|
|
for source in sources:
|
|
cleaned_source = source.strip().rstrip(',').strip().strip('\'"[]').strip()
|
|
if cleaned_source:
|
|
cleaned_sources.append(cleaned_source)
|
|
|
|
if len(cleaned_sources) != len(sources):
|
|
logger.info(f"Cleaned URL list, removed {len(sources) - len(cleaned_sources)} empty or invalid entries.")
|
|
|
|
sources = cleaned_sources
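        # Illustrative cleanup (URL is a placeholder): a copy-pasted entry like
        #   "https://youtu.be/dQw4w9WgXcQ",
        # (with quotes and a trailing comma) is reduced to
        #   https://youtu.be/dQw4w9WgXcQ
        # and entries that end up empty are dropped.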
|
|
elif mode == 'download_only':
|
|
# If not in continuous mode, load sources once at the start.
|
|
# In continuous mode, `sources` is populated at the start of each cycle.
|
|
if settings.get('directory_scan_mode') != 'continuous':
|
|
info_json_dir = settings.get('info_json_dir')
|
|
if not info_json_dir:
|
|
logger.error("Policy mode 'download_only' requires 'settings.info_json_dir'.")
|
|
return 1
|
|
try:
|
|
all_files = sorted(Path(info_json_dir).glob('*.json'))
|
|
sample_percent = settings.get('info_json_dir_sample_percent')
|
|
if sample_percent and 0 < sample_percent <= 100:
|
|
sample_count = int(len(all_files) * (sample_percent / 100.0))
|
|
num_to_sample = min(len(all_files), max(1, sample_count))
|
|
sources = random.sample(all_files, k=num_to_sample)
|
|
logger.info(f"Randomly sampled {len(sources)} files ({sample_percent}%) from {info_json_dir}")
|
|
else:
|
|
sources = all_files
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"Failed to read info_json_dir {info_json_dir}: {e}")
|
|
return 1
|
|
|
|
# In continuous download mode, sources are loaded inside the loop, so we skip this check.
|
|
if settings.get('directory_scan_mode') != 'continuous' and not sources:
|
|
logger.error("No sources (URLs or info.json files) to process. Exiting.")
|
|
return 1
|
|
|
|
# Grouping of sources by profile is now handled inside the main loop to support continuous mode.
|
|
profile_extraction_regex = settings.get('profile_extraction_regex')
|
|
|
|
# For 'auto' worker calculation and initial display, we need to group sources once.
|
|
# This will be re-calculated inside the loop for continuous mode.
|
|
profile_tasks = None
|
|
if mode == 'download_only' and profile_extraction_regex:
|
|
profile_tasks = collections.defaultdict(list)
|
|
for source_path in sources:
|
|
profile_name = get_profile_from_filename(source_path, profile_extraction_regex)
|
|
if profile_name:
|
|
profile_tasks[profile_name].append(source_path)
|
|
else:
|
|
profile_tasks['unmatched_profile'].append(source_path)
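    # Illustrative grouping, assuming get_profile_from_filename() applies the
    # regex and returns its first capture group (as described in --help): with
    # settings.profile_extraction_regex = '.*-(.*?).json', a file named
    #   20240101_120000-dQw4w9WgXcQ-tv_user_1.json
    # is grouped under profile 'tv_user_1'; non-matching files fall into
    # 'unmatched_profile'.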
|
|
|
|
# --- Auto-calculate workers if needed ---
|
|
exec_control = policy.get('execution_control', {})
|
|
original_workers_setting = exec_control.get('workers')
|
|
if original_workers_setting == 'auto':
|
|
if mode == 'download_only' and profile_tasks is not None:
|
|
num_profiles = len(profile_tasks)
|
|
# Use auto_workers_max from policy, with a default of 8.
|
|
max_workers = exec_control.get('auto_workers_max', 8)
|
|
num_workers = min(num_profiles, max_workers)
|
|
exec_control['workers'] = max(1, num_workers)
|
|
logger.info(f"Calculated 'auto' workers based on {num_profiles} profiles (max: {max_workers}): {exec_control['workers']}")
|
|
else:
|
|
target_rate_cfg = exec_control.get('target_rate', {})
|
|
target_reqs = target_rate_cfg.get('requests')
|
|
target_mins = target_rate_cfg.get('per_minutes')
|
|
if target_reqs and target_mins and sources:
|
|
target_rpm = target_reqs / target_mins
|
|
num_sources = len(sources)
|
|
                sleep_cfg = exec_control.get('sleep_between_tasks', {})
                # Match the fallback in display_effective_policy: if max_seconds is unset,
                # use min_seconds instead of 0 so the average sleep is not underestimated.
                min_sleep_s = sleep_cfg.get('min_seconds', 0)
                avg_sleep = (min_sleep_s + (sleep_cfg.get('max_seconds') or min_sleep_s)) / 2
|
|
assumed_task_duration = 12 # Must match assumption in display_effective_policy
|
|
|
|
# Formula: workers = (total_work_seconds) / (total_time_for_work)
|
|
# total_time_for_work is derived from the target rate:
|
|
# (total_cycle_time) = (60 * num_sources) / target_rpm
|
|
# total_time_for_work = total_cycle_time - avg_sleep
|
|
work_time_available = (60 * num_sources / target_rpm) - avg_sleep
|
|
|
|
if work_time_available <= 0:
|
|
# The sleep time alone makes the target rate impossible.
|
|
# Set workers to max parallelism as a best-effort.
|
|
num_workers = num_sources
|
|
logger.warning(f"Target rate of {target_rpm} req/min is likely unachievable due to sleep time of {avg_sleep}s.")
|
|
logger.warning(f"Setting workers to max parallelism ({num_workers}) as a best effort.")
|
|
else:
|
|
total_work_seconds = num_sources * assumed_task_duration
|
|
num_workers = total_work_seconds / work_time_available
|
|
|
|
                calculated_workers = max(1, int(num_workers + 0.99))  # Round up (approximate ceiling)
|
|
exec_control['workers'] = calculated_workers
|
|
logger.info(f"Calculated 'auto' workers based on target rate: {calculated_workers}")
|
|
else:
|
|
logger.warning("Cannot calculate 'auto' workers: 'target_rate' or sources are not defined. Defaulting to 1 worker.")
|
|
exec_control['workers'] = 1
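    # Worked example of the 'auto' worker sizing above (hypothetical numbers):
    # 60 sources, target_rate of 120 requests per 10 minutes (12/min), 5s
    # average sleep, 12s assumed task duration:
    #   work_time_available = 60 * 60 / 12 - 5 = 295s
    #   total_work_seconds  = 60 * 12         = 720s
    #   workers             = 720 / 295 = 2.44, rounded up to 3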
|
|
|
|
display_effective_policy(
|
|
policy,
|
|
policy_name,
|
|
sources=sources,
|
|
profile_names=list(profile_tasks.keys()) if profile_tasks is not None else None,
|
|
original_workers_setting=original_workers_setting
|
|
)
|
|
|
|
if args.dry_run:
|
|
logger.info("Dry run complete. Exiting.")
|
|
return 0
|
|
|
|
start_time = time.time()
|
|
|
|
run_until_cfg = exec_control.get('run_until', {})
|
|
duration_seconds = (run_until_cfg.get('minutes') or 0) * 60
|
|
max_cycles = run_until_cfg.get('cycles') or 0
|
|
max_requests = run_until_cfg.get('requests') or 0
|
|
|
|
# --- Main test loop ---
|
|
cycles = 0
|
|
try:
|
|
def process_task(source, source_index, cycle_num):
|
|
"""Worker task for one source (URL or file path)."""
|
|
try:
|
|
if shutdown_event.is_set():
|
|
return [] # Shutdown initiated, do not start new work
|
|
|
|
# --- Step 1: Get info.json content ---
|
|
info_json_content = None
|
|
profile_name = None
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
gen_policy = policy.get('info_json_generation_policy', {})
|
|
cmd_template = gen_policy.get('command_template')
|
|
|
|
# --- Profile Generation ---
|
|
profile_mode = settings.get('profile_mode')
|
|
pm_policy = settings.get('profile_management')
|
|
|
|
if profile_mode == 'per_worker_with_rotation':
|
|
if not pm_policy:
|
|
logger.error("Profile mode 'per_worker_with_rotation' requires 'settings.profile_management' configuration.")
|
|
# Log a failure event and skip
|
|
event = {'type': 'fetch', 'path': str(source), 'success': False, 'error_type': 'ConfigError', 'details': 'Missing profile_management section'}
|
|
state_manager.log_event(event)
|
|
return []
|
|
worker_id = get_worker_id()
|
|
profile_name = state_manager.get_or_rotate_worker_profile(worker_id, policy)
|
|
elif pm_policy:
|
|
# This is the existing dynamic cooldown logic
|
|
profile_name = state_manager.get_next_available_profile(policy)
|
|
if not profile_name:
|
|
logger.warning("No available profiles to run task. Skipping.")
|
|
return []
|
|
else:
|
|
# This is the legacy logic
|
|
profile_prefix = settings.get('profile_prefix')
|
|
if profile_prefix:
|
|
if profile_mode == 'per_request':
|
|
timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
|
|
profile_name = f"{profile_prefix}_{timestamp}_{source_index}"
|
|
elif profile_mode == 'per_worker':
|
|
worker_index = get_worker_id()
|
|
profile_name = f"{profile_prefix}_{worker_index}"
|
|
else: # Default to pool logic
|
|
profile_pool = settings.get('profile_pool')
|
|
if profile_pool:
|
|
profile_name = f"{profile_prefix}_{source_index % profile_pool}"
|
|
else:
|
|
profile_name = "default" # A final fallback
|
|
|
|
# --- Rate Limit Check ---
|
|
if not state_manager.check_and_update_rate_limit(profile_name, policy):
|
|
return [] # Rate limited, skip this task
|
|
|
|
# --- Command Generation ---
|
|
gen_cmd = []
|
|
save_dir = settings.get('save_info_json_dir')
|
|
save_path = None
|
|
|
|
if cmd_template:
|
|
# Low-level template mode. The user is responsible for output.
|
|
video_id = get_video_id(source)
|
|
|
|
# A heuristic to add '--' if the video ID looks like an option.
|
|
# We split the template, find the standalone '{url}' placeholder,
|
|
# and insert '--' before it. This assumes it's a positional argument.
|
|
template_parts = shlex.split(cmd_template)
|
|
try:
|
|
# Find from the end, in case it's used in an option value earlier.
|
|
url_index = len(template_parts) - 1 - template_parts[::-1].index('{url}')
|
|
if video_id.startswith('-'):
|
|
template_parts.insert(url_index, '--')
|
|
except ValueError:
|
|
# '{url}' not found as a standalone token, do nothing special.
|
|
pass
|
|
|
|
# Rejoin and then format the whole string.
|
|
gen_cmd_str = ' '.join(template_parts)
|
|
gen_cmd_str = gen_cmd_str.format(url=video_id, profile=profile_name)
|
|
gen_cmd = shlex.split(gen_cmd_str)
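                        # Illustrative expansion (template and ID are placeholders): with
                        #   command_template: "bin/ytops-client get-info --profile {profile} {url}"
                        # and a video ID of '-abc123xyz_', the standalone '{url}' token is found
                        # and '--' is inserted before it so the ID is not parsed as an option:
                        #   bin/ytops-client get-info --profile tv_user_0 -- -abc123xyz_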
|
|
if args.verbose and '--verbose' not in gen_cmd:
|
|
gen_cmd.append('--verbose')
|
|
else:
|
|
# High-level policy mode. Orchestrator builds the command.
|
|
script_cmd_str = settings.get('info_json_script')
|
|
if not script_cmd_str:
|
|
logger.error("High-level policy requires 'settings.info_json_script'.")
|
|
return []
|
|
gen_cmd = shlex.split(script_cmd_str)
|
|
video_id = get_video_id(source)
|
|
|
|
client_to_use, request_params = state_manager.get_client_for_request(profile_name, gen_policy)
|
|
|
|
# --- Multi-Cookie File Logic ---
|
|
if pm_policy:
|
|
cookie_files = pm_policy.get('cookie_files')
|
|
if cookie_files and isinstance(cookie_files, list) and len(cookie_files) > 0:
|
|
profile_index = -1
|
|
# Extract index from profile name. Matches _<index> or _<worker_id>_<gen>
|
|
match = re.search(r'_(\d+)(?:_(\d+))?$', profile_name)
|
|
if match:
|
|
# For rotation mode, the first group is worker_id. For pool mode, it's the profile index.
|
|
profile_index = int(match.group(1))
|
|
|
|
if profile_index != -1:
|
|
cookie_file_path = cookie_files[profile_index % len(cookie_files)]
|
|
if not request_params:
|
|
request_params = {}
|
|
request_params['cookies_file_path'] = cookie_file_path
|
|
logger.info(f"[{source}] Assigned cookie file '{os.path.basename(cookie_file_path)}' to profile '{profile_name}'")
|
|
else:
|
|
logger.warning(f"[{source}] Could not determine index for profile '{profile_name}' to assign cookie file.")
|
|
|
|
if client_to_use:
|
|
gen_cmd.extend(['--client', str(client_to_use)])
|
|
if gen_policy.get('auth_host'):
|
|
gen_cmd.extend(['--auth-host', str(gen_policy.get('auth_host'))])
|
|
if gen_policy.get('auth_port'):
|
|
gen_cmd.extend(['--auth-port', str(gen_policy.get('auth_port'))])
|
|
if profile_name != "default":
|
|
gen_cmd.extend(['--profile', profile_name])
|
|
|
|
# Add --print-proxy so we can track it for stats
|
|
if '--print-proxy' not in gen_cmd:
|
|
gen_cmd.append('--print-proxy')
|
|
|
|
if request_params:
|
|
gen_cmd.extend(['--request-params-json', json.dumps(request_params)])
|
|
if gen_policy.get('assigned_proxy_url'):
|
|
gen_cmd.extend(['--assigned-proxy-url', str(gen_policy.get('assigned_proxy_url'))])
|
|
if gen_policy.get('proxy_rename'):
|
|
gen_cmd.extend(['--proxy-rename', str(gen_policy.get('proxy_rename'))])
|
|
|
|
if args.verbose:
|
|
gen_cmd.append('--verbose')
|
|
|
|
# If saving is enabled, delegate saving to the client script.
|
|
if save_dir:
|
|
try:
|
|
os.makedirs(save_dir, exist_ok=True)
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
# Note: Using a timestamped filename to avoid race conditions.
|
|
filename = f"{timestamp}-{video_id}-{profile_name}.json"
|
|
save_path = Path(save_dir) / filename
|
|
gen_cmd.extend(['--output', str(save_path)])
|
|
# No longer need to suppress, it's the default.
|
|
except IOError as e:
|
|
logger.error(f"[{source}] Could not prepare save path in '{save_dir}': {e}")
|
|
# Continue without saving
|
|
save_path = None
|
|
|
|
# If not saving to a file, we need the output on stdout for the download step.
|
|
if not save_dir:
|
|
gen_cmd.append('--print-info-out')
|
|
|
|
# The positional video_id argument must come after all options.
|
|
# Use '--' to ensure it's not parsed as an option if it starts with a dash.
|
|
if video_id.startswith('-'):
|
|
gen_cmd.append('--')
|
|
gen_cmd.append(video_id)
|
|
|
|
worker_id = get_worker_id()
|
|
profile_log_part = f" [Profile: {profile_name}]" if profile_name else ""
|
|
logger.info(f"[Worker {worker_id}]{profile_log_part} [{source}] Running info.json command: {' '.join(shlex.quote(s) for s in gen_cmd)}")
|
|
retcode, stdout, stderr = run_command(gen_cmd)
|
|
info_json_content = stdout
|
|
|
|
# --- Extract proxy from stderr and record it for stats ---
|
|
proxy_url = None
|
|
proxy_match = re.search(r"Proxy used: (.*)", stderr)
|
|
if proxy_match:
|
|
proxy_url = proxy_match.group(1).strip()
|
|
state_manager.record_proxy_usage(proxy_url)
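                    # The client script is expected to report the proxy on stderr in the form
                    # matched above, e.g. (address is a placeholder):
                    #   Proxy used: socks5://10.0.0.5:1080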
|
|
|
|
if retcode == 0:
|
|
# If the client script saved the file, stdout will be empty.
|
|
# If we need the content for a download step, we must read it back.
|
|
if not info_json_content.strip():
|
|
# Check stderr for the success message to confirm save.
|
|
saved_path_match = re.search(r"Successfully saved info.json to (.*)", stderr)
|
|
if saved_path_match:
|
|
output_file_str = saved_path_match.group(1).strip().strip("'\"")
|
|
logger.info(f"[{source}] -> {saved_path_match.group(0).strip()}")
|
|
|
|
# If this is a full_stack test, we need the content for the download worker.
|
|
if mode == 'full_stack':
|
|
try:
|
|
with open(output_file_str, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except IOError as e:
|
|
logger.error(f"Could not read back info.json from '{output_file_str}': {e}")
|
|
retcode = -1 # Treat as failure
|
|
elif save_path:
|
|
# Command was told to save, but didn't confirm. Assume it worked if exit code is 0.
|
|
logger.info(f"[{source}] -> Client script exited 0, assuming info.json was saved to '{save_path}'")
|
|
if mode == 'full_stack':
|
|
try:
|
|
with open(save_path, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except IOError as e:
|
|
logger.error(f"Could not read back info.json from '{save_path}': {e}")
|
|
retcode = -1
|
|
# If stdout is empty and we weren't saving, it's an issue.
|
|
elif not save_path and not cmd_template:
|
|
logger.error(f"[{source}] info.json generation gave no stdout and was not asked to save to a file.")
|
|
retcode = -1
|
|
else:
|
|
logger.info(f"[{source}] -> Successfully fetched info.json to memory/stdout.")
|
|
|
|
event = {'type': 'fetch', 'path': str(source), 'profile': profile_name}
|
|
if proxy_url:
|
|
event['proxy_url'] = proxy_url
|
|
|
|
if retcode != 0:
|
|
error_lines = [line for line in stderr.strip().split('\n') if 'error' in line.lower()]
|
|
error_msg = error_lines[-1] if error_lines else stderr.strip().split('\n')[-1]
|
|
logger.error(f"[{source}] Failed to generate info.json: {error_msg}")
|
|
event.update({'success': False, 'error_type': 'GetInfoJsonFail', 'details': error_msg})
|
|
state_manager.log_event(event)
|
|
return []
|
|
|
|
# Check for quality degradation before logging success
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
quality_policy = s_conditions.get('on_quality_degradation')
|
|
if quality_policy and info_json_content:
|
|
try:
|
|
info_data = json.loads(info_json_content)
|
|
available_formats = {f.get('format_id') for f in info_data.get('formats', [])}
|
|
|
|
required_formats = quality_policy.get('trigger_if_missing_formats')
|
|
if required_formats:
|
|
# Can be a single string, a comma-separated string, or a list of strings.
|
|
if isinstance(required_formats, str):
|
|
required_formats = [f.strip() for f in required_formats.split(',')]
|
|
|
|
missing_formats = [f for f in required_formats if f not in available_formats]
|
|
|
|
if missing_formats:
|
|
logger.warning(f"[{source}] Quality degradation detected. Missing required formats: {', '.join(missing_formats)}.")
|
|
event['quality_degradation_trigger'] = True
|
|
event['missing_formats'] = missing_formats
|
|
except (json.JSONDecodeError, TypeError):
|
|
logger.warning(f"[{source}] Could not parse info.json or find formats to check for quality degradation.")
|
|
|
|
# Record request for profile cooldown policy if active
|
|
if pm_policy:
|
|
state_manager.record_profile_request(profile_name)
|
|
|
|
state_manager.increment_request_count()
|
|
event.update({'success': True, 'details': 'OK'})
|
|
state_manager.log_event(event)
|
|
|
|
# Saving is now delegated to the client script when a save_dir is provided.
|
|
# The orchestrator no longer saves the file itself.
|
|
|
|
elif mode == 'download_only':
|
|
# This path is for non-profile-grouped download_only mode.
|
|
try:
|
|
with open(source, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"[{get_display_name(source)}] Could not read info.json file: {e}")
|
|
return []
|
|
|
|
if mode != 'fetch_only':
|
|
return _run_download_logic(source, info_json_content, policy, state_manager, profile_name=profile_name)
|
|
|
|
return []
|
|
finally:
|
|
# Sleep after the task is completed to space out requests from this worker.
|
|
exec_control = policy.get('execution_control', {})
|
|
sleep_cfg = exec_control.get('sleep_between_tasks', {})
|
|
sleep_min = sleep_cfg.get('min_seconds', 0)
|
|
|
|
if sleep_min > 0:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
if sleep_max > sleep_min:
|
|
sleep_duration = random.uniform(sleep_min, sleep_max)
|
|
else:
|
|
sleep_duration = sleep_min
|
|
|
|
logger.debug(f"Worker sleeping for {sleep_duration:.2f}s after task for {get_display_name(source)}.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
while not shutdown_event.is_set():
|
|
if duration_seconds and (time.time() - start_time) > duration_seconds:
|
|
logger.info("Reached duration limit. Stopping.")
|
|
break
|
|
if max_requests > 0 and state_manager.get_request_count() >= max_requests:
|
|
logger.info(f"Reached max requests ({max_requests}). Stopping.")
|
|
break
|
|
|
|
# --- Rescan for sources if in continuous download mode ---
|
|
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
|
|
info_json_dir = settings.get('info_json_dir')
|
|
try:
|
|
all_files_in_dir = Path(info_json_dir).glob('*.json')
|
|
processed_files = state_manager.get_processed_files()
|
|
|
|
new_files = [f for f in all_files_in_dir if str(f) not in processed_files]
|
|
|
|
# Sort by modification time, oldest first, to process in order of creation
|
|
new_files.sort(key=os.path.getmtime)
|
|
|
|
max_files_per_cycle = settings.get('max_files_per_cycle')
|
|
if max_files_per_cycle and len(new_files) > max_files_per_cycle:
|
|
sources = new_files[:max_files_per_cycle]
|
|
else:
|
|
sources = new_files
|
|
|
|
if not sources:
|
|
sleep_duration = settings.get('sleep_if_no_new_files_seconds', 10)
|
|
logger.info(f"No new info.json files found in '{info_json_dir}'. Sleeping for {sleep_duration}s...")
|
|
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.5)
|
|
|
|
if shutdown_event.is_set():
|
|
break
|
|
continue # Skip to next iteration of the while loop
|
|
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"Failed to read info_json_dir {info_json_dir}: {e}. Retrying in 10s.")
|
|
time.sleep(10)
|
|
continue
|
|
|
|
# --- Group sources for this cycle ---
|
|
task_items = sources
|
|
profile_tasks = None
|
|
if mode == 'download_only' and profile_extraction_regex:
|
|
profile_tasks = collections.defaultdict(list)
|
|
for source_path in sources:
|
|
profile_name = get_profile_from_filename(source_path, profile_extraction_regex)
|
|
if profile_name:
|
|
profile_tasks[profile_name].append(source_path)
|
|
else:
|
|
profile_tasks['unmatched_profile'].append(source_path)
|
|
task_items = list(profile_tasks.items())
|
|
|
|
# If there's nothing to do this cycle, skip.
|
|
if not task_items:
|
|
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
|
|
# The sleep logic is handled inside the rescanning block.
|
|
continue
|
|
else:
|
|
logger.info("No more sources to process. Ending test.")
|
|
break
|
|
|
|
cycles += 1
|
|
if max_cycles > 0 and cycles > max_cycles:
|
|
logger.info(f"Reached max cycles ({max_cycles}). Stopping.")
|
|
break
|
|
|
|
logger.info(f"--- Cycle #{cycles} (Total Requests: {state_manager.get_request_count()}) ---")
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=exec_control.get('workers', 1)) as executor:
|
|
if mode == 'download_only' and profile_tasks is not None:
|
|
# New: submit profile tasks
|
|
future_to_source = {
|
|
executor.submit(process_profile_task, profile_name, file_list, policy, state_manager, cycles): profile_name
|
|
for profile_name, file_list in task_items
|
|
}
|
|
else:
|
|
# Old: submit individual file/url tasks
|
|
future_to_source = {
|
|
executor.submit(process_task, source, i, cycles): source
|
|
for i, source in enumerate(task_items)
|
|
}
|
|
|
|
should_stop = False
|
|
pending_futures = set(future_to_source.keys())
|
|
|
|
while pending_futures and not should_stop:
|
|
done, pending_futures = concurrent.futures.wait(
|
|
pending_futures, return_when=concurrent.futures.FIRST_COMPLETED
|
|
)
|
|
|
|
for future in done:
|
|
if shutdown_event.is_set():
|
|
should_stop = True
|
|
break
|
|
|
|
source = future_to_source[future]
|
|
try:
|
|
results = future.result()
|
|
|
|
# Mark file as processed in continuous download mode
|
|
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
|
|
state_manager.mark_file_as_processed(source)
|
|
|
|
if settings.get('mark_processed_files'):
|
|
try:
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
new_path = source.parent / f"{source.name}.{timestamp}.processed"
|
|
source.rename(new_path)
|
|
logger.info(f"Marked '{source.name}' as processed by renaming to '{new_path.name}'")
|
|
except (IOError, OSError) as e:
|
|
logger.error(f"Failed to rename processed file '{source.name}': {e}")
|
|
|
|
for result in results:
|
|
if not result['success']:
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
is_cumulative_403_active = s_conditions.get('on_cumulative_403', {}).get('max_errors')
|
|
if s_conditions.get('on_failure') or \
|
|
(s_conditions.get('on_http_403') and not is_cumulative_403_active and result['error_type'] == 'HTTP 403') or \
|
|
(s_conditions.get('on_timeout') and result['error_type'] == 'Timeout'):
|
|
logger.info(f"!!! STOP CONDITION MET: Immediate stop on failure '{result['error_type']}' for {get_display_name(source)}. Shutting down all workers. !!!")
|
|
should_stop = True
|
|
break
|
|
except concurrent.futures.CancelledError:
|
|
logger.info(f"Task for {get_display_name(source)} was cancelled during shutdown.")
|
|
event = {
|
|
'type': 'fetch' if mode != 'download_only' else 'download',
|
|
'path': str(source),
|
|
'success': False,
|
|
'error_type': 'Cancelled',
|
|
'details': 'Task cancelled during shutdown.'
|
|
}
|
|
state_manager.log_event(event)
|
|
except Exception as exc:
|
|
logger.error(f'{get_display_name(source)} generated an exception: {exc}')
|
|
|
|
if should_stop:
|
|
break
|
|
|
|
# Check for cumulative error rate stop conditions
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
error_rate_policy = s_conditions.get('on_error_rate')
|
|
if error_rate_policy and not should_stop:
|
|
max_errors = error_rate_policy.get('max_errors')
|
|
per_minutes = error_rate_policy.get('per_minutes')
|
|
if max_errors and per_minutes:
|
|
error_count = state_manager.check_cumulative_error_rate(max_errors, per_minutes)
|
|
if error_count > 0:
|
|
logger.info(f"!!! STOP CONDITION MET: Error rate exceeded: {error_count} errors in the last {per_minutes} minute(s). Shutting down. !!!")
|
|
should_stop = True
|
|
|
|
cumulative_403_policy = s_conditions.get('on_cumulative_403')
|
|
if cumulative_403_policy and not should_stop:
|
|
max_errors = cumulative_403_policy.get('max_errors')
|
|
per_minutes = cumulative_403_policy.get('per_minutes')
|
|
if max_errors and per_minutes:
|
|
error_count = state_manager.check_cumulative_error_rate(max_errors, per_minutes, error_type='HTTP 403')
|
|
if error_count > 0:
|
|
logger.info(f"!!! STOP CONDITION MET: Cumulative 403 error rate exceeded: {error_count} errors in the last {per_minutes} minute(s). Shutting down. !!!")
|
|
should_stop = True
|
|
|
|
quality_degradation_policy = s_conditions.get('on_quality_degradation')
|
|
if quality_degradation_policy and not should_stop:
|
|
max_triggers = quality_degradation_policy.get('max_triggers')
|
|
per_minutes = quality_degradation_policy.get('per_minutes')
|
|
if max_triggers and per_minutes:
|
|
trigger_count = state_manager.check_quality_degradation_rate(max_triggers, per_minutes)
|
|
if trigger_count > 0:
|
|
logger.info(f"!!! STOP CONDITION MET: Quality degradation triggered {trigger_count} times in the last {per_minutes} minute(s). Shutting down. !!!")
|
|
should_stop = True
|
|
|
|
if should_stop:
|
|
break
|
|
|
|
# Check for duration limit after each task completes
|
|
if duration_seconds and (time.time() - start_time) > duration_seconds:
|
|
logger.info("Reached duration limit. Cancelling remaining tasks.")
|
|
should_stop = True
|
|
|
|
if should_stop and pending_futures:
|
|
logger.info(f"Cancelling {len(pending_futures)} outstanding task(s).")
|
|
for future in pending_futures:
|
|
future.cancel()
|
|
|
|
if should_stop: break
|
|
|
|
if max_cycles > 0 and cycles >= max_cycles:
|
|
break
|
|
|
|
logger.info("Cycle complete.")
|
|
|
|
except KeyboardInterrupt:
|
|
logger.info("\nForceful shutdown requested...")
|
|
finally:
|
|
state_manager.print_summary(policy)
|
|
state_manager.close()
|
|
|
|
return 0
|