#!/usr/bin/env python3
|
|
"""
|
|
Policy-driven stress-testing orchestrator for video format downloads.
|
|
"""
|
|
|
|
import argparse
|
|
import collections
|
|
import collections.abc
|
|
import concurrent.futures
|
|
import json
|
|
import logging
|
|
import os
|
|
import random
|
|
import re
|
|
import shlex
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
from copy import deepcopy
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
print("PyYAML is not installed. Please install it with: pip install PyYAML", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Add a global event for graceful shutdown
|
|
shutdown_event = threading.Event()
|
|
|
|
# Globals for tracking and terminating subprocesses on shutdown
|
|
running_processes = set()
|
|
process_lock = threading.Lock()
|
|
|
|
# Globals for assigning a stable ID to each worker thread
|
|
worker_id_map = {}
|
|
worker_id_counter = 0
|
|
worker_id_lock = threading.Lock()
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger('stress_policy_tool')
|
|
|
|
|
|
def get_worker_id():
|
|
"""Assigns a stable, sequential ID to each worker thread."""
|
|
global worker_id_counter
|
|
thread_id = threading.get_ident()
|
|
with worker_id_lock:
|
|
if thread_id not in worker_id_map:
|
|
worker_id_map[thread_id] = worker_id_counter
|
|
worker_id_counter += 1
|
|
return worker_id_map[thread_id]
|
|
|
|
|
|
def get_video_id(url: str) -> str:
|
|
"""Extracts a YouTube video ID from a URL."""
|
|
match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
|
|
if match:
|
|
return match.group(1)
|
|
match = re.search(r"youtu\.be\/([0-9A-Za-z_-]{11})", url)
|
|
if match:
|
|
return match.group(1)
|
|
if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
|
|
return url
|
|
return "unknown_video_id"
|
|
|
|
|
|
def get_display_name(path_or_url):
|
|
"""Returns a clean name for logging, either a filename or a video ID."""
|
|
if isinstance(path_or_url, Path):
|
|
return path_or_url.name
|
|
|
|
path_str = str(path_or_url)
|
|
video_id = get_video_id(path_str)
|
|
if video_id != "unknown_video_id":
|
|
return video_id
|
|
|
|
return Path(path_str).name
|
|
|
|
|
|
def format_size(b):
|
|
"""Format size in bytes to human-readable string."""
|
|
if b is None:
|
|
return 'N/A'
|
|
if b < 1024:
|
|
return f"{b}B"
|
|
elif b < 1024**2:
|
|
return f"{b/1024:.2f}KiB"
|
|
elif b < 1024**3:
|
|
return f"{b/1024**2:.2f}MiB"
|
|
else:
|
|
return f"{b/1024**3:.2f}GiB"
|
|
|
|
|
|
def flatten_dict(d, parent_key='', sep='.'):
|
|
"""Flattens a nested dictionary."""
|
|
items = {}
|
|
for k, v in d.items():
|
|
new_key = parent_key + sep + k if parent_key else k
|
|
if isinstance(v, collections.abc.MutableMapping):
|
|
items.update(flatten_dict(v, new_key, sep=sep))
|
|
else:
|
|
items[new_key] = v
|
|
return items
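# A minimal sketch of the flattening (the policy fragment and values are hypothetical):
#   >>> flatten_dict({'download_policy': {'formats': '140',
#   ...                                   'rate_limits': {'per_ip': {'max_requests': 5}}}})
#   {'download_policy.formats': '140', 'download_policy.rate_limits.per_ip.max_requests': 5}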
|
|
|
|
|
|
def print_policy_overrides(policy):
|
|
"""Prints all policy values as a single-line of --set arguments."""
|
|
# We don't want to include the 'name' key in the overrides.
|
|
policy_copy = deepcopy(policy)
|
|
policy_copy.pop('name', None)
|
|
|
|
flat_policy = flatten_dict(policy_copy)
|
|
|
|
set_args = []
|
|
for key, value in sorted(flat_policy.items()):
|
|
if value is None:
|
|
value_str = 'null'
|
|
elif isinstance(value, bool):
|
|
value_str = str(value).lower()
|
|
elif isinstance(value, (list, dict)):
|
|
# Use compact JSON for lists/dicts
|
|
value_str = json.dumps(value, separators=(',', ':'))
|
|
else:
|
|
value_str = str(value)
|
|
|
|
# Use shlex.quote to handle spaces and special characters safely
|
|
set_args.append(f"--set {shlex.quote(f'{key}={value_str}')}")
|
|
|
|
print(' '.join(set_args))
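# Example of the emitted line for a hypothetical policy
# {'name': 'x', 'download_policy': {'formats': '140,299'}, 'execution_control': {'workers': 4}}:
#   --set download_policy.formats=140,299 --set execution_control.workers=4
# ('name' is dropped; values containing spaces or shell metacharacters are quoted by shlex.quote.)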
|
|
|
|
|
|
def get_profile_from_filename(path, regex_pattern):
|
|
"""Extracts a profile name from a filename using a regex."""
|
|
if not regex_pattern:
|
|
return None
|
|
match = re.search(regex_pattern, path.name)
|
|
if match:
|
|
# Assume the first capturing group is the profile name
|
|
if match.groups():
|
|
return match.group(1)
|
|
return None
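# Sketch with a hypothetical regex and filename: with regex_pattern r'^([a-z0-9]+)__'
# (first capturing group = profile), 'alice__dQw4w9WgXcQ.info.json' yields 'alice'.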
|
|
|
|
|
|
class StateManager:
|
|
"""Tracks statistics, manages rate limits, and persists state across runs."""
|
|
def __init__(self, policy_name, disable_log_writing=False):
|
|
self.disable_log_writing = disable_log_writing
|
|
self.state_file_path = Path(f"{policy_name}_state.json")
|
|
self.stats_file_path = Path(f"{policy_name}_stats.jsonl")
|
|
self.lock = threading.RLock()
|
|
self.start_time = time.time()
|
|
self.events = []
|
|
self.state = {
|
|
'global_request_count': 0,
|
|
'rate_limit_trackers': {}, # e.g., {'per_ip': [ts1, ts2], 'profile_foo': [ts3, ts4]}
|
|
'profile_request_counts': {}, # for client rotation
|
|
'profile_last_refresh_time': {}, # for client rotation
|
|
'proxy_last_finish_time': {}, # for per-proxy sleep
|
|
'processed_files': [], # For continuous download_only mode
|
|
# For dynamic profile cooldown strategy
|
|
'profile_cooldown_counts': {},
|
|
'profile_cooldown_sleep_until': {},
|
|
'profile_pool_size': 0,
|
|
'profile_run_suffix': None,
|
|
'worker_profile_generations': {}
|
|
}
|
|
self.stats_file_handle = None
|
|
self._load_state()
|
|
self.print_historical_summary()
|
|
self._open_stats_log()
|
|
|
|
def _load_state(self):
|
|
if self.disable_log_writing:
|
|
logger.info("Log writing is disabled. State will not be loaded from disk.")
|
|
return
|
|
if not self.state_file_path.exists():
|
|
logger.info(f"State file not found at '{self.state_file_path}', starting fresh.")
|
|
return
|
|
try:
|
|
with open(self.state_file_path, 'r', encoding='utf-8') as f:
|
|
self.state = json.load(f)
|
|
# Ensure keys exist
|
|
self.state.setdefault('global_request_count', 0)
|
|
self.state.setdefault('rate_limit_trackers', {})
|
|
self.state.setdefault('profile_request_counts', {})
|
|
self.state.setdefault('profile_last_refresh_time', {})
|
|
self.state.setdefault('proxy_last_finish_time', {})
|
|
self.state.setdefault('processed_files', [])
|
|
# For dynamic profile cooldown strategy
|
|
self.state.setdefault('profile_cooldown_counts', {})
|
|
self.state.setdefault('profile_cooldown_sleep_until', {})
|
|
self.state.setdefault('profile_pool_size', 0)
|
|
self.state.setdefault('profile_run_suffix', None)
|
|
self.state.setdefault('worker_profile_generations', {})
|
|
logger.info(f"Loaded state from {self.state_file_path}")
|
|
except (IOError, json.JSONDecodeError) as e:
|
|
logger.error(f"Could not load or parse state file {self.state_file_path}: {e}. Starting fresh.")
|
|
|
|
def _save_state(self):
|
|
if self.disable_log_writing:
|
|
return
|
|
with self.lock:
|
|
try:
|
|
with open(self.state_file_path, 'w', encoding='utf-8') as f:
|
|
json.dump(self.state, f, indent=2)
|
|
logger.info(f"Saved state to {self.state_file_path}")
|
|
except IOError as e:
|
|
logger.error(f"Could not save state to {self.state_file_path}: {e}")
|
|
|
|
def _open_stats_log(self):
|
|
if self.disable_log_writing:
|
|
return
|
|
try:
|
|
self.stats_file_handle = open(self.stats_file_path, 'a', encoding='utf-8')
|
|
except IOError as e:
|
|
logger.error(f"Could not open stats file {self.stats_file_path}: {e}")
|
|
|
|
def close(self):
|
|
"""Saves state and closes file handles."""
|
|
self._save_state()
|
|
if self.stats_file_handle:
|
|
self.stats_file_handle.close()
|
|
self.stats_file_handle = None
|
|
|
|
def mark_file_as_processed(self, file_path):
|
|
"""Adds a file path to the list of processed files in the state."""
|
|
with self.lock:
|
|
# Using a list and checking for existence is fine for moderate numbers of files.
|
|
# A set isn't JSON serializable.
|
|
processed = self.state.setdefault('processed_files', [])
|
|
file_str = str(file_path)
|
|
if file_str not in processed:
|
|
processed.append(file_str)
|
|
|
|
def get_processed_files(self):
|
|
"""Returns a set of file paths that have been processed."""
|
|
with self.lock:
|
|
return set(self.state.get('processed_files', []))
|
|
|
|
def print_historical_summary(self):
|
|
"""Prints a summary based on the state loaded from disk, before new events."""
|
|
with self.lock:
|
|
now = time.time()
|
|
rate_trackers = self.state.get('rate_limit_trackers', {})
|
|
total_requests = self.state.get('global_request_count', 0)
|
|
|
|
if not rate_trackers and not total_requests:
|
|
logger.info("No historical data found in state file.")
|
|
return
|
|
|
|
logger.info("\n--- Summary From Previous Runs ---")
|
|
logger.info(f"Total info.json requests (all previous runs): {total_requests}")
|
|
|
|
if rate_trackers:
|
|
for key, timestamps in sorted(rate_trackers.items()):
|
|
# Time windows in seconds
|
|
windows = {
|
|
'last 10 min': 600,
|
|
'last 60 min': 3600,
|
|
'last 6 hours': 21600,
|
|
'last 24 hours': 86400
|
|
}
|
|
|
|
rates_str_parts = []
|
|
for name, seconds in windows.items():
|
|
count = sum(1 for ts in timestamps if now - ts <= seconds)
|
|
# Calculate rate in requests per minute
|
|
rate_rpm = (count / seconds) * 60 if seconds > 0 else 0
|
|
rates_str_parts.append(f"{count} req in {name} ({rate_rpm:.2f} rpm)")
|
|
|
|
logger.info(f"Tracker '{key}': " + ", ".join(rates_str_parts))
|
|
logger.info("------------------------------------")
|
|
|
|
def log_event(self, event_data):
|
|
with self.lock:
|
|
event_data['timestamp'] = datetime.now().isoformat()
|
|
self.events.append(event_data)
|
|
if self.stats_file_handle:
|
|
self.stats_file_handle.write(json.dumps(event_data) + '\n')
|
|
self.stats_file_handle.flush()
|
|
|
|
def get_request_count(self):
|
|
with self.lock:
|
|
return self.state.get('global_request_count', 0)
|
|
|
|
def increment_request_count(self):
|
|
with self.lock:
|
|
self.state['global_request_count'] = self.state.get('global_request_count', 0) + 1
|
|
|
|
def check_cumulative_error_rate(self, max_errors, per_minutes, error_type=None):
|
|
"""
|
|
Checks if a cumulative error rate has been exceeded.
|
|
If error_type is None, checks for any failure.
|
|
Returns the number of errors found if the threshold is met, otherwise 0.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
window_seconds = per_minutes * 60
|
|
|
|
if error_type:
|
|
recent_errors = [
|
|
e for e in self.events
|
|
if e.get('error_type') == error_type and (now - datetime.fromisoformat(e['timestamp']).timestamp()) <= window_seconds
|
|
]
|
|
else: # Generic failure check
|
|
recent_errors = [
|
|
e for e in self.events
|
|
if not e.get('success') and (now - datetime.fromisoformat(e['timestamp']).timestamp()) <= window_seconds
|
|
]
|
|
|
|
if len(recent_errors) >= max_errors:
|
|
return len(recent_errors)
|
|
return 0
|
|
|
|
def check_quality_degradation_rate(self, max_triggers, per_minutes):
|
|
"""
|
|
Checks if the quality degradation trigger rate has been exceeded.
|
|
Returns the number of triggers found if the threshold is met, otherwise 0.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
window_seconds = per_minutes * 60
|
|
|
|
recent_triggers = [
|
|
e for e in self.events
|
|
if e.get('quality_degradation_trigger') and (now - datetime.fromisoformat(e['timestamp']).timestamp()) <= window_seconds
|
|
]
|
|
|
|
if len(recent_triggers) >= max_triggers:
|
|
return len(recent_triggers)
|
|
return 0
|
|
|
|
def check_and_update_rate_limit(self, profile_name, policy):
|
|
"""
|
|
Checks if a request is allowed based on policy rate limits.
|
|
If allowed, updates the internal state. Returns True if allowed, False otherwise.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
gen_policy = policy.get('info_json_generation_policy', {})
|
|
rate_limits = gen_policy.get('rate_limits', {})
|
|
|
|
# Check per-IP limit
|
|
ip_limit = rate_limits.get('per_ip')
|
|
if ip_limit:
|
|
tracker_key = 'per_ip'
|
|
max_req = ip_limit.get('max_requests')
|
|
period_min = ip_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
# Filter out old timestamps
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning("Per-IP rate limit reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# Check per-profile limit
|
|
profile_limit = rate_limits.get('per_profile')
|
|
if profile_limit and profile_name:
|
|
tracker_key = f"profile_{profile_name}"
|
|
max_req = profile_limit.get('max_requests')
|
|
period_min = profile_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning(f"Per-profile rate limit for '{profile_name}' reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# If all checks pass, record the new request timestamp for all relevant trackers
|
|
if ip_limit and ip_limit.get('max_requests'):
|
|
self.state['rate_limit_trackers'].setdefault('per_ip', []).append(now)
|
|
if profile_limit and profile_limit.get('max_requests') and profile_name:
|
|
self.state['rate_limit_trackers'].setdefault(f"profile_{profile_name}", []).append(now)
|
|
|
|
return True
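# Policy fragment consumed above (a sketch; key names follow the lookups in this method,
# the numbers are hypothetical):
#
#   info_json_generation_policy:
#     rate_limits:
#       per_ip:      {max_requests: 120, per_minutes: 60}
#       per_profile: {max_requests: 20,  per_minutes: 60}
#
# A request is allowed only if every configured sliding-window counter is below its limit;
# the current timestamp is then appended to each enabled tracker.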
|
|
|
|
def get_client_for_request(self, profile_name, gen_policy):
|
|
"""
|
|
Determines which client to use based on the client_rotation_policy.
|
|
Returns a tuple: (client_name, request_params_dict).
|
|
"""
|
|
with self.lock:
|
|
rotation_policy = gen_policy.get('client_rotation_policy')
|
|
|
|
# If no rotation policy, use the simple 'client' key.
|
|
if not rotation_policy:
|
|
client = gen_policy.get('client')
|
|
logger.info(f"Using client '{client}' for profile '{profile_name}'.")
|
|
req_params = gen_policy.get('request_params')
|
|
return client, req_params
|
|
|
|
# --- Rotation logic ---
|
|
now = time.time()
|
|
major_client = rotation_policy.get('major_client')
|
|
refresh_client = rotation_policy.get('refresh_client')
|
|
refresh_every = rotation_policy.get('refresh_every', {})
|
|
|
|
if not refresh_client or not refresh_every:
|
|
return major_client, rotation_policy.get('major_client_params')
|
|
|
|
should_refresh = False
|
|
|
|
# Check time-based refresh
|
|
refresh_minutes = refresh_every.get('minutes')
|
|
last_refresh_time = self.state['profile_last_refresh_time'].get(profile_name, 0)
|
|
if refresh_minutes and (now - last_refresh_time) > (refresh_minutes * 60):
|
|
should_refresh = True
|
|
|
|
# Check request-count-based refresh
|
|
refresh_requests = refresh_every.get('requests')
|
|
request_count = self.state['profile_request_counts'].get(profile_name, 0)
|
|
if refresh_requests and request_count >= refresh_requests:
|
|
should_refresh = True
|
|
|
|
if should_refresh:
|
|
logger.info(f"Profile '{profile_name}' is due for a refresh. Using refresh client '{refresh_client}'.")
|
|
self.state['profile_last_refresh_time'][profile_name] = now
|
|
self.state['profile_request_counts'][profile_name] = 0 # Reset counter
|
|
return refresh_client, rotation_policy.get('refresh_client_params')
|
|
else:
|
|
# Not refreshing, so increment request count for this profile
|
|
self.state['profile_request_counts'][profile_name] = request_count + 1
|
|
return major_client, rotation_policy.get('major_client_params')
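# Rotation policy sketch (key names follow the lookups above; client names and numbers
# are hypothetical):
#
#   info_json_generation_policy:
#     client_rotation_policy:
#       major_client: web
#       refresh_client: tv
#       refresh_every: {minutes: 30, requests: 10}
#
# Requests use major_client until either refresh_every threshold is hit; that request
# then uses refresh_client and the per-profile request counter resets to zero.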
|
|
|
|
def get_next_available_profile(self, policy):
|
|
"""
|
|
Finds or creates an available profile based on the dynamic cooldown policy.
|
|
Returns a profile name, or None if no profile is available.
|
|
"""
|
|
with self.lock:
|
|
now = time.time()
|
|
settings = policy.get('settings', {})
|
|
pm_policy = settings.get('profile_management')
|
|
|
|
if not pm_policy:
|
|
return None
|
|
|
|
prefix = pm_policy.get('prefix')
|
|
if not prefix:
|
|
logger.error("Profile management policy requires 'prefix'.")
|
|
return None
|
|
|
|
# Determine and persist the suffix for this run to ensure profile names are stable
|
|
run_suffix = self.state.get('profile_run_suffix')
|
|
if not run_suffix:
|
|
suffix_config = pm_policy.get('suffix')
|
|
if suffix_config == 'auto':
|
|
run_suffix = datetime.now().strftime('%Y%m%d%H%M')
|
|
else:
|
|
run_suffix = suffix_config or ''
|
|
self.state['profile_run_suffix'] = run_suffix
|
|
|
|
# Initialize pool size from policy if not already in state
|
|
if self.state.get('profile_pool_size', 0) == 0:
|
|
self.state['profile_pool_size'] = pm_policy.get('initial_pool_size', 1)
|
|
|
|
max_reqs = pm_policy.get('max_requests_per_profile')
|
|
sleep_mins = pm_policy.get('sleep_minutes_on_exhaustion')
|
|
|
|
# Loop until a profile is found or we decide we can't find one
|
|
while True:
|
|
# Try to find an existing, available profile
|
|
for i in range(self.state['profile_pool_size']):
|
|
profile_name = f"{prefix}_{run_suffix}_{i}" if run_suffix else f"{prefix}_{i}"
|
|
|
|
# Check if sleeping
|
|
sleep_until = self.state['profile_cooldown_sleep_until'].get(profile_name, 0)
|
|
if now < sleep_until:
|
|
continue # Still sleeping
|
|
|
|
# Check if it needs to be put to sleep
|
|
req_count = self.state['profile_cooldown_counts'].get(profile_name, 0)
|
|
if max_reqs and req_count >= max_reqs:
|
|
sleep_duration_seconds = (sleep_mins or 0) * 60
|
|
self.state['profile_cooldown_sleep_until'][profile_name] = now + sleep_duration_seconds
|
|
self.state['profile_cooldown_counts'][profile_name] = 0 # Reset count for next time
|
|
logger.info(f"Profile '{profile_name}' reached request limit ({req_count}/{max_reqs}). Putting to sleep for {sleep_mins} minutes.")
|
|
continue # Now sleeping, try next profile
|
|
|
|
# This profile is available
|
|
logger.info(f"Selected available profile '{profile_name}' (request count: {req_count}/{max_reqs if max_reqs else 'unlimited'}).")
|
|
return profile_name
|
|
|
|
# If we get here, no existing profile was available
|
|
if pm_policy.get('auto_expand_pool'):
|
|
new_profile_index = self.state['profile_pool_size']
|
|
self.state['profile_pool_size'] += 1
|
|
profile_name = f"{prefix}_{run_suffix}_{new_profile_index}" if run_suffix else f"{prefix}_{new_profile_index}"
|
|
logger.info(f"Profile pool exhausted. Expanding pool to size {self.state['profile_pool_size']}. New profile: '{profile_name}'")
|
|
return profile_name
|
|
else:
|
|
# No available profiles and pool expansion is disabled
|
|
return None
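# profile_management sketch for the dynamic cooldown strategy (values hypothetical):
#
#   settings:
#     profile_management:
#       prefix: stress
#       suffix: auto                     # 'auto' -> timestamp suffix, or a fixed string
#       initial_pool_size: 4
#       max_requests_per_profile: 50
#       sleep_minutes_on_exhaustion: 30
#       auto_expand_pool: true
#
# With suffix 'auto' the run suffix is a timestamp, so profiles are named e.g.
# stress_202501011200_0 .. stress_202501011200_3, and the pool grows by one whenever
# every existing profile is asleep and auto_expand_pool is enabled.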
|
|
|
|
def get_or_rotate_worker_profile(self, worker_id, policy):
|
|
"""
|
|
Gets the current profile for a worker, rotating to a new generation if the lifetime limit is met.
|
|
This is used by the 'per_worker_with_rotation' profile mode.
|
|
"""
|
|
with self.lock:
|
|
pm_policy = policy.get('settings', {}).get('profile_management', {})
|
|
if not pm_policy:
|
|
logger.error("Profile mode 'per_worker_with_rotation' requires 'settings.profile_management' configuration in the policy.")
|
|
return f"error_profile_{worker_id}"
|
|
|
|
prefix = pm_policy.get('prefix')
|
|
if not prefix:
|
|
logger.error("Profile management for 'per_worker_with_rotation' requires a 'prefix'.")
|
|
return f"error_profile_{worker_id}"
|
|
|
|
max_reqs = pm_policy.get('max_requests_per_profile')
|
|
|
|
generations = self.state.setdefault('worker_profile_generations', {})
|
|
# worker_id is an int, but JSON keys must be strings
|
|
worker_id_str = str(worker_id)
|
|
current_gen = generations.get(worker_id_str, 0)
|
|
|
|
profile_name = f"{prefix}_{worker_id}_{current_gen}"
|
|
|
|
if not max_reqs: # No lifetime limit defined, so never rotate.
|
|
return profile_name
|
|
|
|
req_count = self.state.get('profile_cooldown_counts', {}).get(profile_name, 0)
|
|
|
|
if req_count >= max_reqs:
|
|
logger.info(f"Profile '{profile_name}' reached lifetime request limit ({req_count}/{max_reqs}). Rotating to new generation for worker {worker_id}.")
|
|
new_gen = current_gen + 1
|
|
generations[worker_id_str] = new_gen
|
|
# The request counts for the old profile are implicitly left behind.
|
|
# The new profile will start with a count of 0.
|
|
profile_name = f"{prefix}_{worker_id}_{new_gen}"
|
|
|
|
return profile_name
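# Naming sketch for 'per_worker_with_rotation' (prefix hypothetical): worker 3 starts on
# 'stress_3_0'; once that profile has made max_requests_per_profile requests it rotates to
# 'stress_3_1', and so on. Without max_requests_per_profile the generation never advances.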
|
|
|
|
def record_profile_request(self, profile_name):
|
|
"""Increments the request counter for a profile for the cooldown policy."""
|
|
with self.lock:
|
|
if not profile_name:
|
|
return
|
|
counts = self.state.setdefault('profile_cooldown_counts', {})
|
|
counts[profile_name] = counts.get(profile_name, 0) + 1
|
|
|
|
def record_proxy_usage(self, proxy_url):
|
|
"""Records a request timestamp for a given proxy URL for statistical purposes."""
|
|
if not proxy_url:
|
|
return
|
|
with self.lock:
|
|
now = time.time()
|
|
# Use a prefix to avoid collisions with profile names or other keys
|
|
tracker_key = f"proxy_{proxy_url}"
|
|
self.state['rate_limit_trackers'].setdefault(tracker_key, []).append(now)
|
|
|
|
def check_and_update_download_rate_limit(self, proxy_url, policy):
|
|
"""Checks download rate limits. Returns True if allowed, False otherwise."""
|
|
with self.lock:
|
|
now = time.time()
|
|
d_policy = policy.get('download_policy', {})
|
|
rate_limits = d_policy.get('rate_limits', {})
|
|
|
|
# Check per-IP limit
|
|
ip_limit = rate_limits.get('per_ip')
|
|
if ip_limit:
|
|
tracker_key = 'download_per_ip' # Use a distinct key
|
|
max_req = ip_limit.get('max_requests')
|
|
period_min = ip_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning("Per-IP download rate limit reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# Check per-proxy limit
|
|
proxy_limit = rate_limits.get('per_proxy')
|
|
if proxy_limit and proxy_url:
|
|
tracker_key = f"download_proxy_{proxy_url}"
|
|
max_req = proxy_limit.get('max_requests')
|
|
period_min = proxy_limit.get('per_minutes')
|
|
if max_req and period_min:
|
|
timestamps = self.state['rate_limit_trackers'].get(tracker_key, [])
|
|
timestamps = [ts for ts in timestamps if now - ts < period_min * 60]
|
|
if len(timestamps) >= max_req:
|
|
logger.warning(f"Per-proxy download rate limit for '{proxy_url}' reached. Skipping task.")
|
|
return False
|
|
self.state['rate_limit_trackers'][tracker_key] = timestamps
|
|
|
|
# If all checks pass, record the new request timestamp for all relevant trackers
|
|
if ip_limit and ip_limit.get('max_requests'):
|
|
self.state['rate_limit_trackers'].setdefault('download_per_ip', []).append(now)
|
|
if proxy_limit and proxy_limit.get('max_requests') and proxy_url:
|
|
self.state['rate_limit_trackers'].setdefault(f"download_proxy_{proxy_url}", []).append(now)
|
|
|
|
return True
|
|
|
|
def wait_for_proxy_cooldown(self, proxy_url, policy):
|
|
"""If a per-proxy sleep is defined, wait until the cooldown period has passed."""
|
|
with self.lock:
|
|
d_policy = policy.get('download_policy', {})
|
|
sleep_duration = d_policy.get('sleep_per_proxy_seconds', 0)
|
|
if not proxy_url or sleep_duration <= 0:
|
|
return
|
|
|
|
last_finish = self.state.setdefault('proxy_last_finish_time', {}).get(proxy_url, 0)
|
|
elapsed = time.time() - last_finish
|
|
|
|
if elapsed < sleep_duration:
|
|
time_to_sleep = sleep_duration - elapsed
|
|
logger.info(f"Proxy '{proxy_url}' was used recently. Sleeping for {time_to_sleep:.2f}s.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + time_to_sleep
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
logger.info("Shutdown requested during proxy cooldown sleep.")
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
def update_proxy_finish_time(self, proxy_url):
|
|
"""Updates the last finish time for a proxy."""
|
|
with self.lock:
|
|
if not proxy_url:
|
|
return
|
|
self.state.setdefault('proxy_last_finish_time', {})[proxy_url] = time.time()
|
|
|
|
def print_summary(self, policy=None):
|
|
"""Print a summary of the test run."""
|
|
with self.lock:
|
|
# --- Cumulative Stats from State ---
|
|
now = time.time()
|
|
rate_trackers = self.state.get('rate_limit_trackers', {})
|
|
if rate_trackers:
|
|
logger.info("\n--- Cumulative Rate Summary (All Runs, updated at end of run) ---")
|
|
logger.info("This shows the total number of requests/downloads over various time windows, including previous runs.")
|
|
|
|
fetch_trackers = {k: v for k, v in rate_trackers.items() if not k.startswith('download_')}
|
|
download_trackers = {k: v for k, v in rate_trackers.items() if k.startswith('download_')}
|
|
|
|
def print_tracker_stats(trackers, tracker_type):
|
|
if not trackers:
|
|
logger.info(f"No historical {tracker_type} trackers found.")
|
|
return
|
|
|
|
logger.info(f"Historical {tracker_type} Trackers:")
|
|
for key, timestamps in sorted(trackers.items()):
|
|
windows = {
|
|
'last 10 min': 600, 'last 60 min': 3600,
|
|
'last 6 hours': 21600, 'last 24 hours': 86400
|
|
}
|
|
rates_str_parts = []
|
|
for name, seconds in windows.items():
|
|
count = sum(1 for ts in timestamps if now - ts <= seconds)
|
|
rate_rpm = (count / seconds) * 60 if seconds > 0 else 0
|
|
rates_str_parts.append(f"{count} in {name} ({rate_rpm:.2f}/min)")
|
|
|
|
# Clean up key for display
|
|
display_key = key.replace('download_', '').replace('per_ip', 'all_proxies/ips')
|
|
logger.info(f" - Tracker '{display_key}': " + ", ".join(rates_str_parts))
|
|
|
|
print_tracker_stats(fetch_trackers, "Fetch Request")
|
|
print_tracker_stats(download_trackers, "Download Attempt")
|
|
|
|
if not self.events:
|
|
logger.info("\nNo new events were recorded in this session.")
|
|
return
|
|
|
|
duration = time.time() - self.start_time
|
|
fetch_events = [e for e in self.events if e.get('type') == 'fetch']
|
|
download_events = [e for e in self.events if e.get('type') != 'fetch']
|
|
|
|
logger.info("\n--- Test Summary (This Run) ---")
|
|
logger.info(f"Total duration: {duration:.2f} seconds")
|
|
logger.info(f"Total info.json requests (cumulative): {self.get_request_count()}")
|
|
|
|
if policy:
|
|
logger.info("\n--- Test Configuration ---")
|
|
settings = policy.get('settings', {})
|
|
d_policy = policy.get('download_policy', {})
|
|
|
|
if settings.get('urls_file'):
|
|
logger.info(f"URL source file: {settings['urls_file']}")
|
|
if settings.get('info_json_dir'):
|
|
logger.info(f"Info.json source dir: {settings['info_json_dir']}")
|
|
|
|
if d_policy:
|
|
logger.info(f"Download formats: {d_policy.get('formats', 'N/A')}")
|
|
if d_policy.get('downloader'):
|
|
logger.info(f"Downloader: {d_policy.get('downloader')}")
|
|
if d_policy.get('downloader_args'):
|
|
logger.info(f"Downloader args: {d_policy.get('downloader_args')}")
|
|
if d_policy.get('pause_before_download_seconds'):
|
|
logger.info(f"Pause before download: {d_policy.get('pause_before_download_seconds')}s")
|
|
if d_policy.get('sleep_between_formats'):
|
|
sleep_cfg = d_policy.get('sleep_between_formats')
|
|
logger.info(f"Sleep between formats: {sleep_cfg.get('min_seconds', 0)}-{sleep_cfg.get('max_seconds', 0)}s")
|
|
|
|
if fetch_events:
|
|
total_fetches = len(fetch_events)
|
|
successful_fetches = sum(1 for e in fetch_events if e['success'])
|
|
cancelled_fetches = sum(1 for e in fetch_events if e.get('error_type') == 'Cancelled')
|
|
failed_fetches = total_fetches - successful_fetches - cancelled_fetches
|
|
|
|
logger.info("\n--- Fetch Summary (This Run) ---")
|
|
logger.info(f"Total info.json fetch attempts: {total_fetches}")
|
|
logger.info(f" - Successful: {successful_fetches}")
|
|
logger.info(f" - Failed: {failed_fetches}")
|
|
if cancelled_fetches > 0:
|
|
logger.info(f" - Cancelled: {cancelled_fetches}")
|
|
|
|
completed_fetches = successful_fetches + failed_fetches
|
|
if completed_fetches > 0:
|
|
success_rate = (successful_fetches / completed_fetches) * 100
|
|
logger.info(f"Success rate (of completed): {success_rate:.2f}%")
|
|
elif total_fetches > 0:
|
|
logger.info("Success rate: N/A (no tasks completed)")
|
|
|
|
if duration > 1 and total_fetches > 0:
|
|
rpm = (total_fetches / duration) * 60
|
|
logger.info(f"Actual fetch rate: {rpm:.2f} requests/minute")
|
|
|
|
if failed_fetches > 0:
|
|
error_counts = collections.Counter(
|
|
e.get('error_type', 'Unknown')
|
|
for e in fetch_events if not e['success'] and e.get('error_type') != 'Cancelled'
|
|
)
|
|
logger.info("Failure breakdown:")
|
|
for error_type, count in sorted(error_counts.items()):
|
|
logger.info(f" - {error_type}: {count}")
|
|
|
|
profile_counts = collections.Counter(e.get('profile') for e in fetch_events if e.get('profile'))
|
|
if profile_counts:
|
|
logger.info("Requests per profile:")
|
|
for profile, count in sorted(profile_counts.items()):
|
|
logger.info(f" - {profile}: {count}")
|
|
|
|
proxy_counts = collections.Counter(e.get('proxy_url') for e in fetch_events if e.get('proxy_url'))
|
|
if proxy_counts:
|
|
logger.info("Requests per proxy:")
|
|
for proxy, count in sorted(proxy_counts.items()):
|
|
logger.info(f" - {proxy}: {count}")
|
|
|
|
if download_events:
|
|
total_attempts = len(download_events)
|
|
successes = sum(1 for e in download_events if e['success'])
|
|
cancelled = sum(1 for e in download_events if e.get('error_type') == 'Cancelled')
|
|
failures = total_attempts - successes - cancelled
|
|
|
|
# --- Profile Association for Download Events ---
|
|
download_profiles = [e.get('profile') for e in download_events]
|
|
|
|
# For download_only mode, we might need to fall back to regex extraction
|
|
# if the profile wasn't passed down (e.g., no profile grouping).
|
|
profile_regex = None
|
|
if policy:
|
|
settings = policy.get('settings', {})
|
|
if settings.get('mode') == 'download_only':
|
|
profile_regex = settings.get('profile_extraction_regex')
|
|
|
|
if profile_regex:
|
|
for i, e in enumerate(download_events):
|
|
if not download_profiles[i]: # If profile wasn't set in the event
|
|
path = Path(e.get('path', ''))
|
|
match = re.search(profile_regex, path.name)
|
|
if match and match.groups():
|
|
download_profiles[i] = match.group(1)
|
|
|
|
# Replace any remaining Nones with 'unknown_profile'
|
|
download_profiles = [p or 'unknown_profile' for p in download_profiles]
|
|
|
|
num_profiles_used = len(set(p for p in download_profiles if p != 'unknown_profile'))
|
|
|
|
logger.info("\n--- Download Summary (This Run) ---")
|
|
if policy:
|
|
workers = policy.get('execution_control', {}).get('workers', 'N/A')
|
|
logger.info(f"Workers configured: {workers}")
|
|
|
|
logger.info(f"Profiles utilized for downloads: {num_profiles_used}")
|
|
logger.info(f"Total download attempts: {total_attempts}")
|
|
logger.info(f" - Successful: {successes}")
|
|
logger.info(f" - Failed: {failures}")
|
|
if cancelled > 0:
|
|
logger.info(f" - Cancelled: {cancelled}")
|
|
|
|
completed_downloads = successes + failures
|
|
if completed_downloads > 0:
|
|
success_rate = (successes / completed_downloads) * 100
|
|
logger.info(f"Success rate (of completed): {success_rate:.2f}%")
|
|
elif total_attempts > 0:
|
|
logger.info("Success rate: N/A (no tasks completed)")
|
|
|
|
duration_hours = duration / 3600.0
|
|
if duration > 1 and total_attempts > 0:
|
|
dpm = (total_attempts / duration) * 60
|
|
logger.info(f"Actual overall download rate: {dpm:.2f} attempts/minute")
|
|
|
|
total_bytes = sum(e.get('downloaded_bytes', 0) for e in download_events if e['success'])
|
|
if total_bytes > 0:
|
|
logger.info(f"Total data downloaded: {format_size(total_bytes)}")
|
|
|
|
if failures > 0:
|
|
error_counts = collections.Counter(
|
|
e.get('error_type', 'Unknown')
|
|
for e in download_events if not e['success'] and e.get('error_type') != 'Cancelled'
|
|
)
|
|
logger.info("Failure breakdown:")
|
|
for error_type, count in sorted(error_counts.items()):
|
|
logger.info(f" - {error_type}: {count}")
|
|
|
|
# Add profile to each download event for easier counting
|
|
for i, e in enumerate(download_events):
|
|
e['profile'] = download_profiles[i]
|
|
|
|
profile_counts = collections.Counter(e.get('profile') for e in download_events if e.get('profile'))
|
|
if profile_counts:
|
|
logger.info("Downloads per profile:")
|
|
for profile, count in sorted(profile_counts.items()):
|
|
rate_per_hour = (count / duration_hours) if duration_hours > 0 else 0
|
|
logger.info(f" - {profile}: {count} attempts (avg this run: {rate_per_hour:.2f}/hour)")
|
|
|
|
proxy_counts = collections.Counter(e.get('proxy_url') for e in download_events if e.get('proxy_url'))
|
|
if proxy_counts:
|
|
logger.info("Downloads per proxy:")
|
|
for proxy, count in sorted(proxy_counts.items()):
|
|
rate_per_hour = (count / duration_hours) if duration_hours > 0 else 0
|
|
logger.info(f" - {proxy}: {count} attempts (avg this run: {rate_per_hour:.2f}/hour)")
|
|
|
|
logger.info("--------------------")
|
|
|
|
|
|
def _run_download_logic(source, info_json_content, policy, state_manager, profile_name=None):
|
|
"""Shared download logic for a single info.json."""
|
|
proxy_url = None
|
|
if info_json_content:
|
|
try:
|
|
info_data = json.loads(info_json_content)
|
|
proxy_url = info_data.get('_proxy_url')
|
|
except (json.JSONDecodeError, AttributeError):
|
|
logger.warning(f"[{get_display_name(source)}] Could not parse info.json to get proxy for download controls.")
|
|
|
|
if not state_manager.check_and_update_download_rate_limit(proxy_url, policy):
|
|
return []
|
|
|
|
state_manager.wait_for_proxy_cooldown(proxy_url, policy)
|
|
results = process_info_json_cycle(source, info_json_content, policy, state_manager, proxy_url=proxy_url, profile_name=profile_name)
|
|
state_manager.update_proxy_finish_time(proxy_url)
|
|
return results
|
|
|
|
|
|
def process_profile_task(profile_name, file_list, policy, state_manager, cycle_num):
|
|
"""Worker task for a profile, processing its files sequentially."""
|
|
logger.info(f"Worker {get_worker_id()} starting task for profile '{profile_name}' with {len(file_list)} files.")
|
|
all_results = []
|
|
for i, file_path in enumerate(file_list):
|
|
if shutdown_event.is_set():
|
|
logger.info(f"Shutdown requested, stopping task for profile '{profile_name}'.")
|
|
break
|
|
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"[{get_display_name(file_path)}] Could not read info.json file: {e}")
|
|
continue # Skip this file
|
|
|
|
results_for_file = _run_download_logic(file_path, info_json_content, policy, state_manager, profile_name=profile_name)
|
|
all_results.extend(results_for_file)
|
|
|
|
# Check for stop conditions after processing each file
|
|
should_stop_profile = False
|
|
for result in results_for_file:
|
|
if not result['success']:
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
if s_conditions.get('on_failure') or \
|
|
(s_conditions.get('on_http_403') and result['error_type'] == 'HTTP 403') or \
|
|
(s_conditions.get('on_timeout') and result['error_type'] == 'Timeout'):
|
|
logger.info(f"Stopping further processing for profile '{profile_name}' due to failure.")
|
|
should_stop_profile = True
|
|
break
|
|
if should_stop_profile:
|
|
break
|
|
|
|
# Apply sleep between tasks for this profile
|
|
if i < len(file_list) - 1:
|
|
exec_control = policy.get('execution_control', {})
|
|
sleep_cfg = exec_control.get('sleep_between_tasks', {})
|
|
sleep_min = sleep_cfg.get('min_seconds', 0)
|
|
|
|
if sleep_min > 0:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
sleep_duration = random.uniform(sleep_min, sleep_max) if sleep_max > sleep_min else sleep_min
|
|
|
|
logger.debug(f"Profile '{profile_name}' sleeping for {sleep_duration:.2f}s before next file.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
return all_results
|
|
|
|
|
|
def run_command(cmd, input_data=None, binary_stdout=False):
|
|
"""
|
|
Runs a command, captures its output, and returns status.
|
|
If binary_stdout is True, stdout is returned as bytes. Otherwise, both are decoded strings.
|
|
"""
|
|
logger.debug(f"Running command: {' '.join(cmd)}")
|
|
process = None
|
|
try:
|
|
# Always open in binary mode to handle both cases. We will decode later.
|
|
process = subprocess.Popen(
|
|
cmd,
|
|
stdin=subprocess.PIPE if input_data else None,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
preexec_fn=os.setsid # Start in a new process group to isolate from terminal signals
|
|
)
|
|
with process_lock:
|
|
running_processes.add(process)
|
|
|
|
stdout_capture = []
|
|
stderr_capture = []
|
|
|
|
def read_pipe(pipe, capture_list, display_pipe=None):
|
|
"""Reads a pipe line by line (as bytes), appending to a list and optionally displaying."""
|
|
for line in iter(pipe.readline, b''):
|
|
capture_list.append(line)
|
|
if display_pipe:
|
|
# Decode for display
|
|
display_line = line.decode('utf-8', errors='replace')
|
|
display_pipe.write(display_line)
|
|
display_pipe.flush()
|
|
|
|
# We must read stdout and stderr in parallel to prevent deadlocks.
|
|
stdout_thread = threading.Thread(target=read_pipe, args=(process.stdout, stdout_capture))
|
|
# Display stderr in real-time as it often contains progress info.
|
|
stderr_thread = threading.Thread(target=read_pipe, args=(process.stderr, stderr_capture, sys.stderr))
|
|
|
|
stdout_thread.start()
|
|
stderr_thread.start()
|
|
|
|
# Handle stdin after starting to read outputs to avoid deadlocks.
|
|
if input_data:
|
|
try:
|
|
process.stdin.write(input_data.encode('utf-8'))
|
|
process.stdin.close()
|
|
except (IOError, BrokenPipeError):
|
|
# This can happen if the process exits quickly or doesn't read stdin.
|
|
logger.debug(f"Could not write to stdin for command: {' '.join(cmd)}. Process may have already exited.")
|
|
|
|
# Wait for the process to finish and for all output to be read.
|
|
retcode = process.wait()
|
|
stdout_thread.join()
|
|
stderr_thread.join()
|
|
|
|
stdout_bytes = b"".join(stdout_capture)
|
|
stderr_bytes = b"".join(stderr_capture)
|
|
|
|
stdout = stdout_bytes if binary_stdout else stdout_bytes.decode('utf-8', errors='replace')
|
|
stderr = stderr_bytes.decode('utf-8', errors='replace')
|
|
|
|
return retcode, stdout, stderr
|
|
|
|
except FileNotFoundError:
|
|
logger.error(f"Command not found: {cmd[0]}. Make sure it's in your PATH.")
|
|
return -1, "", f"Command not found: {cmd[0]}"
|
|
except Exception as e:
|
|
logger.error(f"An error occurred while running command: {' '.join(cmd)}. Error: {e}")
|
|
return -1, "", str(e)
|
|
finally:
|
|
if process:
|
|
with process_lock:
|
|
running_processes.discard(process)
|
|
|
|
|
|
def run_download_worker(info_json_path, info_json_content, format_to_download, policy, profile_name=None):
|
|
"""
|
|
Performs a single download attempt. Designed to be run in a worker thread.
|
|
"""
|
|
download_policy = policy.get('download_policy', {})
|
|
settings = policy.get('settings', {})
|
|
downloader = download_policy.get('downloader')
|
|
|
|
# Get script command from settings, with fallback to download_policy for old format.
|
|
script_cmd_str = settings.get('download_script')
|
|
if not script_cmd_str:
|
|
script_cmd_str = download_policy.get('script')
|
|
|
|
if script_cmd_str:
|
|
download_cmd = shlex.split(script_cmd_str)
|
|
elif downloader == 'aria2c_rpc':
|
|
download_cmd = [sys.executable, '-m', 'ytops_client.cli', 'download', 'aria-rpc']
|
|
elif downloader == 'native-cli':
|
|
download_cmd = [sys.executable, '-m', 'ytops_client.cli', 'download', 'cli']
|
|
else:
|
|
# Default to the new native-py downloader if downloader is 'native-py' or not specified.
|
|
download_cmd = [sys.executable, '-m', 'ytops_client.cli', 'download', 'py']
|
|
|
|
download_cmd.extend(['-f', format_to_download])
|
|
|
|
if downloader == 'aria2c_rpc':
|
|
if download_policy.get('aria_host'):
|
|
download_cmd.extend(['--aria-host', str(download_policy['aria_host'])])
|
|
if download_policy.get('aria_port'):
|
|
download_cmd.extend(['--aria-port', str(download_policy['aria_port'])])
|
|
if download_policy.get('aria_secret'):
|
|
download_cmd.extend(['--aria-secret', str(download_policy['aria_secret'])])
|
|
if download_policy.get('output_dir'):
|
|
download_cmd.extend(['--output-dir', str(download_policy['output_dir'])])
|
|
if download_policy.get('aria_remote_dir'):
|
|
download_cmd.extend(['--remote-dir', str(download_policy['aria_remote_dir'])])
|
|
if download_policy.get('aria_fragments_dir'):
|
|
download_cmd.extend(['--fragments-dir', str(download_policy['aria_fragments_dir'])])
|
|
# For stress testing, waiting is the desired default to get a success/fail result.
|
|
# Allow disabling it by explicitly setting aria_wait: false in the policy.
|
|
if download_policy.get('aria_wait', True):
|
|
download_cmd.append('--wait')
|
|
|
|
if download_policy.get('auto_merge_fragments'):
|
|
download_cmd.append('--auto-merge-fragments')
|
|
if download_policy.get('remove_fragments_after_merge'):
|
|
download_cmd.append('--remove-fragments-after-merge')
|
|
if download_policy.get('cleanup'):
|
|
download_cmd.append('--cleanup')
|
|
if download_policy.get('purge_on_complete'):
|
|
download_cmd.append('--purge-on-complete')
|
|
|
|
downloader_args = download_policy.get('downloader_args')
|
|
proxy = download_policy.get('proxy')
|
|
if proxy:
|
|
# Note: proxy_rename is not supported for aria2c_rpc mode.
|
|
proxy_arg = f"--all-proxy {shlex.quote(str(proxy))}"
|
|
if downloader_args:
|
|
downloader_args = f"{downloader_args} {proxy_arg}"
|
|
else:
|
|
downloader_args = proxy_arg
|
|
|
|
if downloader_args:
|
|
# For aria2c_rpc, the downloader_args value is passed directly to the script's --downloader-args option.
|
|
download_cmd.extend(['--downloader-args', downloader_args])
|
|
elif downloader == 'native-cli':
|
|
# This is the logic for the legacy download_tool.py (yt-dlp CLI wrapper).
|
|
pause_seconds = download_policy.get('pause_before_download_seconds')
|
|
if pause_seconds and isinstance(pause_seconds, (int, float)) and pause_seconds > 0:
|
|
download_cmd.extend(['--pause', str(pause_seconds)])
|
|
|
|
if download_policy.get('continue_downloads'):
|
|
download_cmd.append('--download-continue')
|
|
|
|
# Add proxy if specified directly in the policy
|
|
proxy = download_policy.get('proxy')
|
|
if proxy:
|
|
download_cmd.extend(['--proxy', str(proxy)])
|
|
|
|
proxy_rename = download_policy.get('proxy_rename')
|
|
if proxy_rename:
|
|
download_cmd.extend(['--proxy-rename', str(proxy_rename)])
|
|
|
|
extra_args = download_policy.get('extra_args')
|
|
if extra_args:
|
|
download_cmd.extend(shlex.split(extra_args))
|
|
|
|
# Note: 'downloader' here refers to yt-dlp's internal downloader, not our script.
|
|
# The policy key 'external_downloader' is clearer, but we support 'downloader' for backward compatibility.
|
|
ext_downloader = download_policy.get('external_downloader') or download_policy.get('downloader')
|
|
if ext_downloader and ext_downloader not in ['native-cli', 'native-py', 'aria2c_rpc']:
|
|
download_cmd.extend(['--downloader', str(ext_downloader)])
|
|
|
|
downloader_args = download_policy.get('downloader_args')
|
|
if downloader_args:
|
|
download_cmd.extend(['--downloader-args', str(downloader_args)])
|
|
|
|
if download_policy.get('merge_output_format'):
|
|
download_cmd.extend(['--merge-output-format', str(download_policy['merge_output_format'])])
|
|
|
|
if download_policy.get('cleanup'):
|
|
download_cmd.append('--cleanup')
|
|
else:
|
|
# This is the default logic for the new native-py downloader.
|
|
if download_policy.get('output_to_buffer'):
|
|
download_cmd.append('--output-buffer')
|
|
else:
|
|
# --output-dir is only relevant if not outputting to buffer.
|
|
if download_policy.get('output_dir'):
|
|
download_cmd.extend(['--output-dir', str(download_policy['output_dir'])])
|
|
|
|
if download_policy.get('temp_path'):
|
|
download_cmd.extend(['--temp-path', str(download_policy['temp_path'])])
|
|
if download_policy.get('continue_downloads'):
|
|
download_cmd.append('--download-continue')
|
|
|
|
pause_seconds = download_policy.get('pause_before_download_seconds')
|
|
if pause_seconds and isinstance(pause_seconds, (int, float)) and pause_seconds > 0:
|
|
download_cmd.extend(['--pause', str(pause_seconds)])
|
|
|
|
proxy = download_policy.get('proxy')
|
|
if proxy:
|
|
download_cmd.extend(['--proxy', str(proxy)])
|
|
|
|
proxy_rename = download_policy.get('proxy_rename')
|
|
if proxy_rename:
|
|
download_cmd.extend(['--proxy-rename', str(proxy_rename)])
|
|
|
|
# The 'extra_args' from the policy are for the download script itself, not for yt-dlp.
|
|
# We need to split them and add them to the command.
|
|
extra_args = download_policy.get('extra_args')
|
|
if extra_args:
|
|
download_cmd.extend(shlex.split(extra_args))
|
|
|
|
# Pass through downloader settings for yt-dlp to use
|
|
# e.g. to tell yt-dlp to use aria2c as its backend
|
|
ext_downloader = download_policy.get('external_downloader')
|
|
if ext_downloader:
|
|
download_cmd.extend(['--downloader', str(ext_downloader)])
|
|
|
|
downloader_args = download_policy.get('downloader_args')
|
|
if downloader_args:
|
|
download_cmd.extend(['--downloader-args', str(downloader_args)])
|
|
|
|
worker_id = get_worker_id()
|
|
display_name = get_display_name(info_json_path)
|
|
profile_log_part = f" [Profile: {profile_name}]" if profile_name else ""
|
|
log_prefix = f"[Worker {worker_id}]{profile_log_part} [{display_name} @ {format_to_download}]"
|
|
logger.info(f"{log_prefix} Kicking off download process...")
|
|
|
|
temp_info_file_path = None
|
|
try:
|
|
if isinstance(info_json_path, Path) and info_json_path.exists():
|
|
# The info.json is already in a file, pass its path directly.
|
|
download_cmd.extend(['--load-info-json', str(info_json_path)])
|
|
else:
|
|
# The info.json content is in memory, so write it to a temporary file.
|
|
import tempfile
|
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as temp_f:
|
|
temp_f.write(info_json_content)
|
|
temp_info_file_path = temp_f.name
|
|
download_cmd.extend(['--load-info-json', temp_info_file_path])
|
|
|
|
cmd_str_for_log = ' '.join(shlex.quote(s) for s in download_cmd)
|
|
logger.info(f"{log_prefix} Running download command: {cmd_str_for_log}")
|
|
output_to_buffer = download_policy.get('output_to_buffer', False)
|
|
retcode, stdout, stderr = run_command(download_cmd, binary_stdout=output_to_buffer)
|
|
finally:
|
|
if temp_info_file_path and os.path.exists(temp_info_file_path):
|
|
os.unlink(temp_info_file_path)
|
|
|
|
is_403_error = "HTTP Error 403" in stderr
|
|
is_timeout_error = "Read timed out" in stderr
|
|
output_to_buffer = download_policy.get('output_to_buffer', False)
|
|
|
|
result = {
|
|
'type': 'download',
|
|
'path': str(info_json_path),
|
|
'format': format_to_download,
|
|
'success': retcode == 0,
|
|
'error_type': None,
|
|
'details': '',
|
|
'downloaded_bytes': 0,
|
|
'profile': profile_name
|
|
}
|
|
|
|
if retcode == 0:
|
|
details_str = "OK"
|
|
size_in_bytes = 0
|
|
if output_to_buffer:
|
|
# The most accurate size is the length of the stdout buffer.
|
|
size_in_bytes = len(stdout) # stdout is bytes
|
|
details_str += f" (Buffered {format_size(size_in_bytes)})"
|
|
else:
|
|
size_match = re.search(r'\[download\]\s+100%\s+of\s+~?([0-9.]+)(B|KiB|MiB|GiB)', stderr)
|
|
if size_match:
|
|
value = float(size_match.group(1))
|
|
unit = size_match.group(2)
|
|
multipliers = {"B": 1, "KiB": 1024, "MiB": 1024**2, "GiB": 1024**3}
|
|
size_in_bytes = int(value * multipliers.get(unit, 1))
|
|
details_str += f" ({size_match.group(1)}{unit})"
|
|
|
|
result['downloaded_bytes'] = size_in_bytes
|
|
result['details'] = details_str
|
|
else:
|
|
# Check both stdout and stderr for error messages, as logging might be directed to stdout.
|
|
# stdout may be bytes when output_to_buffer is set; decode before scanning for error lines.
full_output = f"{stdout.decode('utf-8', errors='replace') if isinstance(stdout, bytes) else stdout}\n{stderr}"
|
|
error_lines = [line for line in full_output.strip().split('\n') if 'ERROR:' in line]
|
|
result['details'] = error_lines[-1].strip() if error_lines else "Unknown error"
|
|
|
|
if is_403_error:
|
|
result['error_type'] = 'HTTP 403'
|
|
elif is_timeout_error:
|
|
result['error_type'] = 'Timeout'
|
|
else:
|
|
result['error_type'] = f'Exit Code {retcode}'
|
|
|
|
return result
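# With no 'download_script' and no explicit 'downloader', the assembled command looks
# roughly like this (format, paths and proxy are illustrative, not defaults):
#   <python> -m ytops_client.cli download py -f 140 \
#       --output-dir downloads --proxy socks5://127.0.0.1:1080 \
#       --load-info-json /tmp/xxxx.json
# Only options present in download_policy are appended; --load-info-json is added last,
# pointing either at the original file or at a temporary copy of the in-memory content.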
|
|
|
|
|
|
def process_info_json_cycle(path, content, policy, state_manager, proxy_url=None, profile_name=None):
|
|
"""
|
|
Processes one info.json file for one cycle, downloading selected formats.
|
|
"""
|
|
results = []
|
|
display_name = get_display_name(path)
|
|
d_policy = policy.get('download_policy', {})
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
format_selection = d_policy.get('formats', '')
|
|
|
|
try:
|
|
info_data = json.loads(content)
|
|
available_formats = [f['format_id'] for f in info_data.get('formats', [])]
|
|
if not available_formats:
|
|
logger.warning(f"[{display_name}] No formats found in info.json. Skipping.")
|
|
return []
|
|
|
|
formats_to_test = []
|
|
if format_selection == 'all':
|
|
formats_to_test = available_formats
|
|
elif format_selection.startswith('random:'):
|
|
percent = float(format_selection.split(':')[1].rstrip('%'))
|
|
count = max(1, int(len(available_formats) * (percent / 100.0)))
|
|
formats_to_test = random.sample(available_formats, k=count)
|
|
elif format_selection.startswith('random_from:'):
|
|
choices = [f.strip() for f in format_selection.split(':', 1)[1].split(',')]
|
|
valid_choices = [f for f in choices if f in available_formats]
|
|
if valid_choices:
|
|
formats_to_test = [random.choice(valid_choices)]
|
|
else:
|
|
requested_formats = [f.strip() for f in format_selection.split(',') if f.strip()]
|
|
formats_to_test = []
|
|
for req_fmt in requested_formats:
|
|
# If it's a complex selector with slashes, don't try to validate it against available formats.
|
|
if '/' in req_fmt:
|
|
formats_to_test.append(req_fmt)
|
|
continue
|
|
|
|
# Check for exact match first
|
|
if req_fmt in available_formats:
|
|
formats_to_test.append(req_fmt)
|
|
continue
|
|
|
|
# If no exact match, check for formats that start with this ID + '-'
|
|
# e.g., req_fmt '140' should match '140-0'
|
|
prefix_match = f"{req_fmt}-"
|
|
first_match = next((af for af in available_formats if af.startswith(prefix_match)), None)
|
|
|
|
if first_match:
|
|
logger.info(f"[{display_name}] Requested format '{req_fmt}' not found. Using first available match: '{first_match}'.")
|
|
formats_to_test.append(first_match)
|
|
else:
|
|
# This could be a complex selector like 'bestvideo' or '299/298', so keep it.
|
|
if req_fmt not in available_formats:
|
|
logger.warning(f"[{display_name}] Requested format '{req_fmt}' not found in available formats.")
|
|
formats_to_test.append(req_fmt)
|
|
|
|
except json.JSONDecodeError:
|
|
logger.error(f"[{display_name}] Failed to parse info.json. Skipping.")
|
|
return []
|
|
|
|
for i, format_id in enumerate(formats_to_test):
|
|
if shutdown_event.is_set():
|
|
logger.info(f"Shutdown requested, stopping further format tests for {display_name}.")
|
|
break
|
|
|
|
# Check if the format URL is expired before attempting to download
|
|
format_details = next((f for f in info_data.get('formats', []) if f.get('format_id') == format_id), None)
|
|
if format_details and 'url' in format_details:
|
|
parsed_url = urlparse(format_details['url'])
|
|
query_params = parse_qs(parsed_url.query)
|
|
expire_ts_str = query_params.get('expire', [None])[0]
|
|
if expire_ts_str and expire_ts_str.isdigit():
|
|
expire_ts = int(expire_ts_str)
|
|
if expire_ts < time.time():
|
|
logger.warning(f"[{display_name}] Skipping format '{format_id}' because its URL is expired.")
|
|
result = {
|
|
'type': 'download', 'path': str(path), 'format': format_id,
|
|
'success': True, 'error_type': 'Skipped',
|
|
'details': 'Download URL is expired', 'downloaded_bytes': 0
|
|
}
|
|
if proxy_url:
|
|
result['proxy_url'] = proxy_url
|
|
state_manager.log_event(result)
|
|
results.append(result)
|
|
continue # Move to the next format
|
|
|
|
result = run_download_worker(path, content, format_id, policy, profile_name=profile_name)
|
|
if proxy_url:
|
|
result['proxy_url'] = proxy_url
|
|
state_manager.log_event(result)
|
|
results.append(result)
|
|
|
|
worker_id = get_worker_id()
|
|
status = "SUCCESS" if result['success'] else f"FAILURE ({result['error_type']})"
|
|
profile_log_part = f" [Profile: {profile_name}]" if profile_name else ""
|
|
logger.info(f"[Worker {worker_id}]{profile_log_part} Result for {display_name} (format {format_id}): {status} - {result.get('details', 'OK')}")
|
|
|
|
if not result['success']:
|
|
if s_conditions.get('on_failure') or \
|
|
(s_conditions.get('on_http_403') and result['error_type'] == 'HTTP 403') or \
|
|
(s_conditions.get('on_timeout') and result['error_type'] == 'Timeout'):
|
|
logger.info(f"Stopping further format tests for {display_name} in this cycle due to failure.")
|
|
break
|
|
|
|
sleep_cfg = d_policy.get('sleep_between_formats', {})
|
|
sleep_min = sleep_cfg.get('min_seconds', 0)
|
|
if sleep_min > 0 and i < len(formats_to_test) - 1:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
if sleep_max > sleep_min:
|
|
sleep_duration = random.uniform(sleep_min, sleep_max)
|
|
else:
|
|
sleep_duration = sleep_min
|
|
|
|
logger.debug(f"Sleeping for {sleep_duration:.2f}s between formats for {display_name}.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
return results
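# Accepted values for download_policy.formats, as parsed above (format IDs illustrative):
#   all                   -> try every format_id present in the info.json
#   random:25%            -> a random 25% sample of the available format_ids (at least one)
#   random_from:140,299   -> pick one format at random from the listed IDs
#   140,299/298,bestvideo -> comma-separated list; '/' selectors and unmatched names are
#                            passed through to the downloader, and '140' may resolve to a
#                            prefixed variant such as '140-0' when present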
|
|
|
|
|
|
def update_dict(d, u):
|
|
"""Recursively update a dictionary."""
|
|
for k, v in u.items():
|
|
if isinstance(v, collections.abc.Mapping):
|
|
d[k] = update_dict(d.get(k, {}), v)
|
|
else:
|
|
d[k] = v
|
|
return d
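# Merge sketch: nested mappings are merged key by key rather than replaced
# (values hypothetical):
#   >>> update_dict({'download_policy': {'formats': 'all', 'proxy': None}},
#   ...             {'download_policy': {'formats': '140'}})
#   {'download_policy': {'formats': '140', 'proxy': None}}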
|
|
|
|
|
|
def load_policy(policy_file, policy_name=None):
|
|
"""Load a policy from a YAML file."""
|
|
try:
|
|
with open(policy_file, 'r', encoding='utf-8') as f:
|
|
# If a policy name is given, look for that specific document
|
|
if policy_name:
|
|
docs = list(yaml.safe_load_all(f))
|
|
for doc in docs:
|
|
if isinstance(doc, dict) and doc.get('name') == policy_name:
|
|
return doc
|
|
raise ValueError(f"Policy '{policy_name}' not found in {policy_file}")
|
|
# Otherwise, load the first document
|
|
return yaml.safe_load(f)
|
|
except (IOError, yaml.YAMLError, ValueError) as e:
|
|
logger.error(f"Failed to load policy file {policy_file}: {e}")
|
|
sys.exit(1)
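# Policies file sketch: one or more YAML documents separated by '---', selected by their
# top-level 'name' when a policy name is requested, otherwise the first document is used.
# All field values below are hypothetical.
#
#   name: smoke_test
#   settings: {mode: download_only, info_json_dir: ./info}
#   ---
#   name: soak_test
#   execution_control: {workers: 4, run_until: {minutes: 120}}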
|
|
|
|
|
|
def apply_overrides(policy, overrides):
|
|
"""Apply command-line overrides to the policy."""
|
|
for override in overrides:
|
|
try:
|
|
key, value = override.split('=', 1)
|
|
keys = key.split('.')
|
|
|
|
# Try to parse as JSON/YAML if it looks like a list or dict, otherwise treat as scalar
|
|
if (value.startswith('[') and value.endswith(']')) or \
|
|
(value.startswith('{') and value.endswith('}')):
|
|
try:
|
|
value = yaml.safe_load(value)
|
|
except yaml.YAMLError:
|
|
logger.warning(f"Could not parse override value '{value}' as YAML. Treating as a string.")
|
|
else:
|
|
# Try to auto-convert scalar value type
|
|
if value.lower() == 'true':
|
|
value = True
|
|
elif value.lower() == 'false':
|
|
value = False
|
|
elif value.lower() == 'null':
|
|
value = None
|
|
else:
|
|
try:
|
|
value = int(value)
|
|
except ValueError:
|
|
try:
|
|
value = float(value)
|
|
except ValueError:
|
|
pass # Keep as string
|
|
|
|
d = policy
|
|
for k in keys[:-1]:
|
|
d = d.setdefault(k, {})
|
|
d[keys[-1]] = value
|
|
except ValueError:
|
|
logger.error(f"Invalid override format: '{override}'. Use 'key.subkey=value'.")
|
|
sys.exit(1)
|
|
return policy
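# Override sketch: each override string is split on the first '=', the dotted key is
# walked into the policy, and the value is coerced (true/false/null, int, float, or
# YAML for values wrapped in [] / {}). Hypothetical examples:
#   execution_control.workers=8                 -> int 8
#   download_policy.continue_downloads=true     -> bool True
#   download_policy.formats=[140,299]           -> list [140, 299]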
|
|
|
|
|
|
def display_effective_policy(policy, name, sources=None, profile_names=None, original_workers_setting=None):
|
|
"""Prints a human-readable summary of the effective policy."""
|
|
logger.info(f"--- Effective Policy: {name} ---")
|
|
settings = policy.get('settings', {})
|
|
exec_control = policy.get('execution_control', {})
|
|
|
|
logger.info(f"Mode: {settings.get('mode', 'full_stack')}")
|
|
if profile_names:
|
|
num_profiles = len(profile_names)
|
|
logger.info(f"Profiles found: {num_profiles}")
|
|
if num_profiles > 0:
|
|
# Sort profiles for consistent display, show top 10
|
|
sorted_profiles = sorted(profile_names)
|
|
profiles_to_show = sorted_profiles[:10]
|
|
logger.info(f" (e.g., {', '.join(profiles_to_show)}{'...' if num_profiles > 10 else ''})")
|
|
|
|
workers_display = str(exec_control.get('workers', 1))
|
|
if original_workers_setting == 'auto':
|
|
workers_display = f"auto (calculated: {workers_display})"
|
|
logger.info(f"Workers: {workers_display}")
|
|
|
|
sleep_cfg = exec_control.get('sleep_between_tasks', {})
|
|
sleep_min = sleep_cfg.get('min_seconds')
|
|
if sleep_min is not None:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
if sleep_max > sleep_min:
|
|
logger.info(f"Sleep between tasks (per worker): {sleep_min}-{sleep_max}s (random)")
|
|
else:
|
|
logger.info(f"Sleep between tasks (per worker): {sleep_min}s")
|
|
|
|
run_until = exec_control.get('run_until', {})
|
|
run_conditions = []
|
|
if 'minutes' in run_until:
|
|
run_conditions.append(f"for {run_until['minutes']} minutes")
|
|
if 'requests' in run_until:
|
|
run_conditions.append(f"until {run_until['requests']} total requests")
|
|
if 'cycles' in run_until:
|
|
run_conditions.append(f"for {run_until['cycles']} cycles")
|
|
|
|
if run_conditions:
|
|
logger.info(f"Run condition: Stop after running {' or '.join(run_conditions)}.")
|
|
if 'minutes' in run_until and 'cycles' not in run_until:
|
|
logger.info("Will continuously cycle through sources until time limit is reached.")
|
|
else:
|
|
logger.warning("WARNING: No 'run_until' condition is set. This test will run forever unless stopped manually.")
|
|
logger.info("Run condition: No stop condition defined, will run indefinitely (until Ctrl+C).")
|
|
|
|
# --- Rate Calculation ---
|
|
if sources:
|
|
workers = exec_control.get('workers', 1)
|
|
num_sources = len(profile_names) if profile_names else len(sources)
|
|
|
|
min_sleep = sleep_cfg.get('min_seconds', 0)
|
|
max_sleep = sleep_cfg.get('max_seconds') or min_sleep
|
|
avg_sleep_per_task = (min_sleep + max_sleep) / 2
|
|
|
|
# Assume an average task duration. This is a major assumption.
|
|
mode = settings.get('mode', 'full_stack')
|
|
assumptions = exec_control.get('assumptions', {})
|
|
|
|
assumed_fetch_duration = 0
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
assumed_fetch_duration = assumptions.get('fetch_task_duration', 12 if mode == 'full_stack' else 3)
|
|
|
|
assumed_download_duration = 0
|
|
if mode in ['full_stack', 'download_only']:
|
|
# This assumes the total time to download all formats for a single source.
|
|
assumed_download_duration = assumptions.get('download_task_duration', 60)
|
|
|
|
total_assumed_task_duration = assumed_fetch_duration + assumed_download_duration
|
|
|
|
if workers > 0 and total_assumed_task_duration > 0:
|
|
total_time_per_task = total_assumed_task_duration + avg_sleep_per_task
|
|
tasks_per_minute_per_worker = 60 / total_time_per_task
|
|
total_tasks_per_minute = tasks_per_minute_per_worker * workers
|
|
|
|
logger.info("--- Rate Estimation ---")
|
|
logger.info(f"Source count: {num_sources}")
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
logger.info(f"Est. fetch time per source: {assumed_fetch_duration}s (override via execution_control.assumptions.fetch_task_duration)")
|
|
if mode in ['full_stack', 'download_only']:
|
|
logger.info(f"Est. download time per source: {assumed_download_duration}s (override via execution_control.assumptions.download_task_duration)")
|
|
logger.info(" (Note: This assumes total time for all formats per source)")
|
|
|
|
logger.info(f"Est. sleep per task: {avg_sleep_per_task:.1f}s")
|
|
logger.info(f"==> Expected task rate: ~{total_tasks_per_minute:.2f} tasks/minute ({workers} workers * {tasks_per_minute_per_worker:.2f} tasks/min/worker)")
|
|
|
|
target_rate_cfg = exec_control.get('target_rate', {})
|
|
target_reqs = target_rate_cfg.get('requests')
|
|
target_mins = target_rate_cfg.get('per_minutes')
|
|
if target_reqs and target_mins:
|
|
target_rpm = target_reqs / target_mins
|
|
logger.info(f"Target rate: {target_rpm:.2f} tasks/minute")
|
|
if total_tasks_per_minute < target_rpm * 0.8:
|
|
logger.warning("Warning: Expected rate is significantly lower than target rate.")
|
|
logger.warning("Consider increasing workers, reducing sleep, or checking task performance.")
|
|
|
|
logger.info("---------------------------------")
|
|
time.sleep(2) # Give user time to read
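    # Worked example of the estimate above (all numbers hypothetical): in
    # full_stack mode with the default assumptions (12s fetch + 60s download)
    # and a 5s average sleep, one task occupies ~77s, so a single worker runs
    # ~60/77 = 0.78 tasks/minute and 4 workers ~3.1 tasks/minute.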
|
|
|
|
|
|
def add_stress_policy_parser(subparsers):
|
|
"""Add the parser for the 'stress-policy' command."""
|
|
parser = subparsers.add_parser(
|
|
'stress-policy',
|
|
description="The primary, policy-driven stress-testing orchestrator.\nIt runs complex, multi-stage stress tests based on a YAML policy file.\nUse '--list-policies' to see available pre-configured scenarios.\n\nModes supported:\n- full_stack: Generate info.json and then download from it.\n- fetch_only: Only generate info.json files.\n- download_only: Only download from existing info.json files.",
|
|
formatter_class=argparse.RawTextHelpFormatter,
|
|
help='Run advanced, policy-driven stress tests (recommended).',
|
|
epilog="""
|
|
Examples:
|
|
|
|
1. Fetch info.jsons for a TV client with a single profile and a rate limit:
|
|
ytops-client stress-policy --policy policies/1_fetch_only_policies.yaml \\
|
|
--policy-name tv_downgraded_single_profile \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set execution_control.run_until.minutes=30
|
|
# This runs a 'fetch_only' test using the 'tv_downgraded' client. It uses a single,
|
|
# static profile for all requests and enforces a safety limit of 450 requests per hour.
|
|
|
|
2. Fetch info.jsons for an Android client using cookies for authentication:
|
|
ytops-client stress-policy --policy policies/1_fetch_only_policies.yaml \\
|
|
--policy-name android_sdkless_with_cookies \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set info_json_generation_policy.request_params.cookies_file_path=/path/to/my_cookies.txt
|
|
# This demonstrates an authenticated 'fetch_only' test. It passes the path to a
|
|
# Netscape cookie file, which the server will use for the requests.
|
|
|
|
3. Download from a folder of info.jsons, grouped by profile, with auto-workers:
|
|
ytops-client stress-policy --policy policies/2_download_only_policies.yaml \\
|
|
--policy-name basic_profile_aware_download \\
|
|
--set settings.info_json_dir=/path/to/my/infojsons
|
|
# This runs a 'download_only' test. It scans a directory, extracts profile names from
|
|
# the filenames (e.g., 'tv_user_1' from '...-VIDEOID-tv_user_1.json'), and groups
|
|
# them. 'workers=auto' sets the number of workers to the number of unique profiles found.
|
|
|
|
4. Full-stack test with multiple workers and profile rotation:
|
|
ytops-client stress-policy --policy policies/3_full_stack_policies.yaml \\
|
|
--policy-name tv_simply_profile_rotation \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set execution_control.workers=4 \\
|
|
--set settings.profile_management.max_requests_per_profile=500
|
|
# This runs a 'full_stack' test with 4 parallel workers. Each worker gets a unique
|
|
# profile (e.g., tv_simply_user_0_0, tv_simply_user_1_0, etc.). After a profile is
|
|
# used 500 times, it is retired, and a new "generation" is created (e.g., tv_simply_user_0_1).
|
|
|
|
5. Full-stack authenticated test with a pool of profiles and corresponding cookie files:
|
|
ytops-client stress-policy --policy policies/3_full_stack_policies.yaml \\
|
|
--policy-name mweb_multi_profile_with_cookies \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set settings.profile_management.cookie_files='["/path/c1.txt","/path/c2.txt"]'
|
|
# This runs a 'full_stack' test using a pool of profiles (e.g., mweb_user_0, mweb_user_1).
|
|
# It uses the 'cookie_files' list to assign a specific cookie file to each profile in the
|
|
# pool, enabling multi-account authenticated testing. Note the JSON/YAML list format for the override.
|
|
|
|
6. Full-stack test submitting downloads to an aria2c RPC server:
|
|
ytops-client stress-policy --policy policies/3_full_stack_policies.yaml \\
|
|
--policy-name tv_simply_profile_rotation_aria2c_rpc \\
|
|
--set settings.urls_file=my_urls.txt \\
|
|
--set download_policy.aria_host=192.168.1.100 \\
|
|
--set download_policy.aria_port=6801
|
|
# This runs a test where downloads are not performed by the worker itself, but are
|
|
# sent to a remote aria2c daemon. The policy specifies 'downloader: aria2c_rpc'
|
|
# and provides connection details. This is useful for offloading download traffic.
|
|
|
|
--------------------------------------------------------------------------------
|
|
Overridable Policy Parameters via --set:
|
|
|
|
Key Description
|
|
-------------------------------------- ------------------------------------------------
|
|
[settings]
|
|
settings.mode Test mode: 'full_stack', 'fetch_only', or 'download_only'.
|
|
settings.urls_file Path to file with URLs/video IDs.
|
|
settings.info_json_dir Path to directory with existing info.json files.
|
|
settings.profile_extraction_regex For 'download_only' mode, a regex to extract profile names from info.json filenames. The first capture group is used as the profile name. E.g., '.*-(.*?).json'. This enables profile-aware sequential downloading.
|
|
settings.info_json_dir_sample_percent Randomly sample this %% of files from the directory (for 'once' scan mode).
|
|
settings.directory_scan_mode For 'download_only': 'once' (default) or 'continuous' to watch for new files.
|
|
settings.mark_processed_files For 'continuous' scan mode: if true, rename processed files to '*.<timestamp>.processed' to avoid reprocessing.
|
|
settings.max_files_per_cycle For 'continuous' scan mode: max new files to process per cycle.
|
|
settings.sleep_if_no_new_files_seconds For 'continuous' scan mode: seconds to sleep if no new files are found (default: 10).
|
|
settings.profile_prefix (Legacy) Prefix for profile names (e.g., 'test_user').
|
|
settings.profile_pool (Legacy) Size of the profile pool.
|
|
settings.profile_mode Profile strategy. 'per_request' (legacy), 'per_worker' (legacy), or 'per_worker_with_rotation' (requires profile_management).
|
|
settings.info_json_script Command to run the info.json generation script (e.g., 'bin/ytops-client get-info').
|
|
settings.save_info_json_dir If set, save all successfully generated info.json files to this directory.
|
|
|
|
[settings.profile_management] (New, preferred method for profile control)
|
|
profile_management.prefix Prefix for profile names (e.g., 'dyn_user').
|
|
profile_management.suffix Suffix for profile names. Set to 'auto' for a timestamp, or provide a string.
|
|
profile_management.initial_pool_size The number of profiles to start with.
|
|
profile_management.auto_expand_pool If true, create new profiles when the initial pool is exhausted (all sleeping).
|
|
profile_management.max_requests_per_profile Max requests a profile can make before it must 'sleep'.
|
|
profile_management.sleep_minutes_on_exhaustion How many minutes a profile 'sleeps' after hitting its request limit.
|
|
profile_management.cookie_files A list of paths to cookie files. Used to assign a unique cookie file to each profile in a pool.
|
|
|
|
[execution_control]
|
|
execution_control.workers Number of parallel worker threads. Set to "auto" to calculate from target_rate or number of profiles.
|
|
execution_control.auto_workers_max The maximum number of workers to use when 'workers' is 'auto' in profile-aware download mode (default: 8).
|
|
execution_control.target_rate.requests Target requests for 'auto' workers calculation.
|
|
execution_control.target_rate.per_minutes Period in minutes for target_rate.
|
|
execution_control.run_until.minutes Stop test after N minutes. Will continuously cycle through sources.
|
|
execution_control.run_until.cycles Stop test after N cycles. A cycle is one full pass through all sources.
|
|
execution_control.run_until.requests Stop test after N total info.json requests (cumulative across runs).
|
|
execution_control.sleep_between_tasks.min_seconds Min sleep time between tasks, per worker.
|
|
|
|
[info_json_generation_policy]
|
|
info_json_generation_policy.client Client to use (e.g., 'mweb', 'tv_camoufox').
|
|
info_json_generation_policy.auth_host Host for the auth/Thrift service.
|
|
info_json_generation_policy.auth_port Port for the auth/Thrift service.
|
|
info_json_generation_policy.assigned_proxy_url A specific proxy to use for a request, overriding the server's proxy pool.
|
|
info_json_generation_policy.proxy_rename Regex substitution for the assigned proxy URL (e.g., 's/old/new/').
|
|
info_json_generation_policy.command_template A full command template for the info.json script. Overrides other keys.
|
|
info_json_generation_policy.rate_limits.per_ip.max_requests Max requests for the given time period from one IP.
|
|
info_json_generation_policy.rate_limits.per_ip.per_minutes Time period in minutes for the per_ip rate limit.
|
|
info_json_generation_policy.rate_limits.per_profile.max_requests Max requests for a single profile in a time period.
|
|
info_json_generation_policy.rate_limits.per_profile.per_minutes Time period in minutes for the per_profile rate limit.
|
|
info_json_generation_policy.client_rotation_policy.major_client The primary client to use for most requests.
|
|
info_json_generation_policy.client_rotation_policy.refresh_client The client to use periodically to refresh context.
|
|
info_json_generation_policy.client_rotation_policy.refresh_every.requests Trigger refresh client after N requests for a profile.
|
|
|
|
[download_policy]
|
|
download_policy.formats Formats to download (e.g., '18,140', 'random:50%%').
|
|
download_policy.downloader Orchestrator script to use: 'native-py' (default, Python lib), 'native-cli' (legacy CLI wrapper), or 'aria2c_rpc'.
|
|
download_policy.external_downloader For 'native-py' or default, the backend yt-dlp should use (e.g., 'aria2c', 'native').
|
|
download_policy.downloader_args Arguments for the external_downloader. For yt-dlp, e.g., 'aria2c:-x 8'.
|
|
download_policy.merge_output_format Container to merge to (e.g., 'mkv'). Defaults to 'mp4' via cli.config.
|
|
download_policy.temp_path For 'native-py', path to a directory for temporary files (e.g., a RAM disk like /dev/shm).
|
|
download_policy.output_to_buffer For 'native-py', download to an in-memory buffer and pipe to stdout instead of saving to a file (true/false). Best for single-file formats.
|
|
download_policy.proxy Proxy for direct downloads (e.g., "socks5://127.0.0.1:1080").
|
|
download_policy.proxy_rename Regex substitution for the proxy URL (e.g., 's/old/new/').
|
|
download_policy.pause_before_download_seconds Pause for N seconds before starting each download attempt.
|
|
download_policy.continue_downloads Enable download continuation (true/false).
|
|
download_policy.cleanup After success: for native downloaders, rename and truncate file to 0 bytes; for 'aria2c_rpc', remove file(s) from filesystem.
|
|
download_policy.extra_args A string of extra arguments for the download script (e.g., "--limit-rate 5M").
|
|
download_policy.sleep_per_proxy_seconds Cooldown in seconds between downloads on the same proxy.
|
|
download_policy.rate_limits.per_proxy.max_requests Max downloads for a single proxy in a time period.
|
|
download_policy.rate_limits.per_proxy.per_minutes Time period in minutes for the per_proxy download rate limit.
|
|
# For downloader: 'aria2c_rpc'
|
|
download_policy.aria_host Hostname of the aria2c RPC server.
|
|
download_policy.aria_port Port of the aria2c RPC server.
|
|
download_policy.aria_secret Secret token for the aria2c RPC server.
|
|
download_policy.aria_wait Wait for aria2c downloads to complete (true/false).
|
|
download_policy.cleanup Remove downloaded file(s) from the filesystem on success. Requires script access to the download directory.
|
|
download_policy.purge_on_complete On success, purge ALL completed/failed downloads from aria2c history. Use as a workaround for older aria2c versions where targeted removal fails.
|
|
download_policy.output_dir Output directory for downloads.
|
|
download_policy.aria_remote_dir The absolute download path on the remote aria2c host.
|
|
download_policy.aria_fragments_dir The local path to find fragments for merging (if different from output_dir).
|
|
download_policy.auto_merge_fragments For fragmented downloads, automatically merge parts after download (true/false). Requires aria_wait=true.
|
|
download_policy.remove_fragments_after_merge For fragmented downloads, delete fragment files after a successful merge (true/false). Requires auto_merge_fragments=true.
|
|
|
|
[stop_conditions]
|
|
stop_conditions.on_failure Stop on any download failure (true/false).
|
|
stop_conditions.on_http_403 Stop on any HTTP 403 error (true/false).
|
|
stop_conditions.on_error_rate.max_errors Stop test if more than N errors (of any type) occur within the time period.
|
|
stop_conditions.on_error_rate.per_minutes Time period in minutes for the error rate calculation.
|
|
stop_conditions.on_cumulative_403.max_errors Stop test if more than N HTTP 403 errors occur within the time period.
|
|
stop_conditions.on_cumulative_403.per_minutes Time period in minutes for the cumulative 403 calculation.
|
|
stop_conditions.on_quality_degradation.trigger_if_missing_formats A format ID or comma-separated list of IDs. Triggers if any are missing.
|
|
stop_conditions.on_quality_degradation.max_triggers Stop test if quality degradation is detected N times.
|
|
stop_conditions.on_quality_degradation.per_minutes Time period in minutes for the quality degradation calculation.
|
|
--------------------------------------------------------------------------------
|
|
"""
|
|
)
|
|
parser.add_argument('--policy', help='Path to the YAML policy file. Required unless --list-policies is used.')
|
|
parser.add_argument('--policy-name', help='Name of the policy to run from a multi-policy file (if it contains "---" separators).')
|
|
parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
|
|
    parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single line of --set arguments, then exit.')
|
|
parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
|
|
|
|
# Add a group for aria2c-specific overrides for clarity in --help
|
|
aria_group = parser.add_argument_group('Aria2c RPC Downloader Overrides', 'Shortcuts for common --set options for the aria2c_rpc downloader.')
|
|
aria_group.add_argument('--auto-merge-fragments', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.auto_merge_fragments.')
|
|
aria_group.add_argument('--remove-fragments-after-merge', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.remove_fragments_after_merge.')
|
|
aria_group.add_argument('--fragments-dir', help='Shortcut for --set download_policy.aria_fragments_dir=PATH.')
|
|
aria_group.add_argument('--remote-dir', help='Shortcut for --set download_policy.aria_remote_dir=PATH.')
|
|
aria_group.add_argument('--cleanup', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.cleanup.')
|
|
|
|
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for the orchestrator and underlying scripts.')
|
|
parser.add_argument('--dry-run', action='store_true', help='Print the effective policy and exit without running the test.')
|
|
parser.add_argument('--disable-log-writing', action='store_true', help='Disable writing state, stats, and log files. By default, files are created for each run.')
|
|
return parser
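
# Illustrative wiring into a top-level CLI (the dispatcher below is hypothetical
# and not part of this module):
#
#   parser = argparse.ArgumentParser(prog='ytops-client')
#   subparsers = parser.add_subparsers(dest='command')
#   add_stress_policy_parser(subparsers)
#   args = parser.parse_args()
#   if args.command == 'stress-policy':
#       sys.exit(main_stress_policy(args))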
|
|
|
|
|
|
def list_policies():
|
|
"""Scans the policies directory and prints a list of available policies."""
|
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
project_root = os.path.abspath(os.path.join(script_dir, '..'))
|
|
policies_dir = os.path.join(project_root, 'policies')
|
|
|
|
if not os.path.isdir(policies_dir):
|
|
print(f"Error: Policies directory not found at '{policies_dir}'", file=sys.stderr)
|
|
return 1
|
|
|
|
print("Available Policies:")
|
|
print("=" * 20)
|
|
|
|
policy_files = sorted(Path(policies_dir).glob('*.yaml'))
|
|
if not policy_files:
|
|
print("No policy files (.yaml) found.")
|
|
return 0
|
|
|
|
for policy_file in policy_files:
|
|
print(f"\n--- File: {policy_file.relative_to(project_root)} ---")
|
|
try:
|
|
with open(policy_file, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
|
|
# Split into documents. The separator is a line that is exactly '---'.
|
|
documents = re.split(r'^\-\-\-$', content, flags=re.MULTILINE)
|
|
|
|
found_any_in_file = False
|
|
for doc in documents:
|
|
doc = doc.strip()
|
|
if not doc:
|
|
continue
|
|
|
|
lines = doc.split('\n')
|
|
policy_name = None
|
|
description_lines = []
|
|
|
|
# Find name and description
|
|
for i, line in enumerate(lines):
|
|
if line.strip().startswith('name:'):
|
|
policy_name = line.split(':', 1)[1].strip()
|
|
|
|
# Look backwards for comments
|
|
j = i - 1
|
|
current_desc_block = []
|
|
while j >= 0 and lines[j].strip().startswith('#'):
|
|
comment = lines[j].strip().lstrip('#').strip()
|
|
current_desc_block.insert(0, comment)
|
|
j -= 1
|
|
|
|
if current_desc_block:
|
|
description_lines = current_desc_block
|
|
break
|
|
|
|
if policy_name:
|
|
found_any_in_file = True
|
|
print(f" - Name: {policy_name}")
|
|
if description_lines:
|
|
# Heuristic to clean up "Policy: " prefix
|
|
if description_lines[0].lower().startswith('policy:'):
|
|
description_lines[0] = description_lines[0][len('policy:'):].strip()
|
|
|
|
print(f" Description: {description_lines[0]}")
|
|
for desc_line in description_lines[1:]:
|
|
print(f" {desc_line}")
|
|
else:
|
|
print(" Description: (No description found)")
|
|
|
|
relative_path = policy_file.relative_to(project_root)
|
|
print(f" Usage: --policy {relative_path} --policy-name {policy_name}")
|
|
|
|
if not found_any_in_file:
|
|
print(" (No named policies found in this file)")
|
|
|
|
except Exception as e:
|
|
print(f" Error parsing {policy_file.name}: {e}")
|
|
|
|
return 0
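
# The description heuristic above collects consecutive '#' comment lines placed
# directly above a 'name:' key. An illustrative policy header such as:
#
#   # Policy: Fetch-only smoke test
#   # Uses a single static profile.
#   name: fetch_smoke_test
#
# would be listed as:
#   - Name: fetch_smoke_test
#     Description: Fetch-only smoke test
#                  Uses a single static profile.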
|
|
|
|
|
|
def main_stress_policy(args):
|
|
"""Main logic for the 'stress-policy' command."""
|
|
if args.list_policies:
|
|
return list_policies()
|
|
|
|
if not args.policy:
|
|
print("Error: --policy is required unless using --list-policies.", file=sys.stderr)
|
|
return 1
|
|
|
|
# Handle --show-overrides early, as it doesn't run the test.
|
|
if args.show_overrides:
|
|
policy = load_policy(args.policy, args.policy_name)
|
|
if not policy:
|
|
return 1 # load_policy prints its own error
|
|
print_policy_overrides(policy)
|
|
return 0
|
|
|
|
policy = load_policy(args.policy, args.policy_name)
|
|
policy = apply_overrides(policy, args.set)
|
|
|
|
# Apply direct CLI overrides after --set, so they have final precedence.
|
|
if args.auto_merge_fragments is not None:
|
|
policy.setdefault('download_policy', {})['auto_merge_fragments'] = args.auto_merge_fragments
|
|
if args.remove_fragments_after_merge is not None:
|
|
policy.setdefault('download_policy', {})['remove_fragments_after_merge'] = args.remove_fragments_after_merge
|
|
if args.fragments_dir is not None:
|
|
policy.setdefault('download_policy', {})['aria_fragments_dir'] = args.fragments_dir
|
|
if args.remote_dir is not None:
|
|
policy.setdefault('download_policy', {})['aria_remote_dir'] = args.remote_dir
|
|
if args.cleanup is not None:
|
|
policy.setdefault('download_policy', {})['cleanup'] = args.cleanup
|
|
|
|
policy_name = policy.get('name', args.policy_name or Path(args.policy).stem)
|
|
|
|
# --- Logging Setup ---
|
|
log_level = logging.DEBUG if args.verbose else logging.INFO
|
|
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' if args.verbose else '%(asctime)s - %(message)s'
|
|
date_format = None if args.verbose else '%H:%M:%S'
|
|
|
|
root_logger = logging.getLogger()
|
|
root_logger.setLevel(log_level)
|
|
|
|
# Remove any existing handlers to avoid duplicate logs
|
|
for handler in root_logger.handlers[:]:
|
|
root_logger.removeHandler(handler)
|
|
|
|
# Add console handler
|
|
console_handler = logging.StreamHandler(sys.stdout)
|
|
console_handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
|
|
root_logger.addHandler(console_handler)
|
|
|
|
if not args.disable_log_writing:
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
log_filename = f"stress-policy-{timestamp}-{policy_name}.log"
|
|
try:
|
|
file_handler = logging.FileHandler(log_filename, encoding='utf-8')
|
|
file_handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
|
|
root_logger.addHandler(file_handler)
|
|
# Use print because logger is just being set up.
|
|
print(f"Logging to file: {log_filename}", file=sys.stderr)
|
|
except IOError as e:
|
|
print(f"Error: Could not open log file {log_filename}: {e}", file=sys.stderr)
|
|
|
|
state_manager = StateManager(policy_name, disable_log_writing=args.disable_log_writing)
|
|
|
|
# --- Graceful shutdown handler ---
|
|
def shutdown_handler(signum, frame):
|
|
if not shutdown_event.is_set():
|
|
logger.info(f"\nSignal {signum} received, shutting down gracefully...")
|
|
shutdown_event.set()
|
|
|
|
# Save state immediately to prevent loss on interrupt.
|
|
logger.info("Attempting to save state before shutdown...")
|
|
state_manager.close()
|
|
|
|
# Kill running subprocesses to unblock workers
|
|
with process_lock:
|
|
if running_processes:
|
|
logger.info(f"Terminating {len(running_processes)} running subprocess(es)...")
|
|
for p in running_processes:
|
|
try:
|
|
# Kill the entire process group to ensure child processes (like yt-dlp) are terminated.
|
|
os.killpg(os.getpgid(p.pid), signal.SIGKILL)
|
|
except (ProcessLookupError, PermissionError):
|
|
pass # Process already finished or we lack permissions
|
|
logger.info("Subprocesses terminated. Waiting for workers to finish. Press Ctrl+C again to force exit.")
|
|
else:
|
|
logger.info("Second signal received, forcing exit.")
|
|
# Use os._exit for a hard exit that doesn't run cleanup handlers,
|
|
# which can deadlock if locks are held.
|
|
os._exit(1)
|
|
|
|
signal.signal(signal.SIGINT, shutdown_handler)
|
|
signal.signal(signal.SIGTERM, shutdown_handler)
|
|
|
|
settings = policy.get('settings', {})
|
|
|
|
# --- Load sources based on mode ---
|
|
mode = settings.get('mode', 'full_stack')
|
|
sources = [] # This will be a list of URLs or Path objects
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
urls_file = settings.get('urls_file')
|
|
if not urls_file:
|
|
logger.error("Policy mode requires 'settings.urls_file'.")
|
|
return 1
|
|
try:
|
|
with open(urls_file, 'r', encoding='utf-8') as f:
|
|
content = f.read()
|
|
try:
|
|
data = json.loads(content)
|
|
if isinstance(data, list) and all(isinstance(item, str) for item in data):
|
|
sources = data
|
|
logger.info(f"Loaded {len(sources)} URLs/IDs from JSON array in {urls_file}.")
|
|
else:
|
|
logger.error(f"URL file '{urls_file}' is valid JSON but not an array of strings.")
|
|
return 1
|
|
except json.JSONDecodeError:
|
|
sources = [line.strip() for line in content.splitlines() if line.strip()]
|
|
logger.info(f"Loaded {len(sources)} URLs/IDs from text file {urls_file}.")
|
|
except IOError as e:
|
|
logger.error(f"Failed to read urls_file {urls_file}: {e}")
|
|
return 1
|
|
|
|
# Clean up URLs/IDs which might have extra quotes, commas, or brackets from copy-pasting
|
|
cleaned_sources = []
|
|
for source in sources:
|
|
cleaned_source = source.strip().rstrip(',').strip().strip('\'"[]').strip()
|
|
if cleaned_source:
|
|
cleaned_sources.append(cleaned_source)
|
|
|
|
if len(cleaned_sources) != len(sources):
|
|
logger.info(f"Cleaned URL list, removed {len(sources) - len(cleaned_sources)} empty or invalid entries.")
|
|
|
|
sources = cleaned_sources
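        # Illustrative cleanup (URL is a placeholder): a copy-pasted entry like
        #   "https://youtu.be/dQw4w9WgXcQ",
        # (with quotes and a trailing comma) is reduced to
        #   https://youtu.be/dQw4w9WgXcQ
        # and entries that end up empty are dropped.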
|
|
elif mode == 'download_only':
|
|
# If not in continuous mode, load sources once at the start.
|
|
# In continuous mode, `sources` is populated at the start of each cycle.
|
|
if settings.get('directory_scan_mode') != 'continuous':
|
|
info_json_dir = settings.get('info_json_dir')
|
|
if not info_json_dir:
|
|
logger.error("Policy mode 'download_only' requires 'settings.info_json_dir'.")
|
|
return 1
|
|
try:
|
|
all_files = sorted(Path(info_json_dir).glob('*.json'))
|
|
sample_percent = settings.get('info_json_dir_sample_percent')
|
|
if sample_percent and 0 < sample_percent <= 100:
|
|
sample_count = int(len(all_files) * (sample_percent / 100.0))
|
|
num_to_sample = min(len(all_files), max(1, sample_count))
|
|
sources = random.sample(all_files, k=num_to_sample)
|
|
logger.info(f"Randomly sampled {len(sources)} files ({sample_percent}%) from {info_json_dir}")
|
|
else:
|
|
sources = all_files
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"Failed to read info_json_dir {info_json_dir}: {e}")
|
|
return 1
|
|
|
|
# In continuous download mode, sources are loaded inside the loop, so we skip this check.
|
|
if settings.get('directory_scan_mode') != 'continuous' and not sources:
|
|
logger.error("No sources (URLs or info.json files) to process. Exiting.")
|
|
return 1
|
|
|
|
# Grouping of sources by profile is now handled inside the main loop to support continuous mode.
|
|
profile_extraction_regex = settings.get('profile_extraction_regex')
|
|
|
|
# For 'auto' worker calculation and initial display, we need to group sources once.
|
|
# This will be re-calculated inside the loop for continuous mode.
|
|
profile_tasks = None
|
|
if mode == 'download_only' and profile_extraction_regex:
|
|
profile_tasks = collections.defaultdict(list)
|
|
for source_path in sources:
|
|
profile_name = get_profile_from_filename(source_path, profile_extraction_regex)
|
|
if profile_name:
|
|
profile_tasks[profile_name].append(source_path)
|
|
else:
|
|
profile_tasks['unmatched_profile'].append(source_path)
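    # Illustrative grouping, assuming get_profile_from_filename() applies the
    # regex and returns its first capture group (as described in --help): with
    # settings.profile_extraction_regex = '.*-(.*?).json', a file named
    #   20240101_120000-dQw4w9WgXcQ-tv_user_1.json
    # is grouped under profile 'tv_user_1'; non-matching files fall into
    # 'unmatched_profile'.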
|
|
|
|
# --- Auto-calculate workers if needed ---
|
|
exec_control = policy.get('execution_control', {})
|
|
original_workers_setting = exec_control.get('workers')
|
|
if original_workers_setting == 'auto':
|
|
if mode == 'download_only' and profile_tasks is not None:
|
|
num_profiles = len(profile_tasks)
|
|
# Use auto_workers_max from policy, with a default of 8.
|
|
max_workers = exec_control.get('auto_workers_max', 8)
|
|
num_workers = min(num_profiles, max_workers)
|
|
exec_control['workers'] = max(1, num_workers)
|
|
logger.info(f"Calculated 'auto' workers based on {num_profiles} profiles (max: {max_workers}): {exec_control['workers']}")
|
|
else:
|
|
target_rate_cfg = exec_control.get('target_rate', {})
|
|
target_reqs = target_rate_cfg.get('requests')
|
|
target_mins = target_rate_cfg.get('per_minutes')
|
|
if target_reqs and target_mins and sources:
|
|
target_rpm = target_reqs / target_mins
|
|
num_sources = len(sources)
|
|
                sleep_cfg = exec_control.get('sleep_between_tasks', {})
                # Match the fallback in display_effective_policy: if max_seconds is unset,
                # use min_seconds instead of 0 so the average sleep is not underestimated.
                min_sleep_s = sleep_cfg.get('min_seconds', 0)
                avg_sleep = (min_sleep_s + (sleep_cfg.get('max_seconds') or min_sleep_s)) / 2
|
|
assumed_task_duration = 12 # Must match assumption in display_effective_policy
|
|
|
|
# Formula: workers = (total_work_seconds) / (total_time_for_work)
|
|
# total_time_for_work is derived from the target rate:
|
|
# (total_cycle_time) = (60 * num_sources) / target_rpm
|
|
# total_time_for_work = total_cycle_time - avg_sleep
|
|
work_time_available = (60 * num_sources / target_rpm) - avg_sleep
|
|
|
|
if work_time_available <= 0:
|
|
# The sleep time alone makes the target rate impossible.
|
|
# Set workers to max parallelism as a best-effort.
|
|
num_workers = num_sources
|
|
logger.warning(f"Target rate of {target_rpm} req/min is likely unachievable due to sleep time of {avg_sleep}s.")
|
|
logger.warning(f"Setting workers to max parallelism ({num_workers}) as a best effort.")
|
|
else:
|
|
total_work_seconds = num_sources * assumed_task_duration
|
|
num_workers = total_work_seconds / work_time_available
|
|
|
|
                calculated_workers = max(1, int(num_workers + 0.99))  # Round up (approximate ceiling)
|
|
exec_control['workers'] = calculated_workers
|
|
logger.info(f"Calculated 'auto' workers based on target rate: {calculated_workers}")
|
|
else:
|
|
logger.warning("Cannot calculate 'auto' workers: 'target_rate' or sources are not defined. Defaulting to 1 worker.")
|
|
exec_control['workers'] = 1
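    # Worked example of the 'auto' worker sizing above (hypothetical numbers):
    # 60 sources, target_rate of 120 requests per 10 minutes (12/min), 5s
    # average sleep, 12s assumed task duration:
    #   work_time_available = 60 * 60 / 12 - 5 = 295s
    #   total_work_seconds  = 60 * 12         = 720s
    #   workers             = 720 / 295 = 2.44, rounded up to 3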
|
|
|
|
display_effective_policy(
|
|
policy,
|
|
policy_name,
|
|
sources=sources,
|
|
profile_names=list(profile_tasks.keys()) if profile_tasks is not None else None,
|
|
original_workers_setting=original_workers_setting
|
|
)
|
|
|
|
if args.dry_run:
|
|
logger.info("Dry run complete. Exiting.")
|
|
return 0
|
|
|
|
start_time = time.time()
|
|
|
|
run_until_cfg = exec_control.get('run_until', {})
|
|
duration_seconds = (run_until_cfg.get('minutes') or 0) * 60
|
|
max_cycles = run_until_cfg.get('cycles') or 0
|
|
max_requests = run_until_cfg.get('requests') or 0
|
|
|
|
# --- Main test loop ---
|
|
cycles = 0
|
|
try:
|
|
def process_task(source, source_index, cycle_num):
|
|
"""Worker task for one source (URL or file path)."""
|
|
try:
|
|
if shutdown_event.is_set():
|
|
return [] # Shutdown initiated, do not start new work
|
|
|
|
# --- Step 1: Get info.json content ---
|
|
info_json_content = None
|
|
profile_name = None
|
|
if mode in ['full_stack', 'fetch_only']:
|
|
gen_policy = policy.get('info_json_generation_policy', {})
|
|
cmd_template = gen_policy.get('command_template')
|
|
|
|
# --- Profile Generation ---
|
|
profile_mode = settings.get('profile_mode')
|
|
pm_policy = settings.get('profile_management')
|
|
|
|
if profile_mode == 'per_worker_with_rotation':
|
|
if not pm_policy:
|
|
logger.error("Profile mode 'per_worker_with_rotation' requires 'settings.profile_management' configuration.")
|
|
# Log a failure event and skip
|
|
event = {'type': 'fetch', 'path': str(source), 'success': False, 'error_type': 'ConfigError', 'details': 'Missing profile_management section'}
|
|
state_manager.log_event(event)
|
|
return []
|
|
worker_id = get_worker_id()
|
|
profile_name = state_manager.get_or_rotate_worker_profile(worker_id, policy)
|
|
elif pm_policy:
|
|
# This is the existing dynamic cooldown logic
|
|
profile_name = state_manager.get_next_available_profile(policy)
|
|
if not profile_name:
|
|
logger.warning("No available profiles to run task. Skipping.")
|
|
return []
|
|
else:
|
|
# This is the legacy logic
|
|
profile_prefix = settings.get('profile_prefix')
|
|
if profile_prefix:
|
|
if profile_mode == 'per_request':
|
|
timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
|
|
profile_name = f"{profile_prefix}_{timestamp}_{source_index}"
|
|
elif profile_mode == 'per_worker':
|
|
worker_index = get_worker_id()
|
|
profile_name = f"{profile_prefix}_{worker_index}"
|
|
else: # Default to pool logic
|
|
profile_pool = settings.get('profile_pool')
|
|
if profile_pool:
|
|
profile_name = f"{profile_prefix}_{source_index % profile_pool}"
|
|
else:
|
|
profile_name = "default" # A final fallback
|
|
|
|
# --- Rate Limit Check ---
|
|
if not state_manager.check_and_update_rate_limit(profile_name, policy):
|
|
return [] # Rate limited, skip this task
|
|
|
|
# --- Command Generation ---
|
|
gen_cmd = []
|
|
save_dir = settings.get('save_info_json_dir')
|
|
save_path = None
|
|
|
|
if cmd_template:
|
|
# Low-level template mode. The user is responsible for output.
|
|
video_id = get_video_id(source)
|
|
|
|
# A heuristic to add '--' if the video ID looks like an option.
|
|
# We split the template, find the standalone '{url}' placeholder,
|
|
# and insert '--' before it. This assumes it's a positional argument.
|
|
template_parts = shlex.split(cmd_template)
|
|
try:
|
|
# Find from the end, in case it's used in an option value earlier.
|
|
url_index = len(template_parts) - 1 - template_parts[::-1].index('{url}')
|
|
if video_id.startswith('-'):
|
|
template_parts.insert(url_index, '--')
|
|
except ValueError:
|
|
# '{url}' not found as a standalone token, do nothing special.
|
|
pass
|
|
|
|
# Rejoin and then format the whole string.
|
|
gen_cmd_str = ' '.join(template_parts)
|
|
gen_cmd_str = gen_cmd_str.format(url=video_id, profile=profile_name)
|
|
gen_cmd = shlex.split(gen_cmd_str)
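                        # Illustrative expansion (template and ID are placeholders): with
                        #   command_template: "bin/ytops-client get-info --profile {profile} {url}"
                        # and a video ID of '-abc123xyz_', the standalone '{url}' token is found
                        # and '--' is inserted before it so the ID is not parsed as an option:
                        #   bin/ytops-client get-info --profile tv_user_0 -- -abc123xyz_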
|
|
if args.verbose and '--verbose' not in gen_cmd:
|
|
gen_cmd.append('--verbose')
|
|
else:
|
|
# High-level policy mode. Orchestrator builds the command.
|
|
script_cmd_str = settings.get('info_json_script')
|
|
if not script_cmd_str:
|
|
logger.error("High-level policy requires 'settings.info_json_script'.")
|
|
return []
|
|
gen_cmd = shlex.split(script_cmd_str)
|
|
video_id = get_video_id(source)
|
|
|
|
client_to_use, request_params = state_manager.get_client_for_request(profile_name, gen_policy)
|
|
|
|
# --- Multi-Cookie File Logic ---
|
|
if pm_policy:
|
|
cookie_files = pm_policy.get('cookie_files')
|
|
if cookie_files and isinstance(cookie_files, list) and len(cookie_files) > 0:
|
|
profile_index = -1
|
|
# Extract index from profile name. Matches _<index> or _<worker_id>_<gen>
|
|
match = re.search(r'_(\d+)(?:_(\d+))?$', profile_name)
|
|
if match:
|
|
# For rotation mode, the first group is worker_id. For pool mode, it's the profile index.
|
|
profile_index = int(match.group(1))
|
|
|
|
if profile_index != -1:
|
|
cookie_file_path = cookie_files[profile_index % len(cookie_files)]
|
|
if not request_params:
|
|
request_params = {}
|
|
request_params['cookies_file_path'] = cookie_file_path
|
|
logger.info(f"[{source}] Assigned cookie file '{os.path.basename(cookie_file_path)}' to profile '{profile_name}'")
|
|
else:
|
|
logger.warning(f"[{source}] Could not determine index for profile '{profile_name}' to assign cookie file.")
|
|
|
|
if client_to_use:
|
|
gen_cmd.extend(['--client', str(client_to_use)])
|
|
if gen_policy.get('auth_host'):
|
|
gen_cmd.extend(['--auth-host', str(gen_policy.get('auth_host'))])
|
|
if gen_policy.get('auth_port'):
|
|
gen_cmd.extend(['--auth-port', str(gen_policy.get('auth_port'))])
|
|
if profile_name != "default":
|
|
gen_cmd.extend(['--profile', profile_name])
|
|
|
|
# Add --print-proxy so we can track it for stats
|
|
if '--print-proxy' not in gen_cmd:
|
|
gen_cmd.append('--print-proxy')
|
|
|
|
if request_params:
|
|
gen_cmd.extend(['--request-params-json', json.dumps(request_params)])
|
|
if gen_policy.get('assigned_proxy_url'):
|
|
gen_cmd.extend(['--assigned-proxy-url', str(gen_policy.get('assigned_proxy_url'))])
|
|
if gen_policy.get('proxy_rename'):
|
|
gen_cmd.extend(['--proxy-rename', str(gen_policy.get('proxy_rename'))])
|
|
|
|
if args.verbose:
|
|
gen_cmd.append('--verbose')
|
|
|
|
# If saving is enabled, delegate saving to the client script.
|
|
if save_dir:
|
|
try:
|
|
os.makedirs(save_dir, exist_ok=True)
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
# Note: Using a timestamped filename to avoid race conditions.
|
|
filename = f"{timestamp}-{video_id}-{profile_name}.json"
|
|
save_path = Path(save_dir) / filename
|
|
gen_cmd.extend(['--output', str(save_path)])
|
|
# No longer need to suppress, it's the default.
|
|
except IOError as e:
|
|
logger.error(f"[{source}] Could not prepare save path in '{save_dir}': {e}")
|
|
# Continue without saving
|
|
save_path = None
|
|
|
|
# If not saving to a file, we need the output on stdout for the download step.
|
|
if not save_dir:
|
|
gen_cmd.append('--print-info-out')
|
|
|
|
# The positional video_id argument must come after all options.
|
|
# Use '--' to ensure it's not parsed as an option if it starts with a dash.
|
|
if video_id.startswith('-'):
|
|
gen_cmd.append('--')
|
|
gen_cmd.append(video_id)
|
|
|
|
worker_id = get_worker_id()
|
|
profile_log_part = f" [Profile: {profile_name}]" if profile_name else ""
|
|
logger.info(f"[Worker {worker_id}]{profile_log_part} [{source}] Running info.json command: {' '.join(shlex.quote(s) for s in gen_cmd)}")
|
|
retcode, stdout, stderr = run_command(gen_cmd)
|
|
info_json_content = stdout
|
|
|
|
# --- Extract proxy from stderr and record it for stats ---
|
|
proxy_url = None
|
|
proxy_match = re.search(r"Proxy used: (.*)", stderr)
|
|
if proxy_match:
|
|
proxy_url = proxy_match.group(1).strip()
|
|
state_manager.record_proxy_usage(proxy_url)
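                    # The client script is expected to report the proxy on stderr in the form
                    # matched above, e.g. (address is a placeholder):
                    #   Proxy used: socks5://10.0.0.5:1080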
|
|
|
|
if retcode == 0:
|
|
# If the client script saved the file, stdout will be empty.
|
|
# If we need the content for a download step, we must read it back.
|
|
if not info_json_content.strip():
|
|
# Check stderr for the success message to confirm save.
|
|
saved_path_match = re.search(r"Successfully saved info.json to (.*)", stderr)
|
|
if saved_path_match:
|
|
output_file_str = saved_path_match.group(1).strip().strip("'\"")
|
|
logger.info(f"[{source}] -> {saved_path_match.group(0).strip()}")
|
|
|
|
# If this is a full_stack test, we need the content for the download worker.
|
|
if mode == 'full_stack':
|
|
try:
|
|
with open(output_file_str, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except IOError as e:
|
|
logger.error(f"Could not read back info.json from '{output_file_str}': {e}")
|
|
retcode = -1 # Treat as failure
|
|
elif save_path:
|
|
# Command was told to save, but didn't confirm. Assume it worked if exit code is 0.
|
|
logger.info(f"[{source}] -> Client script exited 0, assuming info.json was saved to '{save_path}'")
|
|
if mode == 'full_stack':
|
|
try:
|
|
with open(save_path, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except IOError as e:
|
|
logger.error(f"Could not read back info.json from '{save_path}': {e}")
|
|
retcode = -1
|
|
# If stdout is empty and we weren't saving, it's an issue.
|
|
elif not save_path and not cmd_template:
|
|
logger.error(f"[{source}] info.json generation gave no stdout and was not asked to save to a file.")
|
|
retcode = -1
|
|
else:
|
|
logger.info(f"[{source}] -> Successfully fetched info.json to memory/stdout.")
|
|
|
|
event = {'type': 'fetch', 'path': str(source), 'profile': profile_name}
|
|
if proxy_url:
|
|
event['proxy_url'] = proxy_url
|
|
|
|
if retcode != 0:
|
|
error_lines = [line for line in stderr.strip().split('\n') if 'error' in line.lower()]
|
|
error_msg = error_lines[-1] if error_lines else stderr.strip().split('\n')[-1]
|
|
logger.error(f"[{source}] Failed to generate info.json: {error_msg}")
|
|
event.update({'success': False, 'error_type': 'GetInfoJsonFail', 'details': error_msg})
|
|
state_manager.log_event(event)
|
|
return []
|
|
|
|
# Check for quality degradation before logging success
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
quality_policy = s_conditions.get('on_quality_degradation')
|
|
if quality_policy and info_json_content:
|
|
try:
|
|
info_data = json.loads(info_json_content)
|
|
available_formats = {f.get('format_id') for f in info_data.get('formats', [])}
|
|
|
|
required_formats = quality_policy.get('trigger_if_missing_formats')
|
|
if required_formats:
|
|
# Can be a single string, a comma-separated string, or a list of strings.
|
|
if isinstance(required_formats, str):
|
|
required_formats = [f.strip() for f in required_formats.split(',')]
|
|
|
|
missing_formats = [f for f in required_formats if f not in available_formats]
|
|
|
|
if missing_formats:
|
|
logger.warning(f"[{source}] Quality degradation detected. Missing required formats: {', '.join(missing_formats)}.")
|
|
event['quality_degradation_trigger'] = True
|
|
event['missing_formats'] = missing_formats
|
|
except (json.JSONDecodeError, TypeError):
|
|
logger.warning(f"[{source}] Could not parse info.json or find formats to check for quality degradation.")
|
|
|
|
# Record request for profile cooldown policy if active
|
|
if pm_policy:
|
|
state_manager.record_profile_request(profile_name)
|
|
|
|
state_manager.increment_request_count()
|
|
event.update({'success': True, 'details': 'OK'})
|
|
state_manager.log_event(event)
|
|
|
|
# Saving is now delegated to the client script when a save_dir is provided.
|
|
# The orchestrator no longer saves the file itself.
|
|
|
|
elif mode == 'download_only':
|
|
# This path is for non-profile-grouped download_only mode.
|
|
try:
|
|
with open(source, 'r', encoding='utf-8') as f:
|
|
info_json_content = f.read()
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"[{get_display_name(source)}] Could not read info.json file: {e}")
|
|
return []
|
|
|
|
if mode != 'fetch_only':
|
|
return _run_download_logic(source, info_json_content, policy, state_manager, profile_name=profile_name)
|
|
|
|
return []
|
|
finally:
|
|
# Sleep after the task is completed to space out requests from this worker.
|
|
exec_control = policy.get('execution_control', {})
|
|
sleep_cfg = exec_control.get('sleep_between_tasks', {})
|
|
sleep_min = sleep_cfg.get('min_seconds', 0)
|
|
|
|
if sleep_min > 0:
|
|
sleep_max = sleep_cfg.get('max_seconds') or sleep_min
|
|
if sleep_max > sleep_min:
|
|
sleep_duration = random.uniform(sleep_min, sleep_max)
|
|
else:
|
|
sleep_duration = sleep_min
|
|
|
|
logger.debug(f"Worker sleeping for {sleep_duration:.2f}s after task for {get_display_name(source)}.")
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.2)
|
|
|
|
while not shutdown_event.is_set():
|
|
if duration_seconds and (time.time() - start_time) > duration_seconds:
|
|
logger.info("Reached duration limit. Stopping.")
|
|
break
|
|
if max_requests > 0 and state_manager.get_request_count() >= max_requests:
|
|
logger.info(f"Reached max requests ({max_requests}). Stopping.")
|
|
break
|
|
|
|
# --- Rescan for sources if in continuous download mode ---
|
|
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
|
|
info_json_dir = settings.get('info_json_dir')
|
|
try:
|
|
all_files_in_dir = Path(info_json_dir).glob('*.json')
|
|
processed_files = state_manager.get_processed_files()
|
|
|
|
new_files = [f for f in all_files_in_dir if str(f) not in processed_files]
|
|
|
|
# Sort by modification time, oldest first, to process in order of creation
|
|
new_files.sort(key=os.path.getmtime)
|
|
|
|
max_files_per_cycle = settings.get('max_files_per_cycle')
|
|
if max_files_per_cycle and len(new_files) > max_files_per_cycle:
|
|
sources = new_files[:max_files_per_cycle]
|
|
else:
|
|
sources = new_files
|
|
|
|
if not sources:
|
|
sleep_duration = settings.get('sleep_if_no_new_files_seconds', 10)
|
|
logger.info(f"No new info.json files found in '{info_json_dir}'. Sleeping for {sleep_duration}s...")
|
|
|
|
# Interruptible sleep
|
|
sleep_end_time = time.time() + sleep_duration
|
|
while time.time() < sleep_end_time:
|
|
if shutdown_event.is_set():
|
|
break
|
|
time.sleep(0.5)
|
|
|
|
if shutdown_event.is_set():
|
|
break
|
|
continue # Skip to next iteration of the while loop
|
|
|
|
except (IOError, FileNotFoundError) as e:
|
|
logger.error(f"Failed to read info_json_dir {info_json_dir}: {e}. Retrying in 10s.")
|
|
time.sleep(10)
|
|
continue
|
|
|
|
# --- Group sources for this cycle ---
|
|
task_items = sources
|
|
profile_tasks = None
|
|
if mode == 'download_only' and profile_extraction_regex:
|
|
profile_tasks = collections.defaultdict(list)
|
|
for source_path in sources:
|
|
profile_name = get_profile_from_filename(source_path, profile_extraction_regex)
|
|
if profile_name:
|
|
profile_tasks[profile_name].append(source_path)
|
|
else:
|
|
profile_tasks['unmatched_profile'].append(source_path)
|
|
task_items = list(profile_tasks.items())
|
|
|
|
# If there's nothing to do this cycle, skip.
|
|
if not task_items:
|
|
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
|
|
# The sleep logic is handled inside the rescanning block.
|
|
continue
|
|
else:
|
|
logger.info("No more sources to process. Ending test.")
|
|
break
|
|
|
|
cycles += 1
|
|
if max_cycles > 0 and cycles > max_cycles:
|
|
logger.info(f"Reached max cycles ({max_cycles}). Stopping.")
|
|
break
|
|
|
|
logger.info(f"--- Cycle #{cycles} (Total Requests: {state_manager.get_request_count()}) ---")
|
|
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=exec_control.get('workers', 1)) as executor:
|
|
if mode == 'download_only' and profile_tasks is not None:
|
|
# New: submit profile tasks
|
|
future_to_source = {
|
|
executor.submit(process_profile_task, profile_name, file_list, policy, state_manager, cycles): profile_name
|
|
for profile_name, file_list in task_items
|
|
}
|
|
else:
|
|
# Old: submit individual file/url tasks
|
|
future_to_source = {
|
|
executor.submit(process_task, source, i, cycles): source
|
|
for i, source in enumerate(task_items)
|
|
}
|
|
|
|
should_stop = False
|
|
pending_futures = set(future_to_source.keys())
|
|
|
|
while pending_futures and not should_stop:
|
|
done, pending_futures = concurrent.futures.wait(
|
|
pending_futures, return_when=concurrent.futures.FIRST_COMPLETED
|
|
)
|
|
|
|
for future in done:
|
|
if shutdown_event.is_set():
|
|
should_stop = True
|
|
break
|
|
|
|
source = future_to_source[future]
|
|
try:
|
|
results = future.result()
|
|
|
|
# Mark file as processed in continuous download mode
|
|
if mode == 'download_only' and settings.get('directory_scan_mode') == 'continuous':
|
|
state_manager.mark_file_as_processed(source)
|
|
|
|
if settings.get('mark_processed_files'):
|
|
try:
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|
new_path = source.parent / f"{source.name}.{timestamp}.processed"
|
|
source.rename(new_path)
|
|
logger.info(f"Marked '{source.name}' as processed by renaming to '{new_path.name}'")
|
|
except (IOError, OSError) as e:
|
|
logger.error(f"Failed to rename processed file '{source.name}': {e}")
|
|
|
|
for result in results:
|
|
if not result['success']:
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
is_cumulative_403_active = s_conditions.get('on_cumulative_403', {}).get('max_errors')
|
|
if s_conditions.get('on_failure') or \
|
|
(s_conditions.get('on_http_403') and not is_cumulative_403_active and result['error_type'] == 'HTTP 403') or \
|
|
(s_conditions.get('on_timeout') and result['error_type'] == 'Timeout'):
|
|
logger.info(f"!!! STOP CONDITION MET: Immediate stop on failure '{result['error_type']}' for {get_display_name(source)}. Shutting down all workers. !!!")
|
|
should_stop = True
|
|
break
|
|
except concurrent.futures.CancelledError:
|
|
logger.info(f"Task for {get_display_name(source)} was cancelled during shutdown.")
|
|
event = {
|
|
'type': 'fetch' if mode != 'download_only' else 'download',
|
|
'path': str(source),
|
|
'success': False,
|
|
'error_type': 'Cancelled',
|
|
'details': 'Task cancelled during shutdown.'
|
|
}
|
|
state_manager.log_event(event)
|
|
except Exception as exc:
|
|
logger.error(f'{get_display_name(source)} generated an exception: {exc}')
|
|
|
|
if should_stop:
|
|
break
|
|
|
|
# Check for cumulative error rate stop conditions
|
|
s_conditions = policy.get('stop_conditions', {})
|
|
error_rate_policy = s_conditions.get('on_error_rate')
|
|
if error_rate_policy and not should_stop:
|
|
max_errors = error_rate_policy.get('max_errors')
|
|
per_minutes = error_rate_policy.get('per_minutes')
|
|
if max_errors and per_minutes:
|
|
error_count = state_manager.check_cumulative_error_rate(max_errors, per_minutes)
|
|
if error_count > 0:
|
|
logger.info(f"!!! STOP CONDITION MET: Error rate exceeded: {error_count} errors in the last {per_minutes} minute(s). Shutting down. !!!")
|
|
should_stop = True
|
|
|
|
cumulative_403_policy = s_conditions.get('on_cumulative_403')
|
|
if cumulative_403_policy and not should_stop:
|
|
max_errors = cumulative_403_policy.get('max_errors')
|
|
per_minutes = cumulative_403_policy.get('per_minutes')
|
|
if max_errors and per_minutes:
|
|
error_count = state_manager.check_cumulative_error_rate(max_errors, per_minutes, error_type='HTTP 403')
|
|
if error_count > 0:
|
|
logger.info(f"!!! STOP CONDITION MET: Cumulative 403 error rate exceeded: {error_count} errors in the last {per_minutes} minute(s). Shutting down. !!!")
|
|
should_stop = True
|
|
|
|
quality_degradation_policy = s_conditions.get('on_quality_degradation')
|
|
if quality_degradation_policy and not should_stop:
|
|
max_triggers = quality_degradation_policy.get('max_triggers')
|
|
per_minutes = quality_degradation_policy.get('per_minutes')
|
|
if max_triggers and per_minutes:
|
|
trigger_count = state_manager.check_quality_degradation_rate(max_triggers, per_minutes)
|
|
if trigger_count > 0:
|
|
logger.info(f"!!! STOP CONDITION MET: Quality degradation triggered {trigger_count} times in the last {per_minutes} minute(s). Shutting down. !!!")
|
|
should_stop = True
|
|
|
|
if should_stop:
|
|
break
|
|
|
|
# Check for duration limit after each task completes
|
|
if duration_seconds and (time.time() - start_time) > duration_seconds:
|
|
logger.info("Reached duration limit. Cancelling remaining tasks.")
|
|
should_stop = True
|
|
|
|
if should_stop and pending_futures:
|
|
logger.info(f"Cancelling {len(pending_futures)} outstanding task(s).")
|
|
for future in pending_futures:
|
|
future.cancel()
|
|
|
|
if should_stop: break
|
|
|
|
if max_cycles > 0 and cycles >= max_cycles:
|
|
break
|
|
|
|
logger.info("Cycle complete.")
|
|
|
|
except KeyboardInterrupt:
|
|
logger.info("\nForceful shutdown requested...")
|
|
finally:
|
|
state_manager.print_summary(policy)
|
|
state_manager.close()
|
|
|
|
return 0
|