#!/usr/bin/env python3
"""
Tool to stress-test video format download URLs from an info.json.
"""
import argparse
import collections
import concurrent.futures
import json
import logging
import os
import random
import re
import shlex
import signal
import subprocess
import sys
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from urllib.parse import urlparse, parse_qs

# Configure logging
logger = logging.getLogger('stress_formats_tool')


def get_video_id(url: str) -> str:
    """Extracts a YouTube video ID from a URL."""
    # For URLs like https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"v=([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)

    # For URLs like https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be/([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)

    # For plain video IDs
    if re.fullmatch(r'[0-9A-Za-z_-]{11}', url):
        return url

    return "unknown_video_id"


def get_display_name(path_or_url):
    """Returns a clean name for logging, either a filename or a video ID."""
    if isinstance(path_or_url, Path):
        return path_or_url.name

    path_str = str(path_or_url)
    video_id = get_video_id(path_str)
    if video_id != "unknown_video_id":
        return video_id

    # Fallback for file paths as strings or weird URLs
    return Path(path_str).name


def format_size(b):
    """Format size in bytes to a human-readable string."""
    if b is None:
        return 'N/A'
    if b < 1024:
        return f"{b}B"
    elif b < 1024**2:
        return f"{b/1024:.2f}KiB"
    elif b < 1024**3:
        return f"{b/1024**2:.2f}MiB"
    else:
        return f"{b/1024**3:.2f}GiB"


class StatsTracker:
    """Tracks and reports statistics for the stress test."""

    def __init__(self, stats_file=None):
        self.events = []
        self.start_time = time.time()
        self.lock = threading.Lock()
        self.stats_file_path = stats_file
        self.stats_file_handle = None
        if self.stats_file_path:
            try:
                self.stats_file_handle = open(self.stats_file_path, 'a', encoding='utf-8')
            except IOError as e:
                logger.error(f"Could not open stats file {self.stats_file_path}: {e}")

    def log_event(self, event_data):
        """Log a download attempt event."""
        with self.lock:
            event_data['timestamp'] = datetime.now().isoformat()
            self.events.append(event_data)
            if self.stats_file_handle:
                self.stats_file_handle.write(json.dumps(event_data) + '\n')
                self.stats_file_handle.flush()

    def close(self):
        """Close the stats file."""
        if self.stats_file_handle:
            self.stats_file_handle.close()

    def print_summary(self):
        """Print a summary of the test run."""
        with self.lock:
            if not self.events:
                logger.info("No events were recorded.")
                return

            duration = time.time() - self.start_time

            # Separate events by type
            fetch_events = [e for e in self.events if e.get('type') == 'fetch']
            download_events = [e for e in self.events if e.get('type') != 'fetch']  # Default to download for old events

            logger.info("\n--- Test Summary ---")
            logger.info(f"Total duration: {duration:.2f} seconds")

            if fetch_events:
                total_fetches = len(fetch_events)
                successful_fetches = sum(1 for e in fetch_events if e['success'])
                failed_fetches = total_fetches - successful_fetches
                logger.info("\n--- Fetch Summary ---")
                logger.info(f"Total info.json fetch attempts: {total_fetches}")
                logger.info(f" - Successful: {successful_fetches}")
                logger.info(f" - Failed: {failed_fetches}")
                if total_fetches > 0:
                    success_rate = (successful_fetches / total_fetches) * 100
                    logger.info(f"Success rate: {success_rate:.2f}%")
                if failed_fetches > 0:
                    error_counts = collections.Counter(
                        e.get('error_type', 'Unknown') for e in fetch_events if not e['success'])
                    logger.info("Failure breakdown:")
                    for error_type, count in sorted(error_counts.items()):
                        logger.info(f" - {error_type}: {count}")

            if download_events:
                total_attempts = len(download_events)
                successes = sum(1 for e in download_events if e['success'])
                failures = total_attempts - successes
                logger.info("\n--- Download Summary ---")
                logger.info(f"Total download attempts: {total_attempts}")
                logger.info(f" - Successful: {successes}")
                logger.info(f" - Failed: {failures}")
                if total_attempts > 0:
                    success_rate = (successes / total_attempts) * 100
                    logger.info(f"Success rate: {success_rate:.2f}%")
                if duration > 1 and total_attempts > 0:
                    dpm = (total_attempts / duration) * 60
                    logger.info(f"Attempt rate: {dpm:.2f} attempts/minute")

                # Download volume stats
                total_bytes = sum(e.get('downloaded_bytes', 0) for e in download_events if e['success'])
                if total_bytes > 0:
                    logger.info(f"Total data downloaded: {format_size(total_bytes)}")
                    if duration > 1:
                        bytes_per_second = total_bytes / duration
                        gb_per_hour = (bytes_per_second * 3600) / (1024**3)
                        gb_per_day = gb_per_hour * 24
                        logger.info(f"Download rate: {gb_per_hour:.3f} GB/hour ({gb_per_day:.3f} GB/day)")

                if failures > 0:
                    error_counts = collections.Counter(
                        e.get('error_type', 'Unknown') for e in download_events if not e['success'])
                    logger.info("Failure breakdown:")
                    for error_type, count in sorted(error_counts.items()):
                        logger.info(f" - {error_type}: {count}")

            logger.info("--------------------")


def print_banner(args, info_jsons=None, urls=None):
    """Prints a summary of the test configuration."""
    logger.info("--- Stress Test Configuration ---")
    if args.urls_file:
        if args.fetch_only:
            logger.info("Mode: Fetch-only. Generating info.json files from URL list.")
        else:
            logger.info("Mode: Full-stack test from URL list.")
        logger.info(f"URL file: {args.urls_file} ({len(urls)} URLs)")
        logger.info(f"Workers: {args.workers}")
        logger.info(f"Info.json command: {args.info_json_gen_cmd}")
        if args.info_json_gen_cmd_alt and args.alt_cmd_every_n > 0:
            logger.info(f"Alternate command (every {args.alt_cmd_every_n} URLs): {args.info_json_gen_cmd_alt}")
        if args.profile_prefix:
            if args.profile_pool:
                logger.info(f"Profile mode: Pool of {args.profile_pool} (prefix: {args.profile_prefix})")
            elif args.profile_per_request:
                logger.info(f"Profile mode: New profile per request (prefix: {args.profile_prefix})")
    else:  # info-json-files
        logger.info("Mode: Download-only from static info.json files.")
        if info_jsons:
            logger.info(f"Files: {', '.join(str(p.name) for p in info_jsons.keys())}")
        logger.info(f"Workers: {args.workers}")
    logger.info(f"Format selection: {args.format}")
    logger.info(f"Sleep between cycles: {args.sleep}s")
    if args.sleep_formats > 0:
        logger.info(f"Sleep between formats: {args.sleep_formats}s")
    if args.duration > 0:
        logger.info(f"Test duration: {args.duration} minutes")
    if args.max_attempts > 0:
        logger.info(f"Max cycles: {args.max_attempts}")
    logger.info(f"Stop on failure: {args.stop_on_failure}")
    if args.stop_on_403:
        logger.info("Stop on 403 error: True")
    if args.stop_on_timeout:
        logger.info("Stop on timeout: True")
    logger.info(f"Stats file: {args.stats_file}")
    if args.stats_interval > 0:
        logger.info(f"Periodic stats interval: {args.stats_interval}s")
    if args.format_download_args:
        logger.info(f"Extra download args: {args.format_download_args}")
    logger.info("Download volume: Tracking total data downloaded")
    logger.info("---------------------------------")


def add_stress_formats_parser(subparsers):
    """Add the parser for the 'stress-formats' command."""
    parser = subparsers.add_parser(
        'stress-formats',
        description="A simple, command-line driven stress-testing tool for basic scenarios.\n"
                    "All options are configured via flags. For more complex scenarios and advanced\n"
                    "features like rate limiting and client rotation, use the 'stress-policy' command.",
        formatter_class=argparse.RawTextHelpFormatter,
        help='Run simple, flag-driven stress tests.',
        epilog="""
Usage examples:

  # Test a format from a static info.json every 60 seconds
  ytops-client stress-formats --info-json-files my_video.json -f 18 --sleep 60

  # Test with multiple info.json files in parallel using 4 workers
  ytops-client stress-formats --info-json-files "file1.json,file2.json,file3.json" -f 18 --sleep 60 --workers 4

  # Fetch a new info.json for a URL and test a format every 5 minutes
  ytops-client stress-formats --urls-file urls.txt --info-json-gen-cmd "bin/ytops-client get-info {url}" -f "18" --sleep 300

  # Run the test for exactly 10 cycles, continuing on failure
  ytops-client stress-formats --info-json-files my_video.json -f 18 --sleep 10 --max-attempts 10 --no-stop-on-failure
"""
    )

    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument('--info-json-files', help='Comma-separated paths to static info.json files to use for testing.')
    source_group.add_argument('--urls-file', help='Path to a file with URLs/IDs to test. Can be a text file (one per line) or a JSON array of strings.')

    parser.add_argument('-f', '--format', help='The format selection string. Can be a comma-separated list of IDs (e.g., "18,137"), "all", "random:X%%" (e.g., "random:10%%"), or "random_from:ID1,ID2,..." to pick one from a list. Required unless --fetch-only is used.')
    parser.add_argument('--sleep', type=int, default=60, help='Seconds to wait between batches of download attempts. Default: 60.')
    parser.add_argument('--sleep-formats', type=int, default=0, help='Seconds to wait between format downloads within a single file/cycle. Default: 0.')
    parser.add_argument('--max-attempts', type=int, default=0, help='Maximum number of test cycles. 0 means run indefinitely. Default: 0.')
    parser.add_argument('--duration', type=int, default=0, help='Total duration to run the test in minutes. 0 means run indefinitely (or until max-attempts is reached). Default: 0.')
    parser.add_argument('--stop-on-failure', action='store_true', help='Stop the test immediately after the first download failure.')
    parser.add_argument('--no-stop-on-failure', dest='stop_on_failure', action='store_false', help='Continue testing even after a download failure. (Default)')
    parser.set_defaults(stop_on_failure=False)
    parser.add_argument('--stop-on-403', action='store_true', help='Stop the test immediately after a 403 Forbidden error.')
    parser.add_argument('--stop-on-timeout', action='store_true', help='Stop the test immediately after a read timeout error.')
    parser.add_argument('--fetch-only', action='store_true', help='When used with --urls-file, only fetch and save info.json files without performing download tests.')
    parser.add_argument('--workers', type=int, default=1, help='Number of parallel workers for multi-file mode. Default: 1.')
    parser.add_argument('--stats-file', default='stress_test_stats.jsonl', help='File to log statistics for each attempt. Default: stress_test_stats.jsonl')
    parser.add_argument('--stats-interval', type=int, default=0, help='Interval in seconds to print stats summary periodically. 0 disables. Default: 0.')

    # Arguments for info.json generation
    parser.add_argument('--info-json-gen-cmd', help='Command template to generate info.json. Use {url}, {worker_id}, {cycle}, and {profile} as placeholders. Required with --urls-file.')
    parser.add_argument('--info-json-gen-cmd-alt', help='Alternate command template for info.json generation.')
    parser.add_argument('--alt-cmd-every-n', type=int, default=0, help='Use the alternate command for every N-th URL (e.g., N=3 means URLs 3, 6, 9...). Requires --info-json-gen-cmd-alt.')

    # Profile generation options
    profile_group = parser.add_argument_group('Profile Generation Options (for --urls-file mode)')
    profile_group.add_argument('--profile-prefix', help='Base name for generated profile IDs (e.g., "test_user"). Used with --profile-pool or --profile-per-request.')
    profile_group.add_argument('--profile-pool', type=int, metavar='N', help='Use a pool of N profiles. Profile ID will be {prefix}_{worker_id %% N}. Requires --profile-prefix.')
    profile_group.add_argument('--profile-per-request', action='store_true', help='Generate a new unique profile ID for each request. Profile ID will be {prefix}_{timestamp}_{worker_id}. Requires --profile-prefix.')

    # Arguments to pass to format_download.py
    parser.add_argument('--format-download-args', nargs='+', help='Additional arguments to pass to the download tool. E.g., --proxy-rename s/old/new/ --cleanup')

    parser.add_argument('--verbose', action='store_true', help='Enable verbose output.')
    return parser


def run_command(cmd, input_data=None):
    """Runs a command, captures its output, and returns status."""
    logger.debug(f"Running command: {' '.join(cmd)}")
    try:
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE if input_data else None,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding='utf-8'
        )
        stdout, stderr = process.communicate(input=input_data)
        return process.returncode, stdout, stderr
    except FileNotFoundError:
        logger.error(f"Command not found: {cmd[0]}. Make sure it's in your PATH.")
        return -1, "", f"Command not found: {cmd[0]}"
    except Exception as e:
        logger.error(f"An error occurred while running command: {' '.join(cmd)}. Error: {e}")
        return -1, "", str(e)


def run_download_worker(info_json_path, info_json_content, format_to_download, args):
    """
    Performs a single download attempt. Designed to be run in a worker thread.
    """
    # 1. Attempt download
    download_cmd = [
        sys.executable, '-m', 'ytops_client.cli', 'download',
        '-f', format_to_download
    ]
    if args.format_download_args:
        # With nargs='+', this is a list.
        # If it's one item, it might be a single quoted string of args that needs splitting.
        if len(args.format_download_args) == 1:
            download_cmd.extend(shlex.split(args.format_download_args[0]))
        else:
            # Multiple items, assume they are already split by the shell.
            download_cmd.extend(args.format_download_args)

    display_name = get_display_name(info_json_path)
    logger.info(f"[{display_name} @ {format_to_download}] Kicking off download process...")
    retcode, stdout, stderr = run_command(download_cmd, input_data=info_json_content)

    # 2. Check result
    is_403_error = "HTTP Error 403" in stderr
    is_timeout_error = "Read timed out" in stderr

    result = {
        'type': 'download',
        'path': str(info_json_path),
        'format': format_to_download,
        'success': retcode == 0,
        'error_type': None,
        'details': '',
        'downloaded_bytes': 0
    }

    if retcode == 0:
        # Success
        downloaded_filepath = ''
        # The filename is the last non-empty line of stdout that doesn't look like a progress bar
        lines = stdout.splitlines()
        for line in reversed(lines):
            if line and not line.strip().startswith('['):
                downloaded_filepath = line.strip()
                break

        details_str = "OK"
        if downloaded_filepath:
            details_str = f"Downloaded: {Path(downloaded_filepath).name}"

        # Parse download size from stderr
        size_in_bytes = 0
        size_match = re.search(r'\[download\]\s+100%\s+of\s+~?([0-9.]+)(B|KiB|MiB|GiB)', stderr)
        if size_match:
            value = float(size_match.group(1))
            unit = size_match.group(2)
            multipliers = {"B": 1, "KiB": 1024, "MiB": 1024**2, "GiB": 1024**3}
            size_in_bytes = int(value * multipliers.get(unit, 1))
            result['downloaded_bytes'] = size_in_bytes
            details_str += f" ({size_match.group(1)}{unit})"

        result['details'] = details_str
    else:
        # Failure
        # Try to get the most relevant error line
        error_lines = [line for line in stderr.strip().split('\n') if 'ERROR:' in line]
        if error_lines:
            result['details'] = error_lines[-1]
        else:
            # If no "ERROR:" line, use the last few lines of stderr for context.
            last_lines = stderr.strip().split('\n')[-3:]  # Get up to last 3 lines
            result['details'] = ' | '.join(line.strip() for line in last_lines if line.strip())
            if not result['details']:
                result['details'] = "Unknown error (stderr was empty)"

        if is_403_error:
            result['error_type'] = 'HTTP 403'
        elif is_timeout_error:
            result['error_type'] = 'Timeout'
        else:
            result['error_type'] = f'Exit Code {retcode}'

    return result


def process_info_json_cycle(path, content, args, stats):
    """
    Processes one info.json file for one cycle, downloading selected formats sequentially.
    Logs events and returns a list of results.
    """
    results = []
    should_stop_file = False
    display_name = get_display_name(path)

    # Determine formats to test based on the info.json content
    try:
        info_data = json.loads(content)
        available_formats = info_data.get('formats', [])
        if not available_formats:
            logger.warning(f"[{display_name}] No formats found in info.json. Skipping.")
            return []

        available_format_ids = [f['format_id'] for f in available_formats]
        formats_to_test = []
        format_selection_mode = args.format.lower()

        if format_selection_mode == 'all':
            formats_to_test = available_format_ids
            logger.info(f"[{display_name}] Testing all {len(formats_to_test)} available formats.")
        elif format_selection_mode.startswith('random:'):
            try:
                percent_str = format_selection_mode.split(':')[1].rstrip('%')
                percent = float(percent_str)
                if not (0 < percent <= 100):
                    raise ValueError("Percentage must be between 0 and 100.")
                count = max(1, int(len(available_format_ids) * (percent / 100.0)))
                formats_to_test = random.sample(available_format_ids, k=count)
                logger.info(f"[{display_name}] Randomly selected {len(formats_to_test)} formats ({percent}%) from all available to test: {', '.join(formats_to_test)}")
            except (ValueError, IndexError) as e:
                logger.error(f"[{display_name}] Invalid random format selection '{args.format}': {e}. Skipping.")
                return []
        elif format_selection_mode.startswith('random_from:'):
            try:
                choices_str = format_selection_mode.split(':', 1)[1]
                if not choices_str:
                    raise ValueError("No formats provided after 'random_from:'.")
                format_choices = [f.strip() for f in choices_str.split(',') if f.strip()]
                # Filter the choices to only those available in the current info.json
                valid_choices = [f for f in format_choices if f in available_format_ids]
                if not valid_choices:
                    logger.warning(f"[{display_name}] None of the requested formats for random selection ({', '.join(format_choices)}) are available. Skipping.")
                    return []
                formats_to_test = [random.choice(valid_choices)]
                logger.info(f"[{display_name}] Randomly selected 1 format from your list to test: {formats_to_test[0]}")
            except (ValueError, IndexError) as e:
                logger.error(f"[{display_name}] Invalid random_from format selection '{args.format}': {e}. Skipping.")
                return []
        else:
            # Standard comma-separated list
            requested_formats = [f.strip() for f in args.format.split(',') if f.strip()]
            formats_to_test = []
            for req_fmt in requested_formats:
                # Check for exact match first
                if req_fmt in available_format_ids:
                    formats_to_test.append(req_fmt)
                    continue

                # If no exact match, check for formats that start with this ID + '-'
                # e.g., req_fmt '140' should match '140-0'
                prefix_match = f"{req_fmt}-"
                first_match = next((af for af in available_format_ids if af.startswith(prefix_match)), None)
                if first_match:
                    logger.info(f"[{display_name}] Requested format '{req_fmt}' not found. Using first available match: '{first_match}'.")
                    formats_to_test.append(first_match)
                else:
                    # This could be a complex selector like 'bestvideo' or '299/298', so keep it.
                    if req_fmt not in available_format_ids:
                        logger.warning(f"[{display_name}] Requested format '{req_fmt}' not found in available formats.")
                    formats_to_test.append(req_fmt)
    except json.JSONDecodeError:
        logger.error(f"[{display_name}] Failed to parse info.json. Skipping.")
        return []

    for i, format_id in enumerate(formats_to_test):
        if should_stop_file:
            break

        # Check if the format URL is expired before attempting to download
        format_details = next((f for f in available_formats if f.get('format_id') == format_id), None)
        if format_details and 'url' in format_details:
            parsed_url = urlparse(format_details['url'])
            query_params = parse_qs(parsed_url.query)
            expire_ts_str = query_params.get('expire', [None])[0]
            if expire_ts_str and expire_ts_str.isdigit():
                expire_ts = int(expire_ts_str)
                if expire_ts < time.time():
                    logger.warning(f"[{display_name}] Skipping format '{format_id}' because its URL is expired.")
                    result = {
                        'type': 'download',
                        'path': str(path),
                        'format': format_id,
                        'success': True,
                        'error_type': 'Skipped',
                        'details': 'Download URL is expired',
                        'downloaded_bytes': 0
                    }
                    stats.log_event(result)
                    results.append(result)
                    continue  # Move to the next format

        result = run_download_worker(path, content, format_id, args)
        stats.log_event(result)
        results.append(result)

        status = "SUCCESS" if result['success'] else f"FAILURE ({result['error_type']})"
        logger.info(f"Result for {display_name} (format {format_id}): {status} - {result.get('details', 'OK')}")

        if not result['success']:
            # This flag stops processing more formats for THIS file in this cycle.
            # The main loop will decide if all cycles should stop.
            if args.stop_on_failure or \
                    (args.stop_on_403 and result['error_type'] == 'HTTP 403') or \
                    (args.stop_on_timeout and result['error_type'] == 'Timeout'):
                logger.info(f"Stopping further format tests for {display_name} in this cycle due to failure.")
                should_stop_file = True

        # Sleep between formats if needed
        if args.sleep_formats > 0 and i < len(formats_to_test) - 1:
            logger.info(f"Sleeping for {args.sleep_formats}s before next format for {display_name}...")
            time.sleep(args.sleep_formats)

    return results


def main_stress_formats(args):
    """Main logic for the 'stress-formats' command."""
    # The --format argument is required unless we are only fetching info.json files.
    if not args.fetch_only and not args.format:
        logger.error("Error: argument -f/--format is required when not using --fetch-only.")
        return 1
    if (args.profile_pool or args.profile_per_request) and not args.profile_prefix:
        logger.error("--profile-prefix is required when using --profile-pool or --profile-per-request.")
        return 1
    if args.urls_file and args.fetch_only and not args.info_json_gen_cmd:
        logger.error("--info-json-gen-cmd is required when using --urls-file with --fetch-only.")
        return 1

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        # Make the default logger more concise for test output
        for handler in logging.root.handlers:
            handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s', datefmt='%H:%M:%S'))

    stats = StatsTracker(args.stats_file)
    start_time = time.time()
    duration_seconds = args.duration * 60 if args.duration > 0 else 0

    # --- Load sources ---
    info_jsons = {}
    urls = []
    if args.info_json_files:
        info_json_files = [Path(p.strip()) for p in args.info_json_files.split(',')]
        for file_path in info_json_files:
            if not file_path.is_file():
                logger.error(f"Info.json file not found: {file_path}")
                continue
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    info_jsons[file_path] = f.read()
            except (IOError, json.JSONDecodeError) as e:
                logger.error(f"Failed to read or parse {file_path}: {e}")

        if not info_jsons:
            logger.error("No valid info.json files to process. Exiting.")
            return 1
        logger.info(f"Loaded {len(info_jsons)} info.json file(s).")
        print_banner(args, info_jsons=info_jsons)

    elif args.urls_file:
        if not args.info_json_gen_cmd:
            logger.error("--info-json-gen-cmd is required when using --urls-file.")
            return 1
        try:
            with open(args.urls_file, 'r', encoding='utf-8') as f:
                content = f.read()

            # Try parsing as JSON array first
            try:
                data = json.loads(content)
                if isinstance(data, list) and all(isinstance(item, str) for item in data):
                    urls = data
                    logger.info(f"Loaded {len(urls)} URLs/IDs from JSON array in {args.urls_file}.")
                else:
                    # Valid JSON, but not a list of strings. Treat as error to avoid confusion.
                    logger.error(f"URL file '{args.urls_file}' is valid JSON but not an array of strings.")
                    return 1
            except json.JSONDecodeError:
                # Fallback to line-by-line parsing for plain text files
                urls = [line.strip() for line in content.splitlines() if line.strip()]
                logger.info(f"Loaded {len(urls)} URLs/IDs from text file {args.urls_file}.")

            if not urls:
                logger.error(f"URL file '{args.urls_file}' is empty or contains no valid URLs/IDs.")
                return 1
        except IOError as e:
            logger.error(f"Failed to read URL file {args.urls_file}: {e}")
            return 1

        # Clean up URLs/IDs which might have extra quotes, commas, or brackets from copy-pasting
        cleaned_urls = []
        for url in urls:
            # Strip whitespace, then trailing comma, then surrounding junk, then whitespace again
            cleaned_url = url.strip().rstrip(',').strip().strip('\'"[]').strip()
            if cleaned_url:
                cleaned_urls.append(cleaned_url)
        if len(cleaned_urls) != len(urls):
            logger.info(f"Cleaned URL list, removed {len(urls) - len(cleaned_urls)} empty or invalid entries.")
        urls = cleaned_urls

        if not urls:
            logger.error("URL list is empty after cleaning. Exiting.")
            return 1
        print_banner(args, urls=urls)

    # --- Main test loop ---
    cycles = 0
    last_stats_print_time = time.time()

    try:
        # --- Worker function for URL mode ---
        def process_url_task(url, url_index, cycle_num):
            """Worker to generate info.json for a URL and then test formats."""
            # 1. Generate profile name if configured
            profile_name = None
            if args.profile_prefix:
                if args.profile_pool:
                    profile_name = f"{args.profile_prefix}_{url_index % args.profile_pool}"
                elif args.profile_per_request:
                    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
                    profile_name = f"{args.profile_prefix}_{timestamp}_{url_index}"

            # 2. Select and format the generation command
            gen_cmd_template = args.info_json_gen_cmd
            if args.alt_cmd_every_n > 0 and args.info_json_gen_cmd_alt and (url_index + 1) % args.alt_cmd_every_n == 0:
                gen_cmd_template = args.info_json_gen_cmd_alt
                logger.info(f"Using alternate command for URL #{url_index + 1}: {url}")

            try:
                # shlex.split handles quoted arguments in the template
                video_id = get_video_id(url)
                gen_cmd = []
                template_args = shlex.split(gen_cmd_template)

                # If the video ID could be mistaken for an option, and it appears to be
                # a positional argument, insert '--' to prevent misinterpretation.
                if video_id.startswith('-'):
                    try:
                        # Heuristic: if {url} is the last token, it's likely positional.
                        if template_args and template_args[-1] == '{url}':
                            template_args.insert(-1, '--')
                    except (ValueError, IndexError):
                        pass  # {url} not found or list is empty.

                for arg in template_args:
                    # Replace placeholders
                    formatted_arg = arg.replace('{url}', video_id) \
                        .replace('{worker_id}', str(url_index)) \
                        .replace('{cycle}', str(cycle_num))
                    if profile_name:
                        formatted_arg = formatted_arg.replace('{profile}', profile_name)
                    gen_cmd.append(formatted_arg)

                # Pass verbose flag through if set
                if args.verbose and 'get_info_json_client.py' in gen_cmd_template and '--verbose' not in gen_cmd_template:
                    gen_cmd.append('--verbose')
            except Exception as e:
                logger.error(f"Failed to format --info-json-gen-cmd: {e}")
                stats.log_event({'path': url, 'success': False, 'error_type': 'BadGenCmd', 'details': 'Cmd format error'})
                return []

            # 3. Run command to get info.json
            log_msg = f"[{url}] Generating info.json"
            if profile_name:
                log_msg += f" with profile '{profile_name}'"
            log_msg += "..."
            logger.info(log_msg)
            retcode, stdout, stderr = run_command(gen_cmd)

            if retcode != 0:
                error_msg = stderr.strip().split('\n')[-1]
                logger.error(f"[{url}] Failed to generate info.json: {error_msg}")
                event = {'type': 'fetch', 'path': url, 'success': False, 'error_type': 'GetInfoJsonFail', 'details': error_msg}
                stats.log_event(event)
                return []  # Return empty list, as no formats were tested

            # Handle --fetch-only
            if args.fetch_only:
                logger.info(f"[{url}] Successfully fetched info.json. Skipping download due to --fetch-only.")
                event = {'type': 'fetch', 'path': url, 'success': True, 'details': 'OK'}
                stats.log_event(event)
                return []  # Return empty list, indicating no downloads to check for failure

            # 4. Pass to the format processing function
            return process_info_json_cycle(url, stdout, args, stats)

        while True:
            if duration_seconds and (time.time() - start_time) > duration_seconds:
                logger.info(f"Reached duration limit of {args.duration} minutes. Stopping.")
                break

            cycles += 1
            if args.max_attempts > 0 and cycles > args.max_attempts:
                logger.info(f"Reached max cycles ({args.max_attempts}). Stopping.")
                break

            logger.info(f"--- Cycle #{cycles} ---")

            with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
                future_to_identifier = {}
                if args.info_json_files:
                    future_to_identifier = {
                        executor.submit(process_info_json_cycle, path, content, args, stats): path
                        for path, content in info_jsons.items()
                    }
                elif args.urls_file:
                    future_to_identifier = {
                        executor.submit(process_url_task, url, i, cycles): url
                        for i, url in enumerate(urls)
                    }

                should_stop = False
                # Use a set of futures that we can modify while iterating
                futures = set(future_to_identifier.keys())

                while futures and not should_stop:
                    # Wait for the next future to complete
                    done, futures = concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)

                    for future in done:
                        identifier = future_to_identifier[future]
                        identifier_name = get_display_name(identifier)
                        try:
                            results = future.result()
                            # Check if any result from this file triggers a global stop
                            for result in results:
                                if not result['success']:
                                    if args.stop_on_failure:
                                        logger.info(f"Failure on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-failure.")
                                        should_stop = True
                                    elif args.stop_on_403 and result['error_type'] == 'HTTP 403':
                                        logger.info(f"403 error on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-403.")
                                        should_stop = True
                                    elif args.stop_on_timeout and result['error_type'] == 'Timeout':
                                        logger.info(f"Timeout on {identifier_name} (format {result['format']}). Shutting down all workers due to --stop-on-timeout.")
                                        should_stop = True
                        except Exception as exc:
                            logger.error(f'{identifier_name} generated an exception: {exc}')
                            stats.log_event({'path': str(identifier), 'success': False, 'error_type': 'Exception', 'details': str(exc)})

                        if should_stop:
                            break  # Stop processing results from 'done' set

                    # Check for duration limit after each batch of tasks completes
                    if duration_seconds and (time.time() - start_time) > duration_seconds:
                        logger.info(f"Reached duration limit of {args.duration} minutes. Cancelling remaining tasks.")
                        should_stop = True

                # If the loop was exited, cancel any remaining tasks
                if should_stop and futures:
                    logger.info(f"Cancelling {len(futures)} outstanding task(s).")
                    for future in futures:
                        future.cancel()

            if should_stop:
                break

            if args.stats_interval > 0 and (time.time() - last_stats_print_time) >= args.stats_interval:
                stats.print_summary()
                last_stats_print_time = time.time()

            if args.max_attempts > 0 and cycles >= args.max_attempts:
                break

            logger.info(f"Cycle complete. Sleeping for {args.sleep} seconds...")
            # Interruptible sleep that respects the total test duration
            sleep_end_time = time.time() + args.sleep
            should_stop_after_sleep = False
            while time.time() < sleep_end_time:
                if duration_seconds and (time.time() - start_time) >= duration_seconds:
                    logger.info(f"Reached duration limit of {args.duration} minutes during sleep. Stopping.")
                    should_stop_after_sleep = True
                    break
                time.sleep(1)  # Check every second

            if should_stop_after_sleep:
                break

    except KeyboardInterrupt:
        logger.info("\nCtrl+C received, shutting down...")
    finally:
        stats.print_summary()
        stats.close()

    return 0 if not any(not e['success'] for e in stats.events) else 1
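

# The functions above are normally registered with the main `ytops-client` CLI via
# add_stress_formats_parser() and dispatched to main_stress_formats(). The block below
# is a minimal standalone entry-point sketch, not the packaged wiring: it assumes no
# other subcommands need to be registered and that basicConfig logging is acceptable
# for ad-hoc runs. The leading-underscore names are local illustrative choices.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%H:%M:%S')
    _parser = argparse.ArgumentParser(prog='ytops-client')
    _subparsers = _parser.add_subparsers(dest='command', required=True)
    add_stress_formats_parser(_subparsers)
    _args = _parser.parse_args()
    sys.exit(main_stress_formats(_args))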