#!/usr/bin/env python3
"""
CLI tool to generate granular download task files from a directory of info.json files.
"""
import argparse
import json
import logging
import re
import signal
import time
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Graceful shutdown flag, set by the signal handler below.
shutdown_event = False


def handle_shutdown(sig, frame):
    global shutdown_event
    logger.info("Shutdown signal received. Stopping task generator...")
    shutdown_event = True


def sanitize_format_for_filename(format_str: str) -> str:
    """Sanitizes a format selector string to be filesystem-friendly."""
    # Replace common problematic characters (slashes, '+', ':', brackets, whitespace)
    # with underscores.
    sanitized = re.sub(r'[\\/+:\[\]\s]', '_', format_str)
    # Strip leading/trailing separator characters left over from the substitution.
    sanitized = sanitized.strip('._-')
    return sanitized


def add_task_generator_parser(subparsers):
    """Adds the parser for the 'task-generator' command."""
    parser = subparsers.add_parser(
        'task-generator',
        description="Generate granular download task files from info.jsons.",
        formatter_class=argparse.RawTextHelpFormatter,
        help="Generate granular download task files."
    )
    # All functionality is under subcommands for extensibility.
    generate_subparsers = parser.add_subparsers(dest='task_generator_command', help='Action to perform', required=True)

    gen_parser = generate_subparsers.add_parser(
        'generate',
        help='Generate task files from a source directory.',
        description='Reads info.json files from a source directory and creates one task file per format in an output directory.'
    )
    gen_parser.add_argument('--source-dir', required=True, help='Directory containing the source info.json files.')
    gen_parser.add_argument('--output-dir', required=True, help='Directory where the generated task files will be saved.')
    gen_parser.add_argument('--formats', required=True, help='A comma-separated list of format IDs or selectors to generate tasks for (e.g., "18,140,bestvideo").')
    gen_parser.add_argument('--live', action='store_true', help='Run continuously, watching the source directory for new files.')
    gen_parser.add_argument('--interval-seconds', type=int, default=10, help='When in --live mode, how often to scan for new files.')
    gen_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')

    reset_parser = generate_subparsers.add_parser(
        'reset',
        help='Reset processed source files.',
        description='Finds all *.processed and *.LOCKED.* files in the source directory and renames them back to *.json to allow re-generation.'
    )
    reset_parser.add_argument('--source-dir', required=True, help='Directory containing the source info.json files to reset.')
    reset_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
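
# Example invocations, assuming this parser is registered under a hypothetical
# top-level command named `ytops` (the parent CLI is not part of this file;
# a standalone entry point sketch is at the bottom of this module):
#   ytops task-generator generate --source-dir ./info_jsons --output-dir ./tasks --formats "18,140"
#   ytops task-generator generate --source-dir ./info_jsons --output-dir ./tasks --formats "18" --live --interval-seconds 30
#   ytops task-generator reset --source-dir ./info_jsons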

def _main_task_generator_reset(args):
    """Main logic for the 'reset' command."""
    source_dir = Path(args.source_dir)
    if not source_dir.is_dir():
        logger.error(f"Source directory does not exist or is not a directory: {source_dir}")
        return 1

    logger.info(f"Scanning for *.processed and *.LOCKED.* files in '{source_dir}' (recursively) to reset...")
    # Use rglob for recursive search
    processed_files = list(source_dir.rglob('*.json.processed'))
    locked_files = list(source_dir.rglob('*.json.LOCKED.*'))
    files_to_reset = processed_files + locked_files

    if not files_to_reset:
        logger.info("No processed or locked files found to reset.")
        return 0

    reset_count = 0
    for file_to_reset in files_to_reset:
        original_path = None
        if file_to_reset.name.endswith('.processed'):
            # Handles cases like file.json.processed
            original_path_str = str(file_to_reset).removesuffix('.processed')
            original_path = Path(original_path_str)
        elif '.LOCKED.' in file_to_reset.name:
            # Handles cases like file.json.LOCKED.0
            original_path_str = str(file_to_reset).split('.LOCKED.')[0]
            original_path = Path(original_path_str)

        if original_path:
            try:
                if original_path.exists():
                    logger.warning(f"Original file '{original_path.name}' already exists. Deleting '{file_to_reset.name}' instead of renaming.")
                    file_to_reset.unlink()
                else:
                    file_to_reset.rename(original_path)
                    logger.debug(f"Reset '{file_to_reset.name}' to '{original_path.name}'")
                reset_count += 1
            except (IOError, OSError) as e:
                logger.error(f"Failed to reset '{file_to_reset.name}': {e}")
        else:
            logger.warning(f"Could not determine original filename for '{file_to_reset.name}'. Skipping.")

    logger.info(f"Successfully reset {reset_count} file(s).")
    return 0


def main_task_generator(args):
    """Main logic for the 'task-generator' tool."""
    if args.task_generator_command == 'generate':
        return _main_task_generator_generate(args)
    elif args.task_generator_command == 'reset':
        return _main_task_generator_reset(args)
    return 1
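
# Illustrative effect of the reset pass (hypothetical filenames):
#   video1.json.processed -> video1.json   (renamed; ready for re-generation)
#   video2.json.LOCKED.3  -> video2.json   (renamed; lock marker discarded)
# If the original .json already exists, the marker file is deleted instead.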

def _generate_tasks_for_file(source_file, output_dir, formats_to_generate):
    """Helper function to generate task files for a single source info.json."""
    try:
        with open(source_file, 'r', encoding='utf-8') as f:
            info_json_content = json.load(f)
    except (IOError, json.JSONDecodeError) as e:
        logger.warning(f"Skipping file '{source_file.name}' due to read/parse error: {e}")
        return 0
    except Exception as e:
        logger.error(f"An unexpected error occurred while processing '{source_file.name}': {e}")
        return 0

    tasks_generated_this_file = 0
    try:
        # Use metadata to create a profile-specific subdirectory for better organization.
        profile_name_from_meta = info_json_content.get('_ytops_metadata', {}).get('profile_name')
        final_output_dir = output_dir
        if profile_name_from_meta:
            final_output_dir = output_dir / profile_name_from_meta
            # Ensure the subdirectory exists. This is done once per source file.
            try:
                final_output_dir.mkdir(parents=True, exist_ok=True)
            except OSError as e:
                logger.error(f"Could not create profile subdirectory '{final_output_dir}': {e}. Skipping tasks for this source file.")
                return 0

        for format_str in formats_to_generate:
            task_data = info_json_content.copy()
            # Add the target format to the task data itself. This makes the task file self-contained.
            task_data['_ytops_download_format'] = format_str

            # Create a unique filename for the task.
            original_stem = source_file.stem
            safe_format_str = sanitize_format_for_filename(format_str)
            task_filename = f"{original_stem}-format-{safe_format_str}.json"
            output_path = final_output_dir / task_filename

            # Check if this specific task file already exists to avoid re-writing.
            if output_path.exists():
                logger.debug(f"Task file already exists, skipping generation: {output_path}")
                continue

            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(task_data, f, indent=2)
            logger.debug(f"Generated task file: {output_path}")
            tasks_generated_this_file += 1

        # Mark the source file as processed now that we've iterated through all formats for it.
        try:
            processed_path = source_file.with_suffix(f"{source_file.suffix}.processed")
            source_file.rename(processed_path)
            logger.debug(f"Marked '{source_file.name}' as processed.")
        except (IOError, OSError) as e:
            logger.error(f"Failed to mark source file '{source_file.name}' as processed: {e}")

    except IOError as e:
        logger.error(f"An I/O error occurred while generating tasks for '{source_file.name}': {e}. It will be retried on the next run.")
        # The source file is not renamed, so it will be picked up again.

    return tasks_generated_this_file
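
# Illustrative naming scheme for the generated task files (hypothetical values):
#   source file: dQw4w9WgXcQ.json, with profile_name "profile-a" in _ytops_metadata
#   --formats "18,bestvideo[height<=720]"
# produces:
#   <output-dir>/profile-a/dQw4w9WgXcQ-format-18.json
#   <output-dir>/profile-a/dQw4w9WgXcQ-format-bestvideo_height<=720.json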

def _main_task_generator_generate(args):
    """Main logic for the 'generate' command."""
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    signal.signal(signal.SIGINT, handle_shutdown)
    signal.signal(signal.SIGTERM, handle_shutdown)

    source_dir = Path(args.source_dir)
    output_dir = Path(args.output_dir)
    formats_to_generate = [f.strip() for f in args.formats.split(',') if f.strip()]

    if not source_dir.is_dir():
        logger.error(f"Source directory does not exist or is not a directory: {source_dir}")
        return 1

    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.error(f"Could not create output directory '{output_dir}': {e}")
        return 1

    if not args.live:
        logger.info(f"Scanning for info.json files in '{source_dir}' (recursively)...")
        source_files = list(source_dir.rglob('*.json'))
        if not source_files:
            logger.info(f"No .json files found in '{source_dir}'. Nothing to do.")
            return 0

        logger.info(f"Found {len(source_files)} source file(s). Generating tasks for formats: {', '.join(formats_to_generate)}...")
        total_tasks_generated = 0
        for source_file in source_files:
            tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
            total_tasks_generated += tasks_for_file

        logger.info(f"Successfully generated {total_tasks_generated} new task file(s) in '{output_dir}'.")
        return 0

    # --- Live Mode ---
    # Note: keep output_dir outside source_dir; otherwise the generated task
    # files would themselves match the *.json scan below.
    logger.info(f"Running in LIVE mode. Watching '{source_dir}' for new files every {args.interval_seconds}s. Press Ctrl+C to stop.")
    total_tasks_generated = 0
    while not shutdown_event:
        try:
            logger.debug("Live mode: Scanning for new source files...")
            source_files = list(source_dir.rglob('*.json'))
            if not source_files:
                logger.debug("Live mode: No source files found.")
            else:
                logger.info(f"Live mode: Found {len(source_files)} source file(s) to process.")
                for source_file in source_files:
                    if shutdown_event:
                        break
                    tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
                    total_tasks_generated += tasks_for_file

            if shutdown_event:
                break

            logger.debug(f"Live mode: Scan complete. Sleeping for {args.interval_seconds}s...")
            # Sleep in short slices so a shutdown signal is honored promptly;
            # a single time.sleep(interval) would delay exit by up to the full interval.
            deadline = time.monotonic() + args.interval_seconds
            while not shutdown_event and time.monotonic() < deadline:
                time.sleep(0.5)
        except Exception as e:
            logger.error(f"An unexpected error occurred in the live loop: {e}", exc_info=True)
            time.sleep(5)  # Pause before retrying to avoid spamming errors

    logger.info(f"Task generator stopped. Total tasks generated in this run: {total_tasks_generated}.")
    return 0
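
# The functions above are designed to be registered into a larger CLI via
# add_task_generator_parser(); that parent dispatcher is not part of this file.
# The guard below is a minimal standalone entry point (a sketch, assuming the
# file is saved as e.g. task_generator.py) so the script can also run directly:
#   ./task_generator.py task-generator generate --source-dir ./info_jsons \
#       --output-dir ./tasks --formats "18,140"
if __name__ == '__main__':
    import sys

    _parser = argparse.ArgumentParser(description="Standalone wrapper around the task-generator tool.")
    _subparsers = _parser.add_subparsers(dest='command', required=True)
    add_task_generator_parser(_subparsers)
    sys.exit(main_task_generator(_parser.parse_args()))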