#!/usr/bin/env python3
"""
Tool to download a specified format using an info.json from stdin.
"""

import argparse
import json
import logging
import os
import re
import shlex
import subprocess
import sys
import tempfile
import time
from datetime import datetime

# Configure logging
logger = logging.getLogger('download_tool')

# Single source for the message used by every malformed --proxy-rename path.
_PROXY_RENAME_FORMAT_ERROR = "Invalid --proxy-rename format. Expected: s/pattern/replacement/"


def add_download_parser(subparsers):
    """Add the parser for the 'download cli' command.

    Args:
        subparsers: The object returned by ``ArgumentParser.add_subparsers()``
            that the ``cli`` sub-command is registered on.

    Returns:
        The newly created ``cli`` sub-parser.
    """
    parser = subparsers.add_parser(
        'cli',
        description='Download using the legacy yt-dlp CLI wrapper. This method invokes yt-dlp as a subprocess.',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Download using the legacy yt-dlp CLI wrapper.'
    )
    parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'),
                        help="Path to the info.json file. If not provided, reads from stdin.")
    parser.add_argument('-f', '--format', required=True,
                        help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
    parser.add_argument('--output-dir', default='.',
                        help='Directory to save the downloaded file. Defaults to current directory.')
    parser.add_argument('--save-info-json-dir',
                        help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
    parser.add_argument('--proxy',
                        help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This option sets the proxy, overriding any value from the info.json.')
    parser.add_argument('--proxy-rename',
                        help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
    parser.add_argument('--pause', type=int, default=0,
                        help='Seconds to wait before starting the download.')
    parser.add_argument('--print-traffic', action='store_true',
                        help='Print traffic instead of a progress bar.')
    parser.add_argument('--download-continue', action='store_true',
                        help='Enable download continuation (--continue and --part flags for yt-dlp).')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable verbose output for this script and yt-dlp.')
    parser.add_argument('--cli-config', default='cli.config',
                        help='Path to a yt-dlp configuration file. Defaults to "cli.config".')
    parser.add_argument('--cleanup', action='store_true',
                        help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
    parser.add_argument('--log-file',
                        help='Append full yt-dlp output to the specified log file.')
    parser.add_argument('--yt-dlp-path', default='yt-dlp',
                        help='Path to the yt-dlp executable. Defaults to "yt-dlp" in PATH.')
    parser.add_argument('--extra-ytdlp-args',
                        help='A string of extra command-line arguments to pass to yt-dlp.')
    parser.add_argument('--downloader',
                        help='Name of the external downloader to use (e.g., "aria2c", "native").')
    parser.add_argument('--downloader-args',
                        help='Arguments to pass to the external downloader (e.g., "aria2c:-x 8").')
    parser.add_argument('--merge-output-format',
                        help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
    return parser


def _load_info_json(source):
    """Read and parse the info.json from *source*, or from stdin when it is falsy.

    Returns the parsed JSON object as a dict, or None (after logging an error)
    when the input is empty, is not valid JSON, or is not a JSON object.
    """
    if source:
        input_source_name = getattr(source, 'name', '<input>')
        try:
            info_json_content = source.read()
        finally:
            # argparse.FileType opened this handle for us; release it unless
            # it is the process-wide stdin (what FileType returns for "-").
            if source is not sys.stdin:
                source.close()
    else:
        input_source_name = "stdin"
        info_json_content = sys.stdin.read()

    if not info_json_content.strip():
        logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
        return None

    try:
        info_data = json.loads(info_json_content)
    except json.JSONDecodeError:
        logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
        return None

    # Everything downstream calls info_data.get(...); a list/str/number would
    # otherwise surface later as an uncaught AttributeError.
    if not isinstance(info_data, dict):
        logger.error(
            f"Invalid info.json from {input_source_name}: expected a JSON object, "
            f"got {type(info_data).__name__}."
        )
        return None

    logger.info(f"Successfully loaded info.json from {input_source_name}.")
    return info_data


def _save_info_json_copy(info_data, directory):
    """Best-effort: write a timestamped, pretty-printed copy of *info_data* into *directory*."""
    video_id = info_data.get('id', 'unknown_video_id')
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_path = os.path.join(directory, f"{timestamp}-{video_id}-info.json")
    try:
        os.makedirs(directory, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(info_data, f, indent=2)
    except (OSError, ValueError) as e:
        # ValueError covers a NUL byte in the id-derived file name.
        # A failed copy must not abort the download itself.
        logger.error(f"Failed to save info.json: {e}")
    else:
        logger.info(f"Saved info.json to {output_path}")


def _rename_proxy(proxy_url, rename_rule):
    """Apply a sed-style ``s/pattern/replacement/`` rule to *proxy_url*.

    Raises:
        ValueError: the rule is not of the form ``s/pattern/replacement/``.
        re.error: the pattern (or replacement template) is not valid regex.
    """
    # The user's command line might include quotes that are preserved by shlex.
    # Strip them to get the raw rule.
    rename_rule = rename_rule.strip("'\"")
    parts = rename_rule.split('/')
    # Anything after the replacement (trailing '/', flags) is ignored.
    if parts[0] != 's' or len(parts) < 3:
        raise ValueError(_PROXY_RENAME_FORMAT_ERROR)

    renamed = re.sub(parts[1], parts[2], proxy_url)
    logger.info(f"Renamed proxy URL from '{proxy_url}' to '{renamed}' using rule '{rename_rule}'")
    return renamed


def _build_command(args, info_json_path, proxy_url):
    """Assemble the yt-dlp argument list from the parsed CLI options."""
    output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')

    # NOTE(review): yt-dlp documents --print as implying --quiet and --simulate
    # unless --no-simulate is also given; presumably the config file or
    # --extra-ytdlp-args supplies it — confirm.
    cmd = [
        args.yt_dlp_path,
        '--load-info-json', info_json_path,
        '-f', args.format,
        '-o', output_template,
        '--print', 'filename',
    ]

    if args.extra_ytdlp_args:
        cmd.extend(shlex.split(args.extra_ytdlp_args))

    if args.downloader:
        cmd.extend(['--downloader', args.downloader])
    if args.downloader_args:
        cmd.extend(['--downloader-args', args.downloader_args])
    if args.merge_output_format:
        cmd.extend(['--merge-output-format', args.merge_output_format])

    if args.download_continue:
        cmd.extend(['--continue', '--part'])

    if os.path.exists(args.cli_config):
        logger.info(f"Using config file: {args.cli_config}")
        cmd.extend(['--config-location', args.cli_config])
    else:
        logger.info(f"Config file '{args.cli_config}' not found. Using yt-dlp defaults.")

    if args.print_traffic:
        cmd.extend(['--print-traffic', '--no-progress'])
    else:
        cmd.append('--progress')

    if args.verbose:
        cmd.append('--verbose')

    if proxy_url:
        cmd.extend(['--proxy', proxy_url])

    return cmd


def _log_config_contents(config_path):
    """Log the whitespace-collapsed contents of the config file, if it exists and is non-empty."""
    if not os.path.exists(config_path):
        return
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config_contents = ' '.join(f.read().split())
    except OSError as e:
        logger.warning(f"Could not read config file {config_path}: {e}")
        return
    if config_contents:
        logger.info(f"cli.config contents: {config_contents}")


def _open_log_file(path, display_cmd):
    """Open *path* for appending and write the entry header; return the handle, or None on failure."""
    log_f = None
    try:
        log_f = open(path, 'a', encoding='utf-8')
        log_f.write(f"\n--- Log entry: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n")
        log_f.write(f"Command: {display_cmd}\n\n")
    except OSError as e:
        logger.error(f"Failed to open log file {path}: {e}")
        if log_f is not None:
            log_f.close()
        return None
    return log_f


def _run_captured(cmd, display_cmd, log_file):
    """Run *cmd* with stdout/stderr captured, echo both, and optionally append them to *log_file*.

    Returns:
        ``(return_code, downloaded_filepath)`` where ``downloaded_filepath`` is
        the last stdout line naming an existing path, or None if there is none.
    """
    # errors='replace': a stray non-UTF-8 byte in yt-dlp's output must not
    # abort the whole run with UnicodeDecodeError.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding='utf-8',
        errors='replace',
    )

    log_f = _open_log_file(log_file, display_cmd) if log_file else None
    try:
        stdout_data, stderr_data = process.communicate()
        return_code = process.returncode

        # Write captured output to terminal and log file
        for label, data, stream in (
            ('stdout', stdout_data, sys.stdout),
            ('stderr', stderr_data, sys.stderr),
        ):
            if not data:
                continue
            stream.write(data)
            stream.flush()
            if log_f:
                log_f.writelines(f"[{label}] {line}" for line in data.splitlines(True))

        if log_f:
            log_f.write(f"\n--- End log entry (yt-dlp exit code: {return_code}) ---\n")
    finally:
        # Close the log even if communicate() or a write raised.
        if log_f:
            log_f.close()

    stdout_lines = stdout_data.splitlines() if stdout_data else []
    # Scan from the end and take the last printed line that is a real path.
    downloaded_filepath = next(
        (line for line in reversed(stdout_lines) if line and os.path.exists(line)),
        None,
    )
    if downloaded_filepath:
        logger.info(f"Detected downloaded file: {downloaded_filepath}")
    return return_code, downloaded_filepath


def _cleanup_download(downloaded_filepath):
    """Rename the file to ``[base]_[timestamp][ext].empty`` and truncate it; return True on success."""
    try:
        logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        directory, original_filename = os.path.split(downloaded_filepath)
        filename_base, filename_ext = os.path.splitext(original_filename)

        # New name format is [base]_[timestamp][ext].empty
        new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
        new_filepath = os.path.join(directory, new_filename)

        os.rename(downloaded_filepath, new_filepath)
        logger.info(f"Renamed to '{new_filepath}'")

        # Opening in 'w' mode truncates the file to 0 bytes.
        with open(new_filepath, 'w'):
            pass
        logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
    except OSError as e:
        logger.error(f"Cleanup failed: {e}")
        return False
    return True


def main_download(args):
    """Main logic for the 'download' command.

    Loads an info.json (from ``--load-info-json`` or stdin), resolves the proxy,
    runs yt-dlp as a subprocess against a temporary copy of the info.json and,
    when ``--cleanup`` is set, renames and truncates the downloaded file.

    Args:
        args: Parsed arguments as produced by the parser from
            ``add_download_parser``.

    Returns:
        yt-dlp's exit code, or 1 when the input is unusable, ``--proxy-rename``
        is invalid, an unexpected error occurs, or the cleanup step fails.
    """
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if args.pause > 0:
        logger.info(f"Pausing for {args.pause} seconds...")
        time.sleep(args.pause)

    info_data = _load_info_json(args.load_info_json)
    if info_data is None:
        return 1

    if args.save_info_json_dir:
        _save_info_json_copy(info_data, args.save_info_json_dir)

    # Determine proxy to use: the command line wins over the info.json value.
    proxy_url = args.proxy
    if not proxy_url:
        proxy_url = info_data.get('_proxy_url')
        if proxy_url:
            logger.info(f"Using proxy from info.json: {proxy_url}")

    if proxy_url and args.proxy_rename:
        try:
            proxy_url = _rename_proxy(proxy_url, args.proxy_rename)
        except re.error as e:
            logger.error(f"Invalid regex in --proxy-rename: {e}")
            return 1
        except ValueError:
            logger.error(_PROXY_RENAME_FORMAT_ERROR)
            return 1

    info_json_path = None
    downloaded_filepath = None
    return_code = 1  # Default to error

    try:
        # yt-dlp needs to load the info.json from a file. The path is recorded
        # before writing so the finally-clause removes it even if the dump fails.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as tmp:
            info_json_path = tmp.name
            json.dump(info_data, tmp)
        logger.debug(f"Temporarily saved info.json to {info_json_path}")

        # Create output directory if it doesn't exist
        os.makedirs(args.output_dir, exist_ok=True)

        cmd = _build_command(args, info_json_path, proxy_url)

        # Determine if we need to capture output.
        capture_output = bool(args.cleanup or args.log_file or args.print_traffic)

        if capture_output and not args.print_traffic:
            logger.info("Note: --cleanup or --log-file requires capturing output, which may affect progress bar display.")

        logger.info(f"Executing yt-dlp command for format '{args.format}'")

        # Shell-quoted so the logged command can be copy-pasted verbatim.
        display_cmd_str = ' '.join(shlex.quote(arg) for arg in cmd)

        _log_config_contents(args.cli_config)
        logger.info(f"Full command: {display_cmd_str}")

        if capture_output:
            return_code, downloaded_filepath = _run_captured(cmd, display_cmd_str, args.log_file)
        else:
            # Original behavior: progress bar direct to terminal, no capture
            process = subprocess.Popen(cmd)
            return_code = process.wait()

        if return_code != 0:
            logger.error(f"yt-dlp exited with error code {return_code}")
        else:
            logger.info("yt-dlp command completed successfully.")
    except Exception as e:
        # Top-level boundary: report with traceback and turn into an exit code.
        logger.exception(f"An unexpected error occurred: {e}")
        return 1
    finally:
        # Clean up the temporary file
        if info_json_path and os.path.exists(info_json_path):
            os.unlink(info_json_path)
            logger.debug(f"Removed temporary file {info_json_path}")

    # Cleanup phase
    if args.cleanup:
        if downloaded_filepath and os.path.exists(downloaded_filepath):
            if not _cleanup_download(downloaded_filepath):
                return 1
        else:
            logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")

    return return_code