323 lines
15 KiB
Python
323 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tool to download a specified format using an info.json from stdin.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import shlex
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from datetime import datetime
|
|
|
|
# Configure logging
|
|
logger = logging.getLogger('download_tool')
|
|
|
|
def add_download_parser(subparsers):
    """Register the 'cli' sub-command on *subparsers* and return its parser.

    The 'cli' sub-command is a legacy wrapper that shells out to the yt-dlp
    executable; every option defined here either controls this wrapper's own
    behaviour or is forwarded to the yt-dlp command line.
    """
    parser = subparsers.add_parser(
        'cli',
        description='Download using the legacy yt-dlp CLI wrapper. This method invokes yt-dlp as a subprocess.',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Download using the legacy yt-dlp CLI wrapper.'
    )

    # Alias the bound method so each option reads as a single short call.
    add = parser.add_argument

    # --- Input / output locations -------------------------------------
    add('--load-info-json', type=argparse.FileType('r', encoding='utf-8'),
        help="Path to the info.json file. If not provided, reads from stdin.")
    add('-f', '--format', required=True,
        help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
    add('--output-dir', default='.',
        help='Directory to save the downloaded file. Defaults to current directory.')
    add('--save-info-json-dir',
        help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')

    # --- Proxy handling -----------------------------------------------
    add('--proxy',
        help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This option sets the proxy, overriding any value from the info.json.')
    add('--proxy-rename',
        help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')

    # --- Wrapper-script behaviour -------------------------------------
    add('--pause', type=int, default=0,
        help='Seconds to wait before starting the download.')
    add('--print-traffic', action='store_true',
        help='Print traffic instead of a progress bar.')
    add('--download-continue', action='store_true',
        help='Enable download continuation (--continue and --part flags for yt-dlp).')
    add('--verbose', action='store_true',
        help='Enable verbose output for this script and yt-dlp.')
    add('--cli-config', default='cli.config',
        help='Path to a yt-dlp configuration file. Defaults to "cli.config".')
    add('--cleanup', action='store_true',
        help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
    add('--log-file',
        help='Append full yt-dlp output to the specified log file.')
    add('--yt-dlp-path', default='yt-dlp',
        help='Path to the yt-dlp executable. Defaults to "yt-dlp" in PATH.')
    add('--extra-ytdlp-args',
        help='A string of extra command-line arguments to pass to yt-dlp.')

    # --- Downloader selection / network tuning forwarded to yt-dlp ----
    add('--downloader',
        help='Name of the external downloader to use (e.g., "aria2c", "native").')
    add('--downloader-args',
        help='Arguments to pass to the external downloader (e.g., "aria2c:-x 8").')
    add('--merge-output-format',
        help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
    add('--retries',
        help='Number of retries for the entire download (default: 10).')
    add('--fragment-retries',
        help='Number of retries for each fragment (default: 10).')
    add('--socket-timeout',
        help='Timeout for socket operations in seconds (default: 20).')
    add('--lang',
        help='Language code for the request (e.g., "fr", "ja"). Affects metadata language.')
    add('--timezone',
        help='Timezone for the request (e.g., "UTC", "America/New_York"). Note: not supported by yt-dlp.')

    # --- Arguments passed straight through to yt-dlp ------------------
    add('--download-sections',
        help='yt-dlp --download-sections argument (e.g., "*0-10240").')
    add('--test', action='store_true',
        help='yt-dlp --test argument (download small part).')

    return parser
|
|
|
|
def main_download(args):
    """Main logic for the 'download' command.

    Reads an info.json (from --load-info-json or stdin), builds a yt-dlp
    command line from *args*, runs it as a subprocess, optionally captures
    and logs its output, and returns a process-style exit code
    (0 = success, non-zero = failure).
    """
    if args.verbose:
        # Raise the ROOT logger to DEBUG so this script's debug messages show.
        logging.getLogger().setLevel(logging.DEBUG)

    # Optional startup delay before any work is done.
    if args.pause > 0:
        logger.info(f"Pausing for {args.pause} seconds...")
        time.sleep(args.pause)

    # --- Read the raw info.json text from file or stdin ---------------
    info_json_content = ""
    input_source_name = ""
    if args.load_info_json:
        # argparse opened this file for us (FileType); read it fully.
        info_json_content = args.load_info_json.read()
        input_source_name = args.load_info_json.name
    else:
        info_json_content = sys.stdin.read()
        input_source_name = "stdin"

    # Empty/whitespace-only input is treated as a hard error.
    if not info_json_content.strip():
        logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
        return 1

    # --- Parse the JSON ------------------------------------------------
    try:
        info_data = json.loads(info_json_content)
        logger.info(f"Successfully loaded info.json from {input_source_name}.")
    except json.JSONDecodeError:
        logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
        return 1

    # --- Optionally archive a copy of the info.json --------------------
    # Failure here is logged but non-fatal: the download still proceeds.
    if args.save_info_json_dir:
        try:
            video_id = info_data.get('id', 'unknown_video_id')
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"{timestamp}-{video_id}-info.json"
            output_path = os.path.join(args.save_info_json_dir, filename)
            os.makedirs(args.save_info_json_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(info_data, f, indent=2)
            logger.info(f"Saved info.json to {output_path}")
        except Exception as e:
            logger.error(f"Failed to save info.json: {e}")

    # --- Determine proxy to use ----------------------------------------
    # CLI --proxy wins; otherwise fall back to a '_proxy_url' key embedded
    # in the info.json (a private extension, not a standard yt-dlp field).
    proxy_url = args.proxy
    if not proxy_url:
        proxy_url = info_data.get('_proxy_url')
        if proxy_url:
            logger.info(f"Using proxy from info.json: {proxy_url}")

    # --- Optionally rewrite the proxy URL with a sed-style rule ---------
    if proxy_url and args.proxy_rename:
        rename_rule = args.proxy_rename
        # The user's command line might include quotes that are preserved by shlex.
        # Strip them to get the raw rule.
        rename_rule = rename_rule.strip("'\"")
        if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
            try:
                # NOTE(review): a plain '/' split means the pattern/replacement
                # cannot themselves contain slashes (even escaped ones) —
                # anything after the second '/' is silently ignored.
                parts = rename_rule.split('/')
                pattern = parts[1]
                replacement = parts[2]
                original_proxy = proxy_url
                proxy_url = re.sub(pattern, replacement, proxy_url)
                logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
            except re.error as e:
                logger.error(f"Invalid regex in --proxy-rename: {e}")
                return 1
            except IndexError:
                logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
                return 1
        else:
            logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
            return 1

    # yt-dlp needs to load the info.json from a file, so persist the parsed
    # data to a temp file. delete=False because yt-dlp must be able to open
    # it after this 'with' closes; removal happens in the 'finally' below.
    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as tmp:
        json.dump(info_data, tmp)
        info_json_path = tmp.name

    logger.debug(f"Temporarily saved info.json to {info_json_path}")

    downloaded_filepath = None
    return_code = 1  # Default to error

    try:
        # Create output directory if it doesn't exist
        os.makedirs(args.output_dir, exist_ok=True)
        output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')

        # Base command: '--print filename' makes yt-dlp emit the output
        # path on stdout, which is how we detect the downloaded file later.
        cmd = [
            args.yt_dlp_path,
            '--load-info-json', info_json_path,
            '-f', args.format,
            '-o', output_template,
            '--print', 'filename',
        ]

        # User-supplied raw extra args are tokenized shell-style.
        if args.extra_ytdlp_args:
            cmd.extend(shlex.split(args.extra_ytdlp_args))

        # External downloader selection and its arguments.
        if args.downloader:
            cmd.extend(['--downloader', args.downloader])
        if args.downloader_args:
            cmd.extend(['--downloader-args', args.downloader_args])
        if args.merge_output_format:
            cmd.extend(['--merge-output-format', args.merge_output_format])

        if args.download_sections:
            cmd.extend(['--download-sections', args.download_sections])

        if args.test:
            cmd.append('--test')

        # Retry / timeout tuning (values are kept as strings for argv).
        if args.retries:
            cmd.extend(['--retries', str(args.retries)])
        if args.fragment_retries:
            cmd.extend(['--fragment-retries', str(args.fragment_retries)])
        if args.socket_timeout:
            cmd.extend(['--socket-timeout', str(args.socket_timeout)])

        if args.download_continue:
            cmd.extend(['--continue', '--part'])

        # Use the config file only if it actually exists on disk.
        if os.path.exists(args.cli_config):
            logger.info(f"Using config file: {args.cli_config}")
            cmd.extend(['--config-location', args.cli_config])
        else:
            logger.info(f"Config file '{args.cli_config}' not found. Using yt-dlp defaults.")

        # Traffic dump and progress bar are mutually exclusive here.
        if args.print_traffic:
            cmd.append('--print-traffic')
            cmd.append('--no-progress')
        else:
            cmd.append('--progress')

        if args.verbose:
            cmd.append('--verbose')

        if proxy_url:
            cmd.extend(['--proxy', proxy_url])

        # Language is forwarded as a youtube extractor argument.
        if args.lang:
            cmd.extend(['--extractor-args', f'youtube:lang={args.lang}'])

        if args.timezone:
            logger.warning(f"Timezone override ('{args.timezone}') is not supported by yt-dlp and will be ignored.")

        # Determine if we need to capture output: cleanup needs the printed
        # filename, log-file needs the full text, print-traffic needs both.
        capture_output = args.cleanup or args.log_file or args.print_traffic

        if capture_output and not args.print_traffic:
            logger.info("Note: --cleanup or --log-file requires capturing output, which may affect progress bar display.")

        logger.info(f"Executing yt-dlp command for format '{args.format}'")

        # Construct a display version of the command for logging
        # (quote only args containing spaces; for humans, not for re-execution).
        display_cmd_str = ' '.join(f"'{arg}'" if ' ' in arg else arg for arg in cmd)
        # Also log the config file contents (whitespace-collapsed) for context.
        if os.path.exists(args.cli_config):
            try:
                with open(args.cli_config, 'r', encoding='utf-8') as f:
                    config_contents = ' '.join(f.read().split())
                if config_contents:
                    logger.info(f"cli.config contents: {config_contents}")
            except IOError as e:
                logger.warning(f"Could not read config file {args.cli_config}: {e}")

        logger.info(f"Full command: {display_cmd_str}")

        if capture_output:
            # Buffer the child's entire stdout/stderr in memory.
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8')

            # Open the append-mode log file up front; on failure we log the
            # error and continue without file logging (log_f stays None).
            log_f = None
            if args.log_file:
                try:
                    log_f = open(args.log_file, 'a', encoding='utf-8')
                    log_f.write(f"\n--- Log entry: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n")
                    log_f.write(f"Command: {' '.join(cmd)}\n\n")
                except IOError as e:
                    logger.error(f"Failed to open log file {args.log_file}: {e}")

            # Blocks until yt-dlp exits; no streaming of output.
            stdout_data, stderr_data = process.communicate()
            return_code = process.returncode

            # Post-run check for silent failures, like 403 errors where yt-dlp might still exit 0.
            if return_code == 0:
                output_text = (stdout_data or "") + (stderr_data or "")
                if "HTTP Error 403" in output_text:
                    logger.error("yt-dlp exited successfully, but a 403 error was detected in its output. Forcing failure.")
                    return_code = 1  # Override success code
                # NOTE(review): this substring heuristic can false-positive —
                # e.g. verbose output that merely mentions a timeout setting
                # would match "timeout" and force a failure. Confirm intended.
                elif "timed out" in output_text.lower() or "timeout" in output_text.lower():
                    logger.error("yt-dlp exited successfully, but a timeout was detected in its output. Forcing failure.")
                    return_code = 1

            # Write captured output to terminal and log file
            # (splitlines(True) keeps line endings so the log mirrors output).
            if stdout_data:
                sys.stdout.write(stdout_data)
                sys.stdout.flush()
                if log_f:
                    for line in stdout_data.splitlines(True):
                        log_f.write(f"[stdout] {line}")

            if stderr_data:
                sys.stderr.write(stderr_data)
                sys.stderr.flush()
                if log_f:
                    for line in stderr_data.splitlines(True):
                        log_f.write(f"[stderr] {line}")

            stdout_lines = stdout_data.splitlines() if stdout_data else []

            if log_f:
                log_f.write(f"\n--- End log entry (yt-dlp exit code: {return_code}) ---\n")
                log_f.close()

            # The '--print filename' line should be the last stdout line that
            # names an existing file; scan from the end to find it.
            for line in reversed(stdout_lines):
                if line and os.path.exists(line):
                    downloaded_filepath = line
                    logger.info(f"Detected downloaded file: {downloaded_filepath}")
                    break
        else:
            # Original behavior: progress bar direct to terminal, no capture
            process = subprocess.Popen(cmd)
            process.wait()
            return_code = process.returncode

        if return_code != 0:
            logger.error(f"yt-dlp exited with error code {return_code}")
        else:
            logger.info("yt-dlp command completed successfully.")

    except Exception as e:
        # Catch-all boundary: log with traceback and report failure.
        logger.exception(f"An unexpected error occurred: {e}")
        return 1
    finally:
        # Clean up the temporary file regardless of success or failure.
        if os.path.exists(info_json_path):
            os.unlink(info_json_path)
            logger.debug(f"Removed temporary file {info_json_path}")

    # Cleanup phase: deliberately destructive — keeps a zero-byte,
    # timestamped marker file in place of the downloaded content.
    if args.cleanup:
        if downloaded_filepath and os.path.exists(downloaded_filepath):
            try:
                logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")

                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

                directory, original_filename = os.path.split(downloaded_filepath)
                filename_base, filename_ext = os.path.splitext(original_filename)

                # New name format is [base]_[timestamp][ext].empty
                new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
                new_filepath = os.path.join(directory, new_filename)

                os.rename(downloaded_filepath, new_filepath)
                logger.info(f"Renamed to '{new_filepath}'")

                # Opening in 'w' mode truncates the file to zero bytes.
                with open(new_filepath, 'w') as f:
                    pass
                logger.info(f"Truncated '{new_filepath}' to 0 bytes.")

            except Exception as e:
                logger.error(f"Cleanup failed: {e}")
                return 1
        else:
            logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")

    return return_code
|