#!/usr/bin/env python3
"""
Tool to download a specified format using yt-dlp as a Python library.
"""
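
# Example invocation (illustrative only -- the parent CLI that registers this
# subcommand is not part of this file, and the executable name is a placeholder):
#
#   yt-dlp --dump-single-json "https://example.com/watch?v=ID" \
#       | <parent-cli> download py -f 18 --output-dir ./downloads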

import argparse
import contextlib
import io
import json
import logging
import os
import re
import shlex
import sys
import time
from datetime import datetime

try:
    import yt_dlp
except ImportError:
    print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
    sys.exit(1)

logger = logging.getLogger('download_native_py_tool')


# A custom logger for yt-dlp to capture output and key events.
class YTDLPLogger:
    def __init__(self):
        self.final_filename = None
        self.is_403 = False
        self.is_timeout = False

    def debug(self, msg):
        # yt-dlp logs the destination file path at the debug level.
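        # Example messages this method looks for (formats implied by the checks below):
        #   "[download] Destination: Some Title [abc123].f137.mp4"
        #   "[download] Some Title [abc123].f137.mp4 has already been downloaded"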
        if msg.startswith('[download] Destination:'):
            self.final_filename = msg.split(':', 1)[1].strip()
        elif msg.startswith('[download]') and 'has already been downloaded' in msg:
            match = re.search(r'\[download\]\s+(.*)\s+has already been downloaded', msg)
            if match:
                self.final_filename = match.group(1).strip()
        logger.debug(msg)

    def info(self, msg):
        logger.info(msg)

    def warning(self, msg):
        logger.warning(msg)

    def error(self, msg):
        if "HTTP Error 403" in msg:
            self.is_403 = True
        if "Read timed out" in msg:
            self.is_timeout = True
        logger.error(msg)


def ytdlp_progress_hook(d, ytdlp_logger):
    """Progress hook to capture the final filename."""
    if d['status'] == 'finished':
        ytdlp_logger.final_filename = d.get('filename')
        logger.info(f"Download finished. Final file: {ytdlp_logger.final_filename}")


def add_download_native_py_parser(subparsers):
    """Add the parser for the 'download py' command."""
    parser = subparsers.add_parser(
        'py',
        description='Download using yt-dlp as a Python library (recommended). This method calls yt-dlp functions directly.',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Download using a direct Python call to yt-dlp (recommended).'
    )
    parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
    parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
    parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
    parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
    parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080".')
    parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
    parser.add_argument('--temp-path', help='Directory for temporary files (e.g., fragments). Use a RAM disk for best performance.')
    parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
    parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--no-overwrites and --continue flags for yt-dlp).')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
    parser.add_argument('--cli-config', help='Path to a yt-dlp configuration file to load.')
    parser.add_argument('--downloader', help='Name of the external downloader backend for yt-dlp to use (e.g., "aria2c", "native").')
    parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader backend (e.g., "aria2c:-x 8").')
    parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
    parser.add_argument('--output-buffer', action='store_true', help='Download to an in-memory buffer and print raw bytes to stdout. Final filename is printed to stderr.')
    parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
    parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
    return parser
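

# How this parser is expected to be wired into a parent CLI (illustrative sketch;
# the surrounding program is not part of this file and these names are assumed):
#
#   root = argparse.ArgumentParser(prog='tool')            # placeholder program name
#   commands = root.add_subparsers(dest='command')
#   download_parser = commands.add_parser('download')
#   download_backends = download_parser.add_subparsers(dest='backend')
#   add_download_native_py_parser(download_backends)
#   args = root.parse_args()
#   if args.command == 'download' and args.backend == 'py':
#       sys.exit(main_download_native_py(args))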


def main_download_native_py(args):
    """Main logic for the 'download-native-py' command."""
    # If outputting to buffer, all logging must go to stderr to keep stdout clean for binary data.
    log_stream = sys.stderr if args.output_buffer else sys.stdout
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # Reconfigure root logger
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(level=log_level, stream=log_stream, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    if args.pause > 0:
        logger.info(f"Pausing for {args.pause} seconds...")
        time.sleep(args.pause)

    info_json_content = ""
    input_source_name = ""
    if args.load_info_json:
        info_json_content = args.load_info_json.read()
        input_source_name = args.load_info_json.name
    else:
        info_json_content = sys.stdin.read()
        input_source_name = "stdin"

    if not info_json_content.strip():
        logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
        return 1

    try:
        info_data = json.loads(info_json_content)
        logger.info(f"Successfully loaded info.json from {input_source_name}.")
    except json.JSONDecodeError:
        logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
        return 1

    if args.save_info_json_dir:
        try:
            video_id = info_data.get('id', 'unknown_video_id')
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"{timestamp}-{video_id}-info.json"
            output_path = os.path.join(args.save_info_json_dir, filename)
            os.makedirs(args.save_info_json_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(info_data, f, indent=2)
            logger.info(f"Saved info.json to {output_path}")
        except Exception as e:
            logger.error(f"Failed to save info.json: {e}")

    # Handle proxy and proxy rename
    proxy_url = args.proxy
    if not proxy_url:
        proxy_url = info_data.get('_proxy_url')
        if proxy_url:
            logger.info(f"Using proxy from info.json: {proxy_url}")

    if proxy_url and args.proxy_rename:
        rename_rule = args.proxy_rename.strip("'\"")
        if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
            try:
                parts = rename_rule.split('/')
                pattern, replacement = parts[1], parts[2]
                original_proxy = proxy_url
                proxy_url = re.sub(pattern, replacement, proxy_url)
                logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
            except re.error as e:
                logger.error(f"Invalid regex in --proxy-rename: {e}")
                return 1
        else:
            logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
            return 1
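
    # Example: the rule 's/127\.0\.0\.1/192.0.2.10/' rewrites
    # "socks5://127.0.0.1:1080" to "socks5://192.0.2.10:1080".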

    # Build the yt-dlp options dictionary.
    # Start by parsing options from config file and extra args to establish a baseline.
    base_opts_args = []
    if args.cli_config and os.path.exists(args.cli_config):
        try:
            with open(args.cli_config, 'r', encoding='utf-8') as f:
                config_content = f.read()
            base_opts_args.extend(shlex.split(config_content))
            logger.info(f"Loaded {len(base_opts_args)} arguments from config file: {args.cli_config}")
        except Exception as e:
            logger.error(f"Failed to read or parse config file {args.cli_config}: {e}")
            return 1
    elif args.cli_config:
        logger.warning(f"Config file '{args.cli_config}' not found. Ignoring.")

    if args.extra_ytdlp_args:
        extra_args_list = shlex.split(args.extra_ytdlp_args)
        logger.info(f"Adding {len(extra_args_list)} extra arguments from --extra-ytdlp-args.")
        base_opts_args.extend(extra_args_list)

    ydl_opts = {}
    if base_opts_args:
        try:
            # parse_options() is an internal API, but it is the most accurate way to parse
            # CLI args into the ydl_opts dictionary format. Recent yt-dlp releases return a
            # ParsedOptions(parser, options, urls, ydl_opts) namedtuple, so take the ydl_opts
            # field rather than unpacking a fixed number of values.
            ydl_opts = yt_dlp.parse_options(base_opts_args).ydl_opts
        except Exception as e:
            logger.error(f"Failed to parse options from config/extra_args: {e}")
            return 1
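
    # For illustration (behaviour assumed from recent yt-dlp releases): tokens such as
    # ['--retries', '10', '--socket-timeout', '15'] yield a ydl_opts dict with keys like
    # 'retries' and 'socket_timeout', which YoutubeDL() accepts directly.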

    # Now, layer the script's explicit arguments on top, as they have higher precedence.
    os.makedirs(args.output_dir, exist_ok=True)
    output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')

    ytdlp_logger = YTDLPLogger()

    # Use update to merge, so explicit args overwrite config/extra args.
    ydl_opts.update({
        'format': args.format,
        'outtmpl': '-' if args.output_buffer else output_template,
        'logger': ytdlp_logger,
        'progress_hooks': [lambda d: ytdlp_progress_hook(d, ytdlp_logger)],
        'verbose': args.verbose,
    })
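
    # Illustrative result: with "-f 18 --output-dir ./out", ydl_opts now contains roughly
    #   {'format': '18', 'outtmpl': './out/%(title)s [%(id)s].f%(format_id)s.%(ext)s',
    #    'logger': <YTDLPLogger>, 'progress_hooks': [<lambda>], 'verbose': False, ...}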

    if args.temp_path:
        ydl_opts['paths'] = {'temp': args.temp_path}
        logger.info(f"Using temporary path: {args.temp_path}")

    if args.download_continue:
        ydl_opts['continuedl'] = True
        ydl_opts['nooverwrites'] = True

    if proxy_url:
        ydl_opts['proxy'] = proxy_url

    if args.downloader:
        # YoutubeDL expects 'external_downloader' as a protocol-to-downloader mapping;
        # the 'default' key applies the chosen downloader to every protocol.
        ydl_opts['external_downloader'] = {'default': args.downloader}
    if args.downloader_args:
        # yt-dlp expects a dict for the external downloader args,
        # e.g. {'aria2c': ['-x', '8']}.
        try:
            downloader_name, args_str = args.downloader_args.split(':', 1)
            ydl_opts.setdefault('external_downloader_args', {})[downloader_name] = shlex.split(args_str)
        except ValueError:
            logger.error(f"Invalid --downloader-args format. Expected 'downloader:args'. Got: '{args.downloader_args}'")
            return 1

    if args.merge_output_format:
        ydl_opts['merge_output_format'] = args.merge_output_format

    try:
        logger.info(f"Starting download for format '{args.format}' using yt-dlp library...")

        download_buffer = None
        if args.output_buffer:
            # When downloading to a buffer, we redirect stdout to capture the binary data.
            download_buffer = io.BytesIO()
            ctx_mgr = contextlib.redirect_stdout(download_buffer)
        else:
            # Otherwise, use a null context manager.
            ctx_mgr = contextlib.nullcontext()

        with ctx_mgr, yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # The download() method is for URLs. For a pre-fetched info dict,
            # we must use process_ie_result to bypass the info extraction step.
            # It raises DownloadError on failure, which is caught by the outer try...except block.
            ydl.process_ie_result(info_data)
            # If process_ie_result completes without an exception, the download was successful.
            retcode = 0
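
        # Note (not used here; assumes the standard yt-dlp API): if the info.json already
        # lives on disk, YoutubeDL.download_with_info_file(path) achieves the same thing
        # without loading the dict manually.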

        # The success path is now always taken if no exception was raised.
        if retcode == 0:
            logger.info("yt-dlp download completed successfully.")

            if args.output_buffer:
                # Write the captured binary data to the actual stdout.
                sys.stdout.buffer.write(download_buffer.getvalue())
                sys.stdout.buffer.flush()
                # Print the filename to stderr for the orchestrator.
                if ytdlp_logger.final_filename:
                    print(ytdlp_logger.final_filename, file=sys.stderr)
            else:
                # Print the filename to stdout as usual.
                if ytdlp_logger.final_filename:
                    print(ytdlp_logger.final_filename, file=sys.stdout)

            if args.cleanup:
                downloaded_filepath = ytdlp_logger.final_filename
                if downloaded_filepath and os.path.exists(downloaded_filepath):
                    try:
                        logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
                        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                        directory, original_filename = os.path.split(downloaded_filepath)
                        filename_base, filename_ext = os.path.splitext(original_filename)
                        new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
                        new_filepath = os.path.join(directory, new_filename)
                        os.rename(downloaded_filepath, new_filepath)
                        logger.info(f"Renamed to '{new_filepath}'")
                        with open(new_filepath, 'w') as f:
                            pass
                        logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
                    except Exception as e:
                        logger.error(f"Cleanup failed: {e}")
                        return 1  # Treat cleanup failure as a script failure
                elif not args.output_buffer:
                    logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")
            return 0
        else:
            logger.error(f"yt-dlp download failed with internal exit code {retcode}.")
            return 1

    except yt_dlp.utils.DownloadError as e:
        # This catches download-specific errors from yt-dlp.
        logger.error(f"yt-dlp DownloadError: {e}")
        return 1
    except Exception as e:
        logger.exception(f"An unexpected error occurred during yt-dlp execution: {e}")
        return 1