671 lines
31 KiB
Python
671 lines
31 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tool to download a specified format using yt-dlp as a Python library.
|
|
"""
|
|
|
|
import argparse
|
|
import contextlib
|
|
import copy
|
|
import io
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import shlex
|
|
import sys
|
|
import time
|
|
from datetime import datetime
|
|
|
|
|
|
logger = logging.getLogger('download_native_py_tool')


# Adapter object handed to yt-dlp as its logger: it forwards every message
# to this module's logger while scraping filenames and failure markers out
# of the message stream.
class YTDLPLogger:

    def __init__(self):
        # Path of the downloaded file, scraped from yt-dlp's messages.
        self.final_filename = None
        # Set when yt-dlp reports an HTTP 403 response.
        self.is_403 = False
        # Set when yt-dlp reports a socket read timeout.
        self.is_timeout = False
        # Set whenever error() is invoked, regardless of message content.
        self.has_errors = False

    def debug(self, msg):
        """Inspect debug-level messages for the destination filename."""
        if msg.startswith('[download] Destination:'):
            # yt-dlp logs the destination file path at the debug level.
            _, _, path = msg.partition(':')
            self.final_filename = path.strip()
        elif msg.startswith('[Merger] Merging formats into "'):
            # The post-merge filename is the first double-quoted token.
            self.final_filename = msg.split('"')[1]
        elif msg.startswith('[download]') and 'has already been downloaded' in msg:
            found = re.search(r'\[download\]\s+(.*)\s+has already been downloaded', msg)
            if found is not None:
                self.final_filename = found.group(1).strip()
        logger.debug(msg)

    def info(self, msg):
        """Forward info-level messages unchanged."""
        logger.info(msg)

    def warning(self, msg):
        """Forward warning-level messages unchanged."""
        logger.warning(msg)

    def error(self, msg):
        """Record error state and classify 403 / timeout failures."""
        self.is_403 = self.is_403 or "HTTP Error 403" in msg
        self.is_timeout = self.is_timeout or "Read timed out" in msg
        self.has_errors = True
        logger.error(msg)
|
|
|
|
def ytdlp_progress_hook(d, ytdlp_logger):
    """yt-dlp progress hook; records the final filename once a download finishes."""
    if d['status'] != 'finished':
        return
    ytdlp_logger.final_filename = d.get('filename')
    logger.info(f"Download finished. Final file: {ytdlp_logger.final_filename}")
|
|
|
|
def add_download_native_py_parser(subparsers):
    """Register the 'py' subcommand parser and return it.

    The subcommand downloads media by calling yt-dlp as a Python library
    rather than shelling out to the CLI binary.
    """
    py_parser = subparsers.add_parser(
        'py',
        description='Download using yt-dlp as a Python library (recommended). This method calls yt-dlp functions directly.',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Download using a direct Python call to yt-dlp (recommended).'
    )

    # Input / output locations.
    py_parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
    py_parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
    py_parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
    py_parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')

    # Networking.
    py_parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080".')
    py_parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')

    # Download behavior.
    py_parser.add_argument('--temp-path', help='Directory for temporary files (e.g., fragments). Use a RAM disk for best performance.')
    py_parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
    py_parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--no-overwrites and --continue flags for yt-dlp).')
    py_parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
    py_parser.add_argument('--config', default=None, help='Path to a yt-dlp JSON configuration file (e.g., ytdlp.json). If not provided, searches for ytdlp.json.')
    py_parser.add_argument('--downloader', help='Name of the external downloader backend for yt-dlp to use (e.g., "aria2c", "native").')
    py_parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader backend (e.g., "aria2c:-x 8").')
    py_parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
    py_parser.add_argument('--output-buffer', action='store_true', help='Download to an in-memory buffer and print raw bytes to stdout. Final filename is printed to stderr.')
    py_parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
    py_parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')

    # Retry / timeout tuning.
    py_parser.add_argument('--retries', type=int, help='Number of retries for the entire download (default: 10).')
    py_parser.add_argument('--fragment-retries', type=int, help='Number of retries for each fragment (default: 10).')
    py_parser.add_argument('--socket-timeout', type=int, help='Timeout for socket operations in seconds (default: 20).')
    py_parser.add_argument('--add-header', action='append', help='Add a custom HTTP header for the download. Format: "Key: Value". Can be used multiple times.')
    py_parser.add_argument('--concurrent-fragments', type=int, help='Number of fragments to download concurrently for each media.')

    # Arguments to pass through to yt-dlp verbatim.
    py_parser.add_argument('--download-sections', help='yt-dlp --download-sections argument (e.g., "*0-10240").')
    py_parser.add_argument('--test', action='store_true', help='yt-dlp --test argument (download small part).')

    return py_parser
|
|
|
|
|
|
def _download_single_format(format_id, info_data, base_ydl_opts, args):
    """
    Download a single format ID from the given info_data.

    The info dict is narrowed down to exactly one format entry before being
    handed to yt-dlp, which stops it from auto-merging the requested stream
    with other streams.

    Returns a tuple: (success: bool, ytdlp_logger: YTDLPLogger)
    """
    import yt_dlp

    # Work on a private copy so sequential downloads stay isolated.
    info_copy = copy.deepcopy(info_data)
    all_formats = info_copy.get('formats', [])

    # Locate the exact format entry requested.
    fmt_entry = None
    for candidate in all_formats:
        if candidate.get('format_id') == format_id:
            fmt_entry = candidate
            break

    if fmt_entry is None:
        logger.error(f"Format '{format_id}' not found in info.json")
        failed_logger = YTDLPLogger()
        failed_logger.has_errors = True
        return False, failed_logger

    # Restrict the format list to the single requested entry -- this is the
    # key to preventing auto-merge.
    info_copy['formats'] = [fmt_entry]

    # Drop any pre-selected format fields that could re-trigger merging.
    for stale_key in ('requested_formats', 'format', 'format_id'):
        info_copy.pop(stale_key, None)

    logger.info(f"Filtered info_data to only contain format '{format_id}' (removed {len(all_formats) - 1} other formats)")

    # A fresh capture logger per download so flags don't leak between runs.
    capture_logger = YTDLPLogger()

    opts = dict(base_ydl_opts)
    opts['format'] = format_id
    opts['logger'] = capture_logger
    opts['progress_hooks'] = [lambda d, yl=capture_logger: ytdlp_progress_hook(d, yl)]

    try:
        # In buffer mode, divert yt-dlp's stdout writes into memory.
        byte_sink = io.BytesIO() if args.output_buffer else None
        if byte_sink is not None:
            stdout_ctx = contextlib.redirect_stdout(byte_sink)
        else:
            stdout_ctx = contextlib.nullcontext()

        with stdout_ctx, yt_dlp.YoutubeDL(opts) as ydl:
            ydl.process_ie_result(info_copy)

        if capture_logger.has_errors:
            logger.error(f"Download of format '{format_id}' failed: yt-dlp reported an error during execution.")
            return False, capture_logger

        logger.info(f"Download of format '{format_id}' completed successfully.")

        # Flush captured bytes to the real stdout after the download ends.
        if args.output_buffer and byte_sink:
            sys.stdout.buffer.write(byte_sink.getvalue())
            sys.stdout.buffer.flush()

        return True, capture_logger

    except yt_dlp.utils.DownloadError as e:
        logger.error(f"yt-dlp DownloadError for format '{format_id}': {e}")
        capture_logger.has_errors = True
        return False, capture_logger
    except Exception as e:
        logger.exception(f"Unexpected error downloading format '{format_id}': {e}")
        capture_logger.has_errors = True
        return False, capture_logger
|
|
|
|
|
|
def main_download_native_py(args):
    """Main logic for the 'download-native-py' command.

    Reads an info.json (from a file or stdin), assembles the yt-dlp options
    from four layers in rising priority (JSON config file, --extra-ytdlp-args,
    internal defaults, explicit CLI flags), then downloads each requested
    format item.

    Returns a process exit code: 0 on success, 1 on any failure.
    """
    try:
        import yt_dlp
        from yt_dlp.utils import match_filter_func
    except ImportError:
        print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
        return 1

    # All logging should go to stderr to keep stdout clean for the final filename, or for binary data with --output-buffer.
    log_stream = sys.stderr
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # Reconfigure root logger (drop any handlers installed by a caller).
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(level=log_level, stream=log_stream, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    if args.pause > 0:
        logger.info(f"Pausing for {args.pause} seconds...")
        time.sleep(args.pause)

    # --- Read info.json ---
    info_json_content = ""
    input_source_name = ""
    if args.load_info_json:
        info_json_content = args.load_info_json.read()
        input_source_name = args.load_info_json.name
    else:
        info_json_content = sys.stdin.read()
        input_source_name = "stdin"

    if not info_json_content.strip():
        logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
        return 1

    try:
        info_data = json.loads(info_json_content)
        logger.info(f"Successfully loaded info.json from {input_source_name}.")
    except json.JSONDecodeError:
        logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
        return 1

    # Optionally persist a copy of the info.json (best-effort; non-fatal).
    if args.save_info_json_dir:
        try:
            video_id = info_data.get('id', 'unknown_video_id')
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"{timestamp}-{video_id}-info.json"
            output_path = os.path.join(args.save_info_json_dir, filename)
            os.makedirs(args.save_info_json_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(info_data, f, indent=2)
            logger.info(f"Saved info.json to {output_path}")
        except Exception as e:
            logger.error(f"Failed to save info.json: {e}")

    # --- Handle proxy and proxy rename ---
    proxy_url = args.proxy
    if not proxy_url:
        # Fall back to a proxy embedded in the info.json by an upstream tool.
        proxy_url = info_data.get('_proxy_url')
        if proxy_url:
            logger.info(f"Using proxy from info.json: {proxy_url}")

    if proxy_url and args.proxy_rename:
        rename_rule = args.proxy_rename.strip("'\"")
        if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
            try:
                parts = rename_rule.split('/')
                pattern, replacement = parts[1], parts[2]
                original_proxy = proxy_url
                proxy_url = re.sub(pattern, replacement, proxy_url)
                logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
            except re.error as e:
                logger.error(f"Invalid regex in --proxy-rename: {e}")
                return 1
        else:
            logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
            return 1

    # For library usage, ensure proxy URL has a scheme. Default to http if missing.
    if proxy_url and '://' not in proxy_url:
        original_proxy = proxy_url
        proxy_url = 'http://' + proxy_url
        logger.info(f"Proxy URL '{original_proxy}' has no scheme. Defaulting to '{proxy_url}'.")

    # --- Build the yt-dlp options dictionary ---
    logger.info("--- Configuring yt-dlp options ---")

    param_sources = {}  # option key -> human-readable provenance for the summary log
    ydl_opts = {}

    def _parse_ytdlp_args(args_list, source_name, opts_dict, sources_dict):
        """Helper to parse a list of yt-dlp CLI-style args into an options dict."""
        i = 0
        while i < len(args_list):
            arg = args_list[i]
            if not arg.startswith('--'):
                logger.warning(f"Skipping non-option argument from {source_name}: {arg}")
                i += 1
                continue

            key_cli = arg.lstrip('-')
            key_py = key_cli.replace('-', '_')

            # An option is a flag when it is last, or the next token is another option.
            is_flag = i + 1 >= len(args_list) or args_list[i + 1].startswith('--')

            if is_flag:
                if key_py.startswith('no_'):
                    real_key = key_py[3:]
                    # Handle special cases where the Python option name is different
                    if real_key == 'resize_buffer':
                        # BUGFIX: the Python option is the *negated* name, so
                        # --no-resize-buffer must set noresizebuffer=True
                        # (previously this incorrectly set it to False).
                        opts_dict['noresizebuffer'] = True
                        sources_dict['noresizebuffer'] = source_name
                    else:
                        opts_dict[real_key] = False
                        sources_dict[real_key] = source_name
                else:
                    # Handle special cases
                    if key_py == 'resize_buffer':
                        opts_dict['noresizebuffer'] = False
                        sources_dict['noresizebuffer'] = source_name
                    else:
                        opts_dict[key_py] = True
                        sources_dict[key_py] = source_name
                i += 1
            else:  # Has a value
                value = args_list[i + 1]
                # Special key name conversions
                if key_py == 'limit_rate': key_py = 'ratelimit'
                elif key_py == 'buffer_size': key_py = 'buffersize'

                # Special value conversion for match_filter
                if key_py == 'match_filter':
                    try:
                        value = match_filter_func(value)
                    except Exception as e:
                        logger.error(f"Failed to compile --match-filter '{value}': {e}")
                        # Skip this option
                        i += 2
                        continue
                else:
                    # Try to convert values to numbers, which yt-dlp expects
                    try:
                        value = int(value)
                    except (ValueError, TypeError):
                        try:
                            value = float(value)
                        except (ValueError, TypeError):
                            pass  # Keep as string

                opts_dict[key_py] = value
                sources_dict[key_py] = source_name
                i += 2

    # 1. Load from JSON config file
    config_path = args.config
    log_msg = ""
    if config_path:
        log_msg = f"1. [Source: Config File] Loading from: {config_path}"
    else:
        if os.path.exists('ytdlp.json'):
            config_path = 'ytdlp.json'
            log_msg = f"1. [Source: Config File] No --config provided. Found and loading local '{config_path}'."

    if config_path and os.path.exists(config_path):
        if log_msg:
            logger.info(log_msg)
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config_data = json.load(f)

            # All yt-dlp options are expected under the 'ytdlp_params' key.
            config_opts = config_data.get('ytdlp_params', {})

            if config_opts:
                logger.info(f"Parameters from config file ('{config_path}'):")
                config_str = json.dumps(config_opts, indent=2)
                for line in config_str.splitlines():
                    logger.info(f"  {line}")

            # Special handling for match_filter before updating ydl_opts
            if 'match_filter' in config_opts and isinstance(config_opts['match_filter'], str):
                logger.info("  Compiling 'match_filter' string from config file...")
                try:
                    config_opts['match_filter'] = match_filter_func(config_opts['match_filter'])
                except Exception as e:
                    logger.error(f"Failed to compile match_filter from config: {e}")
                    del config_opts['match_filter']

            ydl_opts.update(config_opts)
            for key in config_opts:
                param_sources[key] = "Config File"
        except (json.JSONDecodeError, IOError) as e:
            logger.error(f"Failed to read or parse JSON config file {config_path}: {e}")
            return 1
    elif args.config:
        logger.warning(f"Config file '{args.config}' not found. Ignoring.")

    # 2. Load from extra command-line args
    if args.extra_ytdlp_args:
        logger.info("2. [Source: CLI Extra Args] Loading extra arguments...")
        extra_args_list = shlex.split(args.extra_ytdlp_args)
        _parse_ytdlp_args(extra_args_list, "CLI Extra Args", ydl_opts, param_sources)

    # 3. Apply internal defaults if not already set
    if 'noresizebuffer' not in ydl_opts:
        ydl_opts['noresizebuffer'] = True
        param_sources['noresizebuffer'] = "Internal Default"
    if 'buffersize' not in ydl_opts:
        ydl_opts['buffersize'] = '4M'
        param_sources['buffersize'] = "Internal Default"
    if 'force_progress' not in ydl_opts:
        ydl_opts['force_progress'] = True
        param_sources['force_progress'] = "Internal Default"

    # 4. Apply explicit arguments from this script's CLI (highest priority)
    logger.info("3. [Source: CLI Explicit Args] Applying explicit overrides:")

    os.makedirs(args.output_dir, exist_ok=True)

    ydl_opts['verbose'] = args.verbose
    param_sources['verbose'] = "CLI Explicit"

    # Handle output template ('outtmpl')
    if args.output_buffer:
        # '-' makes yt-dlp write to stdout, which we capture per-download.
        ydl_opts['outtmpl'] = '-'
        param_sources['outtmpl'] = "CLI Explicit (Buffer)"
    elif 'outtmpl' in ydl_opts:
        # Respect outtmpl from config, but prepend the output directory
        outtmpl_val = ydl_opts['outtmpl']
        if isinstance(outtmpl_val, dict):
            # Prepend dir to each template in the dict
            ydl_opts['outtmpl'] = {k: os.path.join(args.output_dir, v) for k, v in outtmpl_val.items()}
        else:
            # Prepend dir to the single template string
            ydl_opts['outtmpl'] = os.path.join(args.output_dir, str(outtmpl_val))
        param_sources['outtmpl'] = "Config File (Path Applied)"
        logger.info(f"  Using 'outtmpl' from config, with output directory '{args.output_dir}' prepended.")
    else:
        # Use a default template if not specified in config
        output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')
        ydl_opts['outtmpl'] = output_template
        param_sources['outtmpl'] = "Internal Default"
        logger.info(f"  Using default 'outtmpl': {output_template}")

    if args.temp_path:
        ydl_opts['paths'] = {'temp': args.temp_path}
        param_sources['paths'] = "CLI Explicit"
        logger.info(f"  Temporary path: {args.temp_path}")

    if args.add_header:
        if 'http_headers' not in ydl_opts:
            ydl_opts['http_headers'] = {}
        elif not isinstance(ydl_opts['http_headers'], dict):
            logger.warning("Overwriting non-dictionary http_headers with headers from command line.")
            ydl_opts['http_headers'] = {}
        for header in args.add_header:
            if ':' in header:
                key, value = header.split(':', 1)
                ydl_opts['http_headers'][key.strip()] = value.strip()
            else:
                logger.error(f"Invalid header format: '{header}'. Expected 'Key: Value'.")
        param_sources['http_headers'] = "CLI Explicit (Merged)"

    if args.download_continue:
        ydl_opts['continuedl'] = True
        ydl_opts['nooverwrites'] = True
        param_sources['continuedl'] = "CLI Explicit"
        param_sources['nooverwrites'] = "CLI Explicit"

    if proxy_url:
        # NOTE(review): proxy may actually originate from info.json (see above);
        # the source label is kept as-is for log compatibility.
        ydl_opts['proxy'] = proxy_url
        param_sources['proxy'] = "CLI Explicit"

    if args.downloader:
        ydl_opts['downloader'] = {args.downloader: None}
        param_sources['downloader'] = "CLI Explicit"
    if args.downloader_args:
        try:
            downloader_name, args_str = args.downloader_args.split(':', 1)
            ydl_opts.setdefault('downloader_args', {})[downloader_name] = shlex.split(args_str)
            param_sources['downloader_args'] = "CLI Explicit"
        except ValueError:
            logger.error("Invalid --downloader-args format. Expected 'downloader:args'.")
            return 1

    if args.merge_output_format:
        ydl_opts['merge_output_format'] = args.merge_output_format
        param_sources['merge_output_format'] = "CLI Explicit"
    if args.download_sections:
        ydl_opts['download_sections'] = args.download_sections
        param_sources['download_sections'] = "CLI Explicit"
    if args.test:
        ydl_opts['test'] = True
        param_sources['test'] = "CLI Explicit"
    if args.retries is not None:
        ydl_opts['retries'] = args.retries
        param_sources['retries'] = "CLI Explicit"
    if args.fragment_retries is not None:
        ydl_opts['fragment_retries'] = args.fragment_retries
        param_sources['fragment_retries'] = "CLI Explicit"
    if args.socket_timeout is not None:
        ydl_opts['socket_timeout'] = args.socket_timeout
        param_sources['socket_timeout'] = "CLI Explicit"
    if args.concurrent_fragments is not None:
        ydl_opts['concurrent_fragments'] = args.concurrent_fragments
        param_sources['concurrent_fragments'] = "CLI Explicit"

    # To prevent timeouts on slow connections, ensure progress reporting is not disabled.
    # The CLI wrapper enables this by default, so we match its behavior for robustness.
    if ydl_opts.get('noprogress'):
        logger.info("Overriding 'noprogress' option. Progress reporting is enabled to prevent network timeouts.")
        ydl_opts['noprogress'] = False
        param_sources['noprogress'] = "Internal Override"

    # Ensure byte-size options are integers for library use
    try:
        from yt_dlp.utils import parse_bytes
        if 'buffersize' in ydl_opts and isinstance(ydl_opts['buffersize'], str):
            ydl_opts['buffersize'] = parse_bytes(ydl_opts['buffersize'])
            param_sources['buffersize'] = param_sources.get('buffersize', 'Unknown') + ' (Parsed)'
    except Exception as e:
        # FIX: was `except (ImportError, Exception)`; ImportError is a subclass
        # of Exception, so the tuple was redundant.
        logger.warning(f"Could not parse 'buffersize' option: {e}")

    # Force skip_download to False, as this script's purpose is to download.
    if ydl_opts.get('skip_download'):
        logger.info("Overriding 'skip_download: true' from config. This tool is for downloading.")
        ydl_opts['skip_download'] = False
        param_sources['skip_download'] = "Internal Override"

    # Log final effective options with sources
    logger.info("=== Final Effective yt-dlp Options (base) ===")
    for k in sorted(ydl_opts.keys()):
        v = ydl_opts[k]
        src = param_sources.get(k, "Unknown")
        if k in ['logger', 'progress_hooks']: continue
        logger.info(f"  {k}: {v} [Source: {src}]")

    # --- Parse and process the format string ---
    requested_format = args.format
    available_formats = [str(f['format_id']) for f in info_data.get('formats', []) if 'format_id' in f]

    # Keywords that yt-dlp treats as special selectors
    selector_keywords = ('best', 'worst', 'bestvideo', 'bestaudio')

    # Split by comma to get individual format requests.
    # Each item could be a simple format ID or a fallback chain (with /).
    format_items = [f.strip() for f in requested_format.split(',') if f.strip()]

    logger.info(f"Format string '{requested_format}' parsed into {len(format_items)} item(s): {format_items}")

    # Process each format item
    all_success = True
    final_filename = None

    for format_item in format_items:
        logger.info(f"--- Processing format item: '{format_item}' ---")

        # Check if this specific item is a simple format ID or a complex selector
        item_has_complex_syntax = any(c in format_item for c in '/+[]()') or format_item.startswith(selector_keywords)

        if item_has_complex_syntax:
            if '/' in format_item and '+' not in format_item:
                # This is a fallback chain like "299/298/137":
                # try each format in order until one succeeds.
                fallback_formats = [f.strip() for f in format_item.split('/') if f.strip()]
                logger.info(f"Detected fallback chain with {len(fallback_formats)} options: {fallback_formats}")

                item_success = False
                for fallback_fmt in fallback_formats:
                    # Check if this fallback is a simple format ID that exists
                    if fallback_fmt in available_formats:
                        logger.info(f"Trying fallback format '{fallback_fmt}'...")
                        success, ytdlp_logger = _download_single_format(fallback_fmt, info_data, ydl_opts, args)
                        if success:
                            item_success = True
                            if ytdlp_logger.final_filename:
                                final_filename = ytdlp_logger.final_filename
                            break
                        else:
                            logger.warning(f"Fallback format '{fallback_fmt}' failed, trying next...")
                    else:
                        # Try to find a matching format with a suffix (e.g., "140" matches "140-0")
                        prefix_match_re = re.compile(rf'^{re.escape(fallback_fmt)}-\d+$')
                        first_match = next((af for af in available_formats if prefix_match_re.match(af)), None)

                        if first_match:
                            logger.info(f"Fallback format '{fallback_fmt}' not found exactly. Using match: '{first_match}'...")
                            success, ytdlp_logger = _download_single_format(first_match, info_data, ydl_opts, args)
                            if success:
                                item_success = True
                                if ytdlp_logger.final_filename:
                                    final_filename = ytdlp_logger.final_filename
                                break
                            else:
                                logger.warning(f"Fallback format '{first_match}' failed, trying next...")
                        else:
                            logger.warning(f"Fallback format '{fallback_fmt}' not available, trying next...")

                if not item_success:
                    logger.error(f"All fallback formats in '{format_item}' failed or were unavailable.")
                    all_success = False
            else:
                # Merge request or other complex selector: we cannot safely
                # filter formats for these, so pass through to yt-dlp and warn.
                logger.warning(f"Complex format selector '{format_item}' detected. Cannot prevent auto-merge for this type.")
                logger.warning("If you experience merge errors, try specifying simple format IDs separated by commas.")

                # Use the original yt-dlp behavior for complex selectors
                ytdlp_logger = YTDLPLogger()
                local_ydl_opts = dict(ydl_opts)
                local_ydl_opts['format'] = format_item
                local_ydl_opts['logger'] = ytdlp_logger
                local_ydl_opts['progress_hooks'] = [lambda d, yl=ytdlp_logger: ytdlp_progress_hook(d, yl)]

                try:
                    download_buffer = None
                    if args.output_buffer:
                        download_buffer = io.BytesIO()
                        ctx_mgr = contextlib.redirect_stdout(download_buffer)
                    else:
                        ctx_mgr = contextlib.nullcontext()

                    with ctx_mgr, yt_dlp.YoutubeDL(local_ydl_opts) as ydl:
                        ydl.process_ie_result(copy.deepcopy(info_data))

                    if ytdlp_logger.has_errors:
                        logger.error(f"Download of '{format_item}' failed.")
                        all_success = False
                    else:
                        if ytdlp_logger.final_filename:
                            final_filename = ytdlp_logger.final_filename
                        if args.output_buffer and download_buffer:
                            sys.stdout.buffer.write(download_buffer.getvalue())
                            sys.stdout.buffer.flush()

                except yt_dlp.utils.DownloadError as e:
                    logger.error(f"yt-dlp DownloadError for '{format_item}': {e}")
                    all_success = False
                except Exception as e:
                    logger.exception(f"Unexpected error downloading '{format_item}': {e}")
                    all_success = False
        else:
            # Simple format ID like "299-dashy" or "140":
            # check if it exists in available formats.
            if format_item in available_formats:
                success, ytdlp_logger = _download_single_format(format_item, info_data, ydl_opts, args)
                if success:
                    if ytdlp_logger.final_filename:
                        final_filename = ytdlp_logger.final_filename
                else:
                    all_success = False
            else:
                # Try to find a matching format with a suffix (e.g., "140" matches "140-0")
                prefix_match_re = re.compile(rf'^{re.escape(format_item)}-\d+$')
                first_match = next((af for af in available_formats if prefix_match_re.match(af)), None)

                if first_match:
                    logger.info(f"Requested format '{format_item}' not found. Using first available match: '{first_match}'.")
                    success, ytdlp_logger = _download_single_format(first_match, info_data, ydl_opts, args)
                    if success:
                        if ytdlp_logger.final_filename:
                            final_filename = ytdlp_logger.final_filename
                    else:
                        all_success = False
                else:
                    logger.error(f"Requested format '{format_item}' not found in available formats: {available_formats}")
                    all_success = False

    # --- Final output ---
    if all_success:
        logger.info("All format downloads completed successfully.")
        if final_filename:
            # stdout carries ONLY the final filename (see logging setup above).
            print(final_filename, file=sys.stdout)

        if args.cleanup and final_filename and os.path.exists(final_filename):
            try:
                logger.info(f"Cleanup: Renaming and truncating '{final_filename}'")
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                directory, original_filename = os.path.split(final_filename)
                filename_base, filename_ext = os.path.splitext(original_filename)
                new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
                new_filepath = os.path.join(directory, new_filename)
                os.rename(final_filename, new_filepath)
                logger.info(f"Renamed to '{new_filepath}'")
                # Opening in 'w' mode truncates the file to 0 bytes.
                with open(new_filepath, 'w') as f:
                    pass
                logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
            except Exception as e:
                logger.error(f"Cleanup failed: {e}")
                return 1

        return 0
    else:
        logger.error("One or more format downloads failed.")
        return 1