#!/usr/bin/env python3
"""
Tool to send a download to an aria2c daemon via RPC.
"""

import argparse
import json
import logging
import sys
import os
import glob
import shutil
import re
import shlex
import threading
import time
from urllib.parse import urljoin

try:
    import aria2p
    from aria2p.utils import human_readable_bytes
    import yt_dlp
except ImportError:
    print("aria2p or yt-dlp is not installed. Please install them with: pip install aria2p yt-dlp", file=sys.stderr)
    sys.exit(1)

logger = logging.getLogger('download_aria_tool')


# Note: shadows the builtin TimeoutError; it is raised and caught only within
# this module.
class TimeoutError(Exception):
    pass


def add_download_aria_parser(subparsers):
    """Add the parser for the 'download aria-rpc' command."""
    parser = subparsers.add_parser(
        'aria-rpc',
        description='Send a download to an aria2c daemon via RPC, using an info.json from stdin or a file.',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Download a specific format using aria2c RPC.',
        epilog="""
Usage Notes for Fragmented Downloads (e.g., DASH):

To download and automatically merge fragmented formats, you must:
1. Use '--wait' to make the operation synchronous.
2. Use '--auto-merge-fragments' to enable the merge logic.
3. Ensure this script has access to the directory where aria2c saves files.

Example for a remote aria2c daemon:
- The remote daemon saves files to '/srv/downloads' on its machine.
- This directory is mounted locally at '/mnt/remote_aria2_downloads'.

  cat latest-info.json | yt-ops-client download aria-rpc -f "299/137" \\
      --wait --auto-merge-fragments \\
      --remote-dir /srv/downloads \\
      --fragments-dir /mnt/remote_aria2_downloads
"""
    )
    parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
    parser.add_argument('-f', '--format', required=True, help='The format ID to download. Supports yt-dlp style format selectors (e.g., "137/136,140").')
    parser.add_argument('--output-dir', help='Local directory to save the final merged file. Defaults to the current directory.')
    parser.add_argument('--fragments-dir', help='The local path where this script should look for downloaded fragments. If the aria2c daemon is remote, this should be a local mount point corresponding to --remote-dir. Defaults to --output-dir.')
    parser.add_argument('--remote-dir', help='The absolute path to the download directory on the remote aria2c host. This is passed via RPC.')
    parser.add_argument('--aria-host', default='localhost', help='The host of the aria2c RPC server. Default: localhost.')
    parser.add_argument('--aria-port', type=int, default=6800, help='The port of the aria2c RPC server. Default: 6800.')
    parser.add_argument('--aria-secret', help="The secret token for the aria2c RPC server (the value passed to the daemon's --rpc-secret option; often required).")
    parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This sets the "all-proxy" option in aria2c.')
    parser.add_argument('--downloader-args', help='Arguments for aria2c, in yt-dlp format (e.g., "aria2c:[-x 8, -k 1M]").')
    parser.add_argument('--wait', action='store_true', help='Wait for the download to complete and report its status. Note: this makes the operation synchronous and will block until the download finishes.')
    parser.add_argument('--wait-timeout', help='Timeout in seconds for waiting on downloads. Use "auto" to calculate one based on a minimum speed of 200KiB/s. Requires --wait. Default: no timeout.')
    parser.add_argument('--max-concurrent-fragments', type=int, default=8, help='Maximum number of fragments to download concurrently when using --wait. Mimics aria2c\'s -j option. Default: 8.')
    parser.add_argument('--auto-merge-fragments', action='store_true', help='Automatically merge fragments after download. Requires --wait and assumes the script has filesystem access to the aria2c host.')
    parser.add_argument('--remove-fragments-after-merge', action='store_true', help='Delete individual fragment files after a successful merge. Requires --auto-merge-fragments.')
    parser.add_argument('--cleanup', action='store_true', help='After a successful download, remove the final file(s) from the filesystem. For fragmented downloads, this implies --remove-fragments-after-merge.')
    parser.add_argument('--remove-on-complete', action=argparse.BooleanOptionalAction, default=True, help='Remove the download from aria2c history on successful completion. Use --no-remove-on-complete to disable. May fail on older aria2c daemons.')
    parser.add_argument('--purge-on-complete', action='store_true', help='Use aria2.purgeDownloadResult to clear ALL completed/failed downloads from history on success. Use as a workaround for older daemons.')
    parser.add_argument('--add-header', action='append', help='Add a custom HTTP header for the download. Format: "Key: Value". Can be used multiple times.')
    parser.add_argument('--user-agent', help='Specify a custom User-Agent. Overrides any User-Agent from info.json, --add-header, or the default.')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script.')
    return parser


def cleanup_aria_download(api, downloads):
    """Pause and remove downloads from aria2c."""
    if not downloads:
        return
    try:
        logger.info(f"Attempting to clean up {len(downloads)} download(s) from aria2c...")
        # Filter out downloads that might already be gone
        valid_downloads = [d for d in downloads if hasattr(d, 'gid')]
        if not valid_downloads:
            logger.info("No valid downloads to clean up.")
            return
        api.pause(valid_downloads)
        # Give aria2c a moment to process the pause command before removing
        time.sleep(0.5)
        api.remove(valid_downloads)
        logger.info("Cleanup successful.")
    except Exception as e:
        logger.warning(f"An error occurred during aria2c cleanup: {e}")


def parse_aria_error(download):
    """Parse an aria2p Download object to get a detailed error message."""
    error_code = download.error_code
    error_message = download.error_message

    if not error_message:
        return f"Unknown aria2c error (Code: {error_code})"

    # Handle specific error codes that provide more context
    if error_code == 24:  # Authorization failed
        return f"HTTP Authorization Failed (Error 24). The URL may have expired or requires valid cookies/headers. Raw message: {error_message}"

    # Check for common HTTP errors in the message
    http_status_match = re.search(r'HTTP status (\d+)', error_message)
    if http_status_match:
        status_code = int(http_status_match.group(1))
        if status_code == 403:
            return "HTTP Error 403: Forbidden. The URL may have expired or requires valid cookies/headers."
        elif status_code == 404:
            return "HTTP Error 404: Not Found. The resource is unavailable."
        else:
            return f"HTTP Error {status_code}."

    if "Timeout" in error_message or "timed out" in error_message.lower():
        return "Download timed out."

    # Fall back to the raw error message
    return f"Aria2c error (Code: {error_code}): {error_message}"


def parse_aria_args_to_options(args_str):
    """
    Parse yt-dlp style downloader args for aria2c.
    Example: "aria2c:[-x 8, -k 1M]" or just "-x 8 -k 1M"
    Returns a dictionary of options for aria2p.
    """
    if not args_str or not args_str.strip():
        return {}

    inner_args_str = args_str.strip()
    match = re.match(r'aria2c:\s*\[(.*)\]', inner_args_str)
    if match:
        # Handle yt-dlp's "aria2c:[...]" wrapper format.
        inner_args_str = match.group(1).replace(',', ' ')
    else:
        # If it doesn't match, assume the whole string is a set of arguments.
        logger.debug(f"Downloader args '{args_str}' does not match 'aria2c:[...]' format. Parsing as a raw argument string.")

    arg_list = shlex.split(inner_args_str)

    # Use a mini-parser to handle CLI-style args
    parser = argparse.ArgumentParser(add_help=False, prog="aria2c_args_parser")
    parser.add_argument('-x', '--max-connection-per-server')
    parser.add_argument('-k', '--min-split-size')
    parser.add_argument('-s', '--split')
    parser.add_argument('--http-proxy')
    parser.add_argument('--https-proxy')
    parser.add_argument('--all-proxy')

    try:
        # We only care about known arguments.
        known_args, unknown_args = parser.parse_known_args(arg_list)
        if unknown_args:
            logger.warning(f"Ignoring unknown arguments in --downloader-args: {unknown_args}")
        # Convert to a dict, dropping None values and converting underscores back
        # to hyphens to match the option names expected by aria2c's RPC interface.
        return {k.replace('_', '-'): v for k, v in vars(known_args).items() if v is not None}
    except Exception:
        logger.warning(f"Failed to parse arguments inside --downloader-args: '{inner_args_str}'")
        return {}
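
# Illustrative example (comment only, not executed):
#   parse_aria_args_to_options("aria2c:[-x 8, -k 1M]")
# should return {'max-connection-per-server': '8', 'min-split-size': '1M'}.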


def main_download_aria(args):
    """Main logic for the 'download aria-rpc' command."""
    log_level = logging.DEBUG if args.verbose else logging.INFO
    # Reconfigure the root logger to ensure our settings are applied.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', stream=sys.stderr)

    if args.remove_fragments_after_merge and not args.auto_merge_fragments:
        logger.error("--remove-fragments-after-merge requires --auto-merge-fragments.")
        return 1
    if args.auto_merge_fragments and not args.wait:
        logger.error("--auto-merge-fragments requires --wait.")
        return 1
    if args.wait_timeout and not args.wait:
        logger.error("--wait-timeout requires --wait.")
        return 1

    if args.wait:
        logger.info("Will wait for the download to complete and report its status. This is a synchronous operation.")
    else:
        logger.info("Will submit the download and exit immediately (asynchronous).")

    info_json_content = ""
    input_source_name = ""
    if args.load_info_json:
        info_json_content = args.load_info_json.read()
        input_source_name = args.load_info_json.name
    else:
        info_json_content = sys.stdin.read()
        input_source_name = "stdin"

    if not info_json_content.strip():
        logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
        return 1

    try:
        info_data = json.loads(info_json_content)
        logger.info(f"Successfully loaded info.json from {input_source_name}.")
    except json.JSONDecodeError:
        logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
        return 1

    # Find the requested format using yt-dlp's own selection logic.
    try:
        # We don't need a full ydl instance, just the format selection logic.
        ydl = yt_dlp.YoutubeDL({'quiet': True, 'logger': logger, 'format': args.format})
        formats = info_data.get('formats', [])
        selector = ydl.build_format_selector(args.format)
        ctx = {
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)
                                   or all(f.get('acodec') == 'none' for f in formats)),
        }
        selected_formats = list(selector(ctx))
    except Exception as e:
        logger.error(f"Failed to select format with selector '{args.format}': {e}", exc_info=args.verbose)
        return 1
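
    # Note: build_format_selector and the ctx dict above lean on yt-dlp
    # internals rather than a stable public API; the expected ctx keys may
    # change between yt-dlp releases, so pin the yt-dlp version if this breaks.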

    if not selected_formats:
        logger.error(f"No suitable format found for selector '{args.format}' in info.json.")
        return 1

    # The selector might return multiple results if ',' is used. We'll process the first one.
    target_format = selected_formats[0]
    if len(selected_formats) > 1:
        logger.warning(f"Format selector '{args.format}' resolved to multiple format combinations. Only the first one will be downloaded.")

    formats_to_download = target_format.get('requested_formats', [target_format])
    if len(formats_to_download) > 1:
        logger.warning(
            f"The selected format is a combination of {len(formats_to_download)} streams. "
            f"This tool does not support merging separate video/audio streams. "
            f"Only the first stream (format_id: {formats_to_download[0].get('format_id')}) will be downloaded. "
            f"To download all streams, please specify their format IDs separately."
        )

    target_format = formats_to_download[0]

    # Get the file size for auto-timeout and dynamic options.
    total_filesize = target_format.get('filesize') or target_format.get('filesize_approx')

    # Construct the filename.
    video_id = info_data.get('id', 'unknown_video_id')
    title = info_data.get('title', 'unknown_title')
    ext = target_format.get('ext', 'mp4')
    # Sanitize the title for use in a filename.
    safe_title = "".join([c for c in title if c.isalpha() or c.isdigit() or c in (' ', '-', '_')]).rstrip()
    filename = f"{safe_title} [{video_id}].f{target_format['format_id']}.{ext}"
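    # Illustrative example (hypothetical values): a title of "My Video!" with
    # id "abc123" and format_id "137" yields "My Video [abc123].f137.mp4".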

    # Prepare options for aria2.
    aria_options = {
        # Options from yt-dlp's aria2c integration for performance and reliability.
        'continue': 'true',
        'max-connection-per-server': 16,
        'split': 16,
        'http-accept-gzip': 'true',
        'file-allocation': 'none',
    }

    if args.proxy:
        aria_options['all-proxy'] = args.proxy

    custom_options = parse_aria_args_to_options(args.downloader_args)

    # Set min-split-size. yt-dlp's default is 1M.
    if 'min-split-size' not in custom_options:
        if total_filesize and total_filesize > 100 * 1024 * 1024:  # 100 MiB
            aria_options['min-split-size'] = '5M'
            logger.info("File is > 100MiB, dynamically setting min-split-size to 5M.")
        else:
            aria_options['min-split-size'] = '1M'

    if custom_options:
        aria_options.update(custom_options)
        logger.info(f"Applied custom aria2c options from --downloader-args: {custom_options}")

    # For older aria2c versions, a SOCKS5 proxy must be specified with an 'http://' scheme.
    if 'all-proxy' in aria_options and isinstance(aria_options['all-proxy'], str) and aria_options['all-proxy'].startswith('socks5://'):
        proxy_url = aria_options['all-proxy']
        logger.info("Replacing 'socks5://' with 'http://' in proxy URL for aria2c compatibility.")
        aria_options['all-proxy'] = 'http://' + proxy_url[len('socks5://'):]

    aria_options['out'] = filename

    # Add headers from info.json, and allow overriding/adding with --add-header.
    headers = target_format.get('http_headers', {}).copy()

    if args.add_header:
        for header in args.add_header:
            if ':' not in header:
                logger.error(f"Invalid header format in --add-header: '{header}'. Expected 'Key: Value'.")
                return 1
            key, value = header.split(':', 1)
            key = key.strip()
            value = value.strip()
            if key in headers:
                logger.info(f"Overwriting header '{key}' from info.json with value from command line.")
            else:
                logger.info(f"Adding header from command line: {key}: {value}")
            headers[key] = value

    # Enforce a consistent User-Agent.
    # First, remove any User-Agent that might have come from info.json, case-insensitively.
    for key in list(headers.keys()):
        if key.lower() == 'user-agent':
            del headers[key]

    # Set the default Cobalt User-Agent.
    default_user_agent = 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version'
    headers['User-Agent'] = default_user_agent
    logger.info(f"Set default User-Agent to: {default_user_agent}")

    # The --user-agent flag has the highest precedence and can override the default.
    if args.user_agent:
        headers['User-Agent'] = args.user_agent
        logger.info(f"Overriding User-Agent with value from --user-agent: {args.user_agent}")

    if headers:
        header_list = [f'{key}: {value}' for key, value in headers.items()]
        aria_options['header'] = header_list
        logger.info(f"Adding {len(header_list)} HTTP headers to the download.")
        if args.verbose:
            for h in header_list:
                if h.lower().startswith('cookie:'):
                    logger.debug("  Header: Cookie: [REDACTED]")
                else:
                    logger.debug(f"  Header: {h}")

    # Final check: ensure all option values are strings, as required by aria2c's RPC.
    # The 'header' option is a list of strings, which is a special case and must be preserved.
    for key, value in aria_options.items():
        if key != 'header' and not isinstance(value, str):
            aria_options[key] = str(value)
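    # At this point, aria_options might look like (illustrative values):
    #   {'continue': 'true', 'max-connection-per-server': '16', 'split': '16',
    #    'http-accept-gzip': 'true', 'file-allocation': 'none',
    #    'min-split-size': '1M', 'out': 'My Video [abc123].f137.mp4',
    #    'header': ['User-Agent: ...'], 'all-proxy': 'http://127.0.0.1:1080'}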

    is_fragmented = 'fragments' in target_format
    if not is_fragmented:
        url = target_format.get('url')
        if not url:
            logger.error(f"Format ID '{args.format}' has neither a URL nor fragments.")
            return 1

    try:
        logger.info(f"Connecting to aria2c RPC at http://{args.aria_host}:{args.aria_port}")
        client = aria2p.Client(
            host=f"http://{args.aria_host}",
            port=args.aria_port,
            secret=args.aria_secret or ""
        )
        api = aria2p.API(client)

        timeout_seconds = None
        if args.wait_timeout:
            if args.wait_timeout.lower() == 'auto':
                if total_filesize:
                    # Minimum speed: 200 KiB/s. Minimum timeout: 30s.
                    min_speed = 200 * 1024
                    calculated_timeout = int(total_filesize / min_speed)
                    timeout_seconds = max(30, calculated_timeout)
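                    # Worked example: a 100 MiB file gives
                    # max(30, 104857600 // 204800) = 512 seconds.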
                    # human_readable_bytes returns a single formatted string.
                    total_filesize_hr = human_readable_bytes(total_filesize)
                    logger.info(f"Auto-calculated timeout: {timeout_seconds}s (based on {total_filesize_hr} at 200KiB/s).")
                else:
                    logger.warning("Cannot use 'auto' timeout: file size not available in info.json. Timeout disabled.")
            else:
                try:
                    timeout_seconds = int(args.wait_timeout)
                    if timeout_seconds <= 0:
                        raise ValueError
                except ValueError:
                    logger.error(f"Invalid --wait-timeout value: '{args.wait_timeout}'. Must be a positive integer or 'auto'.")
                    return 1

        # Determine the download directory for aria2c.
        # If --remote-dir is specified, it takes precedence.
        # Otherwise, assume a local setup and use --output-dir.
        # It's crucial to use an absolute path to avoid ambiguity for the aria2c daemon.
        download_dir_for_aria = args.remote_dir
        if not download_dir_for_aria:
            local_dir = args.output_dir or '.'
            download_dir_for_aria = os.path.abspath(local_dir)
            logger.info(f"No --remote-dir specified. Using local path for aria2c download directory: {download_dir_for_aria}")

        if is_fragmented:
            return download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=download_dir_for_aria)
        else:
            return download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=download_dir_for_aria)

    except Exception as e:
        logger.error(f"An error occurred while communicating with aria2c: {e}", exc_info=args.verbose)
        return 1

def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None):
    """Handle downloading a single URL with aria2c."""
    if remote_dir:
        aria_options['dir'] = remote_dir
    logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...")
    downloads = api.add_uris([url], options=aria_options)

    if not downloads:
        logger.error("Failed to add download to aria2c. The API returned an empty result.")
        return 1

    download = downloads[0] if isinstance(downloads, list) else downloads
    logger.info(f"Successfully added download to aria2c. GID: {download.gid}")

    if args.wait:
        logger.info(f"Waiting for download {download.gid} to complete using WebSocket events...")
        download_finished_event = threading.Event()
        final_status = {}

        def on_complete(api_ref, event_gid):
            if event_gid == download.gid:
                logger.debug(f"WebSocket: GID {event_gid} completed.")
                final_status['status'] = 'complete'
                download_finished_event.set()

        def on_error(api_ref, event_gid):
            if event_gid == download.gid:
                logger.debug(f"WebSocket: GID {event_gid} errored.")
                final_status['status'] = 'error'
                download_finished_event.set()

        def on_stop(api_ref, event_gid):
            if event_gid == download.gid:
                logger.debug(f"WebSocket: GID {event_gid} stopped.")
                final_status['status'] = 'stopped'
                download_finished_event.set()

        listener_thread = threading.Thread(
            target=api.listen_to_notifications,
            kwargs={
                'on_download_complete': on_complete,
                'on_download_error': on_error,
                'on_download_stop': on_stop,
                'timeout': 1,
                'handle_signals': False
            },
            daemon=True
        )

        try:
            listener_thread.start()
            finished = download_finished_event.wait(timeout=timeout_seconds)
            if not finished:
                raise TimeoutError(f"Download did not complete within {timeout_seconds}s timeout.")
        except KeyboardInterrupt:
            sys.stdout.write('\n')
            logger.warning("Wait interrupted by user. Cleaning up download...")
            cleanup_aria_download(api, [download])
            return 130
        except TimeoutError as e:
            logger.error(f"Download timed out. Cleaning up... Error: {e}")
            cleanup_aria_download(api, [download])
            return 1
        finally:
            api.stop_listening()
            if listener_thread.is_alive():
                listener_thread.join(timeout=2)

        # Re-fetch the download object to get final details.
        try:
            download.update()
        except aria2p.ClientException as e:
            logger.warning(f"Could not update final status for GID {download.gid} (maybe removed on completion?): {e}.")
            if final_status.get('status') != 'complete':
                logger.error(f"Download {download.gid} failed, but could not retrieve final error details.")
                return 1

        if final_status.get('status') == 'complete':
            logger.info(f"Download {download.gid} completed successfully.")
            # File.path may be a pathlib.Path; normalize to str for the path handling below.
            downloaded_filepath_remote = str(download.files[0].path) if download.files else None
            if downloaded_filepath_remote:
                print(f"Download successful: {downloaded_filepath_remote}")
            else:
                print("Download successful, but no file path reported by aria2c.")

            if args.cleanup and downloaded_filepath_remote:
                local_base_dir = args.fragments_dir or args.output_dir or '.'
                if remote_dir and downloaded_filepath_remote.startswith(remote_dir):
                    relative_path = os.path.relpath(downloaded_filepath_remote, remote_dir)
                    local_filepath = os.path.join(local_base_dir, relative_path)
                else:
                    local_filepath = downloaded_filepath_remote
                    if not remote_dir:
                        logger.warning(f"Cleanup: --remote-dir not specified. Assuming download path is accessible locally as '{local_filepath}'.")

                try:
                    if os.path.exists(local_filepath):
                        os.remove(local_filepath)
                        logger.info(f"Cleanup: Removed downloaded file '{local_filepath}'")
                    else:
                        logger.warning(f"Cleanup: File not found at expected local path '{local_filepath}'. Skipping removal.")
                except OSError as e:
                    logger.error(f"Cleanup failed: Could not remove file '{local_filepath}': {e}")
            elif args.cleanup:
                logger.warning("Cleanup requested, but no downloaded file path was reported by aria2c.")

            if args.purge_on_complete:
                try:
                    api.client.purge_download_result()
                    logger.info("Purged all completed/failed downloads from aria2c history.")
                except Exception as e:
                    logger.warning(f"Failed to purge download history: {e}")
            elif args.remove_on_complete:
                try:
                    api.client.remove_download_result(download.gid)
                    logger.info(f"Removed download {download.gid} from aria2c history.")
                except Exception as e:
                    logger.warning(f"Failed to remove download {download.gid} from history: {e}")
            return 0
        else:
            detailed_error = parse_aria_error(download)
            logger.error(f"Download {download.gid} failed. Error: {detailed_error}")
            return 1
    else:
        print(f"Successfully added download. GID: {download.gid}")
        return 0


def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None):
    """Handle downloading fragmented formats with aria2c."""
    logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to the download queue.")
    fragment_base_url = target_format.get('fragment_base_url')
    fragments = target_format['fragments']

    MAX_FRAGMENTS = 50000
    if len(fragments) > MAX_FRAGMENTS:
        logger.error(
            f"The number of fragments ({len(fragments)}) exceeds the safety limit of {MAX_FRAGMENTS}. "
            f"This is to prevent overwhelming the aria2c server. Aborting."
        )
        return 1

    frag_aria_options = aria_options.copy()
    frag_aria_options.pop('out', None)
    if remote_dir:
        frag_aria_options['dir'] = remote_dir
        logger.info(f"Instructing remote aria2c to save fragments to: {remote_dir}")

    base_filename, file_ext = os.path.splitext(filename)
    logger.info(f"Preparing {len(fragments)} fragments for a batch submission to aria2c...")
    multicall_payload = []
    for i, fragment in enumerate(fragments):
        # Guard against fragments that carry neither a URL nor a base-URL-relative path.
        frag_url = fragment.get('url')
        if not frag_url and fragment_base_url:
            frag_url = urljoin(fragment_base_url, fragment['path'])
        if not frag_url:
            logger.error(f"Fragment {i} has no URL and no fragment_base_url is available. Aborting.")
            return 1
        fragment_filename = f"{base_filename}-Frag{i}{file_ext}"
        current_frag_options = frag_aria_options.copy()
        current_frag_options['out'] = os.path.basename(fragment_filename)

        # The aria2p library will handle adding the secret token to each call in the multicall.
        params = [[frag_url], current_frag_options]
        multicall_payload.append({'methodName': 'aria2.addUri', 'params': params})
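
    # Each payload entry follows aria2's system.multicall convention, e.g.
    # (illustrative URL and filename):
    #   {'methodName': 'aria2.addUri',
    #    'params': [['https://example.com/seg-0.m4s'],
    #               {'out': 'My Video [abc123].f137-Frag0.mp4', ...}]}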

    if not args.wait:
        # Asynchronous mode: submit all fragments at once and exit.
        gids, failed_count = [], 0
        try:
            logger.info(f"Submitting {len(multicall_payload)} fragments to aria2c in a single batch request...")
            # The aria2p client library correctly handles authentication for multicalls.
            results = api.client.multicall(multicall_payload)
            for i, result in enumerate(results):
                if isinstance(result, list) and len(result) == 1 and isinstance(result[0], str):
                    gids.append(result[0])
                else:
                    failed_count += 1
                    logger.warning(f"Failed to add fragment {i + 1}: {result[0] if isinstance(result, list) else result}")
        except Exception as e:
            logger.error(f"Batch submission to aria2c failed: {e}", exc_info=args.verbose)
            return 1
        if failed_count > 0:
            logger.warning(f"{failed_count} out of {len(fragments)} fragments failed to be added to aria2c.")
        if not gids:
            logger.error("Failed to add any fragments to aria2c. All submissions failed.")
            return 1
        print(f"Successfully added {len(gids)} fragments. GIDs: {gids}\nThese fragments will need to be merged manually after download.")
        return 0

    # Synchronous (--wait) mode with WebSockets.
    MAX_CONCURRENT_FRAGMENTS = args.max_concurrent_fragments
    all_gids, failed_submission_count = [], 0
    submitted_gids, completed_gids = set(), set()
    lock = threading.Lock()
    pending_fragments = list(enumerate(multicall_payload))
    total_fragment_count = len(pending_fragments)

    logger.info(f"Waiting for {total_fragment_count} fragments to complete using WebSocket events...")
    logger.info(f"Will maintain up to {MAX_CONCURRENT_FRAGMENTS} active fragment downloads.")

    def on_event(api_ref, event_gid):
        with lock:
            if event_gid in submitted_gids:
                completed_gids.add(event_gid)
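
    # Any terminal event (complete, error, or stop) counts toward completed_gids
    # here; whether each fragment actually succeeded is verified after the loop
    # via a final get_downloads() status check.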

    listener_thread = threading.Thread(
        target=api.listen_to_notifications,
        kwargs={'on_download_complete': on_event, 'on_download_error': on_event, 'on_download_stop': on_event, 'timeout': 1, 'handle_signals': False},
        daemon=True
    )
    listener_thread.start()
    start_time = time.time()

    try:
        while True:
            with lock:
                if len(completed_gids) >= total_fragment_count:
                    break
            if timeout_seconds and (time.time() - start_time > timeout_seconds):
                raise TimeoutError(f"Fragment downloads did not complete within {timeout_seconds}s timeout.")

            with lock:
                active_gids_count = len(submitted_gids) - len(completed_gids)
                num_to_submit = MAX_CONCURRENT_FRAGMENTS - active_gids_count

            if num_to_submit > 0 and pending_fragments:
                chunk_to_submit = pending_fragments[:num_to_submit]
                pending_fragments = pending_fragments[num_to_submit:]
                indices = [item[0] for item in chunk_to_submit]
                payloads = [item[1] for item in chunk_to_submit]
                try:
                    # The aria2p client library correctly handles authentication for multicalls.
                    results = api.client.multicall(payloads)
                    with lock:
                        for i, result in enumerate(results):
                            original_index = indices[i]
                            if isinstance(result, list) and len(result) == 1 and isinstance(result[0], str):
                                gid = result[0]
                                all_gids.append(gid)
                                submitted_gids.add(gid)
                            else:
                                failed_submission_count += 1
                                completed_gids.add(f"failed-submission-{original_index}")
                                logger.warning(f"Failed to add fragment {original_index + 1}: {result[0] if isinstance(result, list) else result}")
                except Exception as e:
                    logger.error(f"Batch submission to aria2c failed for a chunk: {e}", exc_info=args.verbose)
                    with lock:
                        for i in indices:
                            failed_submission_count += 1
                            completed_gids.add(f"failed-submission-{i}")

            with lock:
                completed_download_count = len(completed_gids)
            progress_percent = (completed_download_count / total_fragment_count * 100) if total_fragment_count > 0 else 0
            sys.stdout.write(f"\rProgress: {completed_download_count}/{total_fragment_count} fragments | {progress_percent:.1f}%")
            sys.stdout.flush()
            time.sleep(0.5)
    except (KeyboardInterrupt, TimeoutError) as e:
        sys.stdout.write('\n')
        if isinstance(e, KeyboardInterrupt):
            logger.warning("Wait interrupted by user. Cleaning up fragments...")
        else:
            logger.error(f"Download timed out. Cleaning up fragments... Error: {e}")
        cleanup_aria_download(api, api.get_downloads(list(submitted_gids)))
        return 130 if isinstance(e, KeyboardInterrupt) else 1
    finally:
        api.stop_listening()
        if listener_thread.is_alive():
            listener_thread.join(timeout=2)

    sys.stdout.write('\n')
    if failed_submission_count > 0:
        logger.error(f"{failed_submission_count} fragments failed to be submitted to aria2c.")

    final_downloads = []
    if all_gids:
        try:
            final_downloads = api.get_downloads(all_gids)
        except aria2p.ClientException as e:
            logger.warning(f"Could not perform final status check for fragments (maybe removed on completion?): {e}. Assuming success.")

    failed_downloads = [d for d in final_downloads if d.status != 'complete']
    if failed_downloads:
        logger.error(f"{len(failed_downloads)} fragments failed to download.")
        for d in failed_downloads[:5]:
            logger.error(f"  GID {d.gid}: {parse_aria_error(d)}")
        if len(failed_downloads) > 5:
            logger.error(f"  ... and {len(failed_downloads) - 5} more errors.")
        return 1
    if failed_submission_count > 0:
        logger.error("Aborting due to fragment submission failures.")
        return 1

    logger.info("All fragments downloaded successfully.")
    output_dir = args.output_dir or '.'
    final_filepath = os.path.join(output_dir, filename)
    fragments_lookup_dir = args.fragments_dir or output_dir

    if args.auto_merge_fragments:
        logger.info(f"Attempting to merge fragments into: {final_filepath}")
        logger.info(f"Searching for fragments in local directory: {os.path.abspath(fragments_lookup_dir)}")
        try:
            escaped_base = glob.escape(base_filename)
            search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
            fragment_files = sorted(glob.glob(search_path), key=lambda f: int(re.search(r'Frag(\d+)', os.path.basename(f)).group(1)))
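            # Numeric sort on the fragment index is required here: a plain
            # lexicographic sort would order "Frag10" before "Frag2".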
            if not fragment_files:
                logger.error(f"No fragment files found with pattern: {search_path}")
                return 1

            with open(final_filepath, 'wb') as dest_file:
                for frag_path in fragment_files:
                    with open(frag_path, 'rb') as src_file:
                        shutil.copyfileobj(src_file, dest_file)
            logger.info(f"Successfully merged {len(fragment_files)} fragments into {final_filepath}")

            if args.remove_fragments_after_merge or args.cleanup:
                logger.info("Removing fragment files...")
                for frag_path in fragment_files:
                    os.remove(frag_path)
                logger.info("Fragment files removed.")
            if args.cleanup:
                try:
                    os.remove(final_filepath)
                    logger.info(f"Cleanup: Removed merged file '{final_filepath}'")
                except OSError as e:
                    logger.error(f"Cleanup failed: Could not remove merged file '{final_filepath}': {e}")

            print(f"Download and merge successful: {final_filepath}")

            if args.purge_on_complete:
                try:
                    api.client.purge_download_result()
                    logger.info("Purged all completed/failed downloads from aria2c history.")
                except Exception as e:
                    logger.warning(f"Failed to purge download history: {e}")
            elif args.remove_on_complete:
                try:
                    for d in final_downloads:
                        try:
                            api.client.remove_download_result(d.gid)
                        except aria2p.ClientException:
                            pass
                    logger.info(f"Removed {len(final_downloads)} fragment downloads from aria2c history.")
                except Exception as e:
                    logger.warning(f"Failed to remove fragment downloads from history: {e}")
            return 0
        except Exception as e:
            logger.error(f"An error occurred during merging: {e}", exc_info=args.verbose)
            logger.error("Fragments were downloaded but not merged.")
            return 1
    else:
        print(f"Download successful. Fragments now need to be merged manually.\nThe final merged file should be named: {final_filepath}")
        print(f"You can merge them with a command like:\n  cat `ls -v '{os.path.join(fragments_lookup_dir, base_filename)}'-Frag*'{file_ext}'` > '{final_filepath}'")
        if args.cleanup:
            logger.info("Cleanup requested. Removing downloaded fragments...")
            try:
                escaped_base = glob.escape(base_filename)
                search_path = os.path.join(fragments_lookup_dir, f"{escaped_base}-Frag*{file_ext}")
                fragment_files = glob.glob(search_path)
                if not fragment_files:
                    logger.warning(f"Cleanup: No fragment files found with pattern: {search_path}")
                else:
                    for frag_path in fragment_files:
                        os.remove(frag_path)
                    logger.info(f"Removed {len(fragment_files)} fragment files.")
            except Exception as e:
                logger.error(f"An error occurred during fragment cleanup: {e}", exc_info=args.verbose)
        if args.purge_on_complete:
            try:
                api.client.purge_download_result()
                logger.info("Purged all completed/failed downloads from aria2c history.")
            except Exception as e:
                logger.warning(f"Failed to purge download history: {e}")
        elif args.remove_on_complete:
            try:
                # Remove each result individually, matching the merge branch above;
                # aria2.removeDownloadResult takes a single GID per call.
                for d in final_downloads:
                    try:
                        api.client.remove_download_result(d.gid)
                    except aria2p.ClientException:
                        pass
                logger.info(f"Removed {len(final_downloads)} fragment downloads from aria2c history.")
            except Exception as e:
                logger.warning(f"Could not remove fragment downloads from history (maybe already gone?): {e}")
        return 0