#!/usr/bin/env python3
"""
Tool to download a specified format using an info.json from stdin.
"""
import argparse
import json
import logging
import os
import re
import shlex
import subprocess
import sys
import tempfile
import time
from datetime import datetime

# Configure logging
logger = logging.getLogger('download_tool')


def add_download_parser(subparsers):
    """Add the parser for the 'download cli' command."""
    parser = subparsers.add_parser(
        'cli',
        description='Download using the legacy yt-dlp CLI wrapper. This method invokes yt-dlp as a subprocess.',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Download using the legacy yt-dlp CLI wrapper.'
    )
    parser.add_argument('--load-info-json', type=argparse.FileType('r', encoding='utf-8'), help="Path to the info.json file. If not provided, reads from stdin.")
    parser.add_argument('-f', '--format', required=True, help='The format selection string to download (e.g., "18", "299/137", "bestvideo+bestaudio").')
    parser.add_argument('--output-dir', default='.', help='Directory to save the downloaded file. Defaults to current directory.')
    parser.add_argument('--save-info-json-dir', help='If specified, save the info.json received from stdin to this directory with an auto-generated name.')
    parser.add_argument('--proxy', help='Proxy to use for the download, e.g., "socks5://127.0.0.1:1080". This option sets the proxy, overriding any value from the info.json.')
    parser.add_argument('--proxy-rename', help='Apply sed-style regex substitution to the proxy URL. Format: s/pattern/replacement/')
    parser.add_argument('--pause', type=int, default=0, help='Seconds to wait before starting the download.')
    parser.add_argument('--print-traffic', action='store_true', help='Print traffic instead of a progress bar.')
    parser.add_argument('--download-continue', action='store_true', help='Enable download continuation (--continue and --part flags for yt-dlp).')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose output for this script and yt-dlp.')
    parser.add_argument('--cli-config', default='cli.config', help='Path to a yt-dlp configuration file. Defaults to "cli.config".')
    parser.add_argument('--cleanup', action='store_true', help='After download, rename the file to include a timestamp and truncate it to 0 bytes.')
    parser.add_argument('--log-file', help='Append full yt-dlp output to the specified log file.')
    parser.add_argument('--yt-dlp-path', default='yt-dlp', help='Path to the yt-dlp executable. Defaults to "yt-dlp" in PATH.')
    parser.add_argument('--extra-ytdlp-args', help='A string of extra command-line arguments to pass to yt-dlp.')
    parser.add_argument('--downloader', help='Name of the external downloader to use (e.g., "aria2c", "native").')
    parser.add_argument('--downloader-args', help='Arguments to pass to the external downloader (e.g., "aria2c:-x 8").')
    parser.add_argument('--merge-output-format', help='Container format to merge to (e.g., "mp4", "mkv"). Overrides config file.')
    return parser


def main_download(args):
    """Main logic for the 'download' command."""
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    if args.pause > 0:
        logger.info(f"Pausing for {args.pause} seconds...")
        time.sleep(args.pause)

    info_json_content = ""
    input_source_name = ""
    if args.load_info_json:
        info_json_content = args.load_info_json.read()
        input_source_name = args.load_info_json.name
    else:
        info_json_content = sys.stdin.read()
        input_source_name = "stdin"

    if not info_json_content.strip():
        logger.error(f"Failed to read info.json from {input_source_name}. Input is empty.")
        return 1

    try:
        info_data = json.loads(info_json_content)
        logger.info(f"Successfully loaded info.json from {input_source_name}.")
    except json.JSONDecodeError:
        logger.error(f"Failed to parse info.json from {input_source_name}. Is the input valid JSON?")
        return 1

    if args.save_info_json_dir:
        try:
            video_id = info_data.get('id', 'unknown_video_id')
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"{timestamp}-{video_id}-info.json"
            output_path = os.path.join(args.save_info_json_dir, filename)
            os.makedirs(args.save_info_json_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(info_data, f, indent=2)
            logger.info(f"Saved info.json to {output_path}")
        except Exception as e:
            logger.error(f"Failed to save info.json: {e}")

    # Determine proxy to use
    proxy_url = args.proxy
    if not proxy_url:
        proxy_url = info_data.get('_proxy_url')
        if proxy_url:
            logger.info(f"Using proxy from info.json: {proxy_url}")

    if proxy_url and args.proxy_rename:
        rename_rule = args.proxy_rename
        # The user's command line might include quotes that are preserved by shlex.
        # Strip them to get the raw rule.
        rename_rule = rename_rule.strip("'\"")
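        # Illustrative rule (hypothetical values): --proxy-rename 's/old-gw/new-gw/'
        # would rewrite socks5://old-gw:1080 to socks5://new-gw:1080.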
        if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
            try:
                parts = rename_rule.split('/')
                pattern = parts[1]
                replacement = parts[2]
                original_proxy = proxy_url
                proxy_url = re.sub(pattern, replacement, proxy_url)
                logger.info(f"Renamed proxy URL from '{original_proxy}' to '{proxy_url}' using rule '{rename_rule}'")
            except re.error as e:
                logger.error(f"Invalid regex in --proxy-rename: {e}")
                return 1
            except IndexError:
                logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
                return 1
        else:
            logger.error("Invalid --proxy-rename format. Expected: s/pattern/replacement/")
            return 1

    # yt-dlp needs to load the info.json from a file
    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', encoding='utf-8') as tmp:
        json.dump(info_data, tmp)
        info_json_path = tmp.name
    logger.debug(f"Temporarily saved info.json to {info_json_path}")

    downloaded_filepath = None
    return_code = 1  # Default to error
    try:
        # Create output directory if it doesn't exist
        os.makedirs(args.output_dir, exist_ok=True)
        output_template = os.path.join(args.output_dir, '%(title)s [%(id)s].f%(format_id)s.%(ext)s')
        cmd = [
            args.yt_dlp_path,
            '--load-info-json', info_json_path,
            '-f', args.format,
            '-o', output_template,
            '--print', 'filename',
        ]
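        # '--print filename' asks yt-dlp to print the output path; when output is
        # captured below, that line is used to locate the downloaded file on disk.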
        if args.extra_ytdlp_args:
            cmd.extend(shlex.split(args.extra_ytdlp_args))
        if args.downloader:
            cmd.extend(['--downloader', args.downloader])
        if args.downloader_args:
            cmd.extend(['--downloader-args', args.downloader_args])
        if args.merge_output_format:
            cmd.extend(['--merge-output-format', args.merge_output_format])
        if args.download_continue:
            cmd.extend(['--continue', '--part'])
        if os.path.exists(args.cli_config):
            logger.info(f"Using config file: {args.cli_config}")
            cmd.extend(['--config-location', args.cli_config])
        else:
            logger.info(f"Config file '{args.cli_config}' not found. Using yt-dlp defaults.")
        if args.print_traffic:
            cmd.append('--print-traffic')
            cmd.append('--no-progress')
        else:
            cmd.append('--progress')
        if args.verbose:
            cmd.append('--verbose')
        if proxy_url:
            cmd.extend(['--proxy', proxy_url])

        # Determine if we need to capture output.
        capture_output = args.cleanup or args.log_file or args.print_traffic
        if capture_output and not args.print_traffic:
            logger.info("Note: --cleanup or --log-file requires capturing output, which may affect progress bar display.")

        logger.info(f"Executing yt-dlp command for format '{args.format}'")
        # Construct a display version of the command for logging
        display_cmd_str = ' '.join(f"'{arg}'" if ' ' in arg else arg for arg in cmd)
        if os.path.exists(args.cli_config):
            try:
                with open(args.cli_config, 'r', encoding='utf-8') as f:
                    config_contents = ' '.join(f.read().split())
                if config_contents:
                    logger.info(f"cli.config contents: {config_contents}")
            except IOError as e:
                logger.warning(f"Could not read config file {args.cli_config}: {e}")
        logger.info(f"Full command: {display_cmd_str}")

        if capture_output:
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding='utf-8')
            log_f = None
            if args.log_file:
                try:
                    log_f = open(args.log_file, 'a', encoding='utf-8')
                    log_f.write(f"\n--- Log entry: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n")
                    log_f.write(f"Command: {' '.join(cmd)}\n\n")
                except IOError as e:
                    logger.error(f"Failed to open log file {args.log_file}: {e}")
            stdout_data, stderr_data = process.communicate()
            return_code = process.returncode

            # Write captured output to terminal and log file
            if stdout_data:
                sys.stdout.write(stdout_data)
                sys.stdout.flush()
                if log_f:
                    for line in stdout_data.splitlines(True):
                        log_f.write(f"[stdout] {line}")
            if stderr_data:
                sys.stderr.write(stderr_data)
                sys.stderr.flush()
                if log_f:
                    for line in stderr_data.splitlines(True):
                        log_f.write(f"[stderr] {line}")

            stdout_lines = stdout_data.splitlines() if stdout_data else []
            if log_f:
                log_f.write(f"\n--- End log entry (yt-dlp exit code: {return_code}) ---\n")
                log_f.close()
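
            # The last captured stdout line that names an existing path is taken as the
            # downloaded file (yt-dlp printed it via '--print filename').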
            for line in reversed(stdout_lines):
                if line and os.path.exists(line):
                    downloaded_filepath = line
                    logger.info(f"Detected downloaded file: {downloaded_filepath}")
                    break
        else:
            # Original behavior: progress bar direct to terminal, no capture
            process = subprocess.Popen(cmd)
            process.wait()
            return_code = process.returncode

        if return_code != 0:
            logger.error(f"yt-dlp exited with error code {return_code}")
        else:
            logger.info("yt-dlp command completed successfully.")
    except Exception as e:
        logger.exception(f"An unexpected error occurred: {e}")
        return 1
    finally:
        # Clean up the temporary file
        if os.path.exists(info_json_path):
            os.unlink(info_json_path)
            logger.debug(f"Removed temporary file {info_json_path}")

    # Cleanup phase
    if args.cleanup:
        if downloaded_filepath and os.path.exists(downloaded_filepath):
            try:
                logger.info(f"Cleanup: Renaming and truncating '{downloaded_filepath}'")
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                directory, original_filename = os.path.split(downloaded_filepath)
                filename_base, filename_ext = os.path.splitext(original_filename)
                # New name format is [base]_[timestamp][ext].empty
                new_filename = f"{filename_base}_{timestamp}{filename_ext}.empty"
                new_filepath = os.path.join(directory, new_filename)
                os.rename(downloaded_filepath, new_filepath)
                logger.info(f"Renamed to '{new_filepath}'")
                with open(new_filepath, 'w'):
                    pass
                logger.info(f"Truncated '{new_filepath}' to 0 bytes.")
            except Exception as e:
                logger.error(f"Cleanup failed: {e}")
                return 1
        else:
            logger.warning("Cleanup requested, but no downloaded file was found. Skipping cleanup.")

    return return_code
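

# A minimal standalone entry point, included as a sketch for ad-hoc use. In the full
# ytops_client package this parser is presumably registered by the package's own CLI
# entry point under a 'download' command group; the wiring below only assumes the two
# functions defined in this module.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(name)s: %(message)s')
    root_parser = argparse.ArgumentParser(prog='download_tool')
    subparsers = root_parser.add_subparsers(dest='command', required=True)
    add_download_parser(subparsers)
    sys.exit(main_download(root_parser.parse_args()))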