#!/usr/bin/env python3
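"""Thrift client for the YTDLP Operations Server.

Requests YouTube token data (a yt-dlp command, an info.json payload and an
optional SOCKS5 proxy) from the server and saves the info.json into a context
directory.

Illustrative invocation (added for clarity; the file name, host, port and URL
below are placeholders, not values from the original source):

    python ytdlp_ops_client.py --host 127.0.0.1 --port 9090 getToken --url "https://www.youtube.com/watch?v=dQw4w9WgXcQ" --list-formats
"""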

import argparse
import csv
import datetime
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
import uuid
from pathlib import Path
from typing import Dict, List, Optional, Any

from tabulate import tabulate
import yt_dlp

def signal_handler(sig: int, frame) -> None:
    """Handle shutdown signals gracefully."""
    logger.info(f"Received signal {sig}, shutting down...")
    # Clean up any resources here
    sys.exit(0)

# Register signal handlers
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

# Import the patch for Thrift exceptions
try:
    from thrift_exceptions_patch import patch_thrift_exceptions
    # Explicitly call the patch function to ensure it's applied
    patch_thrift_exceptions()
    print("Applied Thrift exceptions patch for compatibility")
    if 'AIRFLOW_HOME' in os.environ:
        print("Running in Airflow environment - patch is essential")
    else:
        print("Not running in Airflow environment, but patch applied anyway for consistency")
except ImportError:
    print("Could not import thrift_exceptions_patch, compatibility may be affected")
    print("If running in Airflow, this may cause 'immutable instance' errors")
except Exception as e:
    print(f"Error applying Thrift exceptions patch: {e}")

# --- Python Path Setup ---
# Ensure the script can find necessary modules, especially Thrift-generated code.
# Assumes the script is run from the project root or the path is adjusted accordingly.
project_root = Path(__file__).parent.absolute()
gen_py_dir = project_root / "thrift_model" / "gen_py"

# Add project root to sys.path (needed for the 'pangramia' symlink)
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Verify paths for debugging
# print("Project Root:", project_root)
# print("Gen Py Dir:", gen_py_dir)
# print("Sys Path:", sys.path)
# --- End Python Path Setup ---

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol

try:
    from pangramia.yt.tokens_ops import YTTokenOpService
    from pangramia.yt.common.ttypes import JobTokenData, TokenUpdateMode, JobState
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
except ImportError as e:
    print(f"Error importing Thrift-generated modules: {e}")
    print("Please ensure you have run './generate-thrift.py' successfully from the project root.")
    print(f"Current sys.path includes: {gen_py_dir}")
    sys.exit(1)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('ytdlp_ops_client.log')
    ]
)
logger = logging.getLogger(__name__)

def get_info_json(token_data):
    """Return the infoJson payload from token_data, or raise ValueError if it is missing or empty."""
    if not hasattr(token_data, 'infoJson'):
        logger.error("infoJson attribute missing in token_data")
        raise ValueError("Server response missing infoJson")

    if not token_data.infoJson or token_data.infoJson == "{}":
        logger.error("Empty infoJson received from server")
        raise ValueError("Empty infoJson received from server")

    logger.info(f"Using infoJson from server response ({len(token_data.infoJson)} bytes)")
    return token_data.infoJson

def is_valid_json(json_str):
    """Check if a string is valid JSON and not empty."""
    if not json_str or json_str == "{}" or json_str == "":
        logger.warning("Empty JSON string received")
        return False

    try:
        data = json.loads(json_str)

        # Check if it's an empty object
        if isinstance(data, dict) and not data:
            logger.warning("Empty JSON object received")
            return False

        # Check if it has an error field
        if isinstance(data, dict) and ('error' in data or 'errorCode' in data):
            # It's valid JSON but contains an error
            logger.warning(f"JSON contains error: {data.get('error', 'Unknown error')} (code: {data.get('errorCode', 'none')})")
            return True

        # Check if it has at least some basic fields
        if isinstance(data, dict) and ('id' in data or 'title' in data):
            logger.info(f"Valid JSON with video data: {data.get('title', 'Unknown title')}")
            return True

        # Check if it has token_data, which is important
        if isinstance(data, dict) and 'token_data' in data and data['token_data']:
            logger.info("Valid JSON with token_data")
            return True

        logger.warning("JSON is valid but missing expected fields")
        return True
    except json.JSONDecodeError as e:
        logger.warning(f"Invalid JSON: {e}")
        return False
    except Exception as e:
        logger.warning(f"Unexpected error validating JSON: {e}")
        return False

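# Illustrative behaviour of is_valid_json() (examples added for clarity, not from
# the original source):
#   is_valid_json('{}')                            -> False (empty object)
#   is_valid_json('{"error": "Sign in required"}') -> True  (valid JSON; the error is logged)
#   is_valid_json('{"id": "abc", "title": "x"}')   -> True
#   is_valid_json('not json')                      -> False
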
def extract_video_id(url: str) -> Optional[str]:
    """Extract video ID from a YouTube URL."""
    # If it's already a video ID
    if re.match(r'^[a-zA-Z0-9_-]{11}$', url):
        return url

    # Handle youtu.be URLs
    youtu_be_match = re.search(r'youtu\.be/([a-zA-Z0-9_-]{11})', url)
    if youtu_be_match:
        return youtu_be_match.group(1)

    # Handle youtube.com URLs
    youtube_match = re.search(r'(?:youtube\.com/(?:watch\?v=|embed/|v/)|youtube\.com/.*[?&]v=)([a-zA-Z0-9_-]{11})', url)
    if youtube_match:
        return youtube_match.group(1)

    # Handle shorts URLs
    shorts_match = re.search(r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})', url)
    if shorts_match:
        return shorts_match.group(1)

    return None

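# Illustrative inputs (added for clarity, not from the original source) that
# extract_video_id() normalises to the same 11-character ID "dQw4w9WgXcQ":
#   extract_video_id("dQw4w9WgXcQ")
#   extract_video_id("https://youtu.be/dQw4w9WgXcQ")
#   extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
#   extract_video_id("https://www.youtube.com/shorts/dQw4w9WgXcQ")
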
def list_available_formats(url: str, args: argparse.Namespace) -> Optional[List[Dict[str, Any]]]:
    """List available formats for a YouTube video."""
    ydl_opts = {
        'quiet': not args.no_quiet if hasattr(args, 'no_quiet') else True,
        'no_warnings': True,
        'skip_download': True,
        'extract_flat': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

            if not info:
                logger.error("Could not retrieve video information")
                return None

            formats = info.get('formats', [])

            if not formats:
                logger.warning("No formats available for this video")
                return None

            # Create a table of available formats
            format_table = []
            for f in formats:
                format_table.append({
                    'format_id': f.get('format_id', 'unknown'),
                    'ext': f.get('ext', 'unknown'),
                    'resolution': f.get('resolution', 'unknown'),
                    'fps': f.get('fps', 'unknown'),
                    'vcodec': f.get('vcodec', 'unknown'),
                    'acodec': f.get('acodec', 'unknown'),
                    'filesize': f.get('filesize', 'unknown'),
                    'format_note': f.get('format_note', '')
                })

            return format_table

    except Exception as e:
        logger.error(f"Error listing formats: {e}")
        return None

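# Each row returned by list_available_formats() carries these keys (values below
# are illustrative, not from the original source):
#   {'format_id': '251', 'ext': 'webm', 'resolution': 'audio only', 'fps': 'unknown',
#    'vcodec': 'none', 'acodec': 'opus', 'filesize': 1234567, 'format_note': 'medium'}
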
def suggest_best_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Suggest best formats based on resolution and codec."""
    best = []
    seen_resolutions = set()

    # Prioritize higher resolutions and certain codecs
    preferred_codecs = ["vp9", "avc1", "av01"]  # In order of preference

    for f in sorted(formats, key=lambda x: (
        -int(x.get('height', 0) or 0),  # Higher resolution first
        preferred_codecs.index(x.get('vcodec', '').split('.')[0]) if x.get('vcodec', '').split('.')[0] in preferred_codecs else float('inf'),  # Preferred codecs
        x.get('filesize', 0) or 0  # Smaller filesize
    )):
        resolution = f.get('resolution')
        if resolution and resolution not in seen_resolutions:
            best.append(f)
            seen_resolutions.add(resolution)
            if len(best) >= 3:  # Suggest up to 3 formats
                break
    return best

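# Illustrative ordering (added for clarity, not from the original source): with the
# sort key above, a 1080p VP9 entry sorts ahead of a 1080p AVC1 entry (same height,
# but "vp9" precedes "avc1" in preferred_codecs), and both sort ahead of any 720p
# entry; only the first format seen for each distinct resolution is suggested, up
# to three suggestions in total.
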
def load_info_json(path: str) -> Optional[Dict[str, Any]]:
    """Load and validate info.json file."""
    try:
        path = Path(path).resolve()
        if not path.exists():
            logger.error(f"Info.json file not found: {path}")
            return None

        with open(path, 'r') as f:
            data = json.load(f)

        # Basic validation
        if not isinstance(data, dict):
            logger.error("Invalid info.json format: not a JSON object")
            return None

        if 'id' not in data:
            logger.warning("Info.json missing video ID")

        return data

    except Exception as e:
        logger.error(f"Error loading info.json: {e}")
        return None

def save_info_json(info_json: str, video_id: str, context_dir: str) -> Optional[str]:
    """Save info.json to disk and return the saved path."""
    try:
        # Ensure context directory exists
        Path(context_dir).mkdir(parents=True, exist_ok=True)

        # Create filename with video ID and timestamp
        timestamp = int(time.time())
        output_path = Path(context_dir) / f"info_json_{video_id}_{timestamp}.json"

        # Write the file
        with open(output_path, 'w') as f:
            f.write(info_json)

        # Also create a symlink or copy to the standard name for compatibility
        standard_path = Path(context_dir) / f"info_json_{video_id}.json"
        try:
            # Try to create a symlink first (more efficient)
            if os.path.exists(standard_path):
                os.remove(standard_path)
            os.symlink(output_path, standard_path)
        except (OSError, AttributeError):
            # If symlink fails (e.g., on Windows), make a copy
            with open(standard_path, 'w') as f:
                f.write(info_json)

        # Save latest.json
        latest_path = Path(context_dir) / "latest.json"
        with open(latest_path, 'w') as f:
            f.write(info_json)

        logger.info(f"Successfully saved info.json to {output_path} and latest.json to {latest_path}")
        return str(output_path)
    except Exception as e:
        logger.error(f"Failed to save info.json: {e}")
        logger.error(traceback.format_exc())
        return None  # Return None on failure to match the Optional[str] annotation

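# For reference (illustrative, not from the original source): a successful call such
# as save_info_json(info_json, "dQw4w9WgXcQ", "./ctx") leaves three entries in ./ctx:
#   info_json_dQw4w9WgXcQ_<timestamp>.json  - timestamped copy; this path is returned
#   info_json_dQw4w9WgXcQ.json              - symlink to (or copy of) the timestamped file
#   latest.json                             - plain copy, overwritten on every call
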
def main():
    # Create main parser
    parser = argparse.ArgumentParser(description='''YtdlpOpsService Client

This client connects to the YTDLP Operations Server to generate tokens for YouTube videos.
The server performs SOCKS5 proxy connection testing with a 9-second timeout for early detection
of proxy issues. If a proxy connection fails, the server will immediately stop token generation
and return an error instead of trying other clients.''')

    # Add global options
    parser.add_argument('--host', default=os.getenv('YTDLP_HOST', 'localhost'),
                        help='Server host (default: localhost or YTDLP_HOST env)')
    parser.add_argument('--port', type=int, default=int(os.getenv('YTDLP_PORT', '9090')),
                        help='Server port (default: 9090 or YTDLP_PORT env)')
    parser.add_argument('--timeout', type=int, default=30000,
                        help='Timeout in milliseconds (default: 30000)')
    parser.add_argument('--timeout-sec', type=int, default=None,
                        help='Timeout in seconds (overrides --timeout if provided)')
    parser.add_argument('--context-dir', default='.', help='Context directory to save info.json (default: .)')
    parser.add_argument('--load-info-json', help='Path to existing info.json file to load')
    parser.add_argument('--framed-transport', action='store_true',
                        help='Use TFramedTransport instead of TBufferedTransport for handling very large messages')
    parser.add_argument('--force-framed-transport', action='store_true',
                        help='Force the use of TFramedTransport (recommended for large messages)')

    # Create subparsers for commands
    subparsers = parser.add_subparsers(dest='command', required=True, help='Commands')

    # getToken command
    get_token_parser = subparsers.add_parser('getToken', help='Get token for a YouTube URL',
                                             description='''Get token for a YouTube URL

This command connects to the server to generate tokens for a YouTube video.
The server will test any configured SOCKS5 proxy with a 9-second timeout.
If the proxy connection fails, token generation will stop immediately with an error.''')
    get_token_parser.add_argument('--url', required=True,
                                  help='YouTube URL to process')
    # --format removed; format/quality is determined by the server or embedded in the command
    get_token_parser.add_argument('--account_id', default='default',
                                  help='Account ID (default: default)')
    get_token_parser.add_argument('--list-formats', action='store_true',
                                  help='List available formats for the video')

args = parser.parse_args()

    # Handle info.json loading
    if args.load_info_json:
        info_json = load_info_json(args.load_info_json)
        if info_json:
            print("Loaded info.json:")
            print(json.dumps(info_json, indent=2))
        return

    transport = None
    try:
        # Ensure context directory exists and is writable
        try:
            Path(args.context_dir).mkdir(parents=True, exist_ok=True)
            test_file = Path(args.context_dir) / "test.txt"
            test_file.touch()
            test_file.unlink()
        except Exception as e:
            logger.error(f"Could not access context directory {args.context_dir}: {e}")
            print(f"Error: Could not access context directory {args.context_dir}")
            sys.exit(1)

        try:
            # Check if we should use framed transport for very large messages
            # (informational only: TFramedTransport is always used below to match the server)
            use_framed_transport = args.framed_transport or args.force_framed_transport or os.environ.get('USE_FRAMED_TRANSPORT', '').lower() in ('1', 'true', 'yes')
            logger.debug(f"Using framed transport: {use_framed_transport}")

            # Create socket with configurable timeout, force IPv4
            socket = TSocket.TSocket(args.host, args.port, socket_family=2)  # AF_INET = 2 for IPv4

            # Use timeout-sec if provided, otherwise use timeout in milliseconds
            if args.timeout_sec is not None:
                socket.setTimeout(args.timeout_sec * 1000)  # Convert seconds to milliseconds
                logger.debug(f"Using timeout of {args.timeout_sec} seconds")
            else:
                socket.setTimeout(args.timeout)  # Use timeout from CLI in milliseconds
                logger.debug(f"Using timeout of {args.timeout} milliseconds")

            # Always use TFramedTransport to match the server
            transport = TTransport.TFramedTransport(socket)
            logger.debug("Using TFramedTransport for large messages")

            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = YTTokenOpService.Client(protocol)

            logger.info(f"Attempting to connect to server at {args.host}:{args.port}...")
            try:
                transport.open()
                logger.info("Successfully connected to server")
            except TTransport.TTransportException as e:
                logger.error(f"Connection failed: {str(e)}")
                print(f"Error: Could not connect to server at {args.host}:{args.port}")
                print(f"Reason: {str(e)}")
                sys.exit(1)

            # Add connection test
            try:
                client.ping()
                logger.info("Server connection test successful")
            except Exception as e:
                logger.error(f"Server connection test failed: {e}")
                raise
        except TTransport.TTransportException as e:
            logger.error(f"Connection failed: {str(e)}")
            logger.error(f"Could not connect to {args.host}:{args.port}")
            sys.exit(1)
        except Exception as e:
            logger.error(f"Connection failed: {str(e)}")
            logger.error(traceback.format_exc())
            sys.exit(1)

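        # Note (added for clarity): the transport stack used here
        # (TSocket -> TFramedTransport -> TBinaryProtocol) has to mirror the
        # server's configuration; a buffered, unframed client talking to a framed
        # server typically fails with transport read errors on the first RPC.
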
        if args.command == 'getToken':
            url = args.url
            # format_codes removed

            # Handle format listing
            if args.list_formats:
                formats = list_available_formats(url, args)
                if formats:
                    print("\nAvailable formats:")
                    print(tabulate(formats, headers="keys", showindex=True))  # Show index for format selection

                    # Suggest best formats based on resolution
                    best_formats = suggest_best_formats(formats)
                    if best_formats:
                        print("\nSuggested formats:")
                        print(tabulate(best_formats, headers="keys"))
                else:
                    print("No formats available or could not retrieve format information")
                return
        elif getattr(args, 'youtube_url', None):
            # Legacy path: the global --youtube-url/--format options have been removed,
            # so use getattr to avoid an AttributeError when they are absent.
            url = args.youtube_url
            format_code = getattr(args, 'format', None)
            print("Warning: --youtube-url is deprecated, use 'getToken --url' instead")
        else:
            print("Please provide a YouTube URL using 'getToken --url' command")
            return

        # Get token for URL
        try:
            logger.info(f"Requesting token for URL: {url}")
            token_data = client.getOrRefreshToken(
                accountId=args.account_id,
                updateType=TokenUpdateMode.AUTO,
                url=url
            )

            if not token_data:
                logger.error("Received empty token data from server")
                print("Error: Received empty token data from server")
                sys.exit(1)

            # Validate token data
            if not hasattr(token_data, 'ytdlpCommand') or not token_data.ytdlpCommand:
                logger.error("Token data missing required ytdlpCommand")
                print("Error: Token data missing required ytdlpCommand")
                sys.exit(1)

            logger.info("Successfully received token data from server")

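            # The code below relies on these token_data attributes (names as used in
            # this file; the Thrift IDL itself is not shown here):
            #   token_data.ytdlpCommand - raw yt-dlp command assembled by the server
            #   token_data.infoJson     - serialized info.json payload ("{}" on failure)
            #   token_data.socks        - optional SOCKS5 proxy URL passed to yt-dlp
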
            # Log all attributes of token_data for debugging
            token_attrs = [attr for attr in dir(token_data) if not attr.startswith('__') and not callable(getattr(token_data, attr))]
            logger.debug(f"Received token_data attributes: {token_attrs}")

            # Handle case where token_data is a dict-like object
            if hasattr(token_data, 'items'):
                # Convert to dict if needed
                token_dict = dict(token_data.items())
                logger.debug(f"Token data as dict: {token_dict}")

                # If we have JSON data directly in the response
                if isinstance(token_dict.get('infoJson', None), str):
                    received_info_json = token_dict['infoJson']
                elif isinstance(token_dict.get('data', None), (dict, str)):
                    # Try to use the data field if it exists
                    data = token_dict['data']
                    if isinstance(data, str):
                        received_info_json = data
                    else:
                        received_info_json = json.dumps(data)
                else:
                    # Create info.json from available fields
                    info_data = {
                        "id": token_dict.get('id', extract_video_id(url)),
                        "title": token_dict.get('title', ''),
                        "formats": token_dict.get('formats', []),
                        "timestamp": int(time.time()),
                        "ytdlp_command": token_dict.get('ytdlpCommand', '')
                    }
                    received_info_json = json.dumps(info_data)
            else:
                # Handle case where token_data is a regular object
                received_info_json = getattr(token_data, 'infoJson', None)

            if received_info_json:
                logger.debug(f"Received info.json data ({len(received_info_json)} bytes)")
                if len(received_info_json) > 100:
                    logger.debug(f"Preview: {received_info_json[:100]}...")
            else:
                logger.warning("No valid info.json data found in response")

        except PBServiceException as e:
            logger.error(f"Service exception: {e.message}")
            if hasattr(e, 'errorCode'):
                if e.errorCode == "BOT_DETECTED":
                    print(f"Error: {e.message}")
                    print("\nYouTube has detected bot activity. Authentication is required.")

                    # Print suggestions if available
                    if hasattr(e, 'context') and e.context and 'suggestions' in e.context:
                        print("\nSuggestions:")
                        for i, suggestion in enumerate(e.context['suggestions'], 1):
                            print(f" {i}. {suggestion}")
                    else:
                        print("\nTry:")
                        print(" 1. Use --cookies-from-browser to pass authentication cookies")
                        print(" 2. Export cookies from a logged-in browser session")
                        print(" 3. Try a different client type (ios, android, mweb)")
                        print(" 4. Use a different proxy or IP address")
                        print(" 5. Try again later")

                    sys.exit(1)
                elif e.errorCode in ["SOCKS5_CONNECTION_FAILED", "SOCKS5_TIMEOUT", "SOCKS5_CONNECTION_REFUSED",
                                     "SOCKS5_CONNECTION_TIMEOUT", "SOCKS5_HOST_NOT_FOUND", "SOCKS5_NETWORK_UNREACHABLE"]:
                    print(f"Error: {e.message}")
                    print("\nSOCKS5 proxy connection failed. Please check your proxy settings.")

                    # Provide more specific guidance based on error code
                    if e.errorCode == "SOCKS5_TIMEOUT" or e.errorCode == "SOCKS5_CONNECTION_TIMEOUT":
                        print("The proxy server did not respond within the timeout period (9 seconds).")
                        print("This could indicate network congestion or a proxy server that's overloaded.")
                    elif e.errorCode == "SOCKS5_CONNECTION_REFUSED":
                        print("The proxy server actively refused the connection.")
                        print("This usually means the proxy server is not running or is not accepting connections on the specified port.")
                    elif e.errorCode == "SOCKS5_HOST_NOT_FOUND":
                        print("The proxy host could not be resolved.")
                        print("Please check that the hostname is correct and your DNS is working properly.")
                    elif e.errorCode == "SOCKS5_NETWORK_UNREACHABLE":
                        print("The network containing the proxy server is unreachable.")
                        print("This could indicate network routing issues or firewall restrictions.")

                    print("\nPossible solutions:")
                    print("1. Try using a different proxy server")
                    print("2. Check if the proxy server is running and accessible")
                    print("3. Verify your network connection and firewall settings")
                    print("4. If using a remote proxy, check if it's accessible from your location")

                    # Exit with a specific error code for proxy failures
                    sys.exit(2)
                elif e.errorCode == "GLOBAL_TIMEOUT":
                    print(f"Error: {e.message}")
                    print("\nThe server timed out while processing your request.")
                    print("This could be due to:")
                    print("1. Slow network connection")
                    print("2. Server overload")
                    print("3. Complex video that takes too long to process")
                    print("\nTry again later or with a different video.")
                    sys.exit(3)
                elif e.errorCode == "CLIENT_TIMEOUT":
                    print(f"Error: {e.message}")
                    print("\nA client-specific timeout occurred while processing your request.")
                    print("The server has stopped processing to avoid wasting resources.")
                    print("\nPossible solutions:")
                    print("1. Try again later when network conditions improve")
                    print("2. Try a different video")
                    print("3. Check your internet connection")
                    sys.exit(3)
                else:
                    print(f"Error: {e.message}")
            else:
                print(f"Error: {e.message}")
            return
        except PBUserException as e:
            logger.error(f"User exception: {e.message}")
            print(f"Error: {e.message}")
            return
        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            logger.error(traceback.format_exc())
            print(f"Unexpected error: {str(e)}")
            sys.exit(1)

        # Log the entire token_data object for debugging AFTER potential exceptions
        logger.debug(f"Processing received token_data: {token_data}")

        # Check if valid infoJson was received from the server
        info_json = None
        if hasattr(token_data, 'infoJson') and token_data.infoJson and token_data.infoJson != "{}":
            if is_valid_json(token_data.infoJson):
                logger.debug("Valid info.json received from server.")
                info_json = token_data.infoJson
            else:
                logger.warning("Received infoJson from server, but it is not valid JSON or is empty.")
        else:
            logger.warning("Valid info.json was NOT received from the server.")

        # Proceed only if we have valid info_json
        if info_json:
            # Save info.json if present in the server response
            video_id = extract_video_id(url)
            if not video_id:
                logger.warning(f"Could not extract video ID from URL: {url}")
                video_id = f"unknown_{int(time.time())}"

            try:
                info_data = json.loads(info_json)
                # Check if it contains an error
                if isinstance(info_data, dict) and ('error' in info_data or 'errorCode' in info_data):
                    error_msg = info_data.get('error', 'Unknown error')
                    error_code = info_data.get('errorCode', 'UNKNOWN_ERROR')
                    logger.warning(f"infoJson contains error: {error_msg} (code: {error_code})")

                    # If it's a bot detection error, raise appropriate exception
                    if error_code == 'BOT_DETECTED' or 'bot' in error_msg.lower() or 'sign in' in error_msg.lower():
                        raise PBUserException(
                            message=f"Bot detection triggered: {error_msg}",
                            errorCode="BOT_DETECTED",  # keep the error code consistent with the checks above
                            context={
                                "video_id": extract_video_id(url),
                                "url": url,
                                "suggestions": info_data.get('suggestions', ["Try different client", "Use proxy", "Wait and retry later"])
                            }
                        )
            except json.JSONDecodeError as e:
                # This case should ideally not happen due to the is_valid_json check, but handle it defensively
                logger.error(f"Invalid JSON received despite initial check: {e}")
                print("Error: Received invalid JSON data from server.")
                info_json = None  # Ensure we don't proceed

        # If info_json is still None after checks, handle the failure case
        if not info_json:
            logger.error("Failed to obtain valid info.json from the server.")
            print("Error: No valid video information (info.json) was received from the server.")
            # Optionally, print the raw ytdlp command if available
            if hasattr(token_data, 'ytdlpCommand') and token_data.ytdlpCommand:
                print("\nRaw command from server (may be incomplete or require info.json):")
                print(token_data.ytdlpCommand)
            sys.exit(1)  # Exit with error

        # --- We have valid info_json, proceed with saving and command generation ---
        try:
            info_data = json.loads(info_json)  # We know this is valid now

            # Check if it's an error response embedded in the JSON
            if isinstance(info_data, dict) and "error" in info_data:
                logger.error(f"Received error report from server: {info_json}")

                # Check if this is a bot detection error
                if (info_data.get('errorCode') == "BOT_DETECTED" or
                        "bot" in info_data.get('message', '').lower() or
                        "sign in to confirm" in info_data.get('message', '').lower() or
                        "sign in to confirm" in info_data.get('error', '').lower() or
                        "unusual traffic" in info_data.get('message', '').lower() or
                        "captcha" in info_data.get('message', '').lower() or
                        info_data.get('requires_auth') == True):

                    logger.error("Bot detection error detected in info.json")
                    # Raise PBServiceException for bot detection
                    raise PBServiceException(
                        message=f"Bot detection triggered: {info_data.get('message', 'Authentication required')}",
                        errorCode="BOT_DETECTED",
                        context={
                            "video_id": video_id,
                            "url": url,
                            "requires_auth": True,
                            "info_data": info_data,
                            "suggestions": info_data.get('suggestions', [
                                "Use --cookies-from-browser to pass authentication cookies",
                                "Export cookies from a logged-in browser session",
                                "Try a different client type (ios, android, mweb)",
                                "Use a different proxy or IP address"
                            ])
                        }
                    )
                else:
                    # Raise PBServiceException for other errors
                    raise PBServiceException(
                        message=f"Error extracting video info: {info_data.get('error', 'Unknown error')}",
                        errorCode=info_data.get('errorCode', "EXTRACTION_FAILED"),
                        context={"video_id": video_id, "url": url, "info_data": info_data}
                    )

            # If it's a valid response, process it
            if 'title' in info_data or 'id' in info_data:
                print(f"Video info retrieved: {info_data.get('title', 'Unknown title')}")
                saved_path = save_info_json(info_json, video_id, args.context_dir)
                if saved_path:
                    print(f"info.json saved to: {saved_path}")

                    # Create a simpler base command using only the saved info.json and proxy
                    base_cmd = f"yt-dlp --load-info-json \"{saved_path}\""  # Quote the path
                    if hasattr(token_data, 'socks') and token_data.socks:
                        if token_data.socks.startswith(('socks5://', 'ss://')):
                            # Quote the proxy URL as well
                            base_cmd += f" --proxy \"{token_data.socks}\""

                    # Show format listing command
                    print("\nTo list available formats:")
                    format_cmd = f"{base_cmd} -F"
                    print(format_cmd)

                    # Show download command (format is usually embedded in info.json or determined by yt-dlp)
                    simplified_cmd = f"{base_cmd} --simulate"  # Removed format codes

                    print("\nTo download (with --simulate to preview):")
                    print(simplified_cmd)
                    print("\nRemove --simulate to actually download")
                else:
                    logger.error("Failed to save info.json file")
                    print("Failed to save info.json file")
            else:
                logger.warning("info.json appears to be valid JSON but missing expected video fields")
                print("Error: Received incomplete or invalid video data")
                print("This usually indicates an authentication or access issue")
                sys.exit(1)
        except Exception as e:  # Catch errors during saving or command generation
            logger.error(f"Error processing valid info.json: {str(e)}")
            # Re-raise; the finally block below still closes the transport before the exception propagates
            raise
    finally:
        if transport:
            transport.close()

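# Illustrative console output of a successful `getToken` run (not captured from a
# real session; paths and proxy values are placeholders):
#
#   Video info retrieved: <video title>
#   info.json saved to: info_json_<video_id>_<timestamp>.json
#
#   To list available formats:
#   yt-dlp --load-info-json "info_json_<video_id>_<timestamp>.json" --proxy "socks5://<host>:<port>" -F
#
#   To download (with --simulate to preview):
#   yt-dlp --load-info-json "info_json_<video_id>_<timestamp>.json" --proxy "socks5://<host>:<port>" --simulate
#   Remove --simulate to actually download
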
if __name__ == "__main__":
    main()