#!/usr/bin/env python3
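"""Thrift client for the YTDLP Operations Server.

Requests YouTube token data (a yt-dlp command, an info.json payload and an
optional SOCKS5 proxy) from the server and saves the info.json into a context
directory.

Illustrative invocation (added for clarity; the file name, host, port and URL
below are placeholders, not values from the original source):

    python ytdlp_ops_client.py --host 127.0.0.1 --port 9090 getToken --url "https://www.youtube.com/watch?v=dQw4w9WgXcQ" --list-formats
"""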

import argparse
import csv
import datetime
import json
import logging
import os
import re
import signal
import subprocess
import sys
import time
import traceback
import uuid
from pathlib import Path
from typing import Dict, List, Optional, Any

from tabulate import tabulate
import yt_dlp

def signal_handler(sig: int, frame) -> None:
    """Handle shutdown signals gracefully."""
    logger.info(f"Received signal {sig}, shutting down...")
    # Clean up any resources here
    sys.exit(0)

# Register signal handlers
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

# Import the patch for Thrift exceptions
try:
    from thrift_exceptions_patch import patch_thrift_exceptions
    # Explicitly call the patch function to ensure it's applied
    patch_thrift_exceptions()
    print("Applied Thrift exceptions patch for compatibility")
    if 'AIRFLOW_HOME' in os.environ:
        print("Running in Airflow environment - patch is essential")
    else:
        print("Not running in Airflow environment, but patch applied anyway for consistency")
except ImportError:
    print("Could not import thrift_exceptions_patch, compatibility may be affected")
    print("If running in Airflow, this may cause 'immutable instance' errors")
except Exception as e:
    print(f"Error applying Thrift exceptions patch: {e}")

# --- Python Path Setup ---
# Ensure the script can find necessary modules, especially Thrift-generated code.
# Assumes the script is run from the project root or the path is adjusted accordingly.
project_root = Path(__file__).parent.absolute()
gen_py_dir = project_root / "thrift_model" / "gen_py"

# Add project root to sys.path (needed for the 'pangramia' symlink)
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Verify paths for debugging
# print("Project Root:", project_root)
# print("Gen Py Dir:", gen_py_dir)
# print("Sys Path:", sys.path)
# --- End Python Path Setup ---

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol

try:
    from pangramia.yt.tokens_ops import YTTokenOpService
    from pangramia.yt.common.ttypes import JobTokenData, TokenUpdateMode, JobState
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
except ImportError as e:
    print(f"Error importing Thrift-generated modules: {e}")
    print("Please ensure you have run './generate-thrift.py' successfully from the project root.")
    print(f"Current sys.path includes: {gen_py_dir}")
    sys.exit(1)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('ytdlp_ops_client.log')
    ]
)
logger = logging.getLogger(__name__)

def get_info_json(token_data):
    """Return the infoJson payload from token_data, or raise ValueError if it is missing or empty."""
    if not hasattr(token_data, 'infoJson'):
        logger.error("infoJson attribute missing in token_data")
        raise ValueError("Server response missing infoJson")

    if not token_data.infoJson or token_data.infoJson == "{}":
        logger.error("Empty infoJson received from server")
        raise ValueError("Empty infoJson received from server")

    logger.info(f"Using infoJson from server response ({len(token_data.infoJson)} bytes)")
    return token_data.infoJson

def is_valid_json(json_str):
    """Check if a string is valid JSON and not empty."""
    if not json_str or json_str == "{}" or json_str == "":
        logger.warning("Empty JSON string received")
        return False

    try:
        data = json.loads(json_str)

        # Check if it's an empty object
        if isinstance(data, dict) and not data:
            logger.warning("Empty JSON object received")
            return False

        # Check if it has an error field
        if isinstance(data, dict) and ('error' in data or 'errorCode' in data):
            # It's valid JSON but contains an error
            logger.warning(f"JSON contains error: {data.get('error', 'Unknown error')} (code: {data.get('errorCode', 'none')})")
            return True

        # Check if it has at least some basic fields
        if isinstance(data, dict) and ('id' in data or 'title' in data):
            logger.info(f"Valid JSON with video data: {data.get('title', 'Unknown title')}")
            return True

        # Check if it has token_data, which is important
        if isinstance(data, dict) and 'token_data' in data and data['token_data']:
            logger.info("Valid JSON with token_data")
            return True

        logger.warning("JSON is valid but missing expected fields")
        return True
    except json.JSONDecodeError as e:
        logger.warning(f"Invalid JSON: {e}")
        return False
    except Exception as e:
        logger.warning(f"Unexpected error validating JSON: {e}")
        return False

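# Illustrative behaviour of is_valid_json() (examples added for clarity, not from
# the original source):
#   is_valid_json('{}')                            -> False (empty object)
#   is_valid_json('{"error": "Sign in required"}') -> True  (valid JSON; the error is logged)
#   is_valid_json('{"id": "abc", "title": "x"}')   -> True
#   is_valid_json('not json')                      -> False
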
def extract_video_id(url: str) -> Optional[str]:
    """Extract video ID from a YouTube URL."""
    # If it's already a video ID
    if re.match(r'^[a-zA-Z0-9_-]{11}$', url):
        return url

    # Handle youtu.be URLs
    youtu_be_match = re.search(r'youtu\.be/([a-zA-Z0-9_-]{11})', url)
    if youtu_be_match:
        return youtu_be_match.group(1)

    # Handle youtube.com URLs
    youtube_match = re.search(r'(?:youtube\.com/(?:watch\?v=|embed/|v/)|youtube\.com/.*[?&]v=)([a-zA-Z0-9_-]{11})', url)
    if youtube_match:
        return youtube_match.group(1)

    # Handle shorts URLs
    shorts_match = re.search(r'youtube\.com/shorts/([a-zA-Z0-9_-]{11})', url)
    if shorts_match:
        return shorts_match.group(1)

    return None

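# Illustrative inputs (added for clarity, not from the original source) that
# extract_video_id() normalises to the same 11-character ID "dQw4w9WgXcQ":
#   extract_video_id("dQw4w9WgXcQ")
#   extract_video_id("https://youtu.be/dQw4w9WgXcQ")
#   extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
#   extract_video_id("https://www.youtube.com/shorts/dQw4w9WgXcQ")
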
def list_available_formats(url: str, args: argparse.Namespace) -> Optional[List[Dict[str, Any]]]:
    """List available formats for a YouTube video."""
    ydl_opts = {
        'quiet': not args.no_quiet if hasattr(args, 'no_quiet') else True,
        'no_warnings': True,
        'skip_download': True,
        'extract_flat': True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

            if not info:
                logger.error("Could not retrieve video information")
                return None

            formats = info.get('formats', [])

            if not formats:
                logger.warning("No formats available for this video")
                return None

            # Create a table of available formats
            format_table = []
            for f in formats:
                format_table.append({
                    'format_id': f.get('format_id', 'unknown'),
                    'ext': f.get('ext', 'unknown'),
                    'resolution': f.get('resolution', 'unknown'),
                    'fps': f.get('fps', 'unknown'),
                    'vcodec': f.get('vcodec', 'unknown'),
                    'acodec': f.get('acodec', 'unknown'),
                    'filesize': f.get('filesize', 'unknown'),
                    'format_note': f.get('format_note', '')
                })

            return format_table

    except Exception as e:
        logger.error(f"Error listing formats: {e}")
        return None

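# Each row returned by list_available_formats() carries these keys (values below
# are illustrative, not from the original source):
#   {'format_id': '251', 'ext': 'webm', 'resolution': 'audio only', 'fps': 'unknown',
#    'vcodec': 'none', 'acodec': 'opus', 'filesize': 1234567, 'format_note': 'medium'}
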
def suggest_best_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Suggest best formats based on resolution and codec."""
    best = []
    seen_resolutions = set()

    # Prioritize higher resolutions and certain codecs
    preferred_codecs = ["vp9", "avc1", "av01"]  # In order of preference

    for f in sorted(formats, key=lambda x: (
        -int(x.get('height', 0) or 0),  # Higher resolution first
        preferred_codecs.index(x.get('vcodec', '').split('.')[0]) if x.get('vcodec', '').split('.')[0] in preferred_codecs else float('inf'),  # Preferred codecs
        x.get('filesize', 0) or 0  # Smaller filesize
    )):
        resolution = f.get('resolution')
        if resolution and resolution not in seen_resolutions:
            best.append(f)
            seen_resolutions.add(resolution)
            if len(best) >= 3:  # Suggest up to 3 formats
                break
    return best

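# Illustrative ordering (added for clarity, not from the original source): with the
# sort key above, a 1080p VP9 entry sorts ahead of a 1080p AVC1 entry (same height,
# but "vp9" precedes "avc1" in preferred_codecs), and both sort ahead of any 720p
# entry; only the first format seen for each distinct resolution is suggested, up
# to three suggestions in total.
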
def load_info_json(path: str) -> Optional[Dict[str, Any]]:
    """Load and validate info.json file."""
    try:
        path = Path(path).resolve()
        if not path.exists():
            logger.error(f"Info.json file not found: {path}")
            return None

        with open(path, 'r') as f:
            data = json.load(f)

        # Basic validation
        if not isinstance(data, dict):
            logger.error("Invalid info.json format: not a JSON object")
            return None

        if 'id' not in data:
            logger.warning("Info.json missing video ID")

        return data

    except Exception as e:
        logger.error(f"Error loading info.json: {e}")
        return None

def save_info_json(info_json: str, video_id: str, context_dir: str) -> Optional[str]:
    """Save info.json to disk and return the saved path."""
    try:
        # Ensure context directory exists
        Path(context_dir).mkdir(parents=True, exist_ok=True)

        # Create filename with video ID and timestamp
        timestamp = int(time.time())
        output_path = Path(context_dir) / f"info_json_{video_id}_{timestamp}.json"

        # Write the file
        with open(output_path, 'w') as f:
            f.write(info_json)

        # Also create a symlink or copy to the standard name for compatibility
        standard_path = Path(context_dir) / f"info_json_{video_id}.json"
        try:
            # Try to create a symlink first (more efficient)
            if os.path.exists(standard_path):
                os.remove(standard_path)
            os.symlink(output_path, standard_path)
        except (OSError, AttributeError):
            # If symlink fails (e.g., on Windows), make a copy
            with open(standard_path, 'w') as f:
                f.write(info_json)

        # Save latest.json
        latest_path = Path(context_dir) / "latest.json"
        with open(latest_path, 'w') as f:
            f.write(info_json)

        logger.info(f"Successfully saved info.json to {output_path} and latest.json to {latest_path}")
        return str(output_path)
    except Exception as e:
        logger.error(f"Failed to save info.json: {e}")
        logger.error(traceback.format_exc())
        return None  # Return None on failure to match the Optional[str] annotation

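# For reference (illustrative, not from the original source): a successful call such
# as save_info_json(info_json, "dQw4w9WgXcQ", "./ctx") leaves three entries in ./ctx:
#   info_json_dQw4w9WgXcQ_<timestamp>.json  - timestamped copy; this path is returned
#   info_json_dQw4w9WgXcQ.json              - symlink to (or copy of) the timestamped file
#   latest.json                             - plain copy, overwritten on every call
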
def main():
    # Create main parser
    parser = argparse.ArgumentParser(description='''YtdlpOpsService Client

This client connects to the YTDLP Operations Server to generate tokens for YouTube videos.
The server performs SOCKS5 proxy connection testing with a 9-second timeout for early detection
of proxy issues. If a proxy connection fails, the server will immediately stop token generation
and return an error instead of trying other clients.''')

    # Add global options
    parser.add_argument('--host', default=os.getenv('YTDLP_HOST', 'localhost'),
                        help='Server host (default: localhost or YTDLP_HOST env)')
    parser.add_argument('--port', type=int, default=int(os.getenv('YTDLP_PORT', '9090')),
                        help='Server port (default: 9090 or YTDLP_PORT env)')
    parser.add_argument('--timeout', type=int, default=30000,
                        help='Timeout in milliseconds (default: 30000)')
    parser.add_argument('--timeout-sec', type=int, default=None,
                        help='Timeout in seconds (overrides --timeout if provided)')
    parser.add_argument('--context-dir', default='.', help='Context directory to save info.json (default: .)')
    parser.add_argument('--load-info-json', help='Path to existing info.json file to load')
    parser.add_argument('--framed-transport', action='store_true',
                        help='Use TFramedTransport instead of TBufferedTransport for handling very large messages')
    parser.add_argument('--force-framed-transport', action='store_true',
                        help='Force the use of TFramedTransport (recommended for large messages)')

    # Create subparsers for commands
    subparsers = parser.add_subparsers(dest='command', required=True, help='Commands')

    # getToken command
    get_token_parser = subparsers.add_parser('getToken', help='Get token for a YouTube URL',
                                             description='''Get token for a YouTube URL

This command connects to the server to generate tokens for a YouTube video.
The server will test any configured SOCKS5 proxy with a 9-second timeout.
If the proxy connection fails, token generation will stop immediately with an error.''')
    get_token_parser.add_argument('--url', required=True,
                                  help='YouTube URL to process')
    # --format removed; format/quality is determined by the server or embedded in the command
    get_token_parser.add_argument('--account_id', default='default',
                                  help='Account ID (default: default)')
    get_token_parser.add_argument('--list-formats', action='store_true',
                                  help='List available formats for the video')

args = parser.parse_args()

    # Handle info.json loading
    if args.load_info_json:
        info_json = load_info_json(args.load_info_json)
        if info_json:
            print("Loaded info.json:")
            print(json.dumps(info_json, indent=2))
        return

    transport = None
    try:
        # Ensure context directory exists and is writable
        try:
            Path(args.context_dir).mkdir(parents=True, exist_ok=True)
            test_file = Path(args.context_dir) / "test.txt"
            test_file.touch()
            test_file.unlink()
        except Exception as e:
            logger.error(f"Could not access context directory {args.context_dir}: {e}")
            print(f"Error: Could not access context directory {args.context_dir}")
            sys.exit(1)

        try:
            # Check if we should use framed transport for very large messages
            # (informational only: TFramedTransport is always used below to match the server)
            use_framed_transport = args.framed_transport or args.force_framed_transport or os.environ.get('USE_FRAMED_TRANSPORT', '').lower() in ('1', 'true', 'yes')
            logger.debug(f"Using framed transport: {use_framed_transport}")

            # Create socket with configurable timeout, force IPv4
            socket = TSocket.TSocket(args.host, args.port, socket_family=2)  # AF_INET = 2 for IPv4

            # Use timeout-sec if provided, otherwise use timeout in milliseconds
            if args.timeout_sec is not None:
                socket.setTimeout(args.timeout_sec * 1000)  # Convert seconds to milliseconds
                logger.debug(f"Using timeout of {args.timeout_sec} seconds")
            else:
                socket.setTimeout(args.timeout)  # Use timeout from CLI in milliseconds
                logger.debug(f"Using timeout of {args.timeout} milliseconds")

            # Always use TFramedTransport to match the server
            transport = TTransport.TFramedTransport(socket)
            logger.debug("Using TFramedTransport for large messages")

            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            client = YTTokenOpService.Client(protocol)

            logger.info(f"Attempting to connect to server at {args.host}:{args.port}...")
            try:
                transport.open()
                logger.info("Successfully connected to server")
            except TTransport.TTransportException as e:
                logger.error(f"Connection failed: {str(e)}")
                print(f"Error: Could not connect to server at {args.host}:{args.port}")
                print(f"Reason: {str(e)}")
                sys.exit(1)

            # Add connection test
            try:
                client.ping()
                logger.info("Server connection test successful")
            except Exception as e:
                logger.error(f"Server connection test failed: {e}")
                raise
        except TTransport.TTransportException as e:
            logger.error(f"Connection failed: {str(e)}")
            logger.error(f"Could not connect to {args.host}:{args.port}")
            sys.exit(1)
        except Exception as e:
            logger.error(f"Connection failed: {str(e)}")
            logger.error(traceback.format_exc())
            sys.exit(1)

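        # Note (added for clarity): the transport stack used here
        # (TSocket -> TFramedTransport -> TBinaryProtocol) has to mirror the
        # server's configuration; a buffered, unframed client talking to a framed
        # server typically fails with transport read errors on the first RPC.
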
        if args.command == 'getToken':
            url = args.url
            # format_codes removed

            # Handle format listing
            if args.list_formats:
                formats = list_available_formats(url, args)
                if formats:
                    print("\nAvailable formats:")
                    print(tabulate(formats, headers="keys", showindex=True))  # Show index for format selection

                    # Suggest best formats based on resolution
                    best_formats = suggest_best_formats(formats)
                    if best_formats:
                        print("\nSuggested formats:")
                        print(tabulate(best_formats, headers="keys"))
                else:
                    print("No formats available or could not retrieve format information")
                return
        elif getattr(args, 'youtube_url', None):
            # Legacy path: the global --youtube-url/--format options have been removed,
            # so use getattr to avoid an AttributeError when they are absent.
            url = args.youtube_url
            format_code = getattr(args, 'format', None)
            print("Warning: --youtube-url is deprecated, use 'getToken --url' instead")
        else:
            print("Please provide a YouTube URL using 'getToken --url' command")
            return

        # Get token for URL
        try:
            logger.info(f"Requesting token for URL: {url}")
            token_data = client.getOrRefreshToken(
                accountId=args.account_id,
                updateType=TokenUpdateMode.AUTO,
                url=url
            )

            if not token_data:
                logger.error("Received empty token data from server")
                print("Error: Received empty token data from server")
                sys.exit(1)

            # Validate token data
            if not hasattr(token_data, 'ytdlpCommand') or not token_data.ytdlpCommand:
                logger.error("Token data missing required ytdlpCommand")
                print("Error: Token data missing required ytdlpCommand")
                sys.exit(1)

            logger.info("Successfully received token data from server")

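            # The code below relies on these token_data attributes (names as used in
            # this file; the Thrift IDL itself is not shown here):
            #   token_data.ytdlpCommand - raw yt-dlp command assembled by the server
            #   token_data.infoJson     - serialized info.json payload ("{}" on failure)
            #   token_data.socks        - optional SOCKS5 proxy URL passed to yt-dlp
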
            # Log all attributes of token_data for debugging
            token_attrs = [attr for attr in dir(token_data) if not attr.startswith('__') and not callable(getattr(token_data, attr))]
            logger.debug(f"Received token_data attributes: {token_attrs}")

            # Handle case where token_data is a dict-like object
            if hasattr(token_data, 'items'):
                # Convert to dict if needed
                token_dict = dict(token_data.items())
                logger.debug(f"Token data as dict: {token_dict}")

                # If we have JSON data directly in the response
                if isinstance(token_dict.get('infoJson', None), str):
                    received_info_json = token_dict['infoJson']
                elif isinstance(token_dict.get('data', None), (dict, str)):
                    # Try to use the data field if it exists
                    data = token_dict['data']
                    if isinstance(data, str):
                        received_info_json = data
                    else:
                        received_info_json = json.dumps(data)
                else:
                    # Create info.json from available fields
                    info_data = {
                        "id": token_dict.get('id', extract_video_id(url)),
                        "title": token_dict.get('title', ''),
                        "formats": token_dict.get('formats', []),
                        "timestamp": int(time.time()),
                        "ytdlp_command": token_dict.get('ytdlpCommand', '')
                    }
                    received_info_json = json.dumps(info_data)
            else:
                # Handle case where token_data is a regular object
                received_info_json = getattr(token_data, 'infoJson', None)

            if received_info_json:
                logger.debug(f"Received info.json data ({len(received_info_json)} bytes)")
                if len(received_info_json) > 100:
                    logger.debug(f"Preview: {received_info_json[:100]}...")
            else:
                logger.warning("No valid info.json data found in response")

        except PBServiceException as e:
            logger.error(f"Service exception: {e.message}")
            if hasattr(e, 'errorCode'):
                if e.errorCode == "BOT_DETECTED":
                    print(f"Error: {e.message}")
                    print("\nYouTube has detected bot activity. Authentication is required.")

                    # Print suggestions if available
                    if hasattr(e, 'context') and e.context and 'suggestions' in e.context:
                        print("\nSuggestions:")
                        for i, suggestion in enumerate(e.context['suggestions'], 1):
                            print(f" {i}. {suggestion}")
                    else:
                        print("\nTry:")
                        print(" 1. Use --cookies-from-browser to pass authentication cookies")
                        print(" 2. Export cookies from a logged-in browser session")
                        print(" 3. Try a different client type (ios, android, mweb)")
                        print(" 4. Use a different proxy or IP address")
                        print(" 5. Try again later")

                    sys.exit(1)
                elif e.errorCode in ["SOCKS5_CONNECTION_FAILED", "SOCKS5_TIMEOUT", "SOCKS5_CONNECTION_REFUSED",
                                     "SOCKS5_CONNECTION_TIMEOUT", "SOCKS5_HOST_NOT_FOUND", "SOCKS5_NETWORK_UNREACHABLE"]:
                    print(f"Error: {e.message}")
                    print("\nSOCKS5 proxy connection failed. Please check your proxy settings.")

                    # Provide more specific guidance based on error code
                    if e.errorCode == "SOCKS5_TIMEOUT" or e.errorCode == "SOCKS5_CONNECTION_TIMEOUT":
                        print("The proxy server did not respond within the timeout period (9 seconds).")
                        print("This could indicate network congestion or a proxy server that's overloaded.")
                    elif e.errorCode == "SOCKS5_CONNECTION_REFUSED":
                        print("The proxy server actively refused the connection.")
                        print("This usually means the proxy server is not running or is not accepting connections on the specified port.")
                    elif e.errorCode == "SOCKS5_HOST_NOT_FOUND":
                        print("The proxy host could not be resolved.")
                        print("Please check that the hostname is correct and your DNS is working properly.")
                    elif e.errorCode == "SOCKS5_NETWORK_UNREACHABLE":
                        print("The network containing the proxy server is unreachable.")
                        print("This could indicate network routing issues or firewall restrictions.")

                    print("\nPossible solutions:")
                    print("1. Try using a different proxy server")
                    print("2. Check if the proxy server is running and accessible")
                    print("3. Verify your network connection and firewall settings")
                    print("4. If using a remote proxy, check if it's accessible from your location")

                    # Exit with a specific error code for proxy failures
                    sys.exit(2)
                elif e.errorCode == "GLOBAL_TIMEOUT":
                    print(f"Error: {e.message}")
                    print("\nThe server timed out while processing your request.")
                    print("This could be due to:")
                    print("1. Slow network connection")
                    print("2. Server overload")
                    print("3. Complex video that takes too long to process")
                    print("\nTry again later or with a different video.")
                    sys.exit(3)
                elif e.errorCode == "CLIENT_TIMEOUT":
                    print(f"Error: {e.message}")
                    print("\nA client-specific timeout occurred while processing your request.")
                    print("The server has stopped processing to avoid wasting resources.")
                    print("\nPossible solutions:")
                    print("1. Try again later when network conditions improve")
                    print("2. Try a different video")
                    print("3. Check your internet connection")
                    sys.exit(3)
                else:
                    print(f"Error: {e.message}")
            else:
                print(f"Error: {e.message}")
            return
        except PBUserException as e:
            logger.error(f"User exception: {e.message}")
            print(f"Error: {e.message}")
            return
        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            logger.error(traceback.format_exc())
            print(f"Unexpected error: {str(e)}")
            sys.exit(1)

        # Log the entire token_data object for debugging AFTER potential exceptions
        logger.debug(f"Processing received token_data: {token_data}")

        # Check if valid infoJson was received from the server
        info_json = None
        if hasattr(token_data, 'infoJson') and token_data.infoJson and token_data.infoJson != "{}":
            if is_valid_json(token_data.infoJson):
                logger.debug("Valid info.json received from server.")
                info_json = token_data.infoJson
            else:
                logger.warning("Received infoJson from server, but it is not valid JSON or is empty.")
        else:
            logger.warning("Valid info.json was NOT received from the server.")

        # Proceed only if we have valid info_json
        if info_json:
            # Save info.json if present in the server response
            video_id = extract_video_id(url)
            if not video_id:
                logger.warning(f"Could not extract video ID from URL: {url}")
                video_id = f"unknown_{int(time.time())}"

            try:
                info_data = json.loads(info_json)
                # Check if it contains an error
                if isinstance(info_data, dict) and ('error' in info_data or 'errorCode' in info_data):
                    error_msg = info_data.get('error', 'Unknown error')
                    error_code = info_data.get('errorCode', 'UNKNOWN_ERROR')
                    logger.warning(f"infoJson contains error: {error_msg} (code: {error_code})")

                    # If it's a bot detection error, raise appropriate exception
                    if error_code == 'BOT_DETECTED' or 'bot' in error_msg.lower() or 'sign in' in error_msg.lower():
                        raise PBUserException(
                            message=f"Bot detection triggered: {error_msg}",
                            errorCode="BOT_DETECTED",  # keep the error code consistent with the checks above
                            context={
                                "video_id": extract_video_id(url),
                                "url": url,
                                "suggestions": info_data.get('suggestions', ["Try different client", "Use proxy", "Wait and retry later"])
                            }
                        )
            except json.JSONDecodeError as e:
                # This case should ideally not happen due to the is_valid_json check, but handle it defensively
                logger.error(f"Invalid JSON received despite initial check: {e}")
                print("Error: Received invalid JSON data from server.")
                info_json = None  # Ensure we don't proceed

        # If info_json is still None after checks, handle the failure case
        if not info_json:
            logger.error("Failed to obtain valid info.json from the server.")
            print("Error: No valid video information (info.json) was received from the server.")
            # Optionally, print the raw ytdlp command if available
            if hasattr(token_data, 'ytdlpCommand') and token_data.ytdlpCommand:
                print("\nRaw command from server (may be incomplete or require info.json):")
                print(token_data.ytdlpCommand)
            sys.exit(1)  # Exit with error

        # --- We have valid info_json, proceed with saving and command generation ---
        try:
            info_data = json.loads(info_json)  # We know this is valid now

            # Check if it's an error response embedded in the JSON
            if isinstance(info_data, dict) and "error" in info_data:
                logger.error(f"Received error report from server: {info_json}")

                # Check if this is a bot detection error
                if (info_data.get('errorCode') == "BOT_DETECTED" or
                        "bot" in info_data.get('message', '').lower() or
                        "sign in to confirm" in info_data.get('message', '').lower() or
                        "sign in to confirm" in info_data.get('error', '').lower() or
                        "unusual traffic" in info_data.get('message', '').lower() or
                        "captcha" in info_data.get('message', '').lower() or
                        info_data.get('requires_auth') == True):

                    logger.error("Bot detection error detected in info.json")
                    # Raise PBServiceException for bot detection
                    raise PBServiceException(
                        message=f"Bot detection triggered: {info_data.get('message', 'Authentication required')}",
                        errorCode="BOT_DETECTED",
                        context={
                            "video_id": video_id,
                            "url": url,
                            "requires_auth": True,
                            "info_data": info_data,
                            "suggestions": info_data.get('suggestions', [
                                "Use --cookies-from-browser to pass authentication cookies",
                                "Export cookies from a logged-in browser session",
                                "Try a different client type (ios, android, mweb)",
                                "Use a different proxy or IP address"
                            ])
                        }
                    )
                else:
                    # Raise PBServiceException for other errors
                    raise PBServiceException(
                        message=f"Error extracting video info: {info_data.get('error', 'Unknown error')}",
                        errorCode=info_data.get('errorCode', "EXTRACTION_FAILED"),
                        context={"video_id": video_id, "url": url, "info_data": info_data}
                    )

            # If it's a valid response, process it
            if 'title' in info_data or 'id' in info_data:
                print(f"Video info retrieved: {info_data.get('title', 'Unknown title')}")
                saved_path = save_info_json(info_json, video_id, args.context_dir)
                if saved_path:
                    print(f"info.json saved to: {saved_path}")

                    # Create a simpler base command using only the saved info.json and proxy
                    base_cmd = f"yt-dlp --load-info-json \"{saved_path}\""  # Quote the path
                    if hasattr(token_data, 'socks') and token_data.socks:
                        if token_data.socks.startswith(('socks5://', 'ss://')):
                            # Quote the proxy URL as well
                            base_cmd += f" --proxy \"{token_data.socks}\""

                    # Show format listing command
                    print("\nTo list available formats:")
                    format_cmd = f"{base_cmd} -F"
                    print(format_cmd)

                    # Show download command (format is usually embedded in info.json or determined by yt-dlp)
                    simplified_cmd = f"{base_cmd} --simulate"  # Removed format codes

                    print("\nTo download (with --simulate to preview):")
                    print(simplified_cmd)
                    print("\nRemove --simulate to actually download")
                else:
                    logger.error("Failed to save info.json file")
                    print("Failed to save info.json file")
            else:
                logger.warning("info.json appears to be valid JSON but missing expected video fields")
                print("Error: Received incomplete or invalid video data")
                print("This usually indicates an authentication or access issue")
                sys.exit(1)
        except Exception as e:  # Catch errors during saving or command generation
            logger.error(f"Error processing valid info.json: {str(e)}")
            # Re-raise; the finally block below still closes the transport before the exception propagates
            raise
    finally:
        if transport:
            transport.close()

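# Illustrative console output of a successful `getToken` run (not captured from a
# real session; paths and proxy values are placeholders):
#
#   Video info retrieved: <video title>
#   info.json saved to: info_json_<video_id>_<timestamp>.json
#
#   To list available formats:
#   yt-dlp --load-info-json "info_json_<video_id>_<timestamp>.json" --proxy "socks5://<host>:<port>" -F
#
#   To download (with --simulate to preview):
#   yt-dlp --load-info-json "info_json_<video_id>_<timestamp>.json" --proxy "socks5://<host>:<port>" --simulate
#   Remove --simulate to actually download
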
if __name__ == "__main__":
    main()