#!/usr/bin/env python3
"""
Script to verify that all camoufox services are running and accessible.
This script should be run after deployment to ensure the cluster is healthy.
"""

import json
import logging
import os
import subprocess
import sys
import time  # noqa: F401 - currently unused; retained from the original import list
from typing import Dict, List, Tuple

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def run_docker_command(cmd: List[str]) -> Tuple[int, str, str]:
    """Run a command and return ``(returncode, stdout, stderr)``.

    The command is executed without a shell, with a 30 second timeout, and
    both output streams are captured as stripped text.

    Args:
        cmd: Argument vector to execute (e.g. ``["docker", "ps"]``).

    Returns:
        The process return code with its stripped stdout and stderr. A
        timeout or any failure to launch the process is logged and reported
        as ``(1, "", <reason>)`` instead of raising.
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        return result.returncode, result.stdout.strip(), result.stderr.strip()
    except subprocess.TimeoutExpired:
        logger.error(f"Command timed out: {' '.join(cmd)}")
        return 1, "", "Command timed out"
    except Exception as e:
        # Deliberately broad: a missing binary or permission error must be
        # reported as a failed command, not crash the verification run.
        logger.error(f"Error running command: {' '.join(cmd)} - {e}")
        return 1, "", str(e)


def _parse_json_output(stdout: str) -> List[Dict]:
    """Parse ``--format json`` output into a list of dicts.

    Accepts either a single JSON array or one JSON object per line. Entries
    that are not JSON objects are dropped so callers can rely on ``.get``.

    Raises:
        json.JSONDecodeError: If the text (or any non-blank line) is not
            valid JSON.
    """
    text = stdout.strip()
    if not text:
        return []
    if text.startswith('['):
        parsed = json.loads(text)
    else:
        # Multiple JSON objects, one per line
        parsed = [json.loads(line) for line in text.splitlines() if line.strip()]
    return [entry for entry in parsed if isinstance(entry, dict)]


def _service_names(service: Dict) -> List[str]:
    """Return the candidate names of a service/container entry.

    Different docker output formats use different field names, so ``Name``,
    ``Names``, ``name`` and ``Service`` are tried in that order; the result
    is ``["unknown"]`` when none of them is present.
    """
    raw_name = (service.get('Name') or
                service.get('Names') or
                service.get('name') or
                service.get('Service', 'unknown'))
    if isinstance(raw_name, str):
        return [raw_name]
    if isinstance(raw_name, list):
        return [str(name) for name in raw_name]
    return [str(raw_name)]


def _is_running_status(status: object) -> bool:
    """Return True when a reported status string denotes a running container.

    Accepts the ``State`` style value (``running``, any letter case) as well
    as the human-readable ``Status`` style value that starts with ``Up``
    (for example ``Up 2 hours``).
    """
    normalized = str(status).strip().lower()
    return normalized == 'running' or normalized == 'up' or normalized.startswith('up ')


def get_docker_compose_services(project_path: str) -> List[Dict]:
    """Get the list of services from docker-compose.

    Several ``docker compose ps`` invocations are tried in order because the
    project naming might vary; the first one that yields a non-empty list
    wins. If none does, all containers from ``docker ps`` are listed and
    filtered down to those whose ``Names`` contain ``camoufox``.

    Args:
        project_path: Directory passed to ``--project-directory`` for one of
            the attempts.

    Returns:
        A list of dicts as emitted by docker's JSON output, or an empty list
        when nothing could be retrieved.
    """
    # Try different ways to get services since the project naming might vary
    possible_commands = [
        ["docker", "compose", "-p", "ytdlp-ops", "ps", "--format", "json"],
        ["docker", "compose", "-p", "ytdlp-ops-camoufox", "ps", "--format", "json"],
        ["docker", "compose", "--project-directory", project_path, "ps", "--format", "json"],
        ["docker", "compose", "ps", "--format", "json"]
    ]

    for cmd in possible_commands:
        returncode, stdout, _stderr = run_docker_command(cmd)
        if returncode != 0 or not stdout:
            continue
        try:
            services = _parse_json_output(stdout)
        except json.JSONDecodeError as e:
            logger.debug(f"Failed to parse docker-compose output with command {' '.join(cmd)}: {e}")
            continue
        if services:
            return services

    # If all commands failed, try to get all containers and filter for camoufox
    logger.info("Falling back to direct container inspection")
    returncode, stdout, _stderr = run_docker_command(["docker", "ps", "--format", "json"])
    if returncode == 0 and stdout:
        try:
            containers = _parse_json_output(stdout)
        except json.JSONDecodeError as e:
            logger.debug(f"Failed to parse docker ps output: {e}")
        else:
            # Filter for camoufox containers; tolerate a missing/null Names
            # field and match case-insensitively like the verifier does.
            return [
                container for container in containers
                if 'camoufox' in str(container.get('Names') or '').lower()
            ]

    logger.error("Failed to get docker-compose services with all methods")
    return []


def check_service_health(service_name: str, port: int = 12345) -> bool:
    """Check whether a container is running according to ``docker inspect``.

    For camoufox services only the running state is verified for now; the
    health status, when present, is logged but does not affect the result.

    Args:
        service_name: Container name or ID passed to ``docker inspect``.
        port: Currently unused; kept so existing callers keep working.

    Returns:
        True if the inspected container reports ``State.Running``; False if
        it is not running, the inspect command fails, or its output is empty
        or cannot be parsed.
    """
    cmd = ["docker", "inspect", service_name]
    returncode, stdout, stderr = run_docker_command(cmd)

    if returncode != 0:
        logger.error(f"Failed to inspect service {service_name}: {stderr}")
        return False

    try:
        service_info = json.loads(stdout)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse docker inspect output for {service_name}: {e}")
        return False

    # An empty or unexpectedly shaped result must be an explicit failure
    # rather than an implicit ``None`` return.
    if not isinstance(service_info, list) or not service_info or not isinstance(service_info[0], dict):
        logger.error(f"docker inspect returned no usable data for {service_name}")
        return False

    state = service_info[0].get('State')
    if not isinstance(state, dict):
        state = {}
    health_info = state.get('Health')
    if not isinstance(health_info, dict):
        health_info = {}
    health = health_info.get('Status', 'unknown')

    if state.get('Running', False):
        logger.info(f"Service {service_name} is running (health: {health})")
        return True

    logger.error(f"Service {service_name} is not running")
    return False


def verify_camoufox_services(project_path: str = "/srv/airflow_dl_worker") -> bool:
    """Verify that every camoufox service/container is running.

    Services are discovered through docker-compose first and through a
    name-filtered ``docker ps`` second. Each entry whose name contains
    ``camoufox`` is inspected and its reported status is checked.

    Args:
        project_path: Project directory used for service discovery and for
            locating ``configs/docker-compose.camoufox.yaml``.

    Returns:
        True when all camoufox services are running, or when nothing is
        running and no camoufox config exists under ``project_path``.
        False when a service is not running, when the config exists but
        nothing is running, or when the discovered list contains no
        camoufox entries.
    """
    logger.info("Starting camoufox service verification...")

    # Get all services
    services = get_docker_compose_services(project_path)

    if not services:
        logger.warning("No services found through docker-compose. Checking for running camoufox containers directly...")
        # Try to find camoufox containers directly
        cmd = ["docker", "ps", "--filter", "name=camoufox", "--format", "json"]
        returncode, stdout, _stderr = run_docker_command(cmd)
        if returncode == 0 and stdout:
            try:
                services = _parse_json_output(stdout)
            except json.JSONDecodeError as e:
                logger.debug(f"Failed to parse docker ps output: {e}")
                services = []

    if not services:
        logger.error("No camoufox services or containers found.")
        # Check if we're on a worker node by looking for camoufox config
        config_path = os.path.join(project_path, "configs", "docker-compose.camoufox.yaml")
        if os.path.exists(config_path):
            logger.info("Camoufox config exists but no services running. This might indicate a startup issue.")
            return False
        logger.info("No camoufox config found. This might be a master node.")
        return True

    logger.info(f"Found {len(services)} camoufox service(s) or container(s)")

    # Check each service
    all_healthy = True
    camoufox_services_found = 0

    for service in services:
        service_names = _service_names(service)

        # Skip anything that is not a camoufox service
        if not any('camoufox' in name.lower() for name in service_names):
            continue

        camoufox_services_found += 1
        # Use the first service name for the health check
        name_to_check = service_names[0]
        logger.info(f"Checking service: {name_to_check}")

        # Check if service is running
        if not check_service_health(name_to_check):
            all_healthy = False
            continue

        # Check service status from docker output
        service_status = (service.get('State') or
                          service.get('status') or
                          service.get('Status') or
                          'unknown')
        service_health = (service.get('Health') or
                          service.get('health') or
                          'unknown')

        logger.info(f"Service {name_to_check} - Status: {service_status}, Health: {service_health}")

        if not _is_running_status(service_status):
            logger.error(f"Service {name_to_check} is not running (status: {service_status})")
            all_healthy = False
        elif service_health not in ['healthy', 'unknown', '']:
            # unknown or empty is OK for services without healthcheck
            logger.warning(f"Service {name_to_check} health is {service_health}")

    if camoufox_services_found == 0:
        logger.warning("No camoufox services found in the service list")
        return False

    logger.info(f"Successfully verified {camoufox_services_found} camoufox service(s)")
    return all_healthy


def main():
    """Main entry point.

    Picks the first existing directory from a fixed list of candidate
    project paths, runs the verification against it and converts the outcome
    into a process exit code.

    Returns:
        0 when verification passes; 1 when it fails, raises, or no project
        path can be determined.
    """
    logger.info("Camoufox Service Verification Script")
    logger.info("=" * 40)

    # Try to detect project path
    project_paths = [
        "/srv/airflow_dl_worker",  # Worker node
        "/srv/airflow_master",     # Master node
        "/app",                    # Container path
        "."                        # Current directory
    ]

    project_path = next((path for path in project_paths if os.path.exists(path)), None)

    if not project_path:
        logger.error("Could not determine project path")
        return 1

    logger.info(f"Using project path: {project_path}")

    try:
        success = verify_camoufox_services(project_path)
    except Exception as e:
        # Top-level boundary: log the traceback and report failure.
        logger.error(f"Unexpected error during verification: {e}", exc_info=True)
        return 1

    if success:
        logger.info("✅ All camoufox services verification PASSED")
        return 0

    logger.error("❌ Camoufox services verification FAILED")
    return 1


if __name__ == "__main__":
    sys.exit(main())