yt-dlp-dags/ansible/scripts/verify_camoufox_services.py

243 lines
9.4 KiB
Python

#!/usr/bin/env python3
"""
Script to verify that all camoufox services are running and accessible.
This script should be run after deployment to ensure the cluster is healthy.
"""
import subprocess
import sys
import json
import time
import logging
from typing import List, Dict, Tuple
# Root logging setup: INFO and above, each record prefixed with a timestamp
# and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
def run_docker_command(cmd: List[str]) -> Tuple[int, str, str]:
    """Execute *cmd* as a subprocess and report how it ended.

    Both output streams are captured as text, and the command is given
    30 seconds to finish.

    Args:
        cmd: Program followed by its arguments, one list element per token.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with surrounding whitespace
        stripped from both streams. A timeout yields
        ``(1, "", "Command timed out")``; any other exception raised while
        running the command yields ``(1, "", str(exception))``.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        outcome = (
            completed.returncode,
            completed.stdout.strip(),
            completed.stderr.strip(),
        )
    except subprocess.TimeoutExpired:
        logger.error(f"Command timed out: {' '.join(cmd)}")
        return 1, "", "Command timed out"
    except Exception as exc:
        # Deliberately broad: a missing binary, permission problem, etc. must
        # be reported to the caller as a failed command, not crash the script.
        logger.error(f"Error running command: {' '.join(cmd)} - {exc}")
        return 1, "", str(exc)
    return outcome
def _parse_docker_json(output: str) -> List[Dict]:
    """Decode ``--format json`` output into a list of JSON objects.

    Accepts either a single JSON array or one JSON document per line.
    Entries that are not JSON objects are dropped so callers can safely
    call ``.get()`` on every element.

    Raises:
        json.JSONDecodeError: If any part of *output* is not valid JSON.
    """
    text = output.strip()
    if not text:
        return []
    if text.startswith('['):
        decoded = json.loads(text)
    else:
        decoded = [json.loads(line) for line in text.splitlines() if line.strip()]
    return [entry for entry in decoded if isinstance(entry, dict)]


def get_docker_compose_services(project_path: str) -> List[Dict]:
    """Return the service/container entries reported by docker compose.

    Several ``docker compose ps`` invocations are tried in order (two fixed
    project names, then *project_path* as the project directory, then the
    current directory); the first one that yields a non-empty list wins.
    If none does, ``docker ps`` is queried and only containers whose
    ``Names`` field contains ``camoufox`` are returned.

    Args:
        project_path: Directory passed to ``--project-directory``.

    Returns:
        A list of dicts decoded from the command's JSON output, or an empty
        list when nothing could be retrieved.
    """
    # The compose project name may vary between deployments, so probe the
    # known variants before falling back to less specific lookups.
    candidate_commands = [
        ["docker", "compose", "-p", "ytdlp-ops", "ps", "--format", "json"],
        ["docker", "compose", "-p", "ytdlp-ops-camoufox", "ps", "--format", "json"],
        ["docker", "compose", "--project-directory", project_path, "ps", "--format", "json"],
        ["docker", "compose", "ps", "--format", "json"],
    ]
    for cmd in candidate_commands:
        returncode, stdout, _stderr = run_docker_command(cmd)
        if returncode != 0 or not stdout:
            continue
        try:
            services = _parse_docker_json(stdout)
        except json.JSONDecodeError as e:
            logger.debug(f"Failed to parse docker-compose output with command {' '.join(cmd)}: {e}")
            continue
        if services:
            return services

    # If all compose commands failed, list all containers and filter for camoufox.
    logger.info("Falling back to direct container inspection")
    returncode, stdout, _stderr = run_docker_command(["docker", "ps", "--format", "json"])
    if returncode == 0 and stdout:
        try:
            containers = _parse_docker_json(stdout)
        except json.JSONDecodeError as e:
            # Record why the fallback produced nothing instead of hiding it.
            logger.debug(f"Failed to parse docker ps output: {e}")
        else:
            # `or ''` guards against a null Names value.
            return [c for c in containers if 'camoufox' in (c.get('Names') or '')]

    logger.error("Failed to get docker-compose services with all methods")
    return []
def check_service_health(service_name: str, port: int = 12345) -> bool:
    """Report whether the container *service_name* is currently running.

    The check is based solely on ``docker inspect``: the container counts as
    healthy when ``State.Running`` is true. The health-check status, when
    present, is logged for information but does not affect the result.

    Args:
        service_name: Container name or ID handed to ``docker inspect``.
        port: Currently unused; no network probe is made. Kept so existing
            callers that pass it keep working.

    Returns:
        True if the container is running; False if it is not running, if
        the inspect command fails, or if its output cannot be interpreted.
    """
    returncode, stdout, stderr = run_docker_command(["docker", "inspect", service_name])
    if returncode != 0:
        logger.error(f"Failed to inspect service {service_name}: {stderr}")
        return False

    try:
        service_info = json.loads(stdout)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse docker inspect output for {service_name}: {e}")
        return False

    # An empty or unexpectedly shaped result must be an explicit failure;
    # previously this path fell off the end and returned None silently.
    if (not isinstance(service_info, list) or not service_info
            or not isinstance(service_info[0], dict)):
        logger.error(f"No inspect data returned for service {service_name}")
        return False

    # `or {}` tolerates keys that are present but null.
    state = service_info[0].get('State') or {}
    health = (state.get('Health') or {}).get('Status', 'unknown')

    if not state.get('Running', False):
        logger.error(f"Service {service_name} is not running")
        return False

    logger.info(f"Service {service_name} is running (health: {health})")
    return True
def _service_names(service: Dict) -> List[str]:
    """Return the name(s) carried by a docker/compose JSON entry as strings."""
    # Different docker output formats use different field names.
    raw = (service.get('Name') or
           service.get('Names') or
           service.get('name') or
           service.get('Service', 'unknown'))
    if isinstance(raw, str):
        return [raw]
    if isinstance(raw, list):
        return [str(item) for item in raw] or ['unknown']
    return [str(raw)]


def _is_camoufox_service(service) -> bool:
    """Return True if *service* is a dict with a name containing 'camoufox'."""
    if not isinstance(service, dict):
        return False
    return any('camoufox' in name.lower() for name in _service_names(service))


def _status_is_running(status) -> bool:
    """Return True if a State/Status value denotes a running container."""
    text = str(status).strip().lower()
    # 'running' is the State value; the 'Up ...' form covers entries that
    # only carry a human-readable Status (e.g. "Up 2 hours").
    return text == 'running' or text == 'up' or text.startswith('up ')


def verify_camoufox_services(project_path: str = "/srv/airflow_dl_worker") -> bool:
    """Verify that every camoufox service/container is running.

    Camoufox entries are looked up via ``get_docker_compose_services`` and,
    if that yields none, via ``docker ps --filter name=camoufox``. Each entry
    found is checked with ``check_service_health`` and against the status
    reported in the listing itself.

    Args:
        project_path: Deployment directory; used for the compose lookup and
            to locate ``configs/docker-compose.camoufox.yaml``.

    Returns:
        True when all camoufox services are running, or when none exist and
        no camoufox compose config is present under *project_path* (taken to
        mean this node is not expected to run camoufox). False otherwise.
    """
    import os

    logger.info("Starting camoufox service verification...")

    # Keep only camoufox entries up front. Otherwise a compose project that
    # contains only unrelated services would mask camoufox containers that
    # live elsewhere and skip both the fallback and the config check below.
    services = [s for s in get_docker_compose_services(project_path)
                if _is_camoufox_service(s)]

    if not services:
        logger.warning("No camoufox services found through docker-compose. Checking for running camoufox containers directly...")
        cmd = ["docker", "ps", "--filter", "name=camoufox", "--format", "json"]
        returncode, stdout, _stderr = run_docker_command(cmd)
        if returncode == 0 and stdout:
            try:
                parsed = [json.loads(line) for line in stdout.splitlines() if line.strip()]
            except json.JSONDecodeError as e:
                logger.debug(f"Failed to parse docker ps output: {e}")
                parsed = []
            services = [s for s in parsed if _is_camoufox_service(s)]

    if not services:
        logger.error("No camoufox services or containers found.")
        # Decide whether camoufox is expected on this node by looking for
        # its compose config.
        config_path = os.path.join(project_path, "configs", "docker-compose.camoufox.yaml")
        if os.path.exists(config_path):
            logger.info("Camoufox config exists but no services running. This might indicate a startup issue.")
            return False
        logger.info("No camoufox config found. This might be a master node.")
        return True

    logger.info(f"Found {len(services)} camoufox service(s) or container(s)")

    all_healthy = True
    for service in services:
        # Use the first reported name for inspection and log messages.
        name_to_check = _service_names(service)[0]
        logger.info(f"Checking service: {name_to_check}")

        if not check_service_health(name_to_check):
            all_healthy = False
            continue

        # Cross-check against the status carried by the listing itself.
        service_status = (service.get('State') or
                          service.get('status') or
                          service.get('Status') or
                          'unknown')
        service_health = (service.get('Health') or
                          service.get('health') or
                          'unknown')
        logger.info(f"Service {name_to_check} - Status: {service_status}, Health: {service_health}")

        if not _status_is_running(service_status):
            logger.error(f"Service {name_to_check} is not running (status: {service_status})")
            all_healthy = False
        elif service_health not in ('healthy', 'unknown', ''):
            # unknown or empty is OK for services without a healthcheck
            logger.warning(f"Service {name_to_check} health is {service_health}")

    if all_healthy:
        logger.info(f"Successfully verified {len(services)} camoufox service(s)")
    else:
        logger.error(f"One or more of the {len(services)} camoufox service(s) failed verification")
    return all_healthy
def main():
    """Run the verification and translate its result into an exit status.

    The project directory is the first existing entry from a fixed list of
    candidate locations. Returns 0 when verification passes, and 1 when it
    fails, raises, or no candidate directory exists.
    """
    logger.info("Camoufox Service Verification Script")
    logger.info("=" * 40)

    import os

    # Candidate project locations, in order of preference.
    candidates = (
        "/srv/airflow_dl_worker",  # Worker node
        "/srv/airflow_master",     # Master node
        "/app",                    # Container path
        ".",                       # Current directory
    )
    project_path = next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        None,
    )
    if not project_path:
        logger.error("Could not determine project path")
        return 1

    logger.info(f"Using project path: {project_path}")

    try:
        if verify_camoufox_services(project_path):
            logger.info("✅ All camoufox services verification PASSED")
            return 0
        logger.error("❌ Camoufox services verification FAILED")
        return 1
    except Exception as exc:
        # Top-level boundary: log the traceback and report failure.
        logger.error(f"Unexpected error during verification: {exc}", exc_info=True)
        return 1
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())