243 lines
9.4 KiB
Python
243 lines
9.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script to verify that all camoufox services are running and accessible.
|
|
This script should be run after deployment to ensure the cluster is healthy.
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import json
|
|
import time
|
|
import logging
|
|
from typing import List, Dict, Tuple
|
|
|
|
# Logging setup: timestamped records, INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
|
def run_docker_command(cmd: List[str], timeout: float = 30) -> Tuple[int, str, str]:
    """Run a command without a shell and report its outcome without raising.

    Args:
        cmd: Argument vector to execute, e.g. ``["docker", "ps"]``.
        timeout: Seconds to wait for the command before giving up.
            Defaults to 30.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams decoded as
        text and stripped of surrounding whitespace. If the command times out
        or cannot be started, the tuple is ``(1, "", <error message>)``.
    """
    # Render the argv defensively: a non-string item must not make the
    # error-reporting path itself raise.
    printable_cmd = ' '.join(map(str, cmd))

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        logger.error(f"Command timed out: {printable_cmd}")
        return 1, "", "Command timed out"
    except Exception as e:
        # Deliberately broad: callers rely on a return code rather than an
        # exception (e.g. when the docker binary is not installed).
        logger.error(f"Error running command: {printable_cmd} - {e}")
        return 1, "", str(e)

    return result.returncode, result.stdout.strip(), result.stderr.strip()
|
|
|
|
def _parse_docker_json_output(stdout: str) -> List[Dict]:
    """Decode docker JSON output given as one array or as one object per line."""
    if stdout.startswith('['):
        return json.loads(stdout)
    # Split on '\n' only: str.splitlines() would also break on characters
    # that may legally appear unescaped inside a JSON string.
    return [json.loads(line) for line in stdout.split('\n') if line.strip()]


def get_docker_compose_services(project_path: str) -> List[Dict]:
    """Get the list of services reported by docker compose.

    Several ``docker compose ps`` invocations are tried in order (two fixed
    project names, then ``project_path`` as the project directory, then the
    current directory) because the project naming might vary. The first one
    that succeeds with a non-empty, parseable result wins. If none does, all
    running containers are listed with ``docker ps`` and only those whose
    ``Names`` field contains "camoufox" (case-insensitive) are returned.

    Args:
        project_path: Directory passed to ``--project-directory``.

    Returns:
        The decoded JSON entries, or an empty list when nothing was found.
    """
    candidate_commands = [
        ["docker", "compose", "-p", "ytdlp-ops", "ps", "--format", "json"],
        ["docker", "compose", "-p", "ytdlp-ops-camoufox", "ps", "--format", "json"],
        ["docker", "compose", "--project-directory", project_path, "ps", "--format", "json"],
        ["docker", "compose", "ps", "--format", "json"],
    ]

    for cmd in candidate_commands:
        returncode, stdout, _ = run_docker_command(cmd)
        if returncode != 0 or not stdout:
            continue
        try:
            services = _parse_docker_json_output(stdout)
        except json.JSONDecodeError as e:
            logger.debug(f"Failed to parse docker-compose output with command {' '.join(cmd)}: {e}")
            continue
        if services:
            return services

    # If all commands failed, try to get all containers and filter for camoufox
    logger.info("Falling back to direct container inspection")
    returncode, stdout, _ = run_docker_command(["docker", "ps", "--format", "json"])
    if returncode == 0 and stdout:
        try:
            containers = _parse_docker_json_output(stdout)
        except json.JSONDecodeError as e:
            # Report the failure instead of swallowing it, then fall through
            # to the generic error below.
            logger.warning(f"Failed to parse docker ps output: {e}")
        else:
            # Tolerate a missing/null Names field and match case-insensitively.
            return [
                c for c in containers
                if isinstance(c, dict) and 'camoufox' in str(c.get('Names') or '').lower()
            ]

    logger.error("Failed to get docker-compose services with all methods")
    return []
|
|
|
|
def check_service_health(service_name: str, port: int = 12345) -> bool:
    """Check whether the container named *service_name* is running.

    The check is based solely on ``docker inspect``: the container counts as
    healthy when its ``State.Running`` flag is true. The healthcheck status,
    if any, is only logged.

    Args:
        service_name: Container name or ID passed to ``docker inspect``.
        port: Currently unused; no port or WebSocket probing is performed.
            Kept so existing callers remain valid.

    Returns:
        True if the container is running. False if it is not running, or if
        the inspect command fails or returns empty or unparseable output.
    """
    returncode, stdout, stderr = run_docker_command(["docker", "inspect", service_name])
    if returncode != 0:
        logger.error(f"Failed to inspect service {service_name}: {stderr}")
        return False

    try:
        service_info = json.loads(stdout)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse docker inspect output for {service_name}: {e}")
        return False

    # Always return a bool: an empty or unexpected payload is a failure,
    # not an implicit None.
    if not isinstance(service_info, list) or not service_info:
        logger.error(f"docker inspect returned no data for {service_name}")
        return False

    # `or {}` also covers keys that are present but null.
    state = service_info[0].get('State') or {}
    health = (state.get('Health') or {}).get('Status', 'unknown')

    if state.get('Running', False):
        logger.info(f"Service {service_name} is running (health: {health})")
        return True

    logger.error(f"Service {service_name} is not running")
    return False
|
|
|
|
def _extract_service_names(service: Dict) -> List[str]:
    """Return the name(s) of a docker/compose JSON entry as a list of strings."""
    # Different docker output formats have different field names
    raw_name = (service.get('Name') or
                service.get('Names') or
                service.get('name') or
                service.get('Service', 'unknown'))
    if isinstance(raw_name, list):
        return [str(item) for item in raw_name]
    return [str(raw_name)]


def verify_camoufox_services(project_path: str = "/srv/airflow_dl_worker") -> bool:
    """Verify that every camoufox service or container is running.

    Services are obtained from ``get_docker_compose_services``; if that yields
    nothing, running containers are listed with a ``name=camoufox`` filter.
    Each entry whose name contains "camoufox" (case-insensitive) must pass
    ``check_service_health`` and report a "running" state in the listing. A
    health value other than healthy/unknown/empty only produces a warning.

    Args:
        project_path: Project directory used for service discovery and for
            locating ``configs/docker-compose.camoufox.yaml``.

    Returns:
        True when all camoufox entries are running, or when nothing was found
        and no camoufox compose config exists under ``project_path`` (this
        might be a master node). False when the config exists but nothing is
        running, when no entry name contains "camoufox", or when any camoufox
        entry fails a check.
    """
    import os  # local import: `os` is not among the file-level imports

    logger.info("Starting camoufox service verification...")

    services = get_docker_compose_services(project_path)

    if not services:
        logger.warning("No services found through docker-compose. Checking for running camoufox containers directly...")
        cmd = ["docker", "ps", "--filter", "name=camoufox", "--format", "json"]
        returncode, stdout, stderr = run_docker_command(cmd)

        if returncode == 0 and stdout:
            try:
                services = [json.loads(line) for line in stdout.split('\n') if line.strip()]
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse docker ps output: {e}")
                services = []

    if not services:
        logger.error("No camoufox services or containers found.")
        # Check if we're on a worker node by looking for camoufox config
        config_path = os.path.join(project_path, "configs", "docker-compose.camoufox.yaml")
        if os.path.exists(config_path):
            logger.info("Camoufox config exists but no services running. This might indicate a startup issue.")
            return False
        logger.info("No camoufox config found. This might be a master node.")
        return True

    logger.info(f"Found {len(services)} camoufox service(s) or container(s)")

    camoufox_services_found = 0
    failed_services: List[str] = []

    for service in services:
        if not isinstance(service, dict):
            # Unexpected JSON shape: skip it rather than abort the whole run.
            logger.debug(f"Skipping non-object service entry: {service!r}")
            continue

        service_names = _extract_service_names(service)
        if not any('camoufox' in name.lower() for name in service_names):
            continue

        camoufox_services_found += 1
        # Use the first service name for health check
        name_to_check = service_names[0]
        logger.info(f"Checking service: {name_to_check}")

        if not check_service_health(name_to_check):
            failed_services.append(name_to_check)
            continue

        # Cross-check the state reported by the listing itself
        service_status = (service.get('State') or
                          service.get('status') or
                          service.get('Status') or
                          'unknown')
        service_health = (service.get('Health') or
                          service.get('health') or
                          'unknown')

        logger.info(f"Service {name_to_check} - Status: {service_status}, Health: {service_health}")

        if str(service_status).lower() != 'running':
            logger.error(f"Service {name_to_check} is not running (status: {service_status})")
            failed_services.append(name_to_check)
        elif service_health not in ['healthy', 'unknown', '']:
            # unknown or empty is OK for services without healthcheck
            logger.warning(f"Service {name_to_check} health is {service_health}")

    if camoufox_services_found == 0:
        logger.warning("No camoufox services found in the service list")
        return False

    # Only claim success when every service actually passed.
    if failed_services:
        logger.error(
            f"{len(failed_services)} of {camoufox_services_found} camoufox service(s) "
            f"failed verification: {', '.join(failed_services)}"
        )
        return False

    logger.info(f"Successfully verified {camoufox_services_found} camoufox service(s)")
    return True
|
|
|
|
def main():
    """Script entry point.

    Picks the first existing directory among the known deployment locations,
    runs the camoufox verification against it and returns a process exit
    status: 0 when verification passes, 1 when it fails, when no project path
    exists, or when verification raises an unexpected exception.
    """
    import os

    logger.info("Camoufox Service Verification Script")
    logger.info("=" * 40)

    # Candidate locations, probed in order of preference
    candidate_roots = (
        "/srv/airflow_dl_worker",  # Worker node
        "/srv/airflow_master",     # Master node
        "/app",                    # Container path
        ".",                       # Current directory
    )
    project_path = next(
        (root for root in candidate_roots if os.path.exists(root)),
        None,
    )

    if not project_path:
        logger.error("Could not determine project path")
        return 1

    logger.info(f"Using project path: {project_path}")

    try:
        if verify_camoufox_services(project_path):
            logger.info("✅ All camoufox services verification PASSED")
            return 0
        logger.error("❌ Camoufox services verification FAILED")
        return 1
    except Exception as e:
        # Top-level boundary: log with traceback and signal failure
        logger.error(f"Unexpected error during verification: {e}", exc_info=True)
        return 1
|
|
|
|
# When executed as a script, exit with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|