yt-dlp-dags/tools/generate-inventory.py

#!/usr/bin/env python3
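"""Generate Ansible inventory, host_vars, and group_vars files from a cluster config.

The layout below is an illustrative sketch of the cluster.yml this script expects,
reconstructed from the keys read further down (master, workers, global_vars,
shadowsocks_proxies, external_access_ips); every hostname, IP, path, and value is
a placeholder, not taken from a real deployment:

    master:
      airflow-master:
        ip: 10.0.0.10
    workers:
      dl-worker-01:
        ip: 10.0.0.21
        port: 2222
        proxies: ["proxy-eu-1"]
        profile_prefixes: ["chrome_"]
        cleanup_settings:
          enabled: true
          mode: "age"
          max_age_seconds: 86400
    global_vars:
      airflow_master_dir: /opt/airflow/master
      airflow_worker_dir: /opt/airflow/worker
    shadowsocks_proxies:
      proxy-eu-1:
        server: "203.0.113.5"
        server_port: 8388
        local_port: 1080
        vault_password_key: "ss_proxy_eu_1_password"
    external_access_ips:
      - "198.51.100.7"
"""
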
import yaml
import sys
import os
import shutil
from jinja2 import Environment, FileSystemLoader


def load_cluster_config(config_path):
    """Load cluster configuration from YAML file"""
    with open(config_path, 'r') as f:
        return yaml.safe_load(f)


def generate_inventory(cluster_config, inventory_path):
    """Generate Ansible inventory file from cluster configuration"""
    with open(inventory_path, 'w') as f:
        f.write("# This file is auto-generated by tools/generate-inventory.py\n")
        f.write("# Do not edit; your changes will be overwritten.\n")
        f.write("# Edit cluster.yml and re-run the generator instead.\n\n")

        # Master group
        f.write("[master]\n")
        for hostname, config in cluster_config.get('master', {}).items():
            line = f"{hostname} ansible_host={config['ip']}"
            if 'port' in config:
                line += f" ansible_port={config['port']}"
            f.write(line + "\n")
        f.write("\n")

        # Workers group (handles the case where no workers are defined)
        f.write("[workers]\n")
        for hostname, config in cluster_config.get('workers', {}).items():
            line = f"{hostname} ansible_host={config['ip']}"
            if 'port' in config:
                line += f" ansible_port={config['port']}"
            f.write(line + "\n")
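
# For reference, with the illustrative config sketched in the module docstring
# (hostnames, IPs, and ports are placeholders), generate_inventory() would emit
# roughly the following, after the auto-generation header comments:
#
#   [master]
#   airflow-master ansible_host=10.0.0.10
#
#   [workers]
#   dl-worker-01 ansible_host=10.0.0.21 ansible_port=2222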


def generate_host_vars(cluster_config, host_vars_dir):
    """Generate host-specific variables.

    This function is non-destructive: it only creates or overwrites files
    for hosts defined in the cluster config.
    """
    # Create host_vars directory if it doesn't exist
    os.makedirs(host_vars_dir, exist_ok=True)

    master_nodes = cluster_config.get('master', {})
    if not master_nodes:
        print("Error: 'master' section is missing or empty in cluster config. Cannot proceed.")
        sys.exit(1)
    master_ip = list(master_nodes.values())[0]['ip']

    # Get global vars for aliases
    global_vars = cluster_config.get('global_vars', {})
    airflow_master_dir = global_vars.get('airflow_master_dir')
    airflow_worker_dir = global_vars.get('airflow_worker_dir')

    # Get global proxy definitions
    shadowsocks_proxies = cluster_config.get('shadowsocks_proxies', {})

    # Combine master and worker nodes for processing
    all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})}

    for hostname, config in all_nodes.items():
        host_vars_file = os.path.join(host_vars_dir, f"{hostname}.yml")

        # Per-node list of proxies to USE
        worker_proxies = config.get('proxies', [])
        profile_prefixes = config.get('profile_prefixes', [])
        cleanup_settings = config.get('cleanup_settings')
        with open(host_vars_file, 'w') as f:
            f.write("---\n")
            f.write(f"# Variables for {hostname}\n")
            f.write(f"master_host_ip: {master_ip}\n")
            f.write("redis_port: 52909\n")

            # Add node-specific directory aliases for template compatibility.
            # The master path is needed by all nodes for the .env template.
            if airflow_master_dir:
                f.write(f"airflow_master: \"{airflow_master_dir}\"\n")
            if hostname in cluster_config.get('workers', {}) and airflow_worker_dir:
                f.write(f"airflow_dl_worker: \"{airflow_worker_dir}\"\n")

            # Write the global proxy definitions for deployment
            if shadowsocks_proxies:
                f.write("shadowsocks_proxies:\n")
                for name, proxy_config in shadowsocks_proxies.items():
                    f.write(f"  {name}:\n")
                    f.write(f"    server: \"{proxy_config['server']}\"\n")
                    f.write(f"    server_port: {proxy_config['server_port']}\n")
                    f.write(f"    local_port: {proxy_config['local_port']}\n")
                    f.write(f"    vault_password_key: \"{proxy_config['vault_password_key']}\"\n")

            # Write the per-node list of proxies to USE
            if worker_proxies:
                f.write("worker_proxies:\n")
                for proxy in worker_proxies:
                    f.write(f"  - \"{proxy}\"\n")

            # Write worker-specific profile prefixes
            if profile_prefixes:
                f.write("profile_prefixes:\n")
                for prefix in profile_prefixes:
                    f.write(f"  - \"{prefix}\"\n")

            # Write worker-specific cleanup settings (overrides global)
            if cleanup_settings:
                f.write("cleanup_settings:\n")
                if 'enabled' in cleanup_settings:
                    f.write(f"  enabled: {str(cleanup_settings['enabled']).lower()}\n")
                if 'mode' in cleanup_settings:
                    f.write(f"  mode: \"{cleanup_settings['mode']}\"\n")
                if 'max_age_seconds' in cleanup_settings:
                    f.write(f"  max_age_seconds: {cleanup_settings['max_age_seconds']}\n")


def generate_group_vars(cluster_config, group_vars_path):
    """Generate group-level variables"""
    # Create parent directory if it doesn't exist
    all_vars_dir = os.path.dirname(group_vars_path)
    os.makedirs(all_vars_dir, exist_ok=True)

    # Remove the previously generated file if it exists, to avoid stale data.
    if os.path.exists(group_vars_path):
        os.remove(group_vars_path)

    global_vars = cluster_config.get('global_vars', {})
    external_ips = cluster_config.get('external_access_ips', [])

    master_nodes = cluster_config.get('master', {})
    if not master_nodes:
        print("Error: 'master' section is missing or empty in cluster config. Cannot proceed.")
        sys.exit(1)
    master_ip = list(master_nodes.values())[0]['ip']

    # Combine master and worker nodes to create a hostvars-like structure
    all_nodes = {**cluster_config.get('master', {}), **cluster_config.get('workers', {})}

    # Prepare data for YAML dump
    generated_data = {
        'master_host_ip': master_ip,
        'redis_port': 52909,
        'external_access_ips': external_ips if external_ips else []
    }
    generated_data.update(global_vars)

    with open(group_vars_path, 'w') as f:
        f.write("---\n")
        f.write("# This file is auto-generated by tools/generate-inventory.py\n")
        f.write("# Do not edit; your changes will be overwritten.\n")
        yaml.dump(generated_data, f, default_flow_style=False)
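
# For reference, the generated_vars file written above, with the placeholder
# config from the module docstring, would come out roughly as follows
# (yaml.dump sorts keys alphabetically by default):
#
#   ---
#   # This file is auto-generated by tools/generate-inventory.py
#   # Do not edit; your changes will be overwritten.
#   airflow_master_dir: /opt/airflow/master
#   airflow_worker_dir: /opt/airflow/worker
#   external_access_ips:
#   - 198.51.100.7
#   master_host_ip: 10.0.0.10
#   redis_port: 52909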


def main():
    if len(sys.argv) != 2:
        print("Usage: ./tools/generate-inventory.py <cluster-config-file>")
        sys.exit(1)
    config_path = sys.argv[1]

    # Check if config file exists
    if not os.path.exists(config_path):
        print(f"Error: Configuration file {config_path} not found")
        sys.exit(1)

    # Derive environment name from config filename (e.g., cluster.stress.yml -> stress)
    base_name = os.path.basename(config_path)
    if base_name == 'cluster.yml':
        env_name = ''
    elif base_name.startswith('cluster.') and base_name.endswith('.yml'):
        env_name = base_name[len('cluster.'):-len('.yml')]
    else:
        print(f"Warning: Unconventional config file name '{base_name}'. Using base name as environment identifier.")
        env_name = os.path.splitext(base_name)[0]

    # Define output paths based on environment
    inventory_suffix = f".{env_name}" if env_name else ""
    inventory_path = f"ansible/inventory{inventory_suffix}.ini"
    vars_suffix = f".{env_name}" if env_name else ""
    group_vars_path = f"ansible/group_vars/all/generated_vars{vars_suffix}.yml"

    # Load cluster configuration
    cluster_config = load_cluster_config(config_path)

    # Generate inventory file
    generate_inventory(cluster_config, inventory_path)
    print(f"Generated {inventory_path}")

    # Generate host variables
    host_vars_dir = "ansible/host_vars"
    generate_host_vars(cluster_config, host_vars_dir)
    print(f"Generated host variables in {host_vars_dir}")

    # Generate group variables
    generate_group_vars(cluster_config, group_vars_path)
    print(f"Generated group variables in {os.path.dirname(group_vars_path)}")

    print("Inventory generation complete!")


if __name__ == "__main__":
    main()
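

# Example invocation (the "stress" environment name is hypothetical):
#
#   ./tools/generate-inventory.py cluster.stress.yml
#
# This would write ansible/inventory.stress.ini, per-host files under
# ansible/host_vars/, and ansible/group_vars/all/generated_vars.stress.yml;
# a plain cluster.yml produces the unsuffixed inventory.ini and generated_vars.yml.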