# yt-dlp-dags/tools/generate-profile-setup-policy.py

#!/usr/bin/env python3
"""
Generates the profile setup policy YAML from the main cluster configuration file.

This script reads the worker configurations from a cluster.yml file, aggregates
all profile definitions, and generates a policy file that can be used by the
`ytops-client setup-profiles` command. This centralizes profile management
in the cluster configuration file.
"""

import json
import os
import sys
from collections import OrderedDict

import yaml

# Register a representer so that an OrderedDict is dumped as a plain YAML
# mapping whose keys appear in the dict's own iteration order.
def represent_ordereddict(dumper, data):
    """Represent ``data`` as a plain YAML mapping node, keeping its key order.

    Args:
        dumper: The PyYAML dumper performing the serialisation; its
            ``represent_data`` method turns each key and value into a node.
        data: The mapping to serialise (registered below for ``OrderedDict``).

    Returns:
        A ``yaml.nodes.MappingNode`` tagged as a standard map that holds the
        (key node, value node) pairs in ``data.items()`` order.
    """
    node_pairs = [
        (dumper.represent_data(key), dumper.represent_data(val))
        for key, val in data.items()
    ]
    return yaml.nodes.MappingNode('tag:yaml.org,2002:map', node_pairs)


yaml.add_representer(OrderedDict, represent_ordereddict)


# Marker list type: instances are dumped in YAML flow style (``[a, b, c]``)
# rather than block style, which is useful for short inner lists.
class FlowList(list):
    """List subclass that is serialised as an inline (flow-style) sequence."""


def flow_style_list_representer(dumper, data):
    """Represent ``data`` as a standard YAML sequence forced into flow style.

    Args:
        dumper: The PyYAML dumper performing the serialisation.
        data: The sequence to serialise (registered below for ``FlowList``).

    Returns:
        Whatever ``dumper.represent_sequence`` returns for the sequence.
    """
    seq_tag = 'tag:yaml.org,2002:seq'
    return dumper.represent_sequence(seq_tag, data, flow_style=True)


yaml.add_representer(FlowList, flow_style_list_representer)


# Marker string type: instances are always dumped as double-quoted scalars,
# regardless of whether PyYAML would otherwise leave them unquoted.
class QuotedString(str):
    """String subclass that is serialised as a double-quoted YAML scalar."""


def quoted_string_representer(dumper, data):
    """Represent ``data`` as a standard YAML string in double-quoted style.

    Args:
        dumper: The PyYAML dumper performing the serialisation.
        data: The string to serialise (registered below for ``QuotedString``).

    Returns:
        Whatever ``dumper.represent_scalar`` returns for the string.
    """
    str_tag = 'tag:yaml.org,2002:str'
    return dumper.represent_scalar(str_tag, data, style='"')


yaml.add_representer(QuotedString, quoted_string_representer)


def load_cluster_config(config_path):
    """Load the cluster configuration from a YAML file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed document as returned by ``yaml.safe_load`` (``None`` for
        an empty file).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Open in binary mode so PyYAML performs its own encoding detection
    # (BOM, otherwise UTF-8) instead of relying on the platform's locale
    # encoding, which mis-decodes UTF-8 files on non-UTF-8 systems.
    with open(config_path, 'rb') as f:
        return yaml.safe_load(f)

def _yaml_dq(value):
    """Return ``value`` as a YAML double-quoted scalar with specials escaped."""
    # A JSON string literal is also a valid YAML double-quoted scalar, so
    # json.dumps provides correct escaping of quotes, backslashes and control
    # characters. ensure_ascii=False keeps non-ASCII text as-is, matching what
    # plain interpolation would have written.
    return json.dumps(str(value), ensure_ascii=False)


def _collect_common_pools(cluster_config):
    """Aggregate every worker's profile pools into a sorted list of entries."""
    # ``or {}`` / ``or []`` also covers keys that are present but null in the
    # YAML (e.g. a bare ``workers:``), which ``.get(key, default)`` would not.
    shadowsocks_proxies = cluster_config.get('shadowsocks_proxies') or {}
    all_workers = cluster_config.get('workers') or {}

    common_pools = []
    for worker_name, worker_config in all_workers.items():
        for pool in (worker_config or {}).get('profile_pools') or []:
            proxy_service = pool['proxy_service']
            if proxy_service not in shadowsocks_proxies:
                print(f"Warning: Proxy service '{proxy_service}' for profile pool '{pool['prefixes']}' on worker '{worker_name}' not found in global shadowsocks_proxies. Skipping.", file=sys.stderr)
                continue

            prefixes = sorted(pool['prefixes'] or [])
            if not prefixes:
                # The pools are later sorted by their first prefix, so an
                # entry without prefixes cannot be ordered; skip it with a
                # warning instead of crashing.
                print(f"Warning: Profile pool using proxy service '{proxy_service}' on worker '{worker_name}' has no prefixes. Skipping.", file=sys.stderr)
                continue

            proxy_port = shadowsocks_proxies[proxy_service]['local_port']
            common_pools.append(OrderedDict([
                ('prefixes', prefixes),
                ('proxy', f"{proxy_service}:{proxy_port}"),
                ('count', pool['count']),
            ]))

    # Sort by the first prefix of each pool for consistent file output.
    # list.sort is stable, so pools sharing a first prefix keep config order.
    common_pools.sort(key=lambda entry: entry['prefixes'][0])
    return common_pools


def generate_policy(cluster_config, output_path):
    """Generate the profile setup policy file using common pools.

    Every ``profile_pools`` entry of every worker in ``cluster_config`` is
    turned into one ``common_pools`` item (sorted prefixes, a
    ``<proxy_service>:<local_port>`` proxy string and the pool's count). The
    file is written by hand rather than via ``yaml.dump`` so that the exact
    layout and the explanatory comments are preserved.

    Pools that reference a proxy service missing from ``shadowsocks_proxies``
    or that have no prefixes are skipped with a warning on stderr. When no
    pool remains, ``common_pools: []`` is written so the key parses as an
    empty list rather than null.

    Args:
        cluster_config: Parsed cluster configuration mapping. The keys
            ``shadowsocks_proxies`` and ``workers`` are read; missing or null
            values are treated as empty.
        output_path: Path of the policy file to create or overwrite.

    Raises:
        KeyError: If a pool lacks ``proxy_service``, ``prefixes`` or
            ``count``, or a referenced proxy lacks ``local_port``.
        OSError: If the output file cannot be written.
    """
    sorted_common_pools = _collect_common_pools(cluster_config)

    # Write the policy file manually to ensure exact formatting and comments.
    # The encoding is pinned so the output does not depend on the locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("# Configuration for setting up profiles for a simulation or test run.\n")
        f.write("# This file is used by the `bin/ytops-client setup-profiles` command.\n")
        f.write("# It uses a common pool definition to avoid repetition.\n\n")
        f.write("# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!\n")
        f.write("# !!! DO NOT EDIT. Your changes will be overwritten.              !!!\n")
        f.write("# !!! Edit cluster.green.yml and re-run the generator instead.    !!!\n\n")

        f.write("simulation_parameters:\n")
        f.write("  # --- Common Redis settings for all tools ---\n")
        f.write("  # The environment name ('env') is now specified in each setup block below.\n")
        f.write('  env_file: ".env"            # Optional: path to a .env file.\n')

        f.write("\n# --- Common Pool Definitions ---\n")
        f.write("# Define the profile pools once. They will be created in both\n")
        f.write("# the auth and download simulation environments.\n")
        f.write("# The `setup-profiles` tool must be updated to support this format.\n")
        if sorted_common_pools:
            f.write("common_pools:\n")
        else:
            # A bare "common_pools:" would parse as null, not as a list.
            f.write("common_pools: []\n")
        for pool in sorted_common_pools:
            prefixes_str = ", ".join(_yaml_dq(p) for p in pool['prefixes'])
            f.write(f'  - prefixes: [{prefixes_str}]\n')
            f.write(f'    proxy: {_yaml_dq(pool["proxy"])}\n')
            f.write(f'    count: {pool["count"]}\n')

        f.write("\n# --- Profile setup for the AUTHENTICATION simulation ---\n")
        f.write("auth_profile_setup:\n")
        f.write('  env: "sim_auth"\n')
        f.write("  cleanup_before_run: true\n")
        f.write("  # The setup tool will use the 'common_pools' defined above.\n")
        f.write("  use_common_pools: true\n")

        f.write("\n# --- Profile setup for the DOWNLOAD simulation ---\n")
        f.write("download_profile_setup:\n")
        f.write('  env: "sim_download"\n')
        f.write("  cleanup_before_run: true\n")
        f.write("  # The setup tool will also use the 'common_pools' defined above.\n")
        f.write("  use_common_pools: true\n")

    print(f"Successfully generated profile setup policy at: {output_path}")


def main():
    """Command-line entry point.

    Expects exactly two arguments, ``<cluster-config-file>`` and
    ``<output-policy-file>``; loads the former and writes the generated
    policy to the latter. Exits with status 1, after printing a message to
    stderr, when the arguments are wrong, the configuration file does not
    exist, or its top-level YAML value is not a mapping.
    """
    if len(sys.argv) != 3:
        # Usage errors go to stderr like every other diagnostic of this script.
        print("Usage: ./tools/generate-profile-setup-policy.py <cluster-config-file> <output-policy-file>", file=sys.stderr)
        sys.exit(1)

    config_path, output_path = sys.argv[1:3]

    if not os.path.exists(config_path):
        print(f"Error: Cluster configuration file not found at '{config_path}'", file=sys.stderr)
        sys.exit(1)

    cluster_config = load_cluster_config(config_path)
    # An empty file parses to None and a list/scalar document is not usable
    # either; report that clearly instead of failing later on ``.get``.
    if not isinstance(cluster_config, dict):
        print(f"Error: Cluster configuration file '{config_path}' does not contain a YAML mapping", file=sys.stderr)
        sys.exit(1)

    generate_policy(cluster_config, output_path)


if __name__ == "__main__":
    main()