#!/usr/bin/env python3
"""
Generates the profile setup policy YAML from the main cluster configuration file.

This script reads the worker configurations from a cluster.yml file, aggregates
all profile definitions, and generates a policy file that can be used by the
`ytops-client setup-profiles` command. This centralizes profile management
in the cluster configuration file.

When a third command-line argument is given, an enforcer policy file is
generated as well.
"""

import json
import os
import sys
from collections import OrderedDict

import yaml


USAGE = (
    "Usage: ./tools/generate-profile-setup-policy.py "
    "<cluster-config-file> <output-profile-policy-file> "
    "[<output-enforcer-policy-file>]"
)


# To ensure YAML dumps dicts in the order they are created
def represent_ordereddict(dumper, data):
    """Represent an OrderedDict as a plain YAML mapping, keeping key order."""
    value = [
        (dumper.represent_data(item_key), dumper.represent_data(item_value))
        for item_key, item_value in data.items()
    ]
    return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)


yaml.add_representer(OrderedDict, represent_ordereddict)


# Custom list type and representer to achieve flow style for inner lists
class FlowList(list):
    """List subclass that is dumped by PyYAML in flow style."""


def flow_style_list_representer(dumper, data):
    """Represent a FlowList as a flow-style YAML sequence."""
    return dumper.represent_sequence(u'tag:yaml.org,2002:seq', data, flow_style=True)


yaml.add_representer(FlowList, flow_style_list_representer)


# Custom string type and representer for double-quoted strings
class QuotedString(str):
    """String subclass that is dumped by PyYAML as a double-quoted scalar."""


def quoted_string_representer(dumper, data):
    """Represent a QuotedString as a double-quoted YAML scalar."""
    return dumper.represent_scalar(u'tag:yaml.org,2002:str', data, style='"')


yaml.add_representer(QuotedString, quoted_string_representer)


# --- Static parts of the generated profile setup policy ---
_PROFILE_POLICY_HEADER = """\
# Configuration for setting up profiles for a simulation or test run.
# This file is used by the `bin/ytops-client setup-profiles` command.
# It uses a common pool definition to avoid repetition.

# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!
# !!! DO NOT EDIT. Your changes will be overwritten. !!!
# !!! Edit cluster.green.yml and re-run the generator instead. !!!

simulation_parameters:
  # --- Common Redis settings for all tools ---
  # The environment name ('env') is now specified in each setup block below.
  env_file: ".env" # Optional: path to a .env file.

# --- Common Pool Definitions ---
# Define the profile pools once. They will be created in both
# the auth and download simulation environments.
# The `setup-profiles` tool must be updated to support this format.
"""

_PROFILE_POLICY_FOOTER = """
# --- Profile setup for the AUTHENTICATION simulation ---
auth_profile_setup:
  env: "sim_auth"
  cleanup_before_run: true
  # The setup tool will use the 'common_pools' defined above.
  use_common_pools: true

# --- Profile setup for the DOWNLOAD simulation ---
download_profile_setup:
  env: "sim_download"
  cleanup_before_run: true
  # The setup tool will also use the 'common_pools' defined above.
  use_common_pools: true
"""

# --- Static parts of the generated enforcer policy ---
_ENFORCER_POLICY_HEADER = """\
# Policy for the unified simulation enforcer.
# This file is used by `bin/ytops-client policy-enforcer --live` to manage
# both the authentication and download simulation environments from a single process.

# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!
# !!! DO NOT EDIT. Your changes will be overwritten. !!!
# !!! Edit cluster.green.yml and re-run the generator instead. !!!

simulation_parameters:
  # --- Common Redis settings for all tools ---
  # The enforcer will connect to two different Redis environments (key prefixes)
  # based on these settings, applying the corresponding policies to each.
  env_file: ".env"
  auth_env: "sim_auth"
  download_env: "sim_download"

  # How often the enforcer should wake up and apply all policies.
  interval_seconds: 2

# --- Common & Pool-specific Settings ---
# Common settings are applied to all profile groups discovered via the pools below.
# A pool can optionally override these settings by defining its own 'group_settings' block.
common_group_settings:
  auth:
    max_active_profiles: 1
    rotate_after_requests: 5
    rest_duration_minutes_on_rotation: 0.20
    wait_download_finish_per_group: true
    max_wait_for_downloads_minutes: 240
  download:
    max_active_profiles: 1
    rotate_after_requests: 0
    rest_duration_minutes_on_rotation: 0.2

# Defines pools of profile groups with their own concurrency limits.
"""

_ENFORCER_POLICY_FOOTER = """
# --- Policies for the Authentication Simulation ---
auth_policy_enforcer_config:

  # Ban if 2 failures occur within a 1-minute window.
  #ban_on_failures: 2
  #ban_on_failures_window_minutes: 1

  # The standard rest policy is disabled, as rotation is handled by the profile group.

  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour.
  rate_limit_requests: 0
  rate_limit_window_minutes: 60
  rate_limit_rest_duration_minutes: 5

  rest_after_requests: 0
  rest_duration_minutes: 10

  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The settings below should be configured to respect these limits.

  # New setting for load balancing across profile groups.
  # "longest_idle": Activates the profile that has been idle the longest across all groups (based on last_used time).
  #                 This is a global FIFO strategy that effectively cycles through profiles regardless of their group.
  # "least_loaded": Prioritizes activating a profile from the group with the fewest pending downloads.
  #                 If multiple groups have zero pending downloads, it acts as a FIFO queue, activating
  #                 the one that finished its last download batch the earliest. This is useful when you want
  #                 to ensure a group finishes its entire workload before another group starts.
  profile_selection_strategy: "longest_idle"

  # The 'global_max_active_profiles' setting is now superseded by the per-pool limits
  # defined in the 'enforcement_pools' section.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.

  proxy_work_minutes: 0
  proxy_rest_duration_minutes: 0

  # Global maximum time a proxy can be active before being rested, regardless of
  # other rules. Acts as a safety net. Set to 0 to disable.
  max_global_proxy_active_minutes: 0
  rest_duration_on_max_active: 10

  # Proxy-level ban on failure burst is disabled.
  proxy_ban_on_failures: 0
  proxy_ban_window_minutes: 2

  # Clean up locks held for more than 16 minutes (960s) to prevent stuck workers.
  # This should be longer than the docker container timeout (15m).
  unlock_stale_locks_after_seconds: 960

  # A short post-task cooldown for auth simulation profiles. When a batch is finished,
  # the profile is put into COOLDOWN briefly. This prevents a worker from immediately
  # re-locking the same profile, giving the policy enforcer a window to perform rotation.
  unlock_cooldown_seconds: 0

# --- Cross-simulation synchronization ---
# This section is simplified because the link between auth and download profiles
# is now defined in the `profile_group_definitions`.
cross_simulation_sync:
  # Which states to synchronize from auth to download.
  sync_states:
    - "BANNED"
  # If true, a BANNED state on an auth profile will force the download profile to also be BANNED.
  enforce_auth_lead: true
  # CRITICAL: Ensures the correct download profile GROUP is active.
  sync_active_profile: true
  # When an auth profile is in the 'waiting_downloads' state, ensure the matching download profile is active.
  sync_waiting_downloads: true

# --- Policies for the Download Simulation ---
download_policy_enforcer_config:
  # Ban if 1 failure occurs within a 1-minute window.
  ban_on_failures: 1
  ban_on_failures_window_minutes: 1
  # Standard rest policy is disabled in favor of group rotation.

  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
  rate_limit_requests: 280
  rate_limit_window_minutes: 60
  rate_limit_rest_duration_minutes: 5

  rest_after_requests: 0
  rest_duration_minutes: 20

  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The settings below should be configured to respect these limits.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.

  # Time-based proxy rules are disabled.
  proxy_work_minutes: 0
  proxy_rest_duration_minutes: 10

  # Global maximum time a proxy can be active before being rested, regardless of
  # other rules. Acts as a safety net. Set to 0 to disable.
  max_global_proxy_active_minutes: 0
  rest_duration_on_max_active: 10

  # Proxy-level ban on failure burst is disabled.
  proxy_ban_on_failures: 3
  proxy_ban_window_minutes: 1

  # Clean up download locks held for more than 16 minutes (960s) to allow for long downloads.
  # This should be longer than the docker container timeout (15m).
  unlock_stale_locks_after_seconds: 960

  # After a profile is used for a download, unlock it but put it in COOLDOWN
  # state for 2-3s. This is enforced by the worker, which reads this config from Redis.
  unlock_cooldown_seconds: [2, 3]
"""


def _yaml_quote(value):
    """Return *value* as a double-quoted YAML scalar with special characters escaped."""
    # A JSON string literal is also a valid YAML double-quoted scalar, so
    # json.dumps supplies the escaping for quotes, backslashes and control
    # characters. ensure_ascii=False keeps non-ASCII text readable.
    return json.dumps(str(value), ensure_ascii=False)


def _yaml_flow_list(items):
    """Return *items* rendered as a YAML flow sequence of double-quoted strings."""
    return "[" + ", ".join(_yaml_quote(item) for item in items) + "]"


def _iter_worker_pools(cluster_config):
    """Yield (worker_name, pool) for every profile pool, in configuration order."""
    # `or {}` / `or []` also cover keys that are present but null in the YAML.
    all_workers = cluster_config.get('workers') or {}
    for worker_name, worker_config in all_workers.items():
        for pool in (worker_config or {}).get('profile_pools') or []:
            yield worker_name, pool


def _collect_common_pools(cluster_config):
    """Build the sorted list of common pool entries for the profile setup policy."""
    shadowsocks_proxies = cluster_config.get('shadowsocks_proxies') or {}
    common_pools = []

    for worker_name, pool in _iter_worker_pools(cluster_config):
        prefixes = pool.get('prefixes') or []
        if not prefixes:
            # Without a prefix the pool cannot be ordered in the output.
            print(f"Warning: Profile pool on worker '{worker_name}' has no prefixes. Skipping.",
                  file=sys.stderr)
            continue

        proxy_service = pool['proxy_service']
        if proxy_service not in shadowsocks_proxies:
            print(f"Warning: Proxy service '{proxy_service}' for profile pool '{prefixes}' "
                  f"on worker '{worker_name}' not found in global shadowsocks_proxies. Skipping.",
                  file=sys.stderr)
            continue

        proxy_port = shadowsocks_proxies[proxy_service]['local_port']
        common_pools.append(OrderedDict([
            ('prefixes', sorted(prefixes)),
            ('proxy', f"{proxy_service}:{proxy_port}"),
            ('count', pool['count']),
        ]))

    # Sort the pools by the first prefix in each pool for consistent file output
    return sorted(common_pools, key=lambda entry: entry['prefixes'][0])


def _collect_enforcement_pools(cluster_config):
    """Build one enforcement pool entry per worker that defines any prefixes."""
    all_workers = cluster_config.get('workers') or {}
    enforcement_pools = []

    for worker_name in sorted(all_workers):
        worker_config = all_workers[worker_name] or {}
        all_prefixes = []
        for pool in worker_config.get('profile_pools') or []:
            all_prefixes.extend(pool.get('prefixes') or [])

        if not all_prefixes:
            continue

        enforcement_pools.append(OrderedDict([
            ('name', f"server_{worker_name}"),
            ('profile_group_patterns', sorted(set(all_prefixes))),
            ('max_active_profiles', 1),
        ]))

    return enforcement_pools


def _render_profile_policy(common_pools):
    """Return the full text of the profile setup policy file."""
    parts = [_PROFILE_POLICY_HEADER]
    if common_pools:
        parts.append("common_pools:\n")
        for pool in common_pools:
            parts.append(f"  - prefixes: {_yaml_flow_list(pool['prefixes'])}\n")
            parts.append(f"    proxy: {_yaml_quote(pool['proxy'])}\n")
            parts.append(f"    count: {pool['count']}\n")
    else:
        # A bare "common_pools:" key would parse as null; emit an explicit empty list.
        parts.append("common_pools: []\n")
    parts.append(_PROFILE_POLICY_FOOTER)
    return "".join(parts)


def _render_enforcer_policy(enforcement_pools):
    """Return the full text of the enforcer policy file."""
    parts = [_ENFORCER_POLICY_HEADER]
    if enforcement_pools:
        parts.append("enforcement_pools:\n")
        for pool in enforcement_pools:
            parts.append(f"  - name: {_yaml_quote(pool['name'])}\n")
            parts.append(
                f"    profile_group_patterns: {_yaml_flow_list(pool['profile_group_patterns'])}\n"
            )
            parts.append(f"    max_active_profiles: {pool['max_active_profiles']}\n")
    else:
        # A bare "enforcement_pools:" key would parse as null; emit an explicit empty list.
        parts.append("enforcement_pools: []\n")
    parts.append(_ENFORCER_POLICY_FOOTER)
    return "".join(parts)


def load_cluster_config(config_path):
    """Load cluster configuration from YAML file.

    Returns whatever `yaml.safe_load` produces for the file; this is `None`
    for an empty document.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def generate_policy(cluster_config, output_path):
    """Generate the profile setup policy file using common pools.

    Args:
        cluster_config: Parsed cluster configuration mapping. The
            `workers` and `shadowsocks_proxies` keys are read; missing or
            null values are treated as empty.
        output_path: Path of the policy file to write (overwritten).

    Raises:
        KeyError: If a pool lacks `proxy_service` or `count`, or a proxy
            definition lacks `local_port`.

    Pools that reference an unknown proxy service, or that define no
    prefixes, are skipped with a warning on stderr.
    """
    sorted_common_pools = _collect_common_pools(cluster_config)

    # The file is written manually (not via yaml.dump) to keep the exact
    # formatting and the explanatory comments.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(_render_profile_policy(sorted_common_pools))

    print(f"Successfully generated profile setup policy at: {output_path}")


def generate_enforcer_policy(cluster_config, output_path):
    """Generate the enforcer policy file.

    Args:
        cluster_config: Parsed cluster configuration mapping. Only the
            `workers` key is read; a missing or null value is treated as
            empty.
        output_path: Path of the policy file to write (overwritten).

    One enforcement pool named `server_<worker>` is emitted per worker,
    in sorted worker-name order, listing the worker's de-duplicated, sorted
    prefixes. Workers without any prefixes are omitted.
    """
    enforcement_pools = _collect_enforcement_pools(cluster_config)

    # Single write: header, generated pools and the static policy sections.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(_render_enforcer_policy(enforcement_pools))

    print(f"Successfully generated enforcer policy at: {output_path}")


def main():
    """Command-line entry point; see USAGE for the expected arguments."""
    if len(sys.argv) < 3 or len(sys.argv) > 4:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    config_path = sys.argv[1]
    profile_output_path = sys.argv[2]

    if not os.path.exists(config_path):
        print(f"Error: Cluster configuration file not found at '{config_path}'", file=sys.stderr)
        sys.exit(1)

    cluster_config = load_cluster_config(config_path)
    if not isinstance(cluster_config, dict):
        # yaml.safe_load returns None for an empty file and may return a
        # list or scalar for other documents; none of those can be processed.
        print(f"Error: Cluster configuration file '{config_path}' is empty or is not a YAML mapping",
              file=sys.stderr)
        sys.exit(1)

    generate_policy(cluster_config, profile_output_path)

    if len(sys.argv) == 4:
        enforcer_output_path = sys.argv[3]
        generate_enforcer_policy(cluster_config, enforcer_output_path)


if __name__ == "__main__":
    main()