#!/usr/bin/env python3
|
|
"""
Generates the profile setup policy YAML from the main cluster configuration file.

This script reads the worker configurations from a cluster.yml file, aggregates
all profile definitions, and generates a policy file that can be used by the
`ytops-client setup-profiles` command. This centralizes profile management
in the cluster configuration file.

When a third command-line argument is given, the script also writes the
enforcer policy file used by `ytops-client policy-enforcer`.
"""
|
|
|
|
import yaml
|
|
import sys
|
|
import os
|
|
from collections import OrderedDict
|
|
|
|
# To ensure YAML dumps dicts in the order they are created
|
|
def represent_ordereddict(dumper, data):
    """Represent an ``OrderedDict`` as a plain YAML mapping node.

    Every key and value is converted with ``dumper.represent_data`` and the
    resulting node pairs are kept in the dict's own iteration order.
    """
    node_pairs = [
        (dumper.represent_data(key), dumper.represent_data(val))
        for key, val in data.items()
    ]
    return yaml.nodes.MappingNode('tag:yaml.org,2002:map', node_pairs)
|
|
|
|
# Register represent_ordereddict as the YAML representer for OrderedDict values.
yaml.add_representer(OrderedDict, represent_ordereddict)
|
|
|
|
|
|
# Custom list type and representer to achieve flow style for inner lists
|
|
class FlowList(list):
    """Marker ``list`` subclass used to select a dedicated YAML representer."""
|
|
|
|
def flow_style_list_representer(dumper, data):
    """Represent *data* as a YAML sequence with flow style requested."""
    sequence_tag = 'tag:yaml.org,2002:seq'
    return dumper.represent_sequence(sequence_tag, data, flow_style=True)
|
|
|
|
# Register flow_style_list_representer as the YAML representer for FlowList values.
yaml.add_representer(FlowList, flow_style_list_representer)
|
|
|
|
|
|
# Custom string type and representer for double-quoted strings
|
|
class QuotedString(str):
    """Marker ``str`` subclass used to select a dedicated YAML representer."""
|
|
|
|
def quoted_string_representer(dumper, data):
    """Represent *data* as a YAML string scalar with double-quote style requested."""
    string_tag = 'tag:yaml.org,2002:str'
    return dumper.represent_scalar(string_tag, data, style='"')
|
|
|
|
# Register quoted_string_representer as the YAML representer for QuotedString values.
yaml.add_representer(QuotedString, quoted_string_representer)
|
|
|
|
|
|
def load_cluster_config(config_path):
    """Load the cluster configuration from a YAML file.

    Args:
        config_path: Path to the cluster YAML file.

    Returns:
        The parsed YAML document, or an empty dict when the file contains no
        document at all.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # safe_load() returns None for an empty document, but the generators call
    # .get() on the result; hand them an empty mapping instead.
    return {} if config is None else config
|
|
|
|
def generate_policy(cluster_config, output_path):
    """Generate the profile setup policy file using common pools.

    Every ``profile_pools`` entry of every worker in ``cluster_config`` becomes
    one item of the ``common_pools`` list in the generated file.  A pool whose
    ``proxy_service`` is not a key of the top-level ``shadowsocks_proxies``
    mapping is skipped with a warning on stderr.  Pools are ordered by their
    first (sorted) prefix so the output is stable between runs.

    Args:
        cluster_config: Parsed cluster configuration (a mapping).
        output_path: Path of the policy file to write; overwritten if present.

    Raises:
        KeyError: If a pool lacks ``proxy_service``, ``prefixes`` or ``count``,
            or a referenced proxy lacks ``local_port``.
        OSError: If the output file cannot be written.
    """
    # Local import: a JSON string literal is also a valid YAML double-quoted
    # scalar, so json.dumps gives correct escaping of quotes and backslashes.
    import json

    def quote(value):
        return json.dumps(str(value), ensure_ascii=False)

    # `or {}` also covers keys that are present but set to null in the YAML.
    shadowsocks_proxies = cluster_config.get('shadowsocks_proxies') or {}
    all_workers = cluster_config.get('workers') or {}

    # Aggregate profile pools from all workers.
    common_pools = []
    for worker_name, worker_config in all_workers.items():
        for pool in (worker_config or {}).get('profile_pools') or []:
            proxy_service = pool['proxy_service']
            if proxy_service not in shadowsocks_proxies:
                print(f"Warning: Proxy service '{proxy_service}' for profile pool '{pool['prefixes']}' on worker '{worker_name}' not found in global shadowsocks_proxies. Skipping.", file=sys.stderr)
                continue

            proxy_port = shadowsocks_proxies[proxy_service]['local_port']
            common_pools.append({
                'prefixes': sorted(pool['prefixes']),
                'proxy': f"{proxy_service}:{proxy_port}",
                'count': pool['count'],
            })

    # Order by the first prefix.  Slicing instead of indexing keeps a pool
    # with an empty prefix list from raising IndexError (it sorts first).
    common_pools.sort(key=lambda entry: entry['prefixes'][:1])

    # The file is written by hand (not yaml.dump) to keep the comments and
    # the exact layout.
    # NOTE(review): nested indentation below is two spaces per level; confirm
    # against a known-good generated file.
    lines = [
        "# Configuration for setting up profiles for a simulation or test run.\n",
        "# This file is used by the `bin/ytops-client setup-profiles` command.\n",
        "# It uses a common pool definition to avoid repetition.\n\n",
        "# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!\n",
        "# !!! DO NOT EDIT. Your changes will be overwritten. !!!\n",
        "# !!! Edit cluster.green.yml and re-run the generator instead. !!!\n\n",
        "simulation_parameters:\n",
        "  # --- Common Redis settings for all tools ---\n",
        "  # The environment name ('env') is now specified in each setup block below.\n",
        '  env_file: ".env" # Optional: path to a .env file.\n',
        "\n# --- Common Pool Definitions ---\n",
        "# Define the profile pools once. They will be created in both\n",
        "# the auth and download simulation environments.\n",
        "# The `setup-profiles` tool must be updated to support this format.\n",
    ]

    if common_pools:
        lines.append("common_pools:\n")
        for pool in common_pools:
            prefixes_str = ", ".join(quote(p) for p in pool['prefixes'])
            lines.append(f"  - prefixes: [{prefixes_str}]\n")
            lines.append(f"    proxy: {quote(pool['proxy'])}\n")
            lines.append(f"    count: {pool['count']}\n")
    else:
        # A bare `common_pools:` would parse as null; emit an explicit empty list.
        lines.append("common_pools: []\n")

    lines.extend([
        "\n# --- Profile setup for the AUTHENTICATION simulation ---\n",
        "auth_profile_setup:\n",
        '  env: "sim_auth"\n',
        "  cleanup_before_run: true\n",
        "  # The setup tool will use the 'common_pools' defined above.\n",
        "  use_common_pools: true\n",
        "\n# --- Profile setup for the DOWNLOAD simulation ---\n",
        "download_profile_setup:\n",
        '  env: "sim_download"\n',
        "  cleanup_before_run: true\n",
        "  # The setup tool will also use the 'common_pools' defined above.\n",
        "  use_common_pools: true\n",
    ])

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"Successfully generated profile setup policy at: {output_path}")
|
|
|
|
|
|
def generate_enforcer_policy(cluster_config, output_path):
    """Generate the enforcer policy file.

    For every worker in ``cluster_config['workers']`` (in sorted name order)
    that declares at least one profile prefix, one ``enforcement_pools`` item
    named ``server_<worker>`` is written, listing the worker's unique prefixes
    in sorted order with ``max_active_profiles: 1``.  The rest of the file is
    a fixed template.

    Args:
        cluster_config: Parsed cluster configuration (a mapping).
        output_path: Path of the policy file to write; overwritten if present.

    Raises:
        OSError: If the output file cannot be written.
    """
    # Local import: a JSON string literal is also a valid YAML double-quoted
    # scalar, so json.dumps gives correct escaping of quotes and backslashes.
    import json

    def quote(value):
        return json.dumps(str(value), ensure_ascii=False)

    # `or {}` also covers keys that are present but set to null in the YAML.
    all_workers = cluster_config.get('workers') or {}

    pool_lines = []
    for worker_name in sorted(all_workers):
        worker_config = all_workers[worker_name] or {}
        unique_prefixes = set()
        for pool in worker_config.get('profile_pools') or []:
            unique_prefixes.update(pool.get('prefixes') or [])

        # Workers without any prefix get no enforcement pool.
        if not unique_prefixes:
            continue

        pool_name = quote(f"server_{worker_name}")
        patterns_str = ", ".join(quote(p) for p in sorted(unique_prefixes))
        pool_lines.append(f"  - name: {pool_name}\n")
        pool_lines.append(f"    profile_group_patterns: [{patterns_str}]\n")
        pool_lines.append("    max_active_profiles: 1\n")

    # NOTE(review): the nesting of the YAML template below (two spaces per
    # level, `cross_simulation_sync` as a top-level key) is reconstructed from
    # the key names and section comments; confirm against a known-good
    # generated file before relying on it.
    header = """\
# Policy for the unified simulation enforcer.
# This file is used by `bin/ytops-client policy-enforcer --live` to manage
# both the authentication and download simulation environments from a single process.

# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!
# !!! DO NOT EDIT. Your changes will be overwritten. !!!
# !!! Edit cluster.green.yml and re-run the generator instead. !!!

simulation_parameters:
  # --- Common Redis settings for all tools ---
  # The enforcer will connect to two different Redis environments (key prefixes)
  # based on these settings, applying the corresponding policies to each.
  env_file: ".env"
  auth_env: "sim_auth"
  download_env: "sim_download"

  # How often the enforcer should wake up and apply all policies.
  interval_seconds: 2

# --- Common & Pool-specific Settings ---
# Common settings are applied to all profile groups discovered via the pools below.
# A pool can optionally override these settings by defining its own 'group_settings' block.
common_group_settings:
  auth:
    max_active_profiles: 1
    rotate_after_requests: 5
    rest_duration_minutes_on_rotation: 0.20
    wait_download_finish_per_group: true
    max_wait_for_downloads_minutes: 240
  download:
    max_active_profiles: 1
    rotate_after_requests: 0
    rest_duration_minutes_on_rotation: 0.2

# Defines pools of profile groups with their own concurrency limits.
"""

    rest_of_file = """
# --- Policies for the Authentication Simulation ---
auth_policy_enforcer_config:

  # Ban if 2 failures occur within a 1-minute window.
  #ban_on_failures: 2
  #ban_on_failures_window_minutes: 1

  # The standard rest policy is disabled, as rotation is handled by the profile group.

  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour.
  rate_limit_requests: 0
  rate_limit_window_minutes: 60
  rate_limit_rest_duration_minutes: 5

  rest_after_requests: 0
  rest_duration_minutes: 10

  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The settings below should be configured to respect these limits.

  # New setting for load balancing across profile groups.
  # "longest_idle": Activates the profile that has been idle the longest across all groups (based on last_used time).
  # This is a global FIFO strategy that effectively cycles through profiles regardless of their group.
  # "least_loaded": Prioritizes activating a profile from the group with the fewest pending downloads.
  # If multiple groups have zero pending downloads, it acts as a FIFO queue, activating
  # the one that finished its last download batch the earliest. This is useful when you want
  # to ensure a group finishes its entire workload before another group starts.
  profile_selection_strategy: "longest_idle"

  # The 'global_max_active_profiles' setting is now superseded by the per-pool limits
  # defined in the 'enforcement_pools' section.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.

  proxy_work_minutes: 0
  proxy_rest_duration_minutes: 0

  # Global maximum time a proxy can be active before being rested, regardless of
  # other rules. Acts as a safety net. Set to 0 to disable.
  max_global_proxy_active_minutes: 0
  rest_duration_on_max_active: 10

  # Proxy-level ban on failure burst is disabled.
  proxy_ban_on_failures: 0
  proxy_ban_window_minutes: 2

  # Clean up locks held for more than 16 minutes (960s) to prevent stuck workers.
  # This should be longer than the docker container timeout (15m).
  unlock_stale_locks_after_seconds: 960

  # A short post-task cooldown for auth simulation profiles. When a batch is finished,
  # the profile is put into COOLDOWN briefly. This prevents a worker from immediately
  # re-locking the same profile, giving the policy enforcer a window to perform rotation.
  unlock_cooldown_seconds: 0

# --- Cross-simulation synchronization ---
# This section is simplified because the link between auth and download profiles
# is now defined in the `profile_group_definitions`.
cross_simulation_sync:
  # Which states to synchronize from auth to download.
  sync_states:
    - "BANNED"
  # If true, a BANNED state on an auth profile will force the download profile to also be BANNED.
  enforce_auth_lead: true
  # CRITICAL: Ensures the correct download profile GROUP is active.
  sync_active_profile: true
  # When an auth profile is in the 'waiting_downloads' state, ensure the matching download profile is active.
  sync_waiting_downloads: true

# --- Policies for the Download Simulation ---
download_policy_enforcer_config:

  # Ban if 1 failure occurs within a 1-minute window.
  ban_on_failures: 1
  ban_on_failures_window_minutes: 1

  # Standard rest policy is disabled in favor of group rotation.

  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
  rate_limit_requests: 280
  rate_limit_window_minutes: 60
  rate_limit_rest_duration_minutes: 5
  rest_after_requests: 0
  rest_duration_minutes: 20

  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The settings below should be configured to respect these limits.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.

  # Time-based proxy rules are disabled.
  proxy_work_minutes: 0
  proxy_rest_duration_minutes: 10

  # Global maximum time a proxy can be active before being rested, regardless of
  # other rules. Acts as a safety net. Set to 0 to disable.
  max_global_proxy_active_minutes: 0
  rest_duration_on_max_active: 10

  # Proxy-level ban on failure burst is disabled.
  proxy_ban_on_failures: 3
  proxy_ban_window_minutes: 1

  # Clean up download locks held for more than 16 minutes (960s) to allow for long downloads.
  # This should be longer than the docker container timeout (15m).
  unlock_stale_locks_after_seconds: 960

  # After a profile is used for a download, unlock it but put it in COOLDOWN
  # state for 2-3s. This is enforced by the worker, which reads this config from Redis.
  unlock_cooldown_seconds: [2, 3]
"""

    # Write everything through a single handle instead of reopening in
    # append mode for the template tail.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(header)
        if pool_lines:
            f.write("enforcement_pools:\n")
            f.writelines(pool_lines)
        else:
            # A bare `enforcement_pools:` would parse as null; emit an
            # explicit empty list.
            f.write("enforcement_pools: []\n")
        f.write(rest_of_file)

    print(f"Successfully generated enforcer policy at: {output_path}")
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Expects two or three arguments: the cluster configuration file, the output
    path for the profile setup policy and, optionally, the output path for the
    enforcer policy.  The profile setup policy is always written; the enforcer
    policy only when the third argument is given.

    Exits with status 1 when the argument count is wrong or when the cluster
    configuration path is not an existing file.
    """
    args = sys.argv[1:]
    if not 2 <= len(args) <= 3:
        # Usage errors go to stderr, like the other error message below.
        print("Usage: ./tools/generate-profile-setup-policy.py <cluster-config-file> <output-profile-policy-file> [<output-enforcer-policy-file>]", file=sys.stderr)
        sys.exit(1)

    config_path, profile_output_path = args[0], args[1]

    # isfile() rather than exists(): a directory would pass exists() and then
    # fail with a traceback when opened.
    if not os.path.isfile(config_path):
        print(f"Error: Cluster configuration file not found at '{config_path}'", file=sys.stderr)
        sys.exit(1)

    cluster_config = load_cluster_config(config_path)
    generate_policy(cluster_config, profile_output_path)

    if len(args) == 3:
        generate_enforcer_policy(cluster_config, args[2])
|
|
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|