#!/usr/bin/env python3 """ CLI tool to enforce policies on profiles. """ import argparse import collections import json import logging import os import signal import sys import time import fnmatch from copy import deepcopy try: import yaml except ImportError: print("PyYAML is not installed. Please install it with: pip install PyYAML", file=sys.stderr) yaml = None try: from dotenv import load_dotenv except ImportError: load_dotenv = None from .profile_manager_tool import ProfileManager, natural_sort_key from .profile_statemachine import ProfileState # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) # Graceful shutdown handler shutdown_event = False def handle_shutdown(sig, frame): global shutdown_event logger.info("Shutdown signal received. Stopping policy enforcer...") shutdown_event = True class PolicyEnforcer: def __init__(self, manager, dry_run=False): self.manager = manager self.dry_run = dry_run self.actions_taken_this_cycle = 0 self._last_wait_log_message = "" PROXY_REST_REASON = "Proxy resting" def apply_policies(self, args): self.actions_taken_this_cycle = 0 logger.debug(f"Applying policies... (Dry run: {self.dry_run})") # --- Phase 1: Policies that don't depend on a consistent profile state snapshot --- # Manage proxy states and clean up stale locks before we fetch profile states. self.enforce_proxy_group_rotation(getattr(args, 'proxy_groups', [])) self.enforce_proxy_work_rest_cycle(args) self.enforce_max_proxy_active_time(args) self.enforce_proxy_policies(args) if args.unlock_stale_locks_after_seconds and args.unlock_stale_locks_after_seconds > 0: self.enforce_stale_lock_cleanup(args.unlock_stale_locks_after_seconds) # --- Phase 2: Policies that require a consistent, shared view of profile states --- # Fetch all profile states ONCE to create a consistent snapshot for this cycle. 
        all_profiles_list = self.manager.list_profiles()
        all_profiles_map = {p['name']: p for p in all_profiles_list}
        # Apply profile group policies (rotation, max_active). This will modify the local `all_profiles_map`.
        self.enforce_profile_group_policies(getattr(args, 'profile_groups', []), all_profiles_map, args)
        # Un-rest profiles. This also reads from and modifies the local `all_profiles_map`.
        self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map, args)
        # --- Phase 3: Apply policies to individual active profiles ---
        # Use the now-updated snapshot to determine which profiles are active.
        active_profiles = [p for p in all_profiles_map.values() if p['state'] == ProfileState.ACTIVE.value]
        # Filter out profiles that are managed by a profile group, as their state is handled separately.
        profile_groups = getattr(args, 'profile_groups', [])
        if profile_groups:
            grouped_profiles = set()
            for group in profile_groups:
                # A group names its members either explicitly ('profiles') or
                # by name prefix ('prefix').
                if 'profiles' in group:
                    for p_name in group['profiles']:
                        grouped_profiles.add(p_name)
                elif 'prefix' in group:
                    prefix = group['prefix']
                    for p in all_profiles_list:
                        if p['name'].startswith(prefix):
                            grouped_profiles.add(p['name'])
            original_count = len(active_profiles)
            active_profiles = [p for p in active_profiles if p['name'] not in grouped_profiles]
            if len(active_profiles) != original_count:
                logger.debug(f"Filtered out {original_count - len(active_profiles)} profile(s) managed by profile groups.")
        for profile in active_profiles:
            # Check for failure burst first, as it's more severe.
            # If it's banned, no need to check other rules for it.
            if self.enforce_failure_burst_policy(profile, args.ban_on_failures, args.ban_on_failures_window_minutes):
                continue
            if self.enforce_rate_limit_policy(profile, getattr(args, 'rate_limit_requests', 0), getattr(args, 'rate_limit_window_minutes', 0), getattr(args, 'rate_limit_rest_duration_minutes', 0)):
                continue
            self.enforce_failure_rate_policy(profile, args.max_failure_rate, args.min_requests_for_rate)
            self.enforce_rest_policy(profile, args.rest_after_requests, args.rest_duration_minutes)
        return self.actions_taken_this_cycle > 0

    def enforce_failure_burst_policy(self, profile, max_failures, window_minutes):
        """Ban a profile that produced too many fatal errors in a time window.

        Returns True if the profile was banned (callers skip further checks),
        False otherwise. Disabled when either threshold is unset or <= 0.
        """
        if not max_failures or not window_minutes or max_failures <= 0 or window_minutes <= 0:
            return False
        window_seconds = window_minutes * 60
        # Count only fatal error types (auth, download) for the ban policy.
        # Tolerated errors are excluded from this check.
        error_count = (
            self.manager.get_activity_rate(profile['name'], 'failure', window_seconds)
            + self.manager.get_activity_rate(profile['name'], 'download_error', window_seconds)
        )
        if error_count >= max_failures:
            reason = f"Error burst detected: {error_count} errors in the last {window_minutes} minute(s) (threshold: {max_failures})"
            logger.warning(f"Banning profile '{profile['name']}' due to error burst: {reason}")
            if not self.dry_run:
                sm = self.manager.get_state_machine(profile['name'])
                if sm:
                    sm.ban(reason=reason)
            self.actions_taken_this_cycle += 1
            return True  # Indicates profile was banned
        return False

    def enforce_rate_limit_policy(self, profile, max_requests, window_minutes, rest_duration_minutes):
        """Rest a profile that exceeded its request rate limit.

        Returns True if the profile was rested, False otherwise. Disabled when
        either threshold is unset or <= 0.
        """
        if not max_requests or not window_minutes or max_requests <= 0 or window_minutes <= 0:
            return False
        window_seconds = window_minutes * 60
        # Count all successful activities (auth, download) for rate limiting.
        # We don't count failures, as they often don't hit the target server in the same way.
        activity_count = (
            self.manager.get_activity_rate(profile['name'], 'success', window_seconds)
            + self.manager.get_activity_rate(profile['name'], 'download', window_seconds)
        )
        if activity_count >= max_requests:
            reason = f"Rate limit hit: {activity_count} requests in last {window_minutes} minute(s) (limit: {max_requests})"
            logger.info(f"Resting profile '{profile['name']}' for {rest_duration_minutes}m: {reason}")
            if not self.dry_run:
                sm = self.manager.get_state_machine(profile['name'])
                if sm:
                    sm.rest(reason=reason, duration_minutes=rest_duration_minutes)
            self.actions_taken_this_cycle += 1
            return True  # Indicates profile was rested
        return False

    def enforce_unrest_policy(self, profile_groups, all_profiles_map, args):
        """Activate RESTING/COOLDOWN profiles whose rest period has elapsed.

        Honors (in order): download-wait blocking, the configured profile
        selection strategy, per-group max_active capacity, the global
        max-active limit, and proxy availability. Reads from and mutates
        `all_profiles_map` in place so later policies in the same cycle see a
        consistent view.
        """
        all_profiles_list = list(all_profiles_map.values())
        resting_profiles = [p for p in all_profiles_list if p['state'] == ProfileState.RESTING.value]
        cooldown_profiles = [p for p in all_profiles_list if p['state'] == ProfileState.COOLDOWN.value]
        profiles_to_check = resting_profiles + cooldown_profiles
        now = time.time()
        if not profiles_to_check:
            return
        # --- Group-aware unrest logic ---
        # Build name -> group and group -> sorted member-name lookup tables.
        profile_to_group_map = {}
        group_to_profiles_map = {}
        if profile_groups:
            for group in profile_groups:
                group_name = group.get('name')
                if not group_name:
                    continue
                profiles_in_group = []
                if 'profiles' in group:
                    profiles_in_group = sorted(group['profiles'], key=natural_sort_key)
                elif 'prefix' in group:
                    prefix = group['prefix']
                    profiles_in_group = sorted([p['name'] for p in all_profiles_list if p['name'].startswith(prefix)], key=natural_sort_key)
                group_to_profiles_map[group_name] = profiles_in_group
                for p_name in profiles_in_group:
                    profile_to_group_map[p_name] = group_name
        # This will store the live count of active profiles for each group,
        # preventing race conditions within a single enforcer run.
        live_active_counts = {}
        if profile_groups:
            for group_name, profiles_in_group in group_to_profiles_map.items():
                count = 0
                for p_name in profiles_in_group:
                    profile_state = all_profiles_map.get(p_name, {}).get('state')
                    # LOCKED and COOLDOWN profiles occupy an "active" slot too.
                    if profile_state in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]:
                        count += 1
                live_active_counts[group_name] = count
            logger.debug(f"Initial live active counts: {live_active_counts}")
        # --- New Global Max Active Logic ---
        global_max_active = getattr(args, 'global_max_active_profiles', 0)
        live_global_active_count = sum(live_active_counts.values())
        if global_max_active > 0:
            logger.debug(f"Enforcing global max active profiles limit of {global_max_active}. Current global active: {live_global_active_count}")
        # --- End New Global Logic ---
        # --- End group logic setup ---
        # --- New logic: Determine if the system should wait for downloads ---
        is_system_blocked_by_downloads = False
        waiting_group_names = set()
        if profile_groups:
            # Identify all groups that are configured to wait for downloads.
            waiting_groups_config = [
                g for g in profile_groups
                if g.get('wait_download_finish_per_group') or g.get('wait_download_finish_per_profile')
            ]
            if waiting_groups_config:
                is_any_group_idle = False
                groups_currently_waiting = []
                for group_config in waiting_groups_config:
                    group_name = group_config['name']
                    profiles_in_group_names = group_to_profiles_map.get(group_name, [])
                    if not profiles_in_group_names:
                        continue  # Skip empty groups
                    has_active_or_locked = any(
                        all_profiles_map.get(p_name, {}).get('state') in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]
                        for p_name in profiles_in_group_names
                    )
                    total_pending = sum(
                        all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
                        for p_name in profiles_in_group_names
                    )
                    # An "idle" group is one that is not working and has no pending downloads.
                    # It is ready to start a new work cycle.
                    if not has_active_or_locked and total_pending == 0:
                        is_any_group_idle = True
                    # A "waiting" group is one that is not working but has pending downloads.
                    if not has_active_or_locked and total_pending > 0:
                        groups_currently_waiting.append(group_name)
                waiting_group_names = set(groups_currently_waiting)
                # The system is blocked if no configured group is idle, AND at least one group is actually waiting for downloads.
                # This prevents a deadlock if all groups are just 'Working' but none are in the 'waiting_downloads' state yet.
                if not is_any_group_idle and groups_currently_waiting:
                    is_system_blocked_by_downloads = True
                    log_message = f"System is waiting for downloads to finish in groups: {groups_currently_waiting}. No new profiles will be activated until a group is free."
                    if log_message != self._last_wait_log_message:
                        if self._last_wait_log_message:
                            print(file=sys.stderr)  # Newline if we were printing dots
                        logger.info(log_message)
                        self._last_wait_log_message = log_message
                    else:
                        # Same message repeating: show progress dots instead.
                        print(".", end="", file=sys.stderr, flush=True)
                else:
                    # If we are no longer blocked, reset the message tracker
                    if self._last_wait_log_message:
                        print(file=sys.stderr)  # Newline to clean up after dots
                        self._last_wait_log_message = ""
        if is_system_blocked_by_downloads:
            # When blocked, we only want to consider profiles that are in the 'waiting_downloads' state,
            # as they are the only ones that can unblock the system by finishing their downloads.
            # All other resting profiles must wait.
            original_count = len(profiles_to_check)
            profiles_to_check = [
                p for p in profiles_to_check
                if p.get('rest_reason') == 'waiting_downloads'
            ]
            if len(profiles_to_check) != original_count:
                logger.debug("Activation is paused for profiles not waiting for downloads.")
        # --- End new logic ---
        # --- New Sorting Logic based on Profile Selection Strategy ---
        strategy = getattr(args, 'profile_selection_strategy', 'longest_idle')
        if strategy == 'least_loaded' and profile_groups:
            logger.debug("Applying 'least_loaded' profile selection strategy.")
            # Separate profiles that are ready from those that are not
            ready_profiles = [p for p in profiles_to_check if now >= p.get('rest_until', 0)]
            not_ready_profiles = [p for p in profiles_to_check if now < p.get('rest_until', 0)]
            # Group ready profiles by their group name
            ready_by_group = collections.defaultdict(list)
            for p in ready_profiles:
                group_name = profile_to_group_map.get(p['name'])
                if group_name:
                    ready_by_group[group_name].append(p)
            # Get group states to access timestamps for tie-breaking
            all_group_names = list(group_to_profiles_map.keys())
            all_group_states = self.manager.get_profile_group_states(all_group_names)
            # Calculate load for each group (sum of pending downloads of all profiles in the group)
            group_load = {}
            for group_name, profiles_in_group_names in group_to_profiles_map.items():
                total_pending = sum(
                    all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
                    for p_name in profiles_in_group_names
                )
                group_load[group_name] = total_pending
            # --- New Tie-Breaking Logic ---
            # Sort groups by load, then by finish time (FIFO for idle groups), then by name for stability
            sorted_groups = sorted(
                group_load.items(),
                key=lambda item: (
                    item[1],  # Primary sort: pending downloads (load)
                    # Secondary: prefer group that finished its downloads earliest.
                    # Use a large number for groups that have never finished, so they go last in a tie.
                    all_group_states.get(item[0], {}).get('last_finished_downloads_ts', float('inf')),
                    item[0]  # Tertiary: alphabetical name for stability
                )
            )
            # --- End New Tie-Breaking Logic ---
            logger.debug(f"Group load order: {[(name, load) for name, load in sorted_groups]}")
            # Rebuild the list of ready profiles, ordered by group load
            sorted_ready_profiles = []
            for group_name, load in sorted_groups:
                profiles_in_group = ready_by_group.get(group_name, [])
                # Within a group, prioritize unused profiles by name, then used profiles by oldest usage.
                # An "unused" profile is one with zero session requests.
                unused_profiles = [p for p in profiles_in_group if (p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)) == 0]
                used_profiles = [p for p in profiles_in_group if p not in unused_profiles]
                unused_profiles.sort(key=lambda p: natural_sort_key(p.get('name', '')))
                used_profiles.sort(key=lambda p: (p.get('last_used', 0), natural_sort_key(p.get('name', ''))))
                sorted_ready_profiles.extend(unused_profiles + used_profiles)
            # Add profiles not in any group to the end
            profiles_not_in_group = [p for p in ready_profiles if not profile_to_group_map.get(p['name'])]
            # Sort ready profiles not in a group by oldest last_used time first
            profiles_not_in_group.sort(key=lambda p: (p.get('last_used', 0), natural_sort_key(p.get('name', ''))))
            sorted_ready_profiles.extend(profiles_not_in_group)
            # The final list to check is the sorted ready profiles, followed by the not-ready ones.
            not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
            profiles_to_check = sorted_ready_profiles + not_ready_profiles
            logger.debug(f"Activation candidates for 'least_loaded' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
        else:
            # Default 'longest_idle' sort
            if strategy not in ['longest_idle']:
                logger.warning(f"Unknown or unhandled profile_selection_strategy '{strategy}'. Defaulting to 'longest_idle'.")
            # Separate profiles that are ready to be activated from those still resting.
            # A profile waiting for downloads is NOT considered ready for activation.
            ready_profiles = [
                p for p in profiles_to_check
                if now >= p.get('rest_until', 0) and p.get('rest_reason') != 'waiting_downloads'
            ]
            # Profiles still in a timed rest, or waiting for downloads, are not ready.
            not_ready_profiles = [p for p in profiles_to_check if p not in ready_profiles]
            # Sort all ready profiles by last_used time to find the longest idle.
            # A truly new profile will have a very old 'last_used' time and be selected first.
            # A recently used profile will have a new 'last_used' time and be selected last.
            ready_profiles.sort(key=lambda p: (p.get('last_used', 0), natural_sort_key(p.get('name', ''))))
            # Sort not-ready profiles by when they will become available (standard FIFO).
            not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
            # The final list to check will process all ready profiles first, then wait for the not-ready ones.
            profiles_to_check = ready_profiles + not_ready_profiles
            logger.debug(f"Activation candidates for 'longest_idle' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
        # --- End New Sorting Logic ---
        # --- New logic: Identify groups with waiting profiles ---
        groups_with_waiting_profiles = {}
        if profile_groups:
            for group in profile_groups:
                group_name = group.get('name')
                if not group_name:
                    continue
                defer_activation = group.get('defer_activation_if_any_waiting', False)
                if not defer_activation:
                    continue
                profiles_in_group = group_to_profiles_map.get(group_name, [])
                # First member (if any) of this group currently waiting on downloads.
                waiting_profile = next(
                    (p for p_name, p in all_profiles_map.items() if p_name in profiles_in_group and p.get('rest_reason') == 'waiting_downloads'),
                    None
                )
                if waiting_profile:
                    groups_with_waiting_profiles[group_name] = waiting_profile['name']
        # --- End new logic ---
        # Batch-fetch proxy states once for all candidates' distinct proxies.
        unique_proxies = sorted(list(set(p['proxy'] for p in profiles_to_check if p.get('proxy'))))
        proxy_states = self.manager.get_proxy_states(unique_proxies)
        for profile in profiles_to_check:
            profile_name = profile['name']
            group_name = profile_to_group_map.get(profile_name)
            # --- New: Early activation for profiles waiting on a now-active proxy ---
            proxy_url = profile.get('proxy')
            if proxy_url and profile.get('rest_reason') in ("Waiting for proxy", self.PROXY_REST_REASON):
                proxy_state_data = proxy_states.get(proxy_url, {})
                if proxy_state_data.get('state') == ProfileState.ACTIVE.value:
                    logger.debug(f"Profile '{profile_name}' was waiting for proxy '{proxy_url}', which is now active. Bypassing its rest period.")
                    profile['rest_until'] = 0
            # --- End new logic ---
            # --- New check to prevent activating profiles from a waiting group ---
            if group_name in waiting_group_names:
                logger.debug(f"Profile '{profile_name}' activation deferred because its group '{group_name}' is waiting for downloads to complete.")
                continue
            # --- End new logic ---
            # --- New logic: Defer activation if group has a waiting profile ---
            if group_name in groups_with_waiting_profiles:
                waiting_profile_name = groups_with_waiting_profiles[group_name]
                if profile_name != waiting_profile_name:
                    logger.debug(f"Profile '{profile_name}' activation deferred because profile '{waiting_profile_name}' in group '{group_name}' is waiting for downloads.")
                    continue
            # --- End new logic ---
            # --- New logic for waiting_downloads ---
            if profile.get('rest_reason') == 'waiting_downloads':
                profile_name = profile['name']
                group_name = profile_to_group_map.get(profile_name)
                group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
                max_wait_minutes = group_policy.get('max_wait_for_downloads_minutes', 240)
                wait_started_at = profile.get('wait_started_at', 0)
                downloads_pending = self.manager.get_pending_downloads(profile_name)
                # Only time out when a wait start timestamp was actually recorded.
                is_timed_out = (time.time() - wait_started_at) > (max_wait_minutes * 60) if wait_started_at > 0 else False
                if downloads_pending <= 0 or is_timed_out:
                    if is_timed_out:
                        logger.warning(f"Profile '{profile_name}' download wait timed out after {max_wait_minutes}m. Forcing rotation.")
                    else:
                        logger.info(f"All pending downloads for profile '{profile_name}' are complete. Proceeding with rotation.")
                    self.actions_taken_this_cycle += 1
                    # Transition to a normal post-rotation rest period.
                    new_reason = "Rotation complete (downloads finished)"
                    rest_duration_minutes = group_policy.get('rest_duration_minutes_on_rotation', 0)
                    if not self.dry_run:
                        # The profile is already in RESTING state (for 'waiting_downloads').
                        # We cannot call sm.rest() again as it would be a transition from RESTING to RESTING.
                        # Instead, we manually update the profile's fields in Redis to reflect the
                        # completion of the wait period and the start of the normal rotation rest.
                        # 1. Reset session counters since this is a rotation.
                        self.manager.reset_profile_counters(profile_name)
                        # 2. Update rest reason and duration.
                        rest_until_ts = time.time() + (rest_duration_minutes * 60) if rest_duration_minutes else 0
                        self.manager.update_profile_field(profile_name, 'rest_reason', new_reason)
                        self.manager.update_profile_field(profile_name, 'rest_until', str(rest_until_ts))
                        self.manager.update_profile_field(profile_name, 'wait_started_at', '0')
                        # 3. Clear the pending downloads counter.
                        self.manager.clear_pending_downloads(profile_name)
                        # Record when this group finished its downloads for FIFO tie-breaking
                        if group_name:
                            self.manager.set_profile_group_state(group_name, {'last_finished_downloads_ts': time.time()})
                    # Update local map so it can be activated in the same cycle if rest_duration is 0
                    rest_until_ts = time.time() + (rest_duration_minutes * 60) if rest_duration_minutes else 0
                    all_profiles_map[profile_name]['rest_reason'] = new_reason
                    all_profiles_map[profile_name]['rest_until'] = rest_until_ts
                    all_profiles_map[profile_name]['success_count'] = 0
                    all_profiles_map[profile_name]['failure_count'] = 0
                    all_profiles_map[profile_name]['tolerated_error_count'] = 0
                    all_profiles_map[profile_name]['download_count'] = 0
                    all_profiles_map[profile_name]['download_error_count'] = 0
                    all_profiles_map[profile_name]['wait_started_at'] = 0
                    # Let the rest of the unrest logic handle the activation now that rest_until is set.
                    profile['rest_until'] = rest_until_ts  # Update profile in loop
                else:
                    logger.debug(f"Profile '{profile_name}' is still waiting for {downloads_pending} download(s) to complete.")
                    continue  # Skip to next profile, do not attempt to activate.
            # --- End new logic ---
            rest_until = profile.get('rest_until', 0)
            if now >= rest_until:
                profile_name = profile['name']
                group_name = profile_to_group_map.get(profile_name)
                # --- New Global Max Active Check ---
                # This check prevents NEW profiles (in RESTING state) from becoming active if the global limit is reached.
                # It allows COOLDOWN profiles to become active, as they are already part of the active count.
                if global_max_active > 0 and live_global_active_count >= global_max_active and profile['state'] == ProfileState.RESTING.value:
                    logger.debug(f"Profile '{profile_name}' rest ended, but global max active limit ({global_max_active}) has been reached. Deferring activation.")
                    continue
                # --- End New Global Check ---
                # --- Group-aware unrest check ---
                if group_name:
                    group_policy = next((g for g in profile_groups if g.get('name') == group_name), None)
                    if not group_policy:
                        continue  # Should not happen if maps are built correctly
                    max_active_config = group_policy.get('max_active_profiles')
                    # 0 / -1 / None mean "unlimited": cap at the group size.
                    if max_active_config in (0, -1) or max_active_config is None:
                        max_active = len(group_to_profiles_map.get(group_name, []))
                        if max_active == 0:
                            max_active = 1  # Fallback for empty groups
                    elif isinstance(max_active_config, int) and max_active_config > 0:
                        max_active = max_active_config
                    else:
                        max_active = 1  # Default to 1 on invalid config or for safety
                    # Check if the group is already at its capacity for active profiles.
                    # We use the live counter which is updated during this enforcer cycle.
                    # Special handling for COOLDOWN profiles: they should be allowed to become ACTIVE
                    # even if the group is at capacity, because they are already counted as "active".
                    # We check if the group would be over capacity *without* this profile.
                    is_cooldown_profile = profile['state'] == ProfileState.COOLDOWN.value
                    effective_active_count = live_active_counts.get(group_name, 0)
                    # If we are considering a COOLDOWN profile, it's already in the count.
                    # The check should be if activating it would exceed the limit, assuming
                    # no *other* profile is active.
                    capacity_check_count = effective_active_count
                    if is_cooldown_profile:
                        capacity_check_count -= 1
                    logger.debug(
                        f"Checking capacity for '{profile_name}' in group '{group_name}': "
                        f"is_cooldown={is_cooldown_profile}, "
                        f"live_count={effective_active_count}, "
                        f"check_count={capacity_check_count}, "
                        f"max_active={max_active}"
                    )
                    if capacity_check_count >= max_active:
                        logger.debug(f"Profile '{profile_name}' rest ended, but group '{group_name}' is at capacity ({effective_active_count}/{max_active}). Deferring activation.")
                        # If a profile's COOLDOWN ends but it can't be activated (because another
                        # profile is active), move it to RESTING so it's clear it's waiting for capacity.
                        if is_cooldown_profile:
                            reason = "Waiting for group capacity"
                            logger.info(f"Profile '{profile_name}' cooldown ended but group is full. Moving to RESTING to wait for a slot.")
                            self.actions_taken_this_cycle += 1
                            if not self.dry_run:
                                sm = self.manager.get_state_machine(profile['name'])
                                if sm:
                                    # duration_minutes=0 will set rest_until to 0, making it immediately eligible.
                                    sm.rest(reason=reason, duration_minutes=0)
                            # Update local map
                            all_profiles_map[profile_name]['state'] = ProfileState.RESTING.value
                            all_profiles_map[profile_name]['rest_until'] = 0
                            all_profiles_map[profile_name]['rest_reason'] = reason
                        continue  # Do not activate, group is full.
                else:
                    # Defensive check for orphaned profiles that should be in a group.
                    # This can happen if list_profiles() returns an incomplete list for one cycle,
                    # causing the group maps to be incomplete. This check prevents a "stampede"
                    # of activations that would violate group limits.
                    is_orphan = False
                    for group in profile_groups:
                        prefix = group.get('prefix')
                        if prefix and profile_name.startswith(prefix):
                            is_orphan = True
                            logger.warning(
                                f"Profile '{profile_name}' appears to belong to group '{group.get('name')}' "
                                f"but was not found in the initial scan. Deferring activation to prevent violating group limits."
                            )
                            break
                    if is_orphan:
                        continue  # Skip activation for this profile
                # --- End group check ---
                # Before activating, ensure the profile's proxy is not resting.
                proxy_url = profile.get('proxy')
                if proxy_url:
                    proxy_state_data = proxy_states.get(proxy_url, {})
                    if proxy_state_data.get('state') == ProfileState.RESTING.value:
                        logger.debug(f"Profile '{profile['name']}' rest period ended, but its proxy '{proxy_url}' is still resting. Deferring activation.")
                        # Update reason for clarity in the UI when a profile is blocked by its proxy.
                        new_reason = "Waiting for proxy"
                        if profile.get('rest_reason') != new_reason:
                            logger.info(f"Updating profile '{profile['name']}' reason to '{new_reason}'.")
                            self.actions_taken_this_cycle += 1
                            if not self.dry_run:
                                self.manager.update_profile_field(profile['name'], 'rest_reason', new_reason)
                            # Update local map for consistency within this enforcer cycle.
                            all_profiles_map[profile_name]['rest_reason'] = new_reason
                        continue  # Do not activate this profile yet.
                # Update group counter BEFORE making any changes, so subsequent checks in this cycle use the updated count
                if group_name and profile['state'] == ProfileState.RESTING.value:
                    # For RESTING profiles, they're becoming active, so increment the count
                    live_active_counts[group_name] = live_active_counts.get(group_name, 0) + 1
                    # Also increment the global counter
                    if global_max_active > 0:
                        live_global_active_count += 1
                # COOLDOWN profiles are already counted, no change needed
                logger.info(f"Activating profile '{profile['name']}' (rest period completed).")
                self.actions_taken_this_cycle += 1
                # Determine if this was a true rest or just a cooldown that was waiting for a slot.
                is_waiting_after_cooldown = profile.get('rest_reason') == "Waiting for group capacity"
                if not self.dry_run:
                    sm = self.manager.get_state_machine(profile['name'])
                    if sm:
                        # Pass the profile object so the action can inspect its old state
                        # without an extra Redis call.
                        sm.activate(profile=profile)
                        # Log the activation event for observability
                        # NOTE(review): reconstructed as nested under `if sm:` so the
                        # Redis write stays behind the dry-run guard, consistent with
                        # every other manager write in this module — confirm nesting.
                        reason = f"{profile['state']} completed"
                        if is_waiting_after_cooldown:
                            reason = "Activated after waiting"
                        log_entry = {
                            'ts': now,
                            'profile': profile['name'],
                            'group': group_name,
                            'reason': reason
                        }
                        self.manager.log_activation_event(log_entry)
                # Update the shared map to reflect the change immediately for this cycle.
                all_profiles_map[profile_name]['state'] = ProfileState.ACTIVE.value
                # A genuine rest completion starts a fresh session; a profile that was
                # merely parked as "Waiting for group capacity" keeps its counters.
                if profile['state'] == ProfileState.RESTING.value and not is_waiting_after_cooldown:
                    all_profiles_map[profile_name]['success_count'] = 0
                    all_profiles_map[profile_name]['failure_count'] = 0
                    all_profiles_map[profile_name]['tolerated_error_count'] = 0
                    all_profiles_map[profile_name]['download_count'] = 0
                    all_profiles_map[profile_name]['download_error_count'] = 0

    def enforce_failure_rate_policy(self, profile, max_failure_rate, min_requests):
        """Ban a profile whose lifetime failure rate exceeds the threshold.

        Only evaluated once at least `min_requests` global requests have been
        observed. Disabled when `max_failure_rate` <= 0.
        """
        if max_failure_rate <= 0:
            return
        success = profile.get('global_success_count', 0)
        failure = profile.get('global_failure_count', 0)
        total = success + failure
        if total < min_requests:
            return
        current_failure_rate = failure / total if total > 0 else 0
        if current_failure_rate >= max_failure_rate:
            reason = f"Global failure rate {current_failure_rate:.2f} >= threshold {max_failure_rate} ({int(failure)}/{int(total)} failures)"
            logger.warning(f"Banning profile '{profile['name']}' due to high failure rate: {reason}")
            if not self.dry_run:
                sm = self.manager.get_state_machine(profile['name'])
                if sm:
                    sm.ban(reason=reason)
            self.actions_taken_this_cycle += 1

    def enforce_rest_policy(self, profile, rest_after_requests, rest_duration_minutes):
        """Rest a profile after it has served `rest_after_requests` session requests.

        Counts every session activity type (successes, failures, tolerated
        errors, downloads, download errors). Disabled when either argument is
        unset or <= 0.
        """
        if not rest_after_requests or rest_after_requests <= 0 or not rest_duration_minutes or rest_duration_minutes <= 0:
            return
        total_requests = (
            int(profile.get('success_count', 0))
            + int(profile.get('failure_count', 0))
            + int(profile.get('tolerated_error_count', 0))
            + int(profile.get('download_count', 0))
            + int(profile.get('download_error_count', 0))
        )
        if total_requests >= rest_after_requests:
            reason = f"Request count {total_requests} >= threshold {rest_after_requests}"
            logger.info(f"Resting profile '{profile['name']}' for {rest_duration_minutes}m: {reason}")
            self.actions_taken_this_cycle += 1
            if not self.dry_run:
                sm = self.manager.get_state_machine(profile['name'])
                if sm:
                    sm.rest(reason=reason, duration_minutes=rest_duration_minutes)

    def enforce_stale_lock_cleanup(self, max_lock_seconds):
        """Finds and unlocks profiles with stale locks."""
        if self.dry_run:
            logger.info(f"[Dry Run] Would check for and clean up locks older than {max_lock_seconds} seconds.")
            return
        cleaned_count = self.manager.cleanup_stale_locks(max_lock_seconds)
        if cleaned_count > 0:
            self.actions_taken_this_cycle += cleaned_count

    def enforce_profile_group_policies(self, profile_groups, all_profiles_map, args):
        """
        Manages profiles within defined groups. This includes:
        1. Rotating out profiles that have met their request limit.
        2. Healing the group by ensuring no more than `max_active_profiles` are active.
        3. Initializing the group by activating a profile if none are active.
        4. Healing across all groups to enforce `global_max_active_profiles`.
        This method operates on and modifies the `all_profiles_map` passed to it.
        """
        if not profile_groups:
            return
        all_profiles_list = list(all_profiles_map.values())
        for group in profile_groups:
            group_name = group.get('name')
            if not group_name:
                logger.warning("Found a profile group without a 'name'. Skipping.")
                continue
            profiles_in_group = set()
            if 'profiles' in group:
                profiles_in_group = set(group['profiles'])
            elif 'prefix' in group:
                prefix = group['prefix']
                profiles_in_group = {p['name'] for p in all_profiles_list if p['name'].startswith(prefix)}
            if not profiles_in_group:
                logger.warning(f"Profile group '{group_name}' has no matching profiles. Skipping.")
                if not self.dry_run:
                    # Clean up stale group state from Redis if it exists
                    self.manager.redis.delete(self.manager._profile_group_state_key(group_name))
                continue
            # --- Calculate and persist group load for observability ---
            total_pending_downloads = sum(
                all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
                for p_name in profiles_in_group
            )
            logger.debug(f"Group '{group_name}': Calculated pending_downloads={total_pending_downloads}. Persisting to Redis.")
            # --- Persist group policy to Redis for observability ---
            rotate_after_requests = group.get('rotate_after_requests')
            max_active_profiles = group.get('max_active_profiles')
            if not self.dry_run:
                # This is a non-critical update, so we don't need to check for existence.
                # We just update it on every cycle to ensure it's fresh.
                self.manager.set_profile_group_state(group_name, {
                    'rotate_after_requests': rotate_after_requests,
                    'max_active_profiles': max_active_profiles,
                    'prefix': group.get('prefix'),  # Store prefix for observability
                    'pending_downloads': total_pending_downloads
                })
            # --- 1. Handle Rotation for Active Profiles ---
            # NOTE(review): `rotate_after_requests` is re-read here although it was
            # already fetched above — redundant but harmless.
            rotate_after_requests = group.get('rotate_after_requests')
            if rotate_after_requests and rotate_after_requests > 0:
                # Consider ACTIVE, LOCKED, and COOLDOWN profiles for rotation eligibility.
                eligible_for_rotation_check = [
                    p for p in all_profiles_list
                    if p['name'] in profiles_in_group and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]
                ]
                for profile in eligible_for_rotation_check:
                    total_requests = (
                        int(profile.get('success_count', 0))
                        + int(profile.get('failure_count', 0))
                        + int(profile.get('tolerated_error_count', 0))
                        + int(profile.get('download_count', 0))
                        + int(profile.get('download_error_count', 0))
                    )
                    if total_requests >= rotate_after_requests:
                        # If a profile is LOCKED, we can't rotate it yet.
                        # Instead, we update its reason to show that a rotation is pending.
                        if profile['state'] == ProfileState.LOCKED.value:
                            pending_reason = f"Pending Rotation (requests: {total_requests}/{rotate_after_requests})"
                            # Only update if the reason is not already set, to avoid spamming Redis.
                            if profile.get('reason') != pending_reason:
                                logger.info(f"Profile '{profile['name']}' in group '{group_name}' is due for rotation but is LOCKED. Marking as pending.")
                                self.actions_taken_this_cycle += 1
                                if not self.dry_run:
                                    self.manager.update_profile_field(profile['name'], 'reason', pending_reason)
                                # Update local map
                                all_profiles_map[profile['name']]['reason'] = pending_reason
                            else:
                                logger.debug(f"Profile '{profile['name']}' in group '{group_name}' is due for rotation but is currently LOCKED. Already marked as pending.")
                            continue
                        # If the profile is ACTIVE or in COOLDOWN, we can rotate it immediately.
                        reason = f"Rotated after {total_requests} requests (limit: {rotate_after_requests})"
                        logger.info(f"Rotating profile '{profile['name']}' in group '{group_name}': {reason}")
                        self.actions_taken_this_cycle += 1
                        wait_for_downloads_group = group.get('wait_download_finish_per_group', False)  # For backward compatibility
                        wait_for_downloads_profile = group.get('wait_download_finish_per_profile', False)
                        new_reason = reason
                        rest_until_ts = 0
                        is_waiting_profile = False
                        if wait_for_downloads_group or wait_for_downloads_profile:
                            # Rotation is deferred until pending downloads drain; the
                            # 'waiting_downloads' reason is picked up by enforce_unrest_policy.
                            is_waiting_profile = True
                            new_reason = "waiting_downloads"
                            if wait_for_downloads_group:
                                logger.info(f"Profile '{profile['name']}' rotated. Group '{group_name}' will wait for all downloads before yielding to other groups.")
                            else:
                                # wait_for_downloads_profile must be true
                                logger.info(f"Profile '{profile['name']}' will wait for pending downloads to complete.")
                        else:
                            rest_duration_minutes = group.get('rest_duration_minutes_on_rotation')
                            if rest_duration_minutes and rest_duration_minutes > 0:
                                rest_until_ts = time.time() + rest_duration_minutes * 60
                        if not self.dry_run:
                            sm = self.manager.get_state_machine(profile['name'])
                            if sm:
                                rest_duration_minutes = group.get('rest_duration_minutes_on_rotation')
                                sm.rest(
                                    reason=new_reason,
                                    duration_minutes=rest_duration_minutes,
                                    is_waiting_profile=is_waiting_profile,
                                    is_rotation=True
                                )
                        # Update our local map so subsequent policies in this cycle see the change immediately.
                        all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value
                        all_profiles_map[profile['name']]['rest_reason'] = new_reason
                        if is_waiting_profile:
                            all_profiles_map[profile['name']]['wait_started_at'] = time.time()
                            all_profiles_map[profile['name']]['rest_until'] = 0
                        else:
                            # Counters are reset now only on an immediate rotation; the
                            # waiting_downloads path resets them later, once downloads finish.
                            all_profiles_map[profile['name']]['rest_until'] = rest_until_ts
                            all_profiles_map[profile['name']]['success_count'] = 0
                            all_profiles_map[profile['name']]['failure_count'] = 0
                            all_profiles_map[profile['name']]['tolerated_error_count'] = 0
                            all_profiles_map[profile['name']]['download_count'] = 0
                            all_profiles_map[profile['name']]['download_error_count'] = 0
            # --- 2.
Self-Healing: Enforce max_active_profiles --- max_active_config = group.get('max_active_profiles') if max_active_config in (0, -1) or max_active_config is None: max_active = len(profiles_in_group) if max_active == 0: max_active = 1 # Fallback for empty groups elif isinstance(max_active_config, int) and max_active_config > 0: max_active = max_active_config else: max_active = 1 # Default to 1 on invalid config or for safety # Get the current list of active/locked profiles from our potentially modified local map # A profile is considered "active" for group limits if it is ACTIVE, LOCKED, or in COOLDOWN. current_active_or_locked_profiles = [ p for name, p in all_profiles_map.items() if name in profiles_in_group and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value, ProfileState.COOLDOWN.value] ] num_active_or_locked = len(current_active_or_locked_profiles) if num_active_or_locked > max_active: logger.warning(f"Healing group '{group_name}': Found {num_active_or_locked} active/locked profiles, but max is {max_active}. Resting excess ACTIVE profiles.") # We can only rest profiles that are in the ACTIVE state, not LOCKED. profiles_that_can_be_rested = [p for p in current_active_or_locked_profiles if p['state'] == ProfileState.ACTIVE.value] # Sort to determine which profiles to rest. We prefer to rest profiles # that have been used more. As a tie-breaker (especially for profiles # with 0 requests), we rest profiles with higher names first to ensure # deterministic behavior (e.g., user1_2 is rested before user1_1). 
profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True) # Higher name first profiles_that_can_be_rested.sort(key=lambda p: ( p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0) ), reverse=True) # Most requests first num_to_rest = num_active_or_locked - max_active profiles_to_rest = profiles_that_can_be_rested[:num_to_rest] for profile in profiles_to_rest: req_count = ( profile.get('success_count', 0) + profile.get('failure_count', 0) + profile.get('tolerated_error_count', 0) + profile.get('download_count', 0) + profile.get('download_error_count', 0) ) logger.warning(f"Healing group '{group_name}': Resting profile '{profile['name']}' (request count: {req_count}).") self.actions_taken_this_cycle += 1 if not self.dry_run: sm = self.manager.get_state_machine(profile['name']) if sm: # Rest for a minimal duration to prevent immediate re-activation in the same cycle. sm.rest(reason="Group max_active healing", duration_minutes=0.02) # ~1.2 seconds # Update local map to reflect the change for this cycle all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value all_profiles_map[profile['name']]['rest_reason'] = "Group max_active healing" all_profiles_map[profile['name']]['rest_until'] = time.time() + (0.02 * 60) # --- 3. Initialization: Activate profiles if below capacity --- # This is a fallback for initialization or if all profiles were rested/banned. # The primary activation mechanism is in `enforce_unrest_policy`. elif num_active_or_locked < max_active: # Check if there are any non-active, non-banned, non-locked profiles to activate. eligible_profiles = [p for name, p in all_profiles_map.items() if name in profiles_in_group and p['state'] not in [ProfileState.ACTIVE.value, ProfileState.BANNED.value, ProfileState.LOCKED.value]] if eligible_profiles: # This is a simple initialization case. 
We don't activate here because # `enforce_unrest_policy` will handle it more intelligently based on rest times. # This block ensures that on the very first run, a group doesn't sit empty. if num_active_or_locked == 0: logger.debug(f"Group '{group_name}' has no active profiles. `enforce_unrest_policy` will attempt to activate one.") # --- 4. Global Self-Healing: Enforce global_max_active_profiles --- # This runs after all per-group healing and ensures the global limit is respected. global_max_active = getattr(args, 'global_max_active_profiles', 0) if global_max_active > 0: # Get all profiles managed by any group all_grouped_profiles = set() for group in profile_groups: profiles_in_group = set() if 'profiles' in group: profiles_in_group = set(group['profiles']) elif 'prefix' in group: prefix = group['prefix'] profiles_in_group = {p['name'] for p in all_profiles_list if p['name'].startswith(prefix)} all_grouped_profiles.update(profiles_in_group) # Get current active count across all groups from our local map current_global_active = [ p for name, p in all_profiles_map.items() if name in all_grouped_profiles and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value, ProfileState.COOLDOWN.value] ] num_global_active = len(current_global_active) if num_global_active > global_max_active: logger.warning(f"Global Healing: Found {num_global_active} active profiles across all groups, but global max is {global_max_active}. Resting excess.") # We can only rest profiles that are in the ACTIVE state, not LOCKED. profiles_that_can_be_rested = [p for p in current_global_active if p['state'] == ProfileState.ACTIVE.value] # Sort to determine which profiles to rest, using the same logic as per-group healing. 
profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True) # Higher name first profiles_that_can_be_rested.sort(key=lambda p: ( p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0) ), reverse=True) # Most requests first num_to_rest = num_global_active - global_max_active profiles_to_rest = profiles_that_can_be_rested[:num_to_rest] for profile in profiles_to_rest: logger.warning(f"Global Healing: Resting profile '{profile['name']}'.") self.actions_taken_this_cycle += 1 if not self.dry_run: sm = self.manager.get_state_machine(profile['name']) if sm: # Rest for a minimal duration to prevent immediate re-activation in the same cycle. sm.rest(reason="Global max_active healing", duration_minutes=0.02) # ~1.2 seconds # Update local map to reflect the change for this cycle all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value all_profiles_map[profile['name']]['rest_reason'] = "Global max_active healing" all_profiles_map[profile['name']]['rest_until'] = time.time() + (0.02 * 60) def enforce_proxy_group_rotation(self, proxy_groups): """Manages mutually exclusive work cycles for proxies within defined groups.""" if not proxy_groups: return group_names = [g['name'] for g in proxy_groups if g.get('name')] if not group_names: return group_states = self.manager.get_proxy_group_states(group_names) now = time.time() for group in proxy_groups: group_name = group.get('name') if not group_name: logger.warning("Found a proxy group without a 'name'. Skipping.") continue proxies_in_group = group.get('proxies', []) if not proxies_in_group: logger.warning(f"Proxy group '{group_name}' has no proxies defined. Skipping.") continue work_minutes = group.get('work_minutes_per_proxy') if not work_minutes or work_minutes <= 0: logger.warning(f"Proxy group '{group_name}' is missing 'work_minutes_per_proxy'. 
                continue
            if not self.dry_run:
                # Record membership so other components can see which group owns each proxy.
                for proxy_url in proxies_in_group:
                    self.manager.set_proxy_group_membership(proxy_url, group_name, work_minutes)
            work_duration_seconds = work_minutes * 60
            state = group_states.get(group_name, {})
            if not state:
                # First run for this group, initialize it
                logger.info(f"Initializing new proxy group '{group_name}'. Activating first proxy '{proxies_in_group[0]}'.")
                self.actions_taken_this_cycle += 1
                active_proxy_index = 0
                next_rotation_ts = now + work_duration_seconds
                if not self.dry_run:
                    # Activate the first, rest the others
                    self.manager.set_proxy_state(proxies_in_group[0], ProfileState.ACTIVE.value)
                    for i, proxy_url in enumerate(proxies_in_group):
                        if i != active_proxy_index:
                            # Rest indefinitely; group logic will activate it when its turn comes.
                            self.manager.set_proxy_state(proxy_url, ProfileState.RESTING.value, rest_duration_minutes=99999)
                    self.manager.set_proxy_group_state(group_name, active_proxy_index, next_rotation_ts)
            elif now >= state.get('next_rotation_timestamp', 0):
                # Time to rotate
                current_active_index = state.get('active_proxy_index', 0)
                next_active_index = (current_active_index + 1) % len(proxies_in_group)
                old_active_proxy = proxies_in_group[current_active_index]
                new_active_proxy = proxies_in_group[next_active_index]
                logger.info(f"Rotating proxy group '{group_name}': Deactivating '{old_active_proxy}', Activating '{new_active_proxy}'.")
                self.actions_taken_this_cycle += 1
                next_rotation_ts = now + work_duration_seconds
                if not self.dry_run:
                    # Rest the old proxy
                    self.manager.set_proxy_state(old_active_proxy, ProfileState.RESTING.value, rest_duration_minutes=99999)
                    # Activate the new one
                    self.manager.set_proxy_state(new_active_proxy, ProfileState.ACTIVE.value)
                    # Update group state
                    self.manager.set_proxy_group_state(group_name, next_active_index, next_rotation_ts)

    def enforce_proxy_work_rest_cycle(self, args):
        """Enforces a work/rest cycle on proxies based on time.

        Applies only to proxies NOT managed by a proxy group; grouped proxies
        are handled by `enforce_proxy_group_rotation`. Resting a proxy also
        rests its ACTIVE profiles; waking it re-activates profiles that were
        rested for that reason.
        """
        work_minutes = args.proxy_work_minutes
        rest_minutes = args.proxy_rest_duration_minutes
        # Both knobs must be positive for this policy to be enabled.
        if not work_minutes or work_minutes <= 0 or not rest_minutes or rest_minutes <= 0:
            return
        # Get a flat list of all proxies managed by groups, so we can ignore them.
        proxy_groups = getattr(args, 'proxy_groups', [])
        grouped_proxies = set()
        if proxy_groups:
            for group in proxy_groups:
                for proxy_url in group.get('proxies', []):
                    grouped_proxies.add(proxy_url)
        all_profiles = self.manager.list_profiles()
        if not all_profiles:
            return
        unique_proxies = sorted(list(set(p['proxy'] for p in all_profiles if p.get('proxy'))))
        # Filter out proxies that are managed by the group rotation logic
        proxies_to_manage = [p for p in unique_proxies if p not in grouped_proxies]
        if not proxies_to_manage:
            logger.debug("All unique proxies are managed by proxy groups. Skipping individual work/rest cycle enforcement.")
            return
        proxy_states = self.manager.get_proxy_states(proxies_to_manage)
        now = time.time()
        for proxy_url, state_data in proxy_states.items():
            # Missing state defaults to ACTIVE.
            state = state_data.get('state', ProfileState.ACTIVE.value)
            # Un-rest logic
            if state == ProfileState.RESTING.value:
                rest_until = state_data.get('rest_until', 0)
                if now >= rest_until:
                    logger.info(f"Activating proxy '{proxy_url}' (rest period complete).")
                    self.actions_taken_this_cycle += 1
                    if not self.dry_run:
                        self.manager.set_proxy_state(proxy_url, ProfileState.ACTIVE.value)
                    # Also activate any profiles that were resting due to this proxy
                    # NOTE(review): nesting of this loop relative to the dry-run guard
                    # reconstructed from collapsed source — confirm against VCS history.
                    profiles_for_proxy = [p for p in all_profiles if p.get('proxy') == proxy_url]
                    for profile in profiles_for_proxy:
                        if profile['state'] == ProfileState.RESTING.value and profile.get('rest_reason') == self.PROXY_REST_REASON:
                            logger.info(f"Activating profile '{profile['name']}' as its proxy '{proxy_url}' is now active.")
                            self.actions_taken_this_cycle += 1
                            if not self.dry_run:
                                sm = self.manager.get_state_machine(profile['name'])
                                if sm:
                                    sm.activate()
                else:
                    # Proxy is still resting. Ensure any of its profiles that are ACTIVE are moved to RESTING.
                    # This catches profiles that were unlocked while their proxy was resting.
                    rest_until_ts = state_data.get('rest_until', 0)
                    profiles_for_proxy = [p for p in all_profiles if p.get('proxy') == proxy_url]
                    for profile in profiles_for_proxy:
                        if profile['state'] == ProfileState.ACTIVE.value:
                            logger.info(f"Resting profile '{profile['name']}' as its proxy '{proxy_url}' is resting.")
                            self.actions_taken_this_cycle += 1
                            if not self.dry_run:
                                # Align the profile's rest window with the proxy's remaining rest.
                                duration_minutes = (rest_until_ts - time.time()) / 60
                                sm = self.manager.get_state_machine(profile['name'])
                                if sm:
                                    sm.rest(reason=self.PROXY_REST_REASON, duration_minutes=max(0, duration_minutes))
            # Rest logic
            elif state == ProfileState.ACTIVE.value:
                work_start = state_data.get('work_start_timestamp', 0)
                if work_start == 0:
                    # Proxy was just created, start its work cycle
                    if not self.dry_run:
                        self.manager.set_proxy_state(proxy_url, ProfileState.ACTIVE.value)
                    continue
                work_duration_seconds = work_minutes * 60
                active_duration = now - work_start
                logger.debug(f"Proxy '{proxy_url}' has been active for {active_duration:.0f}s (limit: {work_duration_seconds}s).")
                if active_duration >= work_duration_seconds:
                    logger.info(f"Resting proxy '{proxy_url}' for {rest_minutes}m (work period of {work_minutes}m complete).")
                    self.actions_taken_this_cycle += 1
                    rest_until_ts = time.time() + rest_minutes * 60
                    if not self.dry_run:
                        self.manager.set_proxy_state(proxy_url, ProfileState.RESTING.value, rest_minutes)
                    # Also rest any active profiles using this proxy
                    profiles_for_proxy = [p for p in all_profiles if p.get('proxy') == proxy_url]
                    for profile in profiles_for_proxy:
                        if profile['state'] == ProfileState.ACTIVE.value:
                            logger.info(f"Resting profile '{profile['name']}' as its proxy '{proxy_url}' is resting.")
                            self.actions_taken_this_cycle += 1
                            if not self.dry_run:
                                duration_minutes = (rest_until_ts - time.time()) / 60
                                sm = self.manager.get_state_machine(profile['name'])
                                if sm:
                                    sm.rest(reason=self.PROXY_REST_REASON, duration_minutes=max(0, duration_minutes))
    def enforce_max_proxy_active_time(self, args):
        """
        Enforces a global maximum active time for any proxy, regardless of group membership.
        This acts as a safety net to prevent a proxy from being stuck in an ACTIVE state.

        Args:
            args: Must provide `max_global_proxy_active_minutes` (0 disables)
                and `rest_duration_on_max_active`.
        """
        max_active_minutes = args.max_global_proxy_active_minutes
        rest_minutes = args.rest_duration_on_max_active
        if not max_active_minutes or max_active_minutes <= 0:
            return
        all_profiles = self.manager.list_profiles()
        if not all_profiles:
            return
        unique_proxies = sorted(list(set(p['proxy'] for p in all_profiles if p.get('proxy'))))
        if not unique_proxies:
            return
        proxy_states = self.manager.get_proxy_states(unique_proxies)
        now = time.time()
        for proxy_url, state_data in proxy_states.items():
            if state_data.get('state') == ProfileState.ACTIVE.value:
                work_start = state_data.get('work_start_timestamp', 0)
                if work_start == 0:
                    continue  # Just activated, timestamp not set yet.
                active_duration_seconds = now - work_start
                max_active_seconds = max_active_minutes * 60
                if active_duration_seconds >= max_active_seconds:
                    reason = f"Exceeded max active time of {max_active_minutes}m"
                    logger.warning(f"Resting proxy '{proxy_url}' for {rest_minutes}m: {reason}")
                    self.actions_taken_this_cycle += 1
                    rest_until_ts = now + rest_minutes * 60
                    if not self.dry_run:
                        self.manager.set_proxy_state(proxy_url, ProfileState.RESTING.value, rest_minutes)
                    # Also rest any active profiles using this proxy
                    profiles_for_proxy = [p for p in all_profiles if p.get('proxy') == proxy_url]
                    for profile in profiles_for_proxy:
                        if profile['state'] == ProfileState.ACTIVE.value:
                            logger.info(f"Resting profile '{profile['name']}' as its proxy '{proxy_url}' is resting due to max active time.")
                            self.actions_taken_this_cycle += 1
                            if not self.dry_run:
                                # Profile rests for the proxy's remaining rest window.
                                duration_minutes = (rest_until_ts - time.time()) / 60
                                sm = self.manager.get_state_machine(profile['name'])
                                if sm:
                                    sm.rest(reason=self.PROXY_REST_REASON, duration_minutes=max(0, duration_minutes))

    def enforce_proxy_policies(self, args):
        """Dispatches per-proxy policies (failure-burst ban, rate-limit rest).

        Skips entirely when neither policy is enabled; otherwise evaluates each
        unique proxy once per cycle. A ban short-circuits the rate-limit check.
        """
        proxy_ban_enabled = args.proxy_ban_on_failures and args.proxy_ban_on_failures > 0
        proxy_rate_limit_enabled = getattr(args, 'proxy_rate_limit_requests', 0) > 0
        if not proxy_ban_enabled and not proxy_rate_limit_enabled:
            return
        all_profiles = self.manager.list_profiles()
        if not all_profiles:
            return
        unique_proxies = sorted(list(set(p['proxy'] for p in all_profiles if p.get('proxy'))))
        if not unique_proxies:
            return
        logger.debug(f"Checking proxy policies for {len(unique_proxies)} unique proxies...")
        for proxy_url in unique_proxies:
            profiles_for_proxy = [p for p in all_profiles if p.get('proxy') == proxy_url]
            if self.enforce_proxy_failure_burst_policy(
                proxy_url, profiles_for_proxy,
                args.proxy_ban_on_failures, args.proxy_ban_window_minutes
            ):
                continue  # Banned, no need for other checks
            self.enforce_proxy_rate_limit_policy(
                proxy_url, profiles_for_proxy,
                getattr(args, 'proxy_rate_limit_requests', 0),
                getattr(args, 'proxy_rate_limit_window_minutes', 0),
                getattr(args, 'proxy_rate_limit_rest_duration_minutes', 0)
            )

    def enforce_proxy_failure_burst_policy(self, proxy_url, profiles_for_proxy, max_failures, window_minutes):
        """Bans every profile on a proxy that saw a burst of failures.

        Args:
            proxy_url: Proxy being evaluated.
            profiles_for_proxy: All profiles currently attached to that proxy.
            max_failures: Failure threshold within the window (<=0 disables).
            window_minutes: Sliding window length (<=0 disables).

        Returns:
            True if the ban action fired (callers skip further checks), else False.
        """
        if not max_failures or not window_minutes or max_failures <= 0 or window_minutes <= 0:
            return False
        window_seconds = window_minutes * 60
        failure_count = self.manager.get_proxy_activity_rate(proxy_url, 'failure', window_seconds)
        if failure_count >= max_failures:
            reason = f"Proxy failure burst: {failure_count} failures in last {window_minutes}m (threshold: {max_failures})"
            logger.warning(f"Banning {len(profiles_for_proxy)} profile(s) on proxy '{proxy_url}' due to failure burst: {reason}")
            self.actions_taken_this_cycle += 1
            if not self.dry_run:
                for profile in profiles_for_proxy:
                    # Don't re-ban already banned profiles
                    if profile['state'] != ProfileState.BANNED.value:
                        sm = self.manager.get_state_machine(profile['name'])
                        if sm:
                            sm.ban(reason=reason)
            return True  # Indicates action was taken
        return False

    def enforce_proxy_rate_limit_policy(self, proxy_url, profiles_for_proxy, max_requests, window_minutes, rest_duration_minutes):
        """Rests a proxy whose recent request volume exceeds the limit.

        Returns True if the rest action fired, else False.
        """
        if not max_requests or not window_minutes or max_requests <= 0 or window_minutes <= 0:
            return False
        window_seconds = window_minutes * 60
        # Count all successful activities for the proxy
        activity_count = (
            self.manager.get_proxy_activity_rate(proxy_url, 'success', window_seconds) +
            self.manager.get_proxy_activity_rate(proxy_url, 'download', window_seconds)
        )
        if activity_count >= max_requests:
            reason = f"Proxy rate limit hit: {activity_count} requests in last {window_minutes}m (limit: {max_requests})"
            logger.info(f"Resting proxy '{proxy_url}' for {rest_duration_minutes}m: {reason}")
            self.actions_taken_this_cycle += 1
            if not self.dry_run:
                self.manager.set_proxy_state(proxy_url, ProfileState.RESTING.value, rest_duration_minutes)
            return True  # Indicates action was taken
        return False


def add_policy_enforcer_parser(subparsers):
    """Adds the parser for the 'policy-enforcer' command.

    Args:
        subparsers: The `argparse` subparsers object to register on.

    Returns:
        The configured sub-parser (all defaults are None so that policy-file
        values can be distinguished from explicit CLI overrides).
    """
    parser = subparsers.add_parser(
        'policy-enforcer',
        description='Apply policies to profiles (ban, rest, etc.).',
        formatter_class=argparse.RawTextHelpFormatter,
        help='Apply policies to profiles (ban, rest, etc.).'
    )
    # Connection / environment options.
    parser.add_argument('--policy', '--policy-file', dest='policy_file', help='Path to a YAML policy file to load default settings from.')
    parser.add_argument('--env-file', help='Path to a .env file to load environment variables from.')
    parser.add_argument('--redis-host', default=None, help='Redis host. Defaults to REDIS_HOST or MASTER_HOST_IP env var, or localhost.')
    parser.add_argument('--redis-port', type=int, default=None, help='Redis port. Defaults to REDIS_PORT env var, or 6379.')
    parser.add_argument('--redis-password', default=None, help='Redis password. Defaults to REDIS_PASSWORD env var.')
    parser.add_argument('--redis-db', type=int, default=None, help='Redis DB number. Defaults to REDIS_DB env var, or 0.')
    parser.add_argument('--env', default=None, help="Default environment name for Redis key prefix. Used if --auth-env or --download-env are not specified. Overrides policy file setting.")
    parser.add_argument('--auth-env', help="Override the environment for the Auth simulation.")
    parser.add_argument('--download-env', help="Override the environment for the Download simulation.")
    parser.add_argument('--legacy', action='store_true', help="Use legacy key prefix ('profile_mgmt_') without environment.")
    parser.add_argument('--key-prefix', default=None, help='Explicit key prefix for Redis. Overrides --env, --legacy and any defaults.')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose logging')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes.')
    # Policy arguments
    policy_group = parser.add_argument_group('Policy Rules')
    policy_group.add_argument('--max-failure-rate', type=float, default=None, help='Ban a profile if its failure rate exceeds this value (0.0 to 1.0). Default: 0.5')
    policy_group.add_argument('--min-requests-for-rate', type=int, default=None, help='Minimum total requests before failure rate is calculated. Default: 20')
    policy_group.add_argument('--ban-on-failures', type=int, default=None, help='Ban a profile if it has this many failures within the time window (0 to disable). Default: 0')
    policy_group.add_argument('--ban-on-failures-window-minutes', type=int, default=None, help='The time window in minutes for the failure burst check. Default: 5')
    policy_group.add_argument('--rest-after-requests', type=int, default=None, help='Move a profile to RESTING after this many total requests (0 to disable). Default: 0')
    policy_group.add_argument('--rest-duration-minutes', type=int, default=None, help='How long a profile should rest. Default: 15')
    policy_group.add_argument('--rate-limit-requests', type=int, default=None, help='Rest a profile if it exceeds this many requests in the time window (0 to disable).')
    policy_group.add_argument('--rate-limit-window-minutes', type=int, default=None, help='The time window in minutes for the rate limit check.')
    policy_group.add_argument('--rate-limit-rest-duration-minutes', type=int, default=None, help='How long a profile should rest after hitting the rate limit.')
    policy_group.add_argument('--unlock-stale-locks-after-seconds', type=int, default=None, help='Unlock profiles that have been in a LOCKED state for more than this many seconds (0 to disable). Default: 120')
    proxy_policy_group = parser.add_argument_group('Proxy Policy Rules')
    proxy_policy_group.add_argument('--proxy-work-minutes', type=int, default=None, help='Work duration for a proxy before it rests (0 to disable). Default: 0')
    proxy_policy_group.add_argument('--proxy-rest-duration-minutes', type=int, default=None, help='Rest duration for a proxy after its work period. Default: 0')
    proxy_policy_group.add_argument('--proxy-ban-on-failures', type=int, default=None, help='Ban a proxy (and all its profiles) if it has this many failures within the time window (0 to disable). Default: 0')
    proxy_policy_group.add_argument('--proxy-ban-window-minutes', type=int, default=None, help='The time window in minutes for the proxy failure burst check. Default: 10')
    proxy_policy_group.add_argument('--proxy-rate-limit-requests', type=int, default=None, help='Rest a proxy if it exceeds this many requests in the time window (0 to disable).')
    proxy_policy_group.add_argument('--proxy-rate-limit-window-minutes', type=int, default=None, help='The time window in minutes for the proxy rate limit check.')
    proxy_policy_group.add_argument('--proxy-rate-limit-rest-duration-minutes', type=int, default=None, help='How long a proxy should rest after hitting the rate limit.')
    proxy_policy_group.add_argument('--max-global-proxy-active-minutes', type=int, default=None, help='Global maximum time a proxy can be active before being rested (0 to disable). Acts as a safety net. Default: 0')
    proxy_policy_group.add_argument('--rest-duration-on-max-active', type=int, default=None, help='How long a proxy should rest after hitting the global max active time. Default: 10')
    # Execution control
    exec_group = parser.add_argument_group('Execution Control')
    exec_group.add_argument('--live', action='store_true', help='Run continuously, applying policies periodically.')
    exec_group.add_argument('--interval-seconds', type=int, default=None, help='When in --live mode, how often to apply policies. Default: 60')
    exec_group.add_argument('--auth-only', action='store_true', help='Run enforcer for the auth simulation only.')
    exec_group.add_argument('--download-only', action='store_true', help='Run enforcer for the download simulation only.')
    return parser


def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=False):
    """Synchronize profile states between auth and download simulations.

    Args:
        auth_manager: ProfileManager for the auth simulation (the leader).
        download_manager: ProfileManager for the download simulation.
        sync_config: Dict with 'profile_links', 'sync_states', 'sync_rotation',
            'enforce_auth_lead', 'sync_active_profile', 'sync_waiting_downloads'.
        dry_run: When True, log intended actions without mutating state.
    """
    if not sync_config:
        return
    profile_links = sync_config.get('profile_links', [])
    sync_states = sync_config.get('sync_states', [])
    sync_rotation = sync_config.get('sync_rotation', False)
    enforce_auth_lead = sync_config.get('enforce_auth_lead', False)
    if not profile_links:
        return
    # --- Get all profiles once for efficiency ---
    all_auth_profiles = {p['name']: p for p in auth_manager.list_profiles()}
    all_download_profiles = {p['name']: p for p in download_manager.list_profiles()}
    # --- State and Rotation Sync (handles prefixes correctly) ---
    for link in profile_links:
        auth_prefix = link.get('auth')
        download_prefix = link.get('download')
        if not auth_prefix or not download_prefix:
            continue
        auth_profiles_in_group = [p for name, p in all_auth_profiles.items() if name.startswith(auth_prefix)]
        for auth_profile in auth_profiles_in_group:
            # Assume 1-to-1 name mapping (e.g., auth 'user1_0' maps to download 'user1_0')
            download_profile_name = auth_profile['name']
            download_profile = all_download_profiles.get(download_profile_name)
            if not download_profile:
                logger.debug(f"Auth profile '{auth_profile['name']}' has no corresponding download profile.")
                continue
            auth_state = auth_profile.get('state')
            download_state = download_profile.get('state')
            # Sync states from auth to download
            if enforce_auth_lead and auth_state in sync_states and download_state != auth_state:
                auth_reason = auth_profile.get('reason', '')
                # If auth profile is waiting for downloads, we must NOT sync the RESTING state to the download profile,
                # as that would prevent it from processing the very downloads we are waiting for.
                if auth_state == ProfileState.RESTING.value and auth_reason == 'waiting_downloads':
                    logger.debug(f"Auth profile '{auth_profile['name']}' is waiting for downloads. Skipping state sync to download profile to prevent deadlock.")
                else:
                    logger.info(f"Syncing download profile '{download_profile_name}' to state '{auth_state}' (auth lead)")
                    if not dry_run:
                        sm = download_manager.get_state_machine(download_profile_name)
                        if not sm:
                            continue
                        reason_to_sync = auth_reason or 'Synced from auth'
                        if auth_state == ProfileState.BANNED.value:
                            sm.ban(reason=f"Synced from auth: {reason_to_sync}")
                        elif auth_state == ProfileState.RESTING.value:
                            # Mirror the remaining rest window of the auth profile.
                            auth_rest_until = auth_profile.get('rest_until', 0)
                            duration_minutes = max(0, (auth_rest_until - time.time()) / 60)
                            sm.rest(reason=f"Synced from auth: {reason_to_sync}", duration_minutes=duration_minutes)
            # Handle rotation sync
            if sync_rotation:
                auth_reason = auth_profile.get('rest_reason', '')
                # If auth profile is waiting for downloads, we must NOT sync the RESTING state to the download profile,
                # as that would prevent it from processing the very downloads we are waiting for.
                if auth_reason == 'waiting_downloads':
                    logger.debug(f"Auth profile '{auth_profile['name']}' is waiting for downloads. Skipping rotation sync to download profile to prevent deadlock.")
                elif auth_state == ProfileState.RESTING.value and 'rotate' in auth_reason.lower():
                    if download_state != ProfileState.RESTING.value:
                        logger.info(f"Rotating download profile '{download_profile_name}' due to auth rotation")
                        if not dry_run:
                            sm = download_manager.get_state_machine(download_profile_name)
                            if not sm:
                                continue
                            auth_rest_until = auth_profile.get('rest_until', 0)
                            duration_minutes = max(0, (auth_rest_until - time.time()) / 60)
                            sm.rest(reason=f"Rotated due to auth rotation: {auth_reason}", duration_minutes=duration_minutes, is_rotation=True)
    # --- Active Profile Sync ---
    sync_active = sync_config.get('sync_active_profile', False)
    sync_waiting_downloads = sync_config.get('sync_waiting_downloads', False)
    if not (sync_active or sync_waiting_downloads):
        return
    logger.debug("Syncing active profiles from Auth to Download simulation...")
    # Get profiles that should be active in the download simulation
    target_active_download_profiles = set()
    # 1. Add profiles that are active in auth simulation (if sync_active is enabled)
    if sync_active:
        active_auth_profiles = [p for p in all_auth_profiles.values() if p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]]
        for auth_profile in active_auth_profiles:
            target_active_download_profiles.add(auth_profile['name'])
    # 2. Add profiles that are waiting for downloads to complete (if sync_waiting_downloads is enabled)
    if sync_waiting_downloads:
        waiting_auth_profiles = [p for p in all_auth_profiles.values() if p['state'] == ProfileState.RESTING.value and p.get('rest_reason') == 'waiting_downloads']
        for auth_profile in waiting_auth_profiles:
            target_active_download_profiles.add(auth_profile['name'])
            logger.debug(f"Auth profile '{auth_profile['name']}' is waiting for downloads. Ensuring matching download profile is active.")
    if not target_active_download_profiles:
        logger.debug("No auth profiles found that need active download profiles.")
        # If no auth profiles need download profiles active, we should rest ALL download profiles
        # to prevent them from being used when they shouldn't be.
        # This is handled by the deactivation logic below (target_download_groups will be empty).
    # Get download profile group info from Redis
    dl_group_state_keys = [k for k in download_manager.redis.scan_iter(f"{download_manager.key_prefix}profile_group_state:*")]
    dl_group_names = [k.split(':')[-1] for k in dl_group_state_keys]
    dl_group_states = download_manager.get_profile_group_states(dl_group_names)
    # Build profile-name -> owning-group lookup from the persisted group prefixes.
    dl_profile_to_group = {}
    for name, state in dl_group_states.items():
        prefix = state.get('prefix')
        if prefix:
            for p_name in all_download_profiles:
                if p_name.startswith(prefix):
                    dl_profile_to_group[p_name] = {'name': name, 'max_active': state.get('max_active_profiles', 1)}
    # Activate download profiles that should be active but aren't
    for target_profile_name in target_active_download_profiles:
        download_profile = all_download_profiles.get(target_profile_name)
        if not download_profile:
            logger.warning(f"Auth profile '{target_profile_name}' needs an active download profile, but no corresponding download profile found.")
            continue
        if download_profile['state'] not in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
            is_from_cooldown = download_profile['state'] == ProfileState.COOLDOWN.value
            log_msg_suffix = " (from COOLDOWN)" if is_from_cooldown else ""
            logger.info(f"Syncing active state: Activating download profile '{target_profile_name}' to match auth requirements{log_msg_suffix}.")
            if not dry_run:
                sm = download_manager.get_state_machine(target_profile_name)
                if sm:
                    # Pass profile so action can decide whether to reset counters
                    sm.activate(profile=download_profile)
    # --- Group-Aware Deactivation ---
    # Identify the target download groups based on target_active_download_profiles.
    # CRITICAL FIX: Directly map individual profiles to their groups instead of relying on name patterns.
    target_download_groups = set()
    for target_profile_name in target_active_download_profiles:
        group_info = dl_profile_to_group.get(target_profile_name)
        if group_info:
            target_download_groups.add(group_info['name'])
    logger.debug(f"Target download groups for this sync cycle: {target_download_groups}")
    # Deactivate any download profiles that are active but are not in a target group
    for dl_profile_name, dl_profile in all_download_profiles.items():
        if dl_profile['state'] == ProfileState.ACTIVE.value:
            group_info = dl_profile_to_group.get(dl_profile_name)
            # If the profile is in a group, and that group is NOT a target group, rest it.
            if group_info and group_info['name'] not in target_download_groups:
                logger.info(f"Syncing active state: Resting download profile '{dl_profile_name}' as its group '{group_info['name']}' is no longer active.")
                if not dry_run:
                    sm = download_manager.get_state_machine(dl_profile_name)
                    if sm:
                        # Use a very long duration to prevent immediate re-activation by enforce_unrest_policy.
                        # The sync logic will wake it up when needed.
                        sm.rest(reason="Synced rotation from auth", duration_minutes=999999)


def main_policy_enforcer(args) -> int:
    """Main dispatcher for 'policy-enforcer' command.

    Loads the optional YAML policy file, resolves each setting with the
    precedence CLI arg > policy-file value > code default, builds one
    enforcer per simulation (Auth and/or Download), then either applies
    policies once or runs a live loop until a shutdown signal arrives.

    Returns an exit code: 0 on success, 1 on configuration/load errors.
    """
    policy = {}
    if args.policy_file:
        # PyYAML is optional at import time; fail clearly if it is missing.
        if not yaml:
            logger.error("Cannot load policy file because PyYAML is not installed.")
            return 1
        try:
            with open(args.policy_file, 'r') as f:
                policy = yaml.safe_load(f) or {}
        except (IOError, yaml.YAMLError) as e:
            logger.error(f"Failed to load or parse policy file {args.policy_file}: {e}")
            return 1

    class Config:
        """Per-simulation settings resolved with precedence:
        CLI argument > policy-file value > hard-coded default."""

        def __init__(self, cli_args, policy_defaults, code_defaults):
            for key, code_default in code_defaults.items():
                cli_val = getattr(cli_args, key, None)
                policy_val = policy_defaults.get(key)
                if cli_val is not None:
                    setattr(self, key, cli_val)
                elif policy_val is not None:
                    setattr(self, key, policy_val)
                else:
                    setattr(self, key, code_default)

    # Lowest-precedence defaults for every recognized policy setting.
    code_defaults = {
        'max_failure_rate': 0.0,
        'min_requests_for_rate': 20,
        'ban_on_failures': 0,
        'ban_on_failures_window_minutes': 5,
        'rest_after_requests': 0,
        'rest_duration_minutes': 15,
        'rate_limit_requests': 0,
        'rate_limit_window_minutes': 60,
        'rate_limit_rest_duration_minutes': 5,
        'proxy_work_minutes': 0,
        'proxy_rest_duration_minutes': 0,
        'proxy_ban_on_failures': 0,
        'proxy_ban_window_minutes': 10,
        'proxy_rate_limit_requests': 0,
        'proxy_rate_limit_window_minutes': 60,
        'proxy_rate_limit_rest_duration_minutes': 10,
        'unlock_stale_locks_after_seconds': 120,
        'unlock_cooldown_seconds': 0,
        'max_global_proxy_active_minutes': 0,
        'rest_duration_on_max_active': 10,
        'profile_selection_strategy': 'longest_idle',
        'global_max_active_profiles': 0,
        'interval_seconds': 60,
        'proxy_groups': [],
        'profile_groups': []
    }

    sim_params = policy.get('simulation_parameters', {})

    # Optionally load a .env file (python-dotenv is an optional dependency).
    if load_dotenv:
        env_file_from_policy = sim_params.get('env_file')
        env_file = args.env_file or env_file_from_policy
        # Convenience: a user who passed a file path via --env almost
        # certainly meant --env-file, so treat it that way with a warning.
        if not env_file and args.env and '.env' in args.env and os.path.exists(args.env):
            print(f"WARNING: --env should be an environment name, not a file path. Treating '{args.env}' as --env-file.", file=sys.stderr)
            env_file = args.env
        was_loaded = load_dotenv(env_file)
        if was_loaded:
            print(f"Loaded environment variables from {env_file or '.env file'}", file=sys.stderr)
        elif args.env_file:
            # This only runs if was_loaded is False AND args.env_file was provided
            print(f"ERROR: The specified --env-file was not found: {args.env_file}", file=sys.stderr)
            return 1

    # Redis connection parameters: CLI flags win over environment variables.
    redis_host = args.redis_host or os.getenv('REDIS_HOST', os.getenv('MASTER_HOST_IP', 'localhost'))
    redis_port = args.redis_port if args.redis_port is not None else int(os.getenv('REDIS_PORT', 6379))
    redis_password = args.redis_password or os.getenv('REDIS_PASSWORD')
    redis_db = args.redis_db if args.redis_db is not None else int(os.getenv('REDIS_DB', 0))

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # SIGINT/SIGTERM flip the module-level shutdown flag read by the live loop.
    signal.signal(signal.SIGINT, handle_shutdown)
    signal.signal(signal.SIGTERM, handle_shutdown)

    enforcer_setups = []

    def setup_enforcer(sim_type, env_cli_arg, policy_config_key, env_policy_key):
        # Build the ProfileManager/PolicyEnforcer pair for one simulation
        # type; returns None when the policy file has no config block for it.
        policy_config = policy.get(policy_config_key)
        # Fallback for single-enforcer policy files
        if policy_config is None and sim_type == 'Auth':
            policy_config = policy.get('policy_enforcer_config', {})
        if policy_config is None:
            logger.debug(f"No config block found for {sim_type} simulation ('{policy_config_key}'). Skipping.")
            return None
        logger.info(f"Setting up enforcer for {sim_type} simulation...")

        # --- Dynamic Profile Group Discovery ---
        profile_group_templates = policy.get('profile_group_templates')
        # Check if templates exist and if the config block doesn't already have groups (CLI overrides take precedence)
        if profile_group_templates and 'profile_groups' not in policy_config:
            logger.info(f"Found 'profile_group_templates'. Discovering profile groups dynamically for {sim_type}...")
            # Determine key_prefix to connect to the right Redis env (logic duplicated from below)
            policy_env = sim_params.get(env_policy_key)
            default_policy_env = sim_params.get('env')
            effective_env = env_cli_arg or args.env or policy_env or default_policy_env or 'dev'
            if args.key_prefix:
                temp_key_prefix = args.key_prefix
            elif args.legacy:
                temp_key_prefix = 'profile_mgmt_'
            else:
                temp_key_prefix = f"{effective_env}_profile_mgmt_"
            try:
                # Use a temporary manager to scan for profiles in the correct environment
                temp_manager = ProfileManager(redis_host, redis_port, redis_password, temp_key_prefix, redis_db)
                all_profiles = temp_manager.list_profiles()
                # Extract unique prefixes (e.g., 'user31' from 'user31_0')
                found_prefixes = set(p['name'].rsplit('_', 1)[0] for p in all_profiles)
                if not found_prefixes:
                    logger.warning(f"Dynamic discovery found no profile prefixes for env '{effective_env}'. No group policies will be applied.")
                else:
                    logger.info(f"Discovered {len(found_prefixes)} unique profile prefixes: {sorted(list(found_prefixes))}")
                    generated_groups = []
                    for prefix in sorted(list(found_prefixes)):
                        for template in profile_group_templates:
                            pattern = template.get('pattern')
                            if pattern and fnmatch.fnmatch(prefix, pattern):
                                # Get the settings for the current simulation type (Auth/Download)
                                sim_settings = template.get(sim_type.lower())
                                if not sim_settings:
                                    logger.debug(f"Template with pattern '{pattern}' has no settings for '{sim_type}'. Skipping for prefix '{prefix}'.")
                                    continue
                                # Create a new group from the relevant part of the template
                                new_group = deepcopy(sim_settings)
                                new_group['prefix'] = prefix
                                new_group['name'] = prefix  # Use prefix as group name
                                generated_groups.append(new_group)
                                logger.debug(f"Applied template with pattern '{pattern}' to prefix '{prefix}' for {sim_type} simulation.")
                                break  # Move to next prefix once a match is found
                    policy_config['profile_groups'] = generated_groups
            except Exception as e:
                # Discovery is best-effort; the enforcer still runs without groups.
                logger.error(f"Failed during dynamic profile group discovery: {e}", exc_info=args.verbose)

        config = Config(args, policy_config, code_defaults)

        # Determine the effective environment name with correct precedence:
        # 1. Specific CLI arg (e.g., --auth-env)
        # 2. General CLI arg (--env)
        # 3. Specific policy setting (e.g., simulation_parameters.auth_env)
        # 4. General policy setting (simulation_parameters.env)
        # 5. Hardcoded default ('dev')
        policy_env = sim_params.get(env_policy_key)
        default_policy_env = sim_params.get('env')
        effective_env = env_cli_arg or args.env or policy_env or default_policy_env or 'dev'
        logger.info(f"Using environment '{effective_env}' for {sim_type}.")

        if args.key_prefix:
            key_prefix = args.key_prefix
        elif args.legacy:
            key_prefix = 'profile_mgmt_'
        else:
            key_prefix = f"{effective_env}_profile_mgmt_"

        manager = ProfileManager(redis_host, redis_port, redis_password, key_prefix, redis_db)
        enforcer = PolicyEnforcer(manager, dry_run=args.dry_run)

        # Write any relevant config to Redis for workers to use
        cooldown = getattr(config, 'unlock_cooldown_seconds', None)
        if cooldown is not None and not args.dry_run:
            # If it's a list or int, convert to JSON string to store in Redis
            manager.set_config('unlock_cooldown_seconds', json.dumps(cooldown))
        proxy_work_minutes = getattr(config, 'proxy_work_minutes', None)
        if proxy_work_minutes is not None and not args.dry_run:
            manager.set_config('proxy_work_minutes', proxy_work_minutes)
        proxy_rest_duration_minutes = getattr(config, 'proxy_rest_duration_minutes', None)
        if proxy_rest_duration_minutes is not None and not args.dry_run:
            manager.set_config('proxy_rest_duration_minutes', proxy_rest_duration_minutes)
        # Write profile_selection_strategy to Redis for observability
        strategy = getattr(config, 'profile_selection_strategy', None)
        if strategy and not args.dry_run:
            manager.set_config('profile_selection_strategy', strategy)

        return {'name': sim_type, 'enforcer': enforcer, 'config': config}

    if not args.download_only:
        auth_setup = setup_enforcer('Auth', args.auth_env, 'auth_policy_enforcer_config', 'auth_env')
        if auth_setup:
            enforcer_setups.append(auth_setup)
    if not args.auth_only:
        download_setup = setup_enforcer('Download', args.download_env, 'download_policy_enforcer_config', 'download_env')
        if download_setup:
            enforcer_setups.append(download_setup)

    if not enforcer_setups:
        logger.error("No policies to enforce. Check policy file and --auth-only/--download-only flags.")
        return 1

    # Determine interval. Precedence: CLI -> simulation_parameters -> per-setup config -> code default.
    # The CLI arg is already handled by the Config objects, so we just need to check sim_params.
    sim_params_interval = sim_params.get('interval_seconds')
    if args.interval_seconds is None and sim_params_interval is not None:
        interval = sim_params_interval
    else:
        interval = min(s['config'].interval_seconds for s in enforcer_setups)

    # Get cross-simulation sync configuration
    cross_sync_config = policy.get('cross_simulation_sync', {})

    if not args.live:
        # One-shot mode: apply each simulation's policies once, sync, exit.
        for setup in enforcer_setups:
            logger.info(f"--- Applying policies for {setup['name']} Simulation ---")
            setup['enforcer'].apply_policies(setup['config'])
        # Apply cross-simulation sync after all policies have been applied
        if cross_sync_config and len(enforcer_setups) == 2:
            # We need to identify which setup is auth and which is download
            # Based on their names
            auth_setup = next((s for s in enforcer_setups if s['name'] == 'Auth'), None)
            download_setup = next((s for s in enforcer_setups if s['name'] == 'Download'), None)
            if auth_setup and download_setup:
                sync_cross_simulation(
                    auth_setup['enforcer'].manager,
                    download_setup['enforcer'].manager,
                    cross_sync_config,
                    dry_run=args.dry_run
                )
        return 0

    logger.info(f"Running in live mode. Applying policies every {interval} seconds. Press Ctrl+C to stop.")
    if not args.verbose:
        print("Each '.' represents a check cycle with no actions taken.", file=sys.stderr)

    while not shutdown_event:
        had_action_in_cycle = False
        for setup in enforcer_setups:
            logger.debug(f"--- Applying policies for {setup['name']} Simulation ({setup['enforcer'].manager.key_prefix}) ---")
            # NOTE(review): assumes apply_policies returns truthy when actions
            # were taken this cycle — its return statement is not visible here; confirm.
            if setup['enforcer'].apply_policies(setup['config']):
                had_action_in_cycle = True

        # Apply cross-simulation sync after all policies have been applied in this cycle
        if cross_sync_config and len(enforcer_setups) == 2:
            auth_setup = next((s for s in enforcer_setups if s['name'] == 'Auth'), None)
            download_setup = next((s for s in enforcer_setups if s['name'] == 'Download'), None)
            if auth_setup and download_setup:
                sync_cross_simulation(
                    auth_setup['enforcer'].manager,
                    download_setup['enforcer'].manager,
                    cross_sync_config,
                    dry_run=args.dry_run
                )
                # Note: sync_cross_simulation may take actions, but we don't track them for the dot indicator
                # This is fine for now

        if had_action_in_cycle:
            if not args.verbose:
                # Print a newline to separate the action logs from subsequent dots
                print(file=sys.stderr)
        else:
            if not args.verbose:
                print(".", end="", file=sys.stderr)
                sys.stderr.flush()

        # Sleep in 1-second slices so a shutdown signal is honored promptly
        # instead of blocking for the full interval.
        sleep_end_time = time.time() + interval
        while time.time() < sleep_end_time and not shutdown_event:
            time.sleep(1)

    logger.info("Policy enforcer stopped.")
    return 0