"""
DAG to manage the state of proxies and accounts used by the ytdlp-ops-server.

The single task dispatches on the ``entity`` / ``action`` params:

* ``proxy`` / ``account`` / ``accounts_and_proxies`` actions go through the
  Thrift management service.
* ``airflow_meta`` actions operate directly on the Airflow metadata database.
"""
from __future__ import annotations

import logging
import socket
from contextlib import contextmanager
from datetime import datetime

from airflow.exceptions import AirflowException
from airflow.models.dag import DAG
from airflow.models.dagbag import DagBag
from airflow.models.dagrun import DagRun
from airflow.models.param import Param
from airflow.models.taskinstance import TaskInstance
from airflow.models.variable import Variable
from airflow.operators.python import PythonOperator
from airflow.providers.redis.hooks.redis import RedisHook
from airflow.utils.dates import days_ago
from airflow.utils.session import create_session

# Configure logging
logger = logging.getLogger(__name__)

# Import and apply Thrift exceptions patch for Airflow compatibility
try:
    from thrift_exceptions_patch import patch_thrift_exceptions

    patch_thrift_exceptions()
    logger.info("Applied Thrift exceptions patch for Airflow compatibility.")
except ImportError:
    logger.warning("Could not import thrift_exceptions_patch. Compatibility may be affected.")
except Exception as e:
    logger.error(f"Error applying Thrift exceptions patch: {e}")

# Thrift imports
try:
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
    from yt_ops_services.client_utils import get_thrift_client, format_timestamp
except ImportError as e:
    logger.critical(f"Could not import Thrift modules: {e}. Ensure yt_ops_services package is installed correctly.")
    # Fail DAG parsing if thrift modules are not available
    raise

DEFAULT_MANAGEMENT_SERVICE_IP = Variable.get("MANAGEMENT_SERVICE_HOST", default_var="envoy-thrift-lb")
# A stored Variable comes back as a string, while the "management_port" Param
# below is declared as an integer; coerce so a configured Variable stays valid.
DEFAULT_MANAGEMENT_SERVICE_PORT = int(Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9080))
DEFAULT_REDIS_CONN_ID = "redis_default"

# Version tracking for debugging
DAG_VERSION = "1.7.1"  # Updated to handle Redis configuration errors

# Actions accepted for each entity; anything else is rejected before any I/O.
_VALID_ACTIONS = {
    "proxy": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
    "account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"],
    "accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
    "airflow_meta": ["clear_dag_runs"],
}


# Helper function to connect to Redis, similar to other DAGs
def _get_redis_client(redis_conn_id: str):
    """Return the client object produced by ``RedisHook.get_conn()``.

    Args:
        redis_conn_id: Airflow connection ID of the Redis server.

    Raises:
        AirflowException: if the hook cannot be created or connected; the
            original error is chained as the cause.
    """
    try:
        return RedisHook(redis_conn_id=redis_conn_id).get_conn()
    except Exception as e:
        logger.error(f"Failed to connect to Redis using connection '{redis_conn_id}': {e}")
        raise AirflowException(f"Redis connection failed: {e}") from e


@contextmanager
def _thrift_connection(host, port):
    """Yield a Thrift client for ``host:port`` and close its transport on exit."""
    transport = None
    try:
        client, transport = get_thrift_client(host, port)
        yield client
    finally:
        if transport and transport.isOpen():
            transport.close()
            logger.info("Thrift connection closed.")


def _list_proxy_statuses(client, server_identity):
    """Print a table of proxy statuses obtained from ``client.getProxyStatus``.

    Args:
        client: Thrift management client.
        server_identity: server to filter by; a falsy value is logged as 'ALL'.

    Raises:
        PBServiceException: re-raised unless it is the known
            "Redis is not configured for this server" condition, which is
            reported and swallowed. Any other exception type is logged,
            printed and swallowed.
    """
    logger.info(f"Listing proxy statuses for server: {server_identity or 'ALL'}")
    logger.info("NOTE: Proxy statuses are read from server's internal state via Thrift service")

    try:
        statuses = client.getProxyStatus(server_identity)
    except PBServiceException as e:
        # e.message may be None; guard the substring test against a TypeError.
        message = getattr(e, "message", None) or ""
        if "Redis is not configured for this server" not in message:
            # Re-raise if it's a different PBServiceException
            raise
        logger.error(f"Redis not configured on server: {message}")
        print(f"\nERROR: Server configuration issue - {message}\n")
        print("This server does not have Redis configured for proxy management.\n")
        return
    except Exception as e:
        logger.error(f"Unexpected error getting proxy statuses: {e}", exc_info=True)
        print(f"\nERROR: Unexpected error getting proxy statuses: {e}\n")
        return

    if not statuses:
        logger.info("No proxy statuses found.")
        return

    from tabulate import tabulate

    # This is forward-compatible: it checks for new attributes before using them.
    first = statuses[0]
    has_extended_info = hasattr(first, 'recentAccounts') or hasattr(first, 'recentMachines')

    rows = []
    for s in statuses:
        row = {
            "Server": s.serverIdentity,
            "Proxy URL": s.proxyUrl,
            "Status": s.status,
            "Success": s.successCount,
            "Failures": s.failureCount,
            "Last Success": format_timestamp(s.lastSuccessTimestamp),
            "Last Failure": format_timestamp(s.lastFailureTimestamp),
        }
        if has_extended_info:
            recent_accounts = getattr(s, 'recentAccounts', [])
            recent_machines = getattr(s, 'recentMachines', [])
            row["Recent Accounts"] = "\n".join(recent_accounts) if recent_accounts else "N/A"
            row["Recent Machines"] = "\n".join(recent_machines) if recent_machines else "N/A"
        rows.append(row)

    print("\n--- Proxy Statuses ---")
    # The f-string with a newline ensures the table starts on a new line in the logs.
    print(f"\n{tabulate(rows, headers='keys', tablefmt='grid')}")
    print("----------------------\n")

    if not has_extended_info:
        logger.warning("Server does not seem to support 'recentAccounts' or 'recentMachines' fields yet.")
        print("NOTE: To see Recent Accounts/Machines, the server's `getProxyStatus` method must be updated to return these fields.")


def _format_resting_status(remaining_seconds: int) -> str:
    """Render the countdown label for an account that is still resting."""
    if remaining_seconds > 3600:
        return f"RESTING (active in {remaining_seconds // 3600}h {remaining_seconds % 3600 // 60}m)"
    if remaining_seconds > 60:
        return f"RESTING (active in {remaining_seconds // 60}m {remaining_seconds % 60}s)"
    return f"RESTING (active in {remaining_seconds}s)"


def _live_account_status(redis_client, status):
    """Return the status label for one account, refined from Redis when resting.

    Falls back to the server-reported ``status.status`` when there is no Redis
    client, the account is not RESTING, the hash has no ``resting_until``
    field, or the Redis lookup / parsing fails.
    """
    status_str = status.status
    if not redis_client or 'RESTING' not in (status_str or ""):
        return status_str
    try:
        # The server stores resting expiry time in 'resting_until'.
        expiry_raw = redis_client.hget(f"account_status:{status.accountId}", "resting_until")
        if not expiry_raw:
            return status_str
        expiry_ts = float(expiry_raw)
        now = datetime.now().timestamp()
        if now >= expiry_ts:
            return "ACTIVE (was RESTING)"
        return _format_resting_status(int(expiry_ts - now))
    except Exception as e:
        logger.warning(f"Could not parse resting time for {status.accountId} from Redis: {e}. Using server status.")
        return status_str


def _list_account_statuses(client, account_id, redis_conn_id, account_prefix=None):
    """Print a table of account statuses, most recently active first.

    Statuses come from ``client.getAccountStatus``; RESTING accounts are
    refined with the ``resting_until`` value read from Redis when a Redis
    connection is available.

    Args:
        client: Thrift management client.
        account_id: exact account to show, or a falsy value for no ID filter.
        redis_conn_id: Airflow connection ID used for the Redis enrichment.
        account_prefix: optional prefix filter, forwarded as ``accountPrefix``.
            Defaults to ``None`` so existing three-argument callers are
            unaffected.

    Errors from the Thrift call or the rendering are logged and printed, not
    raised; a failed Redis connection only disables the enrichment.
    """
    target = account_id or (f"{account_prefix}*" if account_prefix else 'ALL')
    logger.info(f"Listing account statuses for account: {target}")
    logger.info("NOTE: Account statuses are read from the Thrift service and enriched with live data from Redis.")

    try:
        redis_client = _get_redis_client(redis_conn_id)
        logger.info("Successfully connected to Redis to fetch detailed account status.")
    except Exception as e:
        logger.warning(f"Could not connect to Redis to get detailed status. Will show basic status. Error: {e}")
        redis_client = None

    try:
        # The thrift method takes accountId (specific) or accountPrefix.
        # With both left as None the server returns every account.
        statuses = client.getAccountStatus(accountId=account_id, accountPrefix=account_prefix)
        if not statuses:
            print("\n--- Account Statuses ---\nNo account statuses found.\n------------------------\n")
            return

        from tabulate import tabulate

        keyed_rows = []
        for s in statuses:
            last_success = float(s.lastSuccessTimestamp) if s.lastSuccessTimestamp else 0
            last_failure = float(s.lastFailureTimestamp) if s.lastFailureTimestamp else 0
            row = {
                "Account ID": s.accountId,
                "Status": _live_account_status(redis_client, s),
                "Success": s.successCount,
                "Failures": s.failureCount,
                "Last Success": format_timestamp(s.lastSuccessTimestamp),
                "Last Failure": format_timestamp(s.lastFailureTimestamp),
                "Last Proxy": s.lastUsedProxy or "N/A",
                "Last Machine": s.lastUsedMachine or "N/A",
            }
            # Carry the sort key next to the row instead of inside it.
            keyed_rows.append((max(last_success, last_failure), row))

        # Most recent activity first; the sort is stable for equal timestamps.
        keyed_rows.sort(key=lambda pair: pair[0], reverse=True)
        status_list = [row for _, row in keyed_rows]

        print("\n--- Account Statuses ---")
        # The f-string with a newline ensures the table starts on a new line in the logs.
        print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}")
        print("------------------------\n")
    except (PBServiceException, PBUserException) as e:
        logger.error(f"Failed to get account statuses: {e.message}", exc_info=True)
        print(f"\nERROR: Could not retrieve account statuses. Server returned: {e.message}\n")
    except Exception as e:
        logger.error(f"An unexpected error occurred while getting account statuses: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")


def _validate_request(entity, action, server_identity, proxy_url, account_id):
    """Reject invalid entity/action pairs and missing parameters.

    Raises:
        ValueError: for an action not listed for the entity in
            ``_VALID_ACTIONS``, for a missing required parameter, or for a
            single-proxy delete that lacks ``server_identity``.
    """
    allowed = _VALID_ACTIONS.get(entity, [])
    if action not in allowed:
        raise ValueError(
            f"The action '{action}' is not valid for entity '{entity}'.\n"
            f"Valid actions for '{entity}' are: {', '.join(allowed or ['None'])}."
        )

    if entity == "proxy":
        if action in ("ban", "unban"):
            if not server_identity:
                raise ValueError(f"A 'server_identity' is required for proxy action '{action}'.")
            if not proxy_url:
                raise ValueError(f"A 'proxy_url' is required for proxy action '{action}'.")
        elif action == "delete_from_redis" and proxy_url and not server_identity:
            # Without this guard the request would fall through to the
            # "delete everything" branch and wipe all proxies on all servers.
            raise ValueError(
                "A 'server_identity' is required to delete a single proxy. "
                "Clear 'proxy_url' if you really intend to delete ALL proxies."
            )
    elif entity == "account":
        if action in ("ban", "unban") and not account_id:
            raise ValueError(f"An 'account_id' is required for account action '{action}'.")


def _clear_dag_runs(dag_id, clear_scope):
    """Delete DagRun rows (and task instances) for ``dag_id``.

    ``clear_scope == "last_run"`` deletes only the run with the latest
    execution date; any other value deletes every run of the DAG.
    """
    logger.info(f"Attempting to delete DagRuns for DAG '{dag_id}' with scope '{clear_scope}'.")
    with create_session() as session:
        dag_run_query = session.query(DagRun).filter(DagRun.dag_id == dag_id)

        if clear_scope == "last_run":
            last_run = dag_run_query.order_by(DagRun.execution_date.desc()).first()
            if not last_run:
                logger.info(f"No runs found for DAG '{dag_id}'. Nothing to delete.")
                print(f"\nNo runs found for DAG '{dag_id}'.\n")
                return
            logger.warning(f"Deleting last DagRun for DAG '{dag_id}' (run_id: {last_run.run_id}, execution_date: {last_run.execution_date}). This will also delete its task instances.")
            # Deleting the DagRun object should cascade and delete related TaskInstances.
            session.delete(last_run)
            deleted_count = 1
        else:  # all_runs
            logger.warning(f"Deleting ALL DagRuns and associated TaskInstances for DAG '{dag_id}'. This will remove all history from the UI.")
            # To ensure all related data is cleared, we explicitly delete TaskInstances first.
            # This is safer than relying on DB-level cascades which may not be configured.
            ti_deleted_count = (
                session.query(TaskInstance)
                .filter(TaskInstance.dag_id == dag_id)
                .delete(synchronize_session=False)
            )
            logger.info(f"Deleted {ti_deleted_count} TaskInstance records for DAG '{dag_id}'.")
            deleted_count = dag_run_query.delete(synchronize_session=False)

        # The session is committed automatically by the `with create_session()` context manager.
        logger.info(f"Successfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.")
        print(f"\nSuccessfully deleted {deleted_count} DagRun(s) for DAG '{dag_id}'.\n")


def _delete_from_redis(client, entity, server_identity, proxy_url, account_id):
    """Run the ``delete_from_redis`` action for ``entity`` via the Thrift client."""
    if entity == "proxy":
        if proxy_url and server_identity:
            logger.info(f"Deleting proxy '{proxy_url}' for server '{server_identity}' from Redis via Thrift service...")
            if client.deleteProxyFromRedis(proxy_url, server_identity):
                print(f"\nSuccessfully deleted proxy '{proxy_url}' for server '{server_identity}' from Redis.\n")
            else:
                print(f"\nFailed to delete proxy '{proxy_url}' for server '{server_identity}' from Redis.\n")
            return
        logger.info("Deleting all proxies from Redis via Thrift service...")
        # With a server_identity only that server's proxies are deleted;
        # without one, proxies for ALL servers are deleted.
        result = client.deleteAllProxiesFromRedis(server_identity)
        if server_identity:
            print(f"\nSuccessfully deleted all proxies for server '{server_identity}' from Redis. Count: {result}\n")
        else:
            print(f"\nSuccessfully deleted all proxies from Redis across ALL servers. Count: {result}\n")

    elif entity == "account":
        if account_id:
            logger.info(f"Deleting account '{account_id}' from Redis via Thrift service...")
            if client.deleteAccountFromRedis(account_id):
                print(f"\nSuccessfully deleted account '{account_id}' from Redis.\n")
            else:
                print(f"\nFailed to delete account '{account_id}' from Redis.\n")
            return
        logger.info("Deleting all accounts from Redis via Thrift service...")
        # account_id is empty here, so no prefix is in effect.
        result = client.deleteAllAccountsFromRedis(account_id)
        print(f"\nSuccessfully deleted all accounts from Redis. Count: {result}\n")

    elif entity == "accounts_and_proxies":
        # Delete accounts; account_id is repurposed as an optional prefix.
        logger.info("Deleting accounts from Redis via Thrift service...")
        account_result = client.deleteAllAccountsFromRedis(account_id)
        if account_id:
            print(f"\nSuccessfully deleted {account_result} account keys with prefix '{account_id}' from Redis.\n")
        else:
            print(f"\nSuccessfully deleted {account_result} account keys from Redis.\n")

        # Delete proxies
        logger.info("Deleting proxies from Redis via Thrift service...")
        proxy_result = client.deleteAllProxiesFromRedis(server_identity)
        if server_identity:
            print(f"\nSuccessfully deleted {proxy_result} proxy keys for server '{server_identity}' from Redis.\n")
        else:
            print(f"\nSuccessfully deleted {proxy_result} proxy keys from Redis across ALL servers.\n")


def _apply_to_all_servers(client, per_server_call, verb):
    """Call ``per_server_call(identity)`` for every server known to the service.

    Server identities are taken from ``client.getProxyStatus(None)``.
    Failures for individual servers are logged and counted, not raised.

    Args:
        client: Thrift management client.
        per_server_call: bound client method taking a server identity.
        verb: ``"ban"`` or ``"unban"``; used only to build messages.
    """
    gerund = f"{verb.capitalize()}ning"  # "Banning" / "Unbanning"
    logger.info(f"No server_identity provided. {gerund} all proxies for ALL servers...")
    all_statuses = client.getProxyStatus(None)
    if not all_statuses:
        print(f"\nNo proxy statuses found for any server. Nothing to {verb}.\n")
        return

    identities = sorted({s.serverIdentity for s in all_statuses})
    logger.info(f"Found {len(identities)} server identities: {identities}")
    print(f"Found {len(identities)} server identities. Sending {verb} request for each...")

    success_count = 0
    fail_count = 0
    for identity in identities:
        try:
            per_server_call(identity)
            logger.info(f"  - Sent {verb}_all for '{identity}'.")
            success_count += 1
        except Exception as e:
            logger.error(f"  - Failed to {verb} all proxies for '{identity}': {e}")
            fail_count += 1

    print(f"\nSuccessfully sent {verb}_all requests for {success_count} server identities.")
    if fail_count > 0:
        print(f"Failed to send {verb}_all requests for {fail_count} server identities. See logs for details.")


def _run_proxy_action(client, action, server_identity, proxy_url):
    """Perform one non-delete proxy action through the Thrift client.

    Raises:
        ValueError: for ``ban`` / ``unban`` without a ``proxy_url``.
    """
    if action == "list_with_status":
        _list_proxy_statuses(client, server_identity)

    elif action in ("ban", "unban"):
        if not proxy_url:
            raise ValueError("A 'proxy_url' is required.")
        if action == "ban":
            logger.info(f"Banning proxy '{proxy_url}' for server '{server_identity}'...")
            client.banProxy(proxy_url, server_identity)
        else:
            logger.info(f"Unbanning proxy '{proxy_url}' for server '{server_identity}'...")
            client.unbanProxy(proxy_url, server_identity)
        print(f"Successfully sent request to {action} proxy '{proxy_url}'.")

    elif action == "ban_all":
        if server_identity:
            logger.info(f"Banning all proxies for server '{server_identity}'...")
            client.banAllProxies(server_identity)
            print(f"Successfully sent request to ban all proxies for '{server_identity}'.")
        else:
            _apply_to_all_servers(client, client.banAllProxies, "ban")

    elif action == "unban_all":
        if server_identity:
            logger.info(f"Unbanning all proxy statuses for server '{server_identity}'...")
            client.resetAllProxyStatuses(server_identity)
            print(f"Successfully sent request to unban all proxy statuses for '{server_identity}'.")
        else:
            _apply_to_all_servers(client, client.resetAllProxyStatuses, "unban")


def _unban_all_accounts(client, account_prefix, redis_conn_id):
    """Unban every account matching ``account_prefix`` (all when falsy).

    Per-account failures are logged and counted, not raised. When at least
    one account matched, the matching statuses are listed again afterwards.
    """
    logger.info(f"Unbanning all account statuses to ACTIVE (prefix: '{account_prefix or 'ALL'}')...")
    all_statuses = client.getAccountStatus(accountId=None, accountPrefix=account_prefix)
    if not all_statuses:
        print(f"No accounts found with prefix '{account_prefix or 'ALL'}' to unban.")
        return

    accounts_to_unban = [s.accountId for s in all_statuses]
    logger.info(f"Found {len(accounts_to_unban)} accounts to unban.")
    print(f"Found {len(accounts_to_unban)} accounts. Sending unban request for each...")

    reason = f"Manual unban_all from Airflow mgmt DAG by {socket.gethostname()}"
    unban_count = 0
    fail_count = 0
    for acc_id in accounts_to_unban:
        try:
            client.unbanAccount(accountId=acc_id, reason=reason)
            logger.info(f"  - Sent unban for '{acc_id}'.")
            unban_count += 1
        except Exception as e:
            logger.error(f"  - Failed to unban account '{acc_id}': {e}")
            fail_count += 1

    print(f"\nSuccessfully sent unban requests for {unban_count} accounts.")
    if fail_count > 0:
        print(f"Failed to send unban requests for {fail_count} accounts. See logs for details.")

    # List statuses again to confirm. The value is a prefix, so it must be
    # sent as accountPrefix rather than as an exact accountId.
    print("\n--- Listing statuses after unban_all ---")
    _list_account_statuses(client, None, redis_conn_id, account_prefix=account_prefix)


def _run_account_action(client, action, account_id, redis_conn_id):
    """Perform one non-delete account action through the Thrift client.

    ``ban_all`` has no account counterpart and is skipped.

    Raises:
        ValueError: for ``ban`` / ``unban`` without an ``account_id``.
    """
    if action == "list_with_status":
        _list_account_statuses(client, account_id, redis_conn_id)

    elif action == "ban":
        if not account_id:
            raise ValueError("An 'account_id' is required.")
        reason = f"Manual ban from Airflow mgmt DAG by {socket.gethostname()}"
        logger.info(f"Banning account '{account_id}'...")
        client.banAccount(accountId=account_id, reason=reason)
        print(f"Successfully sent request to ban account '{account_id}'.")

    elif action == "unban":
        if not account_id:
            raise ValueError("An 'account_id' is required.")
        reason = f"Manual un-ban from Airflow mgmt DAG by {socket.gethostname()}"
        logger.info(f"Unbanning account '{account_id}'...")
        client.unbanAccount(accountId=account_id, reason=reason)
        print(f"Successfully sent request to unban account '{account_id}'.")

    elif action == "unban_all":
        # account_id is repurposed as an optional prefix here.
        _unban_all_accounts(client, account_id, redis_conn_id)

    else:
        logger.info(f"No account operation is defined for action '{action}'; skipping accounts.")


def _run_combined_action(client, action, server_identity, proxy_url, account_id, redis_conn_id):
    """Run ``action`` for proxies, then for accounts, isolating their failures.

    A failure in the proxy half does not prevent the account half from
    running.

    Returns:
        A list of human-readable failure descriptions (empty on full success).
    """
    print(f"\n--- Performing action '{action}' on BOTH Proxies and Accounts ---")
    failures = []

    # --- Proxy Action ---
    try:
        print("\n-- Running Proxy Action --")
        _run_proxy_action(client, action, server_identity, proxy_url)
    except Exception as proxy_e:
        logger.error(f"Error during proxy action '{action}': {proxy_e}", exc_info=True)
        print(f"\nERROR during proxy action: {proxy_e}")
        failures.append(f"proxy action failed: {proxy_e}")

    # --- Account Action ---
    try:
        print("\n-- Running Account Action --")
        _run_account_action(client, action, account_id, redis_conn_id)
    except Exception as account_e:
        logger.error(f"Error during account action '{action}': {account_e}", exc_info=True)
        print(f"\nERROR during account action: {account_e}")
        failures.append(f"account action failed: {account_e}")

    return failures


def manage_system_callable(**context):
    """Main callable to interact with the system management endpoints.

    Reads ``entity``, ``action`` and the optional target parameters from
    ``context["params"]``, validates them, and dispatches to the matching
    helper.

    Raises:
        ValueError: for an invalid entity/action pair or a missing parameter.
        AirflowException: when either half of an ``accounts_and_proxies``
            action failed (raised after both halves were attempted).
        Exception: any error from the Thrift client is logged and re-raised.
    """
    # Log version for debugging
    logger.info(f"Running ytdlp_mgmt_proxy_account DAG version {DAG_VERSION}")

    params = context["params"]
    entity = params["entity"]
    action = params["action"]
    server_identity = params.get("server_identity")
    proxy_url = params.get("proxy_url")
    account_id = params.get("account_id")

    _validate_request(entity, action, server_identity, proxy_url, account_id)

    # Airflow Meta actions are handled separately as they don't use Thrift.
    if entity == "airflow_meta":
        _clear_dag_runs(params.get("dag_id_to_manage"), params.get("clear_scope"))
        return

    host = params["management_host"]
    port = params["management_port"]
    redis_conn_id = params.get("redis_conn_id", DEFAULT_REDIS_CONN_ID)

    label = "delete action" if action == "delete_from_redis" else f"action '{action}'"
    failures = []
    try:
        with _thrift_connection(host, port) as client:
            if action == "delete_from_redis":
                _delete_from_redis(client, entity, server_identity, proxy_url, account_id)
            elif entity == "proxy":
                _run_proxy_action(client, action, server_identity, proxy_url)
            elif entity == "account":
                _run_account_action(client, action, account_id, redis_conn_id)
            elif entity == "accounts_and_proxies":
                failures = _run_combined_action(
                    client, action, server_identity, proxy_url, account_id, redis_conn_id
                )
    except (PBServiceException, PBUserException) as e:
        logger.error(f"Thrift error performing {label}: {e.message}", exc_info=True)
        print(f"\nERROR: Thrift service error: {e.message}\n")
        raise
    except NotImplementedError as e:
        logger.error(f"Feature not implemented: {e}", exc_info=True)
        raise
    except Exception as e:
        logger.error(f"Error performing {label}: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")
        raise

    if failures:
        # Both halves were attempted; surface the failure so the task does
        # not report success for a partially applied action.
        raise AirflowException(
            f"Action '{action}' on accounts_and_proxies completed with errors: " + "; ".join(failures)
        )


# Markdown is kept at column 0 so list and heading syntax is not affected by
# source indentation.
_DAG_DOC_MD = """
### YT-DLP Proxy and Account Manager DAG

This DAG provides tools to manage the state of proxies and accounts used by the `ytdlp-ops-server`.
Select an `entity` and an `action` to perform.

**IMPORTANT NOTE ABOUT DATA SOURCES:**
- **Proxy Statuses**: Read from the server's internal state via Thrift service calls.
- **Account Statuses**: Read from the Thrift service, and then enriched with live cooldown data directly from Redis.

**IMPORTANT NOTE ABOUT PROXY MANAGEMENT:**
- Proxies are managed by the server's internal state through Thrift methods
- There is NO direct Redis manipulation for proxies - they are managed entirely by the server
- To properly manage proxies, use the Thrift service methods (ban, unban, etc.)
"""

_ACTION_DESCRIPTION = """The management action to perform.

---

#### Actions for `entity: proxy`
- `list_with_status`: View status of all proxies, optionally filtered by `server_identity`.
- `ban`: Ban a specific proxy for a given `server_identity`. Requires `proxy_url`.
- `unban`: Un-ban a specific proxy. Requires `proxy_url`.
- `ban_all`: Sets the status of all proxies for a given `server_identity` (or all servers) to `BANNED`.
- `unban_all`: Resets the status of all proxies for a given `server_identity` (or all servers) to `ACTIVE`.
- `delete_from_redis`: **(Destructive)** Deletes proxy status from Redis via Thrift service. This permanently removes the proxy from being tracked by the system.
  If `proxy_url` and `server_identity` are provided, it deletes a single proxy.
  If only `server_identity` is provided, it deletes all proxies for that server.
  If neither is provided, it deletes ALL proxies across all servers.
  A `proxy_url` without a `server_identity` is rejected.

#### Actions for `entity: account`
- `list_with_status`: View status of all accounts, optionally filtered by `account_id` (as a prefix).
- `ban`: Ban a specific account. Requires `account_id`.
- `unban`: Un-ban a specific account. Requires `account_id`.
- `unban_all`: Sets the status of all accounts (or those matching a prefix in `account_id`) to `ACTIVE`.
- `delete_from_redis`: **(Destructive)** Deletes account status from Redis via Thrift service. This permanently removes the account from being tracked by the system.
  If `account_id` is provided, it deletes that specific account.
  If `account_id` is provided as a prefix, it deletes all accounts matching that prefix.
  If `account_id` is empty, it deletes ALL accounts.

#### Actions for `entity: accounts_and_proxies`
- This entity performs the selected action on **both** proxies and accounts where applicable.
- `list_with_status`: View statuses for both proxies and accounts.
- `ban`: Ban a specific proxy AND a specific account. Requires `proxy_url`, `server_identity`, and `account_id`.
- `unban`: Un-ban a specific proxy AND a specific account. Requires `proxy_url`, `server_identity`, and `account_id`.
- `ban_all`: Ban all proxies for a `server_identity` (or all servers). Does not affect accounts.
- `unban_all`: Un-ban all proxies for a `server_identity` (or all servers) AND all accounts (optionally filtered by `account_id` as a prefix).
- `delete_from_redis`: Deletes both account and proxy status from Redis via Thrift service.
  For accounts, if `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts.
  For proxies, if `server_identity` is provided, it deletes all proxies for that server. If `server_identity` is empty, it deletes ALL proxies across all servers.

#### Actions for `entity: airflow_meta`
- `clear_dag_runs`: **(Destructive)** Deletes DAG run history and associated task instances from the database, removing them from the UI. This allows the runs to be re-created if backfilling is enabled.
    - `clear_scope: last_run`: Deletes only the most recent DAG run and its task instances.
    - `clear_scope: all_runs`: Deletes all historical DAG runs and task instances for the selected DAG.
"""

with DAG(
    dag_id="ytdlp_mgmt_proxy_account",
    start_date=days_ago(1),
    schedule=None,
    catchup=False,
    tags=["ytdlp", "mgmt", "master"],
    doc_md=_DAG_DOC_MD,
    params={
        "management_host": Param(
            DEFAULT_MANAGEMENT_SERVICE_IP,
            type="string",
            title="Management Service Host",
            description="The hostname or IP of the management service. Can be a Docker container name (e.g., 'envoy-thrift-lb') if on the same network.",
        ),
        "management_port": Param(
            DEFAULT_MANAGEMENT_SERVICE_PORT,
            type="integer",
            title="Management Service Port",
            description="The port of the dedicated management service.",
        ),
        "entity": Param(
            "accounts_and_proxies",
            type="string",
            enum=["account", "proxy", "accounts_and_proxies", "airflow_meta"],
            description="The type of entity to manage.",
        ),
        "action": Param(
            "list_with_status",
            type="string",
            enum=["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis", "clear_dag_runs"],
            description=_ACTION_DESCRIPTION,
        ),
        "server_identity": Param(
            None,
            type=["null", "string"],
            description="The identity of the server instance (for proxy management). Leave blank to list all or delete all proxies.",
        ),
        "proxy_url": Param(
            None,
            type=["null", "string"],
            description="The proxy URL to act upon (e.g., 'socks5://host:port').",
        ),
        "account_id": Param(
            None,
            type=["null", "string"],
            description="The account ID to act upon. For `unban_all` or `delete_from_redis` on accounts, this can be an optional prefix. Leave blank to delete all accounts.",
        ),
        "redis_conn_id": Param(
            DEFAULT_REDIS_CONN_ID,
            type="string",
            title="Redis Connection ID",
            description="The Airflow connection ID for the Redis server (used for fetching detailed account status).",
        ),
        "dag_id_to_manage": Param(
            "ytdlp_ops_worker_per_url",
            type="string",
            enum=["ytdlp_ops_worker_per_url", "ytdlp_ops_orchestrator"],
            title="[Airflow Meta] DAG ID",
            description="The DAG ID to perform the action on.",
        ),
        "clear_scope": Param(
            "last_run",
            type="string",
            enum=["last_run", "all_runs"],
            title="[Airflow Meta] Clear Scope",
            description="For 'clear_dag_runs' action, specifies the scope of runs to clear.",
        ),
    },
) as dag:
    system_management_task = PythonOperator(
        task_id="system_management_task",
        python_callable=manage_system_callable,
    )