"""DAG to manage the state of proxies used by the ytdlp-ops-server.

The DAG exposes a single, manually triggered task that connects to the
server's Thrift endpoint and lists, bans, unbans, or resets proxy statuses
according to the DAG run parameters.
"""
from __future__ import annotations

import logging
from datetime import datetime

from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

# Configure logging
logger = logging.getLogger(__name__)

# Import and apply Thrift exceptions patch for Airflow compatibility
try:
    from thrift_exceptions_patch import patch_thrift_exceptions

    patch_thrift_exceptions()
    logger.info("Applied Thrift exceptions patch for Airflow compatibility.")
except ImportError:
    logger.warning("Could not import thrift_exceptions_patch. Compatibility may be affected.")
except Exception as e:
    # Best effort: a failing patch must not prevent the DAG from being parsed.
    logger.error("Error applying Thrift exceptions patch: %s", e)

# Thrift imports
try:
    from thrift.transport import TSocket, TTransport
    from thrift.protocol import TBinaryProtocol
    from pangramia.yt.tokens_ops import YTTokenOpService
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
except ImportError as e:
    logger.critical(
        "Could not import Thrift modules: %s. Ensure ytdlp-ops-auth package is installed.", e
    )
    # Fail DAG parsing if thrift modules are not available
    raise

# Actions that cannot run without a 'server_identity' / 'proxy_url' parameter.
_VALID_ACTIONS = ("list", "ban", "unban", "reset_all")
_SERVER_REQUIRED_ACTIONS = ("ban", "unban", "reset_all")
_PROXY_REQUIRED_ACTIONS = ("ban", "unban")


def format_timestamp(ts_str: str) -> str:
    """Format a Unix-epoch timestamp string as ``YYYY-MM-DD HH:MM:SS``.

    The conversion uses ``datetime.fromtimestamp`` without a timezone, so the
    result is expressed in the local time of the process.

    Args:
        ts_str: Timestamp in seconds since the epoch, as a string (anything
            ``float()`` accepts also works).

    Returns:
        The formatted date, ``""`` when the value is empty or not positive,
        or the original value unchanged when it cannot be converted.
    """
    if not ts_str:
        return ""
    try:
        ts_float = float(ts_str)
        if ts_float <= 0:
            return ""
        return datetime.fromtimestamp(ts_float).strftime("%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError, OverflowError, OSError):
        # OverflowError/OSError are raised by fromtimestamp() for values the
        # platform cannot represent (e.g. "inf", "1e30"); a single bad value
        # must not abort the whole status listing.
        return ts_str  # Return original value if conversion fails


def get_thrift_client(host: str, port: int):
    """Create a Thrift client for the ops server and open its transport.

    Args:
        host: Hostname or IP address of the Thrift service.
        port: TCP port of the Thrift service.

    Returns:
        A ``(client, transport)`` tuple. The transport is already open and
        the caller is responsible for closing it.
    """
    sock = TSocket.TSocket(host, port)
    transport = TTransport.TFramedTransport(sock)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = YTTokenOpService.Client(protocol)
    transport.open()
    logger.info("Connected to Thrift server at %s:%s", host, port)
    return client, transport


def _print_proxy_statuses(client, server_identity) -> None:
    """Fetch proxy statuses from the server and print them as a grid table."""
    logger.info("Listing proxy statuses for server: %s", server_identity or "ALL")
    statuses = client.getProxyStatus(server_identity)
    if not statuses:
        logger.info("No proxy statuses found.")
        print("No proxy statuses found.")
        return

    # Imported lazily so that only the 'list' action needs tabulate.
    from tabulate import tabulate

    rows = [
        {
            "Server": s.serverIdentity,
            "Proxy URL": s.proxyUrl,
            "Status": s.status,
            "Success": s.successCount,
            "Failures": s.failureCount,
            "Last Success": format_timestamp(s.lastSuccessTimestamp),
            "Last Failure": format_timestamp(s.lastFailureTimestamp),
        }
        for s in statuses
    ]
    print("\n--- Proxy Statuses ---")
    print(tabulate(rows, headers="keys", tablefmt="grid"))
    print("----------------------\n")


def _require_success(success, rpc_name: str, failure_log: str) -> None:
    """Log and raise when a mutating RPC reported a falsy result."""
    if not success:
        logger.error(failure_log)
        raise RuntimeError(f"Server returned failure for {rpc_name} operation.")


def manage_proxies_callable(**context):
    """Run the proxy management action selected by the DAG run parameters.

    Reads ``action``, ``host``, ``port``, ``server_identity`` and
    ``proxy_url`` from ``context["params"]``, connects to the Thrift service
    and performs the requested action. Results are written to the task log
    and to stdout.

    Raises:
        ValueError: If the action is unknown or a parameter required by the
            action is missing. Raised before any connection is opened.
        RuntimeError: If the server reports failure for a ban, unban or
            reset operation.
        PBServiceException, PBUserException: Thrift errors from the server
            are logged and re-raised.
    """
    params = context["params"]
    action = params["action"]
    host = params["host"]
    port = params["port"]
    server_identity = params.get("server_identity")
    proxy_url = params.get("proxy_url")

    # Validate everything up front so a bad request never costs a connection.
    if action not in _VALID_ACTIONS:
        raise ValueError(f"Invalid action: {action}")
    if not server_identity and action in _SERVER_REQUIRED_ACTIONS:
        raise ValueError(f"A 'server_identity' is required for the '{action}' action.")
    if not proxy_url and action in _PROXY_REQUIRED_ACTIONS:
        raise ValueError(f"A 'proxy_url' is required to {action} a proxy.")

    transport = None
    try:
        client, transport = get_thrift_client(host, port)

        if action == "list":
            _print_proxy_statuses(client, server_identity)

        elif action == "ban":
            logger.info("Banning proxy '%s' for server '%s'...", proxy_url, server_identity)
            _require_success(
                client.banProxy(proxy_url, server_identity),
                "banProxy",
                "Failed to ban proxy.",
            )
            logger.info("Successfully banned proxy.")
            print(f"Successfully banned proxy '{proxy_url}' for server '{server_identity}'.")

        elif action == "unban":
            logger.info("Unbanning proxy '%s' for server '%s'...", proxy_url, server_identity)
            _require_success(
                client.unbanProxy(proxy_url, server_identity),
                "unbanProxy",
                "Failed to unban proxy.",
            )
            logger.info("Successfully unbanned proxy.")
            print(f"Successfully unbanned proxy '{proxy_url}' for server '{server_identity}'.")

        elif action == "reset_all":
            logger.info("Resetting all proxy statuses for server '%s'...", server_identity)
            _require_success(
                client.resetAllProxyStatuses(server_identity),
                "resetAllProxyStatuses",
                "Failed to reset all proxy statuses.",
            )
            logger.info("Successfully reset all proxy statuses.")
            print(f"Successfully reset all proxy statuses for server '{server_identity}'.")

    except (PBServiceException, PBUserException) as e:
        logger.exception("Thrift error performing action '%s': %s", action, e.message)
        raise
    except Exception as e:
        logger.exception("Error performing action '%s': %s", action, e)
        raise
    finally:
        # transport stays None when the connection attempt itself failed.
        if transport and transport.isOpen():
            transport.close()
            logger.info("Thrift connection closed.")


with DAG(
    dag_id="ytdlp_mgmt_proxy",
    start_date=days_ago(1),
    schedule=None,
    catchup=False,
    tags=["ytdlp", "utility", "proxy"],
    doc_md="""
    ### YT-DLP Proxy Manager DAG

    This DAG provides tools to manage the state of proxies used by the `ytdlp-ops-server`.
    You can view statuses, and manually ban, unban, or reset proxies for a specific server instance.

    **Parameters:**
    - `host`: The hostname or IP of the `ytdlp-ops-server` Thrift service.
    - `port`: The port of the Thrift service.
    - `action`: The operation to perform.
        - `list`: List proxy statuses. Provide a `server_identity` to query a specific server, or leave it blank to query the server instance you are connected to.
        - `ban`: Ban a specific proxy. Requires `server_identity` and `proxy_url`.
        - `unban`: Un-ban a specific proxy. Requires `server_identity` and `proxy_url`.
        - `reset_all`: Reset all proxies for a server to `ACTIVE`. Requires `server_identity`.
    - `server_identity`: The unique identifier for the server instance (e.g., `ytdlp-ops-airflow-service`).
    - `proxy_url`: The full URL of the proxy to act upon (e.g., `socks5://host:port`).
    """,
    params={
        "host": Param(
            "89.253.221.173",
            type="string",
            description="The hostname of the ytdlp-ops-server service.",
        ),
        "port": Param(
            9090,
            type="integer",
            description="The port of the ytdlp-ops-server service.",
        ),
        "action": Param(
            "list",
            type="string",
            enum=["list", "ban", "unban", "reset_all"],
            description="The management action to perform.",
        ),
        "server_identity": Param(
            "ytdlp-ops-airflow-service",
            type=["null", "string"],
            description="The identity of the server to manage. Leave blank to query the connected server instance.",
        ),
        "proxy_url": Param(
            None,
            type=["null", "string"],
            description="The proxy URL to ban/unban (e.g., 'socks5://host:port').",
        ),
    },
) as dag:
    proxy_management_task = PythonOperator(
        task_id="proxy_management_task",
        python_callable=manage_proxies_callable,
    )