# File: yt-dlp-dags/dags/ytdlp_mgmt_proxy.py
#
# Repository listing metadata: 198 lines
# 8.3 KiB
# Python

"""
DAG to manage the state of proxies used by the ytdlp-ops-server.
"""
from __future__ import annotations
import logging
from datetime import datetime
from airflow.models.dag import DAG
from airflow.models.param import Param
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
# Configure logging: module-level logger named after this module.
logger = logging.getLogger(__name__)

# Import and apply Thrift exceptions patch for Airflow compatibility.
# This step is best-effort: neither a missing patch module nor a failure while
# applying the patch aborts DAG parsing; both outcomes are only logged.
try:
    from thrift_exceptions_patch import patch_thrift_exceptions
    patch_thrift_exceptions()
    logger.info("Applied Thrift exceptions patch for Airflow compatibility.")
except ImportError:
    # Raised when the patch module cannot be imported (an ImportError raised
    # from inside patch_thrift_exceptions() would land here as well).
    logger.warning("Could not import thrift_exceptions_patch. Compatibility may be affected.")
except Exception as e:
    # Any other failure while applying the patch is logged and swallowed.
    logger.error(f"Error applying Thrift exceptions patch: {e}")
# Thrift imports: transport/protocol classes plus the generated service client
# and exception types used by the functions below.
try:
    from thrift.transport import TSocket, TTransport
    from thrift.protocol import TBinaryProtocol
    from pangramia.yt.tokens_ops import YTTokenOpService
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
except ImportError as e:
    logger.critical(f"Could not import Thrift modules: {e}. Ensure ytdlp-ops-auth package is installed.")
    # Fail DAG parsing if thrift modules are not available: unlike the
    # compatibility patch above, these imports are mandatory, so the error is
    # re-raised after being logged.
    raise
def format_timestamp(ts_str: str) -> str:
    """Format a Unix-epoch timestamp string as ``YYYY-MM-DD HH:MM:SS``.

    The value is parsed with ``float()`` and rendered with
    ``datetime.fromtimestamp`` without a ``tz`` argument, i.e. in the local
    timezone of the running process.

    Args:
        ts_str: Timestamp in seconds since the epoch, as a string. Falsy
            values (``""``, ``None``) mean "no timestamp".

    Returns:
        The formatted date string; ``""`` when the timestamp is missing or not
        positive; the original value (as a string) when it cannot be converted.
    """
    if not ts_str:
        return ""
    try:
        ts_float = float(ts_str)
        if ts_float <= 0:
            return ""
        return datetime.fromtimestamp(ts_float).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, TypeError, OverflowError, OSError):
        # ValueError/TypeError: the value is not numeric at all.
        # OverflowError/OSError: the value parses as a float but lies outside
        # the range the platform can convert (e.g. "inf", "1e30"). One bad
        # timestamp must not abort rendering of the whole status table, so the
        # raw value is shown instead.
        return str(ts_str)
def get_thrift_client(host: str, port: int):
    """Create a YTTokenOpService client connected to ``host:port``.

    The socket is wrapped in a framed transport and spoken to with the binary
    protocol. The transport is opened before returning.

    Returns:
        A ``(client, transport)`` tuple. This function does not close the
        transport; it is handed back so the caller can do so.
    """
    framed = TTransport.TFramedTransport(TSocket.TSocket(host, port))
    client = YTTokenOpService.Client(TBinaryProtocol.TBinaryProtocol(framed))
    framed.open()
    logger.info(f"Connected to Thrift server at {host}:{port}")
    return client, framed
def _print_proxy_statuses(statuses) -> None:
    """Print the given proxy status records to stdout as a grid table."""
    # Imported locally, as in the original code: only the 'list' action needs it.
    from tabulate import tabulate

    rows = [
        {
            "Server": s.serverIdentity,
            "Proxy URL": s.proxyUrl,
            "Status": s.status,
            "Success": s.successCount,
            "Failures": s.failureCount,
            "Last Success": format_timestamp(s.lastSuccessTimestamp),
            "Last Failure": format_timestamp(s.lastFailureTimestamp),
        }
        for s in statuses
    ]
    print("\n--- Proxy Statuses ---")
    print(tabulate(rows, headers="keys", tablefmt="grid"))
    print("----------------------\n")


def manage_proxies_callable(**context):
    """Main callable to interact with the proxy management endpoints.

    Reads ``action``, ``host``, ``port``, ``server_identity`` and ``proxy_url``
    from ``context["params"]``, connects to the Thrift service and performs the
    requested action (``list``, ``ban``, ``unban`` or ``reset_all``).

    Raises:
        ValueError: If the action is unknown, or a parameter required by the
            action (``server_identity`` / ``proxy_url``) is missing. Raised
            before any connection is opened.
        RuntimeError: If the server reports failure for a ban, unban or reset.
        PBServiceException, PBUserException: Thrift errors are logged and
            re-raised unchanged, as is any other exception.
    """
    params = context["params"]
    action = params["action"]
    host = params["host"]
    port = params["port"]
    server_identity = params.get("server_identity")
    proxy_url = params.get("proxy_url")

    # Validate all inputs up front so a bad request fails fast instead of
    # opening a network connection first.
    if action not in ("list", "ban", "unban", "reset_all"):
        raise ValueError(f"Invalid action: {action}")
    if not server_identity and action in ("ban", "unban", "reset_all"):
        raise ValueError(f"A 'server_identity' is required for the '{action}' action.")
    if not proxy_url and action in ("ban", "unban"):
        raise ValueError(f"A 'proxy_url' is required to {action} a proxy.")

    transport = None
    try:
        client, transport = get_thrift_client(host, port)

        if action == "list":
            logger.info(f"Listing proxy statuses for server: {server_identity or 'ALL'}")
            statuses = client.getProxyStatus(server_identity)
            if statuses:
                _print_proxy_statuses(statuses)
            else:
                logger.info("No proxy statuses found.")
                print("No proxy statuses found.")

        elif action == "ban":
            logger.info(f"Banning proxy '{proxy_url}' for server '{server_identity}'...")
            if not client.banProxy(proxy_url, server_identity):
                logger.error("Failed to ban proxy.")
                raise RuntimeError("Server returned failure for banProxy operation.")
            logger.info("Successfully banned proxy.")
            print(f"Successfully banned proxy '{proxy_url}' for server '{server_identity}'.")

        elif action == "unban":
            logger.info(f"Unbanning proxy '{proxy_url}' for server '{server_identity}'...")
            if not client.unbanProxy(proxy_url, server_identity):
                logger.error("Failed to unban proxy.")
                raise RuntimeError("Server returned failure for unbanProxy operation.")
            logger.info("Successfully unbanned proxy.")
            print(f"Successfully unbanned proxy '{proxy_url}' for server '{server_identity}'.")

        else:  # "reset_all": the only action left after the validation above
            logger.info(f"Resetting all proxy statuses for server '{server_identity}'...")
            if not client.resetAllProxyStatuses(server_identity):
                logger.error("Failed to reset all proxy statuses.")
                raise RuntimeError("Server returned failure for resetAllProxyStatuses operation.")
            logger.info("Successfully reset all proxy statuses.")
            print(f"Successfully reset all proxy statuses for server '{server_identity}'.")

    except (PBServiceException, PBUserException) as e:
        # Fall back to the exception itself when it carries no 'message', so
        # this handler can never replace the real error with an AttributeError.
        detail = getattr(e, "message", None) or e
        logger.error(f"Thrift error performing action '{action}': {detail}", exc_info=True)
        raise
    except Exception as e:
        logger.error(f"Error performing action '{action}': {e}", exc_info=True)
        raise
    finally:
        # transport stays None when the connection attempt itself failed.
        if transport and transport.isOpen():
            transport.close()
            logger.info("Thrift connection closed.")
# Manually triggered utility DAG (schedule=None, no catchup) with a single task
# that runs manage_proxies_callable using the run's params.
with DAG(
    dag_id="ytdlp_mgmt_proxy",
    # Static start date: airflow.utils.dates.days_ago is deprecated and yields a
    # value that changes every day. With schedule=None and catchup=False the
    # exact date is irrelevant as long as it lies in the past.
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
    tags=["ytdlp", "utility", "proxy"],
    doc_md="""
### YT-DLP Proxy Manager DAG
This DAG provides tools to manage the state of proxies used by the `ytdlp-ops-server`.
You can view statuses, and manually ban, unban, or reset proxies for a specific server instance.
**Parameters:**
- `host`: The hostname or IP of the `ytdlp-ops-server` Thrift service.
- `port`: The port of the Thrift service.
- `action`: The operation to perform.
- `list`: List proxy statuses. Provide a `server_identity` to query a specific server, or leave it blank to query the server instance you are connected to.
- `ban`: Ban a specific proxy. Requires `server_identity` and `proxy_url`.
- `unban`: Un-ban a specific proxy. Requires `server_identity` and `proxy_url`.
- `reset_all`: Reset all proxies for a server to `ACTIVE`. Requires `server_identity`.
- `server_identity`: The unique identifier for the server instance (e.g., `ytdlp-ops-airflow-service`).
- `proxy_url`: The full URL of the proxy to act upon (e.g., `socks5://host:port`).
""",
    params={
        # NOTE(review): hard-coded public IP as the default host; consider
        # sourcing it from an Airflow Variable or Connection instead.
        "host": Param("89.253.221.173", type="string", description="The hostname of the ytdlp-ops-server service."),
        "port": Param(9090, type="integer", description="The port of the ytdlp-ops-server service."),
        "action": Param(
            "list",
            type="string",
            enum=["list", "ban", "unban", "reset_all"],
            description="The management action to perform.",
        ),
        "server_identity": Param(
            "ytdlp-ops-airflow-service",
            type=["null", "string"],
            description="The identity of the server to manage. Leave blank to query the connected server instance.",
        ),
        "proxy_url": Param(
            None,
            type=["null", "string"],
            description="The proxy URL to ban/unban (e.g., 'socks5://host:port').",
        ),
    },
) as dag:
    proxy_management_task = PythonOperator(
        task_id="proxy_management_task",
        python_callable=manage_proxies_callable,
    )