198 lines
8.3 KiB
Python
198 lines
8.3 KiB
Python
"""
|
|
DAG to manage the state of proxies used by the ytdlp-ops-server.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from airflow.models.dag import DAG
|
|
from airflow.models.param import Param
|
|
from airflow.operators.python import PythonOperator
|
|
from airflow.utils.dates import days_ago
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Apply the Thrift exceptions patch for Airflow compatibility.
# This step is best-effort: a missing or failing patch is logged and module
# loading continues.
try:
    from thrift_exceptions_patch import patch_thrift_exceptions

    patch_thrift_exceptions()
except ImportError:
    logger.warning("Could not import thrift_exceptions_patch. Compatibility may be affected.")
except Exception as e:
    # logger.exception keeps the traceback; the message alone rarely explains
    # why applying the patch failed.
    logger.exception("Error applying Thrift exceptions patch: %s", e)
else:
    logger.info("Applied Thrift exceptions patch for Airflow compatibility.")
|
|
|
|
# Thrift transport/protocol classes plus the service client and exception
# types used by the callables below.
try:
    from thrift.transport import TSocket, TTransport
    from thrift.protocol import TBinaryProtocol
    from pangramia.yt.tokens_ops import YTTokenOpService
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
except ImportError as import_error:
    logger.critical(f"Could not import Thrift modules: {import_error}. Ensure ytdlp-ops-auth package is installed.")
    # Re-raise so that DAG parsing fails when the Thrift modules are unavailable.
    raise
|
|
|
|
def format_timestamp(ts_str: str) -> str:
    """Format an epoch-seconds timestamp string as ``YYYY-MM-DD HH:MM:SS``.

    The conversion uses ``datetime.fromtimestamp`` without a timezone, so the
    result is expressed in the local time of the running process.

    Args:
        ts_str: Seconds since the epoch, as a string (any value accepted by
            ``float()`` works as well).

    Returns:
        The formatted date string; ``""`` when the value is empty, zero or
        negative; the input unchanged when it cannot be interpreted as a
        timestamp.
    """
    if not ts_str:
        return ""
    try:
        ts_float = float(ts_str)
        if ts_float <= 0:
            return ""
        return datetime.fromtimestamp(ts_float).strftime("%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError, OverflowError, OSError):
        # float() rejects non-numeric input (ValueError/TypeError), while
        # fromtimestamp() raises OverflowError/OSError/ValueError for infinite
        # or out-of-range values. In every case fall back to the raw value so
        # a single bad field cannot break the caller's report.
        return ts_str
|
|
|
|
def get_thrift_client(host: str, port: int):
    """Connect to the Thrift service and return ``(client, transport)``.

    A socket to ``host:port`` is wrapped in a framed transport and a binary
    protocol. The transport is returned already open; this function never
    closes it, so the caller has to.
    """
    raw_socket = TSocket.TSocket(host, port)
    framed_transport = TTransport.TFramedTransport(raw_socket)
    binary_protocol = TBinaryProtocol.TBinaryProtocol(framed_transport)
    service_client = YTTokenOpService.Client(binary_protocol)

    framed_transport.open()
    logger.info(f"Connected to Thrift server at {host}:{port}")
    return service_client, framed_transport
|
|
|
|
def _print_proxy_statuses(client, server_identity):
    """Fetch proxy statuses via ``getProxyStatus`` and print them as a grid table."""
    logger.info(f"Listing proxy statuses for server: {server_identity or 'ALL'}")
    statuses = client.getProxyStatus(server_identity)
    if not statuses:
        logger.info("No proxy statuses found.")
        print("No proxy statuses found.")
        return

    # Imported here rather than at module level, so tabulate is only needed
    # when there are statuses to render.
    from tabulate import tabulate

    rows = [
        {
            "Server": s.serverIdentity,
            "Proxy URL": s.proxyUrl,
            "Status": s.status,
            "Success": s.successCount,
            "Failures": s.failureCount,
            "Last Success": format_timestamp(s.lastSuccessTimestamp),
            "Last Failure": format_timestamp(s.lastFailureTimestamp),
        }
        for s in statuses
    ]
    print("\n--- Proxy Statuses ---")
    print(tabulate(rows, headers="keys", tablefmt="grid"))
    print("----------------------\n")


def _require_success(succeeded, rpc_name, failure_log):
    """Log ``failure_log`` and raise when a mutating RPC returned a falsy result."""
    if not succeeded:
        logger.error(failure_log)
        raise RuntimeError(f"Server returned failure for {rpc_name} operation.")


def manage_proxies_callable(**context):
    """Run one proxy-management action against the Thrift service.

    Reads ``action``, ``host``, ``port`` and the optional ``server_identity``
    and ``proxy_url`` from ``context["params"]``. All parameter validation
    happens before the connection is opened, so an invalid request fails
    without touching the network.

    Raises:
        ValueError: If ``action`` is unknown, ``server_identity`` is missing
            for ``ban``/``unban``/``reset_all``, or ``proxy_url`` is missing
            for ``ban``/``unban``.
        RuntimeError: If the server returns a falsy result for a mutating
            action.
        PBServiceException, PBUserException: Re-raised after logging.
    """
    params = context["params"]
    action = params["action"]
    host = params["host"]
    port = params["port"]
    server_identity = params.get("server_identity")
    proxy_url = params.get("proxy_url")

    # Fail fast on bad input, before any connection is attempted.
    if action not in ("list", "ban", "unban", "reset_all"):
        raise ValueError(f"Invalid action: {action}")
    if not server_identity and action in ("ban", "unban", "reset_all"):
        raise ValueError(f"A 'server_identity' is required for the '{action}' action.")
    if not proxy_url and action in ("ban", "unban"):
        raise ValueError(f"A 'proxy_url' is required to {action} a proxy.")

    transport = None
    try:
        client, transport = get_thrift_client(host, port)

        if action == "list":
            _print_proxy_statuses(client, server_identity)

        elif action == "ban":
            logger.info(f"Banning proxy '{proxy_url}' for server '{server_identity}'...")
            _require_success(
                client.banProxy(proxy_url, server_identity),
                "banProxy",
                "Failed to ban proxy.",
            )
            logger.info("Successfully banned proxy.")
            print(f"Successfully banned proxy '{proxy_url}' for server '{server_identity}'.")

        elif action == "unban":
            logger.info(f"Unbanning proxy '{proxy_url}' for server '{server_identity}'...")
            _require_success(
                client.unbanProxy(proxy_url, server_identity),
                "unbanProxy",
                "Failed to unban proxy.",
            )
            logger.info("Successfully unbanned proxy.")
            print(f"Successfully unbanned proxy '{proxy_url}' for server '{server_identity}'.")

        else:  # "reset_all" -- the only remaining validated action
            logger.info(f"Resetting all proxy statuses for server '{server_identity}'...")
            _require_success(
                client.resetAllProxyStatuses(server_identity),
                "resetAllProxyStatuses",
                "Failed to reset all proxy statuses.",
            )
            logger.info("Successfully reset all proxy statuses.")
            print(f"Successfully reset all proxy statuses for server '{server_identity}'.")

    except (PBServiceException, PBUserException) as e:
        # getattr guards against an exception object without a `message`
        # attribute, which would otherwise mask the original error.
        logger.exception("Thrift error performing action '%s': %s", action, getattr(e, "message", e))
        raise
    except Exception as e:
        logger.exception("Error performing action '%s': %s", action, e)
        raise
    finally:
        # transport stays None when the connection could not be created.
        if transport and transport.isOpen():
            transport.close()
            logger.info("Thrift connection closed.")
|
|
|
|
# Manually triggered utility DAG (schedule=None) with a single task that runs
# manage_proxies_callable using the parameters supplied at trigger time.
with DAG(
    dag_id="ytdlp_mgmt_proxy",
    # Fixed start date: airflow.utils.dates.days_ago is deprecated and yields a
    # value that shifts on every parse. With schedule=None and catchup=False
    # any past date is sufficient.
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
    tags=["ytdlp", "utility", "proxy"],
    doc_md="""
    ### YT-DLP Proxy Manager DAG

    This DAG provides tools to manage the state of proxies used by the `ytdlp-ops-server`.
    You can view statuses, and manually ban, unban, or reset proxies for a specific server instance.

    **Parameters:**
    - `host`: The hostname or IP of the `ytdlp-ops-server` Thrift service.
    - `port`: The port of the Thrift service.
    - `action`: The operation to perform.
        - `list`: List proxy statuses. Provide a `server_identity` to query a specific server, or leave it blank to query the server instance you are connected to.
        - `ban`: Ban a specific proxy. Requires `server_identity` and `proxy_url`.
        - `unban`: Un-ban a specific proxy. Requires `server_identity` and `proxy_url`.
        - `reset_all`: Reset all proxies for a server to `ACTIVE`. Requires `server_identity`.
    - `server_identity`: The unique identifier for the server instance (e.g., `ytdlp-ops-airflow-service`).
    - `proxy_url`: The full URL of the proxy to act upon (e.g., `socks5://host:port`).
    """,
    params={
        # NOTE(review): the default host is a hard-coded public IP; consider
        # sourcing it from an Airflow Connection or Variable instead.
        "host": Param("89.253.221.173", type="string", description="The hostname of the ytdlp-ops-server service."),
        "port": Param(9090, type="integer", description="The port of the ytdlp-ops-server service."),
        "action": Param(
            "list",
            type="string",
            enum=["list", "ban", "unban", "reset_all"],
            description="The management action to perform.",
        ),
        "server_identity": Param(
            "ytdlp-ops-airflow-service",
            type=["null", "string"],
            description="The identity of the server to manage. Leave blank to query the connected server instance.",
        ),
        "proxy_url": Param(
            None,
            type=["null", "string"],
            description="The proxy URL to ban/unban (e.g., 'socks5://host:port').",
        ),
    },
) as dag:
    proxy_management_task = PythonOperator(
        task_id="proxy_management_task",
        python_callable=manage_proxies_callable,
    )
|