891 lines
48 KiB
Python
891 lines
48 KiB
Python
"""
|
|
DAG to manage the state of proxies and accounts used by the ytdlp-ops-server.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import json
|
|
import re
|
|
import time
|
|
from datetime import datetime
|
|
import socket
|
|
|
|
from airflow.exceptions import AirflowException
|
|
from airflow.models.dag import DAG
|
|
from airflow.models.dagbag import DagBag
|
|
from airflow.models.dagrun import DagRun
|
|
from airflow.models.param import Param
|
|
from airflow.models.taskinstance import TaskInstance
|
|
from airflow.operators.python import PythonOperator
|
|
from airflow.utils.dates import days_ago
|
|
from airflow.models.variable import Variable
|
|
from airflow.providers.redis.hooks.redis import RedisHook
|
|
from airflow.utils.session import create_session
|
|
|
|
# Configure logging
# Module-level logger used by the helpers and the import guards in this file.
logger = logging.getLogger(__name__)

# Import and apply Thrift exceptions patch for Airflow compatibility
# This step is best-effort: every failure below is only logged, so module
# import continues whether or not the patch could be applied.
try:
    from thrift_exceptions_patch import patch_thrift_exceptions
    patch_thrift_exceptions()
    logger.info("Applied Thrift exceptions patch for Airflow compatibility.")
except ImportError:
    # Raised when the patch module cannot be imported (or if applying the
    # patch itself raises ImportError); warn and carry on without it.
    logger.warning("Could not import thrift_exceptions_patch. Compatibility may be affected.")
except Exception as e:
    # Any other failure while applying the patch is recorded but not re-raised.
    logger.error(f"Error applying Thrift exceptions patch: {e}")
|
|
|
|
# Thrift imports
# The exception types, the client factory and the timestamp formatter are used
# by the helpers in this module, so a failed import here is treated as fatal:
# it is logged at CRITICAL level and then re-raised.
try:
    from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
    from yt_ops_services.client_utils import get_thrift_client, format_timestamp
except ImportError as e:
    logger.critical(f"Could not import Thrift modules: {e}. Ensure yt_ops_services package is installed correctly.")
    # Fail DAG parsing if thrift modules are not available
    raise
|
|
|
|
# Defaults for reaching the management service. Both can be overridden through
# Airflow Variables; the literals are used when the Variable is not defined.
# NOTE(review): Variable.get() is evaluated at import time, i.e. whenever this
# file is parsed -- confirm that is acceptable for this deployment.
DEFAULT_MANAGEMENT_SERVICE_IP = Variable.get("MANAGEMENT_SERVICE_HOST", default_var="172.17.0.1")
# A stored Variable comes back as a string while the fallback literal is an
# int. Coerce to int so the constant has one type regardless of its origin.
DEFAULT_MANAGEMENT_SERVICE_PORT = int(Variable.get("MANAGEMENT_SERVICE_PORT", default_var=9080))
DEFAULT_REDIS_CONN_ID = "redis_default"

# Version tracking for debugging
DAG_VERSION = "1.7.1"  # Updated to handle Redis configuration errors
|
|
|
|
|
|
# Helper function to connect to Redis, similar to other DAGs
def _get_redis_client(redis_conn_id: str):
    """Return a Redis client built from an Airflow connection.

    Args:
        redis_conn_id: ID of the Airflow connection handed to ``RedisHook``.

    Returns:
        The object returned by ``RedisHook.get_conn()``.

    Raises:
        AirflowException: If creating the hook or obtaining the connection
            fails for any reason. The underlying error is chained as the
            exception's ``__cause__`` so the original traceback is preserved.
    """
    try:
        return RedisHook(redis_conn_id=redis_conn_id).get_conn()
    except Exception as e:
        logger.error(f"Failed to connect to Redis using connection '{redis_conn_id}': {e}")
        # Chain explicitly so callers and logs can see the root cause.
        raise AirflowException(f"Redis connection failed: {e}") from e
|
|
|
|
|
|
|
|
def _list_proxy_statuses(client, server_identity):
    """Print a grid table of proxy statuses obtained from the Thrift service.

    Args:
        client: Thrift client; only its ``getProxyStatus`` method is used.
        server_identity: Passed straight to ``getProxyStatus``. A falsy value
            is logged as 'ALL'.

    Returns:
        None. The table and any error notes are written with ``print``.

    Raises:
        PBServiceException: Re-raised when the service error is anything other
            than the "Redis is not configured for this server" condition.
            Any other exception from the status call is logged, printed and
            swallowed.
    """
    logger.info(f"Listing proxy statuses for server: {server_identity or 'ALL'}")
    logger.info("NOTE: Proxy statuses are read from server's internal state via Thrift service")
    try:
        statuses = client.getProxyStatus(server_identity)
    except PBServiceException as e:
        # The message may be unset; fall back to "" so the substring test
        # cannot raise TypeError and hide the real service error.
        message = e.message or ""
        if "Redis is not configured for this server" not in message:
            # Re-raise if it's a different PBServiceException
            raise
        logger.error(f"Redis not configured on server: {e.message}")
        print(f"\nERROR: Server configuration issue - {e.message}\n")
        print("This server does not have Redis configured for proxy management.\n")
        return
    except Exception as e:
        logger.error(f"Unexpected error getting proxy statuses: {e}", exc_info=True)
        print(f"\nERROR: Unexpected error getting proxy statuses: {e}\n")
        return

    if not statuses:
        logger.info("No proxy statuses found.")
        return

    from tabulate import tabulate

    # This is forward-compatible: it checks for new attributes before using them.
    # Only the first status object is inspected to decide for the whole table.
    has_extended_info = hasattr(statuses[0], 'recentAccounts') or hasattr(statuses[0], 'recentMachines')

    status_list = []
    for s in statuses:
        status_item = {
            "Server": s.serverIdentity,
            "Proxy URL": s.proxyUrl,
            "Status": s.status,
            "Success": s.successCount,
            "Failures": s.failureCount,
            "Last Success": format_timestamp(s.lastSuccessTimestamp),
            "Last Failure": format_timestamp(s.lastFailureTimestamp),
        }
        if has_extended_info:
            recent_accounts = getattr(s, 'recentAccounts', [])
            recent_machines = getattr(s, 'recentMachines', [])
            status_item["Recent Accounts"] = "\n".join(recent_accounts) if recent_accounts else "N/A"
            status_item["Recent Machines"] = "\n".join(recent_machines) if recent_machines else "N/A"
        status_list.append(status_item)

    print("\n--- Proxy Statuses ---")
    # The f-string with a newline ensures the table starts on a new line in the logs.
    # Column headers come from the dict keys, so no separate header list is needed.
    print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}")
    print("----------------------\n")
    if not has_extended_info:
        logger.warning("Server does not seem to support 'recentAccounts' or 'recentMachines' fields yet.")
        print("NOTE: To see Recent Accounts/Machines, the server's `getProxyStatus` method must be updated to return these fields.")
|
|
|
|
|
|
def _list_account_statuses(client, account_id, redis_conn_id):
    """Print a grid table of account statuses, most recently active first.

    Statuses come from ``client.getAccountStatus``. For accounts whose status
    contains 'RESTING', the 'resting_until' field of the Redis hash
    ``account_status:<accountId>`` is used to show a live countdown.

    Args:
        client: Thrift client; only ``getAccountStatus`` is used.
        account_id: Account to look up; a falsy value is logged as 'ALL'.
        redis_conn_id: Airflow connection ID used to obtain the Redis client.

    Returns:
        None. Output is written with ``print``. Errors raised while fetching
        or rendering the statuses are logged and printed, not propagated.
    """
    logger.info(f"Listing account statuses for account: {account_id or 'ALL'}")
    logger.info("NOTE: Account statuses are read from the Thrift service and enriched with live data from Redis.")

    # Redis is optional here: without it the table still shows the server status.
    live_redis = None
    try:
        live_redis = _get_redis_client(redis_conn_id)
        logger.info("Successfully connected to Redis to fetch detailed account status.")
    except Exception as e:
        logger.warning(f"Could not connect to Redis to get detailed status. Will show basic status. Error: {e}")
        live_redis = None

    try:
        # The thrift method takes accountId (specific) or accountPrefix.
        # With account_id unset, both arguments are None.
        statuses = client.getAccountStatus(accountId=account_id, accountPrefix=None)
        if not statuses:
            print("\n--- Account Statuses ---\nNo account statuses found.\n------------------------\n")
            return

        from tabulate import tabulate

        # Each entry is (last activity timestamp, table row); the timestamp is
        # only used for ordering and never reaches the printed table.
        ranked_rows = []

        for acct in statuses:
            display_status = acct.status
            # For a resting account, prefer the live countdown from Redis.
            if live_redis and 'RESTING' in display_status:
                try:
                    # The expiry is read from the 'resting_until' hash field.
                    raw_expiry = live_redis.hget(f"account_status:{acct.accountId}", "resting_until")
                    if raw_expiry:
                        resting_until = float(raw_expiry)
                        current = datetime.now().timestamp()
                        if current >= resting_until:
                            display_status = "ACTIVE (was RESTING)"
                        else:
                            left = int(resting_until - current)
                            if left > 3600:
                                hours, within_hour = divmod(left, 3600)
                                display_status = f"RESTING (active in {hours}h {within_hour // 60}m)"
                            elif left > 60:
                                minutes, seconds = divmod(left, 60)
                                display_status = f"RESTING (active in {minutes}m {seconds}s)"
                            else:
                                display_status = f"RESTING (active in {left}s)"
                except Exception as e:
                    logger.warning(f"Could not parse resting time for {acct.accountId} from Redis: {e}. Using server status.")

            # Most recent of last success / last failure drives the ordering.
            success_ts = float(acct.lastSuccessTimestamp) if acct.lastSuccessTimestamp else 0
            failure_ts = float(acct.lastFailureTimestamp) if acct.lastFailureTimestamp else 0
            latest_ts = max(success_ts, failure_ts)

            row = {
                "Account ID": acct.accountId,
                "Status": display_status,
                "Success": acct.successCount,
                "Failures": acct.failureCount,
                "Last Success": format_timestamp(acct.lastSuccessTimestamp),
                "Last Failure": format_timestamp(acct.lastFailureTimestamp),
                "Last Proxy": acct.lastUsedProxy or "N/A",
                "Last Machine": acct.lastUsedMachine or "N/A",
            }
            ranked_rows.append((latest_ts, row))

        # Descending by last activity; the sort is stable, so ties keep server order.
        ranked_rows.sort(key=lambda pair: pair[0], reverse=True)
        table_rows = [row for _, row in ranked_rows]

        print("\n--- Account Statuses ---")
        # The leading newline makes the table start on its own line in the logs.
        print(f"\n{tabulate(table_rows, headers='keys', tablefmt='grid')}")
        print("------------------------\n")
    except (PBServiceException, PBUserException) as e:
        logger.error(f"Failed to get account statuses: {e.message}", exc_info=True)
        print(f"\nERROR: Could not retrieve account statuses. Server returned: {e.message}\n")
    except Exception as e:
        logger.error(f"An unexpected error occurred while getting account statuses: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")
|
|
|
|
|
|
def _list_client_statuses(redis_conn_id):
    """Print a grid table of per-client statistics read from Redis.

    The statistics are read from the Redis hash 'client_stats'; each field
    name is a client name and each value is a JSON document. A client whose
    entry cannot be parsed or rendered is shown as an "ERROR" row instead of
    aborting the whole table.

    Args:
        redis_conn_id: Airflow connection ID used to obtain the Redis client.

    Returns:
        None. Output is written with ``print``. Unexpected errors are logged
        and printed, not propagated.
    """
    logger.info("Listing client statuses from Redis key 'client_stats'")

    def format_latest(data):
        """Render a 'latest_success' / 'latest_failure' record as a table cell."""
        if not data:
            return "N/A"
        ts = format_timestamp(data.get('timestamp'))
        url = data.get('url') or 'N/A'
        machine = data.get('machine_id', 'N/A')
        # Pull the 11-character value of the 'v=' query parameter, if any.
        video_id_match = re.search(r'v=([a-zA-Z0-9_-]{11})', url)
        video_id = video_id_match.group(1) if video_id_match else 'N/A'
        return f"{ts}\nMachine: {machine}\nVideo ID: {video_id}"

    try:
        redis_client = _get_redis_client(redis_conn_id)
        stats_key = "client_stats"
        all_stats_raw = redis_client.hgetall(stats_key)

        if not all_stats_raw:
            print("\n--- Client Statuses ---\nNo client stats found in Redis.\n-----------------------\n")
            return

        from tabulate import tabulate
        status_list = []

        for client_bytes, stats_json_bytes in all_stats_raw.items():
            client_name = client_bytes.decode('utf-8')
            try:
                stats = json.loads(stats_json_bytes.decode('utf-8'))
                status_item = {
                    "Client": client_name,
                    "Success": stats.get('success_count', 0),
                    "Failures": stats.get('failure_count', 0),
                    "Last Success": format_latest(stats.get('latest_success')),
                    "Last Failure": format_latest(stats.get('latest_failure')),
                }
                status_list.append(status_item)
            except (ValueError, AttributeError, TypeError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                # AttributeError and TypeError cover payloads of an unexpected
                # shape (e.g. a non-dict document or a non-string 'url').
                # One bad entry must not hide the remaining clients.
                logger.error(f"Could not parse stats for client '{client_name}': {e}")
                status_list.append({
                    "Client": client_name, "Success": "ERROR", "Failures": "ERROR",
                    "Last Success": "Could not parse data", "Last Failure": "Could not parse data"
                })

        status_list.sort(key=lambda item: item.get('Client', ''))

        print("\n--- Client Statuses ---")
        print(f"\n{tabulate(status_list, headers='keys', tablefmt='grid')}")
        print("-----------------------\n")

    except Exception as e:
        logger.error(f"An unexpected error occurred while getting client statuses: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")
|
|
|
|
|
|
def _list_activity_counters(redis_conn_id: str):
    """Print activity counts per proxy and per account from Redis sorted sets.

    Keys matching 'activity:per_proxy:*' and 'activity:per_account:*' are
    scanned. For each key, members scored older than ``now - 3660`` are
    removed, then members are counted over the last 1 minute, 5 minutes and
    1 hour. Keys with no activity in the last hour are left out of the table.

    Args:
        redis_conn_id: Airflow connection ID used to obtain the Redis client.

    Returns:
        None. Output is written with ``print``. Unexpected errors are logged
        and printed, not propagated.
    """
    logger.info("Listing activity counters from Redis keys 'activity:per_proxy:*' and 'activity:per_account:*'")

    try:
        conn = _get_redis_client(redis_conn_id)
        from tabulate import tabulate
        now = time.time()
        # Entries scored below this are trimmed before counting (> 1hr old).
        stale_before = now - 3660
        # Look-back windows in seconds, in the order they are queried.
        windows = (60, 300, 3600)

        def report_activity(key_pattern, column_title):
            """Trim, count and print one table for the keys matching key_pattern."""
            matching_keys = conn.scan_iter(key_pattern)
            rows = []
            for raw_key in matching_keys:
                redis_key = raw_key.decode('utf-8')
                # Everything after the second ':' is the entity identifier.
                ident = redis_key.split(':', 2)[-1]

                conn.zremrangebyscore(redis_key, '-inf', stale_before)

                last_minute, last_five, last_hour = [
                    conn.zcount(redis_key, now - span, now) for span in windows
                ]

                # Only entities with activity in the last hour are listed.
                if last_hour != 0:
                    rows.append({
                        column_title: ident,
                        "Activity (Last 1m)": last_minute,
                        "Activity (Last 5m)": last_five,
                        "Activity (Last 1h)": last_hour,
                    })

            rows.sort(key=lambda row: row.get(column_title, ''))

            print(f"\n--- {column_title} Activity Counters ---")
            if rows:
                print(f"\n{tabulate(rows, headers='keys', tablefmt='grid')}")
            else:
                print(f"No recent activity found for {column_title.lower()}s.")
            print("-----------------------------------\n")

        report_activity("activity:per_proxy:*", "Proxy URL")
        report_activity("activity:per_account:*", "Account ID")

    except Exception as e:
        logger.error(f"An unexpected error occurred while getting activity counters: {e}", exc_info=True)
        print(f"\nERROR: An unexpected error occurred: {e}\n")
|
|
|
|
|
|
def manage_system_callable(**context):
|
|
"""Main callable to interact with the system management endpoints."""
|
|
# Log version for debugging
|
|
logger.info(f"Running ytdlp_mgmt_proxy_account DAG version {DAG_VERSION}")
|
|
|
|
params = context["params"]
|
|
entity = params["entity"]
|
|
action = params["action"]
|
|
|
|
# For Thrift actions, use the new management host/port
|
|
if entity not in ["activity_counters"]:
|
|
host = params["management_host"]
|
|
port = params["management_port"]
|
|
else:
|
|
host, port = None, None # Not needed for meta actions
|
|
|
|
server_identity = params.get("server_identity")
|
|
proxy_url = params.get("proxy_url")
|
|
account_id = params.get("account_id")
|
|
|
|
# --- Validate Action/Entity Combination and Parameters ---
|
|
valid_actions = {
|
|
"proxy": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
|
|
"account": ["list_with_status", "ban", "unban", "unban_all", "delete_from_redis"],
|
|
"client": ["list_with_status", "delete_from_redis"],
|
|
"accounts_and_proxies": ["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
|
|
"activity_counters": ["list_with_status"],
|
|
}
|
|
|
|
if action not in valid_actions.get(entity, []):
|
|
raise ValueError(
|
|
f"The action '{action}' is not valid for entity '{entity}'.\n"
|
|
f"Valid actions for '{entity}' are: {', '.join(valid_actions.get(entity, ['None']))}."
|
|
)
|
|
|
|
# Validate required parameters for the chosen action
|
|
if entity == "proxy":
|
|
if action in ["ban", "unban"] and not server_identity:
|
|
raise ValueError(f"A 'server_identity' is required for proxy action '{action}'.")
|
|
if action in ["ban", "unban"] and not proxy_url:
|
|
raise ValueError(f"A 'proxy_url' is required for proxy action '{action}'.")
|
|
|
|
if entity == "account":
|
|
if action in ["ban", "unban"] and not account_id:
|
|
raise ValueError(f"An 'account_id' is required for account action '{action}'.")
|
|
|
|
|
|
# --- Handle Activity Counter action ---
|
|
if entity == "activity_counters":
|
|
if action == "list_with_status":
|
|
_list_activity_counters(params["redis_conn_id"])
|
|
return # End execution
|
|
else:
|
|
raise ValueError(f"Action '{action}' is not valid for entity 'activity_counters'. Only 'list_with_status' is supported.")
|
|
|
|
# Handle Thrift-based deletion actions
|
|
if action == "delete_from_redis":
|
|
client, transport = None, None
|
|
try:
|
|
client, transport = get_thrift_client(host, port)
|
|
|
|
if entity == "proxy":
|
|
proxy_url = params.get("proxy_url")
|
|
server_identity = params.get("server_identity")
|
|
|
|
if proxy_url and server_identity:
|
|
logger.info(f"Deleting proxy '{proxy_url}' for server '{server_identity}' from Redis via Thrift service...")
|
|
result = client.deleteProxyFromRedis(proxy_url, server_identity)
|
|
if result:
|
|
print(f"\nSuccessfully deleted proxy '{proxy_url}' for server '{server_identity}' from Redis.\n")
|
|
else:
|
|
print(f"\nFailed to delete proxy '{proxy_url}' for server '{server_identity}' from Redis.\n")
|
|
else:
|
|
logger.info("Deleting all proxies from Redis via Thrift service...")
|
|
# If server_identity is provided, delete all proxies for that server
|
|
# If server_identity is None, delete all proxies for ALL servers
|
|
result = client.deleteAllProxiesFromRedis(server_identity)
|
|
if server_identity:
|
|
print(f"\nSuccessfully deleted all proxies for server '{server_identity}' from Redis. Count: {result}\n")
|
|
else:
|
|
print(f"\nSuccessfully deleted all proxies from Redis across ALL servers. Count: {result}\n")
|
|
|
|
elif entity == "account":
|
|
account_id = params.get("account_id")
|
|
|
|
if account_id:
|
|
logger.info(f"Deleting account '{account_id}' from Redis via Thrift service...")
|
|
result = client.deleteAccountFromRedis(account_id)
|
|
if result:
|
|
print(f"\nSuccessfully deleted account '{account_id}' from Redis.\n")
|
|
else:
|
|
print(f"\nFailed to delete account '{account_id}' from Redis.\n")
|
|
else:
|
|
logger.info("Deleting all accounts from Redis via Thrift service...")
|
|
# If account_id is provided as prefix, delete all accounts with that prefix
|
|
# If account_id is None, delete all accounts
|
|
account_prefix = params.get("account_id")
|
|
result = client.deleteAllAccountsFromRedis(account_prefix)
|
|
if account_prefix:
|
|
print(f"\nSuccessfully deleted all accounts with prefix '{account_prefix}' from Redis. Count: {result}\n")
|
|
else:
|
|
print(f"\nSuccessfully deleted all accounts from Redis. Count: {result}\n")
|
|
|
|
elif entity == "accounts_and_proxies":
|
|
# Delete accounts
|
|
account_prefix = params.get("account_id") # Repurpose account_id param as an optional prefix
|
|
logger.info("Deleting accounts from Redis via Thrift service...")
|
|
account_result = client.deleteAllAccountsFromRedis(account_prefix)
|
|
if account_prefix:
|
|
print(f"\nSuccessfully deleted {account_result} account keys with prefix '{account_prefix}' from Redis.\n")
|
|
else:
|
|
print(f"\nSuccessfully deleted {account_result} account keys from Redis.\n")
|
|
|
|
# Delete proxies
|
|
server_identity = params.get("server_identity")
|
|
logger.info("Deleting proxies from Redis via Thrift service...")
|
|
proxy_result = client.deleteAllProxiesFromRedis(server_identity)
|
|
if server_identity:
|
|
print(f"\nSuccessfully deleted {proxy_result} proxy keys for server '{server_identity}' from Redis.\n")
|
|
else:
|
|
print(f"\nSuccessfully deleted {proxy_result} proxy keys from Redis across ALL servers.\n")
|
|
|
|
elif entity == "client":
|
|
logger.info("Deleting all client stats from Redis...")
|
|
redis_client = _get_redis_client(params["redis_conn_id"])
|
|
result = redis_client.delete("client_stats")
|
|
if result > 0:
|
|
print(f"\nSuccessfully deleted 'client_stats' key from Redis.\n")
|
|
else:
|
|
print(f"\nKey 'client_stats' not found in Redis. Nothing to delete.\n")
|
|
|
|
except (PBServiceException, PBUserException) as e:
|
|
logger.error(f"Thrift error performing delete action: {e.message}", exc_info=True)
|
|
print(f"\nERROR: Thrift service error: {e.message}\n")
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error performing delete action: {e}", exc_info=True)
|
|
print(f"\nERROR: An unexpected error occurred: {e}\n")
|
|
raise
|
|
finally:
|
|
if transport and transport.isOpen():
|
|
transport.close()
|
|
logger.info("Thrift connection closed.")
|
|
return # End execution for this action
|
|
|
|
client, transport = None, None
|
|
try:
|
|
client, transport = get_thrift_client(host, port)
|
|
|
|
if entity == "client":
|
|
if action == "list_with_status":
|
|
_list_client_statuses(params["redis_conn_id"])
|
|
elif entity == "proxy":
|
|
if action == "list_with_status":
|
|
_list_proxy_statuses(client, server_identity)
|
|
elif action == "ban":
|
|
if not proxy_url: raise ValueError("A 'proxy_url' is required.")
|
|
logger.info(f"Banning proxy '{proxy_url}' for server '{server_identity}'...")
|
|
client.banProxy(proxy_url, server_identity)
|
|
print(f"Successfully sent request to ban proxy '{proxy_url}'.")
|
|
elif action == "unban":
|
|
if not proxy_url: raise ValueError("A 'proxy_url' is required.")
|
|
logger.info(f"Unbanning proxy '{proxy_url}' for server '{server_identity}'...")
|
|
client.unbanProxy(proxy_url, server_identity)
|
|
print(f"Successfully sent request to unban proxy '{proxy_url}'.")
|
|
elif action == "ban_all":
|
|
if server_identity:
|
|
logger.info(f"Banning all proxies for server '{server_identity}'...")
|
|
client.banAllProxies(server_identity)
|
|
print(f"Successfully sent request to ban all proxies for '{server_identity}'.")
|
|
else:
|
|
logger.info("No server_identity provided. Banning all proxies for ALL servers...")
|
|
all_statuses = client.getProxyStatus(None)
|
|
if not all_statuses:
|
|
print("\nNo proxy statuses found for any server. Nothing to ban.\n")
|
|
return
|
|
|
|
all_server_identities = sorted(list(set(s.serverIdentity for s in all_statuses)))
|
|
logger.info(f"Found {len(all_server_identities)} server identities: {all_server_identities}")
|
|
print(f"Found {len(all_server_identities)} server identities. Sending ban request for each...")
|
|
|
|
success_count = 0
|
|
fail_count = 0
|
|
for identity in all_server_identities:
|
|
try:
|
|
client.banAllProxies(identity)
|
|
logger.info(f" - Sent ban_all for '{identity}'.")
|
|
success_count += 1
|
|
except Exception as e:
|
|
logger.error(f" - Failed to ban all proxies for '{identity}': {e}")
|
|
fail_count += 1
|
|
|
|
print(f"\nSuccessfully sent ban_all requests for {success_count} server identities.")
|
|
if fail_count > 0:
|
|
print(f"Failed to send ban_all requests for {fail_count} server identities. See logs for details.")
|
|
elif action == "unban_all":
|
|
if server_identity:
|
|
logger.info(f"Unbanning all proxy statuses for server '{server_identity}'...")
|
|
client.resetAllProxyStatuses(server_identity)
|
|
print(f"Successfully sent request to unban all proxy statuses for '{server_identity}'.")
|
|
else:
|
|
logger.info("No server_identity provided. Unbanning all proxies for ALL servers...")
|
|
all_statuses = client.getProxyStatus(None)
|
|
if not all_statuses:
|
|
print("\nNo proxy statuses found for any server. Nothing to unban.\n")
|
|
return
|
|
|
|
all_server_identities = sorted(list(set(s.serverIdentity for s in all_statuses)))
|
|
logger.info(f"Found {len(all_server_identities)} server identities: {all_server_identities}")
|
|
print(f"Found {len(all_server_identities)} server identities. Sending unban request for each...")
|
|
|
|
success_count = 0
|
|
fail_count = 0
|
|
for identity in all_server_identities:
|
|
try:
|
|
client.resetAllProxyStatuses(identity)
|
|
logger.info(f" - Sent unban_all for '{identity}'.")
|
|
success_count += 1
|
|
except Exception as e:
|
|
logger.error(f" - Failed to unban all proxies for '{identity}': {e}")
|
|
fail_count += 1
|
|
|
|
print(f"\nSuccessfully sent unban_all requests for {success_count} server identities.")
|
|
if fail_count > 0:
|
|
print(f"Failed to send unban_all requests for {fail_count} server identities. See logs for details.")
|
|
|
|
elif entity == "account":
|
|
if action == "list_with_status":
|
|
_list_account_statuses(client, account_id, params["redis_conn_id"])
|
|
elif action == "ban":
|
|
if not account_id: raise ValueError("An 'account_id' is required.")
|
|
reason = f"Manual ban from Airflow mgmt DAG by {socket.gethostname()}"
|
|
logger.info(f"Banning account '{account_id}'...")
|
|
client.banAccount(accountId=account_id, reason=reason)
|
|
print(f"Successfully sent request to ban account '{account_id}'.")
|
|
elif action == "unban":
|
|
if not account_id: raise ValueError("An 'account_id' is required.")
|
|
reason = f"Manual un-ban from Airflow mgmt DAG by {socket.gethostname()}"
|
|
logger.info(f"Unbanning account '{account_id}'...")
|
|
|
|
# Fetch status to get current success count before unbanning
|
|
statuses = client.getAccountStatus(accountId=account_id, accountPrefix=None)
|
|
if not statuses:
|
|
raise AirflowException(f"Account '{account_id}' not found.")
|
|
current_success_count = statuses[0].successCount or 0
|
|
|
|
client.unbanAccount(accountId=account_id, reason=reason)
|
|
print(f"Successfully sent request to unban account '{account_id}'.")
|
|
|
|
# Set the success_count_at_activation to baseline the account
|
|
redis_client = _get_redis_client(params["redis_conn_id"])
|
|
redis_client.hset(f"account_status:{account_id}", "success_count_at_activation", current_success_count)
|
|
logger.info(f"Set 'success_count_at_activation' for '{account_id}' to {current_success_count}.")
|
|
elif action == "unban_all":
|
|
account_prefix = account_id # Repurpose account_id param as an optional prefix
|
|
logger.info(f"Unbanning all account statuses to ACTIVE (prefix: '{account_prefix or 'ALL'}')...")
|
|
|
|
all_statuses = client.getAccountStatus(accountId=None, accountPrefix=account_prefix)
|
|
if not all_statuses:
|
|
print(f"No accounts found with prefix '{account_prefix or 'ALL'}' to unban.")
|
|
return
|
|
|
|
accounts_to_unban = [s.accountId for s in all_statuses]
|
|
account_map = {s.accountId: s for s in all_statuses}
|
|
redis_client = _get_redis_client(params["redis_conn_id"])
|
|
|
|
logger.info(f"Found {len(accounts_to_unban)} accounts to unban.")
|
|
print(f"Found {len(accounts_to_unban)} accounts. Sending unban request for each...")
|
|
|
|
unban_count = 0
|
|
fail_count = 0
|
|
for acc_id in accounts_to_unban:
|
|
try:
|
|
reason = f"Manual unban_all from Airflow mgmt DAG by {socket.gethostname()}"
|
|
client.unbanAccount(accountId=acc_id, reason=reason)
|
|
logger.info(f" - Sent unban for '{acc_id}'.")
|
|
|
|
# Also set the success_count_at_activation to baseline the account
|
|
current_success_count = account_map[acc_id].successCount or 0
|
|
redis_client.hset(f"account_status:{acc_id}", "success_count_at_activation", current_success_count)
|
|
logger.info(f" - Set 'success_count_at_activation' for '{acc_id}' to {current_success_count}.")
|
|
|
|
unban_count += 1
|
|
except Exception as e:
|
|
logger.error(f" - Failed to unban account '{acc_id}': {e}")
|
|
fail_count += 1
|
|
|
|
print(f"\nSuccessfully sent unban requests for {unban_count} accounts.")
|
|
if fail_count > 0:
|
|
print(f"Failed to send unban requests for {fail_count} accounts. See logs for details.")
|
|
|
|
# Optionally, list statuses again to confirm
|
|
print("\n--- Listing statuses after unban_all ---")
|
|
_list_account_statuses(client, account_prefix, params["redis_conn_id"])
|
|
|
|
elif entity == "accounts_and_proxies":
|
|
if action == "list_with_status":
|
|
print("\n--- Listing statuses for Proxies, Accounts, and Clients ---")
|
|
_list_proxy_statuses(client, server_identity)
|
|
_list_account_statuses(client, account_id, params["redis_conn_id"])
|
|
_list_client_statuses(params["redis_conn_id"])
|
|
return # End execution for list_with_status
|
|
|
|
print(f"\n--- Performing action '{action}' on BOTH Proxies and Accounts ---")
|
|
|
|
# --- Proxy Action ---
|
|
try:
|
|
print("\n-- Running Proxy Action --")
|
|
if action == "list_with_status":
|
|
_list_proxy_statuses(client, server_identity)
|
|
elif action == "ban":
|
|
if not proxy_url: raise ValueError("A 'proxy_url' is required.")
|
|
logger.info(f"Banning proxy '{proxy_url}' for server '{server_identity}'...")
|
|
client.banProxy(proxy_url, server_identity)
|
|
print(f"Successfully sent request to ban proxy '{proxy_url}'.")
|
|
elif action == "unban":
|
|
if not proxy_url: raise ValueError("A 'proxy_url' is required.")
|
|
logger.info(f"Unbanning proxy '{proxy_url}' for server '{server_identity}'...")
|
|
client.unbanProxy(proxy_url, server_identity)
|
|
print(f"Successfully sent request to unban proxy '{proxy_url}'.")
|
|
elif action == "ban_all":
|
|
if server_identity:
|
|
logger.info(f"Banning all proxies for server '{server_identity}'...")
|
|
client.banAllProxies(server_identity)
|
|
print(f"Successfully sent request to ban all proxies for '{server_identity}'.")
|
|
else:
|
|
logger.info("No server_identity provided. Banning all proxies for ALL servers...")
|
|
all_statuses = client.getProxyStatus(None)
|
|
if not all_statuses:
|
|
print("\nNo proxy statuses found for any server. Nothing to ban.\n")
|
|
else:
|
|
all_server_identities = sorted(list(set(s.serverIdentity for s in all_statuses)))
|
|
logger.info(f"Found {len(all_server_identities)} server identities: {all_server_identities}")
|
|
print(f"Found {len(all_server_identities)} server identities. Sending ban request for each...")
|
|
|
|
success_count = 0
|
|
fail_count = 0
|
|
for identity in all_server_identities:
|
|
try:
|
|
client.banAllProxies(identity)
|
|
logger.info(f" - Sent ban_all for '{identity}'.")
|
|
success_count += 1
|
|
except Exception as e:
|
|
logger.error(f" - Failed to ban all proxies for '{identity}': {e}")
|
|
fail_count += 1
|
|
|
|
print(f"\nSuccessfully sent ban_all requests for {success_count} server identities.")
|
|
if fail_count > 0:
|
|
print(f"Failed to send ban_all requests for {fail_count} server identities. See logs for details.")
|
|
elif action == "unban_all":
|
|
if server_identity:
|
|
logger.info(f"Unbanning all proxy statuses for server '{server_identity}'...")
|
|
client.resetAllProxyStatuses(server_identity)
|
|
print(f"Successfully sent request to unban all proxy statuses for '{server_identity}'.")
|
|
else:
|
|
logger.info("No server_identity provided. Unbanning all proxies for ALL servers...")
|
|
all_statuses = client.getProxyStatus(None)
|
|
if not all_statuses:
|
|
print("\nNo proxy statuses found for any server. Nothing to unban.\n")
|
|
else:
|
|
all_server_identities = sorted(list(set(s.serverIdentity for s in all_statuses)))
|
|
logger.info(f"Found {len(all_server_identities)} server identities: {all_server_identities}")
|
|
print(f"Found {len(all_server_identities)} server identities. Sending unban request for each...")
|
|
|
|
success_count = 0
|
|
fail_count = 0
|
|
for identity in all_server_identities:
|
|
try:
|
|
client.resetAllProxyStatuses(identity)
|
|
logger.info(f" - Sent unban_all for '{identity}'.")
|
|
success_count += 1
|
|
except Exception as e:
|
|
logger.error(f" - Failed to unban all proxies for '{identity}': {e}")
|
|
fail_count += 1
|
|
|
|
print(f"\nSuccessfully sent unban_all requests for {success_count} server identities.")
|
|
if fail_count > 0:
|
|
print(f"Failed to send unban_all requests for {fail_count} server identities. See logs for details.")
|
|
except Exception as proxy_e:
|
|
logger.error(f"Error during proxy action '{action}': {proxy_e}", exc_info=True)
|
|
print(f"\nERROR during proxy action: {proxy_e}")
|
|
|
|
# --- Account Action ---
|
|
try:
|
|
print("\n-- Running Account Action --")
|
|
if action == "list_with_status":
|
|
_list_account_statuses(client, account_id, params["redis_conn_id"])
|
|
elif action == "ban":
|
|
if not account_id: raise ValueError("An 'account_id' is required.")
|
|
reason = f"Manual ban from Airflow mgmt DAG by {socket.gethostname()}"
|
|
logger.info(f"Banning account '{account_id}'...")
|
|
client.banAccount(accountId=account_id, reason=reason)
|
|
print(f"Successfully sent request to ban account '{account_id}'.")
|
|
elif action == "unban":
|
|
if not account_id: raise ValueError("An 'account_id' is required.")
|
|
reason = f"Manual un-ban from Airflow mgmt DAG by {socket.gethostname()}"
|
|
logger.info(f"Unbanning account '{account_id}'...")
|
|
|
|
# Fetch status to get current success count before unbanning
|
|
statuses = client.getAccountStatus(accountId=account_id, accountPrefix=None)
|
|
if not statuses:
|
|
logger.warning(f"Account '{account_id}' not found. Skipping account unban.")
|
|
else:
|
|
current_success_count = statuses[0].successCount or 0
|
|
client.unbanAccount(accountId=account_id, reason=reason)
|
|
print(f"Successfully sent request to unban account '{account_id}'.")
|
|
|
|
# Set the success_count_at_activation to baseline the account
|
|
redis_client = _get_redis_client(params["redis_conn_id"])
|
|
redis_client.hset(f"account_status:{account_id}", "success_count_at_activation", current_success_count)
|
|
logger.info(f"Set 'success_count_at_activation' for '{account_id}' to {current_success_count}.")
|
|
elif action == "unban_all":
|
|
account_prefix = account_id # Repurpose account_id param as an optional prefix
|
|
logger.info(f"Unbanning all account statuses to ACTIVE (prefix: '{account_prefix or 'ALL'}')...")
|
|
|
|
all_statuses = client.getAccountStatus(accountId=None, accountPrefix=account_prefix)
|
|
if not all_statuses:
|
|
print(f"No accounts found with prefix '{account_prefix or 'ALL'}' to unban.")
|
|
else:
|
|
accounts_to_unban = [s.accountId for s in all_statuses]
|
|
account_map = {s.accountId: s for s in all_statuses}
|
|
redis_client = _get_redis_client(params["redis_conn_id"])
|
|
|
|
logger.info(f"Found {len(accounts_to_unban)} accounts to unban.")
|
|
print(f"Found {len(accounts_to_unban)} accounts. Sending unban request for each...")
|
|
|
|
unban_count = 0
|
|
fail_count = 0
|
|
for acc_id in accounts_to_unban:
|
|
try:
|
|
reason = f"Manual unban_all from Airflow mgmt DAG by {socket.gethostname()}"
|
|
client.unbanAccount(accountId=acc_id, reason=reason)
|
|
logger.info(f" - Sent unban for '{acc_id}'.")
|
|
|
|
# Also set the success_count_at_activation to baseline the account
|
|
current_success_count = account_map[acc_id].successCount or 0
|
|
redis_client.hset(f"account_status:{acc_id}", "success_count_at_activation", current_success_count)
|
|
logger.info(f" - Set 'success_count_at_activation' for '{acc_id}' to {current_success_count}.")
|
|
|
|
unban_count += 1
|
|
except Exception as e:
|
|
logger.error(f" - Failed to unban account '{acc_id}': {e}")
|
|
fail_count += 1
|
|
|
|
print(f"\nSuccessfully sent unban requests for {unban_count} accounts.")
|
|
if fail_count > 0:
|
|
print(f"Failed to send unban requests for {fail_count} accounts. See logs for details.")
|
|
|
|
# Optionally, list statuses again to confirm
|
|
print("\n--- Listing statuses after unban_all ---")
|
|
_list_account_statuses(client, account_prefix, params["redis_conn_id"])
|
|
except Exception as account_e:
|
|
logger.error(f"Error during account action '{action}': {account_e}", exc_info=True)
|
|
print(f"\nERROR during account action: {account_e}")
|
|
|
|
elif entity == "all":
|
|
if action == "list_with_status":
|
|
print("\nListing all entities...")
|
|
_list_proxy_statuses(client, server_identity)
|
|
_list_account_statuses(client, account_id, params["redis_conn_id"])
|
|
|
|
except (PBServiceException, PBUserException) as e:
|
|
logger.error(f"Thrift error performing action '{action}': {e.message}", exc_info=True)
|
|
raise
|
|
except NotImplementedError as e:
|
|
logger.error(f"Feature not implemented: {e}", exc_info=True)
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Error performing action '{action}': {e}", exc_info=True)
|
|
raise
|
|
finally:
|
|
if transport and transport.isOpen():
|
|
transport.close()
|
|
logger.info("Thrift connection closed.")
|
|
|
|
# Manually triggered management DAG (schedule=None, no catchup). It exposes a
# set of trigger-time params and runs a single PythonOperator that calls
# `manage_system_callable` on the "queue-mgmt" queue.
with DAG(
    dag_id="ytdlp_mgmt_proxy_account",
    default_args={"queue": "queue-mgmt"},
    start_date=days_ago(1),
    schedule=None,
    catchup=False,
    tags=["ytdlp", "mgmt", "master"],
    doc_md="""
### YT-DLP Proxy and Account Manager DAG
This DAG provides tools to manage the state of proxies and accounts used by the `ytdlp-ops-server`.
Select an `entity` and an `action` to perform.

**IMPORTANT NOTE ABOUT DATA SOURCES:**
- **Proxy Statuses**: Read from the server's internal state via Thrift service calls.
- **Account Statuses**: Read from the Thrift service, and then enriched with live cooldown data directly from Redis.

**IMPORTANT NOTE ABOUT PROXY MANAGEMENT:**
- Proxies are managed by the server's internal state through Thrift methods
- There is NO direct Redis manipulation for proxies - they are managed entirely by the server
- To properly manage proxies, use the Thrift service methods (ban, unban, etc.)
""",
    params={
        "management_host": Param(
            DEFAULT_MANAGEMENT_SERVICE_IP,
            type="string",
            title="Management Service Host",
            description="The hostname or IP of the management service. Can be a Docker container name (e.g., 'envoy-thrift-lb') if on the same network.",
        ),
        # The module-level default comes from Variable.get(), which returns the
        # stored value as a string whenever the Airflow Variable is actually
        # set (only the fallback default_var is an int). Coerce to int so the
        # default always satisfies this param's `type="integer"` schema.
        "management_port": Param(
            int(DEFAULT_MANAGEMENT_SERVICE_PORT),
            type="integer",
            title="Management Service Port",
            description="The port of the dedicated management service.",
        ),
        "entity": Param(
            "accounts_and_proxies",
            type="string",
            enum=["account", "proxy", "client", "accounts_and_proxies", "activity_counters"],
            description="The type of entity to manage.",
        ),
        "action": Param(
            "list_with_status",
            type="string",
            enum=["list_with_status", "ban", "unban", "ban_all", "unban_all", "delete_from_redis"],
            description="""The management action to perform.
---
#### Actions for `entity: proxy`
- `list_with_status`: View status of all proxies, optionally filtered by `server_identity`.
- `ban`: Ban a specific proxy for a given `server_identity`. Requires `proxy_url`.
- `unban`: Un-ban a specific proxy. Requires `proxy_url`.
- `ban_all`: Sets the status of all proxies for a given `server_identity` (or all servers) to `BANNED`.
- `unban_all`: Resets the status of all proxies for a given `server_identity` (or all servers) to `ACTIVE`.
- `delete_from_redis`: **(Destructive)** Deletes proxy status from Redis via Thrift service. This permanently removes the proxy from being tracked by the system. If `proxy_url` and `server_identity` are provided, it deletes a single proxy. If only `server_identity` is provided, it deletes all proxies for that server. If neither is provided, it deletes ALL proxies across all servers.

#### Actions for `entity: account`
- `list_with_status`: View status of all accounts, optionally filtered by `account_id` (as a prefix).
- `ban`: Ban a specific account. Requires `account_id`.
- `unban`: Un-ban a specific account. Requires `account_id`.
- `unban_all`: Sets the status of all accounts (or those matching a prefix in `account_id`) to `ACTIVE`.
- `delete_from_redis`: **(Destructive)** Deletes account status from Redis via Thrift service. This permanently removes the account from being tracked by the system. If `account_id` is provided, it deletes that specific account. If `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts.

#### Actions for `entity: client`
- `list_with_status`: View success/failure statistics for each client type.
- `delete_from_redis`: **(Destructive)** Deletes all client stats from Redis.

#### Actions for `entity: activity_counters`
- `list_with_status`: View current activity rates (ops/min, ops/hr) for proxies and accounts.

#### Actions for `entity: accounts_and_proxies`
- This entity performs the selected action on **both** proxies and accounts where applicable.
- `list_with_status`: View statuses for both proxies and accounts.
- `ban`: Ban a specific proxy AND a specific account. Requires `proxy_url`, `server_identity`, and `account_id`.
- `unban`: Un-ban a specific proxy AND a specific account. Requires `proxy_url`, `server_identity`, and `account_id`.
- `ban_all`: Ban all proxies for a `server_identity` (or all servers). Does not affect accounts.
- `unban_all`: Un-ban all proxies for a `server_identity` (or all servers) AND all accounts (optionally filtered by `account_id` as a prefix).
- `delete_from_redis`: Deletes both account and proxy status from Redis via Thrift service. For accounts, if `account_id` is provided as a prefix, it deletes all accounts matching that prefix. If `account_id` is empty, it deletes ALL accounts. For proxies, if `server_identity` is provided, it deletes all proxies for that server. If `server_identity` is empty, it deletes ALL proxies across all servers.

""",
        ),
        # The three selectors below are optional (nullable); which of them an
        # action needs is spelled out in the `action` description above.
        "server_identity": Param(
            None,
            type=["null", "string"],
            description="The identity of the server instance (for proxy management). Leave blank to list all or delete all proxies.",
        ),
        "proxy_url": Param(
            None,
            type=["null", "string"],
            description="The proxy URL to act upon (e.g., 'socks5://host:port').",
        ),
        "account_id": Param(
            None,
            type=["null", "string"],
            description="The account ID to act upon. For `unban_all` or `delete_from_redis` on accounts, this can be an optional prefix. Leave blank to delete all accounts.",
        ),
        "redis_conn_id": Param(
            DEFAULT_REDIS_CONN_ID,
            type="string",
            title="Redis Connection ID",
            description="The Airflow connection ID for the Redis server (used for 'delete_from_redis' and for fetching detailed account status).",
        ),
    },
) as dag:
    # Single task: all entity/action dispatching happens inside the callable.
    system_management_task = PythonOperator(
        task_id="system_management_task",
        python_callable=manage_system_callable,
    )
|