Remove unused dags
parent de609aaecd
commit 7b13257bd1
36  .aider.chat.history.md  Normal file
@@ -0,0 +1,36 @@

# aider chat started at 2025-08-19 17:52:04

> Newer aider version v0.86.1 is available.
> /Users/aperez/.local/share/uv/tools/aider-chat/bin/python3 -m pip install --upgrade --upgrade-strategy only-if-needed aider-chat
> Run pip install? (Y)es/(N)o [Yes]: y
> Re-run aider to use new version.

# aider chat started at 2025-08-19 17:55:26

> /Users/aperez/.local/bin/aider --model gemini/gemini-2.5-pro-preview-03-25
> Aider v0.86.1
> Main model: gemini/gemini-2.5-pro-preview-03-25 with diff-fenced edit format
> Weak model: gemini/gemini-2.0-flash
> Git repo: .git with 34 files
> Repo-map: using 4096 tokens, auto refresh
> https://aider.chat/HISTORY.html#release-notes
> Please answer with one of: yes, no, skip, all
> Would you like to see what's new in this version? (Y)es/(N)o [Yes]: n

#### ls
>
>
> ^C again to exit
>
>
> ^C again to exit

#### /read-only ../yt-dlp-deployment/ansible/airflow_worker
> Added 51 files from directory /opt/yt-dlp-dags/../yt-dlp-deployment/ansible/airflow_worker to read-only files.
>
>
> ^C again to exit
>
>
> ^C KeyboardInterrupt
15  .aider.input.history  Normal file
@@ -0,0 +1,15 @@

# 2025-08-19 17:52:06.094899
+Y

# 2025-08-19 17:55:33.616531
+D

# 2025-08-19 17:55:35.382770
+No

# 2025-08-19 17:55:39.050939
+ls

# 2025-08-19 17:56:18.910148
+/read-only ../yt-dlp-deployment/ansible/airflow_worker
BIN  .aider.tags.cache.v4/cache.db  Normal file
Binary file not shown.
23  airflow/.env
@@ -1,23 +0,0 @@
AIRFLOW_IMAGE_NAME=apache/airflow:2.10.4
_AIRFLOW_WWW_USER_USERNAME=airflow
_AIRFLOW_WWW_USER_PASSWORD=airflow-password-ytld
AIRFLOW_UID=50000
AIRFLOW_PROJ_DIR=.

AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow-new-super-pass@89.253.221.173:52919/airflow
AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://airflow:airflow-new-super-pass@89.253.221.173:52919/airflow
AIRFLOW__CELERY__BROKER_URL=redis://:rOhTAIlTFFylXsjhqwxnYxDChFc@89.253.221.173:52909/0

AIRFLOW_QUEUE=holisticlegs-download
AIRFLOW_QUEUE_CHECK=holisticlegs-check
AIRFLOW_QUEUE_UPLOAD=holisticlegs-upload
AIRFLOW__WEBSERVER__SECRET_KEY=8DJ6XbtIICassrVxM9jWV3eTlt5N3XtyEdyW
HOSTNAME=85.192.30.55

AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT=768M
AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV=522M
AIRFLOW_WORKER_DOWNLOAD_CONCURRENCY=2

AIRFLOW_SMALL_WORKERS_MEM_LIMIT=1024M
AIRFLOW_SMALL_WORKERS_MEM_RESERV=512M
~
@@ -1,60 +0,0 @@
# This file contains all environment variables for the Airflow-based deployment.
# Copy this file to .env in the same directory and fill in your production values.
# This file is used by `generate_envoy_config.py` and `docker-compose-ytdlp-ops.yaml`.

# --- Common Configuration ---
# A unique name for this server instance, used as a key in Redis.
# This is hardcoded in the docker-compose file but can be overridden here.
SERVER_IDENTITY=ytdlp-ops-airflow-service

# Redis connection details for proxy and account state management.
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_PASSWORD=redis_pwd_K3fG8hJ1mN5pQ2sT

# --- Airflow Database Configuration ---
# The password for the PostgreSQL database used by Airflow.
# This should be a secure, randomly generated password.
POSTGRES_PASSWORD=pgdb_pwd_A7bC2xY9zE1wV5uP

# The password for the Airflow web UI admin user.
AIRFLOW_ADMIN_PASSWORD=admin_pwd_X9yZ3aB1cE5dF7gH

# --- Envoy & Worker Configuration ---
# The public-facing port for the Envoy load balancer that fronts the WORKERS.
ENVOY_PORT=9080
# The port for Envoy's admin/stats interface.
ENVOY_ADMIN_PORT=9901
# The public-facing port for the standalone MANAGEMENT service.
MANAGEMENT_SERVICE_PORT=9091
# The number of Python server workers to run.
# Set to 1 to simplify debugging. Multi-worker mode is experimental.
YTDLP_WORKERS=1
# The starting port for the Python workers. They will use sequential ports (e.g., 9090, 9091, ...).
YTDLP_BASE_PORT=9090

# --- Camoufox (Browser) Configuration ---
# Comma-separated list of SOCKS5 proxies to be used by Camoufox instances.
# Each proxy will get its own dedicated browser instance.
# Example: CAMOUFOX_PROXIES="socks5://user:pass@p.webshare.io:80,socks5://user:pass@p.webshare.io:81"
CAMOUFOX_PROXIES="socks5://your_proxy_user:your_proxy_pass@proxy.example.com:1080,socks5://your_proxy_user:your_proxy_pass@proxy.example.com:1081"

# Password for VNC access to the Camoufox browser instances.
VNC_PASSWORD=vnc_pwd_Z5xW8cV2bN4mP7lK

# The starting port for VNC access. Ports will be assigned sequentially (e.g., 5901, 5902, ...).
CAMOUFOX_BASE_VNC_PORT=5901

# The internal port used by Camoufox for its WebSocket server. Usually does not need to be changed.
CAMOUFOX_PORT=12345

# --- General Proxy Configuration ---
# A general-purpose SOCKS5 proxy that can be used alongside Camoufox proxies.
# This should be the IP address of the proxy server accessible from within the Docker network.
# '172.17.0.1' is often the host IP from within a container.
SOCKS5_SOCK_SERVER_IP=172.17.0.1

# --- Account Manager Configuration ---
# Account cooldown parameters (values are in minutes).
ACCOUNT_ACTIVE_DURATION_MIN=30
ACCOUNT_COOLDOWN_DURATION_MIN=60
File diff suppressed because it is too large
@@ -1,736 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import csv
|
||||
import json
|
||||
import logging
|
||||
import requests
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import List, Optional, Dict, Callable, Union
|
||||
from threading import Event
|
||||
|
||||
from PyQt6.QtCore import Qt, QThread, pyqtSignal, QObject, QTimer
|
||||
from PyQt6.QtWidgets import (
|
||||
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
|
||||
QLabel, QLineEdit, QPushButton, QTextEdit, QSpinBox, QDoubleSpinBox,
|
||||
QCheckBox, QGroupBox, QGridLayout, QMessageBox, QProgressBar, QDialog,
|
||||
QComboBox, QFileDialog
|
||||
)
|
||||
|
||||
# Define the current version of this tool.
|
||||
CURRENT_VERSION = "1.3.0"
|
||||
|
||||
class ProxyChecker:
|
||||
"""
|
||||
Fetches proxy lists from given URLs and checks if they work.
|
||||
Supports cancellation, pause/resume, progress reporting, and collects optional detailed
|
||||
response times, anonymity classification, and geo-location details for working proxies.
|
||||
"""
|
||||
def __init__(self,
|
||||
proxy_urls: Dict[str, str],
|
||||
timeout: int = 1,
|
||||
max_retries: int = 3,
|
||||
retry_delay: float = 1.0,
|
||||
max_workers: int = 20,
|
||||
check_url: str = "http://www.google.com",
|
||||
detailed_results: bool = False,
|
||||
export_format: str = "txt", # or "csv" or "json"
|
||||
user_agent: Optional[str] = None,
|
||||
log_callback: Optional[Callable[[str], None]] = None,
|
||||
progress_callback: Optional[Callable[[int], None]] = None):
|
||||
self.proxy_urls = proxy_urls
|
||||
self.timeout = timeout
|
||||
self.max_retries = max_retries
|
||||
self.retry_delay = retry_delay
|
||||
self.max_workers = max_workers
|
||||
self.check_url = check_url
|
||||
self.detailed_results = detailed_results
|
||||
self.export_format = export_format.lower()
|
||||
self.user_agent = user_agent
|
||||
self.log_callback = log_callback
|
||||
self.progress_callback = progress_callback
|
||||
self.cancel_event = Event()
|
||||
self.pause_event = Event() # When set, processing is paused
|
||||
|
||||
# Statistics counters
|
||||
self.total_proxies_checked = 0
|
||||
self.working_proxies_found = 0
|
||||
self.overall_total_count = 0
|
||||
self.overall_processed_count = 0
|
||||
|
||||
# Store detailed working results by type.
|
||||
self.working_results: Dict[str, List[Union[str, Dict[str, Union[str, float, dict]]]]] = {}
|
||||
|
||||
self.session = requests.Session()
|
||||
if self.user_agent:
|
||||
self.session.headers["User-Agent"] = self.user_agent
|
||||
|
||||
# Determine the client IP to help with anonymity detection.
|
||||
try:
|
||||
r = requests.get("https://api.ipify.org?format=json", timeout=3)
|
||||
r.raise_for_status()
|
||||
self.client_ip = r.json().get("ip")
|
||||
self.log("info", f"Client IP determined as {self.client_ip}")
|
||||
except requests.RequestException:
|
||||
self.client_ip = "unknown"
|
||||
self.log("warning", "Could not determine client IP for anonymity detection.")
|
||||
|
||||
def log(self, level: str, message: str) -> None:
|
||||
full_message = f"{level.upper()}: {message}"
|
||||
if self.log_callback:
|
||||
self.log_callback(full_message)
|
||||
else:
|
||||
print(full_message)
|
||||
|
||||
def cancel(self) -> None:
|
||||
self.cancel_event.set()
|
||||
self.log("info", "Cancellation requested.")
|
||||
|
||||
def pause(self) -> None:
|
||||
self.pause_event.set()
|
||||
self.log("info", "Proxy checking paused.")
|
||||
|
||||
def resume(self) -> None:
|
||||
self.pause_event.clear()
|
||||
self.log("info", "Proxy checking resumed.")
|
||||
|
||||
def determine_anonymity(self, proxy: str) -> str:
|
||||
try:
|
||||
session = requests.Session()
|
||||
session.proxies = {'http': proxy, 'https': proxy}
|
||||
r = session.get("https://api.ipify.org?format=json", timeout=self.timeout)
|
||||
r.raise_for_status()
|
||||
proxy_ip = r.json().get("ip")
|
||||
return "transparent" if proxy_ip == self.client_ip else "anonymous"
|
||||
except requests.RequestException:
|
||||
return "unknown"
|
||||
|
||||
def get_geo_info(self, ip: str) -> dict:
|
||||
try:
|
||||
r = requests.get(f"http://ip-api.com/json/{ip}", timeout=3)
|
||||
r.raise_for_status()
|
||||
return r.json()
|
||||
except requests.RequestException:
|
||||
return {}
|
||||
|
||||
def check_proxy(self, proxy: str) -> Optional[Union[str, dict]]:
|
||||
if self.cancel_event.is_set():
|
||||
return None
|
||||
# If paused, wait until resumed.
|
||||
while self.pause_event.is_set():
|
||||
time.sleep(0.1)
|
||||
try:
|
||||
start = time.time()
|
||||
session = requests.Session()
|
||||
session.proxies = {'http': proxy, 'https': proxy}
|
||||
if self.user_agent:
|
||||
session.headers["User-Agent"] = self.user_agent
|
||||
response = session.get(self.check_url, timeout=self.timeout)
|
||||
elapsed = time.time() - start
|
||||
if response.status_code == 200:
|
||||
if self.detailed_results:
|
||||
anonymity = self.determine_anonymity(proxy)
|
||||
ip_only = proxy.split(':')[0]
|
||||
geo = self.get_geo_info(ip_only)
|
||||
return {
|
||||
"proxy": proxy,
|
||||
"response_time": elapsed,
|
||||
"anonymity": anonymity,
|
||||
"geo": geo
|
||||
}
|
||||
else:
|
||||
return proxy
|
||||
except requests.RequestException:
|
||||
return None
|
||||
|
||||
def get_proxies(self, url: str) -> List[str]:
|
||||
for attempt in range(self.max_retries):
|
||||
if self.cancel_event.is_set():
|
||||
self.log("info", "Cancellation detected while fetching proxies.")
|
||||
return []
|
||||
try:
|
||||
response = self.session.get(url, timeout=self.timeout)
|
||||
response.raise_for_status()
|
||||
self.log("info", f"Successfully fetched proxies from {url}")
|
||||
return response.text.strip().splitlines()
|
||||
except requests.RequestException as e:
|
||||
self.log("warning", f"Attempt {attempt + 1} failed for {url}: {e}")
|
||||
time.sleep(self.retry_delay)
|
||||
self.log("error", f"Failed to retrieve proxies from {url} after {self.max_retries} attempts.")
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def create_proxy_dir(directory: str) -> None:
|
||||
os.makedirs(directory, exist_ok=True)
|
||||
|
||||
def process_proxies(self,
|
||||
proxy_type: str,
|
||||
url: Optional[str] = None,
|
||||
proxies: Optional[List[str]] = None) -> int:
|
||||
if proxies is None and url is not None:
|
||||
proxies = self.get_proxies(url)
|
||||
if self.cancel_event.is_set():
|
||||
self.log("info", "Cancellation detected before processing proxies.")
|
||||
return 0
|
||||
if not proxies:
|
||||
self.log("warning", f"No proxies to check for {proxy_type}")
|
||||
return 0
|
||||
|
||||
total_proxies = len(proxies)
|
||||
self.log("info", f"Checking {total_proxies} {proxy_type} proxies with {self.max_workers} workers.")
|
||||
working_proxy_list = []
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
futures = {executor.submit(self.check_proxy, proxy): proxy for proxy in proxies}
|
||||
for future in as_completed(futures):
|
||||
while self.pause_event.is_set():
|
||||
time.sleep(0.1)
|
||||
if self.cancel_event.is_set():
|
||||
self.log("info", "Cancellation detected during proxy checking loop.")
|
||||
break
|
||||
result = future.result()
|
||||
self.overall_processed_count += 1
|
||||
if self.progress_callback and self.overall_total_count > 0:
|
||||
progress_percent = int((self.overall_processed_count / self.overall_total_count) * 100)
|
||||
self.progress_callback(progress_percent)
|
||||
if result:
|
||||
working_proxy_list.append(result)
|
||||
|
||||
self.working_results[proxy_type] = working_proxy_list
|
||||
file_ext = ".csv" if self.export_format == "csv" else ".json" if self.export_format == "json" else ".txt"
|
||||
proxy_file = f'proxies/{proxy_type}{file_ext}'
|
||||
self.create_proxy_dir(os.path.dirname(proxy_file))
|
||||
try:
|
||||
if self.export_format == "csv":
|
||||
with open(proxy_file, 'w', newline='') as f:
|
||||
if self.detailed_results:
|
||||
writer = csv.writer(f)
|
||||
writer.writerow(["Proxy", "Response Time (s)", "Anonymity", "Country", "Region", "City"])
|
||||
for item in working_proxy_list:
|
||||
geo = item.get("geo", {})
|
||||
writer.writerow([
|
||||
item.get("proxy"),
|
||||
f"{item.get('response_time', 0):.2f}",
|
||||
item.get("anonymity"),
|
||||
geo.get("country", ""),
|
||||
geo.get("regionName", ""),
|
||||
geo.get("city", "")
|
||||
])
|
||||
else:
|
||||
writer = csv.writer(f)
|
||||
writer.writerow(["Proxy"])
|
||||
for item in working_proxy_list:
|
||||
writer.writerow([item])
|
||||
elif self.export_format == "json":
|
||||
with open(proxy_file, 'w') as f:
|
||||
json.dump(working_proxy_list, f, indent=4)
|
||||
else:
|
||||
with open(proxy_file, 'w') as f:
|
||||
if self.detailed_results:
|
||||
lines = [
|
||||
f"{item.get('proxy')} - {item.get('response_time'):.2f} s - {item.get('anonymity')} - {item.get('geo', {}).get('country', '')}"
|
||||
for item in working_proxy_list
|
||||
]
|
||||
else:
|
||||
lines = working_proxy_list
|
||||
f.write('\n'.join(lines) + '\n')
|
||||
except OSError as e:
|
||||
self.log("error", f"Failed to write working proxies to {proxy_file}: {e}")
|
||||
|
||||
self.log("info", f"Checked {total_proxies} {proxy_type} proxies. Working: {len(working_proxy_list)}.")
|
||||
self.total_proxies_checked += total_proxies
|
||||
self.working_proxies_found += len(working_proxy_list)
|
||||
return len(working_proxy_list)
|
||||
|
||||
def get_statistics(self) -> str:
|
||||
stats = f"Total proxies checked: {self.total_proxies_checked}\n"
|
||||
stats += f"Working proxies found: {self.working_proxies_found}\n"
|
||||
if self.detailed_results:
|
||||
all_times = []
|
||||
for lst in self.working_results.values():
|
||||
all_times.extend([item.get("response_time") for item in lst if isinstance(item, dict)])
|
||||
if all_times:
|
||||
avg_time = sum(all_times) / len(all_times)
|
||||
stats += f"Average response time: {avg_time:.2f} seconds\n"
|
||||
return stats
|
||||
|
||||
def run(self) -> None:
|
||||
start_time = time.time()
|
||||
self.overall_total_count = 0
|
||||
self.overall_processed_count = 0
|
||||
proxies_by_type: Dict[str, List[str]] = {}
|
||||
|
||||
for proxy_type, url in self.proxy_urls.items():
|
||||
if self.cancel_event.is_set():
|
||||
self.log("info", "Cancellation detected. Aborting processing.")
|
||||
return
|
||||
proxies = self.get_proxies(url)
|
||||
proxies_by_type[proxy_type] = proxies
|
||||
self.overall_total_count += len(proxies)
|
||||
|
||||
if self.overall_total_count == 0:
|
||||
self.log("warning", "No proxies fetched from any source.")
|
||||
|
||||
for proxy_type, proxies in proxies_by_type.items():
|
||||
if self.cancel_event.is_set():
|
||||
self.log("info", "Cancellation detected. Aborting further processing.")
|
||||
break
|
||||
self.process_proxies(proxy_type, proxies=proxies)
|
||||
|
||||
self.session.close()
|
||||
end_time = time.time()
|
||||
minutes, seconds = divmod(end_time - start_time, 60)
|
||||
self.log("info", f"Total proxies checked: {self.total_proxies_checked}. Working proxies: {self.working_proxies_found}.")
|
||||
self.log("info", f"Execution time: {int(minutes)} minutes {int(seconds)} seconds.")
|
||||
self.log("info", "Statistics:\n" + self.get_statistics())
|
||||
# Append history log
|
||||
try:
|
||||
with open("history.log", "a") as hist_file:
|
||||
hist_file.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {self.get_statistics()}\n")
|
||||
except OSError as e:
|
||||
self.log("error", f"Failed to write history log: {e}")
|
||||
|
||||
class ProxyCheckerWorker(QObject):
|
||||
"""
|
||||
Worker class to run the proxy checking process in a separate thread.
|
||||
Emits log messages, progress updates, and a finished signal.
|
||||
"""
|
||||
log_signal = pyqtSignal(str)
|
||||
progress_update = pyqtSignal(int)
|
||||
finished = pyqtSignal()
|
||||
|
||||
def __init__(self,
|
||||
proxy_urls: Dict[str, str],
|
||||
timeout: int,
|
||||
max_retries: int,
|
||||
retry_delay: float,
|
||||
max_workers: int,
|
||||
check_url: str,
|
||||
detailed_results: bool,
|
||||
export_format: str,
|
||||
user_agent: Optional[str] = None):
|
||||
super().__init__()
|
||||
self.proxy_urls = proxy_urls
|
||||
self.timeout = timeout
|
||||
self.max_retries = max_retries
|
||||
self.retry_delay = retry_delay
|
||||
self.max_workers = max_workers
|
||||
self.check_url = check_url
|
||||
self.detailed_results = detailed_results
|
||||
self.export_format = export_format
|
||||
self.user_agent = user_agent
|
||||
self.checker: Optional[ProxyChecker] = None
|
||||
|
||||
def log_callback(self, message: str) -> None:
|
||||
self.log_signal.emit(message)
|
||||
|
||||
def progress_callback(self, progress: int) -> None:
|
||||
self.progress_update.emit(progress)
|
||||
|
||||
def cancel(self) -> None:
|
||||
if self.checker is not None:
|
||||
self.checker.cancel()
|
||||
|
||||
def run(self) -> None:
|
||||
self.checker = ProxyChecker(
|
||||
proxy_urls=self.proxy_urls,
|
||||
timeout=self.timeout,
|
||||
max_retries=self.max_retries,
|
||||
retry_delay=self.retry_delay,
|
||||
max_workers=self.max_workers,
|
||||
check_url=self.check_url,
|
||||
detailed_results=self.detailed_results,
|
||||
export_format=self.export_format,
|
||||
user_agent=self.user_agent,
|
||||
log_callback=self.log_callback,
|
||||
progress_callback=self.progress_callback
|
||||
)
|
||||
self.log_callback("Starting proxy checking...")
|
||||
self.checker.run()
|
||||
self.log_callback("Proxy checking finished.")
|
||||
self.finished.emit()
|
||||
|
||||
class UpdateChecker(QObject):
|
||||
"""
|
||||
Worker class to check for software updates.
|
||||
"""
|
||||
update_checked = pyqtSignal(str)
|
||||
|
||||
def run(self) -> None:
|
||||
try:
|
||||
response = requests.get("https://api.github.com/repos/Jesewe/proxy-checker/releases/latest", timeout=5)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
latest_version = data["tag_name"].lstrip("v")
|
||||
if latest_version != CURRENT_VERSION:
|
||||
msg = (f"New version available: {latest_version}.\n"
|
||||
f"You are using version {CURRENT_VERSION}.\n"
|
||||
f"Visit {data['html_url']} to download the update.")
|
||||
else:
|
||||
msg = f"You are up-to-date with version {CURRENT_VERSION}."
|
||||
except Exception as e:
|
||||
msg = f"Failed to check for updates: {e}"
|
||||
self.update_checked.emit(msg)
|
||||
|
||||
class MainWindow(QMainWindow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.setWindowTitle("Proxy Checker")
|
||||
self.setGeometry(100, 100, 850, 750)
|
||||
self.init_ui()
|
||||
self.thread: Optional[QThread] = None
|
||||
self.worker: Optional[ProxyCheckerWorker] = None
|
||||
self.update_thread: Optional[QThread] = None
|
||||
self.last_checker: Optional[ProxyChecker] = None
|
||||
self.is_paused = False
|
||||
|
||||
def init_ui(self):
|
||||
main_widget = QWidget()
|
||||
main_layout = QVBoxLayout()
|
||||
|
||||
# Configuration group
|
||||
config_group = QGroupBox("Settings")
|
||||
config_layout = QGridLayout()
|
||||
|
||||
# Timeout
|
||||
config_layout.addWidget(QLabel("Timeout (s):"), 0, 0)
|
||||
self.timeout_spin = QSpinBox()
|
||||
self.timeout_spin.setRange(1, 60)
|
||||
self.timeout_spin.setValue(3)
|
||||
config_layout.addWidget(self.timeout_spin, 0, 1)
|
||||
|
||||
# Max Retries
|
||||
config_layout.addWidget(QLabel("Max Retries:"), 0, 2)
|
||||
self.retries_spin = QSpinBox()
|
||||
self.retries_spin.setRange(1, 10)
|
||||
self.retries_spin.setValue(3)
|
||||
config_layout.addWidget(self.retries_spin, 0, 3)
|
||||
|
||||
# Retry Delay
|
||||
config_layout.addWidget(QLabel("Retry Delay (s):"), 1, 0)
|
||||
self.retry_delay_spin = QDoubleSpinBox()
|
||||
self.retry_delay_spin.setRange(0.1, 10.0)
|
||||
self.retry_delay_spin.setSingleStep(0.1)
|
||||
self.retry_delay_spin.setValue(1.0)
|
||||
config_layout.addWidget(self.retry_delay_spin, 1, 1)
|
||||
|
||||
# Max Workers
|
||||
config_layout.addWidget(QLabel("Max Workers:"), 1, 2)
|
||||
self.workers_spin = QSpinBox()
|
||||
self.workers_spin.setRange(1, 200)
|
||||
self.workers_spin.setValue(50)
|
||||
config_layout.addWidget(self.workers_spin, 1, 3)
|
||||
|
||||
# Test URL
|
||||
config_layout.addWidget(QLabel("Test URL:"), 2, 0)
|
||||
self.test_url_edit = QLineEdit("http://www.google.com")
|
||||
config_layout.addWidget(self.test_url_edit, 2, 1, 1, 3)
|
||||
|
||||
# Custom User-Agent
|
||||
config_layout.addWidget(QLabel("Custom User-Agent:"), 3, 0)
|
||||
self.user_agent_edit = QLineEdit("")
|
||||
self.user_agent_edit.setPlaceholderText("Leave blank for default")
|
||||
config_layout.addWidget(self.user_agent_edit, 3, 1, 1, 3)
|
||||
|
||||
# Detailed Results Option
|
||||
self.detailed_checkbox = QCheckBox("Detailed Results (Include Response Time, Anonymity & Geo)")
|
||||
config_layout.addWidget(self.detailed_checkbox, 4, 0, 1, 2)
|
||||
|
||||
# Export Format Option
|
||||
config_layout.addWidget(QLabel("Export Format:"), 4, 2)
|
||||
self.export_format_combo = QComboBox()
|
||||
self.export_format_combo.addItems(["txt", "csv", "json"])
|
||||
config_layout.addWidget(self.export_format_combo, 4, 3)
|
||||
|
||||
config_group.setLayout(config_layout)
|
||||
main_layout.addWidget(config_group)
|
||||
|
||||
# Proxy Sources Group
|
||||
proxy_group = QGroupBox("Proxy Sources")
|
||||
proxy_layout = QGridLayout()
|
||||
self.proxy_urls = {
|
||||
"http": "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt",
|
||||
"socks4": "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt",
|
||||
"socks5": "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt"
|
||||
}
|
||||
self.proxy_type_checkboxes = {}
|
||||
self.proxy_url_edits = {}
|
||||
row = 0
|
||||
for proxy_type, url in self.proxy_urls.items():
|
||||
checkbox = QCheckBox(proxy_type)
|
||||
checkbox.setChecked(True)
|
||||
self.proxy_type_checkboxes[proxy_type] = checkbox
|
||||
proxy_layout.addWidget(checkbox, row, 0)
|
||||
url_edit = QLineEdit(url)
|
||||
self.proxy_url_edits[proxy_type] = url_edit
|
||||
proxy_layout.addWidget(url_edit, row, 1)
|
||||
row += 1
|
||||
proxy_group.setLayout(proxy_layout)
|
||||
main_layout.addWidget(proxy_group)
|
||||
|
||||
# Progress Bar
|
||||
self.progress_bar = QProgressBar()
|
||||
self.progress_bar.setRange(0, 100)
|
||||
self.progress_bar.setValue(0)
|
||||
main_layout.addWidget(self.progress_bar)
|
||||
|
||||
# Main Buttons
|
||||
btn_layout = QHBoxLayout()
|
||||
self.start_btn = QPushButton("Start Checking")
|
||||
self.start_btn.clicked.connect(self.start_checking)
|
||||
btn_layout.addWidget(self.start_btn)
|
||||
|
||||
self.pause_btn = QPushButton("Pause")
|
||||
self.pause_btn.setEnabled(False)
|
||||
self.pause_btn.clicked.connect(self.toggle_pause)
|
||||
btn_layout.addWidget(self.pause_btn)
|
||||
|
||||
self.cancel_btn = QPushButton("Cancel")
|
||||
self.cancel_btn.setEnabled(False)
|
||||
self.cancel_btn.clicked.connect(self.cancel_checking)
|
||||
btn_layout.addWidget(self.cancel_btn)
|
||||
|
||||
self.show_results_btn = QPushButton("Show Results")
|
||||
self.show_results_btn.setEnabled(False)
|
||||
self.show_results_btn.clicked.connect(self.show_results)
|
||||
btn_layout.addWidget(self.show_results_btn)
|
||||
main_layout.addLayout(btn_layout)
|
||||
|
||||
# Extra Buttons: Show Statistics, Save Log
|
||||
extra_btn_layout = QHBoxLayout()
|
||||
self.show_stats_btn = QPushButton("Show Statistics")
|
||||
self.show_stats_btn.setEnabled(False)
|
||||
self.show_stats_btn.clicked.connect(self.show_statistics)
|
||||
extra_btn_layout.addWidget(self.show_stats_btn)
|
||||
|
||||
self.save_log_btn = QPushButton("Save Log")
|
||||
self.save_log_btn.clicked.connect(self.save_log)
|
||||
extra_btn_layout.addWidget(self.save_log_btn)
|
||||
main_layout.addLayout(extra_btn_layout)
|
||||
|
||||
# Log Text Area
|
||||
self.log_text = QTextEdit()
|
||||
self.log_text.setReadOnly(True)
|
||||
self.log_text.setStyleSheet("background-color: #1e1e1e; color: #d4d4d4; font-family: Consolas; font-size: 12pt;")
|
||||
main_layout.addWidget(self.log_text)
|
||||
|
||||
main_widget.setLayout(main_layout)
|
||||
self.setCentralWidget(main_widget)
|
||||
|
||||
def start_checking(self):
|
||||
self.start_btn.setEnabled(False)
|
||||
self.cancel_btn.setEnabled(True)
|
||||
self.pause_btn.setEnabled(True)
|
||||
self.show_results_btn.setEnabled(False)
|
||||
self.show_stats_btn.setEnabled(False)
|
||||
self.progress_bar.setValue(0)
|
||||
self.log_text.clear()
|
||||
|
||||
# Build proxy_urls from selected checkboxes.
|
||||
selected_proxy_urls = {}
|
||||
for proxy_type, checkbox in self.proxy_type_checkboxes.items():
|
||||
if checkbox.isChecked():
|
||||
url = self.proxy_url_edits[proxy_type].text().strip()
|
||||
if url:
|
||||
selected_proxy_urls[proxy_type] = url
|
||||
|
||||
if not selected_proxy_urls:
|
||||
QMessageBox.warning(self, "No Proxies Selected", "Please select at least one proxy type to check.")
|
||||
self.start_btn.setEnabled(True)
|
||||
self.cancel_btn.setEnabled(False)
|
||||
self.pause_btn.setEnabled(False)
|
||||
return
|
||||
|
||||
# Get settings from UI.
|
||||
timeout = self.timeout_spin.value()
|
||||
max_retries = self.retries_spin.value()
|
||||
retry_delay = self.retry_delay_spin.value()
|
||||
max_workers = self.workers_spin.value()
|
||||
check_url = self.test_url_edit.text().strip()
|
||||
detailed_results = self.detailed_checkbox.isChecked()
|
||||
export_format = self.export_format_combo.currentText().strip()
|
||||
user_agent = self.user_agent_edit.text().strip() or None
|
||||
|
||||
self.thread = QThread()
|
||||
self.worker = ProxyCheckerWorker(
|
||||
proxy_urls=selected_proxy_urls,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
retry_delay=retry_delay,
|
||||
max_workers=max_workers,
|
||||
check_url=check_url,
|
||||
detailed_results=detailed_results,
|
||||
export_format=export_format,
|
||||
user_agent=user_agent
|
||||
)
|
||||
self.worker.moveToThread(self.thread)
|
||||
self.worker.log_signal.connect(self.append_log)
|
||||
self.worker.progress_update.connect(self.progress_bar.setValue)
|
||||
self.worker.finished.connect(self.on_finished)
|
||||
self.thread.started.connect(self.worker.run)
|
||||
self.thread.finished.connect(self.thread.deleteLater)
|
||||
self.thread.start()
|
||||
|
||||
def toggle_pause(self):
|
||||
if self.worker and self.worker.checker:
|
||||
if not self.is_paused:
|
||||
self.worker.checker.pause()
|
||||
self.is_paused = True
|
||||
self.pause_btn.setText("Resume")
|
||||
self.append_log("Paused proxy checking.")
|
||||
else:
|
||||
self.worker.checker.resume()
|
||||
self.is_paused = False
|
||||
self.pause_btn.setText("Pause")
|
||||
self.append_log("Resumed proxy checking.")
|
||||
|
||||
def cancel_checking(self):
|
||||
if self.worker is not None:
|
||||
self.append_log("Cancel requested by user...")
|
||||
self.worker.cancel()
|
||||
self.cancel_btn.setEnabled(False)
|
||||
|
||||
def append_log(self, message: str):
|
||||
timestamp = time.strftime("%H:%M:%S")
|
||||
self.log_text.append(f"[{timestamp}] {message}")
|
||||
|
||||
def on_finished(self):
|
||||
self.append_log("All tasks completed.")
|
||||
self.start_btn.setEnabled(True)
|
||||
self.cancel_btn.setEnabled(False)
|
||||
self.pause_btn.setEnabled(False)
|
||||
self.show_results_btn.setEnabled(True)
|
||||
self.show_stats_btn.setEnabled(True)
|
||||
if self.thread is not None:
|
||||
self.thread.quit()
|
||||
self.thread.wait()
|
||||
# Save a reference to the last checker for filtering results.
|
||||
if self.worker:
|
||||
self.last_checker = self.worker.checker
|
||||
|
||||
def show_results(self):
|
||||
# If detailed results are enabled, allow filtering by response time.
|
||||
if self.last_checker and self.last_checker.detailed_results:
|
||||
dialog = QDialog(self)
|
||||
dialog.setWindowTitle("Filtered Working Proxies")
|
||||
dialog.resize(600, 500)
|
||||
layout = QVBoxLayout()
|
||||
|
||||
filter_layout = QHBoxLayout()
|
||||
filter_layout.addWidget(QLabel("Max Response Time (s):"))
|
||||
filter_spin = QDoubleSpinBox()
|
||||
filter_spin.setRange(0.1, 10.0)
|
||||
filter_spin.setSingleStep(0.1)
|
||||
filter_spin.setValue(1.0)
|
||||
filter_layout.addWidget(filter_spin)
|
||||
apply_btn = QPushButton("Apply Filter")
|
||||
filter_layout.addWidget(apply_btn)
|
||||
layout.addLayout(filter_layout)
|
||||
|
||||
result_area = QTextEdit()
|
||||
result_area.setReadOnly(True)
|
||||
layout.addWidget(result_area)
|
||||
|
||||
def apply_filter():
|
||||
threshold = filter_spin.value()
|
||||
text = ""
|
||||
for ptype, results in self.last_checker.working_results.items():
|
||||
filtered = []
|
||||
for item in results:
|
||||
if isinstance(item, dict) and item.get("response_time") <= threshold:
|
||||
geo = item.get("geo", {})
|
||||
filtered.append(f"{item.get('proxy')} - {item.get('response_time'):.2f} s - {item.get('anonymity')} - {geo.get('country', '')}")
|
||||
if filtered:
|
||||
text += f"--- {ptype} ---\n" + "\n".join(filtered) + "\n\n"
|
||||
result_area.setText(text if text else "No proxies match the filter criteria.")
|
||||
|
||||
apply_btn.clicked.connect(apply_filter)
|
||||
# Show all results initially
|
||||
apply_filter()
|
||||
|
||||
btn_layout = QHBoxLayout()
|
||||
copy_btn = QPushButton("Copy to Clipboard")
|
||||
copy_btn.clicked.connect(lambda: QApplication.clipboard().setText(result_area.toPlainText()))
|
||||
btn_layout.addWidget(copy_btn)
|
||||
close_btn = QPushButton("Close")
|
||||
close_btn.clicked.connect(dialog.close)
|
||||
btn_layout.addWidget(close_btn)
|
||||
layout.addLayout(btn_layout)
|
||||
|
||||
dialog.setLayout(layout)
|
||||
dialog.exec()
|
||||
else:
|
||||
# Fallback: read the exported files from the proxies directory.
|
||||
results_text = ""
|
||||
proxy_dir = "proxies"
|
||||
if os.path.isdir(proxy_dir):
|
||||
for filename in os.listdir(proxy_dir):
|
||||
filepath = os.path.join(proxy_dir, filename)
|
||||
results_text += f"--- {filename} ---\n"
|
||||
try:
|
||||
with open(filepath, 'r') as f:
|
||||
results_text += f.read() + "\n"
|
||||
except OSError as e:
|
||||
results_text += f"Error reading file: {e}\n"
|
||||
else:
|
||||
results_text = "No results found."
|
||||
|
||||
dialog = QDialog(self)
|
||||
dialog.setWindowTitle("Working Proxies")
|
||||
dialog.resize(600, 400)
|
||||
dlg_layout = QVBoxLayout()
|
||||
text_area = QTextEdit()
|
||||
text_area.setReadOnly(True)
|
||||
text_area.setText(results_text)
|
||||
dlg_layout.addWidget(text_area)
|
||||
|
||||
btn_layout = QHBoxLayout()
|
||||
copy_btn = QPushButton("Copy to Clipboard")
|
||||
copy_btn.clicked.connect(lambda: QApplication.clipboard().setText(results_text))
|
||||
btn_layout.addWidget(copy_btn)
|
||||
close_btn = QPushButton("Close")
|
||||
close_btn.clicked.connect(dialog.close)
|
||||
btn_layout.addWidget(close_btn)
|
||||
dlg_layout.addLayout(btn_layout)
|
||||
dialog.setLayout(dlg_layout)
|
||||
dialog.exec()
|
||||
|
||||
def show_statistics(self):
|
||||
if self.worker and self.worker.checker:
|
||||
stats = self.worker.checker.get_statistics()
|
||||
else:
|
||||
stats = "No statistics available."
|
||||
QMessageBox.information(self, "Statistics", stats)
|
||||
|
||||
def save_log(self):
|
||||
filename, _ = QFileDialog.getSaveFileName(self, "Save Log", "", "Text Files (*.txt);;All Files (*)")
|
||||
if filename:
|
||||
try:
|
||||
with open(filename, 'w') as f:
|
||||
f.write(self.log_text.toPlainText())
|
||||
QMessageBox.information(self, "Saved", f"Log saved to {filename}")
|
||||
except OSError as e:
|
||||
QMessageBox.warning(self, "Error", f"Failed to save log: {e}")
|
||||
|
||||
def auto_check_for_update(self):
|
||||
self.update_thread = QThread()
|
||||
self.update_worker = UpdateChecker()
|
||||
self.update_worker.moveToThread(self.update_thread)
|
||||
self.update_worker.update_checked.connect(self.show_update_message)
|
||||
self.update_thread.started.connect(self.update_worker.run)
|
||||
self.update_thread.start()
|
||||
|
||||
def show_update_message(self, msg: str):
|
||||
QMessageBox.information(self, "Update Check", msg)
|
||||
self.update_thread.quit()
|
||||
self.update_thread.wait()
|
||||
|
||||
def showEvent(self, event):
|
||||
super().showEvent(event)
|
||||
QTimer.singleShot(1000, self.auto_check_for_update)
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
app = QApplication(sys.argv)
|
||||
window = MainWindow()
|
||||
window.show()
|
||||
sys.exit(app.exec())
|
||||
@@ -1,941 +0,0 @@
|
||||
from airflow import DAG
|
||||
from airflow.models import BaseOperator, Variable
|
||||
from airflow.utils.decorators import apply_defaults
|
||||
from airflow.hooks.base import BaseHook
|
||||
from airflow.exceptions import AirflowException
|
||||
from airflow.utils.dates import days_ago
|
||||
from thrift.transport import TSocket, TTransport
|
||||
from thrift.protocol import TBinaryProtocol
|
||||
from thrift.transport.TTransport import TTransportException
|
||||
from datetime import datetime, timedelta
|
||||
from pangramia.yt.exceptions.ttypes import PBServiceException
|
||||
import redis
|
||||
import logging
|
||||
import time
|
||||
import socket
|
||||
import json
|
||||
import os
|
||||
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||
from pangramia.yt.common.ttypes import TokenUpdateMode
|
||||
from airflow.providers.redis.hooks.redis import RedisHook
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.models.param import Param
|
||||
# Assuming ytdlp_utils exists in the same directory or PYTHONPATH
|
||||
# from ytdlp_utils import get_info_json, is_valid_json, extract_video_id
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default settings (similar to ytdlp_client_dag.py)
|
||||
MAX_RETRIES = 1
|
||||
RETRY_DELAY = timedelta(seconds=10)
|
||||
DEFAULT_TIMEOUT = 30
|
||||
|
||||
class YtdlpOpsOperator(BaseOperator):
|
||||
"""
|
||||
Custom Airflow operator to interact with YTDLP Thrift service. Handles direct connections
|
||||
and Redis-based discovery, retrieves tokens, saves info.json, and manages errors.
|
||||
"""
|
||||
template_fields = ('url', 'service_ip', 'service_port', 'account_id', 'timeout', 'info_json_dir')
|
||||
|
||||
@apply_defaults
|
||||
def __init__(self, url, redis_conn_id='redis_default', max_retries=3, retry_delay=10,
|
||||
service_ip=None, service_port=None, redis_enabled=False, account_id=None,
|
||||
save_info_json=True, info_json_dir=None, get_socks_proxy=True,
|
||||
store_socks_proxy=False, timeout=DEFAULT_TIMEOUT, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
logger.info(f"Initializing YtdlpOpsOperator with parameters: url={url}, "
|
||||
f"redis_conn_id={redis_conn_id}, max_retries={max_retries}, retry_delay={retry_delay}, "
|
||||
f"service_ip={service_ip}, service_port={service_port}, redis_enabled={redis_enabled}, "
|
||||
f"account_id={account_id}, save_info_json={save_info_json}, info_json_dir={info_json_dir}, "
|
||||
f"get_socks_proxy={get_socks_proxy}, store_socks_proxy={store_socks_proxy}, timeout={timeout}")
|
||||
|
||||
# Validate required parameters
|
||||
if not url:
|
||||
raise ValueError("url is required")
|
||||
|
||||
# Validate parameters based on connection mode
|
||||
if redis_enabled:
|
||||
if not account_id:
|
||||
raise ValueError("account_id is required when redis_enabled=True")
|
||||
# Use default Redis connection if not specified
|
||||
if not redis_conn_id:
|
||||
redis_conn_id = 'redis_default'
|
||||
logger.info(f"Using default Redis connection ID: {redis_conn_id}")
|
||||
else:
|
||||
if not service_ip or not service_port:
|
||||
raise ValueError("Both service_ip and service_port must be specified when redis_enabled=False")
|
||||
if not account_id:
|
||||
logger.warning("No account_id provided for direct connection mode. Using 'default'")
|
||||
account_id = 'default' # Assign default if missing in direct mode
|
||||
|
||||
self.url = url
|
||||
self.redis_conn_id = redis_conn_id
|
||||
self.max_retries = max_retries
|
||||
self.retry_delay = int(retry_delay.total_seconds() if isinstance(retry_delay, timedelta) else retry_delay)
|
||||
self.service_ip = service_ip
|
||||
self.service_port = service_port
|
||||
self.redis_enabled = redis_enabled
|
||||
self.account_id = account_id
|
||||
self.save_info_json = save_info_json
|
||||
self.info_json_dir = info_json_dir
|
||||
self.get_socks_proxy = get_socks_proxy
|
||||
self.store_socks_proxy = store_socks_proxy
|
||||
self.timeout = timeout
|
||||
|
||||
def execute(self, context):
|
||||
logger.info("Executing YtdlpOpsOperator")
|
||||
transport = None
|
||||
try:
|
||||
logger.info("Getting task parameters")
|
||||
params = context.get('params', {})
|
||||
redis_enabled = params.get('redis_enabled', self.redis_enabled)
|
||||
logger.info(f"Using redis_enabled={redis_enabled} (from {'task params' if 'redis_enabled' in params else 'operator init'})")
|
||||
|
||||
# Determine account_id to use (from params or operator default)
|
||||
account_id = context['params'].get('account_id', self.account_id)
|
||||
logger.info(f"Using account_id='{account_id}' (from {'task params' if 'account_id' in params else 'operator init'})")
|
||||
|
||||
if redis_enabled:
|
||||
# Get Redis connection with proper authentication and error handling
|
||||
redis_conn = BaseHook.get_connection(self.redis_conn_id)
|
||||
redis_client = redis.Redis(
|
||||
host=redis_conn.host,
|
||||
port=redis_conn.port,
|
||||
password=redis_conn.password,
|
||||
db=0,
|
||||
decode_responses=True # Important for consistent key handling
|
||||
)
|
||||
|
||||
# Test Redis connection
|
||||
try:
|
||||
if not redis_client.ping():
|
||||
raise redis.exceptions.ConnectionError("Redis ping failed")
|
||||
logger.info(f"Successfully connected to Redis at {redis_conn.host}:{redis_conn.port}")
|
||||
except redis.exceptions.AuthenticationError:
|
||||
logger.error(f"Redis authentication failed for connection '{self.redis_conn_id}'. Check password.")
|
||||
raise AirflowException("Redis authentication failed.")
|
||||
except redis.exceptions.ConnectionError as e:
|
||||
logger.error(f"Could not connect to Redis at {redis_conn.host}:{redis_conn.port}. Error: {e}")
|
||||
raise AirflowException(f"Redis connection failed: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected Redis error: {str(e)}")
|
||||
raise AirflowException(f"Unexpected Redis error: {e}")
|
||||
|
||||
# Get service details from Redis with retries and proper key handling
|
||||
service_key = f"ytdlp:{account_id}"
|
||||
legacy_key = account_id # For backward compatibility
|
||||
|
||||
host = None
|
||||
port = None
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
logger.info(f"Attempt {attempt + 1}/{self.max_retries}: Fetching service details from Redis for keys: '{service_key}', '{legacy_key}'")
|
||||
service_details = redis_client.hgetall(service_key)
|
||||
if not service_details:
|
||||
logger.warning(f"Key '{service_key}' not found, trying legacy key '{legacy_key}'")
|
||||
service_details = redis_client.hgetall(legacy_key)
|
||||
|
||||
if not service_details:
|
||||
raise ValueError(f"No service details found in Redis for keys: {service_key} or {legacy_key}")
|
||||
|
||||
# Find IP and port, handling potential case differences and byte/string types
|
||||
ip_key = next((k for k in service_details if k.lower() == 'ip'), None)
|
||||
port_key = next((k for k in service_details if k.lower() == 'port'), None)
|
||||
|
||||
if not ip_key: raise ValueError(f"'ip' key not found in Redis hash for {service_key}/{legacy_key}")
|
||||
if not port_key: raise ValueError(f"'port' key not found in Redis hash for {service_key}/{legacy_key}")
|
||||
|
||||
host = service_details[ip_key] # Already decoded due to decode_responses=True
|
||||
port_str = service_details[port_key]
|
||||
|
||||
try:
|
||||
port = int(port_str)
|
||||
except ValueError:
|
||||
raise ValueError(f"Invalid port value '{port_str}' found in Redis for {service_key}/{legacy_key}")
|
||||
|
||||
logger.info(f"Extracted from Redis - Service IP: {host}, Service Port: {port}")
|
||||
break # Success
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Attempt {attempt + 1} failed to get Redis details: {str(e)}")
|
||||
if attempt == self.max_retries - 1:
|
||||
logger.error("Max retries reached for fetching Redis details.")
|
||||
raise AirflowException(f"Failed to get service details from Redis after {self.max_retries} attempts: {e}")
|
||||
logger.info(f"Retrying in {self.retry_delay} seconds...")
|
||||
time.sleep(self.retry_delay)
|
||||
else:
|
||||
# Direct connection: Log parameter sources
|
||||
params = context.get('params', {})
|
||||
host = params.get('service_ip', self.service_ip)
|
||||
host_source = 'task params' if 'service_ip' in params else 'operator init'
|
||||
port_str = params.get('service_port', self.service_port)
|
||||
port_source = 'task params' if 'service_port' in params else 'operator init'
|
||||
url = params.get('url', self.url)
|
||||
url_source = 'task params' if 'url' in params else 'operator init'
|
||||
|
||||
logger.info(f"Using service_ip={host} (from {host_source})")
|
||||
logger.info(f"Using service_port={port_str} (from {port_source})")
|
||||
logger.info(f"Using url={url} (from {url_source})")
|
||||
|
||||
if not host or not port_str:
|
||||
raise ValueError("Direct connection requires service_ip and service_port")
|
||||
try:
|
||||
port = int(port_str)
|
||||
except ValueError:
|
||||
raise ValueError(f"Invalid service_port value: {port_str}")
|
||||
|
||||
logger.info(f"Connecting directly to Thrift service at {host}:{port} (Redis bypassed)")
|
||||
|
||||
# Render and validate timeout
|
||||
timeout_param = context.get('params', {}).get('timeout', self.timeout)
|
||||
if isinstance(self.timeout, str) and '{{' in self.timeout:
|
||||
timeout_rendered = self.render_template(self.timeout, context)
|
||||
logger.info(f"Rendered timeout template: '{self.timeout}' -> '{timeout_rendered}'")
|
||||
timeout_param = timeout_rendered
|
||||
try:
|
||||
timeout = int(timeout_param)
|
||||
if timeout <= 0: raise ValueError("Timeout must be positive")
|
||||
logger.info(f"Using timeout: {timeout} seconds")
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid timeout value: '{timeout_param}'. Using default: {DEFAULT_TIMEOUT}")
|
||||
timeout = DEFAULT_TIMEOUT
|
||||
|
||||
# Create Thrift connection objects
|
||||
socket_conn = TSocket.TSocket(host, port, socket_family=socket.AF_INET) # Explicitly use AF_INET (IPv4)
|
||||
socket_conn.setTimeout(timeout * 1000) # Thrift timeout is in milliseconds
|
||||
transport = TTransport.TFramedTransport(socket_conn)
|
||||
protocol = TBinaryProtocol.TBinaryProtocol(transport)
|
||||
client = YTTokenOpService.Client(protocol)
|
||||
|
||||
logger.info(f"Attempting to connect to Thrift server at {host}:{port}...")
|
||||
try:
|
||||
transport.open()
|
||||
logger.info("Successfully connected to Thrift server.")
|
||||
|
||||
# Test connection with ping
|
||||
try:
|
||||
client.ping()
|
||||
logger.info("Server ping successful.")
|
||||
except Exception as e:
|
||||
logger.error(f"Server ping failed: {e}")
|
||||
raise AirflowException(f"Server connection test (ping) failed: {e}")
|
||||
|
||||
# Get token from service with specific error handling
|
||||
try:
|
||||
url_param = context.get('params', {}).get('url', self.url)
|
||||
logger.info(f"Requesting token for accountId='{account_id}', url='{url_param}'")
|
||||
token_data = client.getOrRefreshToken(
|
||||
accountId=account_id,
|
||||
updateType=TokenUpdateMode.AUTO,
|
||||
url=url_param
|
||||
)
|
||||
logger.info("Successfully retrieved token data from service.")
|
||||
except PBServiceException as e:
|
||||
logger.error(f"PBServiceException occurred: Code={getattr(e, 'errorCode', 'N/A')}, Message={getattr(e, 'message', 'N/A')}")
|
||||
error_code = getattr(e, 'errorCode', None)
|
||||
error_msg = f"YTDLP service error: {getattr(e, 'message', str(e))}"
|
||||
# Handle specific known error codes
|
||||
if error_code in [
|
||||
"SOCKS5_CONNECTION_FAILED", "SOCKS5_TIMEOUT",
|
||||
"SOCKS5_CONNECTION_REFUSED", "SOCKS5_CONNECTION_TIMEOUT",
|
||||
"SOCKS5_HOST_NOT_FOUND", "SOCKS5_NETWORK_UNREACHABLE"
|
||||
]:
|
||||
error_msg = f"SOCKS5 proxy error ({error_code}): {e.message}. Check proxy settings."
|
||||
elif error_code == "BOT_DETECTION":
|
||||
error_msg = f"Bot detection triggered ({error_code}): {e.message}."
|
||||
suggestions = getattr(e, 'context', {}).get('suggestions', [])
|
||||
if suggestions: error_msg += "\nSuggestions:\n" + "\n".join(f"- {s}" for s in suggestions)
|
||||
elif error_code == "NODEJS_SCRIPT_ERROR":
|
||||
error_msg = f"Node.js script error ({error_code}): {e.message}."
|
||||
elif error_code == "NODEJS_TIMEOUT":
|
||||
error_msg = f"Node.js timeout ({error_code}): {e.message}."
|
||||
# Add more specific error handling as needed
|
||||
raise AirflowException(error_msg)
|
||||
except TTransportException as e:
|
||||
logger.error(f"Thrift transport error during getOrRefreshToken: {e}")
|
||||
raise AirflowException(f"Transport error during API call: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during getOrRefreshToken: {e}")
|
||||
raise AirflowException(f"Unexpected error during API call: {e}")
|
||||
|
||||
except TTransportException as e:
|
||||
# Handle connection-specific transport errors
|
||||
if "read 0 bytes" in str(e) or "Could not connect to" in str(e) or "Connection refused" in str(e):
|
||||
logger.error(f"Connection failed to {host}:{port}. Details: {e}")
|
||||
logger.error("Possible causes: Server down, firewall block, incorrect IP/port.")
|
||||
raise AirflowException(f"Failed to connect to YTDLP service at {host}:{port}: {e}")
|
||||
else:
|
||||
logger.error(f"Thrift transport error during connection: {str(e)}")
|
||||
raise AirflowException(f"Transport error connecting to YTDLP service: {str(e)}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during connection or ping: {str(e)}")
|
||||
raise # Re-raise other unexpected errors
|
||||
|
||||
# Log received token data attributes for debugging
|
||||
logger.debug(f"Token data received. Attributes: {dir(token_data)}")
|
||||
for attr in dir(token_data):
|
||||
if not attr.startswith('__') and not callable(getattr(token_data, attr)): # Log non-callable attributes
|
||||
value = getattr(token_data, attr)
|
||||
if attr == 'infoJson' and value:
|
||||
logger.debug(f"infoJson: {value[:50]}...")
|
||||
else:
|
||||
logger.debug(f"{attr}: {value}")
|
||||
|
||||
info_json_path = None # Initialize info_json_path
|
||||
|
||||
save_info_json_param = context['params'].get('save_info_json', self.save_info_json)
|
||||
# Render if it's a string template
|
||||
if isinstance(save_info_json_param, str):
|
||||
save_info_json_rendered = self.render_template(save_info_json_param, context)
|
||||
# Convert common string representations to boolean
|
||||
save_info_json = str(save_info_json_rendered).lower() in ['true', '1', 't', 'y', 'yes']
|
||||
else:
|
||||
save_info_json = bool(save_info_json_param)
|
||||
|
||||
|
||||
# Save info.json if requested and valid
|
||||
if self.save_info_json:
|
||||
info_json = self._get_info_json(token_data)
|
||||
if info_json and self._is_valid_json(info_json):
|
||||
try:
|
||||
# Use internal _save_info_json method which handles rendering, dir creation, logging
|
||||
info_json_path = self._save_info_json(context, info_json)
|
||||
if info_json_path: # Check if saving was successful
|
||||
context['task_instance'].xcom_push(key='info_json_path', value=info_json_path)
|
||||
logger.info(f"Successfully saved info.json and pushed path to XCom: {info_json_path}")
|
||||
else:
|
||||
# _save_info_json should log errors, push None to indicate failure
|
||||
context['task_instance'].xcom_push(key='info_json_path', value=None)
|
||||
logger.warning("info.json saving failed (check logs from _save_info_json), pushing None to XCom for info_json_path.")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during info.json saving process: {e}", exc_info=True)
|
||||
context['task_instance'].xcom_push(key='info_json_path', value=None) # Push None on error
|
||||
elif info_json:
|
||||
logger.warning("Retrieved infoJson is not valid JSON. Skipping save.")
|
||||
context['task_instance'].xcom_push(key='info_json_path', value=None)
|
||||
else:
|
||||
logger.info("No infoJson found in token data. Skipping save.")
|
||||
context['task_instance'].xcom_push(key='info_json_path', value=None)
|
||||
else:
|
||||
logger.info("save_info_json is False. Skipping info.json save.")
|
||||
context['task_instance'].xcom_push(key='info_json_path', value=None)
|
||||
|
||||
|
||||
# Extract and potentially store SOCKS proxy
|
||||
socks_proxy = None
|
||||
if self.get_socks_proxy: # Use instance attribute
|
||||
# Check for common attribute names for proxy
|
||||
proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
|
||||
if proxy_attr:
|
||||
socks_proxy = getattr(token_data, proxy_attr)
|
||||
if socks_proxy: # Ensure proxy value is not empty
|
||||
logger.info(f"Extracted SOCKS proxy ({proxy_attr}): {socks_proxy}")
|
||||
if self.store_socks_proxy: # Use instance attribute
|
||||
context['task_instance'].xcom_push(key='socks_proxy', value=socks_proxy)
|
||||
logger.info(f"Pushed key 'socks_proxy' to XCom with value: {socks_proxy}")
|
||||
else:
|
||||
logger.info("SOCKS proxy extracted but not pushed to XCom (store_socks_proxy=False).")
|
||||
else:
|
||||
logger.info(f"Found proxy attribute '{proxy_attr}' but value is empty. No proxy extracted.")
|
||||
# Push None even if found but empty, if storing is enabled
|
||||
if self.store_socks_proxy: # Use instance attribute
|
||||
context['task_instance'].xcom_push(key='socks_proxy', value=None)
|
||||
logger.info("Pushed None to XCom for 'socks_proxy' as extracted value was empty.")
|
||||
else:
|
||||
logger.info("get_socks_proxy is True, but no SOCKS proxy attribute found in token data.")
|
||||
# Push None if storing is enabled but attribute not found
|
||||
if self.store_socks_proxy: # Use instance attribute
|
||||
context['task_instance'].xcom_push(key='socks_proxy', value=None)
|
||||
logger.info("Pushed None to XCom for 'socks_proxy' as attribute was not found.")
|
||||
else:
|
||||
logger.info("get_socks_proxy is False. Skipping proxy extraction.")
|
||||
# Push None if storing is enabled but extraction was skipped
|
||||
if self.store_socks_proxy: # Use instance attribute
|
||||
context['task_instance'].xcom_push(key='socks_proxy', value=None)
|
||||
logger.info("Pushed None to XCom for 'socks_proxy' as get_socks_proxy=False.")
|
||||
|
||||
|
||||
# Get the original command from the server
|
||||
ytdlp_cmd = getattr(token_data, 'ytdlpCommand', None)
|
||||
if not ytdlp_cmd:
|
||||
logger.error("No 'ytdlpCommand' attribute found in token data.")
|
||||
raise AirflowException("Required 'ytdlpCommand' not received from service.")
|
||||
|
||||
logger.info(f"Original command received from server: {ytdlp_cmd}")
|
||||
|
||||
# Log example usage command (DO NOT MODIFY the original command here)
|
||||
if info_json_path:
|
||||
# Use double quotes for paths/proxy in example for robustness
|
||||
example_cmd = f"yt-dlp --load-info-json \"{info_json_path}\""
|
||||
if socks_proxy:
|
||||
example_cmd += f" --proxy \"{socks_proxy}\""
|
||||
example_cmd += " --verbose --simulate" # Add useful flags for testing
|
||||
logger.info(f"\n--- Example usage with saved info.json ---")
|
||||
logger.info(example_cmd)
|
||||
logger.info(f"(Note: The actual command with tokens/cookies is pushed to XCom as 'ytdlp_command')")
|
||||
latest_json_path = os.path.join(os.path.dirname(info_json_path), 'latest.json')
|
||||
logger.info(f"(You can also use 'latest.json': {latest_json_path})")
|
||||
logger.info(f"-------------------------------------------\n")
|
||||
|
||||
else:
|
||||
logger.info("\n--- Original command pushed to XCom ('ytdlp_command') ---")
|
||||
if socks_proxy:
|
||||
logger.info(f"Use the extracted proxy '{socks_proxy}' (pushed to XCom if store_socks_proxy=True) with the --proxy flag.")
|
||||
logger.info("Add --verbose and --simulate flags for testing the command.")
|
||||
logger.info(f"-------------------------------------------------------\n")
|
||||
|
||||
|
||||
# Push the *original* command to XCom
|
||||
context['task_instance'].xcom_push(key='ytdlp_command', value=ytdlp_cmd)
|
||||
logger.info(f"Pushed original command to XCom key 'ytdlp_command'.")
|
||||
|
||||
# Note: Returning ytdlp_cmd below implicitly pushes the same value
|
||||
# to XCom under the key 'return_value'. Downstream tasks should
|
||||
# preferably use the explicitly pushed 'ytdlp_command' key for clarity.
|
||||
return ytdlp_cmd # Return the original command
|
||||
|
||||
except AirflowException as e: # Catch AirflowExceptions raised explicitly in the code above
|
||||
logger.error(f"Operation failed due to AirflowException: {e}")
|
||||
raise # Re-raise AirflowExceptions to ensure task failure
|
||||
except (TTransportException, PBServiceException) as e: # Catch specific Thrift/Service errors not already wrapped
|
||||
logger.error(f"Unhandled Thrift/Service error: {e}", exc_info=True) # Add traceback for context
|
||||
raise AirflowException(f"Unhandled YTDLP service error: {e}") # Wrap in AirflowException
|
||||
except Exception as e: # General catch-all for truly unexpected errors
|
||||
# Log with traceback for unexpected errors
|
||||
logger.error(f"Caught unexpected error in YtdlpOpsOperator: {e}", exc_info=True)
|
||||
# Ensure any unexpected error explicitly fails the task with AirflowException
|
||||
raise AirflowException(f"Unexpected error caused task failure: {e}")
|
||||
finally:
|
||||
if transport and transport.isOpen(): # Check if transport exists and is open before closing
|
||||
logger.info("Closing Thrift transport.")
|
||||
transport.close()
|
||||
|
||||
# --- Helper Methods ---

def _get_info_json(self, token_data):
"""Safely extracts infoJson from token data."""
info_json = getattr(token_data, 'infoJson', None)
if info_json:
logger.debug("Extracted infoJson from token data.")
else:
logger.debug("No infoJson attribute found in token data.")
return info_json
|
||||
|
||||
def _is_valid_json(self, json_str):
|
||||
"""Checks if a string is valid JSON."""
|
||||
if not json_str or not isinstance(json_str, str):
|
||||
logger.debug("Input is not a non-empty string, considered invalid JSON.")
|
||||
return False
|
||||
try:
|
||||
json.loads(json_str)
|
||||
logger.debug("JSON string validation successful.")
|
||||
return True
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"JSON validation failed: {e}")
|
||||
return False
|
||||
|
||||
def _save_info_json(self, context, info_json):
|
||||
"""Saves info_json to a file, handling directory creation and logging. Returns the path on success, None on failure."""
|
||||
try:
|
||||
# Get URL from params/context for video ID extraction
|
||||
url_param = context.get('params', {}).get('url', self.url)
|
||||
video_id = self._extract_video_id(url_param) # Use internal helper
|
||||
|
||||
# Render the info_json_dir template
|
||||
save_dir_template = self.info_json_dir or "." # Default to current dir if template is None or empty string
|
||||
save_dir = self.render_template(save_dir_template, context)
|
||||
if not save_dir: # Handle case where template renders to empty string
|
||||
logger.warning(f"Rendered info_json_dir template '{save_dir_template}' resulted in an empty path. Defaulting to '.'")
|
||||
save_dir = "."
|
||||
logger.info(f"Target directory for info.json (rendered): {save_dir}")
|
||||
|
||||
# Ensure directory exists
|
||||
try:
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
logger.info(f"Ensured directory exists: {save_dir}")
|
||||
except OSError as e:
|
||||
logger.error(f"Could not create directory {save_dir}: {e}. Cannot save info.json.")
|
||||
return None # Indicate failure
|
||||
|
||||
# Construct filename (using potentially overridden account_id)
|
||||
account_id_param = context.get('params', {}).get('account_id', self.account_id)
|
||||
timestamp = int(time.time())
|
||||
base_filename = f"info_{video_id}_{account_id_param}_{timestamp}.json" if video_id else f"info_{account_id_param}_{timestamp}.json"
|
||||
info_json_path = os.path.join(save_dir, base_filename)
|
||||
latest_json_path = os.path.join(save_dir, "latest.json") # Path for the latest symlink/copy
|
||||
|
||||
# Write to timestamped file
|
||||
try:
|
||||
logger.info(f"Writing info.json content (received from service) to {info_json_path}...")
|
||||
with open(info_json_path, 'w', encoding='utf-8') as f:
|
||||
f.write(info_json)
|
||||
logger.info(f"Successfully saved info.json to timestamped file: {info_json_path}")
|
||||
except IOError as e:
|
||||
logger.error(f"Failed to write info.json to {info_json_path}: {e}")
|
||||
return None # Indicate failure
|
||||
|
||||
# Write to latest.json (overwrite) - best effort
|
||||
try:
|
||||
with open(latest_json_path, 'w', encoding='utf-8') as f:
|
||||
f.write(info_json)
|
||||
logger.info(f"Updated latest.json file: {latest_json_path}")
|
||||
except IOError as e:
|
||||
# Log warning but don't fail the whole save if only latest.json fails
|
||||
logger.warning(f"Failed to update latest.json at {latest_json_path}: {e}")
|
||||
|
||||
return info_json_path # Return path on success (even if latest.json failed)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in _save_info_json: {e}", exc_info=True)
|
||||
return None # Indicate failure
|
||||
|
||||
def _extract_video_id(self, url):
|
||||
"""Extracts YouTube video ID from URL (internal helper)."""
|
||||
if not url or not isinstance(url, str):
|
||||
logger.debug("URL is empty or not a string, cannot extract video ID.")
|
||||
return None
|
||||
try:
|
||||
# Basic extraction logic (can be enhanced for more URL types)
|
||||
video_id = None
|
||||
if 'youtube.com/watch?v=' in url:
|
||||
video_id = url.split('v=')[1].split('&')[0]
|
||||
elif 'youtu.be/' in url:
|
||||
video_id = url.split('youtu.be/')[1].split('?')[0]
|
||||
|
||||
# Ensure it looks like a video ID (typically 11 chars, but can vary)
|
||||
if video_id and len(video_id) >= 11:
|
||||
video_id = video_id[:11] # Take first 11 chars as standard ID length
|
||||
logger.debug(f"Extracted video ID '{video_id}' from URL: {url}")
|
||||
return video_id
|
||||
else:
|
||||
logger.debug(f"Could not extract a standard video ID pattern from URL: {url}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract video ID from URL '{url}'. Error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Python Callables for Tasks
|
||||
# =============================================================================
|
||||
|
||||
def display_token_info(**context):
|
||||
"""Displays token info from XCom, parses info.json, and logs example commands."""
|
||||
ti = context['task_instance']
|
||||
logger.info("Starting display_token_info task.")
|
||||
|
||||
# Pull data from XCom (provide default values)
|
||||
info_json_path = ti.xcom_pull(task_ids='get_token', key='info_json_path')
|
||||
socks_proxy = ti.xcom_pull(task_ids='get_token', key='socks_proxy')
|
||||
ytdlp_command = ti.xcom_pull(task_ids='get_token', key='ytdlp_command')
|
||||
|
||||
logger.info("\n=== Pulled Token Information from XCom ===")
|
||||
logger.info(f"Info.json path: {info_json_path or 'Not found/Not saved'}")
|
||||
logger.info(f"SOCKS Proxy: {socks_proxy or 'Not found/Not extracted'}")
|
||||
logger.info(f"Original yt-dlp command (with tokens): {ytdlp_command or 'Not found'}")
|
||||
|
||||
result = {
|
||||
'info_path': info_json_path,
|
||||
'proxy': socks_proxy,
|
||||
'ytdlp_command': ytdlp_command,
|
||||
'video_info': None,
|
||||
'commands': {},
|
||||
'error': None
|
||||
}
|
||||
|
||||
if info_json_path and os.path.exists(info_json_path):
|
||||
logger.info(f"\n=== Processing Video Information from: {info_json_path} ===")
|
||||
try:
|
||||
with open(info_json_path, 'r', encoding='utf-8') as f:
|
||||
info = json.load(f)
|
||||
|
||||
# Extract and log basic video info safely
|
||||
title = info.get('title', 'Unknown Title')
|
||||
uploader = info.get('uploader', 'Unknown Author')
|
||||
duration = info.get('duration_string', 'Unknown Length')
|
||||
upload_date_str = info.get('upload_date') # Format: YYYYMMDD
|
||||
upload_date_formatted = 'Unknown Date'
|
||||
if upload_date_str:
|
||||
try:
|
||||
# Validate format before parsing
|
||||
if len(upload_date_str) == 8 and upload_date_str.isdigit():
|
||||
upload_date_formatted = datetime.strptime(upload_date_str, '%Y%m%d').strftime('%Y-%m-%d')
|
||||
else:
|
||||
logger.warning(f"Upload date '{upload_date_str}' is not in YYYYMMDD format.")
|
||||
except ValueError:
|
||||
logger.warning(f"Could not parse upload_date '{upload_date_str}'")
|
||||
|
||||
result['video_info'] = {
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date_formatted, # Store formatted date
|
||||
'duration': duration
|
||||
}
|
||||
|
||||
logger.info(f"Title: {title}")
|
||||
logger.info(f"Author: {uploader}")
|
||||
logger.info(f"Date: {upload_date_formatted}")
|
||||
logger.info(f"Length: {duration}")
|
||||
|
||||
logger.info("\n=== Example yt-dlp Commands (using saved info.json) ===")
|
||||
base_cmd = f"yt-dlp --load-info-json \"{info_json_path}\""
|
||||
if socks_proxy:
|
||||
base_cmd += f" --proxy \"{socks_proxy}\""
|
||||
|
||||
# Command to list formats
|
||||
format_cmd = f"{base_cmd} -F"
|
||||
result['commands']['format'] = format_cmd
|
||||
logger.info(f"List formats command: {format_cmd}")
|
||||
|
||||
# Execute and log the format listing command
|
||||
logger.info("\n--- Executing Format List Command ---")
|
||||
try:
|
||||
# Use os.popen for simplicity, capture output
|
||||
logger.info(f"Running: {format_cmd}")
|
||||
format_output = os.popen(format_cmd).read()
|
||||
logger.info("--- Format List Output ---")
|
||||
logger.info(format_output)
|
||||
logger.info("--------------------------")
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing format command: {e}")
|
||||
|
||||
# Command to simulate download
|
||||
simulate_cmd = f"{base_cmd} --simulate --verbose" # Add verbose for more info
|
||||
result['commands']['simulate'] = simulate_cmd
|
||||
logger.info(f"Simulate download command: {simulate_cmd}")
|
||||
|
||||
# Execute and log the simulation command
|
||||
logger.info("\n--- Executing Simulation Command ---")
|
||||
try:
|
||||
logger.info(f"Running: {simulate_cmd}")
|
||||
simulate_output = os.popen(simulate_cmd).read()
|
||||
logger.info("--- Simulation Output ---")
|
||||
logger.info(simulate_output)
|
||||
logger.info("-------------------------")
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing simulation command: {e}")
|
||||
|
||||
# Basic download command
|
||||
download_cmd = base_cmd
|
||||
result['commands']['download_base'] = download_cmd
|
||||
logger.info(f"Base download command (add format selection, output path): {download_cmd}")
|
||||
|
||||
# Push generated example commands to XCom for potential downstream use
|
||||
# ti.xcom_push(key='format_cmd', value=format_cmd) # Removed as requested
|
||||
# ti.xcom_push(key='simulate_cmd', value=simulate_cmd) # Removed as requested
|
||||
ti.xcom_push(key='download_cmd', value=download_cmd)
|
||||
logger.info(f"Pushed key 'download_cmd' to XCom with value: {download_cmd}")
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
error_msg = f"Failed to parse info.json file '{info_json_path}': {e}"
|
||||
logger.error(error_msg)
|
||||
result['error'] = error_msg
|
||||
except FileNotFoundError:
|
||||
error_msg = f"Info.json file not found at path: {info_json_path}"
|
||||
logger.error(error_msg)
|
||||
result['error'] = error_msg
|
||||
except Exception as e:
|
||||
error_msg = f"Error processing info.json file '{info_json_path}': {str(e)}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
result['error'] = error_msg
|
||||
elif info_json_path:
|
||||
error_msg = f"Info.json path provided ('{info_json_path}') but file does not exist."
|
||||
logger.warning(error_msg)
|
||||
result['error'] = error_msg
|
||||
else:
|
||||
logger.warning("No info.json path found in XCom. Cannot display video details or generate example commands.")
|
||||
result['error'] = "Info.json path not available."
|
||||
|
||||
logger.info("Finished display_token_info task.")
|
||||
# Return the collected information (useful if used as a PythonOperator return value)
|
||||
return json.dumps(result) # Return as JSON string for XCom compatibility if needed
|
||||
|
||||
|
||||
def store_token_info(**context):
|
||||
"""Stores retrieved token information (command, proxy, info.json) in Redis."""
|
||||
ti = context['task_instance']
|
||||
# Use the redis_conn_id defined in the operator/DAG params if possible, else default
|
||||
redis_conn_id = context['params'].get('redis_conn_id', 'redis_default')
|
||||
redis_hook = RedisHook(redis_conn_id=redis_conn_id)
|
||||
logger.info(f"Starting store_token_info task using Redis connection '{redis_conn_id}'.")
|
||||
|
||||
try:
|
||||
# Pull necessary data from XCom and context
|
||||
url = context['params'].get('url')
|
||||
if not url:
|
||||
# Attempt to get URL from DAG run conf as fallback
|
||||
dag_run = context.get('dag_run')
url = dag_run.conf.get('url') if dag_run and dag_run.conf else None
|
||||
if not url:
|
||||
raise ValueError("URL parameter is missing in context['params'] and dag_run.conf")
|
||||
logger.warning("URL parameter missing in context['params'], using URL from dag_run.conf.")
|
||||
|
||||
|
||||
ytdlp_command = ti.xcom_pull(task_ids='get_token', key='ytdlp_command')
|
||||
socks_proxy = ti.xcom_pull(task_ids='get_token', key='socks_proxy') or '' # Default to empty string if None
|
||||
info_json_path = ti.xcom_pull(task_ids='get_token', key='info_json_path')
|
||||
|
||||
if not ytdlp_command:
|
||||
logger.warning("ytdlp_command not found in XCom. Storing empty value.")
|
||||
ytdlp_command = '' # Store empty if not found
|
||||
|
||||
# Construct the base command using info.json
|
||||
ytdlp_command_base = ''
|
||||
if info_json_path and os.path.exists(info_json_path):
|
||||
ytdlp_command_base = f"yt-dlp --load-info-json \"{info_json_path}\""
|
||||
logger.info(f"Constructed base command: {ytdlp_command_base}")
|
||||
else:
|
||||
logger.warning("Cannot construct base command: info_json_path not valid.")
|
||||
|
||||
# Construct the command with tokens and proxy
|
||||
ytdlp_command_tokens = ytdlp_command # Start with original command from server
|
||||
if socks_proxy:
|
||||
ytdlp_command_tokens += f" --proxy \"{socks_proxy}\""
|
||||
logger.info("Appended proxy to token command.")
|
||||
|
||||
data_to_store = {
|
||||
'url': url,
|
||||
'ytdlp_command': ytdlp_command_base, # Store the base command
|
||||
'proxy': socks_proxy,
|
||||
'info_json_path': info_json_path or '' # Store path even if None/empty
|
||||
# 'info_json' will be added below
|
||||
}
|
||||
|
||||
# Read info.json content if path exists
|
||||
info_json_content = None
|
||||
if info_json_path and os.path.exists(info_json_path):
|
||||
try:
|
||||
with open(info_json_path, 'r', encoding='utf-8') as f:
|
||||
# Read and immediately validate JSON structure before storing
|
||||
info_json_content = json.load(f)
|
||||
# Store the validated JSON as a string
|
||||
data_to_store['info_json'] = json.dumps(info_json_content)
|
||||
logger.info(f"Read and validated info.json content from: {info_json_path}")
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse info.json file '{info_json_path}' as JSON: {e}. Storing empty content.")
|
||||
data_to_store['info_json'] = '' # Store empty string on parse error
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to read info.json file '{info_json_path}': {e}. Storing empty content.")
|
||||
data_to_store['info_json'] = '' # Store empty string on other read errors
|
||||
else:
|
||||
logger.warning(f"info_json_path ('{info_json_path}') not found or invalid. Storing without info_json content.")
|
||||
data_to_store['info_json'] = '' # Store empty string if no path
|
||||
|
||||
# Determine Redis key using video ID
|
||||
# Re-implement the basic extraction here rather than instantiating the operator just to reuse its helper.
|
||||
video_id = None
|
||||
try:
|
||||
if 'youtube.com/watch?v=' in url:
|
||||
video_id = url.split('v=')[1].split('&')[0][:11]
|
||||
elif 'youtu.be/' in url:
|
||||
video_id = url.split('youtu.be/')[1].split('?')[0][:11]
|
||||
except Exception:
|
||||
pass # Ignore errors in ID extraction for key generation
|
||||
redis_key = f"token_info:{video_id or 'unknown'}"
|
||||
logger.info(f"Determined Redis key: {redis_key}")
|
||||
|
||||
# Store data in Redis hash
|
||||
# Log presence/absence rather than full content for potentially large fields
|
||||
logger.info(f"Data to store in Redis key '{redis_key}': "
|
||||
f"URL='{data_to_store['url']}', "
|
||||
f"Command={'<present>' if data_to_store['ytdlp_command'] else '<empty>'}, "
|
||||
f"Proxy='{data_to_store['proxy'] or '<empty>'}', "
|
||||
f"Path='{data_to_store['info_json_path'] or '<empty>'}', "
|
||||
f"JSON Content={'<present>' if data_to_store.get('info_json') else '<empty>'}")
|
||||
|
||||
with redis_hook.get_conn() as redis_client:
|
||||
# video_id and redis_key were already determined above; reuse them here.
|
||||
# Store data in Redis hash
|
||||
# Add video_id, timestamp, and the constructed ytdlp_command_tokens
|
||||
data_to_store['video_id'] = video_id or 'unknown'
|
||||
data_to_store['timestamp'] = int(time.time())
|
||||
data_to_store['ytdlp_command_tokens'] = ytdlp_command_tokens # Store the original token command
|
||||
|
||||
# Log fields being stored
|
||||
log_data = {k: (f"<{len(v)} bytes>" if isinstance(v, str) and len(v) > 100 else v) for k, v in data_to_store.items()}
|
||||
logger.info(f"Storing in Redis key '{redis_key}': {log_data}")
|
||||
|
||||
redis_client.hset(redis_key, mapping=data_to_store)
|
||||
# Set expiration (e.g., 24 hours = 86400 seconds)
|
||||
redis_client.expire(redis_key, 86400)
|
||||
logger.info(f"Successfully stored token info in Redis key '{redis_key}' with 24h expiration.")
|
||||
# Log the final stored data again for clarity
|
||||
final_log_data = {k: (f"<{len(v)} bytes>" if isinstance(v, str) and len(v) > 100 else v) for k, v in data_to_store.items()}
|
||||
logger.info(f"--- Final Data Stored in Redis Key '{redis_key}' ---")
|
||||
logger.info(final_log_data)
|
||||
logger.info("----------------------------------------------------")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to store token info in Redis: {e}", exc_info=True)
|
||||
# Re-raise as AirflowException to fail the task
|
||||
raise AirflowException(f"Failed to store token info in Redis: {e}")
|
||||
|
||||
logger.info("Finished store_token_info task.")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DAG Definition
|
||||
# =============================================================================
|
||||
|
||||
# Update default_args to match ytdlp_client_dag.py structure
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'email_on_failure': False, # Match reference DAG
|
||||
'email_on_retry': False, # Match reference DAG
|
||||
'retries': 1, # Default task retries
|
||||
'retry_delay': timedelta(minutes=5), # Standard task retry delay
|
||||
'start_date': days_ago(1) # Best practice start date
|
||||
}
|
||||
|
||||
# Update DAG definition
|
||||
with DAG(
|
||||
dag_id='ytdlp_client_dag_v2.1',
|
||||
default_args=default_args,
|
||||
schedule_interval=None, # Manually triggered DAG
|
||||
catchup=False, # Don't run for past missed schedules
|
||||
description='DAG for YTDLP operations using Thrift client (V2 - Refactored)', # Updated description
|
||||
tags=['ytdlp', 'thrift', 'client', 'v2'], # Updated tags for better filtering
|
||||
params={
|
||||
# Define DAG parameters with defaults and types for UI clarity
|
||||
'url': Param('https://www.youtube.com/watch?v=sOlTX9uxUtM', type=["null", "string"], description="Required: The video URL to process."), # Default URL
|
||||
'redis_enabled': Param(False, type="boolean", description="Use Redis for service discovery? If False, uses service_ip/port."), # Default to direct connection
|
||||
'service_ip': Param('85.192.30.55', type="string", description="Service IP if redis_enabled=False."), # Default service IP
|
||||
'service_port': Param(9090, type="integer", description="Service port if redis_enabled=False."), # Default service port
|
||||
'account_id': Param('account_fr_2025-04-03T1220_anonomyous_2ssdfsf2342afga09', type="string", description="Account ID for Redis lookup or direct call."), # Updated default account_id
|
||||
'timeout': Param(DEFAULT_TIMEOUT, type="integer", description="Timeout in seconds for the Thrift connection."),
|
||||
# Use Airflow Variable for downloads directory, matching reference DAG structure
|
||||
'info_json_dir': Param("{{ var.value.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles') }}", type="string", description="Directory to save info.json. Uses Airflow Variable 'DOWNLOADS_TEMP' or default.")
|
||||
}
|
||||
) as dag:
|
||||
|
||||
# Define Tasks
|
||||
|
||||
get_token = YtdlpOpsOperator(
|
||||
task_id='get_token',
|
||||
# Pass templated parameters from DAG run config
|
||||
url="{{ params.url }}",
|
||||
redis_enabled="{{ params.redis_enabled }}",
|
||||
service_ip="{{ params.service_ip }}",
|
||||
service_port="{{ params.service_port }}",
|
||||
account_id="{{ params.account_id }}",
|
||||
save_info_json=True,
|
||||
info_json_dir="{{ params.info_json_dir }}",
|
||||
get_socks_proxy=True,
|
||||
store_socks_proxy=True,
|
||||
timeout="{{ params.timeout }}",
|
||||
retries=MAX_RETRIES, # Operator-specific retries if needed, else use DAG default
|
||||
retry_delay=RETRY_DELAY, # Operator-specific delay if needed
|
||||
# Add callbacks for logging success/failure, similar to reference DAG
|
||||
on_failure_callback=lambda context: logger.error(f"Task {context['task_instance_key_str']} failed."),
|
||||
on_success_callback=lambda context: logger.info(f"Task {context['task_instance_key_str']} succeeded.")
|
||||
)
|
||||
# Add task documentation (visible in Airflow UI)
|
||||
get_token.doc_md = """
|
||||
### Get Token Task
|
||||
Connects to the YTDLP Thrift service (either directly or via Redis discovery)
|
||||
to retrieve an authentication token and video metadata (info.json).
|
||||
|
||||
**Pushes to XCom:**
|
||||
- `info_json_path`: Path to the saved info.json file (or None if not saved/failed).
|
||||
- `socks_proxy`: The extracted SOCKS proxy string (or None if not requested/found).
|
||||
- `ytdlp_command`: The original command string received from the server (contains tokens/cookies).
|
||||
|
||||
- Uses parameters defined in the DAG run configuration.
|
||||
"""
|
||||
|
||||
# Optional: Add a task to explicitly check XComs for debugging (like in reference DAG)
|
||||
def _check_xcom_callable(**context):
|
||||
"""Logs XCom values pushed by the get_token task."""
|
||||
ti = context['task_instance']
|
||||
logger.info("--- Checking XCom values pushed by get_token ---")
|
||||
keys_to_check = ['info_json_path', 'socks_proxy', 'ytdlp_command']
|
||||
xcom_values = {}
|
||||
for key in keys_to_check:
|
||||
value = ti.xcom_pull(task_ids='get_token', key=key)
|
||||
xcom_values[key] = value
|
||||
# Avoid logging potentially sensitive command details fully in production
|
||||
if key == 'ytdlp_command' and value:
|
||||
log_value = f"{value[:50]}..." # Log truncated command
|
||||
else:
|
||||
log_value = value
|
||||
logger.info(f"XCom key='{key}': {log_value}")
|
||||
logger.info("----------------------------------------------")
|
||||
return xcom_values # Return values for potential future use
|
||||
|
||||
check_xcom_task = PythonOperator(
|
||||
task_id='check_xcom_after_get_token',
|
||||
python_callable=_check_xcom_callable,
|
||||
)
|
||||
check_xcom_task.doc_md = "Logs the values pushed to XCom by the 'get_token' task for debugging purposes."
|
||||
|
||||
display_info = PythonOperator(
|
||||
task_id='display_token_info',
|
||||
python_callable=display_token_info,
|
||||
trigger_rule='all_success'
|
||||
)
|
||||
display_info.doc_md = """
|
||||
### Display Token Info Task
|
||||
Pulls information from XCom, parses the `info.json` file (if available),
|
||||
logs video details, and generates example `yt-dlp` commands.
|
||||
|
||||
**Pulls from XCom (task_id='get_token'):**
|
||||
- `info_json_path`
|
||||
- `socks_proxy`
|
||||
- `ytdlp_command`
|
||||
|
||||
**Pushes to XCom:**
|
||||
- `download_cmd`: Base command using `--load-info-json` (user needs to add format/output).
|
||||
"""
|
||||
|
||||
store_info = PythonOperator(
|
||||
task_id='store_token_info', # Use consistent task ID naming
|
||||
python_callable=store_token_info,
|
||||
)
|
||||
store_info.doc_md = """
|
||||
### Store Token Info Task
|
||||
Pulls information from XCom and DAG parameters, reads the `info.json` content,
|
||||
and stores relevant data in a Redis hash.
|
||||
|
||||
**Pulls from XCom (task_id='get_token'):**
|
||||
- `ytdlp_command`
|
||||
- `socks_proxy`
|
||||
- `info_json_path`
|
||||
|
||||
**Pulls from DAG context:**
|
||||
- `params['url']` (or `dag_run.conf['url']`)
|
||||
|
||||
**Stores in Redis Hash (key: `token_info:<video_id>`):**
|
||||
- `url`: The video URL.
|
||||
- `ytdlp_command`: Base command using `--load-info-json`.
|
||||
- `proxy`: The SOCKS proxy string.
|
||||
- `info_json_path`: Path to the saved info.json file.
|
||||
- `info_json`: The full content of the info.json file (as a JSON string).
|
||||
- `video_id`: Extracted video ID.
|
||||
- `timestamp`: Unix timestamp of storage.
|
||||
- `ytdlp_command_tokens`: The original command string from the server (contains tokens/cookies).
|
||||
|
||||
Sets a 24-hour expiration on the Redis key.
|
||||
"""
|
||||
|
||||
# Define task dependencies matching the reference DAG structure
|
||||
get_token >> check_xcom_task >> display_info >> store_info
|
||||
@ -1,179 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:fenc=utf-8
|
||||
#
|
||||
# Copyright © 2024 rl <rl@rlmbp>
|
||||
#
|
||||
# Distributed under terms of the MIT license.
|
||||
|
||||
"""
|
||||
Airflow DAG for manually checking the status (type, size) and recent contents of the YTDLP Redis queues.
|
||||
"""
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.exceptions import AirflowException
|
||||
from airflow.models.param import Param
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.providers.redis.hooks.redis import RedisHook
|
||||
from airflow.utils.dates import days_ago
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import logging
|
||||
import json
|
||||
import redis # Import redis exceptions if needed
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default settings
|
||||
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||
DEFAULT_QUEUE_BASE_NAME = 'video_queue'
|
||||
DEFAULT_MAX_ITEMS_TO_LIST = 25
|
||||
|
||||
# Import utility functions
|
||||
from utils.redis_utils import _get_redis_client
|
||||
|
||||
# --- Python Callable for Check and List Task ---
|
||||
|
||||
def check_and_list_queue_callable(**context):
|
||||
"""Checks the type and size of a Redis key and lists its recent contents."""
|
||||
params = context['params']
|
||||
redis_conn_id = params['redis_conn_id']
|
||||
# queue_suffix is passed from the PythonOperator's op_kwargs, which are available in the context
|
||||
queue_suffix = context['queue_suffix']
|
||||
queue_name = params.get('queue_name', DEFAULT_QUEUE_BASE_NAME)
|
||||
queue_to_check = f"{queue_name}{queue_suffix}"
|
||||
max_items = int(params.get('max_items_to_list', DEFAULT_MAX_ITEMS_TO_LIST))
|
||||
|
||||
logger.info(f"--- Checking Status and Contents of Redis Key: '{queue_to_check}' ---")
|
||||
logger.info(f"Using connection '{redis_conn_id}', listing up to {max_items} items.")
|
||||
|
||||
try:
|
||||
redis_client = _get_redis_client(redis_conn_id)
|
||||
key_type_bytes = redis_client.type(queue_to_check)
|
||||
key_type = key_type_bytes.decode('utf-8')
|
||||
|
||||
if key_type == 'list':
|
||||
list_length = redis_client.llen(queue_to_check)
|
||||
logger.info(f"Redis key '{queue_to_check}' is a LIST with {list_length} items.")
|
||||
if list_length > 0:
|
||||
items_to_fetch = min(max_items, list_length)
|
||||
# lrange with negative indices gets items from the end (most recent for rpush)
|
||||
contents_bytes = redis_client.lrange(queue_to_check, -items_to_fetch, -1)
|
||||
contents = [item.decode('utf-8') for item in contents_bytes]
|
||||
contents.reverse() # Show most recent first
|
||||
logger.info(f"--- Showing most recent {len(contents)} of {list_length} items ---")
|
||||
for i, item in enumerate(contents):
|
||||
logger.info(f" [recent_{i}]: {item}")
|
||||
if list_length > len(contents):
|
||||
logger.info(f" ... ({list_length - len(contents)} older items not shown)")
|
||||
logger.info(f"--- End of List Contents ---")
|
||||
|
||||
elif key_type == 'hash':
|
||||
hash_size = redis_client.hlen(queue_to_check)
|
||||
logger.info(f"Redis key '{queue_to_check}' is a HASH with {hash_size} fields.")
|
||||
if hash_size > 0:
|
||||
logger.info(f"--- Showing a sample of up to {max_items} fields ---")
|
||||
item_count = 0
|
||||
# Using hscan_iter to safely iterate over hash fields, count is a hint
|
||||
for field_bytes, value_bytes in redis_client.hscan_iter(queue_to_check, count=max_items):
|
||||
if item_count >= max_items:
|
||||
logger.info(f" ... (stopped listing after {max_items} items of {hash_size})")
|
||||
break
|
||||
field = field_bytes.decode('utf-8')
|
||||
value = value_bytes.decode('utf-8')
|
||||
# Try to pretty-print if value is JSON
|
||||
try:
|
||||
parsed_value = json.loads(value)
|
||||
# Check for timestamp to show age
|
||||
timestamp = parsed_value.get('end_time') or parsed_value.get('start_time')
|
||||
age_str = ""
|
||||
if timestamp:
|
||||
age_seconds = (datetime.now(timezone.utc) - datetime.fromtimestamp(timestamp, timezone.utc)).total_seconds()
|
||||
age_str = f" (age: {timedelta(seconds=age_seconds)})"
|
||||
|
||||
pretty_value = json.dumps(parsed_value, indent=2)
|
||||
logger.info(f" Field '{field}'{age_str}:\n{pretty_value}")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.info(f" Field '{field}': {value}")
|
||||
item_count += 1
|
||||
logger.info(f"--- End of Hash Contents ---")
|
||||
|
||||
elif key_type == 'none':
|
||||
logger.info(f"Redis key '{queue_to_check}' does not exist.")
|
||||
else:
|
||||
logger.info(f"Redis key '{queue_to_check}' is of type '{key_type}'. Listing contents for this type is not implemented.")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check/list contents of Redis key '{queue_to_check}': {e}", exc_info=True)
|
||||
raise AirflowException(f"Failed to process Redis key: {e}")
|
||||
|
||||
# --- DAG Definition ---
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 0, # No retries for a manual check/list operation
|
||||
'start_date': days_ago(1)
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id='ytdlp_mgmt_queues_check_status',
|
||||
default_args=default_args,
|
||||
schedule_interval=None, # Manually triggered
|
||||
catchup=False,
|
||||
description='Manually check the status and recent items of all YTDLP Redis queues for a given base name.',
|
||||
tags=['ytdlp', 'queue', 'management', 'redis', 'manual', 'status', 'list'],
|
||||
params={
|
||||
'redis_conn_id': Param(DEFAULT_REDIS_CONN_ID, type="string", description="Airflow Redis connection ID."),
|
||||
'queue_name': Param(
|
||||
DEFAULT_QUEUE_BASE_NAME,
|
||||
type="string",
|
||||
description="Base name for the Redis queues (e.g., 'video_queue')."
|
||||
),
|
||||
'max_items_to_list': Param(DEFAULT_MAX_ITEMS_TO_LIST, type="integer", description="Maximum number of recent items/fields to list from each queue."),
|
||||
}
|
||||
) as dag:
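# Illustrative usage (not part of the original file): this DAG is meant to be triggered
# manually, e.g. from the Airflow CLI with an overridden base queue name:
#
#   airflow dags trigger ytdlp_mgmt_queues_check_status \
#       --conf '{"queue_name": "video_queue", "max_items_to_list": 10}'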
|
||||
|
||||
check_inbox_queue = PythonOperator(
|
||||
task_id='check_inbox_queue',
|
||||
python_callable=check_and_list_queue_callable,
|
||||
op_kwargs={'queue_suffix': '_inbox'},
|
||||
)
|
||||
check_inbox_queue.doc_md = """
|
||||
### Check Inbox Queue (`_inbox`)
|
||||
Checks the status and lists the most recent URLs waiting to be processed.
|
||||
The full queue name is `{{ params.queue_name }}_inbox`.
|
||||
"""
|
||||
|
||||
check_progress_queue = PythonOperator(
|
||||
task_id='check_progress_queue',
|
||||
python_callable=check_and_list_queue_callable,
|
||||
op_kwargs={'queue_suffix': '_progress'},
|
||||
)
|
||||
check_progress_queue.doc_md = """
|
||||
### Check Progress Queue (`_progress`)
|
||||
Checks the status and lists a sample of URLs currently being processed.
|
||||
The full queue name is `{{ params.queue_name }}_progress`.
|
||||
"""
|
||||
|
||||
check_result_queue = PythonOperator(
|
||||
task_id='check_result_queue',
|
||||
python_callable=check_and_list_queue_callable,
|
||||
op_kwargs={'queue_suffix': '_result'},
|
||||
)
|
||||
check_result_queue.doc_md = """
|
||||
### Check Result Queue (`_result`)
|
||||
Checks the status and lists a sample of successfully processed URLs.
|
||||
The full queue name is `{{ params.queue_name }}_result`.
|
||||
"""
|
||||
|
||||
check_fail_queue = PythonOperator(
|
||||
task_id='check_fail_queue',
|
||||
python_callable=check_and_list_queue_callable,
|
||||
op_kwargs={'queue_suffix': '_fail'},
|
||||
)
|
||||
check_fail_queue.doc_md = """
|
||||
### Check Fail Queue (`_fail`)
|
||||
Checks the status and lists a sample of failed URLs.
|
||||
The full queue name is `{{ params.queue_name }}_fail`.
|
||||
"""
|
||||
@ -1,343 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:fenc=utf-8
|
||||
#
|
||||
# Copyright © 2024 rl <rl@rlmbp>
|
||||
#
|
||||
# Distributed under terms of the MIT license.
|
||||
|
||||
"""
|
||||
DAG for processing a single YouTube URL passed via DAG run configuration.
|
||||
This is the "Worker" part of a Sensor/Worker pattern.
|
||||
This DAG has been refactored to use the TaskFlow API to implement worker affinity,
|
||||
ensuring all tasks for a single URL run on the same machine.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from airflow.decorators import task, task_group
|
||||
from airflow.exceptions import AirflowException, AirflowSkipException
|
||||
from airflow.models import Variable
|
||||
from airflow.models.dag import DAG
|
||||
from airflow.models.param import Param
|
||||
from airflow.models.xcom_arg import XComArg
|
||||
from airflow.operators.dummy import DummyOperator
|
||||
from airflow.operators.bash import BashOperator
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.api.common.trigger_dag import trigger_dag
|
||||
from datetime import timedelta, datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
import subprocess
|
||||
import shlex
|
||||
|
||||
# Import utility functions and Thrift modules
|
||||
from utils.redis_utils import _get_redis_client
|
||||
|
||||
# Handle potential import issues with Thrift modules
|
||||
try:
|
||||
from pangramia.yt.common.ttypes import TokenUpdateMode
|
||||
except ImportError as e:
|
||||
logging.warning(f"Could not import TokenUpdateMode from pangramia.yt.common.ttypes: {e}")
|
||||
TokenUpdateMode = None
|
||||
|
||||
try:
|
||||
from pangramia.yt.exceptions.ttypes import PBServiceException, PBUserException
|
||||
except ImportError as e:
|
||||
logging.warning(f"Could not import PBServiceException/PBUserException from pangramia.yt.exceptions.ttypes: {e}")
|
||||
PBServiceException = Exception
|
||||
PBUserException = Exception
|
||||
|
||||
try:
|
||||
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||
except ImportError as e:
|
||||
logging.warning(f"Could not import YTTokenOpService from pangramia.yt.tokens_ops: {e}")
|
||||
YTTokenOpService = None
|
||||
|
||||
try:
|
||||
from thrift.protocol import TBinaryProtocol
|
||||
from thrift.transport import TSocket, TTransport
|
||||
from thrift.transport.TTransport import TTransportException
|
||||
except ImportError as e:
|
||||
logging.warning(f"Could not import thrift modules: {e}")
|
||||
TBinaryProtocol = None
|
||||
TSocket = None
|
||||
TTransport = None
|
||||
TTransportException = Exception
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default settings from Airflow Variables or hardcoded fallbacks
|
||||
DEFAULT_QUEUE_NAME = 'video_queue'
|
||||
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||
DEFAULT_TIMEOUT = 3600
|
||||
DEFAULT_YT_AUTH_SERVICE_IP = Variable.get("YT_AUTH_SERVICE_IP", default_var="172.17.0.1")
|
||||
DEFAULT_YT_AUTH_SERVICE_PORT = Variable.get("YT_AUTH_SERVICE_PORT", default_var=9080)
|
||||
|
||||
# The queue is set to a fallback here. The actual worker-specific queue is
# assigned just-in-time by the task_instance_mutation_hook in airflow_local_settings.py,
# which reads the 'worker_queue' from the DAG run configuration.
DEFAULT_ARGS = {
'owner': 'airflow',
'retries': 0,
'queue': 'queue-dl', # Fallback queue. Will be overridden by the policy hook.
}
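# Illustrative sketch (not part of this file) of what the task_instance_mutation_hook in
# airflow_local_settings.py referenced above might look like; the deployment's actual hook
# may differ.
#
#   # airflow_local_settings.py
#   def task_instance_mutation_hook(task_instance):
#       """Pin every task of a run to the queue named in the DAG run conf, if present."""
#       dag_run = task_instance.get_dagrun()
#       worker_queue = (dag_run.conf or {}).get('worker_queue') if dag_run else None
#       if worker_queue:
#           task_instance.queue = worker_queue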
|
||||
|
||||
|
||||
# --- Helper Functions ---
|
||||
|
||||
def _get_thrift_client(host, port, timeout):
|
||||
"""Helper to create and connect a Thrift client."""
|
||||
if not TSocket or not TTransport or not TBinaryProtocol:
|
||||
raise AirflowException("Required Thrift modules are not available")
|
||||
|
||||
transport = TSocket.TSocket(host, port)
|
||||
transport.setTimeout(timeout * 1000)
|
||||
transport = TTransport.TFramedTransport(transport)
|
||||
protocol = TBinaryProtocol.TBinaryProtocol(transport)
|
||||
client = YTTokenOpService.Client(protocol) if YTTokenOpService else None
|
||||
if client:
|
||||
transport.open()
|
||||
logger.info(f"Connected to Thrift server at {host}:{port}")
|
||||
return client, transport
|
||||
|
||||
def _extract_video_id(url):
|
||||
"""Extracts YouTube video ID from URL."""
|
||||
if not url or not isinstance(url, str):
|
||||
return None
|
||||
patterns = [r'v=([a-zA-Z0-9_-]{11})', r'youtu\.be/([a-zA-Z0-9_-]{11})']
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, url)
|
||||
if match:
|
||||
return match.group(1)
|
||||
return None
|
||||
|
||||
def _get_account_pool(params: dict) -> list:
|
||||
"""
|
||||
Gets the list of accounts to use for processing, filtering out banned/resting accounts.
|
||||
Supports explicit list, prefix-based generation, and single account modes.
|
||||
"""
|
||||
account_pool_str = params.get('account_pool', 'default_account')
|
||||
accounts = []
|
||||
is_prefix_mode = False
|
||||
|
||||
if ',' in account_pool_str:
|
||||
accounts = [acc.strip() for acc in account_pool_str.split(',') if acc.strip()]
|
||||
else:
|
||||
prefix = account_pool_str
|
||||
pool_size_param = params.get('account_pool_size')
|
||||
if pool_size_param is not None:
|
||||
is_prefix_mode = True
|
||||
pool_size = int(pool_size_param)
|
||||
accounts = [f"{prefix}_{i:02d}" for i in range(1, pool_size + 1)]
|
||||
else:
|
||||
accounts = [prefix]
|
||||
|
||||
if not accounts:
|
||||
raise AirflowException("Initial account pool is empty.")
|
||||
|
||||
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||
try:
|
||||
redis_client = _get_redis_client(redis_conn_id)
|
||||
active_accounts = []
|
||||
for account in accounts:
|
||||
status_bytes = redis_client.hget(f"account_status:{account}", "status")
|
||||
status = status_bytes.decode('utf-8') if status_bytes else "ACTIVE"
|
||||
if status != 'BANNED' and 'RESTING' not in status:
|
||||
active_accounts.append(account)
|
||||
|
||||
if not active_accounts and accounts:
|
||||
auto_create = params.get('auto_create_new_accounts_on_exhaustion', False)
|
||||
if auto_create and is_prefix_mode:
|
||||
new_account_id = f"{account_pool_str}-auto-{str(uuid.uuid4())[:8]}"
|
||||
logger.warning(f"Account pool exhausted. Auto-creating new account: '{new_account_id}'")
|
||||
active_accounts.append(new_account_id)
|
||||
else:
|
||||
raise AirflowException("All accounts in the configured pool are currently exhausted.")
|
||||
accounts = active_accounts
|
||||
except Exception as e:
|
||||
logger.error(f"Could not filter accounts from Redis. Using unfiltered pool. Error: {e}", exc_info=True)
|
||||
|
||||
if not accounts:
|
||||
raise AirflowException("Account pool is empty after filtering.")
|
||||
|
||||
logger.info(f"Final active account pool with {len(accounts)} accounts.")
|
||||
return accounts
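# Illustrative parameter shapes (assumed values) for the three 'account_pool' modes handled above:
#   - explicit list:  account_pool="acc_a,acc_b,acc_c"
#   - prefix mode:    account_pool="acc", account_pool_size=3   -> ["acc_01", "acc_02", "acc_03"]
#   - single account: account_pool="acc_a" (no account_pool_size given)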
|
||||
|
||||
# =============================================================================
|
||||
# TASK DEFINITIONS (TaskFlow API)
|
||||
# =============================================================================
|
||||
|
||||
@task
|
||||
def get_url_and_assign_account(**context):
|
||||
"""
|
||||
Gets the URL to process from the DAG run configuration and assigns an active account.
|
||||
This is the first task in the pinned-worker DAG.
|
||||
"""
|
||||
params = context['params']
|
||||
|
||||
# Update yt-dlp to latest nightly before every run
|
||||
subprocess.run(["/usr/local/bin/update-yt-dlp.sh"], check=True)
|
||||
|
||||
# The URL is passed by the dispatcher DAG.
|
||||
url_to_process = params.get('url_to_process')
|
||||
if not url_to_process:
|
||||
raise AirflowException("'url_to_process' was not found in the DAG run configuration.")
|
||||
logger.info(f"Received URL '{url_to_process}' to process.")
|
||||
|
||||
# Account assignment logic is the same as before.
|
||||
account_id = random.choice(_get_account_pool(params))
|
||||
logger.info(f"Selected account '{account_id}' for this run.")
|
||||
|
||||
return {
|
||||
'url_to_process': url_to_process,
|
||||
'account_id': account_id,
|
||||
'accounts_tried': [account_id],
|
||||
}
|
||||
|
||||
@task
|
||||
def get_token(initial_data: dict, **context):
|
||||
"""Makes a single attempt to get a token from the Thrift service."""
|
||||
ti = context['task_instance']
|
||||
params = context['params']
|
||||
|
||||
account_id = initial_data['account_id']
|
||||
url = initial_data['url_to_process']
|
||||
info_json_dir = Variable.get('DOWNLOADS_TEMP', '/opt/airflow/downloadfiles')
|
||||
|
||||
host, port, timeout = params['service_ip'], int(params['service_port']), int(params.get('timeout', DEFAULT_TIMEOUT))
|
||||
machine_id = params.get('machine_id') or socket.gethostname()
|
||||
|
||||
logger.info(f"--- Attempting to get token for URL '{url}' with account '{account_id}' ---")
|
||||
client, transport = None, None
|
||||
try:
|
||||
client, transport = _get_thrift_client(host, port, timeout)
|
||||
if not client or not TokenUpdateMode:
|
||||
raise AirflowException("Thrift client or TokenUpdateMode not available")
|
||||
|
||||
token_data = client.getOrRefreshToken(accountId=account_id, updateType=TokenUpdateMode.AUTO, url=url, clients=params.get('clients'), machineId=machine_id)
|
||||
|
||||
info_json = getattr(token_data, 'infoJson', None)
|
||||
try:
parsed_info = json.loads(info_json) if info_json else None
except (json.JSONDecodeError, TypeError):
parsed_info = None
if not parsed_info:
raise AirflowException("Service returned success but info.json was empty or invalid.")
|
||||
|
||||
video_id = _extract_video_id(url)
|
||||
os.makedirs(info_json_dir, exist_ok=True)
|
||||
# Use a readable timestamp for a unique filename on each attempt.
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
info_json_path = os.path.join(info_json_dir, f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json")
|
||||
with open(info_json_path, 'w', encoding='utf-8') as f:
|
||||
f.write(info_json)
|
||||
|
||||
proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
|
||||
return {
|
||||
'info_json_path': info_json_path,
|
||||
'socks_proxy': getattr(token_data, proxy_attr) if proxy_attr else None,
|
||||
'ytdlp_command': getattr(token_data, 'ytdlpCommand', None),
|
||||
'successful_account_id': account_id,
|
||||
'original_url': url, # Include original URL for fallback
|
||||
}
|
||||
except (PBServiceException, PBUserException, TTransportException) as e:
|
||||
error_context = getattr(e, 'context', None)
|
||||
if isinstance(error_context, str):
|
||||
try: error_context = json.loads(error_context.replace("'", '"'))
except (ValueError, TypeError): pass
|
||||
|
||||
error_details = {
|
||||
'error_message': getattr(e, 'message', str(e)),
|
||||
'error_code': getattr(e, 'errorCode', 'TRANSPORT_ERROR'),
|
||||
'proxy_url': error_context.get('proxy_url') if isinstance(error_context, dict) else None
|
||||
}
|
||||
logger.error(f"Thrift call failed for account '{account_id}'. Exception: {error_details['error_message']}")
|
||||
ti.xcom_push(key='error_details', value=error_details)
|
||||
|
||||
# If it's not a connection error, run diagnostic yt-dlp command
|
||||
if error_details['error_code'] not in ["SOCKS5_CONNECTION_FAILED", "SOCKET_TIMEOUT", "TRANSPORT_ERROR", "CAMOUFOX_TIMEOUT"]:
|
||||
_run_diagnostic_yt_dlp(url, error_details.get('proxy_url'), params.get('clients', 'web'))
|
||||
|
||||
raise AirflowException(f"Thrift call failed: {error_details['error_message']}")
|
||||
finally:
|
||||
if transport and transport.isOpen():
|
||||
transport.close()
|
||||
|
||||
def _run_diagnostic_yt_dlp(url, proxy, clients):
|
||||
"""Runs yt-dlp with diagnostic flags to capture failed responses."""
|
||||
logger.warning("Running diagnostic yt-dlp command to capture failed response...")
|
||||
|
||||
dump_dir = "/opt/airflow/dumps"
|
||||
os.makedirs(dump_dir, exist_ok=True)
|
||||
|
||||
video_id = _extract_video_id(url)
|
||||
dump_file = os.path.join(dump_dir, f"diagnostic_{video_id}_{int(time.time())}.dump")
|
||||
|
||||
cmd = [
|
||||
'yt-dlp',
|
||||
'--extractor-args', f'youtube:player-client={clients}',
|
||||
'--write-pages',
|
||||
'--proxy', proxy or '',
|
||||
'-FvU',
|
||||
url,
|
||||
'--write-info-json',
|
||||
'--print', 'filename',
|
||||
'--continue',
|
||||
'--no-progress',
|
||||
'--no-simulate',
|
||||
'--ignore-errors',
|
||||
'--no-playlist'
|
||||
]
|
||||
|
||||
logger.info(f"Executing diagnostic command: {' '.join(shlex.quote(arg) for arg in cmd)}")
|
||||
logger.info(f"Diagnostic dump will be saved to: {dump_file}")
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
|
||||
logger.info(f"Diagnostic yt-dlp exit code: {result.returncode}")
|
||||
if result.stdout:
|
||||
logger.info(f"Diagnostic output:\n{result.stdout}")
|
||||
if result.stderr:
|
||||
logger.error(f"Diagnostic stderr:\n{result.stderr}")
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error("Diagnostic yt-dlp command timed out after 5 minutes")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to run diagnostic yt-dlp: {e}")
|
||||
|
||||
@task.branch
|
||||
def handle_bannable_error_branch(task_id_to_check: str, **context):
|
||||
"""Inspects a failed task and routes to retry logic if the error is bannable."""
|
||||
ti = context['task_instance']
|
||||
params = context['params']
|
||||
error_details = ti.xcom_pull(task_ids=task_id_to_check, key='error_details')
|
||||
if not error_details:
|
||||
return None # Let DAG fail for unexpected errors
|
||||
|
||||
error_code = error_details.get('error_code', '').strip()
|
||||
policy = params.get('on_bannable_failure', 'retry_with_new_account')
|
||||
|
||||
# Connection errors should be retried without banning the account.
|
||||
connection_errors = ['SOCKS5_CONNECTION_FAILED', 'SOCKET_TIMEOUT', 'TRANSPORT_ERROR', 'CAMOUFOX_TIMEOUT']
|
||||
if error_code in connection_errors:
|
||||
logger.info(f"Handling connection error '{error_code}' from '{task_id_to_check}'. Policy: '{policy}'")
|
||||
if policy == 'stop_loop':
|
||||
logger.warning(f"Connection error with 'stop_loop' policy. Failing DAG without banning.")
|
||||
return None
|
||||
else:
|
||||
logger.info("Retrying with a new account without banning.")
|
||||
return 'assign_new_account_for_retry'
|
||||
|
||||
is_bannable = error_code in ["BOT_DETECTED", "BOT_DETECTION_SIGN_IN_REQUIRED"]
|
||||
|
||||
logger.info(f"Handling failure from '{task_id_to_check}'. Error code: '{error_code}', Policy: '{policy}'")
|
||||
if is_bannable and policy in ['retry_with_new_account', 'retry_and_ban_account_only']:
|
||||
return 'ban_account_and_prepare_for_retry'
|
||||
if is_bannable and policy in ['retry_on_connection_error', 'retry_without_ban']:
|
||||
return 'assign_new_account_for_retry'
|
||||
if is_bannable: # stop_loop
|
||||
return 'ban_and_fail'
|
||||
return None # Not a bannable error, let DAG fail
|
||||
@ -1,707 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:fenc=utf-8
|
||||
#
|
||||
# Copyright © 2024 rl <rl@rlmbp>
|
||||
#
|
||||
# Distributed under terms of the MIT license.
|
||||
|
||||
"""
|
||||
DAG for processing YouTube URLs sequentially from a Redis queue using YTDLP Ops Thrift service.
|
||||
"""
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.exceptions import AirflowException, AirflowSkipException, AirflowFailException
|
||||
from airflow.hooks.base import BaseHook
|
||||
from airflow.models import BaseOperator, Variable
|
||||
from airflow.models.param import Param
|
||||
from airflow.operators.bash import BashOperator # Import BashOperator
|
||||
from airflow.operators.python import PythonOperator
|
||||
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
|
||||
from airflow.providers.redis.hooks.redis import RedisHook
|
||||
from airflow.utils.dates import days_ago
|
||||
from airflow.utils.decorators import apply_defaults
|
||||
from datetime import datetime, timedelta
|
||||
from pangramia.yt.common.ttypes import TokenUpdateMode
|
||||
from pangramia.yt.exceptions.ttypes import PBServiceException
|
||||
from pangramia.yt.tokens_ops import YTTokenOpService
|
||||
from thrift.protocol import TBinaryProtocol
|
||||
from thrift.transport import TSocket, TTransport
|
||||
from thrift.transport.TTransport import TTransportException
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import redis # Import redis exceptions if needed
|
||||
import socket
|
||||
import time
|
||||
import traceback # For logging stack traces in failure handler
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default settings
|
||||
DEFAULT_QUEUE_NAME = 'video_queue' # Base name for queues
|
||||
DEFAULT_REDIS_CONN_ID = 'redis_default'
|
||||
DEFAULT_TIMEOUT = 30 # Default Thrift timeout in seconds
|
||||
MAX_RETRIES_REDIS_LOOKUP = 3 # Retries for fetching service details from Redis
|
||||
RETRY_DELAY_REDIS_LOOKUP = 10 # Delay (seconds) for Redis lookup retries
|
||||
|
||||
# --- Helper Functions ---
|
||||
|
||||
from utils.redis_utils import _get_redis_client
|
||||
|
||||
def _extract_video_id(url):
|
||||
"""Extracts YouTube video ID from URL."""
|
||||
if not url or not isinstance(url, str):
|
||||
logger.debug("URL is empty or not a string, cannot extract video ID.")
|
||||
return None
|
||||
try:
|
||||
video_id = None
|
||||
if 'youtube.com/watch?v=' in url:
|
||||
video_id = url.split('v=')[1].split('&')[0]
|
||||
elif 'youtu.be/' in url:
|
||||
video_id = url.split('youtu.be/')[1].split('?')[0]
|
||||
|
||||
if video_id and len(video_id) >= 11:
|
||||
video_id = video_id[:11] # Standard ID length
|
||||
logger.debug(f"Extracted video ID '{video_id}' from URL: {url}")
|
||||
return video_id
|
||||
else:
|
||||
logger.debug(f"Could not extract a standard video ID pattern from URL: {url}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract video ID from URL '{url}'. Error: {e}")
|
||||
return None
|
||||
|
||||
# --- Queue Management Callables ---
|
||||
|
||||
def pop_url_from_queue(**context):
|
||||
"""Pops a URL from the inbox queue and pushes to XCom."""
|
||||
params = context['params']
|
||||
queue_name = params['queue_name']
|
||||
inbox_queue = f"{queue_name}_inbox"
|
||||
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||
logger.info(f"Attempting to pop URL from inbox queue: {inbox_queue}")
|
||||
|
||||
try:
|
||||
client = _get_redis_client(redis_conn_id)
|
||||
# LPOP is non-blocking, returns None if empty
|
||||
url_bytes = client.lpop(inbox_queue) # Returns bytes if decode_responses=False on hook/client
|
||||
|
||||
if url_bytes:
|
||||
url = url_bytes.decode('utf-8') if isinstance(url_bytes, bytes) else url_bytes
|
||||
logger.info(f"Popped URL: {url}")
|
||||
context['task_instance'].xcom_push(key='current_url', value=url)
|
||||
return url # Return URL for logging/potential use
|
||||
else:
|
||||
logger.info(f"Inbox queue '{inbox_queue}' is empty. Skipping downstream tasks.")
|
||||
context['task_instance'].xcom_push(key='current_url', value=None)
|
||||
# Raise AirflowSkipException to signal downstream tasks to skip
|
||||
raise AirflowSkipException(f"Inbox queue '{inbox_queue}' is empty.")
|
||||
except AirflowSkipException:
|
||||
raise # Re-raise skip exception
|
||||
except Exception as e:
|
||||
logger.error(f"Error popping URL from Redis queue '{inbox_queue}': {e}", exc_info=True)
|
||||
raise AirflowException(f"Failed to pop URL from Redis: {e}")
|
||||
|
||||
|
||||
def move_url_to_progress(**context):
|
||||
"""Moves the current URL from XCom to the progress hash."""
|
||||
ti = context['task_instance']
|
||||
url = ti.xcom_pull(task_ids='pop_url_from_queue', key='current_url')
|
||||
|
||||
# This task should be skipped if pop_url_from_queue raised AirflowSkipException
|
||||
# Adding check for robustness
|
||||
if not url:
|
||||
logger.info("No URL found in XCom (or upstream skipped). Skipping move to progress.")
|
||||
raise AirflowSkipException("No URL to process.")
|
||||
|
||||
params = context['params']
|
||||
queue_name = params['queue_name']
|
||||
progress_queue = f"{queue_name}_progress"
|
||||
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||
logger.info(f"Moving URL '{url}' to progress hash: {progress_queue}")
|
||||
|
||||
progress_data = {
|
||||
'status': 'processing',
|
||||
'start_time': time.time(),
|
||||
'dag_run_id': context['dag_run'].run_id,
|
||||
'task_instance_key_str': context['task_instance_key_str']
|
||||
}
|
||||
|
||||
try:
|
||||
client = _get_redis_client(redis_conn_id)
|
||||
client.hset(progress_queue, url, json.dumps(progress_data))
|
||||
logger.info(f"Moved URL '{url}' to progress hash '{progress_queue}'.")
|
||||
except Exception as e:
|
||||
logger.error(f"Error moving URL to Redis progress hash '{progress_queue}': {e}", exc_info=True)
|
||||
# If this fails, the URL is popped but not tracked as processing. Fail the task.
|
||||
raise AirflowException(f"Failed to move URL to progress hash: {e}")
|
||||
|
||||
|
||||
def handle_success(**context):
|
||||
"""Moves URL from progress to result hash on success."""
|
||||
ti = context['task_instance']
|
||||
url = ti.xcom_pull(task_ids='pop_url_from_queue', key='current_url')
|
||||
if not url:
|
||||
logger.warning("handle_success called but no URL found from pop_url_from_queue XCom. This shouldn't happen on success path.")
|
||||
return # Or raise error
|
||||
|
||||
params = context['params']
|
||||
queue_name = params['queue_name']
|
||||
progress_queue = f"{queue_name}_progress"
|
||||
result_queue = f"{queue_name}_result"
|
||||
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||
|
||||
# Pull results from get_token task
|
||||
info_json_path = ti.xcom_pull(task_ids='get_token', key='info_json_path')
|
||||
socks_proxy = ti.xcom_pull(task_ids='get_token', key='socks_proxy')
|
||||
ytdlp_command = ti.xcom_pull(task_ids='get_token', key='ytdlp_command') # Original command
|
||||
downloaded_file_path = ti.xcom_pull(task_ids='download_video') # Pull from download_video task
|
||||
|
||||
logger.info(f"Handling success for URL: {url}")
|
||||
logger.info(f" Info JSON Path: {info_json_path}")
|
||||
logger.info(f" SOCKS Proxy: {socks_proxy}")
|
||||
logger.info(f" YTDLP Command: {ytdlp_command[:100] if ytdlp_command else 'None'}...") # Log truncated command
|
||||
logger.info(f" Downloaded File Path: {downloaded_file_path}")
|
||||
|
||||
result_data = {
|
||||
'status': 'success',
|
||||
'end_time': time.time(),
|
||||
'info_json_path': info_json_path,
|
||||
'socks_proxy': socks_proxy,
|
||||
'ytdlp_command': ytdlp_command,
|
||||
'downloaded_file_path': downloaded_file_path,
|
||||
'url': url,
|
||||
'dag_run_id': context['dag_run'].run_id,
|
||||
'task_instance_key_str': context['task_instance_key_str'] # Record which task instance succeeded
|
||||
}
|
||||
|
||||
try:
|
||||
client = _get_redis_client(redis_conn_id)
|
||||
# Remove from progress hash
|
||||
removed_count = client.hdel(progress_queue, url)
|
||||
if removed_count > 0:
|
||||
logger.info(f"Removed URL '{url}' from progress hash '{progress_queue}'.")
|
||||
else:
|
||||
logger.warning(f"URL '{url}' not found in progress hash '{progress_queue}' during success handling.")
|
||||
|
||||
# Add to result hash
|
||||
client.hset(result_queue, url, json.dumps(result_data))
|
||||
logger.info(f"Stored success result for URL '{url}' in result hash '{result_queue}'.")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling success in Redis for URL '{url}': {e}", exc_info=True)
|
||||
# Even if Redis fails, the task succeeded. Log error but don't fail the task.
|
||||
# Consider adding retry logic for Redis operations here or marking state differently.
|
||||
|
||||
|
||||
def handle_failure(**context):
|
||||
"""
|
||||
Handles failed processing. Depending on the `requeue_on_failure` parameter,
|
||||
it either moves the URL to the fail hash or re-queues it in the inbox.
|
||||
If `stop_on_failure` is True, this task will fail, stopping the DAG loop.
|
||||
"""
|
||||
ti = context['task_instance']
|
||||
url = ti.xcom_pull(task_ids='pop_url_from_queue', key='current_url')
|
||||
if not url:
|
||||
logger.error("handle_failure called but no URL found from pop_url_from_queue XCom.")
|
||||
return
|
||||
|
||||
params = context['params']
|
||||
queue_name = params['queue_name']
|
||||
progress_queue = f"{queue_name}_progress"
|
||||
fail_queue = f"{queue_name}_fail"
|
||||
inbox_queue = f"{queue_name}_inbox"
|
||||
redis_conn_id = params.get('redis_conn_id', DEFAULT_REDIS_CONN_ID)
|
||||
requeue_on_failure = params.get('requeue_on_failure', False)
|
||||
stop_on_failure = params.get('stop_on_failure', True) # Default to True
|
||||
|
||||
exception = context.get('exception')
|
||||
error_message = str(exception) if exception else "Unknown error"
|
||||
    # Format the traceback of the pulled exception object; format_exc() would only see the
    # current (empty) exception context inside this callback.
    tb_str = "".join(traceback.format_exception(type(exception), exception, exception.__traceback__)) if exception else "No traceback available."
|
||||
|
||||
logger.info(f"Handling failure for URL: {url}")
|
||||
logger.error(f" Failure Reason: {error_message}")
|
||||
logger.debug(f" Traceback:\n{tb_str}")
|
||||
|
||||
try:
|
||||
client = _get_redis_client(redis_conn_id)
|
||||
# Always remove from progress hash first
|
||||
removed_count = client.hdel(progress_queue, url)
|
||||
if removed_count > 0:
|
||||
logger.info(f"Removed URL '{url}' from progress hash '{progress_queue}'.")
|
||||
else:
|
||||
logger.warning(f"URL '{url}' not found in progress hash '{progress_queue}' during failure handling.")
|
||||
|
||||
if requeue_on_failure:
|
||||
# Re-queue the URL for another attempt
|
||||
client.rpush(inbox_queue, url)
|
||||
logger.info(f"Re-queued failed URL '{url}' to inbox '{inbox_queue}' for retry.")
|
||||
else:
|
||||
# Move to the permanent fail hash
|
||||
fail_data = {
|
||||
'status': 'failed',
|
||||
'end_time': time.time(),
|
||||
'error': error_message,
|
||||
'traceback': tb_str,
|
||||
'url': url,
|
||||
'dag_run_id': context['dag_run'].run_id,
|
||||
'task_instance_key_str': context['task_instance_key_str']
|
||||
}
|
||||
client.hset(fail_queue, url, json.dumps(fail_data))
|
||||
logger.info(f"Stored failure details for URL '{url}' in fail hash '{fail_queue}'.")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during failure handling in Redis for URL '{url}': {e}", exc_info=True)
|
||||
# This is a critical error in the failure handling logic itself.
|
||||
raise AirflowException(f"Could not handle failure in Redis: {e}")
|
||||
|
||||
# After handling Redis, decide whether to fail the task to stop the loop
|
||||
if stop_on_failure:
|
||||
logger.error("stop_on_failure is True. Failing this task to stop the DAG loop.")
|
||||
# Re-raise the original exception to fail the task instance.
|
||||
# This is better than AirflowFailException because it preserves the original error.
|
||||
if exception:
|
||||
raise exception
|
||||
else:
|
||||
# If for some reason there's no exception, fail explicitly.
|
||||
raise AirflowFailException("Failing task as per stop_on_failure=True, but original exception was not found.")
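# --- Illustrative sketch (not part of the original DAG; added for documentation) ---
# A minimal helper, assuming the queue layout used by the callbacks above (a "<queue>_inbox"
# list plus "<queue>_progress", "<queue>_result" and "<queue>_fail" hashes), that reports how
# many URLs sit in each structure. Handy for ad-hoc monitoring; the function name is hypothetical.
def queue_stats(queue_name, redis_conn_id=DEFAULT_REDIS_CONN_ID):
    """Return element counts for the inbox list and the progress/result/fail hashes."""
    client = _get_redis_client(redis_conn_id)
    return {
        'inbox': client.llen(f"{queue_name}_inbox"),        # URLs waiting to be popped
        'progress': client.hlen(f"{queue_name}_progress"),  # URLs currently being processed
        'result': client.hlen(f"{queue_name}_result"),      # successfully processed URLs
        'fail': client.hlen(f"{queue_name}_fail"),          # permanently failed URLs
    }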
|
||||
|
||||
|
||||
# --- YtdlpOpsOperator ---
|
||||
|
||||
class YtdlpOpsOperator(BaseOperator):
|
||||
"""
|
||||
Custom Airflow operator to interact with YTDLP Thrift service. Handles direct connections
|
||||
and Redis-based discovery, retrieves tokens, saves info.json, and manages errors.
|
||||
Modified to pull URL from XCom for sequential processing.
|
||||
"""
|
||||
# Removed 'url' from template_fields as it's pulled from XCom
|
||||
template_fields = ('service_ip', 'service_port', 'account_id', 'timeout', 'info_json_dir', 'redis_conn_id')
|
||||
|
||||
@apply_defaults
|
||||
def __init__(self,
|
||||
# url parameter removed - will be pulled from XCom
|
||||
redis_conn_id=DEFAULT_REDIS_CONN_ID,
|
||||
max_retries_lookup=MAX_RETRIES_REDIS_LOOKUP,
|
||||
retry_delay_lookup=RETRY_DELAY_REDIS_LOOKUP,
|
||||
service_ip=None,
|
||||
service_port=None,
|
||||
redis_enabled=False, # Default to direct connection now
|
||||
account_id=None,
|
||||
# save_info_json removed, always True
|
||||
info_json_dir=None,
|
||||
# get_socks_proxy removed, always True
|
||||
# store_socks_proxy removed, always True
|
||||
# get_socks_proxy=True, # Removed
|
||||
# store_socks_proxy=True, # Store proxy in XCom by default # Removed
|
||||
timeout=DEFAULT_TIMEOUT,
|
||||
*args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
logger.info(f"Initializing YtdlpOpsOperator (Processor Version) with parameters: "
|
||||
f"redis_conn_id={redis_conn_id}, max_retries_lookup={max_retries_lookup}, retry_delay_lookup={retry_delay_lookup}, "
|
||||
f"service_ip={service_ip}, service_port={service_port}, redis_enabled={redis_enabled}, "
|
||||
f"account_id={account_id}, info_json_dir={info_json_dir}, timeout={timeout}")
|
||||
# save_info_json, get_socks_proxy, store_socks_proxy removed from log
|
||||
|
||||
# Validate parameters based on connection mode
|
||||
if redis_enabled:
|
||||
# If using Redis, account_id is essential for lookup
|
||||
if not account_id:
|
||||
raise ValueError("account_id is required when redis_enabled=True for service lookup.")
|
||||
else:
|
||||
# If direct connection, IP and Port are essential
|
||||
if not service_ip or not service_port:
|
||||
raise ValueError("Both service_ip and service_port must be specified when redis_enabled=False.")
|
||||
# Account ID is still needed for the API call itself, but rely on DAG param or operator config
|
||||
if not account_id:
|
||||
logger.warning("No account_id provided for direct connection mode. Ensure it's set in DAG params or operator config.")
|
||||
# We won't assign 'default' here, let the value passed during instantiation be used.
|
||||
|
||||
# self.url is no longer needed here
|
||||
self.redis_conn_id = redis_conn_id
|
||||
self.max_retries_lookup = max_retries_lookup
|
||||
self.retry_delay_lookup = int(retry_delay_lookup.total_seconds() if isinstance(retry_delay_lookup, timedelta) else retry_delay_lookup)
|
||||
self.service_ip = service_ip
|
||||
self.service_port = service_port
|
||||
self.redis_enabled = redis_enabled
|
||||
self.account_id = account_id
|
||||
# self.save_info_json removed
|
||||
self.info_json_dir = info_json_dir # Still needed
|
||||
# self.get_socks_proxy removed
|
||||
# self.store_socks_proxy removed
|
||||
self.timeout = timeout
|
||||
|
||||
def execute(self, context):
|
||||
logger.info("Executing YtdlpOpsOperator (Processor Version)")
|
||||
transport = None
|
||||
ti = context['task_instance'] # Get task instance for XCom access
|
||||
|
||||
try:
|
||||
# --- Get URL from XCom ---
|
||||
url = ti.xcom_pull(task_ids='pop_url_from_queue', key='current_url')
|
||||
if not url:
|
||||
# This should ideally be caught by upstream skip, but handle defensively
|
||||
logger.info("No URL found in XCom from pop_url_from_queue. Skipping execution.")
|
||||
raise AirflowSkipException("Upstream task did not provide a URL.")
|
||||
logger.info(f"Processing URL from XCom: {url}")
|
||||
# --- End Get URL ---
|
||||
|
||||
logger.info("Getting task parameters and rendering templates")
|
||||
params = context['params'] # DAG run params
|
||||
|
||||
# Render template fields using context
|
||||
# Use render_template_as_native for better type handling if needed, else render_template
|
||||
redis_conn_id = self.render_template(self.redis_conn_id, context)
|
||||
service_ip = self.render_template(self.service_ip, context)
|
||||
service_port_rendered = self.render_template(self.service_port, context)
|
||||
account_id = self.render_template(self.account_id, context)
|
||||
timeout_rendered = self.render_template(self.timeout, context)
|
||||
info_json_dir = self.render_template(self.info_json_dir, context) # Rendered here for _save_info_json
|
||||
|
||||
# Determine effective settings (DAG params override operator defaults)
|
||||
redis_enabled = params.get('redis_enabled', self.redis_enabled)
|
||||
account_id = params.get('account_id', account_id) # Use DAG param if provided
|
||||
redis_conn_id = params.get('redis_conn_id', redis_conn_id) # Use DAG param if provided
|
||||
|
||||
logger.info(f"Effective settings: redis_enabled={redis_enabled}, account_id='{account_id}', redis_conn_id='{redis_conn_id}'")
|
||||
|
||||
host = None
|
||||
port = None
|
||||
|
||||
if redis_enabled:
|
||||
# Get Redis connection using the helper for consistency
|
||||
redis_client = _get_redis_client(redis_conn_id)
|
||||
logger.info(f"Successfully connected to Redis using connection '{redis_conn_id}' for service discovery.")
|
||||
|
||||
# Get service details from Redis with retries
|
||||
service_key = f"ytdlp:{account_id}"
|
||||
legacy_key = account_id # For backward compatibility
|
||||
|
||||
for attempt in range(self.max_retries_lookup):
|
||||
try:
|
||||
logger.info(f"Attempt {attempt + 1}/{self.max_retries_lookup}: Fetching service details from Redis for keys: '{service_key}', '{legacy_key}'")
|
||||
service_details = redis_client.hgetall(service_key)
|
||||
if not service_details:
|
||||
logger.warning(f"Key '{service_key}' not found, trying legacy key '{legacy_key}'")
|
||||
service_details = redis_client.hgetall(legacy_key)
|
||||
|
||||
if not service_details:
|
||||
raise ValueError(f"No service details found in Redis for keys: {service_key} or {legacy_key}")
|
||||
|
||||
# Find IP and port (case-insensitive keys)
|
||||
ip_key = next((k for k in service_details if k.lower() == 'ip'), None)
|
||||
port_key = next((k for k in service_details if k.lower() == 'port'), None)
|
||||
|
||||
if not ip_key: raise ValueError(f"'ip' key not found in Redis hash for {service_key}/{legacy_key}")
|
||||
if not port_key: raise ValueError(f"'port' key not found in Redis hash for {service_key}/{legacy_key}")
|
||||
|
||||
host = service_details[ip_key] # Assumes decode_responses=True in hook
|
||||
port_str = service_details[port_key]
|
||||
|
||||
try:
|
||||
port = int(port_str)
|
||||
except (ValueError, TypeError):
|
||||
raise ValueError(f"Invalid port value '{port_str}' found in Redis for {service_key}/{legacy_key}")
|
||||
|
||||
logger.info(f"Extracted from Redis - Service IP: {host}, Service Port: {port}")
|
||||
break # Success
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Attempt {attempt + 1} failed to get Redis details: {str(e)}")
|
||||
if attempt == self.max_retries_lookup - 1:
|
||||
logger.error("Max retries reached for fetching Redis details.")
|
||||
raise AirflowException(f"Failed to get service details from Redis after {self.max_retries_lookup} attempts: {e}")
|
||||
logger.info(f"Retrying in {self.retry_delay_lookup} seconds...")
|
||||
time.sleep(self.retry_delay_lookup)
|
||||
else:
|
||||
# Direct connection: Use rendered/param values
|
||||
host = params.get('service_ip', service_ip) # Use DAG param if provided
|
||||
port_str = params.get('service_port', service_port_rendered) # Use DAG param if provided
|
||||
|
||||
logger.info(f"Using direct connection settings: service_ip={host}, service_port={port_str}")
|
||||
|
||||
if not host or not port_str:
|
||||
raise ValueError("Direct connection requires service_ip and service_port (check Operator config and DAG params)")
|
||||
try:
|
||||
port = int(port_str)
|
||||
except (ValueError, TypeError):
|
||||
raise ValueError(f"Invalid service_port value: {port_str}")
|
||||
|
||||
logger.info(f"Connecting directly to Thrift service at {host}:{port} (Redis bypassed)")
|
||||
|
||||
# Validate and use timeout
|
||||
try:
|
||||
timeout = int(timeout_rendered)
|
||||
if timeout <= 0: raise ValueError("Timeout must be positive")
|
||||
logger.info(f"Using timeout: {timeout} seconds")
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid timeout value: '{timeout_rendered}'. Using default: {DEFAULT_TIMEOUT}")
|
||||
timeout = DEFAULT_TIMEOUT
|
||||
|
||||
# Create Thrift connection objects
|
||||
# socket_conn = TSocket.TSocket(host, port) # Original
|
||||
socket_conn = TSocket.TSocket(host, port, socket_family=socket.AF_INET) # Explicitly use AF_INET (IPv4)
|
||||
socket_conn.setTimeout(timeout * 1000) # Thrift timeout is in milliseconds
|
||||
transport = TTransport.TFramedTransport(socket_conn) # Use TFramedTransport if server expects it
|
||||
# transport = TTransport.TBufferedTransport(socket_conn) # Use TBufferedTransport if server expects it
|
||||
protocol = TBinaryProtocol.TBinaryProtocol(transport)
|
||||
client = YTTokenOpService.Client(protocol)
|
||||
|
||||
logger.info(f"Attempting to connect to Thrift server at {host}:{port}...")
|
||||
try:
|
||||
transport.open()
|
||||
logger.info("Successfully connected to Thrift server.")
|
||||
|
||||
# Test connection with ping
|
||||
try:
|
||||
client.ping()
|
||||
logger.info("Server ping successful.")
|
||||
except Exception as e:
|
||||
logger.error(f"Server ping failed: {e}")
|
||||
raise AirflowException(f"Server connection test (ping) failed: {e}")
|
||||
|
||||
# Get token from service using the URL from XCom
|
||||
try:
|
||||
logger.info(f"Requesting token for accountId='{account_id}', url='{url}'")
|
||||
token_data = client.getOrRefreshToken(
|
||||
accountId=account_id,
|
||||
updateType=TokenUpdateMode.AUTO,
|
||||
url=url # Use the url variable from XCom
|
||||
)
|
||||
logger.info("Successfully retrieved token data from service.")
|
||||
except PBServiceException as e:
|
||||
# Handle specific service exceptions
|
||||
error_code = getattr(e, 'errorCode', 'N/A')
|
||||
error_message = getattr(e, 'message', 'N/A')
|
||||
error_context = getattr(e, 'context', {})
|
||||
logger.error(f"PBServiceException occurred: Code={error_code}, Message={error_message}")
|
||||
if error_context:
|
||||
logger.error(f" Context: {error_context}") # Log context separately
|
||||
# Construct a concise error message for AirflowException
|
||||
error_msg = f"YTDLP service error (Code: {error_code}): {error_message}"
|
||||
# Add specific error code handling if needed...
|
||||
logger.error(f"Failing task instance due to PBServiceException: {error_msg}") # Add explicit log before raising
|
||||
raise AirflowException(error_msg) # Fail task on service error
|
||||
except TTransportException as e:
|
||||
logger.error(f"Thrift transport error during getOrRefreshToken: {e}")
|
||||
logger.error(f"Failing task instance due to TTransportException: {e}") # Add explicit log before raising
|
||||
raise AirflowException(f"Transport error during API call: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during getOrRefreshToken: {e}")
|
||||
logger.error(f"Failing task instance due to unexpected error during API call: {e}") # Add explicit log before raising
|
||||
raise AirflowException(f"Unexpected error during API call: {e}")
|
||||
|
||||
except TTransportException as e:
|
||||
# Handle connection errors
|
||||
logger.error(f"Thrift transport error during connection: {str(e)}")
|
||||
logger.error(f"Failing task instance due to TTransportException during connection: {e}") # Add explicit log before raising
|
||||
raise AirflowException(f"Transport error connecting to YTDLP service: {str(e)}")
|
||||
# Removed the overly broad except Exception block here, as inner blocks raise AirflowException
|
||||
|
||||
# --- Process Token Data ---
|
||||
logger.debug(f"Token data received. Attributes: {dir(token_data)}")
|
||||
|
||||
info_json_path = None # Initialize
|
||||
|
||||
# save_info_json is now always True
|
||||
logger.info("Proceeding to save info.json (save_info_json=True).")
|
||||
info_json = self._get_info_json(token_data)
|
||||
if info_json and self._is_valid_json(info_json):
|
||||
try:
|
||||
# Pass rendered info_json_dir to helper
|
||||
info_json_path = self._save_info_json(context, info_json, url, account_id, info_json_dir)
|
||||
if info_json_path:
|
||||
ti.xcom_push(key='info_json_path', value=info_json_path)
|
||||
logger.info(f"Successfully saved info.json and pushed path to XCom: {info_json_path}")
|
||||
else:
|
||||
ti.xcom_push(key='info_json_path', value=None)
|
||||
logger.warning("info.json saving failed (check logs from _save_info_json).")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during info.json saving process: {e}", exc_info=True)
|
||||
ti.xcom_push(key='info_json_path', value=None)
|
||||
elif info_json:
|
||||
logger.warning("Retrieved infoJson is not valid JSON. Skipping save.")
|
||||
ti.xcom_push(key='info_json_path', value=None)
|
||||
else:
|
||||
logger.info("No infoJson found in token data. Skipping save.")
|
||||
ti.xcom_push(key='info_json_path', value=None)
|
||||
|
||||
|
||||
# Extract and potentially store SOCKS proxy
|
||||
# get_socks_proxy and store_socks_proxy are now always True
|
||||
socks_proxy = None
|
||||
logger.info("Attempting to extract SOCKS proxy (get_socks_proxy=True).")
|
||||
proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
|
||||
if proxy_attr:
|
||||
socks_proxy = getattr(token_data, proxy_attr)
|
||||
if socks_proxy:
|
||||
logger.info(f"Extracted SOCKS proxy ({proxy_attr}): {socks_proxy}")
|
||||
# Always store if found (store_socks_proxy=True)
|
||||
ti.xcom_push(key='socks_proxy', value=socks_proxy)
|
||||
logger.info("Pushed 'socks_proxy' to XCom.")
|
||||
else:
|
||||
logger.info(f"Found proxy attribute '{proxy_attr}' but value is empty.")
|
||||
# Store None if attribute found but empty
|
||||
ti.xcom_push(key='socks_proxy', value=None)
|
||||
logger.info("Pushed None to XCom for 'socks_proxy' as extracted value was empty.")
|
||||
else:
|
||||
logger.info("No SOCKS proxy attribute found in token data.")
|
||||
# Store None if attribute not found
|
||||
ti.xcom_push(key='socks_proxy', value=None)
|
||||
logger.info("Pushed None to XCom for 'socks_proxy' as attribute was not found.")
|
||||
|
||||
|
||||
# --- Removed old logic block ---
|
||||
# # Extract and potentially store SOCKS proxy
|
||||
# socks_proxy = None
|
||||
# get_socks_proxy = params.get('get_socks_proxy', self.get_socks_proxy)
|
||||
# store_socks_proxy = params.get('store_socks_proxy', self.store_socks_proxy)
|
||||
#
|
||||
# if get_socks_proxy:
|
||||
# proxy_attr = next((attr for attr in ['socks5Proxy', 'socksProxy', 'socks'] if hasattr(token_data, attr)), None)
|
||||
# if proxy_attr:
|
||||
# socks_proxy = getattr(token_data, proxy_attr)
|
||||
# if socks_proxy:
|
||||
# logger.info(f"Extracted SOCKS proxy ({proxy_attr}): {socks_proxy}")
|
||||
# if store_socks_proxy:
|
||||
# ti.xcom_push(key='socks_proxy', value=socks_proxy)
|
||||
# logger.info("Pushed 'socks_proxy' to XCom.")
|
||||
# else:
|
||||
# logger.info(f"Found proxy attribute '{proxy_attr}' but value is empty.")
|
||||
# if store_socks_proxy: ti.xcom_push(key='socks_proxy', value=None)
|
||||
# else:
|
||||
# logger.info("get_socks_proxy is True, but no SOCKS proxy attribute found.")
|
||||
# if store_socks_proxy: ti.xcom_push(key='socks_proxy', value=None)
|
||||
# else:
|
||||
# logger.info("get_socks_proxy is False. Skipping proxy extraction.")
|
||||
# if store_socks_proxy: ti.xcom_push(key='socks_proxy', value=None)
|
||||
# --- End Removed old logic block ---
|
||||
|
||||
|
||||
# Get the original command from the server, or construct a fallback
|
||||
ytdlp_cmd = getattr(token_data, 'ytdlpCommand', None)
|
||||
if ytdlp_cmd:
|
||||
logger.info(f"Original command received from server: {ytdlp_cmd[:100]}...") # Log truncated
|
||||
else:
|
||||
logger.warning("No 'ytdlpCommand' attribute found in token data. Constructing a fallback for logging.")
|
||||
# Construct a representative command for logging purposes
|
||||
if socks_proxy:
|
||||
ytdlp_cmd = f"yt-dlp --dump-json --proxy \"{socks_proxy}\" \"{url}\""
|
||||
else:
|
||||
ytdlp_cmd = f"yt-dlp --dump-json \"{url}\""
|
||||
logger.info(f"Constructed fallback command: {ytdlp_cmd}")
|
||||
|
||||
# Push the command to XCom
|
||||
ti.xcom_push(key='ytdlp_command', value=ytdlp_cmd)
|
||||
logger.info("Pushed command to XCom key 'ytdlp_command'.")
|
||||
|
||||
# No explicit return needed, success is implicit if no exception raised
|
||||
|
||||
except (AirflowSkipException, AirflowFailException) as e:
|
||||
logger.info(f"Task skipped or failed explicitly: {e}")
|
||||
raise # Re-raise to let Airflow handle state
|
||||
except AirflowException as e: # Catch AirflowExceptions raised explicitly
|
||||
logger.error(f"Operation failed due to AirflowException: {e}", exc_info=True)
|
||||
raise # Re-raise AirflowExceptions to ensure task failure
|
||||
except (TTransportException, PBServiceException) as e: # Catch specific Thrift/Service errors not already handled inside inner try
|
||||
logger.error(f"Unhandled YTDLP Service/Transport error in outer block: {e}", exc_info=True)
|
||||
logger.error(f"Failing task instance due to unhandled outer Service/Transport error: {e}") # Add explicit log before raising
|
||||
raise AirflowException(f"Unhandled YTDLP service error: {e}") # Wrap in AirflowException to fail task
|
||||
except Exception as e: # General catch-all for truly unexpected errors
|
||||
logger.error(f"Caught unexpected error in YtdlpOpsOperator outer block: {e}", exc_info=True)
|
||||
logger.error(f"Failing task instance due to unexpected outer error: {e}") # Add explicit log before raising
|
||||
raise AirflowException(f"Unexpected error caused task failure: {e}") # Wrap to fail task
|
||||
finally:
|
||||
if transport and transport.isOpen():
|
||||
logger.info("Closing Thrift transport.")
|
||||
transport.close()
|
||||
|
||||
# --- Helper Methods ---
|
||||
|
||||
def _get_info_json(self, token_data):
|
||||
"""Safely extracts infoJson from token data."""
|
||||
return getattr(token_data, 'infoJson', None)
|
||||
|
||||
def _is_valid_json(self, json_str):
|
||||
"""Checks if a string is valid JSON."""
|
||||
if not json_str or not isinstance(json_str, str): return False
|
||||
try:
|
||||
json.loads(json_str)
|
||||
return True
|
||||
except json.JSONDecodeError:
|
||||
return False
|
||||
|
||||
def _save_info_json(self, context, info_json, url, account_id, rendered_info_json_dir):
|
||||
"""Saves info_json to a file. Uses pre-rendered directory path."""
|
||||
try:
|
||||
video_id = _extract_video_id(url) # Use standalone helper
|
||||
|
||||
save_dir = rendered_info_json_dir or "." # Use rendered path
|
||||
logger.info(f"Target directory for info.json: {save_dir}")
|
||||
|
||||
# Ensure directory exists
|
||||
try:
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
logger.info(f"Ensured directory exists: {save_dir}")
|
||||
except OSError as e:
|
||||
logger.error(f"Could not create directory {save_dir}: {e}. Cannot save info.json.")
|
||||
return None
|
||||
|
||||
# Construct filename
|
||||
timestamp = int(time.time())
|
||||
base_filename = f"info_{video_id or 'unknown'}_{account_id}_{timestamp}.json"
|
||||
info_json_path = os.path.join(save_dir, base_filename)
|
||||
latest_json_path = os.path.join(save_dir, "latest.json") # Path for the latest symlink/copy
|
||||
|
||||
# Write to timestamped file
|
||||
try:
|
||||
logger.info(f"Writing info.json content (received from service) to {info_json_path}...")
|
||||
with open(info_json_path, 'w', encoding='utf-8') as f:
|
||||
f.write(info_json)
|
||||
logger.info(f"Successfully saved info.json to timestamped file: {info_json_path}")
|
||||
except IOError as e:
|
||||
logger.error(f"Failed to write info.json to {info_json_path}: {e}")
|
||||
return None
|
||||
|
||||
# Write to latest.json (overwrite) - best effort
|
||||
try:
|
||||
with open(latest_json_path, 'w', encoding='utf-8') as f:
|
||||
f.write(info_json)
|
||||
logger.info(f"Updated latest.json file: {latest_json_path}")
|
||||
except IOError as e:
|
||||
logger.warning(f"Failed to update latest.json at {latest_json_path}: {e}")
|
||||
|
||||
return info_json_path
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in _save_info_json: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DAG Definition
|
||||
# =============================================================================
|
||||
|
||||
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,  # Default retries for tasks like queue management
    'retry_delay': timedelta(minutes=1),
    'start_date': days_ago(1),
    # Add concurrency control if needed for sequential processing (see the note below)
    # 'concurrency': 1,  # Ensure only one task instance runs at a time per DAG run
    # 'max_active_runs': 1,  # Ensure only one DAG run is active
}
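# Illustrative note (not from the original file): 'concurrency' and 'max_active_runs' are
# DAG-level settings, so if strict sequential processing is wanted they would be passed to the
# DAG object rather than to default_args, e.g. (hypothetical dag_id):
#
#   with DAG(dag_id="ytdlp_queue_processor", default_args=default_args,
#            schedule_interval=None, max_active_runs=1, max_active_tasks=1) as dag:
#       ...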
|
||||
|
||||
# Define DAG
#
# --- DAG Block Deactivated on 2025-07-16 ---
# This DAG has been replaced by the Sensor/Worker pattern implemented in:
# - ytdlp_sensor_redis_queue.py (polls the queue)
# - ytdlp_worker_per_url.py (processes a single URL)
# This code is kept for reference but is not active.
#
@ -1,974 +0,0 @@
|
||||
"""
|
||||
DAG to deploy and manage YTDLP token service.
|
||||
|
||||
This DAG handles the deployment, monitoring, and cleanup of a YTDLP token service
|
||||
for a given account. It supports both Redis-based service discovery and direct
|
||||
connection via manually specified host and port.
|
||||
|
||||
Configuration Options:
|
||||
- account_id: (Required) The account ID for which the service is being deployed.
|
||||
- proxy: (Optional) The proxy to use for the service.
|
||||
- redis_enabled: (Optional, default=True) Whether to use Redis for service discovery.
|
||||
If False, you must provide `host` and `port` manually.
|
||||
- host: (Optional) The host IP of the service. Required if `redis_enabled=False`.
|
||||
- port: (Optional) The port of the service. Required if `redis_enabled=False`.
|
||||
|
||||
Usage:
|
||||
1. Redis-based service discovery:
|
||||
- Set `redis_enabled=True` (default).
|
||||
- Ensure Redis is configured in Airflow connections.
|
||||
- The DAG will automatically discover the service IP and port from Redis.
|
||||
|
||||
2. Manual host and port:
|
||||
- Set `redis_enabled=False`.
|
||||
- Provide `host` and `port` manually in the DAG configuration.
|
||||
- Example: {"host": "192.168.1.100", "port": 9090}.
|
||||
|
||||
Example Trigger Configuration:
|
||||
{
|
||||
"account_id": "test_account",
|
||||
"proxy": "socks5://proxy.example.com:1080",
|
||||
"redis_enabled": False,
|
||||
"host": "192.168.1.100",
|
||||
"port": 9090
|
||||
}
|
||||
"""
|
||||
|
||||
from airflow import DAG
from airflow.models.param import Param
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import PythonOperator
# HttpSensor is no longer used
# from airflow.providers.http.sensors.http import HttpSensor
from airflow.utils.trigger_rule import TriggerRule
from airflow.hooks.base import BaseHook
from airflow.exceptions import AirflowException
from typing import Sequence  # Add Sequence for type hinting
from datetime import datetime, timedelta
from airflow.utils.dates import days_ago  # Add this import
import uuid
import os
import logging
import shutil
import docker
import redis
import requests
import socket
import time
import sys  # Import sys for maxsize
from airflow.configuration import conf  # Import conf
|
||||
|
||||
# Import and apply Thrift exceptions patch
|
||||
try:
|
||||
# Always apply the patch, regardless of environment
|
||||
from thrift_exceptions_patch import patch_thrift_exceptions
|
||||
patch_thrift_exceptions()
|
||||
logging.info("Applied Thrift exceptions patch for Airflow compatibility")
|
||||
|
||||
# Verify the patch was applied correctly
|
||||
try:
|
||||
from pangramia.yt.exceptions.ttypes import PBServiceException
|
||||
test_exception = PBServiceException(message="Test")
|
||||
# Try to modify attributes to verify patch works
|
||||
test_exception.args = ("Test",)
|
||||
test_exception.message = "Modified test"
|
||||
logging.info("Verified Thrift exception patch is working correctly")
|
||||
except Exception as verify_error:
|
||||
logging.error(f"Thrift exception patch verification failed: {verify_error}")
|
||||
logging.error("This may cause 'immutable instance' errors during error handling")
|
||||
except ImportError as e:
|
||||
logging.warning(f"Could not import thrift_exceptions_patch: {e}")
|
||||
logging.warning("Airflow compatibility will be affected - expect 'immutable instance' errors")
|
||||
except Exception as e:
|
||||
logging.error(f"Error applying Thrift exceptions patch: {e}")
|
||||
|
||||
# Default arguments for the DAG
|
||||
default_args = {
|
||||
'owner': 'airflow',
|
||||
'depends_on_past': False,
|
||||
'email_on_failure': False,
|
||||
'email_on_retry': False,
|
||||
'retries': 0, # Disable retries for all tasks in this DAG
|
||||
'retry_delay': timedelta(minutes=5),
|
||||
# Removed 'queue': 'auth_queue' to use the default queue
|
||||
# Optional: Further filter workers by tags if using CeleryExecutor
|
||||
'executor_config': {"CeleryExecutor": {"tags": ["auth_node"]}},
|
||||
}
|
||||
|
||||
def get_redis_connection(redis_host=None, redis_port=None):
|
||||
"""Get a Redis connection using Airflow's Redis connection or manually specified host/port."""
|
||||
if redis_host and redis_port:
|
||||
# Use manually specified host and port
|
||||
return redis.Redis(
|
||||
host=redis_host,
|
||||
port=redis_port,
|
||||
db=0,
|
||||
decode_responses=True
|
||||
)
|
||||
else:
|
||||
# Use Airflow's Redis connection
|
||||
redis_conn = BaseHook.get_connection("redis_default")
|
||||
# Use the password from the connection if available, otherwise use 'airflow' as default
|
||||
password = redis_conn.password or 'airflow'
|
||||
return redis.Redis(
|
||||
host=redis_conn.host, # 'redis' (service name in docker-compose)
|
||||
port=redis_conn.port, # 6379
|
||||
password=password,
|
||||
db=0,
|
||||
decode_responses=True
|
||||
)
|
||||
|
||||
def get_free_port():
|
||||
"""Find and return a free port."""
|
||||
import socket
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.bind(('0.0.0.0', 0))
|
||||
return s.getsockname()[1]
|
||||
|
||||
def is_port_free(p):
|
||||
"""Check if a port is free to use."""
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
try:
|
||||
s.bind(('0.0.0.0', p))
|
||||
return True
|
||||
except OSError:
|
||||
return False
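# Note (added comment): both helpers above only sample availability at call time. A port can
# still be claimed by another process between this check and the moment Docker publishes it
# below, so a late bind failure remains possible and will surface when the container starts.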
|
||||
|
||||
def store_account_metadata(account_id, ip, port, proxy=None, health_port=None, container_id=None):
|
||||
"""Store account metadata in Redis."""
|
||||
redis_client = get_redis_connection()
|
||||
try:
|
||||
# Verify Redis connection
|
||||
if not redis_client.ping():
|
||||
raise ConnectionError("Failed to connect to Redis")
|
||||
|
||||
# Store main account metadata
|
||||
mapping = {
|
||||
"ip": ip,
|
||||
"port": str(port),
|
||||
"status": "running",
|
||||
"start_time": str(time.time())
|
||||
}
|
||||
if proxy:
|
||||
mapping["proxy"] = proxy
|
||||
if health_port:
|
||||
mapping["health_port"] = str(health_port)
|
||||
if container_id:
|
||||
mapping["container_id"] = container_id
|
||||
|
||||
# Use pipeline for atomic operations
|
||||
with redis_client.pipeline() as pipe:
|
||||
# Store main metadata
|
||||
pipe.hset(f"ytdlp:{account_id}", mapping=mapping)
|
||||
# Set expiration (1 week)
|
||||
pipe.expire(f"ytdlp:{account_id}", 604800)
|
||||
# Add to account list
|
||||
pipe.sadd("ytdlp:accounts", account_id)
|
||||
# Execute all commands
|
||||
results = pipe.execute()
|
||||
|
||||
# Verify all commands succeeded
|
||||
if not all(results):
|
||||
raise RuntimeError(f"Failed to store metadata for {account_id}. Pipeline results: {results}")
|
||||
|
||||
# Verify the data was actually stored
|
||||
stored_data = redis_client.hgetall(f"ytdlp:{account_id}")
|
||||
if not stored_data:
|
||||
raise RuntimeError(f"Failed to verify stored data for {account_id}")
|
||||
|
||||
logging.info(f"Successfully stored account metadata for {account_id} in Redis: {stored_data}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to store account metadata for {account_id}: {e}", exc_info=True)
|
||||
# Attempt cleanup if storage failed
|
||||
try:
|
||||
redis_client = get_redis_connection() # Ensure client is available
|
||||
redis_client.delete(f"ytdlp:{account_id}")
|
||||
redis_client.srem("ytdlp:accounts", account_id)
|
||||
except Exception as cleanup_error:
|
||||
logging.error(f"Failed to cleanup failed storage for {account_id}: {cleanup_error}")
|
||||
raise
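# --- Illustrative sketch (not part of the original DAG; added for documentation) ---
# Consumer side of the metadata written above: a minimal lookup, mirroring what the processor
# operator does when redis_enabled=True, that reads the "ytdlp:<account_id>" hash and returns
# (ip, port). Shown only to make the expected hash layout explicit; the function name is hypothetical.
def lookup_service(account_id):
    client = get_redis_connection()
    details = client.hgetall(f"ytdlp:{account_id}")  # decode_responses=True, so keys/values are str
    if not details:
        raise LookupError(f"No service registered in Redis for account '{account_id}'")
    return details["ip"], int(details["port"])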
|
||||
|
||||
# Removed get_account_metadata function as the service now handles Redis registration checks.
|
||||
|
||||
def prepare_and_deploy_service(**context):
|
||||
"""Prepare deployment and deploy the Docker service."""
|
||||
# Retrieve account_id, proxy, clients, and other parameters from DAG run configuration (conf)
|
||||
# Set default values for account_id, proxy, and redis_enabled
|
||||
account_id = context['dag_run'].conf.get('account_id') or context['params'].get('account_id', 'account_fr_2025-04-03T1220_anonomyous_2ssdfsf2342afga09')
|
||||
proxy = context['dag_run'].conf.get('proxy') or context['params'].get('proxy', 'socks5://sslocal-rust-1084:1084')
|
||||
clients = context['dag_run'].conf.get('clients') or context['params'].get('clients', 'ios,android,mweb')
|
||||
redis_enabled = context['dag_run'].conf.get('redis_enabled', False) # Default to False
|
||||
host_param = context['dag_run'].conf.get('host') # Host parameter from config
|
||||
port_param = context['dag_run'].conf.get('port') # Port parameter from config
|
||||
docker_network = context['dag_run'].conf.get('docker_network') or context['params'].get('docker_network', 'airflow_prod_proxynet')
|
||||
host_external_ip_env = os.getenv('HOST_EXTERNAL_IP') # Explicit external IP from environment
|
||||
|
||||
if not account_id:
|
||||
raise ValueError("Account ID is missing.")
|
||||
|
||||
# --- Port Determination ---
|
||||
# Assign a free port if not provided, or validate the provided one
|
||||
if not port_param:
|
||||
port = get_free_port()
|
||||
if not is_port_free(port):
|
||||
raise ValueError(f"Assigned port {port} is already in use")
|
||||
logging.info(f"No port provided, assigned free port: {port}")
|
||||
else:
|
||||
port = int(port_param)
|
||||
if not is_port_free(port):
|
||||
raise ValueError(f"Provided port {port} is already in use")
|
||||
logging.info(f"Using provided port: {port}")
|
||||
|
||||
# Determine health port
|
||||
health_port = port + 1
|
||||
if not is_port_free(health_port):
|
||||
raise ValueError(f"Health port {health_port} (derived from port {port}) is already in use")
|
||||
logging.info(f"Using health port: {health_port}")
|
||||
|
||||
|
||||
# --- Host Determination ---
|
||||
# host_for_registration: IP/Host for client discovery (Redis/Logs)
|
||||
# host_for_sensor: Hostname/IP for Airflow HttpSensor health check
|
||||
|
||||
host_for_registration = host_param # Start with the parameter value
|
||||
|
||||
if redis_enabled:
|
||||
# If Redis is enabled, registration host should ideally be externally reachable
|
||||
if not host_for_registration:
|
||||
host_for_registration = host_external_ip_env # Use external IP from env var if available
|
||||
if not host_for_registration:
|
||||
# If no env var, try fetching external IP using requests
|
||||
try:
|
||||
logging.info("HOST_EXTERNAL_IP not set. Attempting to fetch external IP from api.ipify.org...")
|
||||
response = requests.get('https://api.ipify.org', timeout=10) # 10 second timeout
|
||||
response.raise_for_status() # Raise exception for bad status codes
|
||||
host_for_registration = response.text.strip()
|
||||
if not host_for_registration: # Check if response was empty
|
||||
raise ValueError("Received empty response from api.ipify.org")
|
||||
logging.info(f"Successfully fetched external IP: {host_for_registration}")
|
||||
except requests.exceptions.RequestException as e:
|
||||
logging.warning(f"Failed to fetch external IP: {e}. Falling back to Docker bridge IP.")
|
||||
# Fallback to default Docker bridge IP if fetching fails
|
||||
host_for_registration = "172.17.0.1"
|
||||
logging.warning(f"Defaulting registration host to Docker bridge IP: {host_for_registration}. Ensure clients can reach this IP.")
|
||||
except Exception as e:
|
||||
logging.error(f"Unexpected error fetching external IP: {e}. Falling back to Docker bridge IP.")
|
||||
host_for_registration = "172.17.0.1"
|
||||
logging.warning(f"Defaulting registration host to Docker bridge IP: {host_for_registration}. Ensure clients can reach this IP.")
|
||||
else:
|
||||
logging.info(f"Redis enabled. Using HOST_EXTERNAL_IP environment variable for registration: {host_for_registration}")
|
||||
else:
|
||||
logging.info(f"Redis enabled. Using provided host parameter for registration: {host_for_registration}")
|
||||
else: # Redis disabled
|
||||
# If Redis is disabled, registration host defaults to 0.0.0.0 if not provided
|
||||
if not host_for_registration:
|
||||
host_for_registration = "0.0.0.0"
|
||||
logging.warning(f"Redis disabled and no host param provided. Defaulting registration host to {host_for_registration}.")
|
||||
else:
|
||||
logging.info(f"Redis disabled. Using provided host parameter for registration: {host_for_registration}")
|
||||
|
||||
# host_for_sensor determination will happen *after* container creation, using container name.
|
||||
|
||||
logging.info(f"Preparing deployment for account {account_id}. Registration Host: {host_for_registration}, Port: {port}, Health Port: {health_port}")
|
||||
|
||||
# Generate unique work ID and context directory
|
||||
work_id = str(uuid.uuid4())
|
||||
context['task_instance'].xcom_push(key='work_id', value=work_id)
|
||||
|
||||
context_dir = os.path.join(os.getenv('AIRFLOW_HOME', '/tmp'), 'service-data', work_id, 'context-data')
|
||||
os.makedirs(context_dir, exist_ok=True, mode=0o777)
|
||||
os.chmod(context_dir, 0o777)
|
||||
|
||||
# Push context directory and account details to XCom
|
||||
context['task_instance'].xcom_push(key='context_dir', value=context_dir)
|
||||
context['task_instance'].xcom_push(key='account_id', value=account_id)
|
||||
|
||||
# Deploy the Docker service
|
||||
# The 'host_for_registration' variable here represents the externally accessible IP for registration/XCom.
|
||||
# The service inside the container will listen on 0.0.0.0.
|
||||
logging.info(f"Deploying service for account {account_id}. Registration Host: {host_for_registration}, Port: {port}")
|
||||
|
||||
# Get Redis connection details ONLY if redis_enabled (for the container to register itself)
|
||||
redis_host_for_container = ''
|
||||
redis_port_for_container = ''
|
||||
redis_password_for_container = ''
|
||||
if redis_enabled:
|
||||
try:
|
||||
# Get connection details to pass to the container environment
|
||||
redis_conn_details = get_redis_connection().connection_pool.connection_kwargs
|
||||
redis_host_for_container = os.getenv('REDIS_HOST', redis_conn_details.get('host', 'redis'))
|
||||
redis_port_for_container = str(os.getenv('REDIS_PORT', redis_conn_details.get('port', 6379)))
|
||||
redis_password_for_container = os.getenv('REDIS_PASSWORD', redis_conn_details.get('password', ''))
|
||||
logging.info(f"Redis enabled. Passing REDIS_HOST={redis_host_for_container}, REDIS_PORT={redis_port_for_container} to container.")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to get Redis connection details for container environment: {e}")
|
||||
logging.warning("Proceeding without Redis details in container environment due to error.")
|
||||
# Depending on container requirements, you might want to raise an error here instead
|
||||
else:
|
||||
logging.info("Redis disabled. Not passing REDIS_HOST/REDIS_PORT to container environment.")
|
||||
|
||||
|
||||
# Get Docker connection details from Airflow
|
||||
try:
|
||||
secrets_backend = conf.get('secrets', 'backend', fallback='None')
|
||||
logging.info(f"Attempting to get 'docker_hub' connection. Configured secrets backend: {secrets_backend}")
|
||||
docker_conn = BaseHook.get_connection("docker_hub")
|
||||
docker_username = docker_conn.login
|
||||
docker_password = docker_conn.password
|
||||
logging.info("Successfully retrieved 'docker_hub' connection.")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to retrieve 'docker_hub' connection: {e}")
|
||||
# Log details about potential secrets backend issues
|
||||
secrets_backend_kwargs = conf.get('secrets', 'backend_kwargs', fallback='{}')
|
||||
logging.error(f"Secrets backend details: backend={secrets_backend}, kwargs={secrets_backend_kwargs}")
|
||||
# Re-raise the exception to fail the task
|
||||
raise
|
||||
|
||||
try:
|
||||
# Initialize Docker client to connect to docker-socket-proxy
|
||||
client = docker.DockerClient(base_url='tcp://docker-socket-proxy:2375')
|
||||
|
||||
# Authenticate with Docker Hub
|
||||
client.login(
|
||||
username=docker_username,
|
||||
password=docker_password,
|
||||
registry=docker_conn.host # Typically "https://index.docker.io/v1/"
|
||||
)
|
||||
|
||||
# Generate a unique container name
|
||||
container_name = f"ytdlp_service_{account_id}_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
# Pull the Docker image (if not already present)
|
||||
client.images.pull('pangramia/ytdlp-ops-server:latest')
|
||||
|
||||
# Use the configured network name (from params or default)
|
||||
network_name = docker_network # Use the retrieved parameter
|
||||
logging.info(f"Attempting to run container on network: {network_name}")
|
||||
|
||||
# Determine if --probe flag should be added based on DAG param
|
||||
exit_on_proxy_fail = context['dag_run'].conf.get('exit_on_proxy_fail', True) # Default to True if not set
|
||||
command_args = [
|
||||
'--script-dir', '/app/scripts',
|
||||
'--context-dir', '/app/context-data', # Use the bind mount target inside container
|
||||
'--port', str(port),
|
||||
'--health-port', str(health_port),
|
||||
'--clients', clients,
|
||||
'--timeout', '120',
|
||||
'--proxy', proxy if proxy else '',
|
||||
'--server-identity', account_id, # Use account_id as server identity
|
||||
]
|
||||
if redis_enabled:
|
||||
command_args.extend(['--redis-host', redis_host_for_container])
|
||||
command_args.extend(['--redis-port', redis_port_for_container])
|
||||
|
||||
if exit_on_proxy_fail:
|
||||
command_args.append('--probe')
|
||||
logging.info("Adding --probe flag to container command as exit_on_proxy_fail=True")
|
||||
else:
|
||||
logging.info("Not adding --probe flag to container command as exit_on_proxy_fail=False")
|
||||
|
||||
# Run the Docker container with health port
|
||||
container = client.containers.run(
|
||||
image='pangramia/ytdlp-ops-server:latest',
|
||||
command=command_args, # Use the constructed command list
|
||||
environment={
|
||||
'PYTHONUNBUFFERED': '1', # Ensure logs are not buffered
|
||||
'SERVER_PORT': str(port), # Port the service listens on *inside* the container
|
||||
'SERVER_HOST': '0.0.0.0', # Service should listen on all interfaces *inside* the container
|
||||
'ACCOUNT_ID': account_id,
|
||||
# Pass Redis details *if enabled* for the service to register itself
|
||||
'REDIS_HOST': redis_host_for_container,
|
||||
'REDIS_PORT': redis_port_for_container,
|
||||
'REDIS_PASSWORD': redis_password_for_container,
|
||||
# Pass PROXY_URL for health check access
|
||||
'PROXY_URL': proxy if proxy else '',
|
||||
},
|
||||
ports={
|
||||
f"{port}/tcp": port,
|
||||
f"{health_port}/tcp": health_port
|
||||
},
|
||||
volumes={
|
||||
context_dir: {'bind': '/app/context-data', 'mode': 'rw'}
|
||||
},
|
||||
network_mode=network_name, # Use the specified network variable
|
||||
auto_remove=False, # Do not auto-remove the container
|
||||
name=container_name, # Use a unique name
|
||||
detach=True,
|
||||
tty=True,
|
||||
shm_size='256m',
|
||||
# Updated healthcheck to test external connectivity via proxy
|
||||
healthcheck={
|
||||
# Use CMD-SHELL to allow conditional logic based on PROXY_URL env var
|
||||
'test': [
|
||||
'CMD-SHELL',
|
||||
# Script checks if PROXY_URL is set, uses it with curl if yes, otherwise curls directly.
|
||||
# -f: Fail silently (exit non-zero on error)
|
||||
# --connect-timeout 10: Timeout for connection phase
|
||||
# > /dev/null: Discard output, we only care about exit code
|
||||
'if [ -n "$PROXY_URL" ]; then '
|
||||
'curl -f --connect-timeout 10 -x "$PROXY_URL" https://ifconfig.co > /dev/null; '
|
||||
'else '
|
||||
'curl -f --connect-timeout 10 https://ifconfig.co > /dev/null; '
|
||||
'fi'
|
||||
],
|
||||
'interval': 30 * 1000000000, # Check every 30 seconds (30 * 1e9 nanoseconds)
|
||||
'timeout': 15 * 1000000000, # Timeout after 15 seconds (15 * 1e9 nanoseconds)
|
||||
'retries': 5, # Retry 5 times on failure
|
||||
'start_period': 15 * 1000000000 # Grace period of 15 seconds after start
|
||||
},
|
||||
# Add labels for better identification
|
||||
labels={
|
||||
'service': 'ytdlp',
|
||||
'account_id': account_id
|
||||
}
|
||||
)
|
||||
|
||||
# Wait for container to be running (skip health check verification)
|
||||
start_time = time.time()
|
||||
while True:
|
||||
container.reload()
|
||||
if container.status == 'running':
|
||||
break
|
||||
if time.time() - start_time > 10: # 10 second timeout
|
||||
raise TimeoutError("Container failed to start within 10 seconds")
|
||||
time.sleep(1)
|
||||
|
||||
logging.info(f"Container started: {container.id} (health check verification skipped)")
|
||||
# Push container details immediately after creation using simplified keys
|
||||
context['task_instance'].xcom_push(key='container_id', value=container.id)
|
||||
context['task_instance'].xcom_push(key='container_name', value=container_name)
|
||||
logging.info(f"Pushed container_id={container.id} and container_name={container_name} to XCom.")
|
||||
|
||||
# --- Determine Host for Sensor ---
|
||||
# Get the container's IP address on the specified network for the HttpSensor
|
||||
try:
|
||||
container.reload() # Refresh container attributes
|
||||
network_settings = container.attrs.get('NetworkSettings', {}).get('Networks', {})
|
||||
if network_name in network_settings:
|
||||
host_for_sensor = network_settings[network_name].get('IPAddress')
|
||||
if not host_for_sensor:
|
||||
raise ValueError(f"Container {container.id} has no IPAddress on network '{network_name}'")
|
||||
logging.info(f"Using container IP '{host_for_sensor}' on network '{network_name}' for HttpSensor.")
|
||||
else:
|
||||
# Fallback or error if container not on expected network
|
||||
logging.error(f"Container {container.id} is not attached to the expected network '{network_name}'. Network settings: {network_settings}")
|
||||
# Option 1: Fallback to container name (might fail as observed)
|
||||
# host_for_sensor = container_name
|
||||
# logging.warning(f"Falling back to container name '{host_for_sensor}' for sensor.")
|
||||
# Option 2: Raise error
|
||||
raise ValueError(f"Container {container.id} not found on network '{network_name}'. Cannot determine IP for sensor.")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to get container IP address: {e}", exc_info=True)
|
||||
raise AirflowException(f"Failed to determine IP address for HttpSensor: {e}")
|
||||
|
||||
# Ensure we don't use 0.0.0.0 or empty string for the sensor
|
||||
if not host_for_sensor or host_for_sensor == "0.0.0.0":
|
||||
raise ValueError(f"Determined host_for_sensor is invalid ('{host_for_sensor}'). Check container network attachment and IP assignment.")
|
||||
|
||||
# --- Add extra logging before pushing ---
|
||||
logging.info(f"FINAL CHECK before XCom push:")
|
||||
logging.info(f" Account ID: {account_id}")
|
||||
logging.info(f" Host for Sensor (IP Address): {host_for_sensor}")
|
||||
logging.info(f" Host for Registration: {host_for_registration}")
|
||||
logging.info(f" Service Port: {port}")
|
||||
logging.info(f" Health Port: {health_port}")
|
||||
logging.info(f" Pushing to XCom key: service_host with value: {host_for_sensor}")
|
||||
# --- End extra logging ---
|
||||
|
||||
# Push distinct service connection details using simplified keys
|
||||
context['task_instance'].xcom_push(key='service_host_registration', value=host_for_registration) # For client discovery (e.g., Redis)
|
||||
context['task_instance'].xcom_push(key='service_host', value=host_for_sensor) # IP Address for HttpSensor
|
||||
context['task_instance'].xcom_push(key='service_port', value=port) # Port is the same
|
||||
context['task_instance'].xcom_push(key='service_health_port', value=health_port) # Health port is the same
|
||||
logging.info(f"Pushed host_for_sensor (IP Address)={host_for_sensor} to XCom key 'service_host'")
|
||||
logging.info(f"Pushed host_for_registration={host_for_registration} to XCom key 'service_host_registration'")
|
||||
|
||||
|
||||
# Store account metadata in Redis only if redis_enabled is True
|
||||
# This uses the 'host_for_registration' for client discovery
|
||||
if redis_enabled:
|
||||
store_account_metadata(account_id, host_for_registration, port, proxy, health_port, container.id)
|
||||
|
||||
# If we reach here, deployment is considered successful for now
|
||||
logging.info("Deployment preparation successful.")
|
||||
# Return values are implicitly pushed to XCom (but we pushed explicitly above)
|
||||
return context_dir, host_for_registration, port
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error during service deployment: {e}", exc_info=True)
|
||||
# Attempt to cleanup the container if it was created before the error
|
||||
try:
|
||||
if 'container' in locals() and container and container.id:
|
||||
logging.warning(f"Attempting to stop and remove container {container.id} due to deployment error.")
|
||||
container.stop(timeout=5)
|
||||
container.remove(force=True)
|
||||
logging.info(f"Successfully stopped and removed container {container.id} after error.")
|
||||
elif 'container_name' in locals() and container_name:
|
||||
# Try finding by name if ID wasn't captured
|
||||
containers = client.containers.list(filters={'name': container_name})
|
||||
if containers:
|
||||
logging.warning(f"Attempting to stop and remove container {containers[0].name} by name due to deployment error.")
|
||||
containers[0].stop(timeout=5)
|
||||
containers[0].remove(force=True)
|
||||
logging.info(f"Successfully stopped and removed container {containers[0].name} after error.")
|
||||
except Exception as cleanup_err:
|
||||
logging.error(f"Failed during post-error container cleanup: {cleanup_err}")
|
||||
raise # Re-raise the original exception to fail the task
|
||||
|
||||
# Removed the old monitor_health PythonOperator
|
||||
|
||||
# stop_service and cleanup_service are now defined directly in the DAG below.
|
||||
|
||||
def check_service_health(ti=None, **context):
|
||||
"""
|
||||
Periodically checks the service's /health endpoint using requests.
|
||||
Acts as a long-running sentinel task. Fails if the health check fails
|
||||
repeatedly or times out.
|
||||
"""
|
||||
# Get parameters from XCom
|
||||
host_reg = ti.xcom_pull(task_ids='prepare_and_deploy', key='service_host_registration')
|
||||
host_svc = ti.xcom_pull(task_ids='prepare_and_deploy', key='service_host')
|
||||
health_port = ti.xcom_pull(task_ids='prepare_and_deploy', key='service_health_port')
|
||||
|
||||
# Determine the host to use (prioritize registration host)
|
||||
host = host_reg if host_reg and host_reg != '0.0.0.0' else host_svc
|
||||
if not host or not health_port:
|
||||
raise AirflowException("Could not retrieve host or health_port from XCom for health check.")
|
||||
|
||||
health_url = f"http://{host}:{health_port}/health"
|
||||
logging.info(f"Starting health check for: {health_url}")
|
||||
|
||||
# Get configuration for polling
|
||||
# Use task's execution_timeout if available, otherwise default to 1 year
|
||||
task_timeout = ti.task.execution_timeout or timedelta(days=365)
|
||||
poke_interval = 60 # Check every 60 seconds (adjust as needed)
|
||||
start_time = time.monotonic()
|
||||
timeout_seconds = task_timeout.total_seconds()
|
||||
consecutive_error_start_time = None # Track start time of consecutive connection errors
|
||||
error_retry_window = 10 # Seconds to retry connection errors before failing
|
||||
|
||||
while True:
|
||||
current_time = time.monotonic()
|
||||
if current_time - start_time > timeout_seconds:
|
||||
raise AirflowException(f"Health check timed out after {timeout_seconds} seconds for {health_url}")
|
||||
|
||||
try:
|
||||
# Use a reasonable timeout for the individual request
|
||||
response = requests.get(health_url, timeout=15) # 15 second request timeout
|
||||
response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
|
||||
|
||||
# Check response content if needed (optional)
|
||||
# Example: Check for specific JSON content
|
||||
# try:
|
||||
# data = response.json()
|
            #     if data.get("status") == "healthy":
            #         logging.info(f"Health check successful: Status {response.status_code}")
            #     else:
            #         logging.warning(f"Health check OK (Status {response.status_code}), but content unexpected: {data}")
            # except requests.exceptions.JSONDecodeError:
            #     logging.warning(f"Health check OK (Status {response.status_code}), but response is not valid JSON.")

            # If we got a 2xx status, log success and reset error timer if needed
            if consecutive_error_start_time is not None:
                logging.info(f"Connection to {health_url} recovered.")
                consecutive_error_start_time = None
            logging.info(f"Health check successful: Status {response.status_code} for {health_url}")

        except requests.exceptions.Timeout:
            current_monotonic_time = time.monotonic()
            if consecutive_error_start_time is None:
                consecutive_error_start_time = current_monotonic_time
                logging.warning(f"Health check request timed out for {health_url}. Starting {error_retry_window}s retry window...")
            else:
                elapsed_error_time = current_monotonic_time - consecutive_error_start_time
                if elapsed_error_time > error_retry_window:
                    error_msg = f"Health check failed for {health_url}: Timeout persisted for over {error_retry_window} seconds."
                    logging.error(error_msg)
                    raise AirflowException(error_msg)
                else:
                    logging.warning(f"Health check request timed out for {health_url}. Retrying within {error_retry_window}s window ({elapsed_error_time:.1f}s elapsed)...")

        except requests.exceptions.ConnectionError as e:
            # Check if the error is specifically "Connection refused" - fail immediately
            if "[Errno 111] Connection refused" in str(e):
                logging.error(f"Health check failed for {health_url}: Connection refused. Failing task immediately.")
                raise AirflowException(f"Health check failed for {health_url}: Connection refused")
            else:
                # Handle other connection errors with the retry window
                current_monotonic_time = time.monotonic()
                if consecutive_error_start_time is None:
                    consecutive_error_start_time = current_monotonic_time
                    logging.warning(f"Health check connection error for {health_url}: {e}. Starting {error_retry_window}s retry window...")
                else:
                    elapsed_error_time = current_monotonic_time - consecutive_error_start_time
                    if elapsed_error_time > error_retry_window:
                        error_msg = f"Health check failed for {health_url}: Connection error persisted for over {error_retry_window} seconds. Last error: {e}"
                        logging.error(error_msg)
                        raise AirflowException(error_msg)
                    else:
                        logging.warning(f"Health check connection error for {health_url}: {e}. Retrying within {error_retry_window}s window ({elapsed_error_time:.1f}s elapsed)...")

        except requests.exceptions.HTTPError as e:
            # This catches 4xx/5xx errors - fail immediately
            logging.error(f"Health check failed for {health_url}: Status {e.response.status_code}. Failing task.")
            # Fail the task immediately on HTTP error
            raise AirflowException(f"Health check failed for {health_url}: Status {e.response.status_code}")
        except requests.exceptions.RequestException as e:
            logging.error(f"Health check failed for {health_url} with unexpected error: {e}. Failing task.")
            # Fail the task immediately on other request errors
            raise AirflowException(f"Health check failed for {health_url}: {e}")
        except Exception as e:
            # Catch any other unexpected errors during the check
            logging.error(f"Unexpected error during health check for {health_url}: {e}", exc_info=True)
            raise AirflowException(f"Unexpected error during health check: {e}")

        # Wait for the poke interval before the next check
        time.sleep(poke_interval)


def _wait_forever():
    """Sleeps indefinitely (or until task timeout) to simulate a running service."""
    logging.info("Sentinel task started. Sleeping in a loop...")
    # Sleep in a loop with a reasonable interval to avoid OverflowError.
    # The task will keep running until it times out based on execution_timeout
    # or is manually stopped/failed.
    while True:
        try:
            # Sleep for a long interval (e.g., 1 day).
            # You can adjust this interval if needed.
            time.sleep(86400)  # Sleep for 24 hours
        except KeyboardInterrupt:
            logging.info("Sentinel task interrupted. Exiting.")
            break
        except Exception as e:
            # Log other potential errors during sleep, though unlikely
            logging.error(f"Error during sentinel sleep loop: {e}")
            # Optionally break or continue based on error handling strategy
            break  # Exit loop on unexpected error


def stop_service(**context):
    """Stop the running Docker container with verification."""
    # Retrieve account_id from params or kwargs
    account_id = context.get('params', {}).get('account_id') or context.get('account_id')
    if not account_id:
        raise ValueError("Account ID is missing.")

    # Initialize Docker client to connect to docker-socket-proxy
    client = docker.DockerClient(base_url='tcp://docker-socket-proxy:2375')

    try:
        # For testing, try to get container ID from environment if XCom is not available
        container_id = None
        if 'ti' in context:
            # Use simplified XCom key
            container_id = context['ti'].xcom_pull(task_ids='prepare_and_deploy', key='container_id')

        if not container_id:
            # If not found in XCom, try to find container by account_id pattern (keep this fallback)
            containers = client.containers.list(filters={"name": f"ytdlp_service_{account_id}"})
            if containers:
                container = containers[0]
                container_id = container.id
                logging.info(f"Found container by name pattern: {container.name} (ID: {container_id})")
            else:
                logging.warning(f"No container found for account {account_id} - nothing to stop")
                return

        if container_id:
            # If found in XCom, stop by container ID
            container = client.containers.get(container_id)

            # Verify container is running before stopping
            if container.status != 'running':
                logging.warning(f"Container {container_id} is not running (status: {container.status})")
                return

            logging.info(f"Stopping container {container_id}...")
            container.stop(timeout=10)  # 10 second timeout

            # Verify container is stopped
            container.reload()
            if container.status == 'exited':
                logging.info(f"Successfully stopped container {container_id}")
            else:
                logging.error(f"Container {container_id} failed to stop (status: {container.status})")
                raise RuntimeError(f"Container {container_id} failed to stop")

            # Clear Redis entries only if redis_enabled is True.
            # Retrieve redis_enabled status from DAG run conf or params.
            redis_enabled = context['dag_run'].conf.get('redis_enabled', False) or context['params'].get('redis_enabled', False)
            if redis_enabled:
                redis_client = get_redis_connection()
                try:
                    # Verify Redis connection
                    if not redis_client.ping():
                        raise ConnectionError("Failed to connect to Redis")

                    # Remove main metadata
                    redis_client.delete(f"ytdlp:{account_id}")
                    # Remove from accounts set
                    redis_client.srem("ytdlp:accounts", account_id)
                    logging.info(f"Successfully cleared Redis entries for account: {account_id}")
                except Exception as e:
                    logging.error(f"Failed to clear Redis entries for account {account_id}: {e}")
                    # Do not raise here, allow container stop to be considered successful
                    # raise  # Optional: re-raise if Redis cleanup failure should fail the task

            return

        logging.warning(f"No container found for account {account_id} - nothing to stop")

    except docker.errors.NotFound as e:
        logging.warning(f"Container for account {account_id} not found: {e}")
    except Exception as e:
        logging.error(f"Failed to stop container: {e}")
        raise


def cleanup_service(**context):
    """Cleanup service resources including Redis entries and XCom data."""
    # Note: This function is now called within the manual_stop_cleanup TaskGroup
    try:
        # Retrieve account_id from params first, then from XCom
        account_id = context['params'].get('account_id')
        if not account_id:
            # Try to get it from XCom
            account_id = context['task_instance'].xcom_pull(task_ids='prepare_and_deploy', key='account_id')
            if not account_id:
                logging.warning("Account ID not found in params or XCom - skipping resource cleanup")
                return

        # Redis cleanup (if redis_enabled=True) is handled in the 'stop_service' task.
        logging.info(f"Redis cleanup for account {account_id} is handled by the 'stop_service' task if enabled.")

        # Cleanup XCom data (using simplified keys where applicable).
        # Note: XCom cleanup is generally not strictly necessary but can be good practice.
        # Airflow manages XCom expiry. This code doesn't actually *delete* XComs.
        # To truly delete, you'd use the Airflow API or DB directly.
        # We'll leave the pull calls here as they don't harm anything.
        ti = context['task_instance']
        ti.xcom_pull(key='container_id', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='container_name', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='service_host_registration', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='service_host', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='service_port', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='service_health_port', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='work_id', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='context_dir', task_ids='prepare_and_deploy', include_prior_dates=True)
        ti.xcom_pull(key='account_id', task_ids='prepare_and_deploy', include_prior_dates=True)  # Keep account_id pull
        logging.info(f"Pulled XCom data for potential cleanup logging for account: {account_id}")

        # Initialize Docker client
        client = docker.DockerClient(base_url='tcp://docker-socket-proxy:2375')
        container_found_and_removed = False

        # Attempt 1: Get container ID from XCom using simplified key
        container_id_xcom = context['task_instance'].xcom_pull(task_ids='prepare_and_deploy', key='container_id')
        if container_id_xcom:
            logging.info(f"Attempting to remove container using XCom ID: {container_id_xcom}")
            try:
                container = client.containers.get(container_id_xcom)
                logging.info(f"Found container {container.id} (Name: {container.name}). Removing...")
                container.remove(force=True)
                logging.info(f"Successfully removed container {container.id}")
                container_found_and_removed = True
            except docker.errors.NotFound:
                logging.warning(f"Container with XCom ID {container_id_xcom} not found. Trying other methods.")
            except Exception as e:
                logging.error(f"Error removing container {container_id_xcom}: {e}")

        # Attempt 2: Find container by labels if not found/removed via XCom ID
        if not container_found_and_removed:
            logging.info(f"Attempting to find and remove container by labels: service=ytdlp, account_id={account_id}")
            try:
                containers = client.containers.list(
                    filters={'label': [f'service=ytdlp', f'account_id={account_id}']},
                    all=True  # Include stopped containers
                )
                if containers:
                    for container in containers:
                        logging.info(f"Found container {container.id} (Name: {container.name}) by labels. Removing...")
                        try:
                            container.remove(force=True)
                            logging.info(f"Successfully removed container {container.id}")
                            container_found_and_removed = True  # Mark as found even if only one is removed
                        except Exception as e:
                            logging.error(f"Error removing container {container.id} found by labels: {e}")
                else:
                    logging.info("No containers found matching labels.")
            except Exception as e:
                logging.error(f"Error searching for containers by labels: {e}")

        # Attempt 3: Find container by name pattern if still not found/removed
        if not container_found_and_removed:
            container_name_pattern = f"ytdlp_service_{account_id}_*"
            logging.info(f"Attempting to find and remove container by name pattern: {container_name_pattern}")
            try:
                containers = client.containers.list(filters={'name': container_name_pattern}, all=True)
                if containers:
                    for container in containers:
                        logging.info(f"Found container {container.id} (Name: {container.name}) by name pattern. Removing...")
                        try:
                            container.remove(force=True)
                            logging.info(f"Successfully removed container {container.id}")
                            container_found_and_removed = True
                        except Exception as e:
                            logging.error(f"Error removing container {container.id} found by name: {e}")
                else:
                    logging.info("No containers found matching name pattern.")
            except Exception as e:
                logging.error(f"Error searching for containers by name: {e}")

        if not container_found_and_removed:
            logging.warning(f"Could not find or remove any container for account {account_id} using ID, labels, or name.")

        # Get context directory from XCom and remove it
        context_dir = context['task_instance'].xcom_pull(task_ids='prepare_and_deploy', key='context_dir')
        if context_dir and os.path.exists(context_dir):
            shutil.rmtree(context_dir)
            logging.info(f"Cleaned up working directory: {context_dir}")
    except Exception as e:
        logging.error(f"Error during cleanup: {e}")
        raise


# Define the DAG
with DAG(
    'ytdlp_service',
    default_args=default_args,
    description='Deploy YTDLP token service for ios, android, mweb',
    schedule_interval=None,
    start_date=days_ago(1),  # Use dynamic start date for manually triggered DAG
    catchup=False,
    tags=['youtube', 'tokens', 'service', 'docker'],
    # executor_config moved to default_args
    is_paused_upon_creation=False,
    params={
        'account_id': Param(
            'account_fr_2025-04-03T1220_anonomyous_2ssdfsf2342afga09',
            type="string",
            description="Required: The account ID for which the service is being deployed."
        ),
        'proxy': Param(
            'socks5://sslocal-rust-1084:1084',
            type=["null", "string"],
            description="Optional: The SOCKS5 proxy URL to use for the service (e.g., socks5://host:port)."
        ),
        'clients': Param(
            'ios,android,mweb',
            type="string",
            description="Comma-separated list of client types (e.g., ios,android,mweb)."
        ),
        'redis_enabled': Param(
            False,
            type="boolean",
            description="Use Redis for service discovery? If False, host/port must be provided or will be auto-assigned."
        ),
        'host': Param(
            None,
            type=["null", "string"],
            description="Optional: Host IP for the service. If redis_enabled=False and host is not provided, defaults to '0.0.0.0'. If redis_enabled=True and host is not provided, uses HOST_EXTERNAL_IP or defaults to '0.0.0.0'."
        ),
        'port': Param(
            None,
            type=["null", "integer"],
            description="Optional: Port for the service. If None, a free port will be assigned automatically. If redis_enabled=False and a port is provided, it will be used (after checking availability)."
        ),
        # redis_host and redis_port parameters are removed.
        # If redis_enabled=True, the DAG will use the 'redis_default' Airflow connection.
        'docker_network': Param(
            'airflow_prod_proxynet',
            type="string",
            description="Optional: The Docker network to attach the container to. Defaults to 'airflow_prod_proxynet'."
        ),
        'exit_on_proxy_fail': Param(
            True,
            type="boolean",
            description="Exit the service container immediately if the initial proxy test fails?"
        ),
    }
) as dag:

    # Task to prepare and deploy the service
    prepare_and_deploy = PythonOperator(
        task_id='prepare_and_deploy',
        python_callable=prepare_and_deploy_service,
        provide_context=True,
        trigger_rule='all_success'  # Keep default trigger rule for prepare_and_deploy
    )

    # Combined Health Check and Sentinel Task using PythonOperator.
    # This task runs for a long time, checking health periodically using the 'requests' library.
    # If the health check fails repeatedly or times out, the task fails, triggering 'stop_service'.
    monitor_service_health = PythonOperator(
        task_id='monitor_service_health',
        python_callable=check_service_health,
        provide_context=True,
        # Set execution timeout for the task itself (acts as the overall timeout)
        execution_timeout=timedelta(days=365),  # Long timeout (e.g., 1 year)
        # op_kwargs can pass static config, but host/port come from XCom inside the function.
        # poke_interval and request timeout are handled within check_service_health.
    )
    monitor_service_health.doc_md = """
    ### Monitor Service Health Task (PythonOperator)
    Uses a Python function to periodically check the service's `/health` endpoint using the `requests` library.
    Acts as both a health check and a sentinel for the running service.
    - **Pulls from XCom:** Reads `service_host_registration`, `service_host`, and `service_health_port` from the `prepare_and_deploy` task to construct the target URL.
    - **Polling:** Checks the `/health` endpoint every 60 seconds.
    - **Timeout:** Uses the task's `execution_timeout` (set to 1 year) as the overall maximum duration. Individual requests have a 15-second timeout.
    - **Failure:** If a health check request returns a 4xx/5xx status code or encounters other request errors, the task fails immediately. If the overall `execution_timeout` is reached without a failure, the task would eventually time out and fail.
    """

    # Task to stop the service (runs if monitor_service_health fails)
    stop = PythonOperator(
        task_id='stop_service',
        python_callable=stop_service,
        provide_context=True,
        trigger_rule=TriggerRule.ONE_FAILED  # Run only if monitor_service_health fails
    )
    stop.doc_md = """
    ### Stop Service Task
    Stops the Docker container associated with the service.
    - **Trigger Rule:** `one_failed` - This task only runs if the upstream `monitor_service_health` task fails.
    - Pulls container ID/name from XCom or finds it using labels/name patterns.
    - Clears Redis entries if `redis_enabled=True`.
    """

    # Marker task to indicate that the deployment failed
    prepare_failed_marker = EmptyOperator(
        task_id='prepare_failed_marker',
        trigger_rule=TriggerRule.ONE_FAILED  # Run only if 'prepare_and_deploy' fails
    )

    # Task to cleanup resources (runs after stop sequence OR if prepare fails)
    cleanup = PythonOperator(
        task_id='cleanup_service',
        python_callable=cleanup_service,
        provide_context=True,
        trigger_rule=TriggerRule.ALL_DONE  # Run after upstream (stop or prepare_failed_marker) is done
    )
    cleanup.doc_md = """
    ### Cleanup Service Task
    Removes the Docker container and cleans up related resources.
    - **Trigger Rule:** `all_done` - Runs after the `stop_service` task finishes, whether it succeeded or failed.
    - Removes the container using ID from XCom, labels, or name patterns.
    - Cleans up XCom variables.
    - Removes the context directory.
    """

    # Define task dependencies
    # Success Path: prepare -> monitor (runs indefinitely)
    # Monitor Failure Path: monitor (fails) -> stop -> cleanup
    # Prepare Failure Path: prepare (fails) -> prepare_failed_marker -> cleanup

    prepare_and_deploy >> monitor_service_health
    prepare_and_deploy >> prepare_failed_marker  # Trigger marker if prepare fails

    monitor_service_health >> stop  # Trigger stop if monitor fails

    # Cleanup is triggered after stop finishes OR after prepare_failed_marker finishes
    stop >> cleanup
    prepare_failed_marker >> cleanup
BIN airflow/dags/.DS_Store vendored
Binary file not shown.
@ -1,88 +0,0 @@
# Architecture and overview of the YTDLP Airflow DAGs

This document describes the architecture and purpose of the DAGs used to download videos from YouTube. The system is built around a continuous, self-sustaining loop for parallel, fault-tolerant processing.

## Main processing loop

Processing is carried out by two main DAGs working as a pair: an orchestrator and a worker.

### `ytdlp_ops_orchestrator` (the "ignition" system)

- **Purpose:** This DAG acts as the "ignition system" that kicks off processing. It is triggered manually to start a specified number of parallel worker loops.
- **How it works:**
  - It does **not** process URLs itself.
  - Its only job is to launch the configured number of `ytdlp_ops_worker_per_url` DAG runs, as sketched below.
  - It passes all required configuration (account pool, Redis connection, etc.) to the workers.
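A minimal sketch of what such an ignition DAG can look like, assuming `TriggerDagRunOperator` is used to start each worker line. The DAG id, the `NUM_WORKER_LINES` constant, and the `worker_line` conf key are illustrative placeholders, not the actual implementation:

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.trigger_dagrun import TriggerDagRunOperator

NUM_WORKER_LINES = 4  # assumed: the real orchestrator takes this from its params

with DAG(
    dag_id="ytdlp_ops_orchestrator_sketch",  # illustrative id, not the real DAG
    start_date=datetime(2025, 1, 1),
    schedule_interval=None,
    catchup=False,
) as dag:
    for i in range(NUM_WORKER_LINES):
        TriggerDagRunOperator(
            task_id=f"start_worker_line_{i}",
            trigger_dag_id="ytdlp_ops_worker_per_url",
            conf={"worker_line": i},    # assumed key; real conf carries account pool / Redis settings
            wait_for_completion=False,  # fire-and-forget: each line then sustains itself
        )
```

Because `wait_for_completion=False`, the orchestrator finishes as soon as all lines are started; it never blocks on the workers themselves.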

### `ytdlp_ops_worker_per_url` (self-sustaining worker)

- **Purpose:** This DAG processes a single URL and is designed to run in a continuous loop.
- **How it works:**
  1. **Start:** The initial run is triggered by `ytdlp_ops_orchestrator`.
  2. **Getting a task:** The worker pops one URL from the `_inbox` queue in Redis. If the queue is empty, the run ends and this processing "line" stops (see the sketch after this list).
  3. **Processing:** It calls the `ytdlp-ops-server` service to obtain `info.json` and a proxy, then downloads the video.
  4. **Continue or stop:**
     - **On success:** It triggers a new run of itself, creating a continuous loop that processes the next URL.
     - **On failure:** The loop is interrupted (if `stop_on_failure` is set to `True`), stopping that processing "line". This prevents a single problematic URL or account from halting the whole system.
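A minimal sketch of the two ends of this loop, assuming the `redis` Python client and standard Airflow operators. The connection details, the `ytdlp_inbox` key name, and the task ids are assumptions; only the `_inbox` suffix and the self-trigger behaviour come from this document:

```python
import redis

from airflow.exceptions import AirflowSkipException
from airflow.operators.trigger_dagrun import TriggerDagRunOperator


def pull_url_from_inbox(**context):
    """Pop one URL from the inbox list; skip the run (ending this line) if the queue is empty."""
    client = redis.Redis(host="redis", port=6379, decode_responses=True)  # assumed connection details
    url = client.lpop("ytdlp_inbox")  # assumed key name; only the `_inbox` suffix is documented
    if not url:
        raise AirflowSkipException("Inbox queue is empty - stopping this worker line.")
    context["ti"].xcom_push(key="url", value=url)


# On success, the final task re-triggers the same DAG so the loop keeps going:
trigger_self_run = TriggerDagRunOperator(
    task_id="trigger_self_run",
    trigger_dag_id="ytdlp_ops_worker_per_url",
    wait_for_completion=False,  # the current run finishes; the new run continues the line
)
```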

## Management DAGs

### `ytdlp_mgmt_proxy_account`

- **Purpose:** The main tool for monitoring and managing the state of the resources used by `ytdlp-ops-server`.
- **Functionality:**
  - **Status overview:** Shows the current status of every proxy and account (e.g., `ACTIVE`, `BANNED`, `RESTING`).
  - **Proxy management:** Lets you manually ban, unban, or reset the status of a proxy.
  - **Account management:** Lets you manually ban or unban accounts.

### `ytdlp_mgmt_queues`

- **Purpose:** Provides a set of tools for managing the Redis queues used by the processing pipeline.
- **Functionality (via the `action` parameter):**
  - `add_videos`: Add one or more YouTube URLs to the queue.
  - `clear_queue`: Clear (delete) the specified Redis key.
  - `list_contents`: View the contents of a Redis key (list or hash).
  - `check_status`: Check the overall state of the queues (type, size).
  - `requeue_failed`: Move all URLs from the `_fail` queue back into the `_inbox` queue for reprocessing (see the sketch below).
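A minimal sketch of what the `requeue_failed` action amounts to, assuming `_fail` is a hash keyed by URL and `_inbox` is a list (as described for the worker tasks later in this document); the `ytdlp` key prefix is an assumption:

```python
import redis


def requeue_failed(client: redis.Redis, prefix: str = "ytdlp") -> int:
    """Move every URL recorded in the `_fail` hash back onto the `_inbox` list."""
    fail_key, inbox_key = f"{prefix}_fail", f"{prefix}_inbox"  # assumed key names; only the suffixes are documented
    failed = client.hgetall(fail_key)  # URL -> error details
    if failed:
        client.rpush(inbox_key, *failed.keys())  # re-enqueue the URLs (the hash fields)
        client.delete(fail_key)
    return len(failed)
```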

## Resource management strategy (proxies and accounts)

The system uses a deliberate strategy for managing the lifecycle and state of accounts and proxies, in order to maximize the success rate and minimize bans.

- **Account lifecycle ("cooldown"):**
  - To prevent "burnout", accounts automatically move to a `RESTING` state after a period of intensive use.
  - Once the rest period expires, they automatically return to `ACTIVE` and become available to workers again.

- **Smart ban strategy:**
  - **Ban the account first:** When a serious error occurs (e.g., `BOT_DETECTED`), the system penalizes **only the account** that caused the failure. The proxy keeps working.
  - **Sliding-window proxy bans:** A proxy is banned automatically only if it shows **systematic failures across DIFFERENT accounts** within a short time window, which is a reliable indicator that the proxy itself is the problem (see the sketch at the end of this section).

- **Monitoring:**
  - The `ytdlp_mgmt_proxy_account` DAG is the main monitoring tool. It shows the current status of every resource, including the time remaining until banned or resting accounts become active again.
  - The execution graph of the `ytdlp_ops_worker_per_url` DAG now explicitly shows steps such as `assign_account`, `get_token`, `ban_account`, and `retry_get_token`, which makes debugging more transparent.
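One way to implement such a sliding-window check is with a Redis sorted set per proxy. This is an illustrative sketch only; the `proxy_failures:<proxy>` key layout, the window size, and the threshold are assumptions, not the actual server logic:

```python
import time

import redis

WINDOW_SECONDS = 900        # assumed window size
DISTINCT_ACCOUNT_LIMIT = 3  # assumed threshold


def record_failure_and_check_proxy(client: redis.Redis, proxy: str, account: str) -> bool:
    """Record a failure of `account` through `proxy`; return True if the proxy should be banned."""
    key = f"proxy_failures:{proxy}"                         # assumed key layout
    now = time.time()
    client.zadd(key, {account: now})                        # keep the latest failure time per account
    client.zremrangebyscore(key, 0, now - WINDOW_SECONDS)   # drop failures outside the window
    return client.zcard(key) >= DISTINCT_ACCOUNT_LIMIT      # several *different* accounts failed recently
```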

## External services

### `ytdlp-ops-server` (Thrift service)

- **Purpose:** An external service that provides the authentication data (tokens, cookies, proxy) needed to download videos.
- **Interaction:** The worker DAG (`ytdlp_ops_worker_per_url`) calls this service before starting a download to obtain the data required by `yt-dlp`.

## How the worker DAG (`ytdlp_ops_worker_per_url`) works

This DAG is the workhorse of the system. It is designed as a self-sustaining loop that processes one URL per run.

### Tasks and their purpose:

- **`pull_url_from_redis`**: Pops one URL from the `_inbox` queue in Redis. If the queue is empty, the DAG ends with a `skipped` status, stopping this processing "line".
- **`assign_account`**: Picks the account to use for the task. It reuses the account that succeeded in the previous run of its "line" (account affinity); on the first run it picks a random account.
- **`get_token`**: The core task. It calls `ytdlp-ops-server` to obtain `info.json`.
- **`handle_bannable_error_branch`**: If `get_token` fails with a bannable error, this branching task decides what to do next based on the `on_bannable_failure` policy.
- **`ban_account_and_prepare_for_retry`**: If the policy allows a retry, this task bans the failed account and selects a new one for the retry.
- **`retry_get_token`**: Makes a second attempt to obtain the token with the new account.
- **`ban_second_account_and_proxy`**: If the second attempt also fails, this task bans the second account and the proxy that was used.
- **`download_and_probe`**: If `get_token` (or `retry_get_token`) succeeded, this task uses `yt-dlp` to download the media and `ffmpeg` to verify the integrity of the downloaded file.
- **`mark_url_as_success`**: If `download_and_probe` succeeded, this task writes the result to the `_result` hash in Redis (see the sketch after this list).
- **`handle_generic_failure`**: If any of the main tasks fails with an unrecoverable error, this task writes detailed error information to the `_fail` hash in Redis.
- **`decide_what_to_do_next`**: A branching task that runs after either success or failure. It decides whether to continue the loop.
- **`trigger_self_run`**: The task that actually triggers the next DAG run, creating the continuous loop.
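A minimal sketch of the success/failure bookkeeping described above, assuming the `redis` client and a `ytdlp` key prefix; the payload shape is an assumption, and only the `_result` and `_fail` suffixes come from this document:

```python
import json
import time

import redis


def record_outcome(client: redis.Redis, url: str, ok: bool, details: dict, prefix: str = "ytdlp") -> None:
    """Write the outcome of one URL to the `_result` hash (success) or the `_fail` hash (failure)."""
    key = f"{prefix}_result" if ok else f"{prefix}_fail"  # assumed key names
    payload = {"ts": int(time.time()), **details}
    client.hset(key, url, json.dumps(payload, ensure_ascii=False))
```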
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,23 +0,0 @@
import socket
import logging

logger = logging.getLogger(__name__)


def get_ip_address():
    """
    Get the primary IP address of the host.
    This is used by Airflow workers to advertise their IP for log serving,
    ensuring the webserver can reach them in a multi-host environment.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        # This doesn't even have to be reachable
        s.connect(('10.255.255.255', 1))
        ip_address = s.getsockname()[0]
        logger.info(f"Determined host IP address as: {ip_address}")
    except Exception as e:
        logger.warning(f"Could not determine IP address, falling back to 127.0.0.1. Error: {e}")
        ip_address = '127.0.0.1'
    finally:
        s.close()
    return ip_address
@ -1,56 +0,0 @@
from airflow.plugins_manager import AirflowPlugin
from airflow.hooks.base import BaseHook
from airflow.configuration import conf
import uuid
import backoff


class YTDLPHook(BaseHook):
    def __init__(self, conn_id='ytdlp_default'):
        super().__init__()
        self.conn_id = conn_id
        self.connection = self.get_connection(conn_id)
        self.timeout = conf.getint('ytdlp', 'timeout', fallback=120)
        self.max_retries = conf.getint('ytdlp', 'max_retries', fallback=3)

    @backoff.on_exception(backoff.expo,
                          Exception,
                          max_tries=3,
                          max_time=300)
    def start_service(self, host, port, service_id, work_dir):
        """Start token service as a long-running process"""
        import subprocess
        import os
        from pathlib import Path

        # Get script path relative to Airflow home
        airflow_home = os.getenv('AIRFLOW_HOME', '')
        script_path = Path(airflow_home).parent / 'ytdlp_ops_server.py'

        # Ensure work directory exists
        os.makedirs(work_dir, exist_ok=True)

        # Start service process
        cmd = [
            'python', str(script_path),
            '--port', str(port),
            '--host', host,
            '--service-id', service_id,
            '--context-dir', work_dir,
            '--script-dir', str(Path(airflow_home) / 'dags' / 'scripts')
        ]

        self.log.info(f"Starting token service: {' '.join(cmd)}")

        # Start process detached
        docker_cmd = [
            'docker-compose', '-f', 'docker-compose.yaml',
            'up', '-d', '--build', 'ytdlp-service'
        ]
        subprocess.run(docker_cmd, check=True)

        self.log.info(f"Token service started on {host}:{port}")
        return True


class YTDLPPlugin(AirflowPlugin):
    name = 'ytdlp_plugin'
    hooks = [YTDLPHook]
@ -1,14 +0,0 @@
2025-04-06 00:41:03,141 - INFO - Attempting to connect to server at 127.0.0.1:9090...
2025-04-06 00:41:03,141 - INFO - Successfully connected to server
2025-04-06 00:41:03,142 - INFO - Server connection test successful
2025-04-06 00:41:03,142 - INFO - Requesting token for URL: https://www.youtube.com/watch?v=sOlTX9uxUtM%27
2025-04-06 00:41:17,930 - INFO - Successfully received token data from server
2025-04-06 00:41:17,938 - INFO - Valid JSON with video data: Операция "Багратион". От поражения к победе.
2025-04-06 00:41:17,944 - INFO - Successfully saved info.json to info_json_sOlTX9uxUtM_1743889277.json and latest.json to latest.json
2025-04-06 00:44:05,608 - INFO - Attempting to connect to server at 127.0.0.1:9090...
2025-04-06 00:44:05,609 - INFO - Successfully connected to server
2025-04-06 00:44:05,609 - INFO - Server connection test successful
2025-04-06 00:44:05,610 - INFO - Requesting token for URL: https://www.youtube.com/watch?v=sOlTX9uxUtM%27
2025-04-06 00:44:18,350 - INFO - Successfully received token data from server
2025-04-06 00:44:18,357 - INFO - Valid JSON with video data: Операция "Багратион". От поражения к победе.
2025-04-06 00:44:18,364 - INFO - Successfully saved info.json to info_json_sOlTX9uxUtM_1743889458.json and latest.json to latest.json