Migrate implementation to state machine support

aperez 2026-01-03 22:36:05 +03:00
parent efac6cf1fb
commit db78171281
22 changed files with 5390 additions and 3852 deletions

View File

@ -14,19 +14,35 @@ settings:
mode: fetch_only
orchestration_mode: direct_docker_cli
profile_mode: from_pool_with_lock
urls_file: "inputfiles/urls.sky3.txt"
urls_file: "inputfiles/urls.rt300.txt"
# The save directory MUST be inside the docker_host_mount_path for the download
# simulation to be able to find the files.
save_info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
# Settings for controlling the behavior of dummy/simulation modes.
# These values can be overridden at runtime with the --set flag.
dummy_simulation_settings:
# Timings for dummy auth simulation (per-URL delay in a batch)
auth_min_seconds: 0.1
auth_max_seconds: 0.5
auth_failure_rate: 0.0
auth_skipped_failure_rate: 0.0
# Timings for dummy download simulation (per-format download time)
download_min_seconds: 1.0
download_max_seconds: 3.0
download_failure_rate: 0.0
download_skipped_failure_rate: 0.0
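These knobs are consumed by the dummy auth/download simulations; below is a minimal sketch of the intended per-URL semantics (illustrative only — the function name is hypothetical and the rates are assumed to be independent probabilities, not the tool's actual implementation):

import random
import time

def simulate_dummy_auth(settings: dict) -> str:
    """Sketch: apply the dummy auth timing and failure-rate settings to one URL."""
    # Per-URL delay drawn from the configured window.
    delay = random.uniform(settings["auth_min_seconds"], settings["auth_max_seconds"])
    time.sleep(delay)
    # A single roll decides the simulated outcome.
    roll = random.random()
    if roll < settings["auth_failure_rate"]:
        return "failed"
    if roll < settings["auth_failure_rate"] + settings["auth_skipped_failure_rate"]:
        return "skipped_failure"
    return "ok"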
execution_control:
# Define worker pools, each tied to a specific profile prefix.
# The stress tool will launch the specified number of workers for each pool.
# Define worker pools. For a single auth worker that serves multiple groups
# (e.g., user1, user2), a single pool with a broad prefix like "user" is
# correct. This allows the worker to lock whichever profile the enforcer
# makes available from any group.
worker_pools:
- profile_prefix: "user1"
workers: 1
- profile_prefix: "user2"
- profile_prefix: "user"
workers: 1
# - profile_prefix: "user2"
# workers: 1
# How long a worker should pause if it cannot find an available profile to lock.
worker_polling_interval_seconds: 1
# No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.
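As a rough illustration of the pool semantics described above (parameter names and the task-runner callable are assumptions; the real worker logic lives in the stress tool, not in this config):

import time

def pool_worker_loop(manager, profile_prefix, polling_interval, owner, run_batch):
    """Sketch of a pool worker: lock any available profile matching the prefix, run one batch, unlock."""
    while True:
        # ProfileManager.lock_profile() exists in this commit; the parameter names
        # used here (owner, prefix) are assumptions for illustration.
        profile = manager.lock_profile(owner=owner, prefix=profile_prefix)
        if profile is None:
            # No profile from the pool is lockable; wait worker_polling_interval_seconds.
            time.sleep(polling_interval)
            continue
        try:
            run_batch(profile)  # caller-supplied task runner for one locked profile
        finally:
            manager.unlock_profile(profile["name"], owner)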
@ -43,7 +59,7 @@ info_json_generation_policy:
# This section is needed for the 'downloads_per_url: from_download_policy' setting.
# It should mirror the formats being used by the download simulation.
download_policy:
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
formats: "299-dashy" #/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
direct_docker_cli_policy:
# Which simulation environment's profiles to use for locking.
@ -88,7 +104,7 @@ direct_docker_cli_policy:
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/utils/networking.py
user_agent_version_range: [137, 143]
batch_size: 25
batch_size: 5
# A base config file can be used, with overrides applied from the policy.
# The orchestrator will inject 'proxy', 'batch-file', and 'output' keys into the overrides.

View File

@ -8,14 +8,15 @@
name: direct_docker_download_simulation
settings:
#dummy_batch: true
mode: download_only
orchestration_mode: direct_docker_cli
profile_mode: from_pool_with_lock
# This directory should contain info.json files generated by an auth simulation,
# like `10_direct_docker_auth_simulation`.
# It MUST be inside the docker_host_mount_path.
info_json_dir: "run/docker_mount/download_tasks"
#info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
#info_json_dir: "run/docker_mount/download_tasks"
# Regex to extract the profile name from a task filename. The first capture
# group is used. This is crucial for the task-first locking strategy.
# It looks for a component that starts with 'user' between two hyphens.
@ -37,11 +38,11 @@ download_policy:
# A comma-separated list of format IDs to download for each info.json.
# This is used by the dummy mode simulation to test per-format downloads.
# In non-dummy mode, the format selector in ytdlp_config_overrides is used.
formats: "140-dashy,299-dashy"
#formats: "140-dashy,299-dashy"
# After a successful download, run ffprobe to generate a stream info JSON file.
run_ffprobe: true
#run_ffprobe: true
# After a successful download, replace the media file with a zero-byte .empty file.
cleanup: true
#cleanup: true
# Default cooldown in seconds if not specified by the enforcer in Redis.
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
# will always take precedence. This is a fallback.
@ -99,7 +100,8 @@ direct_docker_cli_policy:
buffer-size: "4M"
concurrent-fragments: 8
ytdlp_raw_args: []
ytdlp_raw_args: # []
- "--simulate"
# --- Live Error Parsing Rules ---
# If a fatal error is detected, immediately ban the profile to stop the container.

View File

@ -14,10 +14,13 @@ auth_profile_setup:
pools:
- prefix: "user1"
proxy: "sslocal-rust-1092:1092"
count: 4
count: 3
- prefix: "user2"
proxy: "sslocal-rust-1093:1093"
count: 4
proxy: "sslocal-rust-1092:1092"
count: 3
- prefix: "user3"
proxy: "sslocal-rust-1092:1092"
count: 3
# --- Profile setup for the DOWNLOAD simulation ---
download_profile_setup:
@ -26,7 +29,10 @@ download_profile_setup:
pools:
- prefix: "user1"
proxy: "sslocal-rust-1092:1092"
count: 4
count: 3
- prefix: "user2"
proxy: "sslocal-rust-1093:1093"
count: 4
proxy: "sslocal-rust-1092:1092"
count: 3
- prefix: "user3"
proxy: "sslocal-rust-1092:1092"
count: 3

View File

@ -2,10 +2,6 @@
# This file is used by `bin/ytops-client policy-enforcer --live` to manage
# both the authentication and download simulation environments from a single process.
# Policy for the unified simulation enforcer.
# This file is used by `bin/ytops-client policy-enforcer --live` to manage
# both the authentication and download simulation environments from a single process.
simulation_parameters:
# --- Common Redis settings for all tools ---
# The enforcer will connect to two different Redis environments (key prefixes)
@ -19,10 +15,6 @@ simulation_parameters:
# --- Policies for the Authentication Simulation ---
auth_policy_enforcer_config:
# New setting for load balancing across profile groups.
# "round_robin": Cycle through available groups evenly (FIFO based on rest time).
# "least_loaded": Prioritize the group with the fewest pending downloads.
profile_selection_strategy: "least_loaded"
# Ban if 2 failures occur within a 1-minute window.
#ban_on_failures: 2
@ -32,7 +24,7 @@ auth_policy_enforcer_config:
# New rate limit policy to enforce requests-per-hour limits.
# For guest sessions, the limit is ~300 videos/hour.
rate_limit_requests: 280
rate_limit_requests: 0
rate_limit_window_minutes: 60
rate_limit_rest_duration_minutes: 5
@ -44,41 +36,45 @@ auth_policy_enforcer_config:
# For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
# The settings below should be configured to respect these limits.
# A group of profiles that are managed together.
# The enforcer will ensure that no more than `max_active_profiles` from this
# group are in the ACTIVE state at any time.
# New setting for load balancing across profile groups.
# "longest_idle": Activates the profile that has been idle the longest across all groups (based on last_used time).
# This is a global FIFO strategy that effectively cycles through profiles regardless of their group.
# "least_loaded": Prioritizes activating a profile from the group with the fewest pending downloads.
# If multiple groups have zero pending downloads, it acts as a FIFO queue, activating
# the one that finished its last download batch the earliest. This is useful when you want
# to ensure a group finishes its entire workload before another group starts.
profile_selection_strategy: "longest_idle"
# Enforce a total limit of active profiles across all groups defined below.
# Set to 1 to ensure only one group's profile is active at any time.
global_max_active_profiles: 1
# Define separate profile groups for each user type.
# This allows one profile from each group to be active simultaneously,
# ensuring the single auth worker is never blocked waiting for downloads.
profile_groups:
- name: "auth_user1"
prefix: "user1"
# Enforce that only 1 profile from this group can be active at a time.
max_active_profiles: 1
# After an active profile has been used for this many requests, it will be
# rotated out and put into a RESTING state.
rotate_after_requests: 25
# How long a profile rests after being rotated out.
rest_duration_minutes_on_rotation: 1
# If true, no new profile in this group will be activated while another
# one is in the 'waiting_downloads' state.
defer_activation_if_any_waiting: true
# --- New settings for download wait feature ---
# When a profile is rotated, wait for its generated downloads to finish
# before it can be used again.
wait_download_finish_per_profile: true
# Safety net: max time to wait for downloads before forcing rotation.
# Should be aligned with info.json URL validity (e.g., 4 hours = 240 mins).
rotate_after_requests: 5
rest_duration_minutes_on_rotation: 0.20
wait_download_finish_per_group: true
max_wait_for_downloads_minutes: 240
- name: "auth_user2"
prefix: "user2"
max_active_profiles: 1
rotate_after_requests: 25
rest_duration_minutes_on_rotation: 1
defer_activation_if_any_waiting: true
wait_download_finish_per_profile: true
rotate_after_requests: 5
rest_duration_minutes_on_rotation: 0.20
wait_download_finish_per_group: true
max_wait_for_downloads_minutes: 240
- name: "auth_user3"
prefix: "user3"
max_active_profiles: 1
rotate_after_requests: 5
rest_duration_minutes_on_rotation: 0.20
wait_download_finish_per_group: true
max_wait_for_downloads_minutes: 240
# Time-based proxy rules are disabled as they are not needed for this setup.
proxy_work_minutes: 0
proxy_rest_duration_minutes: 0
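For reference, the two strategies described above order candidates roughly as sketched below; this mirrors the sort keys used by the display code later in this commit and is not the enforcer's actual implementation:

def pick_next_group_least_loaded(groups):
    """'least_loaded': fewest pending downloads wins; ties go to the group
    that finished its last download batch the earliest (FIFO)."""
    return min(
        groups,
        key=lambda g: (
            g.get("pending_downloads", 0),
            g.get("last_finished_downloads_ts", float("inf")),
            g.get("name", ""),
        ),
    )

def pick_next_profile_longest_idle(ready_profiles):
    """'longest_idle': global FIFO on last_used across all groups;
    never-used profiles (last_used == 0) sort first."""
    return min(ready_profiles, key=lambda p: p.get("last_used", 0))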
@ -98,31 +94,41 @@ auth_policy_enforcer_config:
# A short post-task cooldown for auth simulation profiles. When a batch is finished,
# the profile is put into COOLDOWN briefly. This prevents a worker from immediately
# re-locking the same profile, giving the policy enforcer a window to perform rotation.
unlock_cooldown_seconds: 1
unlock_cooldown_seconds: 0
# Cross-simulation synchronization
#cross_simulation_sync:
# Link auth profiles to download profiles (by name)
# Both profiles should exist in their respective environments
#profile_links:
# - auth: "user1"
# download: "user1"
# - auth: "user2"
# download: "user2"
# Which states to synchronize
#sync_states:
# - "RESTING" # Disabling to prevent deadlock when auth profile is waiting for downloads.
# The download profile must remain active to process them.
# - "BANNED"
# Whether to sync rotation (when auth is rotated due to rotate_after_requests)
#sync_rotation: true
# Whether download profile should be banned if auth is banned (even if download hasn't violated its own rules)
#enforce_auth_lead: true
# Ensures the same profile (e.g., user1_0) is active in both simulations.
# This will activate the correct download profile and rest any others in its group.
#sync_active_profile: true
# When an auth profile is waiting for downloads, ensure the matching download profile is active
#sync_waiting_downloads: true
# --- Cross-simulation synchronization ---
cross_simulation_sync:
# Link auth profiles to download profiles (by prefix)
profile_links:
- auth: "user1"
download: "user1"
- auth: "user2"
download: "user2"
- auth: "user3"
download: "user3"
# Which states to synchronize from auth to download.
# 'RESTING' is no longer needed here; the new group-aware deactivation logic
# in `sync_active_profile` handles rotation more cleanly.
sync_states:
- "BANNED"
# If true, when an auth profile is rotated, the corresponding
# download profile group will also be rotated. This is now handled by the
# group-aware deactivation logic triggered by `sync_active_profile`.
sync_rotation: true
# If true, a BANNED state on an auth profile will force the download profile
# to also be BANNED.
enforce_auth_lead: true
# CRITICAL: Ensures the correct download profile GROUP is active.
# This will activate the target download profile and rest any profiles in other groups.
sync_active_profile: true
# When an auth profile is in the 'waiting_downloads' state, ensure the
# matching download profile is active so it can process those downloads.
sync_waiting_downloads: true
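A minimal sketch of the BANNED propagation implied by `sync_states` and `enforce_auth_lead` above (the enforcer's real sync code is outside this diff; the helper below only illustrates the prefix-based linking using ProfileManager methods from this commit):

def sync_banned_state(auth_manager, download_manager, profile_links):
    """Propagate BANNED from auth profiles to their linked download profiles."""
    for link in profile_links:
        auth_prefix, dl_prefix = link["auth"], link["download"]
        for auth_profile in auth_manager.list_profiles():
            if not auth_profile["name"].startswith(auth_prefix):
                continue
            if auth_profile.get("state") != "BANNED":
                continue
            # Ban every download profile in the linked group (enforce_auth_lead).
            for dl_profile in download_manager.list_profiles():
                if dl_profile["name"].startswith(dl_prefix) and dl_profile.get("state") != "BANNED":
                    download_manager.update_profile_state(
                        dl_profile["name"], "BANNED",
                        reason=f"Auth profile {auth_profile['name']} is banned"
                    )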
# --- Policies for the Download Simulation ---
download_policy_enforcer_config:
@ -137,7 +143,6 @@ download_policy_enforcer_config:
rate_limit_requests: 280
rate_limit_window_minutes: 60
rate_limit_rest_duration_minutes: 5
#
rest_after_requests: 0
rest_duration_minutes: 20
@ -146,21 +151,27 @@ download_policy_enforcer_config:
# For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
# The settings below should be configured to respect these limits.
# A group of profiles that are mutually exclusive. Only one will be active at a time.
# Define separate profile groups for download workers.
# Increase max_active_profiles to allow all profiles in a group to be used.
profile_groups:
- name: "download_user1"
prefix: "user1"
rotate_after_requests: 25
rest_duration_minutes_on_rotation: 1
max_active_profiles: 4
rotate_after_requests: 0
rest_duration_minutes_on_rotation: 0.2
# max_active_profiles: 0 # Allow all profiles in this group to be active (0, -1, or omitted)
- name: "download_user2"
prefix: "user2"
rotate_after_requests: 25
rest_duration_minutes_on_rotation: 1
max_active_profiles: 4
rotate_after_requests: 0
rest_duration_minutes_on_rotation: 0.2
# max_active_profiles: 0 # Allow all profiles in this group to be active (0, -1, or omitted)
- name: "download_user3"
prefix: "user3"
rotate_after_requests: 0
rest_duration_minutes_on_rotation: 0.2
# max_active_profiles: 0 # Allow all profiles in this group to be active (0, -1, or omitted)
# Time-based proxy rules are disabled.
proxy_work_minutes: 50
proxy_work_minutes: 0
proxy_rest_duration_minutes: 10
# Global maximum time a proxy can be active before being rested, regardless of
@ -177,5 +188,5 @@ download_policy_enforcer_config:
unlock_stale_locks_after_seconds: 960
# After a profile is used for a download, unlock it but put it in COOLDOWN
# state for 12-16s. This is enforced by the worker, which reads this config from Redis.
# state for 2-3s. This is enforced by the worker, which reads this config from Redis.
unlock_cooldown_seconds: [2, 3]
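Worker-side, this range is applied roughly as follows (a sketch; `unlock_profile` and its `rest_for_seconds` parameter appear in `profile_manager_tool` in this commit, but the exact call pattern here is assumed):

import random

def release_after_download(manager, profile_name, owner, cooldown_range):
    """Unlock a profile after a download and park it in COOLDOWN for a short random interval."""
    low, high = cooldown_range            # e.g. [2, 3] from unlock_cooldown_seconds
    cooldown = random.uniform(low, high)
    # rest_for_seconds > 0 makes unlock_profile transition the profile to COOLDOWN
    # (see ProfileManager.unlock_profile / start_cooldown in this commit).
    manager.unlock_profile(profile_name, owner, rest_for_seconds=cooldown)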

View File

@ -1,59 +1 @@
{
"ytops": {
"force_renew": [],
"session_params": {
"visitor_rotation_threshold": 0
}
},
"ytdlp_params": {
"debug_printtraffic": true,
"write_pages": false,
"verbose": true,
"no_color": true,
"ignoreerrors": true,
"noresizebuffer": true,
"buffersize": "4M",
"concurrent_fragments": 8,
"socket_timeout": 60,
"outtmpl": {
"default": "%(id)s.f%(format_id)s.%(ext)s"
},
"restrictfilenames": true,
"updatetime": false,
"noplaylist": true,
"match_filter": "!is_live",
"writeinfojson": true,
"skip_download": true,
"allow_playlist_files": false,
"clean_infojson": true,
"getcomments": false,
"writesubtitles": false,
"writethumbnail": false,
"sleep_interval_requests": 0.75,
"parse_metadata": [
":(?P<automatic_captions>)"
],
"extractor_args": {
"youtube": {
"player_client": ["tv_simply"],
"formats": ["duplicate"],
"jsc_trace": ["true"],
"pot_trace": ["true"],
"skip": ["translated_subs", "hls"]
},
"youtubepot-bgutilhttp": {
"base_url": ["http://172.17.0.1:4416"]
}
},
"noprogress": true,
"format_sort": [
"res",
"ext:mp4:m4a"
],
"remuxvideo": "mp4",
"nooverwrites": true,
"continuedl": true
}
}
{}

View File

@ -1,22 +0,0 @@
module github.com/yourproject/ytops_client/go_ytdlp_cli
go 1.23.0
toolchain go1.24.4
require (
github.com/lrstanley/go-ytdlp v0.0.0-00010101000000-000000000000
github.com/spf13/cobra v1.8.0
)
replace github.com/lrstanley/go-ytdlp => ../../go-ytdlp
require (
github.com/ProtonMail/go-crypto v1.3.0 // indirect
github.com/cloudflare/circl v1.6.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/ulikunitz/xz v0.5.13 // indirect
golang.org/x/crypto v0.41.0 // indirect
golang.org/x/sys v0.35.0 // indirect
)

View File

@ -1,27 +0,0 @@
github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw=
github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE=
github.com/cloudflare/circl v1.6.1 h1:zqIqSPIndyBh1bjLVVDHMPpVKqp8Su/V+6MeDzzQBQ0=
github.com/cloudflare/circl v1.6.1/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs=
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/ulikunitz/xz v0.5.13 h1:ar98gWrjf4H1ev05fYP/o29PDZw9DrI3niHtnEqyuXA=
github.com/ulikunitz/xz v0.5.13/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -1,64 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"io"
"os"
"github.com/lrstanley/go-ytdlp"
"github.com/spf13/cobra"
)
func main() {
cli := &cobra.Command{
Use: "go-ytdlp",
Short: "A simple CLI wrapper for go-ytdlp.",
SilenceUsage: true,
SilenceErrors: true,
}
cli.AddCommand(&cobra.Command{
Use: "flags-to-json [flags...]",
Short: "Converts yt-dlp flags to a JSON config.",
Long: "Converts yt-dlp flags to a JSON config. Note that this does not validate the flags.",
Args: cobra.MinimumNArgs(1),
RunE: func(cmd *cobra.Command, args []string) (err error) {
// The go-ytdlp library documentation mentions FlagsToJSON and SetFlagConfig,
// but these methods are missing from the generated code in the current version.
// Therefore, we cannot implement this command yet.
return fmt.Errorf("flags-to-json is not supported by the underlying go-ytdlp library")
},
})
cli.AddCommand(&cobra.Command{
Use: "json-to-flags",
Short: "Converts a JSON config to yt-dlp flags.",
Long: "Converts a JSON config to yt-dlp flags. Note that this does not validate the flags. Reads from stdin.",
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) (err error) {
var in []byte
in, err = io.ReadAll(cmd.InOrStdin())
if err != nil {
return err
}
// Manually unmarshal into FlagConfig since JSONToFlags helper is missing
var cfg ytdlp.FlagConfig
if err := json.Unmarshal(in, &cfg); err != nil {
return fmt.Errorf("failed to unmarshal JSON: %w", err)
}
flags := cfg.ToFlags()
for _, flag := range flags {
fmt.Fprintln(cmd.OutOrStdout(), flag)
}
return nil
},
})
if err := cli.Execute(); err != nil {
fmt.Fprintln(os.Stderr, "Error:", err)
os.Exit(1)
}
}

View File

@ -12,6 +12,7 @@ import logging
import os
import sys
import time
import threading
from copy import deepcopy
try:
@ -29,7 +30,7 @@ from ytops_client.profile_manager_tool import ProfileManager
from ytops_client.stress_policy import utils as sp_utils
from ytops_client.stress_policy.state_manager import StateManager
from ytops_client.stress_policy.utils import load_policy, apply_overrides
from ytops_client.stress_policy.workers import _run_download_logic
from ytops_client.stress_policy.worker_utils import _run_download_logic
from ytops_client.stress_policy_tool import shutdown_event
# Configure logging
@ -164,6 +165,8 @@ def main_locking_download_emulator(args):
policy=local_policy,
state_manager=dummy_state_manager,
args=args, # Pass orchestrator args through
running_processes=set(), # This standalone tool doesn't need to track processes
process_lock=threading.Lock(),
profile_name=locked_profile['name'],
profile_manager_instance=manager
)

File diff suppressed because it is too large

View File

@ -10,6 +10,7 @@ import io
import logging
import os
import random
import re
import signal
import sys
import threading
@ -20,6 +21,8 @@ import collections
import redis
from .profile_statemachine import ProfileState, ProfileStateMachine
try:
from dotenv import load_dotenv
except ImportError:
@ -48,32 +51,53 @@ def handle_shutdown(sig, frame):
shutdown_event.set()
def natural_sort_key(s: str) -> List[Any]:
"""Key for natural sorting (e.g., 'user10' comes after 'user2')."""
return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]
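# Example (illustrative): sorted(['user10', 'user2', 'user1'], key=natural_sort_key)
# returns ['user1', 'user2', 'user10'] instead of the lexicographic ['user1', 'user10', 'user2'].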
def format_timestamp(ts: float) -> str:
"""Format timestamp for display."""
if not ts or ts == 0:
return "Never"
return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
def format_duration(seconds: float) -> str:
"""Format duration for display."""
if seconds < 60:
return f"{seconds:.0f}s"
elif seconds < 3600:
return f"{seconds/60:.1f}m"
elif seconds < 86400:
return f"{seconds/3600:.1f}h"
else:
return f"{seconds/86400:.1f}d"
class ProfileManager:
"""Manages profiles in Redis with configurable prefix."""
# Profile states
STATE_ACTIVE = "ACTIVE"
STATE_PAUSED = "PAUSED"
STATE_RESTING = "RESTING"
STATE_BANNED = "BANNED"
STATE_LOCKED = "LOCKED"
STATE_COOLDOWN = "COOLDOWN"
VALID_STATES = [STATE_ACTIVE, STATE_PAUSED, STATE_RESTING, STATE_BANNED, STATE_LOCKED, STATE_COOLDOWN]
# Profile states are defined in the ProfileState enum.
VALID_STATES = ProfileState.values()
def __init__(self, redis_host='localhost', redis_port=6379,
redis_password=None, key_prefix='profile_mgmt_'):
redis_password=None, key_prefix='profile_mgmt_', redis_db=0):
"""Initialize Redis connection and key prefix."""
self.key_prefix = key_prefix
logger.info(f"Attempting to connect to Redis at {redis_host}:{redis_port}...")
logger.info(f"Attempting to connect to Redis at {redis_host}:{redis_port} (DB: {redis_db})...")
try:
self.redis = redis.Redis(
host=redis_host,
port=redis_port,
password=redis_password,
db=redis_db,
decode_responses=True,
socket_connect_timeout=5,
socket_timeout=5
socket_connect_timeout=10,
socket_timeout=30,
socket_keepalive=True,
retry_on_timeout=True,
max_connections=10
)
self.redis.ping()
logger.info(f"Successfully connected to Redis.")
@ -121,16 +145,65 @@ class ProfileManager:
"""Get Redis key for a profile's pending downloads counter."""
return f"{self.key_prefix}downloads_pending:{profile_name}"
def _activation_log_key(self) -> str:
"""Get Redis key for the activation event log."""
return f"{self.key_prefix}log:activations"
def log_activation_event(self, event_data: Dict[str, Any]):
"""Logs a profile activation event to a capped list in Redis."""
key = self._activation_log_key()
try:
# Serialize the event data to a JSON string
event_json = json.dumps(event_data, default=str)
pipe = self.redis.pipeline()
# Prepend the new event to the list
pipe.lpush(key, event_json)
# Trim the list to keep only the most recent 20 entries
pipe.ltrim(key, 0, 19)
pipe.execute()
logger.debug(f"Logged activation event: {event_json}")
except (TypeError, redis.RedisError) as e:
logger.error(f"Failed to log activation event: {e}")
def get_activation_events(self, count: int = 10) -> List[Dict[str, Any]]:
"""Retrieves the most recent activation events from Redis."""
key = self._activation_log_key()
try:
events_json = self.redis.lrange(key, 0, count - 1)
events = [json.loads(e) for e in events_json]
return events
except (TypeError, json.JSONDecodeError, redis.RedisError) as e:
logger.error(f"Failed to retrieve or parse activation events: {e}")
return []
def increment_pending_downloads(self, profile_name: str, count: int = 1) -> Optional[int]:
"""Atomically increments the pending downloads counter for a profile."""
if count <= 0:
"""Atomically increments (or decrements if count is negative) the pending downloads counter for a profile."""
if count == 0:
return None
key = self._pending_downloads_key(profile_name)
# When decrementing, ensure the counter exists to avoid creating negative counters from stray calls.
if count < 0 and not self.redis.exists(key):
logger.warning(f"Attempted to decrement pending downloads for '{profile_name}' by {abs(count)}, but no counter exists. No action taken.")
return None
new_value = self.redis.incrby(key, count)
# Set a TTL on the key to prevent it from living forever if something goes wrong.
# 5 hours is a safe buffer for the 4-hour info.json validity.
self.redis.expire(key, 5 * 3600)
logger.info(f"Incremented pending downloads for '{profile_name}' by {count}. New count: {new_value}")
if count > 0:
# Set/refresh TTL on positive increments to prevent it from living forever.
# 5 hours is a safe buffer for the 4-hour info.json validity.
self.redis.expire(key, 5 * 3600)
logger.info(f"Incremented pending downloads for '{profile_name}' by {count}. New count: {new_value}")
elif count < 0:
logger.info(f"Decremented pending downloads for '{profile_name}' by {abs(count)}. New count: {new_value}")
if new_value <= 0:
# Clean up the key if it drops to or below zero.
self.redis.delete(key)
logger.info(f"Pending downloads for '{profile_name}' reached zero or less. Cleared counter key.")
return new_value
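# Usage sketch (hypothetical call sites, not part of this module): the auth side increments
# the counter once per queued format, and the download side decrements it as each format
# finishes, e.g.:
#
#   manager.increment_pending_downloads("user1_0", count=len(formats_to_download))
#   ...
#   manager.increment_pending_downloads("user1_0", count=-1)  # or decrement_pending_downloads("user1_0")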
def decrement_pending_downloads(self, profile_name: str) -> Optional[int]:
@ -187,8 +260,11 @@ class ProfileManager:
"""Get Redis key for the failed lock attempts counter."""
return f"{self.key_prefix}stats:failed_lock_attempts"
def create_profile(self, name: str, proxy: str, initial_state: str = STATE_ACTIVE) -> bool:
def create_profile(self, name: str, proxy: str, initial_state: str = ProfileState.ACTIVE.value) -> bool:
"""Create a new profile."""
# Normalize to uppercase
initial_state = initial_state.upper()
if initial_state not in self.VALID_STATES:
logger.error(f"Invalid initial state: {initial_state}")
return False
@ -339,87 +415,51 @@ class ProfileManager:
profiles.append(data)
# Sort by creation time (newest first)
profiles.sort(key=lambda x: x.get('created_at', 0), reverse=True)
# Sort by natural name order
profiles.sort(key=lambda p: natural_sort_key(p.get('name', '')))
return profiles
def update_profile_state(self, name: str, new_state: str,
reason: str = '') -> bool:
"""Update profile state."""
if new_state not in self.VALID_STATES:
"""Update profile state by triggering a state machine transition."""
# Normalize to uppercase
new_state = new_state.upper()
if new_state not in ProfileState.values():
logger.error(f"Invalid state: {new_state}")
return False
profile = self.get_profile(name)
if not profile:
logger.error(f"Profile '{name}' not found")
return False
sm = self.get_state_machine(name)
if not sm:
return False # get_state_machine logs the error
old_state = profile['state']
if old_state == new_state:
logger.info(f"Profile '{name}' already in state {new_state}")
if sm.current_state.value == new_state:
logger.info(f"Profile '{name}' already in state {new_state}. No action taken.")
return True
now = time.time()
profile_key = self._profile_key(name)
pipe = self.redis.pipeline()
# Update profile hash
updates = {'state': new_state, 'last_used': str(now)}
if new_state == self.STATE_BANNED and reason:
updates['ban_reason'] = reason
elif new_state == self.STATE_RESTING:
# Set rest_until to 1 hour from now by default
rest_until = now + 3600
updates['rest_until'] = str(rest_until)
if reason:
updates['rest_reason'] = reason
# Handle transitions into ACTIVE state
if new_state == self.STATE_ACTIVE:
# Clear any resting/banned state fields
updates['rest_until'] = '0'
updates['rest_reason'] = ''
updates['reason'] = ''
updates['ban_reason'] = '' # Clear ban reason on manual activation
if old_state in [self.STATE_RESTING, self.STATE_COOLDOWN]:
updates['last_rest_timestamp'] = str(now)
# When activating a profile, ensure its proxy is also active.
proxy_url = profile.get('proxy')
if proxy_url:
logger.info(f"Activating associated proxy '{proxy_url}' for profile '{name}'.")
pipe.hset(self._proxy_state_key(proxy_url), mapping={
'state': self.STATE_ACTIVE,
'rest_until': '0',
'work_start_timestamp': str(now)
})
# If moving to any state that is not LOCKED, ensure any stale lock data is cleared.
# This makes manual state changes (like 'activate' or 'unban') more robust.
if new_state != self.STATE_LOCKED:
updates['lock_owner'] = ''
updates['lock_timestamp'] = '0'
pipe.hdel(self._locks_key(), name)
if old_state == self.STATE_LOCKED:
logger.info(f"Profile '{name}' was in LOCKED state. Clearing global lock.")
pipe.hset(profile_key, mapping=updates)
# Remove from old state index, add to new state index
if old_state in self.VALID_STATES:
pipe.zrem(self._state_key(old_state), name)
pipe.zadd(self._state_key(new_state), {name: now})
result = pipe.execute()
logger.info(f"Updated profile '{name}' from {old_state} to {new_state}")
if reason:
logger.info(f"Reason: {reason}")
return True
try:
if new_state == ProfileState.ACTIVE.value:
sm.activate()
elif new_state == ProfileState.BANNED.value:
sm.ban(reason=reason)
elif new_state == ProfileState.RESTING.value:
sm.rest(reason=reason)
elif new_state == ProfileState.PAUSED.value:
sm.pause(reason=reason)
# LOCKED and COOLDOWN are not handled here as they are special transitions
# from lock_profile and unlock_profile, and should not be set directly.
elif new_state in [ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]:
logger.error(f"Manual state transition to '{new_state}' is not allowed. Use lock_profile() or unlock_profile().")
return False
else:
# This case should not be reached if ProfileState.values() is correct
logger.error(f"State transition to '{new_state}' is not implemented in update_profile_state.")
return False
return True
except Exception as e:
logger.error(f"Failed to update profile '{name}' from {sm.current_state.id} to '{new_state}': {e}", exc_info=True)
return False
def update_profile_field(self, name: str, field: str, value: str) -> bool:
"""Update a specific field in profile."""
@ -449,7 +489,7 @@ class ProfileManager:
pipe.delete(profile_key)
# Remove from state index
if state in self.VALID_STATES:
if state in ProfileState.values():
pipe.zrem(self._state_key(state), name)
# Delete activity keys
@ -581,7 +621,7 @@ class ProfileManager:
"""Get the total count of failed lock attempts from Redis."""
count = self.redis.get(self._failed_lock_attempts_key())
return int(count) if count else 0
def get_global_stats(self) -> Dict[str, int]:
"""Get aggregated global stats across all profiles."""
profiles = self.list_profiles()
@ -629,7 +669,7 @@ class ProfileManager:
def set_proxy_state(self, proxy_url: str, state: str, rest_duration_minutes: Optional[int] = None) -> bool:
"""Set the state of a proxy and propagates it to associated profiles."""
if state not in [self.STATE_ACTIVE, self.STATE_RESTING]:
if state not in [ProfileState.ACTIVE.value, ProfileState.RESTING.value]:
logger.error(f"Invalid proxy state: {state}. Only ACTIVE and RESTING are supported for proxies.")
return False
@ -638,7 +678,7 @@ class ProfileManager:
updates = {'state': state}
rest_until = 0
if state == self.STATE_RESTING:
if state == ProfileState.RESTING.value:
if not rest_duration_minutes or rest_duration_minutes <= 0:
logger.error("rest_duration_minutes is required when setting proxy state to RESTING.")
return False
@ -657,17 +697,17 @@ class ProfileManager:
if not profiles_on_proxy:
return True
if state == self.STATE_RESTING:
if state == ProfileState.RESTING.value:
logger.info(f"Propagating RESTING state to profiles on proxy '{proxy_url}'.")
for profile in profiles_on_proxy:
if profile['state'] == self.STATE_ACTIVE:
self.update_profile_state(profile['name'], self.STATE_RESTING, "Proxy resting")
if profile['state'] == ProfileState.ACTIVE.value:
self.update_profile_state(profile['name'], ProfileState.RESTING.value, "Proxy resting")
self.update_profile_field(profile['name'], 'rest_until', str(rest_until))
elif state == self.STATE_ACTIVE:
elif state == ProfileState.ACTIVE.value:
logger.info(f"Propagating ACTIVE state to profiles on proxy '{proxy_url}'.")
for profile in profiles_on_proxy:
if profile['state'] == self.STATE_RESTING and profile.get('rest_reason') == "Proxy resting":
self.update_profile_state(profile['name'], self.STATE_ACTIVE, "Proxy activated")
if profile['state'] == ProfileState.RESTING.value and profile.get('rest_reason') == "Proxy resting":
self.update_profile_state(profile['name'], ProfileState.ACTIVE.value, "Proxy activated")
return True
@ -700,7 +740,7 @@ class ProfileManager:
states[proxy_url] = data
else:
# Default to ACTIVE if no state is found
states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
states[proxy_url] = {'state': ProfileState.ACTIVE.value, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
return states
@ -787,13 +827,21 @@ class ProfileManager:
'active_profile_index': int,
'rotate_after_requests': int,
'max_active_profiles': int,
'pending_downloads': int,
}
float_fields = ['last_finished_downloads_ts']
for field, type_converter in numeric_fields.items():
if field in data:
try:
data[field] = type_converter(data[field])
except (ValueError, TypeError):
data[field] = 0
for field in float_fields:
if field in data:
try:
data[field] = float(data[field])
except (ValueError, TypeError):
data[field] = 0.0
states[group_name] = data
else:
states[group_name] = {}
@ -812,7 +860,7 @@ class ProfileManager:
profiles_to_check = [specific_profile_name]
else:
# Original logic: find all active profiles, optionally filtered by prefix.
active_profiles = self.redis.zrange(self._state_key(self.STATE_ACTIVE), 0, -1)
active_profiles = self.redis.zrange(self._state_key(ProfileState.ACTIVE.value), 0, -1)
if not active_profiles:
logger.warning("No active profiles available to lock.")
self.redis.incr(self._failed_lock_attempts_key())
@ -831,7 +879,7 @@ class ProfileManager:
full_profiles = [self.get_profile(p) for p in profiles_to_check]
# Filter out any None profiles from a race condition with deletion, and ensure state is ACTIVE.
# This is especially important when locking a specific profile.
full_profiles = [p for p in full_profiles if p and p.get('proxy') and p.get('state') == self.STATE_ACTIVE]
full_profiles = [p for p in full_profiles if p and p.get('proxy') and p.get('state') == ProfileState.ACTIVE.value]
if not full_profiles:
if specific_profile_name:
@ -846,7 +894,7 @@ class ProfileManager:
eligible_profiles = [
p['name'] for p in full_profiles
if proxy_states.get(p['proxy'], {}).get('state', self.STATE_ACTIVE) == self.STATE_ACTIVE
if proxy_states.get(p['proxy'], {}).get('state', ProfileState.ACTIVE.value) == ProfileState.ACTIVE.value
]
if not eligible_profiles:
@ -863,33 +911,40 @@ class ProfileManager:
# Try to acquire lock atomically
if self.redis.hsetnx(locks_key, name, owner):
# Lock acquired. Now, re-check state to avoid race condition with enforcer.
profile_key = self._profile_key(name)
current_state = self.redis.hget(profile_key, 'state')
profile = self.get_profile(name)
if not profile: # Profile might have been deleted in a race condition.
self.redis.hdel(locks_key, name)
continue
if current_state != self.STATE_ACTIVE:
current_state = profile.get('state')
# Normalize to uppercase for check
if not current_state or current_state.upper() != ProfileState.ACTIVE.value:
# Another process (enforcer) changed the state. Release lock and try next.
self.redis.hdel(locks_key, name)
logger.warning(f"Aborted lock for '{name}'; state changed from ACTIVE to '{current_state}' during lock acquisition.")
continue
# State is still ACTIVE, proceed with locking.
now = time.time()
sm = self.get_state_machine(name, profile=profile)
if not sm:
# Should not happen if we just checked the profile
self.redis.hdel(locks_key, name)
continue
pipe = self.redis.pipeline()
# Update profile state and lock info
pipe.hset(profile_key, mapping={
'state': self.STATE_LOCKED,
'lock_owner': owner,
'lock_timestamp': str(now),
'last_used': str(now)
})
# Move from ACTIVE to LOCKED state index
pipe.zrem(self._state_key(self.STATE_ACTIVE), name)
pipe.zadd(self._state_key(self.STATE_LOCKED), {name: now})
pipe.execute()
logger.info(f"Locked profile '{name}' for owner '{owner}'")
return self.get_profile(name)
try:
# The hsetnx above acquired the global lock. Now we transition the state.
sm.lock(owner=owner)
# The on_enter_locked action handles all Redis updates for the profile itself,
# as well as the corresponding log messages.
return self.get_profile(name)
except Exception as e:
# This could be a TransitionNotAllowed error if the state changed,
# or a Redis error during the action.
logger.error(f"Failed to transition profile '{name}' to LOCKED state: {e}", exc_info=True)
# Release the global lock as the state transition failed.
self.redis.hdel(locks_key, name)
continue
logger.warning("Could not lock any active profile (all may have been locked by other workers).")
self.redis.incr(self._failed_lock_attempts_key())
@ -902,8 +957,10 @@ class ProfileManager:
logger.error(f"Profile '{name}' not found.")
return False
if profile['state'] != self.STATE_LOCKED:
logger.warning(f"Profile '{name}' is not in LOCKED state (current: {profile['state']}).")
# Normalize to uppercase for check
current_state = profile.get('state')
if not current_state or current_state.upper() != ProfileState.LOCKED.value:
logger.warning(f"Profile '{name}' is not in LOCKED state (current: {current_state}).")
# Forcibly remove from locks hash if it's inconsistent
self.redis.hdel(self._locks_key(), name)
return False
@ -912,45 +969,93 @@ class ProfileManager:
logger.error(f"Owner mismatch: cannot unlock profile '{name}'. Locked by '{profile['lock_owner']}', attempted by '{owner}'.")
return False
now = time.time()
profile_key = self._profile_key(name)
sm = self.get_state_machine(name, profile=profile)
if not sm:
return False
pipe = self.redis.pipeline()
try:
if rest_for_seconds and rest_for_seconds > 0:
sm.start_cooldown(duration=rest_for_seconds)
else:
sm.unlock()
return True
except Exception as e:
logger.error(f"Failed to unlock profile '{name}': {e}", exc_info=True)
return False
updates = {
'lock_owner': '',
'lock_timestamp': '0',
'last_used': str(now)
}
def get_state_machine(self, name: str, profile: Optional[Dict[str, Any]] = None) -> Optional[ProfileStateMachine]:
"""
Initializes and returns a ProfileStateMachine instance for a given profile,
set to its current state from Redis.
If `profile` object is not provided, it will be fetched from Redis.
"""
if profile is None:
profile = self.get_profile(name)
if not profile:
logger.error(f"Cannot create state machine for non-existent profile '{name}'")
return None
if rest_for_seconds and rest_for_seconds > 0:
new_state = self.STATE_COOLDOWN
rest_until = now + rest_for_seconds
updates['rest_until'] = str(rest_until)
updates['rest_reason'] = 'Post-task cooldown'
logger_msg = f"Unlocked profile '{name}' into COOLDOWN for {rest_for_seconds}s."
else:
new_state = self.STATE_ACTIVE
# Clear any rest-related fields when moving to ACTIVE
updates['rest_until'] = '0'
updates['rest_reason'] = ''
updates['reason'] = ''
logger_msg = f"Unlocked profile '{name}'"
current_state_str = profile.get('state')
if not current_state_str:
logger.error(f"Profile '{name}' has no state. Cannot initialize state machine.")
return None
updates['state'] = new_state
pipe.hset(profile_key, mapping=updates)
# Normalize to uppercase to handle potential inconsistencies (e.g. "locked" vs "LOCKED")
current_state_str = current_state_str.upper()
# Move from LOCKED to the new state index
pipe.zrem(self._state_key(self.STATE_LOCKED), name)
pipe.zadd(self._state_key(new_state), {name: now})
if current_state_str not in self.VALID_STATES:
logger.error(f"Profile '{name}' has an invalid state value '{current_state_str}' in Redis. Cannot initialize state machine.")
return None
# The `model` parameter in the StateMachine constructor is where we can pass
# context. We pass the manager and profile name.
# We convert the Redis state (uppercase value) to the state machine identifier (lowercase attribute name).
# Remove from global locks hash
pipe.hdel(self._locks_key(), name)
# When re-hydrating a state machine from a stored state, we don't want to re-trigger
# the `on_enter` actions for the current state. Ideally we would suppress the initial
# transition and set the state directly, but the library version in use does not allow
# that, so we apply the workaround below.
pipe.execute()
# WORKAROUND for older statemachine library:
# Instantiating the machine triggers an initial transition to ACTIVE, which wrongly updates Redis.
# We let this happen, and then immediately correct the state if it was supposed to be something else.
sm = ProfileStateMachine(manager=self, profile_name=name)
logger.info(logger_msg)
return True
# The sm is now in ACTIVE state, and Redis has been updated. If the original state was
# LOCKED, we must re-lock it to fix Redis and the state machine object so transitions work.
if current_state_str == ProfileState.LOCKED.value:
lock_owner = profile.get('lock_owner', 're-lock-owner')
try:
# This transition ensures the `on_enter_LOCKED` actions are run, making the
# state consistent in Redis and in the state machine object.
sm.lock(owner=lock_owner)
except Exception as e:
logger.error(f"Failed to re-lock profile '{name}' during state machine hydration: {e}")
# The state is now inconsistent, best to not return a broken machine.
return None
elif current_state_str != sm.current_state.value.upper():
# For any other state, we must manually fix both the state machine object and Redis,
# as the constructor wrongly transitioned to ACTIVE.
# 1. Force state on the machine object. This does not trigger actions.
target_state_obj = next((s for s in sm.states if s.value.upper() == current_state_str), None)
if not target_state_obj:
logger.error(f"Could not find state object for '{current_state_str}' during hydration of '{name}'.")
return None
sm.current_state = target_state_obj
# 2. Manually revert the state in Redis to what it should be.
profile_key = self._profile_key(name)
pipe = self.redis.pipeline()
pipe.hset(profile_key, 'state', current_state_str)
# Atomically move the profile from the incorrect ACTIVE index to the correct one.
# The constructor may have added it to ACTIVE without removing it from its original state index.
pipe.zrem(self._state_key(ProfileState.ACTIVE.value), name)
pipe.zadd(self._state_key(current_state_str), {name: profile.get('last_used', time.time())})
pipe.execute()
logger.debug(f"Corrected state for '{name}' to '{current_state_str}' in object and Redis during hydration.")
return sm
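# Typical usage (mirrors update_profile_state/lock_profile above): hydrate the machine for
# a profile and drive transitions through it so the on_enter actions keep Redis consistent:
#
#   sm = manager.get_state_machine("user1_0")
#   if sm:
#       sm.rest(reason="Manual rest")   # or sm.activate(), sm.ban(reason=...)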
def cleanup_stale_locks(self, max_lock_time_seconds: int) -> int:
"""Find and unlock profiles with stale locks."""
@ -972,6 +1077,21 @@ class ProfileManager:
cleaned_count += 1
continue
# --- NEW: Check for inconsistent locks on ACTIVE profiles ---
# A lock should not exist for a profile that is still ACTIVE, except for the
# milliseconds between when a worker acquires the lock and when it transitions
# the profile state to LOCKED. If the policy enforcer sees this state, it's
# almost certainly a stale lock from a crashed worker.
if profile.get('state') == ProfileState.ACTIVE.value:
logger.warning(
f"Found inconsistent lock for ACTIVE profile '{name}' (owner: '{owner}'). "
"This indicates a worker may have crashed. Cleaning up stale lock."
)
self.redis.hdel(locks_key, name)
cleaned_count += 1
continue
# --- END NEW LOGIC ---
lock_timestamp = profile.get('lock_timestamp', 0)
if lock_timestamp > 0 and (now - lock_timestamp) > max_lock_time_seconds:
logger.warning(f"Found stale lock for profile '{name}' (locked by '{owner}' for {now - lock_timestamp:.0f}s). Unlocking...")
@ -984,23 +1104,6 @@ class ProfileManager:
logger.debug("No stale locks found to clean up.")
return cleaned_count
def format_timestamp(ts: float) -> str:
"""Format timestamp for display."""
if not ts or ts == 0:
return "Never"
return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
def format_duration(seconds: float) -> str:
"""Format duration for display."""
if seconds < 60:
return f"{seconds:.0f}s"
elif seconds < 3600:
return f"{seconds/60:.1f}m"
elif seconds < 86400:
return f"{seconds/3600:.1f}h"
else:
return f"{seconds/86400:.1f}d"
def add_profile_manager_parser(subparsers):
"""Adds the parser for the 'profile' command."""
@ -1016,6 +1119,7 @@ def add_profile_manager_parser(subparsers):
common_parser.add_argument('--env-file', help='Path to a .env file to load environment variables from.')
common_parser.add_argument('--redis-host', default=None, help='Redis host. Defaults to REDIS_HOST or MASTER_HOST_IP env var, or localhost.')
common_parser.add_argument('--redis-port', type=int, default=None, help='Redis port. Defaults to REDIS_PORT env var, or 6379.')
common_parser.add_argument('--redis-db', type=int, default=None, help='Redis DB number. Defaults to REDIS_DB env var, or 0.')
common_parser.add_argument('--redis-password', default=None, help='Redis password. Defaults to REDIS_PASSWORD env var.')
common_parser.add_argument('--env', default='dev', help="Environment name for Redis key prefix (e.g., 'stg', 'prod'). Used by all non-list commands, and by 'list' for single-view mode. Defaults to 'dev'.")
common_parser.add_argument('--legacy', action='store_true', help="Use legacy key prefix ('profile_mgmt_') without environment.")
@ -1028,9 +1132,9 @@ def add_profile_manager_parser(subparsers):
create_parser = subparsers.add_parser('create', help='Create a new profile', parents=[common_parser])
create_parser.add_argument('name', help='Profile name')
create_parser.add_argument('proxy', help='Proxy URL (e.g., sslocal-rust-1090:1090)')
create_parser.add_argument('--state', default='ACTIVE',
choices=['ACTIVE', 'PAUSED', 'RESTING', 'BANNED', 'COOLDOWN'],
help='Initial state (default: ACTIVE)')
create_parser.add_argument('--state', default=ProfileState.ACTIVE.value,
choices=ProfileState.values(),
help=f'Initial state (default: {ProfileState.ACTIVE.value})')
# List command
list_parser = subparsers.add_parser('list', help='List profiles', parents=[common_parser])
@ -1041,6 +1145,8 @@ def add_profile_manager_parser(subparsers):
list_parser.add_argument('--state', help='Filter by state')
list_parser.add_argument('--proxy', help='Filter by proxy (substring match)')
list_parser.add_argument('--show-proxy-activity', action='store_true', help='Show a detailed activity summary table for proxies. If --proxy is specified, shows details for that proxy only. Otherwise, shows a summary for all proxies.')
list_parser.add_argument('--show-reasons', action='store_true', help='Show detailed reasons for group and profile selection states.')
list_parser.add_argument('--show-activation-history', action='store_true', help='Show the recent profile activation history.')
list_parser.add_argument('--format', choices=['table', 'json', 'csv'], default='table',
help='Output format (default: table)')
list_parser.add_argument('--live', action='store_true', help='Run continuously with a non-blinking live-updating display.')
@ -1061,7 +1167,7 @@ def add_profile_manager_parser(subparsers):
# Update state command
update_state_parser = subparsers.add_parser('update-state', help='Update profile state', parents=[common_parser])
update_state_parser.add_argument('name', help='Profile name')
update_state_parser.add_argument('state', choices=['ACTIVE', 'PAUSED', 'RESTING', 'BANNED', 'LOCKED', 'COOLDOWN'],
update_state_parser.add_argument('state', choices=ProfileState.values(),
help='New state')
update_state_parser.add_argument('--reason', help='Reason for state change (especially for BAN)')
@ -1072,20 +1178,20 @@ def add_profile_manager_parser(subparsers):
update_field_parser.add_argument('value', help='New value')
# Pause command (convenience)
pause_parser = subparsers.add_parser('pause', help='Pause a profile (sets state to PAUSED).', parents=[common_parser])
pause_parser = subparsers.add_parser('pause', help=f'Pause a profile (sets state to {ProfileState.PAUSED.value}).', parents=[common_parser])
pause_parser.add_argument('name', help='Profile name')
# Activate command (convenience)
activate_parser = subparsers.add_parser('activate', help='Activate a profile (sets state to ACTIVE). Useful for resuming a PAUSED profile or fixing a stale LOCKED one.', parents=[common_parser])
activate_parser = subparsers.add_parser('activate', help=f'Activate a profile (sets state to {ProfileState.ACTIVE.value}). Useful for resuming a PAUSED profile or fixing a stale LOCKED one.', parents=[common_parser])
activate_parser.add_argument('name', help='Profile name')
# Ban command (convenience)
ban_parser = subparsers.add_parser('ban', help='Ban a profile (sets state to BANNED).', parents=[common_parser])
ban_parser = subparsers.add_parser('ban', help=f'Ban a profile (sets state to {ProfileState.BANNED.value}).', parents=[common_parser])
ban_parser.add_argument('name', help='Profile name')
ban_parser.add_argument('--reason', required=True, help='Reason for ban')
# Unban command (convenience)
unban_parser = subparsers.add_parser('unban', help='Unban a profile (sets state to ACTIVE and resets session counters).', parents=[common_parser])
unban_parser = subparsers.add_parser('unban', help=f'Unban a profile (sets state to {ProfileState.ACTIVE.value} and resets session counters).', parents=[common_parser])
unban_parser.add_argument('name', help='Profile name')
# Delete command
@ -1329,7 +1435,7 @@ def _render_proxy_activity_summary(manager, proxy_url, simulation_type, file=sys
print(tabulate(table_data, headers=headers, tablefmt='grid'), file=file)
def _render_profile_group_summary_table(manager, all_profiles, profile_groups_config, file=sys.stdout):
def _render_profile_group_summary_table(manager, all_profiles, profile_groups_config, args, file=sys.stdout):
"""Renders a summary table for profile groups."""
if not profile_groups_config:
return
@ -1337,6 +1443,69 @@ def _render_profile_group_summary_table(manager, all_profiles, profile_groups_co
print("\nProfile Group Status:", file=file)
table_data = []
all_profiles_map = {p['name']: p for p in all_profiles}
# --- New logic to determine the next group to be activated ---
profile_selection_strategy = manager.get_config('profile_selection_strategy')
next_up_group_name = None
next_up_reason = ""
if profile_selection_strategy and profile_groups_config:
# This logic mirrors the enforcer's selection process for display purposes.
# It determines which group is likely to have its profile activated next.
now = time.time()
all_profiles_by_name = {p['name']: p for p in all_profiles}
if profile_selection_strategy == 'least_loaded':
sorted_groups = sorted(
profile_groups_config,
key=lambda g: (
g.get('pending_downloads', 0),
g.get('last_finished_downloads_ts', float('inf')),
g.get('name', '')
)
)
if sorted_groups:
next_up_group = sorted_groups[0]
next_up_group_name = next_up_group['name']
next_up_reason = profile_selection_strategy
if getattr(args, 'show_reasons', False):
load = next_up_group.get('pending_downloads', 0)
finish_ts = next_up_group.get('last_finished_downloads_ts', 0)
finish_str = f"finished {format_duration(time.time() - finish_ts)} ago" if finish_ts > 0 else "never finished"
next_up_reason = f"least_loaded (load: {load}, {finish_str})"
elif profile_selection_strategy == 'longest_idle':
# Find the single longest idle profile across all groups
ready_profiles = []
for group in profile_groups_config:
for p_name in group.get('profiles_in_group', []):
p = all_profiles_by_name.get(p_name)
if p and p['state'] in [ProfileState.RESTING.value, ProfileState.COOLDOWN.value] and p.get('rest_until', 0) <= now and p.get('rest_reason') != 'waiting_downloads':
ready_profiles.append(p)
if ready_profiles:
# Sort them according to the 'longest_idle' activation logic
unused_profiles = [p for p in ready_profiles if (p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)) == 0]
used_profiles = [p for p in ready_profiles if p not in unused_profiles]
unused_profiles.sort(key=lambda p: natural_sort_key(p.get('name', '')))
used_profiles.sort(key=lambda p: (p.get('last_used', 0), natural_sort_key(p.get('name', ''))))
sorted_ready_profiles = unused_profiles + used_profiles
if sorted_ready_profiles:
next_profile = sorted_ready_profiles[0]
# Find which group it belongs to
for group in profile_groups_config:
if next_profile['name'] in group.get('profiles_in_group', []):
next_up_group_name = group['name']
next_up_reason = profile_selection_strategy
if getattr(args, 'show_reasons', False):
last_used_ts = next_profile.get('last_used', 0)
idle_time_str = f"idle for {format_duration(time.time() - last_used_ts)}" if last_used_ts > 0 else "never used"
next_up_reason = f"longest_idle (via {next_profile['name']}, {idle_time_str})"
break
# --- End new logic ---
for group in profile_groups_config:
group_name = group.get('name', 'N/A')
@ -1344,7 +1513,7 @@ def _render_profile_group_summary_table(manager, all_profiles, profile_groups_co
active_profiles = [
p_name for p_name in profiles_in_group
if all_profiles_map.get(p_name, {}).get('state') in [manager.STATE_ACTIVE, manager.STATE_LOCKED]
if all_profiles_map.get(p_name, {}).get('state') in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]
]
active_profiles_str = ', '.join(active_profiles) or "None"
@ -1369,15 +1538,68 @@ def _render_profile_group_summary_table(manager, all_profiles, profile_groups_co
remaining_reqs = rotate_after - total_reqs
reqs_left_str = str(max(0, int(remaining_reqs)))
# Recalculate pending downloads on the fly for display accuracy
pending_downloads = sum(
all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
for p_name in profiles_in_group
)
selection_priority_str = ""
if group_name == next_up_group_name:
selection_priority_str = f"<- Next Up ({next_up_reason})"
time_since_finish_str = "N/A"
last_finish_ts = group.get('last_finished_downloads_ts', 0)
if last_finish_ts > 0:
time_since_finish_str = format_duration(time.time() - last_finish_ts)
table_data.append([
group_name,
active_profiles_str,
policy_str,
rotation_rule_str,
reqs_left_str
reqs_left_str,
pending_downloads,
time_since_finish_str,
selection_priority_str
])
headers = ['Group Name', 'Active Profile(s)', 'Policy', 'Rotation Rule', 'Requests Left ↓']
headers = ['Group Name', 'Active Profile(s)', 'Policy', 'Rotation Rule', 'Requests Left ↓', 'Pending DLs', 'Time Since Finish', 'Selection Priority']
print(tabulate(table_data, headers=headers, tablefmt='grid'), file=file)
def _render_activation_history_table(manager, file=sys.stdout):
"""Renders a table of the most recent profile activation events."""
if not manager:
return
# Fetch more events to ensure we have enough after filtering.
# The log is capped at 20 entries in Redis.
events = manager.get_activation_events(count=20)
# Filter out non-activation events and take the most recent 10.
filtered_events = [
e for e in events if e.get('reason') != 'Rest/Cooldown completed'
][:10]
if not filtered_events:
# Don't print the header if there's nothing to show.
return
print("\nRecent Profile Activations:", file=file)
table_data = []
for event in filtered_events:
ts = event.get('ts', 0)
time_str = format_timestamp(ts) if ts > 0 else "N/A"
table_data.append([
time_str,
event.get('profile', 'N/A'),
event.get('group', 'N/A'),
event.get('reason', 'N/A')
])
headers = ['Time', 'Profile', 'Group', 'Reason']
print(tabulate(table_data, headers=headers, tablefmt='grid'), file=file)
@ -1428,7 +1650,7 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
elif args.rest_after_requests and args.rest_after_requests > 0:
rotate_after = args.rest_after_requests
if rotate_after > 0 and state_str != manager.STATE_COOLDOWN:
if rotate_after > 0 and state_str != ProfileState.COOLDOWN.value:
total_reqs = (
p.get('success_count', 0) + p.get('failure_count', 0) +
p.get('tolerated_error_count', 0) +
@ -1470,7 +1692,8 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
countdown_str,
rest_until_str,
reason_str,
p.get('ban_reason') or ''
p.get('ban_reason') or '',
p.get('pending_downloads', 0)
])
table_data.append(row)
@ -1481,7 +1704,7 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
else: # is_download_sim or unknown
headers.extend(['DataOK', 'DownFail', 'Skip.Err', 'Tot.DataOK', 'Tot.DownFail'])
headers.extend(['ReqCD ↓', 'RestCD ↓', 'R.Reason', 'B.Reason'])
headers.extend(['ReqCD ↓', 'RestCD ↓', 'R.Reason', 'B.Reason', 'Pend.DLs'])
# Using `maxcolwidths` to control column width for backward compatibility
# with older versions of the `tabulate` library. This prevents content
@ -1518,7 +1741,6 @@ def _render_simulation_view(title, manager, args, file=sys.stdout):
print("'tabulate' library is required for table format. Please install it.", file=sys.stderr)
return 1
print(f"\n--- {title} ---", file=file)
profiles = manager.list_profiles(args.state, args.proxy)
if args.format == 'json':
@ -1533,6 +1755,7 @@ def _render_simulation_view(title, manager, args, file=sys.stdout):
return 0
# --- Table Format with Summaries ---
print(f"\n--- {title} ---", file=file)
if args.show_proxy_activity:
if args.proxy:
@ -1541,7 +1764,12 @@ def _render_simulation_view(title, manager, args, file=sys.stdout):
_render_all_proxies_activity_summary(manager, title, file=file)
profile_groups_config = _build_profile_groups_config(manager, profiles)
_render_profile_group_summary_table(manager, profiles, profile_groups_config, file=file)
profile_selection_strategy = manager.get_config('profile_selection_strategy')
if profile_selection_strategy:
print(f"Profile Selection Strategy: {profile_selection_strategy}", file=file)
_render_profile_group_summary_table(manager, profiles, profile_groups_config, args, file=file)
failed_lock_attempts = manager.get_failed_lock_attempts()
global_stats = manager.get_global_stats()
@ -1605,6 +1833,9 @@ def _render_simulation_view(title, manager, args, file=sys.stdout):
print("\nProfile Details:", file=file)
_render_profile_details_table(manager, args, title, profile_groups_config, file=file)
if args.show_activation_history:
_render_activation_history_table(manager, file=file)
return 0
@ -1689,12 +1920,19 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
print(tabulate(proxy_table_data, headers=proxy_headers, tablefmt='grid'), file=file)
print(f"\n--- Auth Simulation Profile Details ({args.auth_env}) ---", file=file)
_render_profile_group_summary_table(auth_manager, auth_profiles, auth_groups_config, file=file)
profile_selection_strategy = auth_manager.get_config('profile_selection_strategy')
if profile_selection_strategy:
print(f"Profile Selection Strategy: {profile_selection_strategy}", file=file)
_render_profile_group_summary_table(auth_manager, auth_profiles, auth_groups_config, args, file=file)
_render_profile_details_table(auth_manager, args, "Auth", auth_groups_config, file=file)
if args.show_activation_history:
_render_activation_history_table(auth_manager, file=file)
print(f"\n--- Download Simulation Profile Details ({args.download_env}) ---", file=file)
_render_profile_group_summary_table(download_manager, dl_profiles, dl_groups_config, file=file)
_render_profile_group_summary_table(download_manager, dl_profiles, dl_groups_config, args, file=file)
_render_profile_details_table(download_manager, args, "Download", dl_groups_config, file=file)
if args.show_activation_history:
_render_activation_history_table(download_manager, file=file)
return 0
@ -1709,7 +1947,7 @@ def main_profile_manager(args):
if load_dotenv:
env_file = args.env_file
if not env_file and args.env and '.env' in args.env and os.path.exists(args.env):
print(f"WARNING: --env should be an environment name (e.g., 'dev'), not a file path. Treating '{args.env}' as --env-file. The environment name will default to 'dev'.", file=sys.stderr)
print(f"Warning: --env should be an environment name (e.g., 'dev'), not a file path. Treating '{args.env}' as --env-file. The environment name will default to 'dev'.", file=sys.stderr)
env_file = args.env
args.env = 'dev'
@ -1724,6 +1962,8 @@ def main_profile_manager(args):
args.redis_host = os.getenv('REDIS_HOST', os.getenv('MASTER_HOST_IP', 'localhost'))
if args.redis_port is None:
args.redis_port = int(os.getenv('REDIS_PORT', 6379))
if getattr(args, 'redis_db', None) is None:
args.redis_db = int(os.getenv('REDIS_DB', 0))
if args.redis_password is None:
args.redis_password = os.getenv('REDIS_PASSWORD')
@ -1741,7 +1981,8 @@ def main_profile_manager(args):
redis_host=args.redis_host,
redis_port=args.redis_port,
redis_password=args.redis_password,
key_prefix=key_prefix
key_prefix=key_prefix,
redis_db=args.redis_db
)
if args.profile_command == 'create':
@ -1770,7 +2011,8 @@ def main_profile_manager(args):
return ProfileManager(
redis_host=args.redis_host, redis_port=args.redis_port,
redis_password=args.redis_password, key_prefix=key_prefix
redis_password=args.redis_password, key_prefix=key_prefix,
redis_db=args.redis_db
)
if not args.live:
@ -1924,20 +2166,20 @@ def main_profile_manager(args):
return 0 if success else 1
elif args.profile_command == 'pause':
success = manager.update_profile_state(args.name, manager.STATE_PAUSED, 'Manual pause')
success = manager.update_profile_state(args.name, ProfileState.PAUSED.value, 'Manual pause')
return 0 if success else 1
elif args.profile_command == 'activate':
success = manager.update_profile_state(args.name, manager.STATE_ACTIVE, 'Manual activation')
success = manager.update_profile_state(args.name, ProfileState.ACTIVE.value, 'Manual activation')
return 0 if success else 1
elif args.profile_command == 'ban':
success = manager.update_profile_state(args.name, manager.STATE_BANNED, args.reason)
success = manager.update_profile_state(args.name, ProfileState.BANNED.value, args.reason)
return 0 if success else 1
elif args.profile_command == 'unban':
# First activate, then reset session counters. The ban reason is cleared by update_profile_state.
success = manager.update_profile_state(args.name, manager.STATE_ACTIVE, 'Manual unban')
success = manager.update_profile_state(args.name, ProfileState.ACTIVE.value, 'Manual unban')
if success:
manager.reset_profile_counters(args.name)
return 0 if success else 1

View File

@ -0,0 +1,330 @@
from __future__ import annotations
from enum import Enum
from typing import TYPE_CHECKING, Optional
import time
import logging
from statemachine import StateMachine, State, event
if TYPE_CHECKING:
from .profile_manager_tool import ProfileManager
logger = logging.getLogger(__name__)
class ProfileState(Enum):
"""Enumeration for profile states."""
ACTIVE = "ACTIVE"
LOCKED = "LOCKED"
RESTING = "RESTING"
BANNED = "BANNED"
COOLDOWN = "COOLDOWN"
PAUSED = "PAUSED"
@classmethod
def values(cls):
return [item.value for item in cls]
class ProfileStateMachine(StateMachine):
"""A state machine for managing the lifecycle of a profile."""
# States
# We use lowercase attribute names to match the on_enter_* callback convention (e.g., on_enter_active).
# The first State() argument is only a human-readable name, so value= is set explicitly to the
# uppercase string stored in Redis; this keeps event_data.source.value consistent with ProfileState.
active = State("ACTIVE", value="ACTIVE", initial=True)
locked = State("LOCKED", value="LOCKED")
resting = State("RESTING", value="RESTING")
banned = State("BANNED", value="BANNED")
cooldown = State("COOLDOWN", value="COOLDOWN")
paused = State("PAUSED", value="PAUSED")
# Transitions
lock = active.to(locked)
unlock = locked.to(active)
start_cooldown = locked.to(cooldown)
rest = active.to(resting) | locked.to(resting) | cooldown.to(resting)
ban = active.to(banned) | locked.to(banned) | resting.to(banned) | cooldown.to(banned) | paused.to(banned)
activate = resting.to(active) | banned.to(active) | cooldown.to(active) | paused.to(active)
pause = active.to(paused) | locked.to(paused) | resting.to(paused) | cooldown.to(paused)
def __init__(self, manager: ProfileManager, profile_name: str, *args, **kwargs):
self.manager = manager
self.profile_name = profile_name
super().__init__(*args, **kwargs)
# --- Action Methods ---
def on_enter_locked(self, event_data: event.EventData, owner: Optional[str] = None):
"""Action executed when entering the LOCKED state."""
# When re-hydrating a state machine, `owner` will be None. In this case,
# the profile is already locked, so we should not perform any actions.
if owner is None:
return
now = time.time()
profile_key = self.manager._profile_key(self.profile_name)
pipe = self.manager.redis.pipeline()
# Explicitly use Enum value to ensure Uppercase in Redis
state_val = ProfileState.LOCKED.value
pipe.hset(profile_key, mapping={
'state': state_val,
'lock_owner': owner,
'lock_timestamp': str(now),
'last_used': str(now)
})
# Update state indexes
if event_data.source:
pipe.zrem(self.manager._state_key(event_data.source.value), self.profile_name)
pipe.zadd(self.manager._state_key(state_val), {self.profile_name: now})
pipe.execute()
logger.info(f"Updated profile '{self.profile_name}' from {event_data.source.value if event_data.source else 'None'} to {state_val}")
logger.info(f"Locked profile '{self.profile_name}' for owner '{owner}'")
def on_enter_cooldown(self, event_data: event.EventData, duration: Optional[int] = None):
"""Action executed when entering the COOLDOWN state."""
# When re-hydrating a state machine, `duration` will be None. In this case,
# the profile is already in cooldown, so we should not perform any actions.
if duration is None:
return
now = time.time()
rest_until = now + duration
profile_key = self.manager._profile_key(self.profile_name)
pipe = self.manager.redis.pipeline()
# Explicitly use Enum value to ensure Uppercase in Redis
state_val = ProfileState.COOLDOWN.value
pipe.hset(profile_key, mapping={
'state': state_val,
'rest_until': str(rest_until),
'rest_reason': 'Post-task cooldown',
'lock_owner': '',
'lock_timestamp': '0',
'last_used': str(now)
})
# Update state indexes
if event_data.source:
pipe.zrem(self.manager._state_key(event_data.source.value), self.profile_name)
pipe.zadd(self.manager._state_key(state_val), {self.profile_name: now})
# Remove from locks
pipe.hdel(self.manager._locks_key(), self.profile_name)
pipe.execute()
logger.info(f"Updated profile '{self.profile_name}' from {event_data.source.value if event_data.source else 'None'} to {state_val}")
logger.info(f"Unlocked profile '{self.profile_name}' into COOLDOWN for {duration}s.")
def on_enter_active(self, event_data: event.EventData, profile: Optional[dict] = None):
"""Action executed when entering the ACTIVE state."""
now = time.time()
source_state = event_data.source
profile_key = self.manager._profile_key(self.profile_name)
pipe = self.manager.redis.pipeline()
# Explicitly use Enum value to ensure Uppercase in Redis
state_val = ProfileState.ACTIVE.value
updates = {
'state': state_val,
'rest_until': '0',
'rest_reason': '',
'reason': '',
'ban_reason': '',
'lock_owner': '',
'lock_timestamp': '0',
'last_used': str(now),
'wait_started_at': '0'
}
if source_state and source_state.value in [ProfileState.RESTING.value, ProfileState.COOLDOWN.value]:
updates['last_rest_timestamp'] = str(now)
# Reset counters if activating from a long-term off state, but not from a short cooldown.
should_reset_counters = False
if source_state and source_state.value in [ProfileState.BANNED.value, ProfileState.PAUSED.value]:
should_reset_counters = True
elif source_state and source_state.value == ProfileState.RESTING.value:
# For RESTING, only reset if it wasn't just waiting for a slot after a cooldown.
profile_data = profile or self.manager.redis.hgetall(profile_key)
is_waiting_after_cooldown = profile_data.get('rest_reason') == "Waiting for group capacity"
if not is_waiting_after_cooldown:
should_reset_counters = True
if should_reset_counters:
logger.info(f"Resetting session counters for profile '{self.profile_name}' on activation.")
updates.update({
'success_count': '0',
'failure_count': '0',
'tolerated_error_count': '0',
'download_count': '0',
'download_error_count': '0',
})
pipe.hset(profile_key, mapping=updates)
# Update state indexes - this is critical for list_profiles to work correctly
if source_state:
pipe.zrem(self.manager._state_key(source_state.value), self.profile_name)
pipe.zadd(self.manager._state_key(state_val), {self.profile_name: now})
# Remove from locks if coming from LOCKED state
if source_state and source_state.value == ProfileState.LOCKED.value:
pipe.hdel(self.manager._locks_key(), self.profile_name)
# When activating a profile, ensure its proxy is also active.
proxy_url = self.manager.redis.hget(profile_key, 'proxy')
if proxy_url:
logger.debug(f"Ensuring associated proxy '{proxy_url}' is active for profile '{self.profile_name}'.")
pipe.hset(self.manager._proxy_state_key(proxy_url), mapping={
'state': ProfileState.ACTIVE.value,
'rest_until': '0',
'work_start_timestamp': str(now)
})
pipe.execute()
logger.info(f"Updated profile '{self.profile_name}' from {source_state.value if source_state else 'None'} to {state_val}")
def on_enter_banned(self, event_data: event.EventData, reason: str = ''):
"""Action executed when entering the BANNED state."""
now = time.time()
profile_key = self.manager._profile_key(self.profile_name)
pipe = self.manager.redis.pipeline()
# Explicitly use Enum value to ensure Uppercase in Redis
state_val = ProfileState.BANNED.value
updates = {
'state': state_val,
'last_used': str(now),
'lock_owner': '',
'lock_timestamp': '0',
'rest_until': '0',
'rest_reason': '',
'wait_started_at': '0'
}
if reason:
updates['ban_reason'] = reason
updates['reason'] = reason
pipe.hset(profile_key, mapping=updates)
# Update state indexes
if event_data.source:
pipe.zrem(self.manager._state_key(event_data.source.value), self.profile_name)
pipe.zadd(self.manager._state_key(state_val), {self.profile_name: now})
# Remove from locks
if event_data.source and event_data.source.value == ProfileState.LOCKED.value:
pipe.hdel(self.manager._locks_key(), self.profile_name)
pipe.execute()
logger.info(f"Updated profile '{self.profile_name}' from {event_data.source.value if event_data.source else 'None'} to {state_val}")
if reason:
logger.info(f"Reason for ban: {reason}")
def on_enter_resting(self, event_data: event.EventData, reason: str = '', duration_minutes: Optional[int] = None, is_waiting_profile: bool = False, is_rotation: bool = False):
"""Action executed when entering the RESTING state."""
now = time.time()
source_state = event_data.source
profile_key = self.manager._profile_key(self.profile_name)
pipe = self.manager.redis.pipeline()
# Explicitly use Enum value to ensure Uppercase in Redis
state_val = ProfileState.RESTING.value
updates = {
'state': state_val,
'last_used': str(now),
'lock_owner': '',
'lock_timestamp': '0',
}
if is_waiting_profile:
updates['wait_started_at'] = str(now)
updates['rest_until'] = '0'
elif duration_minutes == 0:
updates['rest_until'] = '0'
else:
# Default to 1 hour if no duration is provided
rest_duration_seconds = (duration_minutes * 60) if duration_minutes is not None else 3600
rest_until = now + rest_duration_seconds
updates['rest_until'] = str(rest_until)
if reason:
updates['rest_reason'] = reason
updates['reason'] = reason
if is_rotation:
logger.info(f"Resetting session counters for profile '{self.profile_name}' on rotation.")
updates.update({
'success_count': '0',
'failure_count': '0',
'tolerated_error_count': '0',
'download_count': '0',
'download_error_count': '0',
})
pipe.hset(profile_key, mapping=updates)
# Update state indexes - this is critical for the enforcer to see the correct state
if source_state:
pipe.zrem(self.manager._state_key(source_state.value), self.profile_name)
pipe.zadd(self.manager._state_key(state_val), {self.profile_name: now})
# Remove from locks if coming from LOCKED state
if source_state and source_state.value == ProfileState.LOCKED.value:
pipe.hdel(self.manager._locks_key(), self.profile_name)
pipe.execute()
logger.info(f"Updated profile '{self.profile_name}' from {source_state.value if source_state else 'None'} to {state_val}")
if reason:
logger.info(f"Reason for rest: {reason}")
def on_enter_paused(self, event_data: event.EventData, reason: str = ''):
"""Action executed when entering the PAUSED state."""
now = time.time()
profile_key = self.manager._profile_key(self.profile_name)
pipe = self.manager.redis.pipeline()
# Explicitly use Enum value to ensure Uppercase in Redis
state_val = ProfileState.PAUSED.value
updates = {
'state': state_val,
'last_used': str(now),
'lock_owner': '',
'lock_timestamp': '0',
'rest_until': '0',
'rest_reason': '',
'wait_started_at': '0'
}
if reason:
updates['reason'] = reason
pipe.hset(profile_key, mapping=updates)
# Update state indexes
if event_data.source:
pipe.zrem(self.manager._state_key(event_data.source.value), self.profile_name)
pipe.zadd(self.manager._state_key(state_val), {self.profile_name: now})
# Remove from locks
if event_data.source and event_data.source.value == ProfileState.LOCKED.value:
pipe.hdel(self.manager._locks_key(), self.profile_name)
pipe.execute()
logger.info(f"Updated profile '{self.profile_name}' from {event_data.source.value if event_data.source else 'None'} to {state_val}")
if reason:
logger.info(f"Reason for pause: {reason}")

View File

@ -27,7 +27,7 @@ requests==2.32.5
tabulate
# For yt-dlp integration in 'download py', 'list-formats', etc.
yt-dlp
# yt-dlp
# --- Pinned yt-dlp dependencies ---
# These are pinned to match versions known to work with the server.
@ -40,3 +40,6 @@ pycryptodomex==3.23.0
secretstorage==3.4.0
urllib3==2.5.0
websockets==15.0.1
python-statemachine
pytest

View File

@ -0,0 +1,406 @@
import collections
import json
import logging
import os
import random
import re
import shlex
import sys
import tempfile
import shutil
import subprocess
import threading
import time
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path
from . import utils as sp_utils
from .process_runners import run_command, run_docker_container, get_worker_id
from ..profile_manager_tool import ProfileManager
logger = logging.getLogger(__name__)
def run_direct_batch_worker(worker_id, policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock, profile_prefix=None):
"""A worker for the 'direct_batch_cli' orchestration mode."""
owner_id = f"direct-batch-worker-{worker_id}"
settings = policy.get('settings', {})
exec_control = policy.get('execution_control', {})
gen_policy = policy.get('info_json_generation_policy', {})
direct_policy = policy.get('direct_batch_cli_policy', {})
queue_policy = policy.get('queue_policy')
# Prioritize the passed-in profile_prefix for worker pool compatibility.
if not profile_prefix:
profile_prefix = gen_policy.get('profile_prefix')
if not profile_prefix:
logger.error(f"[Worker {worker_id}] Direct batch mode requires a 'profile_prefix' from the worker pool or 'info_json_generation_policy'. Worker exiting.")
return []
batch_size = direct_policy.get('batch_size')
if not batch_size:
logger.error(f"[Worker {worker_id}] Direct batch mode requires 'direct_batch_cli_policy.batch_size'. Worker exiting.")
return []
save_dir = settings.get('save_info_json_dir')
if not save_dir:
logger.error(f"[Worker {worker_id}] Direct batch mode requires 'settings.save_info_json_dir'. Worker exiting.")
return []
os.makedirs(save_dir, exist_ok=True)
last_used_profile_name = None
while not state_manager.shutdown_event.is_set():
locked_profile = None
temp_batch_file = None
# --- Variables for robust finalization ---
files_created = 0
url_batch_len = 0
batch_started = False
downloads_per_url = 0 # Default to 0, meaning no increment unless configured
# ---
try:
# 1. Lock a profile
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
# --- New logic to avoid immediate reuse ---
avoid_reuse = direct_policy.get('avoid_immediate_profile_reuse', False)
if avoid_reuse and locked_profile and last_used_profile_name and locked_profile['name'] == last_used_profile_name:
logger.info(f"[Worker {worker_id}] Re-locked same profile '{locked_profile['name']}'. Unlocking and pausing to allow for rotation.")
profile_manager_instance.unlock_profile(locked_profile['name'], owner=owner_id)
wait_seconds = direct_policy.get('avoid_reuse_max_wait_seconds', 5)
time.sleep(wait_seconds)
# After waiting, try to lock again.
logger.info(f"[Worker {worker_id}] Attempting to lock a new profile after waiting.")
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
if locked_profile and locked_profile['name'] == last_used_profile_name:
logger.warning(f"[Worker {worker_id}] Still locking the same profile '{locked_profile['name']}' after waiting. Proceeding to use it to avoid getting stuck.")
elif locked_profile:
logger.info(f"[Worker {worker_id}] Switched to a different profile after waiting: '{locked_profile['name']}'.")
# --- End new logic ---
if not locked_profile:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
# --- Add diagnostic logging ---
all_profiles_in_pool = profile_manager_instance.list_profiles()
profiles_in_prefix = [p for p in all_profiles_in_pool if p['name'].startswith(profile_prefix)]
if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
logger.info(f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix}*): {states_summary}. Pausing for {polling_interval}s.")
else:
logger.info(f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix}'. Pausing for {polling_interval}s.")
# --- End diagnostic logging ---
time.sleep(polling_interval)
continue
profile_name = locked_profile['name']
proxy_url = locked_profile['proxy']
# 2. Get a batch of URLs from the shared list
url_batch, start_idx = state_manager.get_next_url_batch(batch_size, urls_list)
if not url_batch:
logger.info(f"[Worker {worker_id}] No more URLs to process. Worker exiting.")
break # Exit the while loop
url_batch_len = len(url_batch)
batch_started = True
# --- Calculate how many download tasks will be generated ---
downloads_per_url = 0 # Default to 0, meaning no increment unless configured
downloads_per_url_config = gen_policy.get('downloads_per_url')
if downloads_per_url_config:
if isinstance(downloads_per_url_config, int):
downloads_per_url = downloads_per_url_config
elif downloads_per_url_config == 'from_download_policy':
download_policy = policy.get('download_policy', {})
formats_str = download_policy.get('formats', '')
if formats_str:
# Use smarter parsing to handle complex yt-dlp format selectors
if any(c in formats_str for c in '/+[]()'):
num_formats = 1
else:
num_formats = len([f for f in formats_str.split(',') if f.strip()])
if num_formats > 0:
downloads_per_url = num_formats
if downloads_per_url > 0:
downloads_to_increment = url_batch_len * downloads_per_url
profile_manager_instance.increment_pending_downloads(profile_name, downloads_to_increment)
logger.info(f"[Worker {worker_id}] [{profile_name}] Preemptively incremented pending downloads by {downloads_to_increment} for the upcoming batch ({url_batch_len} URLs * {downloads_per_url} formats).")
else:
logger.warning(f"[Worker {worker_id}] [{profile_name}] 'downloads_per_url' is not configured or resolves to 0. Pending downloads counter will not be incremented for this batch.")
end_idx = start_idx + len(url_batch)
logger.info(f"[Worker {worker_id}] [{profile_name}] Processing batch of {len(url_batch)} URLs (lines {start_idx + 1}-{end_idx} from source).")
video_ids_in_batch = {sp_utils.get_video_id(u) for u in url_batch}
# 3. Write URLs to a temporary batch file
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt', encoding='utf-8') as f:
temp_batch_file = f.name
f.write('\n'.join(url_batch))
# 4. Construct and run the command
ytdlp_cmd_str = direct_policy.get('ytdlp_command')
if not ytdlp_cmd_str:
logger.error(f"[Worker {worker_id}] Direct batch mode requires 'direct_batch_cli_policy.ytdlp_command'.")
break
cmd = shlex.split(ytdlp_cmd_str)
cmd.extend(['--batch-file', temp_batch_file])
cmd.extend(['--proxy', proxy_url])
# The output template should not include the .info.json extension, as
# yt-dlp adds it automatically when --write-info-json is used.
output_template_str = direct_policy.get('ytdlp_output_template', '%(id)s')
ytdlp_args = direct_policy.get('ytdlp_args')
custom_env = direct_policy.get('env_vars', {}).copy()
# --- PYTHONPATH for custom yt-dlp module ---
ytdlp_module_path = direct_policy.get('ytdlp_module_path')
if ytdlp_module_path:
existing_pythonpath = custom_env.get('PYTHONPATH', os.environ.get('PYTHONPATH', ''))
# Prepend the custom path to PYTHONPATH to give it precedence
custom_env['PYTHONPATH'] = f"{ytdlp_module_path}{os.pathsep}{existing_pythonpath}".strip(os.pathsep)
logger.debug(f"[Worker {worker_id}] Using custom PYTHONPATH: {custom_env['PYTHONPATH']}")
custom_env['YTDLP_PROFILE_NAME'] = profile_name
custom_env['YTDLP_PROXY_URL'] = proxy_url
env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
custom_env['YTDLP_SIM_MODE'] = env_name
# Create a per-profile cache directory and set XDG_CACHE_HOME
cache_dir_base = direct_policy.get('cache_dir_base', '.cache')
profile_cache_dir = os.path.join(cache_dir_base, profile_name)
try:
os.makedirs(profile_cache_dir, exist_ok=True)
custom_env['XDG_CACHE_HOME'] = profile_cache_dir
except OSError as e:
logger.error(f"[Worker {worker_id}] Failed to create cache directory '{profile_cache_dir}': {e}")
# --- Manage User-Agent ---
# Use a consistent User-Agent per profile, storing it in the profile's cache directory.
user_agent = None
user_agent_file = os.path.join(profile_cache_dir, 'user_agent.txt')
try:
if os.path.exists(user_agent_file):
with open(user_agent_file, 'r', encoding='utf-8') as f:
user_agent = f.read().strip()
if not user_agent: # File doesn't exist or is empty
user_agent = sp_utils.generate_user_agent_from_policy(policy)
with open(user_agent_file, 'w', encoding='utf-8') as f:
f.write(user_agent)
logger.info(f"[{profile_name}] Generated and saved new User-Agent: '{user_agent}'")
else:
logger.info(f"[{profile_name}] Using existing User-Agent from cache: '{user_agent}'")
except IOError as e:
logger.error(f"[Worker {worker_id}] Error accessing User-Agent file '{user_agent_file}': {e}. Using generated UA for this run.")
user_agent = sp_utils.generate_user_agent_from_policy(policy) # fallback
# Add proxy rename from policy if specified, for custom yt-dlp forks
proxy_rename = direct_policy.get('ytdlp_proxy_rename')
if proxy_rename:
custom_env['YTDLP_PROXY_RENAME'] = proxy_rename
if user_agent:
cmd.extend(['--user-agent', user_agent])
if ytdlp_args:
cmd.extend(shlex.split(ytdlp_args))
if args.verbose and '--verbose' not in cmd:
cmd.append('--verbose')
if args.dummy_batch:
# In dummy batch mode, we simulate the entire batch process directly.
log_cmd = list(cmd)
log_cmd.extend(['-o', os.path.join('temp_dir', output_template_str)])
logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: Simulating batch of {len(url_batch)} URLs.")
logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: Would run real command: {' '.join(shlex.quote(s) for s in log_cmd)}")
logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY BATCH MODE: With environment: {custom_env}")
dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
auth_failure_rate = dummy_settings.get('auth_failure_rate', 0.0)
auth_skipped_rate = dummy_settings.get('auth_skipped_failure_rate', 0.0)
min_seconds = dummy_settings.get('auth_min_seconds', 0.1)
max_seconds = dummy_settings.get('auth_max_seconds', 0.5)
for url in url_batch:
time.sleep(random.uniform(min_seconds, max_seconds))
video_id = sp_utils.get_video_id(url) or f"dummy_{random.randint(1000, 9999)}"
rand_val = random.random()
if rand_val < auth_skipped_rate:
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating tolerated failure for {video_id}.")
profile_manager_instance.record_activity(profile_name, 'tolerated_error')
elif rand_val < (auth_skipped_rate + auth_failure_rate):
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal failure for {video_id}.")
profile_manager_instance.record_activity(profile_name, 'failure')
else:
# Success - create dummy info.json
files_created += 1
profile_manager_instance.record_activity(profile_name, 'success')
info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True}
env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
info_data['_ytops_metadata'] = {
'profile_name': profile_name, 'proxy_url': proxy_url,
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
'auth_env': env_name
}
final_path = Path(save_dir) / f"{video_id}.info.json"
rename_template = direct_policy.get('rename_file_template')
if rename_template:
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url)
new_name = rename_template.format(video_id=video_id, profile_name=profile_name, proxy=sanitized_proxy)
final_path = Path(save_dir) / new_name
try:
with open(final_path, 'w', encoding='utf-8') as f:
json.dump(info_data, f, indent=2)
logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY: Created dummy info.json: '{final_path}'")
except IOError as e:
logger.error(f"[Worker {worker_id}] [{profile_name}] DUMMY: Failed to write dummy info.json: {e}")
success = (files_created > 0)
state_manager.record_batch_result(success, len(url_batch), profile_name=profile_name)
event = { 'type': 'fetch_batch', 'profile': profile_name, 'proxy_url': proxy_url, 'success': success, 'details': f"Dummy batch completed. Files created: {files_created}/{len(url_batch)}.", 'video_count': len(url_batch) }
state_manager.log_event(event)
else:
with tempfile.TemporaryDirectory(prefix=f"ytdlp-batch-{worker_id}-") as temp_output_dir:
output_template = os.path.join(temp_output_dir, output_template_str)
cmd.extend(['-o', output_template])
logger.info(f"[Worker {worker_id}] [{profile_name}] Processing batch of {len(url_batch)} URLs...")
logger.info(f"[Worker {worker_id}] [{profile_name}] Running command: {' '.join(shlex.quote(s) for s in cmd)}")
logger.info(f"[Worker {worker_id}] [{profile_name}] With environment: {custom_env}")
retcode, stdout, stderr = run_command(
cmd, running_processes, process_lock, env=custom_env, stream_output=args.verbose,
stream_prefix=f"[Worker {worker_id} | yt-dlp] "
)
is_bot_error = "Sign in to confirm you're not a bot" in stderr
if is_bot_error:
logger.warning(f"[Worker {worker_id}] [{profile_name}] Bot detection occurred during batch. Marking as failure.")
processed_files = list(Path(temp_output_dir).glob('*.json'))
for temp_path in processed_files:
files_created += 1
video_id = "unknown"
try:
with open(temp_path, 'r+', encoding='utf-8') as f:
info_data = json.load(f)
video_id = info_data.get('id', 'unknown')
env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
info_data['_ytops_metadata'] = {
'profile_name': profile_name,
'proxy_url': proxy_url,
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
'auth_env': env_name
}
f.seek(0)
json.dump(info_data, f, indent=2)
f.truncate()
final_path = Path(save_dir) / temp_path.name
rename_template = direct_policy.get('rename_file_template')
if rename_template:
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url)
new_name = rename_template.format(
video_id=video_id, profile_name=profile_name, proxy=sanitized_proxy
)
final_path = Path(save_dir) / new_name
shutil.move(str(temp_path), str(final_path))
logger.info(f"[Worker {worker_id}] Post-processed and moved info.json to '{final_path}'")
except (IOError, json.JSONDecodeError, OSError) as e:
logger.error(f"[Worker {worker_id}] Error post-processing '{temp_path.name}' (video: {video_id}): {e}")
# Per-URL success/failure for the profile is recorded by the custom yt-dlp script (see below),
# not by the orchestrator. A batch is considered an overall success for logging if it had no
# fatal errors and produced at least one file.
success = (files_created > 0 and not is_bot_error)
if not success:
reason = "bot detection occurred" if is_bot_error else f"0 files created out of {len(url_batch)}"
logger.warning(f"[Worker {worker_id}] [{profile_name}] Marking batch as FAILED. Reason: {reason}.")
# Record batch stats for overall orchestrator health
state_manager.record_batch_result(success, len(url_batch), profile_name=profile_name)
# In this mode, the custom yt-dlp script is responsible for recording
# per-URL activity ('success', 'failure', 'tolerated_error') directly into Redis.
# The orchestrator does not record activity here to avoid double-counting.
logger.info(f"[Worker {worker_id}] [{profile_name}] Batch finished. Per-URL activity was recorded by the yt-dlp script.")
event_details = f"Batch completed. Exit: {retcode}. Files created: {files_created}/{len(url_batch)}."
if not success and stderr:
if is_bot_error:
event_details += " Stderr: Bot detection occurred."
else:
event_details += f" Stderr: {stderr.strip().splitlines()[-1]}"
event = { 'type': 'fetch_batch', 'profile': profile_name, 'proxy_url': proxy_url, 'success': success, 'details': event_details, 'video_count': len(url_batch) }
state_manager.log_event(event)
except Exception as e:
logger.error(f"[Worker {worker_id}] Unexpected error in worker loop: {e}", exc_info=True)
if locked_profile:
profile_manager_instance.record_activity(locked_profile['name'], 'failure')
finally:
if locked_profile and batch_started:
# --- Reconcile pending downloads counter ---
if downloads_per_url > 0:
initial_increment = url_batch_len * downloads_per_url
actual_downloads = files_created * downloads_per_url
adjustment = actual_downloads - initial_increment
if adjustment != 0:
logger.warning(f"[Worker {worker_id}] [{profile_name}] Reconciling pending downloads. Batch created {files_created}/{url_batch_len} successful info.json(s). Adjusting counter by {adjustment}.")
profile_manager_instance.increment_pending_downloads(locked_profile['name'], adjustment)
if locked_profile:
last_used_profile_name = locked_profile['name']
cooldown = None
# DESIGN: The cooldown duration is not configured in the worker's policy.
# Instead, it is read from a central Redis key. This key is set by the
# policy-enforcer, making the enforcer the single source of truth for
# this policy. This allows changing the cooldown behavior without
# restarting the workers.
cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds')
if cooldown_config:
try:
val = json.loads(cooldown_config)
if isinstance(val, list) and len(val) == 2 and val[0] < val[1]:
cooldown = random.randint(val[0], val[1])
elif isinstance(val, int):
cooldown = val
except (json.JSONDecodeError, TypeError):
if cooldown_config.isdigit():
cooldown = int(cooldown_config)
if cooldown:
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.")
profile_manager_instance.unlock_profile(
locked_profile['name'],
owner=owner_id,
rest_for_seconds=cooldown
)
if temp_batch_file and os.path.exists(temp_batch_file):
os.unlink(temp_batch_file)
logger.info(f"[Worker {worker_id}] Worker loop finished.")
return []
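A standalone sketch of the 'downloads_per_url: from_download_policy' format-counting heuristic used in the batch worker above. The helper name is hypothetical; the logic mirrors the inline code: selectors containing fallback/merge/filter syntax ('/', '+', '[', ']', '(', ')') are treated as one download per URL, otherwise each comma-separated entry counts as one.

    def count_expected_downloads(formats_str: str) -> int:
        """Estimate how many files one URL should yield for a yt-dlp format selector."""
        if not formats_str:
            return 0
        if any(c in formats_str for c in '/+[]()'):
            # Fallback chains, merges and filters make the outcome ambiguous: assume 1.
            return 1
        return len([f for f in formats_str.split(',') if f.strip()])

    # e.g. count_expected_downloads("299-dashy,140-dashy") == 2
    #      count_expected_downloads("299-dashy/298-dashy,140") == 1  (fallback chain present)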

File diff suppressed because it is too large

View File

@ -0,0 +1,302 @@
import collections
import json
import logging
import os
import random
import re
import shlex
import sys
import tempfile
import shutil
import subprocess
import threading
import time
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path
from . import utils as sp_utils
from .process_runners import run_command, get_worker_id
from ..profile_manager_tool import ProfileManager
from .worker_utils import find_task_and_lock_profile, get_auth_manager
logger = logging.getLogger(__name__)
def run_direct_download_worker(worker_id, policy, state_manager, args, profile_manager_instance, running_processes, process_lock, profile_prefix=None):
"""A persistent worker for the 'direct_download_cli' orchestration mode."""
owner_id = f"direct-dl-worker-{worker_id}"
settings = policy.get('settings', {})
exec_control = policy.get('execution_control', {})
d_policy = policy.get('download_policy', {})
direct_policy = policy.get('direct_download_cli_policy', {})
# Prioritize the passed-in profile_prefix for worker pool compatibility.
if not profile_prefix:
profile_prefix = d_policy.get('profile_prefix')
# Unlike other modes, this worker can function without a prefix (it will try to lock any active profile).
# The check `if not profile_prefix` is removed to allow this flexibility.
output_dir = direct_policy.get('output_dir')
if not output_dir:
logger.error(f"[Worker {worker_id}] Direct download mode requires 'direct_download_cli_policy.output_dir'. Worker exiting.")
return []
os.makedirs(output_dir, exist_ok=True)
no_task_streak = 0
while not state_manager.shutdown_event.is_set():
locked_profile = None
claimed_task_path = None
auth_profile_name, auth_env = None, None # For finally block
try:
# 0. If no tasks were found, pause briefly.
if no_task_streak > 0:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
# --- Add diagnostic logging ---
all_profiles_in_pool = profile_manager_instance.list_profiles()
profiles_in_prefix = [p for p in all_profiles_in_pool if p['name'].startswith(profile_prefix or '')]
if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
logger.info(f"[Worker {worker_id}] No tasks found for available profiles. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s. (Streak: {no_task_streak})")
else:
logger.info(f"[Worker {worker_id}] No tasks found for available profiles. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s. (Streak: {no_task_streak})")
# --- End diagnostic logging ---
time.sleep(polling_interval)
if state_manager.shutdown_event.is_set(): continue
# 1. Find a task and lock its associated profile
locked_profile, claimed_task_path = find_task_and_lock_profile(
profile_manager_instance, owner_id, profile_prefix, policy, worker_id
)
if not locked_profile:
no_task_streak += 1
# The main loop will pause if the streak continues.
continue
profile_name = locked_profile['name']
# We have a task and a lock.
if claimed_task_path:
no_task_streak = 0 # Reset streak
# --- Read metadata before processing/deleting file ---
try:
with open(claimed_task_path, 'r', encoding='utf-8') as f:
info_data = json.load(f)
metadata = info_data.get('_ytops_metadata', {})
auth_profile_name = metadata.get('profile_name')
auth_env = metadata.get('auth_env')
except (IOError, json.JSONDecodeError) as e:
logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.")
continue # Skip to finally block to unlock profile
# 3. Construct and run the command
ytdlp_cmd_str = direct_policy.get('ytdlp_command')
if not ytdlp_cmd_str:
logger.error(f"[Worker {worker_id}] Direct download mode requires 'direct_download_cli_policy.ytdlp_command'.")
break
proxy_url = locked_profile['proxy']
proxy_rename = direct_policy.get('proxy_rename')
if proxy_rename:
rename_rule = proxy_rename.strip("'\"")
if rename_rule.startswith('s/') and rename_rule.count('/') >= 2:
try:
parts = rename_rule.split('/')
proxy_url = re.sub(parts[1], parts[2], proxy_url)
except (re.error, IndexError):
logger.error(f"[Worker {worker_id}] Invalid proxy_rename rule: {proxy_rename}")
output_template = os.path.join(output_dir, '%(title)s - %(id)s.%(ext)s')
cmd = shlex.split(ytdlp_cmd_str)
cmd.extend(['--load-info-json', str(claimed_task_path)])
cmd.extend(['--proxy', proxy_url])
cmd.extend(['-o', output_template])
ytdlp_args = direct_policy.get('ytdlp_args')
if ytdlp_args:
cmd.extend(shlex.split(ytdlp_args))
if args.verbose and '--verbose' not in cmd:
cmd.append('--verbose')
custom_env = direct_policy.get('env_vars', {}).copy()
# --- PYTHONPATH for custom yt-dlp module ---
ytdlp_module_path = direct_policy.get('ytdlp_module_path')
if ytdlp_module_path:
existing_pythonpath = custom_env.get('PYTHONPATH', os.environ.get('PYTHONPATH', ''))
custom_env['PYTHONPATH'] = f"{ytdlp_module_path}{os.pathsep}{existing_pythonpath}".strip(os.pathsep)
logger.debug(f"[Worker {worker_id}] Using custom PYTHONPATH: {custom_env['PYTHONPATH']}")
# Pass profile info to the custom yt-dlp process
custom_env['YTDLP_PROFILE_NAME'] = profile_name
custom_env['YTDLP_PROXY_URL'] = locked_profile['proxy'] # Original proxy
env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
custom_env['YTDLP_SIM_MODE'] = env_name
# Create a per-profile cache directory and set XDG_CACHE_HOME
cache_dir_base = direct_policy.get('cache_dir_base', '.cache')
profile_cache_dir = os.path.join(cache_dir_base, profile_name)
try:
os.makedirs(profile_cache_dir, exist_ok=True)
custom_env['XDG_CACHE_HOME'] = profile_cache_dir
except OSError as e:
logger.error(f"[Worker {worker_id}] Failed to create cache directory '{profile_cache_dir}': {e}")
logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task '{claimed_task_path.name}'...")
if args.dummy or args.dummy_batch:
logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DIRECT DOWNLOAD ==========")
logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path.name}")
logger.info(f"[Worker {worker_id}] Would run command: {' '.join(shlex.quote(s) for s in cmd)}")
logger.info(f"[Worker {worker_id}] With environment: {custom_env}")
dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
min_seconds = dummy_settings.get('download_min_seconds', 0.5)
max_seconds = dummy_settings.get('download_max_seconds', 1.5)
failure_rate = dummy_settings.get('download_failure_rate', 0.0)
skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)
time.sleep(random.uniform(min_seconds, max_seconds))
rand_val = random.random()
should_fail_skipped = rand_val < skipped_rate
should_fail_fatal = not should_fail_skipped and rand_val < (skipped_rate + failure_rate)
success = False
details = ""
error_type = None
is_tolerated_error = False
if should_fail_skipped:
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating skipped download failure for task '{claimed_task_path.name}'.")
details = "Dummy skipped failure"
error_type = "DummySkippedFailure"
is_tolerated_error = True
profile_manager_instance.record_activity(profile_name, 'tolerated_error')
elif should_fail_fatal:
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal download failure for task '{claimed_task_path.name}'.")
details = "Dummy fatal failure"
error_type = "DummyFailure"
profile_manager_instance.record_activity(profile_name, 'download_error')
else:
logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating download success for task '{claimed_task_path.name}'.")
success = True
details = "Dummy success"
profile_manager_instance.record_activity(profile_name, 'download')
event = {
'type': 'direct_download',
'profile': profile_name,
'proxy_url': locked_profile['proxy'],
'success': success,
'details': details,
'error_type': error_type,
'is_tolerated_error': is_tolerated_error
}
state_manager.log_event(event)
logger.info(f"========== [Worker {worker_id}] END DUMMY DIRECT DOWNLOAD ==========")
else:
# --- Real execution ---
logger.info(f"[Worker {worker_id}] [{profile_name}] Running command: {' '.join(shlex.quote(s) for s in cmd)}")
retcode, stdout, stderr = run_command(
cmd, running_processes, process_lock, env=custom_env, stream_output=args.verbose,
stream_prefix=f"[Worker {worker_id} | yt-dlp] "
)
success = (retcode == 0)
details = ""
error_type = None
is_tolerated_error = False
if success:
details = "Download successful"
profile_manager_instance.record_activity(profile_name, 'download')
else:
# Check for tolerated errors first
tolerated_patterns = direct_policy.get('tolerated_error_patterns', [])
for pattern in tolerated_patterns:
if re.search(pattern, stderr, re.IGNORECASE):
is_tolerated_error = True
error_type = "ToleratedError"
details = f"Tolerated error: {stderr.strip().splitlines()[-1] if stderr.strip() else 'Unknown'}"
profile_manager_instance.record_activity(profile_name, 'tolerated_error')
break
if not is_tolerated_error:
error_type = f"Exit Code {retcode}"
details = f"Download failed. Stderr: {stderr.strip().splitlines()[-1] if stderr.strip() else 'Unknown'}"
profile_manager_instance.record_activity(profile_name, 'download_error')
event = {
'type': 'direct_download',
'profile': profile_name,
'proxy_url': locked_profile['proxy'],
'success': success,
'details': details,
'error_type': error_type,
'is_tolerated_error': is_tolerated_error
}
state_manager.log_event(event)
# 4. Clean up the processed task file
try:
# The claimed_task_path has a .LOCKED suffix, remove it before adding .processed
base_path_str = str(claimed_task_path).rsplit('.LOCKED.', 1)[0]
processed_path = Path(f"{base_path_str}.processed")
claimed_task_path.rename(processed_path)
logger.debug(f"[{sp_utils.get_display_name(claimed_task_path)}] Renamed processed task file to '{processed_path.name}'.")
except (OSError, IndexError) as e:
logger.error(f"Failed to rename processed task file '{claimed_task_path}': {e}")
else:
logger.warning(f"[Worker {worker_id}] Inconsistent state: locked profile '{profile_name}' but no task was claimed. Unlocking and continuing.")
except Exception as e:
logger.error(f"[Worker {worker_id}] An unexpected error occurred in the worker loop: {e}", exc_info=True)
if locked_profile:
profile_manager_instance.record_activity(locked_profile['name'], 'failure') # Generic failure
time.sleep(5)
finally:
if locked_profile:
# Decrement pending downloads counter on the original auth profile
if claimed_task_path and auth_profile_name and auth_env:
auth_manager = get_auth_manager(profile_manager_instance, auth_env)
if auth_manager:
auth_manager.decrement_pending_downloads(auth_profile_name)
else:
logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
elif claimed_task_path:
logger.warning(f"Could not find auth profile name and/or auth_env in info.json metadata. Pending downloads counter will not be decremented.")
cooldown = None
if claimed_task_path:
cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds')
if cooldown_config:
try:
val = json.loads(cooldown_config)
if isinstance(val, list) and len(val) == 2 and val[0] < val[1]:
cooldown = random.randint(val[0], val[1])
elif isinstance(val, int):
cooldown = val
except (json.JSONDecodeError, TypeError):
if isinstance(cooldown_config, str) and cooldown_config.isdigit():
cooldown = int(cooldown_config)
if cooldown:
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.")
profile_manager_instance.unlock_profile(
locked_profile['name'],
owner=owner_id,
rest_for_seconds=cooldown
)
locked_profile = None
logger.info(f"[Worker {worker_id}] Worker loop finished.")
return []
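A small sketch of the sed-style 'proxy_rename' rule handling shown above, pulled out as a helper for clarity. The function name is hypothetical; the parsing matches the inline code (rules of the form s/pattern/replacement/, applied with re.sub and ignored when malformed).

    import re

    def apply_proxy_rename(proxy_url: str, rename_rule: str) -> str:
        """Apply a sed-style 's/pattern/replacement/' rule to a proxy URL, if valid."""
        rule = rename_rule.strip("'\"")
        if rule.startswith('s/') and rule.count('/') >= 2:
            try:
                parts = rule.split('/')
                return re.sub(parts[1], parts[2], proxy_url)
            except (re.error, IndexError):
                return proxy_url  # malformed rule: keep the original proxy
        return proxy_url

    # e.g. apply_proxy_rename("http://gw-auth:3128", "s/gw-auth/gw-dl/") -> "http://gw-dl:3128"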

View File

@ -23,7 +23,7 @@ from typing import Dict, List, Optional, Any, Tuple, Union
from . import utils as sp_utils
from .process_runners import run_command, run_docker_container, get_worker_id
from .workers import get_auth_manager
from .worker_utils import get_auth_manager
from .queue_provider import RedisQueueProvider
logger = logging.getLogger(__name__)

View File

@ -0,0 +1,158 @@
import collections
import json
import logging
import os
import random
import re
import shlex
import sys
import tempfile
import shutil
import subprocess
import threading
import time
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path
from . import utils as sp_utils
from .process_runners import run_command, run_docker_container, get_worker_id
from ..profile_manager_tool import ProfileManager
from .worker_utils import find_task_and_lock_profile, _run_download_logic
logger = logging.getLogger(__name__)
def run_throughput_worker(worker_id, policy, state_manager, args, profile_manager_instance, running_processes, process_lock, profile_prefix=None):
"""A persistent worker for the 'throughput' orchestration mode."""
owner_id = f"throughput-worker-{worker_id}"
settings = policy.get('settings', {})
exec_control = policy.get('execution_control', {})
# Prioritize the passed-in profile_prefix for worker pool compatibility.
if not profile_prefix:
d_policy = policy.get('download_policy', {})
profile_prefix = d_policy.get('profile_prefix')
if not profile_prefix:
logger.error(f"[Worker {worker_id}] Throughput mode requires a 'profile_prefix' from the worker pool or 'download_policy'. Worker exiting.")
return []
no_task_streak = 0
while not state_manager.shutdown_event.is_set():
locked_profile = None
claimed_task_path = None
try:
# 0. If no tasks were found previously, pause briefly.
if no_task_streak > 0:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.info(f"[Worker {worker_id}] No tasks found in previous attempt(s). Pausing for {polling_interval}s. (Streak: {no_task_streak})")
time.sleep(polling_interval)
if state_manager.shutdown_event.is_set(): continue
# 1. Find a task and lock its associated profile
locked_profile, claimed_task_path = find_task_and_lock_profile(
profile_manager_instance, owner_id, profile_prefix, policy, worker_id
)
if not locked_profile:
# No task/profile combo was available.
no_task_streak += 1
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.info(f"[Worker {worker_id}] No available tasks found for any active profiles. Pausing for {polling_interval}s.")
time.sleep(polling_interval)
continue
profile_name = locked_profile['name']
# We have a task and a lock.
if claimed_task_path:
no_task_streak = 0 # Reset streak
# 3. Process the task
try:
with open(claimed_task_path, 'r', encoding='utf-8') as f:
info_json_content = f.read()
except (IOError, FileNotFoundError) as e:
logger.error(f"[{sp_utils.get_display_name(claimed_task_path)}] Could not read claimed task file: {e}")
# Unlock profile and continue, file might be corrupted
profile_manager_instance.unlock_profile(profile_name, owner=owner_id)
locked_profile = None
# Clean up the bad file
try: claimed_task_path.unlink()
except OSError: pass
continue
# The locked profile's proxy MUST be used for the download.
local_policy = deepcopy(policy)
local_policy.setdefault('download_policy', {})['proxy'] = locked_profile['proxy']
_run_download_logic(
source=claimed_task_path,
info_json_content=info_json_content,
policy=local_policy,
state_manager=state_manager,
args=args,
running_processes=running_processes,
process_lock=process_lock,
profile_name=profile_name,
profile_manager_instance=profile_manager_instance
)
# 4. Clean up the processed task file
try:
os.remove(claimed_task_path)
logger.debug(f"[{sp_utils.get_display_name(claimed_task_path)}] Removed processed task file.")
except OSError as e:
logger.error(f"Failed to remove processed task file '{claimed_task_path}': {e}")
else:
# This case should not be reached with the new task-first locking logic.
# If it is, it means find_task_and_lock_profile returned a profile but no task.
logger.warning(f"[Worker {worker_id}] Inconsistent state: locked profile '{profile_name}' but no task was claimed. Unlocking and continuing.")
except Exception as e:
logger.error(f"[Worker {worker_id}] An unexpected error occurred in the worker loop: {e}", exc_info=True)
time.sleep(5) # Pause before retrying to avoid spamming errors
finally:
if locked_profile:
# 5. Unlock the profile. Only apply cooldown if a task was processed.
cooldown = None
if claimed_task_path:
# The enforcer is the single place where this policy is configured: the enforcer can be
# restarted freely, whereas the stress-policy workers handling auth and downloads run
# continuously. This lets a policy change propagate across multiple workers/machines
# without restarting each of them.
# DESIGN: The cooldown duration is not configured in the worker's policy.
# Instead, it is read from a central Redis key. This key is set by the
# policy-enforcer, making the enforcer the single source of truth for
# this policy. This allows changing the cooldown behavior without
# restarting the workers.
cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds')
if cooldown_config:
try:
val = json.loads(cooldown_config)
if isinstance(val, list) and len(val) == 2 and val[0] < val[1]:
cooldown = random.randint(val[0], val[1])
elif isinstance(val, int):
cooldown = val
except (json.JSONDecodeError, TypeError):
if cooldown_config.isdigit():
cooldown = int(cooldown_config)
if cooldown:
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.")
profile_manager_instance.unlock_profile(
locked_profile['name'],
owner=owner_id,
rest_for_seconds=cooldown
)
locked_profile = None
# 6. Throughput is now controlled by the enforcer via the profile's
# 'unlock_cooldown_seconds' policy, which puts the profile into a
# RESTING state. The worker does not need to sleep here and can
# immediately try to lock a new profile to maximize throughput.
logger.info(f"[Worker {worker_id}] Worker loop finished.")
return [] # This function doesn't return results directly
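The 'unlock_cooldown_seconds' parsing appears in each worker above; below is a consolidated sketch of the accepted value forms, with a hypothetical helper name (this function does not exist in the repo).

    import json
    import random

    def resolve_cooldown_seconds(raw_value):
        """Interpret the enforcer-set 'unlock_cooldown_seconds' Redis config value.
        Accepted forms, mirroring the inline parsing: a JSON integer (e.g. "30"),
        a JSON two-element range (e.g. "[10, 60]") sampled uniformly, or a plain
        digit string. Returns None when no cooldown should be applied."""
        if not raw_value:
            return None
        try:
            val = json.loads(raw_value)
            if isinstance(val, list) and len(val) == 2 and val[0] < val[1]:
                return random.randint(val[0], val[1])
            if isinstance(val, int):
                return val
        except (json.JSONDecodeError, TypeError):
            if isinstance(raw_value, str) and raw_value.isdigit():
                return int(raw_value)
        return None

    # e.g. resolve_cooldown_seconds("30") == 30
    #      resolve_cooldown_seconds("[10, 60]") -> a random int between 10 and 60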

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large