Add cleanup and ffprobe support, per-worker profile prefixes, load balancing in the enforcer, dummy mode updates

This commit is contained in:
aperez 2025-12-30 09:47:03 +03:00
parent 5479e8c8f8
commit efac6cf1fb
12 changed files with 1346 additions and 1717 deletions

View File

@ -1 +0,0 @@
PASS_TO_UNLOCK_host_vars_encrypted

View File

@ -1,44 +0,0 @@
Deploy with ansible from af-jump
```
ssh user@af-jump
cp cluster.dummy.yml cluster.stress.yml
vi cluster.stress.yml
./tools/generate-inventory.py cluster.stress.yml
ansible-playbook ansible/playbook-XXX -i ansible/inventory.stress.ini
playbook-base-system.yml
playbook-proxies.yml
playbook-stress-sync-code.yml
playbook-stress-install-deps.yml
playbook-stress-generate-env.yml
playbook-docker-services-setup.yml
```
Code-only updates of ytops
```
#git pull or ./tools/sync-to-jump.sh
playbook-stress-sync-code.yml
```
Running
```
ssh user@af-green
cd /srv/airflow_master
./bin/build-yt-dlp-image
bin/ytops-client setup-profiles --policy policies/6_profile_setup_policy.yaml --cleanup-all
bin/ytops-client profile list --auth-env sim_auth --download-env sim_download --live --no-blink --show-proxy-activity
bin/ytops-client policy-enforcer --policy policies/8_unified_simulation_enforcer.yaml --live
bin/ytops-client stress-policy --policy policies/10_direct_docker_auth_simulation.yaml --verbose --set execution_control.workers=1 --set settings.urls_file=inputfiles/urls.rt3700.txt
bin/ytops-client stress-policy --policy policies/11_direct_docker_download_simulation.yaml --set execution_control.workers=1 --verbose
```

View File

@ -20,13 +20,30 @@ settings:
save_info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
execution_control:
workers: 1
# Define worker pools, each tied to a specific profile prefix.
# The stress tool will launch the specified number of workers for each pool.
worker_pools:
- profile_prefix: "user1"
workers: 1
- profile_prefix: "user2"
workers: 1
# How long a worker should pause if it cannot find an available profile to lock.
worker_polling_interval_seconds: 1
# No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.
info_json_generation_policy:
profile_prefix: "user1"
# This setting tells the auth worker how many download tasks will be generated
# per successful info.json. It is used to correctly increment the
# 'pending_downloads' counter on the auth profile.
# Can be an integer, or 'from_download_policy' to automatically count formats
# from the 'download_policy.formats' setting in this same policy file.
downloads_per_url: "from_download_policy"
# profile_prefix is now defined per-pool in execution_control.worker_pools
# This section is needed for the 'downloads_per_url: from_download_policy' setting.
# It should mirror the formats being used by the download simulation.
download_policy:
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
direct_docker_cli_policy:
# Which simulation environment's profiles to use for locking.

View File

@ -14,23 +14,34 @@ settings:
# This directory should contain info.json files generated by an auth simulation,
# like `10_direct_docker_auth_simulation`.
# It MUST be inside the docker_host_mount_path.
info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
info_json_dir: "run/docker_mount/download_tasks"
#info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
# Regex to extract the profile name from a task filename. The first capture
# group is used. This is crucial for the task-first locking strategy.
# It looks for a component that starts with 'user' between two hyphens.
profile_extraction_regex: '^.+?-(user[^-]+)-'
execution_control:
workers: 1
# Define worker pools, each tied to a specific profile prefix.
# The stress tool will launch the specified number of workers for each pool.
worker_pools:
- profile_prefix: "user1"
workers: 1
- profile_prefix: "user2"
workers: 1
# How long a worker should pause if it cannot find an available profile or task.
worker_polling_interval_seconds: 1
download_policy:
profile_prefix: "user1"
# profile_prefix is now defined per-pool in execution_control.worker_pools
# A comma-separated list of format IDs to download for each info.json.
# This is used by the dummy mode simulation to test per-format downloads.
# In non-dummy mode, the format selector in ytdlp_config_overrides is used.
formats: "140-dashy,299-dashy"
# After a successful download, run ffprobe to generate a stream info JSON file.
run_ffprobe: true
# After a successful download, replace the media file with a zero-byte .empty file.
cleanup: true
# Default cooldown in seconds if not specified by the enforcer in Redis.
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
# will always take precedence. This is a fallback.
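The `profile_extraction_regex` above can be checked in isolation. The filename below is hypothetical, following the `{video_id}-{profile_name}-{proxy}.info.json` convention mentioned in the task generator elsewhere in this commit:
```python
import re

# First capture group is the profile name, per the policy comment above.
PROFILE_RE = re.compile(r'^.+?-(user[^-]+)-')

# Hypothetical task filename following {video_id}-{profile_name}-{proxy}.info.json.
name = "dQw4w9WgXcQ-user1_0-sslocal-rust-1092.info.json"
m = PROFILE_RE.match(name)
assert m and m.group(1) == "user1_0"
```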

View File

@ -15,6 +15,9 @@ auth_profile_setup:
- prefix: "user1"
proxy: "sslocal-rust-1092:1092"
count: 4
- prefix: "user2"
proxy: "sslocal-rust-1093:1093"
count: 4
# --- Profile setup for the DOWNLOAD simulation ---
download_profile_setup:
@ -24,4 +27,6 @@ download_profile_setup:
- prefix: "user1"
proxy: "sslocal-rust-1092:1092"
count: 4
- prefix: "user2"
proxy: "sslocal-rust-1093:1093"
count: 4
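For reference, the profile names this setup presumably creates: the `user1_0` example in the enforcer policy later in this commit suggests a `{prefix}_{index}` naming scheme, which would give four auth and four download profiles per prefix. A sketch under that assumption:
```python
# Assumed naming scheme ({prefix}_{index}, zero-based), inferred from the
# 'user1_0' example in the enforcer policy; setup-profiles owns the real logic.
setup = [("user1", 4), ("user2", 4)]
profiles = [f"{prefix}_{i}" for prefix, count in setup for i in range(count)]
# -> ['user1_0', ..., 'user1_3', 'user2_0', ..., 'user2_3']
```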

View File

@ -19,12 +19,16 @@ simulation_parameters:
# --- Policies for the Authentication Simulation ---
auth_policy_enforcer_config:
# New setting for load balancing across profile groups.
# "round_robin": Cycle through available groups evenly (FIFO based on rest time).
# "least_loaded": Prioritize the group with the fewest pending downloads.
profile_selection_strategy: "least_loaded"
# Ban if 2 failures occur within a 1-minute window.
#ban_on_failures: 2
#ban_on_failures_window_minutes: 1
# The standard rest policy is disabled, as rotation is handled by the profile group.
profile_prefix: "user1"
# New rate limit policy to enforce requests-per-hour limits.
# For guest sessions, the limit is ~300 videos/hour.
@ -44,7 +48,7 @@ auth_policy_enforcer_config:
# The enforcer will ensure that no more than `max_active_profiles` from this
# group are in the ACTIVE state at any time.
profile_groups:
- name: "exclusive_auth_profiles"
- name: "auth_user1"
prefix: "user1"
# Enforce that only 1 profile from this group can be active at a time.
max_active_profiles: 1
@ -65,6 +69,14 @@ auth_policy_enforcer_config:
# Safety net: max time to wait for downloads before forcing rotation.
# Should be aligned with info.json URL validity (e.g., 4 hours = 240 mins).
max_wait_for_downloads_minutes: 240
- name: "auth_user2"
prefix: "user2"
max_active_profiles: 1
rotate_after_requests: 25
rest_duration_minutes_on_rotation: 1
defer_activation_if_any_waiting: true
wait_download_finish_per_profile: true
max_wait_for_downloads_minutes: 240
# Time-based proxy rules are disabled as they are not needed for this setup.
proxy_work_minutes: 0
@ -89,26 +101,28 @@ auth_policy_enforcer_config:
unlock_cooldown_seconds: 1
# Cross-simulation synchronization
cross_simulation_sync:
#cross_simulation_sync:
# Link auth profiles to download profiles (by name)
# Both profiles should exist in their respective environments
profile_links:
- auth: "user1"
download: "user1"
#profile_links:
# - auth: "user1"
# download: "user1"
# - auth: "user2"
# download: "user2"
# Which states to synchronize
#sync_states:
# - "RESTING" # Disabling to prevent deadlock when auth profile is waiting for downloads.
# The download profile must remain active to process them.
# - "BANNED"
# - "BANNED"
# Whether to sync rotation (when auth is rotated due to rotate_after_requests)
#sync_rotation: true
# Whether download profile should be banned if auth is banned (even if download hasn't violated its own rules)
#enforce_auth_lead: true
# Ensures the same profile (e.g., user1_0) is active in both simulations.
# This will activate the correct download profile and rest any others in its group.
sync_active_profile: true
#sync_active_profile: true
# When an auth profile is waiting for downloads, ensure the matching download profile is active
sync_waiting_downloads: true
#sync_waiting_downloads: true
# --- Policies for the Download Simulation ---
download_policy_enforcer_config:
@ -117,7 +131,6 @@ download_policy_enforcer_config:
ban_on_failures_window_minutes: 1
# Standard rest policy is disabled in favor of group rotation.
profile_prefix: "user1"
# New rate limit policy to enforce requests-per-hour limits.
# For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
@ -135,11 +148,16 @@ download_policy_enforcer_config:
# A group of profiles that are mutually exclusive. Only one will be active at a time.
profile_groups:
- name: "exclusive_download_profiles"
- name: "download_user1"
prefix: "user1"
rotate_after_requests: 25
rest_duration_minutes_on_rotation: 1
max_active_profiles: 1
max_active_profiles: 4
- name: "download_user2"
prefix: "user2"
rotate_after_requests: 25
rest_duration_minutes_on_rotation: 1
max_active_profiles: 4
# Time-based proxy rules are disabled.
proxy_work_minutes: 50
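To make `least_loaded` concrete: the enforcer sums `pending_downloads` across each group's profiles and considers resting profiles from the least-loaded group first, with the group name as a deterministic tie-breaker. A toy run of that ordering (the actual implementation is in the `policy_enforcer` diff below):
```python
# Toy illustration of 'least_loaded' group ordering. With these loads,
# download_user2's profiles are offered for activation before download_user1's.
group_load = {"download_user1": 7, "download_user2": 2}
order = sorted(group_load.items(), key=lambda item: (item[1], item[0]))
assert order == [("download_user2", 2), ("download_user1", 7)]
```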

View File

@ -3,6 +3,7 @@
CLI tool to enforce policies on profiles.
"""
import argparse
import collections
import json
import logging
import os
@ -67,7 +68,7 @@ class PolicyEnforcer:
self.enforce_profile_group_policies(getattr(args, 'profile_groups', []), all_profiles_map)
# Un-rest profiles. This also reads from and modifies the local `all_profiles_map`.
self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map)
self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map, args)
# --- Phase 3: Apply policies to individual active profiles ---
# Use the now-updated snapshot to determine which profiles are active.
@ -148,7 +149,7 @@ class PolicyEnforcer:
return True # Indicates profile was rested
return False
def enforce_unrest_policy(self, profile_groups, all_profiles_map):
def enforce_unrest_policy(self, profile_groups, all_profiles_map, args):
all_profiles_list = list(all_profiles_map.values())
resting_profiles = [p for p in all_profiles_list if p['state'] == self.manager.STATE_RESTING]
cooldown_profiles = [p for p in all_profiles_list if p['state'] == self.manager.STATE_COOLDOWN]
@ -158,10 +159,6 @@ class PolicyEnforcer:
if not profiles_to_check:
return
# Sort profiles to check by their rest_until timestamp, then by name.
# This creates a deterministic FIFO queue for activation.
profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
# --- Group-aware unrest logic ---
profile_to_group_map = {}
group_to_profiles_map = {}
@ -194,6 +191,57 @@ class PolicyEnforcer:
live_active_counts[group_name] = count
# --- End group logic setup ---
# --- New Sorting Logic based on Profile Selection Strategy ---
strategy = getattr(args, 'profile_selection_strategy', 'round_robin')
if strategy == 'least_loaded' and profile_groups:
logger.debug("Applying 'least_loaded' profile selection strategy.")
# Separate profiles that are ready from those that are not
ready_profiles = [p for p in profiles_to_check if now >= p.get('rest_until', 0)]
not_ready_profiles = [p for p in profiles_to_check if now < p.get('rest_until', 0)]
# Group ready profiles by their group name
ready_by_group = collections.defaultdict(list)
for p in ready_profiles:
group_name = profile_to_group_map.get(p['name'])
if group_name:
ready_by_group[group_name].append(p)
# Calculate load for each group (sum of pending downloads of all profiles in the group)
group_load = {}
for group_name, profiles_in_group_names in group_to_profiles_map.items():
total_pending = sum(
all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
for p_name in profiles_in_group_names
)
group_load[group_name] = total_pending
# Sort groups by load, then by name for stability
sorted_groups = sorted(group_load.items(), key=lambda item: (item[1], item[0]))
logger.debug(f"Group load order: {[(name, load) for name, load in sorted_groups]}")
# Rebuild the list of ready profiles, ordered by group load
sorted_ready_profiles = []
for group_name, load in sorted_groups:
profiles_in_group = ready_by_group.get(group_name, [])
# Within a group, sort by rest_until (FIFO)
profiles_in_group.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
sorted_ready_profiles.extend(profiles_in_group)
# Add profiles not in any group to the end
profiles_not_in_group = [p for p in ready_profiles if not profile_to_group_map.get(p['name'])]
profiles_not_in_group.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
sorted_ready_profiles.extend(profiles_not_in_group)
# The final list to check is the sorted ready profiles, followed by the not-ready ones.
not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
profiles_to_check = sorted_ready_profiles + not_ready_profiles
else: # Default FIFO sort
if strategy not in ['round_robin']:
logger.warning(f"Unknown or unhandled profile_selection_strategy '{strategy}'. Defaulting to 'round_robin' (FIFO).")
profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
# --- End New Sorting Logic ---
# --- New logic: Identify groups with waiting profiles ---
groups_with_waiting_profiles = {}
if profile_groups:
@ -1170,6 +1218,7 @@ def main_policy_enforcer(args):
'unlock_stale_locks_after_seconds': 120,
'unlock_cooldown_seconds': 0,
'max_global_proxy_active_minutes': 0, 'rest_duration_on_max_active': 10,
'profile_selection_strategy': 'round_robin',
'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': []
}

View File

@ -290,17 +290,26 @@ class ProfileManager:
if not profile_names:
return []
# Use a pipeline to fetch all profile data at once for efficiency
pipe = self.redis.pipeline()
for name in profile_names:
pipe.hgetall(self._profile_key(name))
all_profile_data = pipe.execute()
# Also fetch pending download counts for all profiles
pipe = self.redis.pipeline()
for name in profile_names:
pipe.get(self._pending_downloads_key(name))
all_pending_downloads = pipe.execute()
# --- Batch fetch profile data to avoid timeouts ---
all_profile_data = []
all_pending_downloads = []
batch_size = 500
for i in range(0, len(profile_names), batch_size):
batch_names = profile_names[i:i + batch_size]
# Fetch profile hashes
pipe = self.redis.pipeline()
for name in batch_names:
pipe.hgetall(self._profile_key(name))
all_profile_data.extend(pipe.execute())
# Fetch pending download counts
pipe = self.redis.pipeline()
for name in batch_names:
pipe.get(self._pending_downloads_key(name))
all_pending_downloads.extend(pipe.execute())
# --- End batch fetch ---
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
'tolerated_error_count', 'download_count', 'download_error_count',
@ -667,27 +676,31 @@ class ProfileManager:
if not proxy_urls:
return {}
pipe = self.redis.pipeline()
for proxy_url in proxy_urls:
pipe.hgetall(self._proxy_state_key(proxy_url))
results = pipe.execute()
states = {}
for i, data in enumerate(results):
proxy_url = proxy_urls[i]
if data:
# Convert numeric fields
for field in ['rest_until', 'work_start_timestamp']:
if field in data:
try:
data[field] = float(data[field])
except (ValueError, TypeError):
data[field] = 0.0
states[proxy_url] = data
else:
# Default to ACTIVE if no state is found
states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
batch_size = 500
for i in range(0, len(proxy_urls), batch_size):
batch_urls = proxy_urls[i:i + batch_size]
pipe = self.redis.pipeline()
for proxy_url in batch_urls:
pipe.hgetall(self._proxy_state_key(proxy_url))
results = pipe.execute()
for j, data in enumerate(results):
proxy_url = batch_urls[j]
if data:
# Convert numeric fields
for field in ['rest_until', 'work_start_timestamp']:
if field in data:
try:
data[field] = float(data[field])
except (ValueError, TypeError):
data[field] = 0.0
states[proxy_url] = data
else:
# Default to ACTIVE if no state is found
states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
return states
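The change above replaces one unbounded pipeline with fixed-size chunks, so each Redis round trip stays small. The same pattern in isolation (standard redis-py API; the connection settings are placeholders):
```python
import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)  # placeholder

def batched_hgetall(keys, batch_size=500):
    """Fetch many hashes in fixed-size pipeline batches so no single
    round trip grows with the total number of keys."""
    results = []
    for i in range(0, len(keys), batch_size):
        pipe = r.pipeline()
        for key in keys[i:i + batch_size]:
            pipe.hgetall(key)
        results.extend(pipe.execute())
    return results
```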

View File

@ -129,7 +129,8 @@ Overridable Policy Parameters via --set:
download_policy.proxy_rename Regex substitution for the proxy URL (e.g., 's/old/new/').
download_policy.pause_before_download_seconds Pause for N seconds before starting each download attempt.
download_policy.continue_downloads Enable download continuation (true/false).
download_policy.cleanup After success: for native downloaders, rename and truncate file to 0 bytes; for 'aria2c_rpc', remove file(s) from filesystem.
download_policy.cleanup After success, replace downloaded media file with a zero-byte '.empty' file.
download_policy.run_ffprobe After success, run ffprobe on the media file and save stream info to a .ffprobe.json file.
download_policy.extra_args A string of extra arguments for the download script (e.g., "--limit-rate 5M").
download_policy.sleep_per_proxy_seconds Cooldown in seconds between downloads on the same proxy.
download_policy.rate_limits.per_proxy.max_requests Max downloads for a single proxy in a time period.
@ -195,6 +196,9 @@ Overridable Policy Parameters via --set:
'If a path is provided, cleans that directory. '
'If used without a path, cleans the directory specified in download_policy.output_dir or direct_docker_cli_policy.docker_host_download_path. '
'If no output_dir is set, it fails.')
download_util_group.add_argument('--run-ffprobe', action=argparse.BooleanOptionalAction, default=None,
help='After a successful download, run ffprobe to generate a stream info JSON file. '
'Overrides download_policy.run_ffprobe.')
download_util_group.add_argument('--reset-local-cache-folder', nargs='?', const='.', default=None,
help="Before running, delete the contents of the local cache folder used by direct_docker_cli mode. "
"The cache folder is defined by 'direct_docker_cli_policy.docker_host_cache_path' in the policy. "

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -56,7 +56,6 @@ def add_task_generator_parser(subparsers):
gen_parser.add_argument('--formats', required=True, help='A comma-separated list of format IDs or selectors to generate tasks for (e.g., "18,140,bestvideo").')
gen_parser.add_argument('--live', action='store_true', help='Run continuously, watching the source directory for new files.')
gen_parser.add_argument('--interval-seconds', type=int, default=10, help='When in --live mode, how often to scan for new files.')
gen_parser.add_argument('--dummy', action='store_true', help='Generate dummy task files without reading info.json content. Useful for testing download workers.')
gen_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
reset_parser = generate_subparsers.add_parser(
@ -124,29 +123,11 @@ def main_task_generator(args):
return 1
def _generate_tasks_for_file(source_file, output_dir, formats_to_generate, is_dummy_mode):
def _generate_tasks_for_file(source_file, output_dir, formats_to_generate):
"""Helper function to generate task files for a single source info.json."""
try:
info_json_content = {}
if is_dummy_mode:
# In dummy mode, we don't read the file content. We create a minimal structure.
# We try to parse the filename to get video_id and profile_name for organization.
# Example filename: {video_id}-{profile_name}-{proxy}.info.json
parts = source_file.stem.split('-')
video_id = parts[0] if parts else 'dummy_video'
profile_name = next((p for p in parts if p.startswith('user')), None)
info_json_content = {
'id': video_id,
'_dummy': True,
'_ytops_metadata': {
'profile_name': profile_name
}
}
logger.debug(f"DUMMY MODE: Generating tasks for source file: {source_file.name}")
else:
with open(source_file, 'r', encoding='utf-8') as f:
info_json_content = json.load(f)
with open(source_file, 'r', encoding='utf-8') as f:
info_json_content = json.load(f)
except (IOError, json.JSONDecodeError) as e:
logger.warning(f"Skipping file '{source_file.name}' due to read/parse error: {e}")
return 0
@ -237,7 +218,7 @@ def _main_task_generator_generate(args):
total_tasks_generated = 0
for source_file in source_files:
tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
total_tasks_generated += tasks_for_file
logger.info(f"Successfully generated {total_tasks_generated} new task file(s) in '{output_dir}'.")
@ -258,7 +239,7 @@ def _main_task_generator_generate(args):
logger.info(f"Live mode: Found {len(source_files)} source file(s) to process.")
for source_file in source_files:
if shutdown_event: break
tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
total_tasks_generated += tasks_for_file
if shutdown_event: break