Add cleanup, ffmpeg, per-worker profile prefixes, load balancing in the enforcer, and dummy mode updates
This commit is contained in:
parent 5479e8c8f8
commit efac6cf1fb
@@ -1 +0,0 @@
-PASS_TO_UNLOCK_host_vars_encrypted

README.md
@@ -1,44 +0,0 @@
-Deploy with ansible from af-jump
-```
-ssh user@af-jump
-
-cp cluster.dummy.yml cluster.stress.yml
-vi cluster.stress.yml
-
-./tools/generate-inventory.py cluster.stress.yml
-
-ansible-playbook ansible/playbook-XXX -i ansible/inventory.stress.ini
-
-playbook-base-system.yml
-playbook-proxies.yml
-playbook-stress-sync-code.yml
-playbook-stress-install-deps.yml
-playbook-stress-generate-env.yml
-playbook-docker-services-setup.yml
-```
-
-Code updates only of ytops
-```
-#git pull or ./tools/sync-to-jump.sh
-
-playbook-stress-sync-code.yml
-```
-
-Running
-```
-ssh user@af-green
-cd /srv/airflow_master
-./bin/build-yt-dlp-image
-
-bin/ytops-client setup-profiles --policy policies/6_profile_setup_policy.yaml --cleanup-all
-bin/ytops-client profile list --auth-env sim_auth --download-env sim_download --live --no-blink --show-proxy-activity
-
-bin/ytops-client policy-enforcer --policy policies/8_unified_simulation_enforcer.yaml --live
-
-bin/ytops-client stress-policy --policy policies/10_direct_docker_auth_simulation.yaml --verbose --set execution_control.workers=1 --set settings.urls_file=inputfiles/urls.rt3700.txt
-bin/ytops-client stress-policy --policy policies/11_direct_docker_download_simulation.yaml --set execution_control.workers=1 --verbose
-
-
-```
@@ -20,13 +20,30 @@ settings:
   save_info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"

 execution_control:
-  workers: 1
+  # Define worker pools, each tied to a specific profile prefix.
+  # The stress tool will launch the specified number of workers for each pool.
+  worker_pools:
+    - profile_prefix: "user1"
+      workers: 1
+    - profile_prefix: "user2"
+      workers: 1
   # How long a worker should pause if it cannot find an available profile to lock.
   worker_polling_interval_seconds: 1
   # No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.

 info_json_generation_policy:
-  profile_prefix: "user1"
+  # This setting tells the auth worker how many download tasks will be generated
+  # per successful info.json. It is used to correctly increment the
+  # 'pending_downloads' counter on the auth profile.
+  # Can be an integer, or 'from_download_policy' to automatically count formats
+  # from the 'download_policy.formats' setting in this same policy file.
+  downloads_per_url: "from_download_policy"
+  # profile_prefix is now defined per-pool in execution_control.worker_pools
+
+  # This section is needed for the 'downloads_per_url: from_download_policy' setting.
+  # It should mirror the formats being used by the download simulation.
+  download_policy:
+    formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"

 direct_docker_cli_policy:
   # Which simulation environment's profiles to use for locking.
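Since `downloads_per_url: "from_download_policy"` derives the pending-downloads increment from `download_policy.formats`, here is a minimal sketch of the counting rule, assuming one download per comma-separated selector group (the `/` entries within a group are fallbacks, not extra downloads):

```
# Illustrative only: one plausible reading of 'downloads_per_url: from_download_policy'.
# Assumption: each comma-separated group in download_policy.formats yields one download.
def count_downloads_per_url(formats_selector: str) -> int:
    groups = [g for g in formats_selector.split(",") if g.strip()]
    return len(groups)

formats = "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
print(count_downloads_per_url(formats))  # -> 2 (one video group, one audio group)
```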
@ -14,23 +14,34 @@ settings:
|
|||||||
# This directory should contain info.json files generated by an auth simulation,
|
# This directory should contain info.json files generated by an auth simulation,
|
||||||
# like `10_direct_docker_auth_simulation`.
|
# like `10_direct_docker_auth_simulation`.
|
||||||
# It MUST be inside the docker_host_mount_path.
|
# It MUST be inside the docker_host_mount_path.
|
||||||
info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
|
info_json_dir: "run/docker_mount/download_tasks"
|
||||||
|
#info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
|
||||||
# Regex to extract the profile name from a task filename. The first capture
|
# Regex to extract the profile name from a task filename. The first capture
|
||||||
# group is used. This is crucial for the task-first locking strategy.
|
# group is used. This is crucial for the task-first locking strategy.
|
||||||
# It looks for a component that starts with 'user' between two hyphens.
|
# It looks for a component that starts with 'user' between two hyphens.
|
||||||
profile_extraction_regex: '^.+?-(user[^-]+)-'
|
profile_extraction_regex: '^.+?-(user[^-]+)-'
|
||||||
|
|
||||||
execution_control:
|
execution_control:
|
||||||
workers: 1
|
# Define worker pools, each tied to a specific profile prefix.
|
||||||
|
# The stress tool will launch the specified number of workers for each pool.
|
||||||
|
worker_pools:
|
||||||
|
- profile_prefix: "user1"
|
||||||
|
workers: 1
|
||||||
|
- profile_prefix: "user2"
|
||||||
|
workers: 1
|
||||||
# How long a worker should pause if it cannot find an available profile or task.
|
# How long a worker should pause if it cannot find an available profile or task.
|
||||||
worker_polling_interval_seconds: 1
|
worker_polling_interval_seconds: 1
|
||||||
|
|
||||||
download_policy:
|
download_policy:
|
||||||
profile_prefix: "user1"
|
# profile_prefix is now defined per-pool in execution_control.worker_pools
|
||||||
# A comma-separated list of format IDs to download for each info.json.
|
# A comma-separated list of format IDs to download for each info.json.
|
||||||
# This is used by the dummy mode simulation to test per-format downloads.
|
# This is used by the dummy mode simulation to test per-format downloads.
|
||||||
# In non-dummy mode, the format selector in ytdlp_config_overrides is used.
|
# In non-dummy mode, the format selector in ytdlp_config_overrides is used.
|
||||||
formats: "140-dashy,299-dashy"
|
formats: "140-dashy,299-dashy"
|
||||||
|
# After a successful download, run ffprobe to generate a stream info JSON file.
|
||||||
|
run_ffprobe: true
|
||||||
|
# After a successful download, replace the media file with a zero-byte .empty file.
|
||||||
|
cleanup: true
|
||||||
# Default cooldown in seconds if not specified by the enforcer in Redis.
|
# Default cooldown in seconds if not specified by the enforcer in Redis.
|
||||||
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
|
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
|
||||||
# will always take precedence. This is a fallback.
|
# will always take precedence. This is a fallback.
|
||||||
|
|||||||
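A quick check of `profile_extraction_regex` against a hypothetical task filename that follows the `{video_id}-{profile_name}-{proxy}.info.json` pattern referenced elsewhere in the repo (the filename itself is made up for illustration):

```
import re

# Regex from the policy above; the first capture group is the profile name.
PROFILE_RE = re.compile(r'^.+?-(user[^-]+)-')

# Hypothetical task filename: {video_id}-{profile_name}-{proxy}.info.json
filename = "dQw4w9WgXcQ-user2_3-sslocal-rust-1093.info.json"

match = PROFILE_RE.match(filename)
print(match.group(1) if match else None)  # -> user2_3
```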
@@ -15,6 +15,9 @@ auth_profile_setup:
     - prefix: "user1"
       proxy: "sslocal-rust-1092:1092"
       count: 4
+    - prefix: "user2"
+      proxy: "sslocal-rust-1093:1093"
+      count: 4

 # --- Profile setup for the DOWNLOAD simulation ---
 download_profile_setup:
@@ -24,4 +27,6 @@ download_profile_setup:
     - prefix: "user1"
       proxy: "sslocal-rust-1092:1092"
       count: 4
+    - prefix: "user2"
+      proxy: "sslocal-rust-1093:1093"
+      count: 4
@@ -19,12 +19,16 @@ simulation_parameters:

 # --- Policies for the Authentication Simulation ---
 auth_policy_enforcer_config:
+  # New setting for load balancing across profile groups.
+  # "round_robin": Cycle through available groups evenly (FIFO based on rest time).
+  # "least_loaded": Prioritize the group with the fewest pending downloads.
+  profile_selection_strategy: "least_loaded"
+
   # Ban if 2 failures occur within a 1-minute window.
   #ban_on_failures: 2
   #ban_on_failures_window_minutes: 1

   # The standard rest policy is disabled, as rotation is handled by the profile group.
-  profile_prefix: "user1"

   # New rate limit policy to enforce requests-per-hour limits.
   # For guest sessions, the limit is ~300 videos/hour.
@@ -44,7 +48,7 @@ auth_policy_enforcer_config:
   # The enforcer will ensure that no more than `max_active_profiles` from this
   # group are in the ACTIVE state at any time.
   profile_groups:
-    - name: "exclusive_auth_profiles"
+    - name: "auth_user1"
       prefix: "user1"
       # Enforce that only 1 profile from this group can be active at a time.
       max_active_profiles: 1
@@ -65,6 +69,14 @@ auth_policy_enforcer_config:
       # Safety net: max time to wait for downloads before forcing rotation.
       # Should be aligned with info.json URL validity (e.g., 4 hours = 240 mins).
       max_wait_for_downloads_minutes: 240
+    - name: "auth_user2"
+      prefix: "user2"
+      max_active_profiles: 1
+      rotate_after_requests: 25
+      rest_duration_minutes_on_rotation: 1
+      defer_activation_if_any_waiting: true
+      wait_download_finish_per_profile: true
+      max_wait_for_downloads_minutes: 240

   # Time-based proxy rules are disabled as they are not needed for this setup.
   proxy_work_minutes: 0
@@ -89,26 +101,28 @@ auth_policy_enforcer_config:
   unlock_cooldown_seconds: 1

   # Cross-simulation synchronization
-  cross_simulation_sync:
+  #cross_simulation_sync:
     # Link auth profiles to download profiles (by name)
     # Both profiles should exist in their respective environments
-    profile_links:
-      - auth: "user1"
-        download: "user1"
+    #profile_links:
+    # - auth: "user1"
+    #   download: "user1"
+    # - auth: "user2"
+    #   download: "user2"
     # Which states to synchronize
     #sync_states:
     # - "RESTING" # Disabling to prevent deadlock when auth profile is waiting for downloads.
     #   The download profile must remain active to process them.
     # - "BANNED"
     # Whether to sync rotation (when auth is rotated due to rotate_after_requests)
     #sync_rotation: true
     # Whether download profile should be banned if auth is banned (even if download hasn't violated its own rules)
     #enforce_auth_lead: true
     # Ensures the same profile (e.g., user1_0) is active in both simulations.
     # This will activate the correct download profile and rest any others in its group.
-    sync_active_profile: true
+    #sync_active_profile: true
     # When an auth profile is waiting for downloads, ensure the matching download profile is active
-    sync_waiting_downloads: true
+    #sync_waiting_downloads: true

 # --- Policies for the Download Simulation ---
 download_policy_enforcer_config:
@@ -117,7 +131,6 @@ download_policy_enforcer_config:
   ban_on_failures_window_minutes: 1

   # Standard rest policy is disabled in favor of group rotation.
-  profile_prefix: "user1"

   # New rate limit policy to enforce requests-per-hour limits.
   # For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
@@ -135,11 +148,16 @@ download_policy_enforcer_config:

   # A group of profiles that are mutually exclusive. Only one will be active at a time.
   profile_groups:
-    - name: "exclusive_download_profiles"
+    - name: "download_user1"
       prefix: "user1"
       rotate_after_requests: 25
       rest_duration_minutes_on_rotation: 1
-      max_active_profiles: 1
+      max_active_profiles: 4
+    - name: "download_user2"
+      prefix: "user2"
+      rotate_after_requests: 25
+      rest_duration_minutes_on_rotation: 1
+      max_active_profiles: 4

   # Time-based proxy rules are disabled.
   proxy_work_minutes: 50
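The enforcer change further down wires `profile_selection_strategy` into the un-rest logic; as a standalone illustration of the difference between the two options, assuming each group exposes its total pending downloads and the rest-expiry of its longest-resting profile (field names here are illustrative, not from the codebase):

```
# Illustrative sketch of the two selection strategies, not the enforcer code itself.
def pick_next_group(groups, strategy="least_loaded"):
    if strategy == "least_loaded":
        # Fewest pending downloads first; name breaks ties for stability.
        return min(groups, key=lambda g: (g["pending_downloads"], g["name"]))
    # Default "round_robin": FIFO on rest time, i.e. whichever group has waited longest.
    return min(groups, key=lambda g: (g["oldest_rest_until"], g["name"]))

groups = [
    {"name": "auth_user1", "pending_downloads": 12, "oldest_rest_until": 100.0},
    {"name": "auth_user2", "pending_downloads": 3, "oldest_rest_until": 250.0},
]
print(pick_next_group(groups)["name"])                 # -> auth_user2 (least loaded)
print(pick_next_group(groups, "round_robin")["name"])  # -> auth_user1 (rested longest)
```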
@@ -3,6 +3,7 @@
 CLI tool to enforce policies on profiles.
 """
 import argparse
+import collections
 import json
 import logging
 import os
@@ -67,7 +68,7 @@ class PolicyEnforcer:
         self.enforce_profile_group_policies(getattr(args, 'profile_groups', []), all_profiles_map)

         # Un-rest profiles. This also reads from and modifies the local `all_profiles_map`.
-        self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map)
+        self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map, args)

         # --- Phase 3: Apply policies to individual active profiles ---
         # Use the now-updated snapshot to determine which profiles are active.
@@ -148,7 +149,7 @@ class PolicyEnforcer:
             return True  # Indicates profile was rested
         return False

-    def enforce_unrest_policy(self, profile_groups, all_profiles_map):
+    def enforce_unrest_policy(self, profile_groups, all_profiles_map, args):
         all_profiles_list = list(all_profiles_map.values())
         resting_profiles = [p for p in all_profiles_list if p['state'] == self.manager.STATE_RESTING]
         cooldown_profiles = [p for p in all_profiles_list if p['state'] == self.manager.STATE_COOLDOWN]
@@ -158,10 +159,6 @@ class PolicyEnforcer:
         if not profiles_to_check:
             return

-        # Sort profiles to check by their rest_until timestamp, then by name.
-        # This creates a deterministic FIFO queue for activation.
-        profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
-
         # --- Group-aware unrest logic ---
         profile_to_group_map = {}
         group_to_profiles_map = {}
@@ -194,6 +191,57 @@ class PolicyEnforcer:
             live_active_counts[group_name] = count
         # --- End group logic setup ---

+        # --- New Sorting Logic based on Profile Selection Strategy ---
+        strategy = getattr(args, 'profile_selection_strategy', 'round_robin')
+        if strategy == 'least_loaded' and profile_groups:
+            logger.debug("Applying 'least_loaded' profile selection strategy.")
+            # Separate profiles that are ready from those that are not
+            ready_profiles = [p for p in profiles_to_check if now >= p.get('rest_until', 0)]
+            not_ready_profiles = [p for p in profiles_to_check if now < p.get('rest_until', 0)]
+
+            # Group ready profiles by their group name
+            ready_by_group = collections.defaultdict(list)
+            for p in ready_profiles:
+                group_name = profile_to_group_map.get(p['name'])
+                if group_name:
+                    ready_by_group[group_name].append(p)
+
+            # Calculate load for each group (sum of pending downloads of all profiles in the group)
+            group_load = {}
+            for group_name, profiles_in_group_names in group_to_profiles_map.items():
+                total_pending = sum(
+                    all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
+                    for p_name in profiles_in_group_names
+                )
+                group_load[group_name] = total_pending
+
+            # Sort groups by load, then by name for stability
+            sorted_groups = sorted(group_load.items(), key=lambda item: (item[1], item[0]))
+            logger.debug(f"Group load order: {[(name, load) for name, load in sorted_groups]}")
+
+            # Rebuild the list of ready profiles, ordered by group load
+            sorted_ready_profiles = []
+            for group_name, load in sorted_groups:
+                profiles_in_group = ready_by_group.get(group_name, [])
+                # Within a group, sort by rest_until (FIFO)
+                profiles_in_group.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
+                sorted_ready_profiles.extend(profiles_in_group)
+
+            # Add profiles not in any group to the end
+            profiles_not_in_group = [p for p in ready_profiles if not profile_to_group_map.get(p['name'])]
+            profiles_not_in_group.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
+            sorted_ready_profiles.extend(profiles_not_in_group)
+
+            # The final list to check is the sorted ready profiles, followed by the not-ready ones.
+            not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
+            profiles_to_check = sorted_ready_profiles + not_ready_profiles
+
+        else:  # Default FIFO sort
+            if strategy not in ['round_robin']:
+                logger.warning(f"Unknown or unhandled profile_selection_strategy '{strategy}'. Defaulting to 'round_robin' (FIFO).")
+            profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
+        # --- End New Sorting Logic ---
+
         # --- New logic: Identify groups with waiting profiles ---
         groups_with_waiting_profiles = {}
         if profile_groups:
@@ -1170,6 +1218,7 @@ def main_policy_enforcer(args):
         'unlock_stale_locks_after_seconds': 120,
         'unlock_cooldown_seconds': 0,
         'max_global_proxy_active_minutes': 0, 'rest_duration_on_max_active': 10,
+        'profile_selection_strategy': 'round_robin',
         'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': []
     }

@@ -290,17 +290,26 @@ class ProfileManager:
         if not profile_names:
             return []

-        # Use a pipeline to fetch all profile data at once for efficiency
-        pipe = self.redis.pipeline()
-        for name in profile_names:
-            pipe.hgetall(self._profile_key(name))
-        all_profile_data = pipe.execute()
-
-        # Also fetch pending download counts for all profiles
-        pipe = self.redis.pipeline()
-        for name in profile_names:
-            pipe.get(self._pending_downloads_key(name))
-        all_pending_downloads = pipe.execute()
+        # --- Batch fetch profile data to avoid timeouts ---
+        all_profile_data = []
+        all_pending_downloads = []
+        batch_size = 500
+
+        for i in range(0, len(profile_names), batch_size):
+            batch_names = profile_names[i:i + batch_size]
+
+            # Fetch profile hashes
+            pipe = self.redis.pipeline()
+            for name in batch_names:
+                pipe.hgetall(self._profile_key(name))
+            all_profile_data.extend(pipe.execute())
+
+            # Fetch pending download counts
+            pipe = self.redis.pipeline()
+            for name in batch_names:
+                pipe.get(self._pending_downloads_key(name))
+            all_pending_downloads.extend(pipe.execute())
+        # --- End batch fetch ---

         numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
                           'tolerated_error_count', 'download_count', 'download_error_count',
@@ -667,27 +676,31 @@ class ProfileManager:
         if not proxy_urls:
             return {}

-        pipe = self.redis.pipeline()
-        for proxy_url in proxy_urls:
-            pipe.hgetall(self._proxy_state_key(proxy_url))
-
-        results = pipe.execute()
-
-        states = {}
-        for i, data in enumerate(results):
-            proxy_url = proxy_urls[i]
-            if data:
-                # Convert numeric fields
-                for field in ['rest_until', 'work_start_timestamp']:
-                    if field in data:
-                        try:
-                            data[field] = float(data[field])
-                        except (ValueError, TypeError):
-                            data[field] = 0.0
-                states[proxy_url] = data
-            else:
-                # Default to ACTIVE if no state is found
-                states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
+        states = {}
+        batch_size = 500
+
+        for i in range(0, len(proxy_urls), batch_size):
+            batch_urls = proxy_urls[i:i + batch_size]
+
+            pipe = self.redis.pipeline()
+            for proxy_url in batch_urls:
+                pipe.hgetall(self._proxy_state_key(proxy_url))
+            results = pipe.execute()
+
+            for j, data in enumerate(results):
+                proxy_url = batch_urls[j]
+                if data:
+                    # Convert numeric fields
+                    for field in ['rest_until', 'work_start_timestamp']:
+                        if field in data:
+                            try:
+                                data[field] = float(data[field])
+                            except (ValueError, TypeError):
+                                data[field] = 0.0
+                    states[proxy_url] = data
+                else:
+                    # Default to ACTIVE if no state is found
+                    states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}

         return states

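Both hunks above apply the same idiom: cap each Redis pipeline at 500 commands so a single oversized pipeline cannot stall or time out. A generic sketch of that idiom, with names that are illustrative rather than taken from the codebase:

```
import redis

def fetch_hashes_in_batches(redis_client: "redis.Redis", keys: list, batch_size: int = 500) -> list:
    """Fetch many hashes with bounded pipelines instead of one unbounded pipeline."""
    results = []
    for start in range(0, len(keys), batch_size):
        pipe = redis_client.pipeline()
        for key in keys[start:start + batch_size]:
            pipe.hgetall(key)
        results.extend(pipe.execute())
    return results
```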
@@ -129,7 +129,8 @@ Overridable Policy Parameters via --set:
   download_policy.proxy_rename  Regex substitution for the proxy URL (e.g., 's/old/new/').
   download_policy.pause_before_download_seconds  Pause for N seconds before starting each download attempt.
   download_policy.continue_downloads  Enable download continuation (true/false).
-  download_policy.cleanup  After success: for native downloaders, rename and truncate file to 0 bytes; for 'aria2c_rpc', remove file(s) from filesystem.
+  download_policy.cleanup  After success, replace downloaded media file with a zero-byte '.empty' file.
+  download_policy.run_ffprobe  After success, run ffprobe on the media file and save stream info to a .ffprobe.json file.
   download_policy.extra_args  A string of extra arguments for the download script (e.g., "--limit-rate 5M").
   download_policy.sleep_per_proxy_seconds  Cooldown in seconds between downloads on the same proxy.
   download_policy.rate_limits.per_proxy.max_requests  Max downloads for a single proxy in a time period.
@@ -195,6 +196,9 @@ Overridable Policy Parameters via --set:
                                      'If a path is provided, cleans that directory. '
                                      'If used without a path, cleans the directory specified in download_policy.output_dir or direct_docker_cli_policy.docker_host_download_path. '
                                      'If no output_dir is set, it fails.')
+    download_util_group.add_argument('--run-ffprobe', action=argparse.BooleanOptionalAction, default=None,
+                                     help='After a successful download, run ffprobe to generate a stream info JSON file. '
+                                          'Overrides download_policy.run_ffprobe.')
     download_util_group.add_argument('--reset-local-cache-folder', nargs='?', const='.', default=None,
                                      help="Before running, delete the contents of the local cache folder used by direct_docker_cli mode. "
                                           "The cache folder is defined by 'direct_docker_cli_policy.docker_host_cache_path' in the policy. "
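For context on what `--run-ffprobe` produces, a hedged sketch of the kind of call involved; the exact ffprobe flags and output naming used by the tool are not shown in this diff:

```
import subprocess
from pathlib import Path

def write_stream_info(media_path: str) -> Path:
    """Run ffprobe on a media file and save its stream/format report as JSON."""
    # Standard ffprobe JSON invocation; the real tool may use different flags.
    result = subprocess.run(
        ["ffprobe", "-v", "error", "-print_format", "json",
         "-show_format", "-show_streams", media_path],
        capture_output=True, text=True, check=True,
    )
    # Assumed naming, based on the '.ffprobe.json' convention in the help text above.
    out_path = Path(media_path).with_suffix(".ffprobe.json")
    out_path.write_text(result.stdout)
    return out_path
```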
File diff suppressed because it is too large
@@ -56,7 +56,6 @@ def add_task_generator_parser(subparsers):
     gen_parser.add_argument('--formats', required=True, help='A comma-separated list of format IDs or selectors to generate tasks for (e.g., "18,140,bestvideo").')
     gen_parser.add_argument('--live', action='store_true', help='Run continuously, watching the source directory for new files.')
     gen_parser.add_argument('--interval-seconds', type=int, default=10, help='When in --live mode, how often to scan for new files.')
-    gen_parser.add_argument('--dummy', action='store_true', help='Generate dummy task files without reading info.json content. Useful for testing download workers.')
     gen_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')

     reset_parser = generate_subparsers.add_parser(
@@ -124,29 +123,11 @@ def main_task_generator(args):
         return 1


-def _generate_tasks_for_file(source_file, output_dir, formats_to_generate, is_dummy_mode):
+def _generate_tasks_for_file(source_file, output_dir, formats_to_generate):
     """Helper function to generate task files for a single source info.json."""
     try:
-        info_json_content = {}
-        if is_dummy_mode:
-            # In dummy mode, we don't read the file content. We create a minimal structure.
-            # We try to parse the filename to get video_id and profile_name for organization.
-            # Example filename: {video_id}-{profile_name}-{proxy}.info.json
-            parts = source_file.stem.split('-')
-            video_id = parts[0] if parts else 'dummy_video'
-            profile_name = next((p for p in parts if p.startswith('user')), None)
-
-            info_json_content = {
-                'id': video_id,
-                '_dummy': True,
-                '_ytops_metadata': {
-                    'profile_name': profile_name
-                }
-            }
-            logger.debug(f"DUMMY MODE: Generating tasks for source file: {source_file.name}")
-        else:
-            with open(source_file, 'r', encoding='utf-8') as f:
-                info_json_content = json.load(f)
+        with open(source_file, 'r', encoding='utf-8') as f:
+            info_json_content = json.load(f)
     except (IOError, json.JSONDecodeError) as e:
         logger.warning(f"Skipping file '{source_file.name}' due to read/parse error: {e}")
         return 0
@@ -237,7 +218,7 @@ def _main_task_generator_generate(args):

         total_tasks_generated = 0
         for source_file in source_files:
-            tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
+            tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
             total_tasks_generated += tasks_for_file

         logger.info(f"Successfully generated {total_tasks_generated} new task file(s) in '{output_dir}'.")
@@ -258,7 +239,7 @@ def _main_task_generator_generate(args):
                 logger.info(f"Live mode: Found {len(source_files)} source file(s) to process.")
                 for source_file in source_files:
                     if shutdown_event: break
-                    tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
+                    tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
                     total_tasks_generated += tasks_for_file

                 if shutdown_event: break