Add cleanup, ffmpeg, changes on prefixes per worker, load balance on enforcer, dummy updates mode

2025-12-30 09:47:03 +03:00 · 2025-12-30 09:47:03 +03:00 · efac6cf1fb
commit efac6cf1fb
parent 5479e8c8f8
12 changed files with 1346 additions and 1717 deletions
--- a/.vault_pass.example
+++ b/.vault_pass.example
@ -1 +0,0 @@
 PASS_TO_UNLOCK_host_vars_encrypted
--- a/README.md
+++ b/README.md
@ -1,44 +0,0 @@
 Deploy with ansible from af-jump
 ```
 ssh user@af-jump
 cp cluster.dummy.yml cluster.stress.yml
 vi cluster.stress.yml
 ./tools/generate-inventory.py cluster.stress.yml
 ansible-playbook  ansible/playbook-XXX -i ansible/inventory.stress.ini
 playbook-base-system.yml
 playbook-proxies.yml
 playbook-stress-sync-code.yml
 playbook-stress-install-deps.yml
 playbook-stress-generate-env.yml
 playbook-docker-services-setup.yml
 ```
 Code updates only of ytops
 ```
 #git pull or ./tools/sync-to-jump.sh 
 playbook-stress-sync-code.yml
 ```
 Running 
 ```
 ssh user@af-green
 cd /srv/airflow_master
 ./bin/build-yt-dlp-image 
 bin/ytops-client setup-profiles --policy policies/6_profile_setup_policy.yaml --cleanup-all
 bin/ytops-client profile list  --auth-env sim_auth --download-env sim_download --live --no-blink  --show-proxy-activity
 bin/ytops-client policy-enforcer --policy policies/8_unified_simulation_enforcer.yaml --live
 bin/ytops-client stress-policy --policy policies/10_direct_docker_auth_simulation.yaml --verbose --set execution_control.workers=1  --set settings.urls_file=inputfiles/urls.rt3700.txt
 bin/ytops-client stress-policy --policy policies/11_direct_docker_download_simulation.yaml  --set execution_control.workers=1  --verbose
 ```
--- a/ytops_client-source/policies/10_direct_docker_auth_simulation.yaml
+++ b/ytops_client-source/policies/10_direct_docker_auth_simulation.yaml
@ -20,13 +20,30 @@ settings:
  save_info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
 execution_control:
-  workers: 1
+  # Define worker pools, each tied to a specific profile prefix.
  # The stress tool will launch the specified number of workers for each pool.
  worker_pools:
    - profile_prefix: "user1"
      workers: 1
    - profile_prefix: "user2"
      workers: 1
  # How long a worker should pause if it cannot find an available profile to lock.
  worker_polling_interval_seconds: 1
  # No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.
 info_json_generation_policy:
-  profile_prefix: "user1"
+  # This setting tells the auth worker how many download tasks will be generated
  # per successful info.json. It is used to correctly increment the
  # 'pending_downloads' counter on the auth profile.
  # Can be an integer, or 'from_download_policy' to automatically count formats
  # from the 'download_policy.formats' setting in this same policy file.
  downloads_per_url: "from_download_policy"
  # profile_prefix is now defined per-pool in execution_control.worker_pools
 # This section is needed for the 'downloads_per_url: from_download_policy' setting.
 # It should mirror the formats being used by the download simulation.
 download_policy:
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
 direct_docker_cli_policy:
  # Which simulation environment's profiles to use for locking.
--- a/ytops_client-source/policies/11_direct_docker_download_simulation.yaml
+++ b/ytops_client-source/policies/11_direct_docker_download_simulation.yaml
@ -14,23 +14,34 @@ settings:
  # This directory should contain info.json files generated by an auth simulation,
  # like `10_direct_docker_auth_simulation`.
  # It MUST be inside the docker_host_mount_path.
-  info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
+  info_json_dir: "run/docker_mount/download_tasks"
    #info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
  # Regex to extract the profile name from a task filename. The first capture
  # group is used. This is crucial for the task-first locking strategy.
  # It looks for a component that starts with 'user' between two hyphens.
  profile_extraction_regex: '^.+?-(user[^-]+)-'
 execution_control:
-  workers: 1
+  # Define worker pools, each tied to a specific profile prefix.
  # The stress tool will launch the specified number of workers for each pool.
  worker_pools:
    - profile_prefix: "user1"
      workers: 1
    - profile_prefix: "user2"
      workers: 1
  # How long a worker should pause if it cannot find an available profile or task.
  worker_polling_interval_seconds: 1
 download_policy:
-  profile_prefix: "user1"
+  # profile_prefix is now defined per-pool in execution_control.worker_pools
  # A comma-separated list of format IDs to download for each info.json.
  # This is used by the dummy mode simulation to test per-format downloads.
  # In non-dummy mode, the format selector in ytdlp_config_overrides is used.
  formats: "140-dashy,299-dashy"
  # After a successful download, run ffprobe to generate a stream info JSON file.
  run_ffprobe: true
  # After a successful download, replace the media file with a zero-byte .empty file.
  cleanup: true
  # Default cooldown in seconds if not specified by the enforcer in Redis.
  # The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
  # will always take precedence. This is a fallback.
--- a/ytops_client-source/policies/6_profile_setup_policy.yaml
+++ b/ytops_client-source/policies/6_profile_setup_policy.yaml
@ -15,6 +15,9 @@ auth_profile_setup:
    - prefix: "user1"
      proxy: "sslocal-rust-1092:1092"
      count: 4
    - prefix: "user2"
      proxy: "sslocal-rust-1093:1093"
      count: 4
 # --- Profile setup for the DOWNLOAD simulation ---
 download_profile_setup:
@ -24,4 +27,6 @@ download_profile_setup:
    - prefix: "user1"
      proxy: "sslocal-rust-1092:1092"
      count: 4
-
+    - prefix: "user2"
      proxy: "sslocal-rust-1093:1093"
      count: 4
--- a/ytops_client-source/policies/8_unified_simulation_enforcer.yaml
+++ b/ytops_client-source/policies/8_unified_simulation_enforcer.yaml
@ -19,12 +19,16 @@ simulation_parameters:
 # --- Policies for the Authentication Simulation ---
 auth_policy_enforcer_config:
  # New setting for load balancing across profile groups.
  # "round_robin": Cycle through available groups evenly (FIFO based on rest time).
  # "least_loaded": Prioritize the group with the fewest pending downloads.
  profile_selection_strategy: "least_loaded"
  # Ban if 2 failures occur within a 1-minute window.
  #ban_on_failures: 2
  #ban_on_failures_window_minutes: 1
  # The standard rest policy is disabled, as rotation is handled by the profile group.
  profile_prefix: "user1"
  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour.
@ -44,7 +48,7 @@ auth_policy_enforcer_config:
  # The enforcer will ensure that no more than `max_active_profiles` from this
  # group are in the ACTIVE state at any time.
  profile_groups:
-    - name: "exclusive_auth_profiles"
+    - name: "auth_user1"
      prefix: "user1"
      # Enforce that only 1 profile from this group can be active at a time.
      max_active_profiles: 1
@ -65,6 +69,14 @@ auth_policy_enforcer_config:
      # Safety net: max time to wait for downloads before forcing rotation.
      # Should be aligned with info.json URL validity (e.g., 4 hours = 240 mins).
      max_wait_for_downloads_minutes: 240
    - name: "auth_user2"
      prefix: "user2"
      max_active_profiles: 1
      rotate_after_requests: 25
      rest_duration_minutes_on_rotation: 1
      defer_activation_if_any_waiting: true
      wait_download_finish_per_profile: true
      max_wait_for_downloads_minutes: 240
  # Time-based proxy rules are disabled as they are not needed for this setup.
  proxy_work_minutes: 0
@ -89,26 +101,28 @@ auth_policy_enforcer_config:
  unlock_cooldown_seconds: 1
 # Cross-simulation synchronization
-cross_simulation_sync:
+#cross_simulation_sync:
  # Link auth profiles to download profiles (by name)
  # Both profiles should exist in their respective environments
-  profile_links:
+  #profile_links:
-    - auth: "user1"
+  # - auth: "user1"
-      download: "user1"
+  #    download: "user1"
  #  - auth: "user2"
  #    download: "user2"
  # Which states to synchronize
  #sync_states:
    # - "RESTING"  # Disabling to prevent deadlock when auth profile is waiting for downloads.
                   # The download profile must remain active to process them.
-     #  - "BANNED"
+    #  - "BANNED"
  # Whether to sync rotation (when auth is rotated due to rotate_after_requests)
  #sync_rotation: true
  # Whether download profile should be banned if auth is banned (even if download hasn't violated its own rules)
  #enforce_auth_lead: true
  # Ensures the same profile (e.g., user1_0) is active in both simulations.
  # This will activate the correct download profile and rest any others in its group.
-  sync_active_profile: true
+  #sync_active_profile: true
  # When an auth profile is waiting for downloads, ensure the matching download profile is active
-  sync_waiting_downloads: true
+  #sync_waiting_downloads: true
 # --- Policies for the Download Simulation ---
 download_policy_enforcer_config:
@ -117,7 +131,6 @@ download_policy_enforcer_config:
  ban_on_failures_window_minutes: 1
  # Standard rest policy is disabled in favor of group rotation.
  profile_prefix: "user1"
  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
@ -135,11 +148,16 @@ download_policy_enforcer_config:
  # A group of profiles that are mutually exclusive. Only one will be active at a time.
  profile_groups:
-    - name: "exclusive_download_profiles"
+    - name: "download_user1"
      prefix: "user1"
      rotate_after_requests: 25
      rest_duration_minutes_on_rotation: 1
-      max_active_profiles: 1
+      max_active_profiles: 4
    - name: "download_user2"
      prefix: "user2"
      rotate_after_requests: 25
      rest_duration_minutes_on_rotation: 1
      max_active_profiles: 4
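  # Time-based proxy rules. NOTE(review): this comment previously said they are disabled, but the value below is 50 (the auth config uses 0 for "disabled") - confirm which is intended.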
  proxy_work_minutes: 50
--- a/ytops_client-source/ytops_client/policy_enforcer_tool.py
+++ b/ytops_client-source/ytops_client/policy_enforcer_tool.py
@ -3,6 +3,7 @@
 CLI tool to enforce policies on profiles.
 """
 import argparse
 import collections
 import json
 import logging
 import os
@ -67,7 +68,7 @@ class PolicyEnforcer:
        self.enforce_profile_group_policies(getattr(args, 'profile_groups', []), all_profiles_map)
        # Un-rest profiles. This also reads from and modifies the local `all_profiles_map`.
-        self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map)
+        self.enforce_unrest_policy(getattr(args, 'profile_groups', []), all_profiles_map, args)
        # --- Phase 3: Apply policies to individual active profiles ---
        # Use the now-updated snapshot to determine which profiles are active.
@ -148,7 +149,7 @@ class PolicyEnforcer:
            return True # Indicates profile was rested
        return False
-    def enforce_unrest_policy(self, profile_groups, all_profiles_map):
+    def enforce_unrest_policy(self, profile_groups, all_profiles_map, args):
        all_profiles_list = list(all_profiles_map.values())
        resting_profiles = [p for p in all_profiles_list if p['state'] == self.manager.STATE_RESTING]
        cooldown_profiles = [p for p in all_profiles_list if p['state'] == self.manager.STATE_COOLDOWN]
@ -158,10 +159,6 @@ class PolicyEnforcer:
        if not profiles_to_check:
            return
        # Sort profiles to check by their rest_until timestamp, then by name.
        # This creates a deterministic FIFO queue for activation.
        profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
        # --- Group-aware unrest logic ---
        profile_to_group_map = {}
        group_to_profiles_map = {}
@ -194,6 +191,57 @@ class PolicyEnforcer:
                live_active_counts[group_name] = count
        # --- End group logic setup ---
        # --- New Sorting Logic based on Profile Selection Strategy ---
        strategy = getattr(args, 'profile_selection_strategy', 'round_robin')
        if strategy == 'least_loaded' and profile_groups:
            logger.debug("Applying 'least_loaded' profile selection strategy.")
            # Separate profiles that are ready from those that are not
            ready_profiles = [p for p in profiles_to_check if now >= p.get('rest_until', 0)]
            not_ready_profiles = [p for p in profiles_to_check if now < p.get('rest_until', 0)]
            # Group ready profiles by their group name
            ready_by_group = collections.defaultdict(list)
            for p in ready_profiles:
                group_name = profile_to_group_map.get(p['name'])
                if group_name:
                    ready_by_group[group_name].append(p)
            # Calculate load for each group (sum of pending downloads of all profiles in the group)
            group_load = {}
            for group_name, profiles_in_group_names in group_to_profiles_map.items():
                total_pending = sum(
                    all_profiles_map.get(p_name, {}).get('pending_downloads', 0)
                    for p_name in profiles_in_group_names
                )
                group_load[group_name] = total_pending
            # Sort groups by load, then by name for stability
            sorted_groups = sorted(group_load.items(), key=lambda item: (item[1], item[0]))
            logger.debug(f"Group load order: {[(name, load) for name, load in sorted_groups]}")
            # Rebuild the list of ready profiles, ordered by group load
            sorted_ready_profiles = []
            for group_name, load in sorted_groups:
                profiles_in_group = ready_by_group.get(group_name, [])
                # Within a group, sort by rest_until (FIFO)
                profiles_in_group.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
                sorted_ready_profiles.extend(profiles_in_group)
            # Add profiles not in any group to the end
            profiles_not_in_group = [p for p in ready_profiles if not profile_to_group_map.get(p['name'])]
            profiles_not_in_group.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
            sorted_ready_profiles.extend(profiles_not_in_group)
            # The final list to check is the sorted ready profiles, followed by the not-ready ones.
            not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
            profiles_to_check = sorted_ready_profiles + not_ready_profiles
        else: # Default FIFO sort
            if strategy not in ['round_robin']:
                logger.warning(f"Unknown or unhandled profile_selection_strategy '{strategy}'. Defaulting to 'round_robin' (FIFO).")
            profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), p.get('name', '')))
        # --- End New Sorting Logic ---
        # --- New logic: Identify groups with waiting profiles ---
        groups_with_waiting_profiles = {}
        if profile_groups:
@ -1170,6 +1218,7 @@ def main_policy_enforcer(args):
        'unlock_stale_locks_after_seconds': 120,
        'unlock_cooldown_seconds': 0,
        'max_global_proxy_active_minutes': 0, 'rest_duration_on_max_active': 10,
        'profile_selection_strategy': 'round_robin',
        'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': []
    }
--- a/ytops_client-source/ytops_client/profile_manager_tool.py
+++ b/ytops_client-source/ytops_client/profile_manager_tool.py
@ -290,17 +290,26 @@ class ProfileManager:
        if not profile_names:
            return []
-        # Use a pipeline to fetch all profile data at once for efficiency
+        # --- Batch fetch profile data to avoid timeouts ---
-        pipe = self.redis.pipeline()
+        all_profile_data = []
-        for name in profile_names:
+        all_pending_downloads = []
-            pipe.hgetall(self._profile_key(name))
+        batch_size = 500
-        all_profile_data = pipe.execute()
+        
-
+        for i in range(0, len(profile_names), batch_size):
-        # Also fetch pending download counts for all profiles
+            batch_names = profile_names[i:i + batch_size]
-        pipe = self.redis.pipeline()
+            
-        for name in profile_names:
+            # Fetch profile hashes
-            pipe.get(self._pending_downloads_key(name))
+            pipe = self.redis.pipeline()
-        all_pending_downloads = pipe.execute()
+            for name in batch_names:
                pipe.hgetall(self._profile_key(name))
            all_profile_data.extend(pipe.execute())
            # Fetch pending download counts
            pipe = self.redis.pipeline()
            for name in batch_names:
                pipe.get(self._pending_downloads_key(name))
            all_pending_downloads.extend(pipe.execute())
        # --- End batch fetch ---
        numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
                         'tolerated_error_count', 'download_count', 'download_error_count',
@ -667,27 +676,31 @@ class ProfileManager:
        if not proxy_urls:
            return {}
        pipe = self.redis.pipeline()
        for proxy_url in proxy_urls:
            pipe.hgetall(self._proxy_state_key(proxy_url))
        results = pipe.execute()
        states = {}
-        for i, data in enumerate(results):
+        batch_size = 500
-            proxy_url = proxy_urls[i]
+        
-            if data:
+        for i in range(0, len(proxy_urls), batch_size):
-                # Convert numeric fields
+            batch_urls = proxy_urls[i:i + batch_size]
-                for field in ['rest_until', 'work_start_timestamp']:
+            
-                    if field in data:
+            pipe = self.redis.pipeline()
-                        try:
+            for proxy_url in batch_urls:
-                            data[field] = float(data[field])
+                pipe.hgetall(self._proxy_state_key(proxy_url))
-                        except (ValueError, TypeError):
+            results = pipe.execute()
-                            data[field] = 0.0
+            
-                states[proxy_url] = data
+            for j, data in enumerate(results):
-            else:
+                proxy_url = batch_urls[j]
-                # Default to ACTIVE if no state is found
+                if data:
-                states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
+                    # Convert numeric fields
                    for field in ['rest_until', 'work_start_timestamp']:
                        if field in data:
                            try:
                                data[field] = float(data[field])
                            except (ValueError, TypeError):
                                data[field] = 0.0
                    states[proxy_url] = data
                else:
                    # Default to ACTIVE if no state is found
                    states[proxy_url] = {'state': self.STATE_ACTIVE, 'rest_until': 0.0, 'work_start_timestamp': 0.0}
        return states
--- a/ytops_client-source/ytops_client/stress_policy/arg_parser.py
+++ b/ytops_client-source/ytops_client/stress_policy/arg_parser.py
@ -129,7 +129,8 @@ Overridable Policy Parameters via --set:
  download_policy.proxy_rename            Regex substitution for the proxy URL (e.g., 's/old/new/').
  download_policy.pause_before_download_seconds Pause for N seconds before starting each download attempt.
  download_policy.continue_downloads      Enable download continuation (true/false).
-  download_policy.cleanup                 After success: for native downloaders, rename and truncate file to 0 bytes; for 'aria2c_rpc', remove file(s) from filesystem.
+  download_policy.cleanup                 After success, replace downloaded media file with a zero-byte '.empty' file.
  download_policy.run_ffprobe             After success, run ffprobe on the media file and save stream info to a .ffprobe.json file.
  download_policy.extra_args              A string of extra arguments for the download script (e.g., "--limit-rate 5M").
  download_policy.sleep_per_proxy_seconds Cooldown in seconds between downloads on the same proxy.
  download_policy.rate_limits.per_proxy.max_requests Max downloads for a single proxy in a time period.
@ -195,6 +196,9 @@ Overridable Policy Parameters via --set:
                                          'If a path is provided, cleans that directory. '
                                          'If used without a path, cleans the directory specified in download_policy.output_dir or direct_docker_cli_policy.docker_host_download_path. '
                                          'If no output_dir is set, it fails.')
    download_util_group.add_argument('--run-ffprobe', action=argparse.BooleanOptionalAction, default=None,
                                     help='After a successful download, run ffprobe to generate a stream info JSON file. '
                                          'Overrides download_policy.run_ffprobe.')
    download_util_group.add_argument('--reset-local-cache-folder', nargs='?', const='.', default=None,
                                     help="Before running, delete the contents of the local cache folder used by direct_docker_cli mode. "
                                          "The cache folder is defined by 'direct_docker_cli_policy.docker_host_cache_path' in the policy. "
--- a/ytops_client-source/ytops_client/stress_policy/workers.py
+++ b/ytops_client-source/ytops_client/stress_policy/workers.py
--- a/ytops_client-source/ytops_client/stress_policy_tool.py
+++ b/ytops_client-source/ytops_client/stress_policy_tool.py
--- a/ytops_client-source/ytops_client/task_generator_tool.py
+++ b/ytops_client-source/ytops_client/task_generator_tool.py
@ -56,7 +56,6 @@ def add_task_generator_parser(subparsers):
    gen_parser.add_argument('--formats', required=True, help='A comma-separated list of format IDs or selectors to generate tasks for (e.g., "18,140,bestvideo").')
    gen_parser.add_argument('--live', action='store_true', help='Run continuously, watching the source directory for new files.')
    gen_parser.add_argument('--interval-seconds', type=int, default=10, help='When in --live mode, how often to scan for new files.')
    gen_parser.add_argument('--dummy', action='store_true', help='Generate dummy task files without reading info.json content. Useful for testing download workers.')
    gen_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
    reset_parser = generate_subparsers.add_parser(
@ -124,29 +123,11 @@ def main_task_generator(args):
    return 1
-def _generate_tasks_for_file(source_file, output_dir, formats_to_generate, is_dummy_mode):
+def _generate_tasks_for_file(source_file, output_dir, formats_to_generate):
    """Helper function to generate task files for a single source info.json."""
    try:
-        info_json_content = {}
+        with open(source_file, 'r', encoding='utf-8') as f:
-        if is_dummy_mode:
+            info_json_content = json.load(f)
            # In dummy mode, we don't read the file content. We create a minimal structure.
            # We try to parse the filename to get video_id and profile_name for organization.
            # Example filename: {video_id}-{profile_name}-{proxy}.info.json
            parts = source_file.stem.split('-')
            video_id = parts[0] if parts else 'dummy_video'
            profile_name = next((p for p in parts if p.startswith('user')), None)
            info_json_content = {
                'id': video_id,
                '_dummy': True,
                '_ytops_metadata': {
                    'profile_name': profile_name
                }
            }
            logger.debug(f"DUMMY MODE: Generating tasks for source file: {source_file.name}")
        else:
            with open(source_file, 'r', encoding='utf-8') as f:
                info_json_content = json.load(f)
    except (IOError, json.JSONDecodeError) as e:
        logger.warning(f"Skipping file '{source_file.name}' due to read/parse error: {e}")
        return 0
@ -237,7 +218,7 @@ def _main_task_generator_generate(args):
        total_tasks_generated = 0
        for source_file in source_files:
-            tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
+            tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
            total_tasks_generated += tasks_for_file
        logger.info(f"Successfully generated {total_tasks_generated} new task file(s) in '{output_dir}'.")
@ -258,7 +239,7 @@ def _main_task_generator_generate(args):
                logger.info(f"Live mode: Found {len(source_files)} source file(s) to process.")
                for source_file in source_files:
                    if shutdown_event: break
-                    tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
+                    tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate)
                    total_tasks_generated += tasks_for_file
            if shutdown_event: break