Update ansible: auth and downloads run on the dl machines, while the enforcer runs on master

aperez 2026-01-11 01:21:25 +03:00
parent db78171281
commit 7a514ab8ce
45 changed files with 3377 additions and 471 deletions

View File

@ -1,7 +1,5 @@
vault_redis_password: "rOhTAIlTFFylXsjhqwxnYxDChFc"
-vault_postgres_password: "pgdb_pwd_A7bC2xY9zE1wV5uP"
vault_redis_port: 52909
-vault_airflow_admin_password: "2r234sdfrt3q454arq45q355"
-vault_flower_password: "dO4eXm7UkF81OdMvT8E2tIKFtPYPCzyzwlcZ4RyOmCsmG4qzrNFqM5sNTOT9"
vault_vnc_password: "vnc_pwd_Z5xW8cV2bN4mP7lK"
vault_ss_password_1: "UCUAR7vRO/u9Zo71nfA13c+/b1MCiJpfZJo+EmEBCfA="
vault_ss_password_2: "tgtQcfjJp/A3F01g4woO0bEQoxij3CAOK/iR1OTPuF4="

View File

@ -0,0 +1,102 @@
---
# This task file is meant to be included via include_tasks; it is not a standalone playbook.
# It provides generic tasks for managing a process within a tmux session.
# Required variables:
# - tmux_session_name: The name of the tmux session.
# - working_dir: The directory where the command should be executed.
# - command_to_run: The command to execute inside the tmux session.
# - process_grep_pattern: A regex pattern to identify the process for pkill/status check.
#
# Optional variables (control flags):
# - start_process: (bool) Set to true to start the process. Default: false.
# - stop_process: (bool) Set to true to stop the process. Default: false.
# - check_status: (bool) Set to true to check and display the process status. Default: false.
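#
# Illustrative include (a sketch only; the session name, directory, command,
# and grep pattern below are placeholders, not values used by this repo):
# - name: Manage an example process
#   ansible.builtin.include_tasks:
#     file: manage-processes-tasks.yml
#   vars:
#     tmux_session_name: "stress-example"
#     working_dir: "/srv/app"
#     command_to_run: "./bin/ytops-client --help"
#     process_grep_pattern: "ytops-client"
#     start_process: true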
- name: Ensure tmux is installed
ansible.builtin.apt:
name: tmux
state: present
become: yes
run_once: true # No need to run this on every include
- name: Stop existing tmux session
ansible.builtin.shell:
cmd: "tmux kill-session -t {{ tmux_session_name }} 2>/dev/null || true"
when: stop_process | default(false) | bool
- name: Kill any orphaned processes
ansible.builtin.shell:
cmd: |
PIDS=$(ps aux | grep -E "{{ process_grep_pattern }}" | grep -v "grep" | awk '{print $2}')
if [ -n "$PIDS" ]; then
kill $PIDS >/dev/null 2>&1 || true
sleep 0.5
kill -9 $PIDS >/dev/null 2>&1 || true
fi
changed_when: false
when: stop_process | default(false) | bool
- name: Stop existing process before starting (makes start idempotent)
block:
- name: Stop existing tmux session
ansible.builtin.shell:
cmd: "tmux kill-session -t {{ tmux_session_name }} 2>/dev/null || true"
- name: Kill any orphaned processes
ansible.builtin.shell:
cmd: |
PIDS=$(ps aux | grep -E "{{ process_grep_pattern }}" | grep -v "grep" | awk '{print $2}')
if [ -n "$PIDS" ]; then
kill $PIDS >/dev/null 2>&1 || true
sleep 0.5
kill -9 $PIDS >/dev/null 2>&1 || true
fi
changed_when: false
when: start_process | default(false) | bool
- name: Ensure client script is executable
ansible.builtin.file:
path: "{{ working_dir }}/bin/ytops-client"
mode: "a+x"
when: start_process | default(false) | bool
- name: Display command for tmux session
ansible.builtin.debug:
msg: "Command for tmux session '{{ tmux_session_name }}': {{ command_to_run }}"
when: start_process | default(false) | bool
- name: Start process in tmux session
ansible.builtin.shell:
cmd: |
cd {{ working_dir }}
# The command is wrapped with a final sleep to keep the tmux session alive for debugging even if the process fails.
# The actual command is run in a subshell (...) to prevent 'exec' from terminating the parent shell.
tmux new-session -d -s {{ tmux_session_name }} \
"if [ -f .env ]; then set -a; . ./.env; set +a; fi; \
COMMAND_TO_RUN='{{ command_to_run | replace("'", "'\\''") }}'; \
echo '>>> Running command:'; \
echo "\$COMMAND_TO_RUN"; \
echo '---'; \
(eval "\$COMMAND_TO_RUN") ; \
echo; echo '---'; echo 'Process exited. This tmux session will remain open for debugging.'; echo 'You can attach with: tmux attach -t {{ tmux_session_name }}'; \
sleep 3600"
when: start_process | default(false) | bool
- name: Check process status
ansible.builtin.shell:
cmd: |
echo "Process status on {{ inventory_hostname }} for session '{{ tmux_session_name }}':"
if tmux has-session -t {{ tmux_session_name }} 2>/dev/null; then
echo " - Tmux session '{{ tmux_session_name }}' is running"
ps aux | grep -E "{{ process_grep_pattern }}" | grep -v grep || echo " - No matching process found"
else
echo " - Tmux session '{{ tmux_session_name }}' is NOT running"
fi
register: status_check
changed_when: false
when: check_status | default(false) | bool
- name: Display status
ansible.builtin.debug:
msg: "{{ status_check.stdout_lines }}"
when: check_status | default(false) | bool

ansible/playbook-README.md (Normal file, 219 lines added)
View File

@ -0,0 +1,219 @@
# Ansible Playbooks Documentation
This document provides an overview of all available playbooks, their purpose, and how to use them operationally.
## Table of Contents
1. [Deployment Workflow](#deployment-workflow)
2. [Initial Setup Playbooks](#initial-setup-playbooks)
3. [Operational Playbooks](#operational-playbooks)
4. [Monitoring and Inspection](#monitoring-and-inspection)
5. [Common Operations](#common-operations)
## Deployment Workflow
The typical deployment workflow follows these steps:
1. **Initial Setup**: Configure machines, deploy proxies, install dependencies
2. **Master Setup**: Configure Redis, MinIO, and other infrastructure services
3. **Worker Setup**: Configure auth generators and download simulators
4. **Operational Management**: Start/stop processes, monitor status, cleanup profiles
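Put together, a typical end-to-end rollout is roughly the following sequence (a condensed sketch; each playbook is documented in detail in the sections below):
```bash
# Steps 1-3: install and configure all nodes (base system, proxies, code, services)
ansible-playbook ansible/playbook-full-install.yml -i ansible/inventory.green.ini

# Step 4: start the enforcer and monitor on master, then the worker processes
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "start_enforcer=true start_monitor=true"
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=start"
```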
## Initial Setup Playbooks
For a complete, automated installation on fresh nodes, you can use the main `full-install` playbook:
```bash
# Run the complete installation process on all nodes
ansible-playbook ansible/playbook-full-install.yml -i ansible/inventory.green.ini
```
The steps below are for running each part of the installation manually.
### Base Installation
```bash
# Deploy base system requirements to all nodes
ansible-playbook ansible/playbook-base-system.yml -i ansible/inventory.green.ini
```
### Proxy Deployment
```bash
# Deploy shadowsocks proxies to all nodes (as defined in cluster config)
ansible-playbook ansible/playbook-proxies.yml -i ansible/inventory.green.ini
```
### Code, Environment, and Dependencies
```bash
# Sync code to all nodes
ansible-playbook ansible/playbook-stress-sync-code.yml -i ansible/inventory.green.ini
# Generate .env files for all nodes
ansible-playbook ansible/playbook-stress-generate-env.yml -i ansible/inventory.green.ini
# Install dependencies on all nodes
ansible-playbook ansible/playbook-stress-install-deps.yml -i ansible/inventory.green.ini
```
## Operational Playbooks
### Master Node Services
Redis and MinIO are deployed as Docker containers during the initial setup (`playbook-full-install.yml`).
To start the policy enforcer and monitoring tmux sessions on the master node:
```bash
# Start policy enforcer and monitoring on master
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "start_enforcer=true start_monitor=true"
```
To stop processes on the master node:
```bash
# Stop ONLY the policy enforcer
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "stop_enforcer=true"
# Stop ONLY the monitor
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "stop_monitor=true"
# Stop BOTH the enforcer and monitor
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "stop_sessions=true"
```
### Worker Node Processes
```bash
# Start auth generators and download simulators on all workers
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=start"
# Stop all processes on workers
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=stop"
# Check status of all worker processes
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=status"
```
### Profile Management
```bash
# Clean up all profiles
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles"
# Clean up specific profile prefix
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "profile_prefix=user1"
```
## Monitoring and Inspection
### Status Checks
```bash
# Check status of all processes on all nodes
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=status"
# Check enforcer status on master
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=check-enforcer"
# Check profile status
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=profile-status"
```
### Log Inspection
```bash
# View tmux session output on a specific node
ssh user@hostname
tmux attach -t stress-auth-user1-user2      # For auth generator (commas in profile_prefix become dashes in the session name)
tmux attach -t stress-download-user1-user2  # For download simulator
tmux attach -t stress-enforcer # For policy enforcer on master
```
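If you only need a quick snapshot of a session's recent output without attaching, `tmux capture-pane` (the same call the playbooks use for their own status output) works; for example:
```bash
# Print the last 50 lines from the enforcer session's pane
tmux capture-pane -p -t stress-enforcer | tail -n 50
```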
## Common Operations
### Code and Policy Updates
First, sync your local changes to the jump host from your development machine:
```bash
# Sync project to jump host
./tools/sync-to-jump.sh
```
Then, from the jump host, you can sync code or policies to the cluster nodes:
```bash
# Sync all application code (Python sources, scripts, etc.)
ansible-playbook ansible/playbook-stress-sync-code.yml -i ansible/inventory.green.ini
# Sync only policies and CLI configs
ansible-playbook ansible/playbook-stress-sync-configs.yml -i ansible/inventory.green.ini
```
### Adding a New Worker
1. Update `cluster.green.yml` with the new worker definition:
```yaml
workers:
new-worker:
ip: x.x.x.x
port: 22
profile_prefixes:
- "user4"
proxies:
- "sslocal-rust-1090"
```
2. Regenerate inventory:
```bash
./tools/generate-inventory.py cluster.green.yml
```
3. Run the full installation playbook, limiting it to the new worker:
```bash
ansible-playbook ansible/playbook-full-install.yml -i ansible/inventory.green.ini --limit new-worker
```
4. Start processes on the new worker:
```bash
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=start" --limit new-worker
```
### Removing a Worker
1. Stop all processes on the worker:
```bash
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=stop" --limit worker-to-remove
```
2. Remove the worker from `cluster.green.yml`
3. Regenerate inventory:
```bash
./tools/generate-inventory.py cluster.green.yml
```
### Emergency Stop All
```bash
# Stop all processes on all nodes
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=stop-all"
```
### Stopping Specific Nodes
To stop processes on a specific worker or group of workers, you can use the `stop-nodes` action and limit the playbook run.
```bash
# Stop all processes on a single worker (e.g., dl003)
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=stop-nodes" --limit dl003
# Stop all processes on all workers
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=stop-nodes" --limit workers
```
### Restart Enforcer and Monitoring
```bash
# Restart monitoring and enforcer on master
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=restart-monitoring"
```

View File

@ -3,9 +3,15 @@
hosts: all
gather_facts: yes
vars_files:
-- "group_vars/all/generated_vars.stress.yml" # Assumes generate-inventory.py was run with cluster.stress.yml
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
- name: Announce base system setup
ansible.builtin.debug:
msg: "Starting base system setup on {{ inventory_hostname }}"

View File

@ -20,12 +20,19 @@
hosts: all
gather_facts: no
vars_files:
-- "group_vars/all/generated_vars.stress.yml"
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
-base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}"
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Ensure base directories and subdirectories exist
ansible.builtin.file:
@ -41,12 +48,12 @@
- "run/docker_mount/fetched_info_jsons" - "run/docker_mount/fetched_info_jsons"
become: yes become: yes
- name: "PHASE 2.2: Import playbook to install Python dependencies" - name: "PHASE 2.2: Import playbook to sync local code"
import_playbook: playbook-stress-install-deps.yml
- name: "PHASE 2.3: Import playbook to sync local code"
import_playbook: playbook-stress-sync-code.yml import_playbook: playbook-stress-sync-code.yml
- name: "PHASE 2.3: Import playbook to install Python dependencies"
import_playbook: playbook-stress-install-deps.yml
# ------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------
# PHASE 3: Environment and Service Configuration # PHASE 3: Environment and Service Configuration
# Generates the .env file and starts the role-specific services on master and workers. # Generates the .env file and starts the role-specific services on master and workers.
@ -55,11 +62,17 @@
import_playbook: playbook-stress-generate-env.yml
- name: "PHASE 3.2: Master Node Services Setup"
-hosts: airflow_master
hosts: master
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
vars_files:
-- "group_vars/all/generated_vars.stress.yml"
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Configure system performance and kernel settings
ansible.builtin.copy:
@ -94,6 +107,16 @@
mode: '0644'
become: yes
- name: Stop and remove existing containers before starting services
ansible.builtin.shell:
cmd: |
docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
become: yes
changed_when: false
ignore_errors: yes
- name: Start master services (Redis, MinIO)
community.docker.docker_compose_v2:
project_src: "{{ airflow_master_dir }}"
@ -148,16 +171,63 @@
environment:
HOME: "/home/{{ ansible_user }}"
-- name: "PHASE 3.3: Shared Storage Setup (s3fs)"
- name: "PHASE 3.2a: Worker Node Services Setup"
-hosts: airflow_master:airflow_workers
hosts: workers
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
vars_files:
-- "group_vars/all/generated_vars.stress.yml"
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Template Docker Compose file for worker services
ansible.builtin.template:
src: templates/docker-compose.stress-master.j2
dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0644'
become: yes
- name: Stop and remove existing containers before starting services
ansible.builtin.shell:
cmd: |
docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
become: yes
changed_when: false
ignore_errors: yes
- name: Start worker services
community.docker.docker_compose_v2:
project_src: "{{ airflow_worker_dir }}"
files:
- docker-compose.stress.yml
state: present
remove_orphans: true
become: yes
- name: "PHASE 3.3: Shared Storage Setup (s3fs)"
hosts: master:workers
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
-base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}"
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Mount S3 buckets via s3fs
block:
@ -176,14 +246,124 @@
mode: '0600'
become: yes
- name: Check if mount points are already mounted
ansible.builtin.shell:
cmd: "mount | grep -q '{{ item.path }}'"
loop:
- { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
- { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
register: mount_check
changed_when: false
failed_when: false
- name: Ensure mount point directories exist (only if not mounted)
ansible.builtin.file:
path: "{{ item.item.path }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
loop: "{{ mount_check.results }}"
when: item.rc != 0
become: yes
- name: Mount S3 buckets for stress testing
ansible.posix.mount:
src: "s3fs#{{ item.bucket }}"
path: "{{ item.path }}"
fstype: fuse
-opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['airflow_master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
state: mounted
loop:
- { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
- { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
become: yes
- name: "PHASE 3.4: Import playbook to initialize Redis profiles"
import_playbook: playbook-stress-init-redis.yml
# -------------------------------------------------------------------------------------------------
# PHASE 4: Monitoring and Management Services Setup
# Starts monitoring, enforcer, and simulation processes.
# -------------------------------------------------------------------------------------------------
- name: "PHASE 4.1: Import playbook to manage monitoring and enforcer processes"
import_playbook: playbook-stress-manage-processes.yml
- name: "PHASE 4.2: Start monitoring and enforcer services"
hosts: master
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
start_monitor: true
start_enforcer: true
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Ensure tmux is installed
ansible.builtin.apt:
name: tmux
state: present
become: yes
- name: Start profile monitoring in tmux session
ansible.builtin.shell:
cmd: |
cd {{ airflow_master_dir }}
tmux new-session -d -s stress-monitor \
"set -a && . ./.env && set +a && \
./bin/ytops-client profile list \
--auth-env sim_auth \
--download-env sim_download \
--live \
--no-blink \
--show-reasons"
when: start_monitor | default(false) | bool
- name: Start policy enforcer in tmux session
ansible.builtin.shell:
cmd: |
cd {{ airflow_master_dir }}
tmux new-session -d -s stress-enforcer \
"set -a && . ./.env && set +a && \
./bin/ytops-client policy-enforcer \
--policy policies/8_unified_simulation_enforcer.yaml \
--live"
when: start_enforcer | default(false) | bool
- name: List active tmux sessions
ansible.builtin.shell:
cmd: tmux list-sessions
register: tmux_sessions
changed_when: false
- name: Display active sessions
ansible.builtin.debug:
msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"
# -------------------------------------------------------------------------------------------------
# PHASE 5: Simulation Workload Generation (Optional - can be run manually)
# These playbooks are available but not automatically started by default.
# -------------------------------------------------------------------------------------------------
- name: "PHASE 5.1: Note about simulation workload generation"
hosts: localhost
gather_facts: no
tasks:
- name: Display note about simulation playbooks
ansible.builtin.debug:
msg: |
Simulation workload generation playbooks are available:
- ansible/playbook-stress-auth-generator.yml
- ansible/playbook-stress-download-simulation.yml
To start simulations manually, run:
ansible-playbook ansible/playbook-stress-auth-generator.yml \
-e "start_generator=true dummy_batch=true auth_min_seconds=2 auth_max_seconds=3"
ansible-playbook ansible/playbook-stress-download-simulation.yml \
-e "start_download=true profile_prefix=user1 download_min_seconds=2 download_max_seconds=5" \
--limit workers[0]

View File

@ -0,0 +1,404 @@
---
# This playbook provides a complete installation for fresh nodes.
# It can install either master or worker roles, or both on the same machine.
#
# Usage examples:
# # Install everything on all nodes
# ansible-playbook ansible/playbook-full-install.yml
#
# # Install only on workers
# ansible-playbook ansible/playbook-full-install.yml --limit workers
#
# # Install only on master
# ansible-playbook ansible/playbook-full-install.yml --limit master
#
# # Install both roles on a single machine
# ansible-playbook ansible/playbook-full-install.yml --limit specific-host -e "install_master=true install_worker=true"
# -------------------------------------------------------------------------------------------------
# PHASE 1: Base System Configuration
# Ensures all nodes have the necessary base packages, user configurations, and Docker installed.
# -------------------------------------------------------------------------------------------------
- name: "PHASE 1: Import base system setup playbook"
import_playbook: playbook-base-system.yml
# -------------------------------------------------------------------------------------------------
# PHASE 2: Generate Environment Configuration
# Creates .env files needed by all subsequent steps
# -------------------------------------------------------------------------------------------------
- name: "PHASE 2: Generate .env configuration"
import_playbook: playbook-stress-generate-env.yml
# -------------------------------------------------------------------------------------------------
# PHASE 3: Docker Network Setup
# Ensures the shared Docker network exists before building containers
# -------------------------------------------------------------------------------------------------
- name: "PHASE 3: Ensure Docker network exists"
hosts: all
gather_facts: no
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Create shared Docker network
community.docker.docker_network:
name: "{{ docker_network_name }}"
driver: bridge
become: yes
# -------------------------------------------------------------------------------------------------
# PHASE 4: Build yt-dlp Docker Image
# Builds the yt-dlp container from bin/ directory
# -------------------------------------------------------------------------------------------------
- name: "PHASE 4: Build yt-dlp Docker image"
hosts: all
gather_facts: no
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if (inventory_hostname in groups['master'] and not (install_worker | default(false) | bool)) else airflow_worker_dir }}"
- name: Ensure bin directory exists
ansible.builtin.file:
path: "{{ base_dir }}/bin"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Check if Dockerfile exists in bin directory
ansible.builtin.stat:
path: "{{ base_dir }}/bin/Dockerfile"
register: dockerfile_stat
- name: Build yt-dlp Docker image if Dockerfile exists
community.docker.docker_image:
name: yt-dlp-custom
tag: latest
source: build
build:
path: "{{ base_dir }}/bin"
pull: yes
state: present
force_source: yes
become: yes
when: dockerfile_stat.stat.exists
- name: Display message if Dockerfile not found
ansible.builtin.debug:
msg: "Dockerfile not found at {{ base_dir }}/bin/Dockerfile - skipping yt-dlp image build"
when: not dockerfile_stat.stat.exists
# -------------------------------------------------------------------------------------------------
# PHASE 5: Sync Code and Install Dependencies
# Copies application code and installs Python dependencies
# -------------------------------------------------------------------------------------------------
- name: "PHASE 5.1: Sync application code"
import_playbook: playbook-stress-sync-code.yml
- name: "PHASE 5.2: Install Python dependencies"
import_playbook: playbook-stress-install-deps.yml
# -------------------------------------------------------------------------------------------------
# PHASE 6: Deploy Shadowsocks Proxies
# Configures and starts proxy services
# -------------------------------------------------------------------------------------------------
- name: "PHASE 6: Deploy proxy services"
import_playbook: playbook-proxies.yml
# -------------------------------------------------------------------------------------------------
# PHASE 7: Install bgutils
# Note: Currently bgutils is deployed on master via docker-compose
# -------------------------------------------------------------------------------------------------
- name: "PHASE 7: Install bgutils"
import_playbook: playbook-install-bgutils.yml
# -------------------------------------------------------------------------------------------------
# PHASE 8: Master-Specific Services Setup
# Starts Redis, MinIO, and other master-only services
# -------------------------------------------------------------------------------------------------
- name: "PHASE 8: Master Node Services Setup"
hosts: master
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Configure system performance and kernel settings
ansible.builtin.copy:
src: "configs/etc/sysctl.d/99-system-limits.conf"
dest: "/etc/sysctl.d/99-system-limits.conf"
owner: root
group: root
mode: '0644'
become: yes
register: sysctl_config_copy
- name: Apply sysctl settings
ansible.builtin.command: sysctl --system
become: yes
when: sysctl_config_copy.changed
- name: Ensure MinIO data directory exists
ansible.builtin.file:
path: "{{ airflow_master_dir }}/minio-data"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Template Docker Compose file for master services
ansible.builtin.template:
src: templates/docker-compose.stress-master.j2
dest: "{{ airflow_master_dir }}/docker-compose.stress.yml"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0644'
become: yes
- name: Stop and remove existing containers before starting services
ansible.builtin.shell:
cmd: |
docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
become: yes
changed_when: false
ignore_errors: yes
- name: Start master services (Redis, MinIO)
community.docker.docker_compose_v2:
project_src: "{{ airflow_master_dir }}"
files:
- docker-compose.stress.yml
state: present
remove_orphans: true
become: yes
- name: Wait for MinIO service to be ready
ansible.builtin.wait_for:
host: "{{ hostvars[inventory_hostname].ansible_host }}"
port: 9000
delay: 5
timeout: 60
delegate_to: localhost
- name: Download MinIO Client (mc) if not present
ansible.builtin.command:
cmd: wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc
creates: /usr/local/bin/mc
become: yes
- name: Ensure MinIO Client (mc) is executable
ansible.builtin.file:
path: /usr/local/bin/mc
mode: '0755'
become: yes
- name: Configure mc alias for local MinIO
ansible.builtin.command: >
mc alias set local http://localhost:9000 {{ vault_s3_access_key_id }} {{ vault_s3_secret_access_key }}
become: yes
become_user: "{{ ansible_user }}"
changed_when: false
environment:
HOME: "/home/{{ ansible_user }}"
- name: Ensure S3 buckets exist in MinIO using mc
ansible.builtin.command: >
mc mb local/{{ item }}
loop:
- "stress-inputs"
- "stress-jsons"
become: yes
become_user: "{{ ansible_user }}"
register: mc_mb_result
failed_when: >
mc_mb_result.rc != 0 and
"already exists" not in mc_mb_result.stderr
changed_when: mc_mb_result.rc == 0
environment:
HOME: "/home/{{ ansible_user }}"
# -------------------------------------------------------------------------------------------------
# PHASE 9: Worker-Specific Services Setup
# Starts worker-only services if needed
# -------------------------------------------------------------------------------------------------
- name: "PHASE 9: Worker Node Services Setup"
hosts: workers
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Template Docker Compose file for worker services
ansible.builtin.template:
src: templates/docker-compose.stress-master.j2
dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0644'
become: yes
- name: Stop and remove existing containers before starting services
ansible.builtin.shell:
cmd: |
docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
become: yes
changed_when: false
ignore_errors: yes
- name: Start worker services
community.docker.docker_compose_v2:
project_src: "{{ airflow_worker_dir }}"
files:
- docker-compose.stress.yml
state: present
remove_orphans: true
become: yes
# -------------------------------------------------------------------------------------------------
# PHASE 10: Shared Storage Setup (s3fs)
# Mounts S3 buckets on all nodes
# -------------------------------------------------------------------------------------------------
- name: "PHASE 10: Shared Storage Setup (s3fs)"
hosts: master:workers
gather_facts: no
vars:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Mount S3 buckets via s3fs
block:
- name: Install s3fs for mounting S3 buckets
ansible.builtin.apt:
name: s3fs
state: present
become: yes
- name: Configure s3fs credentials
ansible.builtin.copy:
content: "{{ vault_s3_access_key_id }}:{{ vault_s3_secret_access_key }}"
dest: "/home/{{ ansible_user }}/.passwd-s3fs"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0600'
become: yes
- name: Check if mount points are already mounted
ansible.builtin.shell:
cmd: "mount | grep -q '{{ item.path }}'"
loop:
- { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
- { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
register: mount_check
changed_when: false
failed_when: false
- name: Ensure mount point directories exist (only if not mounted)
ansible.builtin.file:
path: "{{ item.item.path }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
loop: "{{ mount_check.results }}"
when: item.rc != 0
become: yes
- name: Mount S3 buckets for stress testing
ansible.posix.mount:
src: "s3fs#{{ item.bucket }}"
path: "{{ item.path }}"
fstype: fuse
opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
state: mounted
loop:
- { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
- { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
become: yes
# -------------------------------------------------------------------------------------------------
# PHASE 11: Initialize Redis (Master Only)
# Sets up profiles and policies in Redis
# -------------------------------------------------------------------------------------------------
- name: "PHASE 11: Initialize Redis profiles"
import_playbook: playbook-stress-init-redis.yml
# -------------------------------------------------------------------------------------------------
# PHASE 12: Final Status and Next Steps
# -------------------------------------------------------------------------------------------------
- name: "PHASE 12: Installation Complete"
hosts: localhost
gather_facts: no
tasks:
- name: Display installation completion message
ansible.builtin.debug:
msg: |
========================================
Full installation complete!
========================================
Next steps:
1. Start monitoring and enforcer (on master):
ansible-playbook ansible/playbook-stress-manage-processes.yml \
-e "start_monitor=true start_enforcer=true"
2. Start auth generator (on workers):
ansible-playbook ansible/playbook-stress-auth-generator.yml \
-e "start_generator=true dummy_batch=true auth_min_seconds=2 auth_max_seconds=3"
3. Start download simulation (on workers):
ansible-playbook ansible/playbook-stress-download-simulation.yml \
-e "start_download=true profile_prefix=user1 download_min_seconds=2 download_max_seconds=5" \
--limit workers
4. Check status:
ansible-playbook ansible/playbook-stress-control.yml -e "action=status"
5. Monitor profiles:
ansible-playbook ansible/playbook-stress-control.yml -e "action=profile-status"

View File

@ -0,0 +1,18 @@
---
- name: "UTIL-SETUP: Install and configure bgutils container on workers"
hosts: workers
gather_facts: yes
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Note that bgutil-provider is now on master
ansible.builtin.debug:
msg: "The bgutil-provider service is now deployed on the master node via docker-compose and is no longer deployed on workers."

View File

@ -0,0 +1,165 @@
---
- name: "UTIL-SETUP: Install media cleanup script and configure cron"
hosts: all
gather_facts: yes
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Ensure cleanup script directory exists
ansible.builtin.file:
path: "{{ base_dir }}/bin"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Copy cleanup_media.py script
ansible.builtin.copy:
src: "{{ playbook_dir }}/../yt-ops-services-debug/cleanup_media.py"
dest: "{{ base_dir }}/bin/cleanup_media.py"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Install s5cmd for S3 uploads
block:
- name: Check if s5cmd is already installed
ansible.builtin.stat:
path: /usr/local/bin/s5cmd
register: s5cmd_binary
- name: Download and install s5cmd
block:
- name: Create temporary directory for s5cmd download
ansible.builtin.tempfile:
state: directory
suffix: s5cmd
register: s5cmd_temp_dir
- name: Download s5cmd
ansible.builtin.get_url:
url: "https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz"
dest: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
mode: '0644'
- name: Extract s5cmd
ansible.builtin.unarchive:
src: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
dest: "{{ s5cmd_temp_dir.path }}"
remote_src: yes
- name: Install s5cmd to /usr/local/bin
ansible.builtin.copy:
src: "{{ s5cmd_temp_dir.path }}/s5cmd"
dest: /usr/local/bin/s5cmd
mode: '0755'
remote_src: yes
- name: Clean up temporary directory
ansible.builtin.file:
path: "{{ s5cmd_temp_dir.path }}"
state: absent
when: not s5cmd_binary.stat.exists
become: yes
- name: Ensure log directory exists
ansible.builtin.file:
path: "/var/log"
state: directory
owner: root
group: root
mode: '0755'
become: yes
- name: Create wrapper script to source .env before running cleanup
ansible.builtin.copy:
content: |
#!/bin/bash
# Wrapper script to run cleanup_media.py with environment variables from .env
set -e
BASE_DIR="{{ base_dir }}"
# Source .env file if it exists
if [ -f "${BASE_DIR}/.env" ]; then
set -a
source "${BASE_DIR}/.env"
set +a
fi
# Determine cleanup mode based on environment variable or default
CLEANUP_MODE="${CLEANUP_MODE:-{{ cleanup_settings.mode | default('s3-upload') }}}"
# Run cleanup script
cd "${BASE_DIR}"
if [ "$CLEANUP_MODE" = "s3-upload" ]; then
# S3 upload mode - uploads to S3 then deletes
exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
--target-dir "${BASE_DIR}/run" \
--target-dir "${BASE_DIR}/downloadfiles" \
--max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
--log-file /var/log/cleanup_media.log \
--s3-upload \
--s3-bucket "${S3_BUCKET:-stress-media-archive}" \
--s3-prefix "archived-media/$(hostname)" \
--s5cmd-path /usr/local/bin/s5cmd
else
# Simple cleanup mode - just truncate and rename
exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
--target-dir "${BASE_DIR}/run" \
--target-dir "${BASE_DIR}/downloadfiles" \
--max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
--log-file /var/log/cleanup_media.log
fi
dest: "{{ base_dir }}/bin/cleanup_media_wrapper.sh"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Configure cron job for media cleanup
ansible.builtin.cron:
name: "Media cleanup - {{ base_dir }}"
minute: "0"
hour: "*"
job: "{{ base_dir }}/bin/cleanup_media_wrapper.sh 2>&1 | logger -t cleanup_media"
user: "{{ ansible_user }}"
state: "{{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}"
become: yes
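# For reference, the resulting crontab entry has the form (hourly, at minute 0):
#   0 * * * * <base_dir>/bin/cleanup_media_wrapper.sh 2>&1 | logger -t cleanup_media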
- name: Display installation summary
ansible.builtin.debug:
msg: |
Media cleanup script installed successfully on {{ inventory_hostname }}
Configuration from cluster.green.yml:
- Base directory: {{ base_dir }}
- Enabled: {{ cleanup_settings.enabled | default(true) }}
- Cleanup mode: {{ cleanup_settings.mode | default('s3-upload') }}
- Max age: {{ cleanup_settings.max_age_seconds | default(3600) }} seconds
- Cron schedule: Every hour (0 * * * *)
- Cron job state: {{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}
- Log file: /var/log/cleanup_media.log
Note: You can override the cleanup mode for a single run by setting the
CLEANUP_MODE environment variable before executing the wrapper script.
e.g., CLEANUP_MODE=cleanup {{ base_dir }}/bin/cleanup_media_wrapper.sh
To view logs:
tail -f /var/log/cleanup_media.log

View File

@ -1,7 +1,17 @@
---
- name: Deploy Shadowsocks-Rust Proxy Configurations
-hosts: all
hosts: workers
gather_facts: yes
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Deploy Shadowsocks-Rust proxy services
block:
@ -55,11 +65,18 @@
path: /srv/shadowsocks-rust/docker-compose.yaml
state: absent
-- name: Force stop and remove known proxy containers to prevent conflicts
- name: Find and stop any container using the target proxy ports
-community.docker.docker_container:
ansible.builtin.shell:
-name: "{{ item.key }}"
cmd: |
-state: absent
container_id=$(docker ps -aq --filter "publish={{ item.value.local_port }}")
if [ -n "$container_id" ]; then
echo "Found container ${container_id} using port {{ item.value.local_port }}. Stopping and removing it."
docker stop "${container_id}" >/dev/null 2>&1 || true
docker rm -f "${container_id}" >/dev/null 2>&1 || true
fi
loop: "{{ shadowsocks_proxies | dict2items }}"
register: stop_conflicting_containers
changed_when: "'Stopping and removing it' in stop_conflicting_containers.stdout"
loop_control:
label: "{{ item.key }}"

View File

@ -0,0 +1,95 @@
---
- name: "STRESS-SETUP: Manage auth simulation generator"
hosts: workers
gather_facts: no
vars:
tmux_session_auth_gen: "stress-auth-{{ (profile_prefix | default('default')) | replace(',', '-') }}"
auth_policy: "policies/12_queue_auth_simulation.yaml"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_worker_dir }}"
- name: Validate profile_prefix is provided when starting or stopping
ansible.builtin.fail:
msg: "profile_prefix is required when start_generator=true or stop_generator=true"
when:
- (start_generator | default(false) | bool or stop_generator | default(false) | bool)
- profile_prefix is not defined
- name: "Display policy being used for auth generator"
ansible.builtin.debug:
msg: "Using auth generator policy: {{ auth_policy }}"
when: start_generator | default(false) | bool
- name: Manage auth generator process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "{{ tmux_session_auth_gen }}"
working_dir: "{{ base_dir }}"
command_to_run: >
./bin/ytops-client stress-policy
--policy {{ auth_policy }}
{% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %}
{% if auth_min_seconds is defined %}--set 'settings.dummy_simulation_settings.auth_min_seconds={{ auth_min_seconds }}'{% endif %}
{% if auth_max_seconds is defined %}--set 'settings.dummy_simulation_settings.auth_max_seconds={{ auth_max_seconds }}'{% endif %}
{% if batch_size is defined %}--set 'queue_policy.batch_size={{ batch_size }}'{% endif %}
{% if create_download_tasks is defined %}--set 'queue_policy.create_download_tasks={{ create_download_tasks }}'{% endif %}
{% if formats_to_download is defined %}--set 'queue_policy.formats_to_download={{ formats_to_download }}'{% endif %}
{% if profile_prefix is defined %}--set 'execution_control.worker_pools=[{"profile_prefix": "{{ profile_prefix }}", "workers": 1}]'{% endif %}
--profile-prefix {{ profile_prefix }}
process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ auth_policy }}.*--profile-prefix {{ profile_prefix }}"
start_process: "{{ start_generator | default(false) | bool }}"
stop_process: "{{ stop_generator | default(false) | bool }}"
check_status: "{{ vars.check_status | default(false) | bool }}"
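# For illustration, with profile_prefix=user1, dummy_batch=true, auth_min_seconds=2
# and auth_max_seconds=3, command_to_run renders roughly to:
#   ./bin/ytops-client stress-policy --policy policies/12_queue_auth_simulation.yaml \
#     --dummy-batch \
#     --set 'settings.dummy_simulation_settings.auth_min_seconds=2' \
#     --set 'settings.dummy_simulation_settings.auth_max_seconds=3' \
#     --set 'execution_control.worker_pools=[{"profile_prefix": "user1", "workers": 1}]' \
#     --profile-prefix user1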
- name: List active tmux sessions
ansible.builtin.shell:
cmd: tmux list-sessions 2>/dev/null || true
register: tmux_sessions
changed_when: false
- name: Display active sessions
ansible.builtin.debug:
msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"
- name: Check tmux session output for errors
block:
- name: Wait for a moment for the process to start
ansible.builtin.pause:
seconds: 2
- name: Capture tmux pane content
ansible.builtin.shell:
cmd: "tmux capture-pane -p -t {{ tmux_session_auth_gen }}"
register: tmux_output
changed_when: false
ignore_errors: true
- name: Display tmux pane content if session exists
ansible.builtin.debug:
msg: "Initial output from tmux session '{{ tmux_session_auth_gen }}':"
when: tmux_output.rc == 0
- name: Show output lines if session exists
ansible.builtin.debug:
var: tmux_output.stdout_lines
when: tmux_output.rc == 0
- name: Report if session not found
ansible.builtin.debug:
msg: "Tmux session '{{ tmux_session_auth_gen }}' was not found. It may have exited immediately upon starting."
when: tmux_output.rc != 0
when: start_generator | default(false) | bool

View File

@ -0,0 +1,288 @@
---
- name: "STRESS-SETUP: Unified control for stress test processes"
hosts: all
gather_facts: no
vars:
# Default action is status check
action: "status"
setup_policy: "policies/6_profile_setup_policy.yaml"
enforcer_policy: "policies/8_unified_simulation_enforcer.yaml"
master_only_actions:
- "check-enforcer"
- "profile-status"
- "run-command"
- "cleanup-profiles"
- "restart-monitoring"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
when: action not in master_only_actions or inventory_hostname in groups['master']
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
when: action not in master_only_actions or inventory_hostname in groups['master']
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
when: action not in master_only_actions or inventory_hostname in groups['master']
- name: Check running processes (status action)
ansible.builtin.shell:
cmd: |
echo "=== Process status on {{ inventory_hostname }} ==="
echo "1. Tmux sessions:"
tmux list-sessions 2>/dev/null || echo "No tmux sessions"
echo ""
echo "2. ytops-client processes:"
ps aux | grep -E "ytops-client.*(profile|policy-enforcer|stress-policy)" | grep -v grep || echo "No ytops-client processes"
echo ""
echo "3. Python processes related to stress test:"
ps aux | grep -E "python.*(ytops|stress)" | grep -v grep || echo "No related python processes"
register: status_output
changed_when: false
when: action == "status"
- name: Display status
ansible.builtin.debug:
msg: "{{ status_output.stdout_lines }}"
when: action == "status"
- name: Check enforcer status (check-enforcer action)
block:
- name: Check for enforcer tmux session and process
ansible.builtin.shell:
cmd: |
echo "Enforcer status on {{ inventory_hostname }}:"
if tmux has-session -t stress-enforcer 2>/dev/null; then
echo " - Tmux session 'stress-enforcer' is RUNNING."
ps aux | grep -E "ytops-client.*policy-enforcer" | grep -v grep || echo " - WARNING: Tmux session exists, but no matching process found."
else
echo " - Tmux session 'stress-enforcer' is NOT RUNNING."
fi
register: enforcer_status
changed_when: false
- name: Display enforcer status
ansible.builtin.debug:
msg: "{{ enforcer_status.stdout_lines }}"
when: action == "check-enforcer"
delegate_to: "{{ groups['master'][0] }}"
run_once: true
- name: Run ytops-client profile list on master (profile-status action)
ansible.builtin.shell:
cmd: |
cd {{ airflow_master_dir }}
if [ -f .env ]; then
set -a && . ./.env && set +a
fi
timeout 10 ./bin/ytops-client profile list \
--auth-env sim_auth \
--download-env sim_download 2>&1
register: profile_list_output
changed_when: false
when:
- action == "profile-status"
- inventory_hostname in groups['master']
delegate_to: "{{ groups['master'][0] }}"
run_once: true
- name: Show profile list output lines
ansible.builtin.debug:
var: profile_list_output.stdout_lines
when:
- action == "profile-status"
- profile_list_output is defined
- inventory_hostname in groups['master']
- name: Run custom ytops-client command on master (run-command action)
ansible.builtin.shell:
cmd: |
cd {{ airflow_master_dir }}
if [ -f .env ]; then
set -a && . ./.env && set +a
fi
{{ command_to_run }}
register: command_output
changed_when: false
when:
- action == "run-command"
- inventory_hostname in groups['master']
- command_to_run is defined
delegate_to: "{{ groups['master'][0] }}"
run_once: true
- name: Display command output
ansible.builtin.debug:
msg: "Command output:"
when:
- action == "run-command"
- command_output is defined
- name: Show command output lines
ansible.builtin.debug:
var: command_output.stdout_lines
when:
- action == "run-command"
- command_output is defined
- name: "Display policy being used for profile cleanup"
ansible.builtin.debug:
msg: "Using setup policy for cleanup: {{ setup_policy }}"
when:
- action == "cleanup-profiles"
- inventory_hostname in groups['master']
delegate_to: "{{ groups['master'][0] }}"
run_once: true
- name: Cleanup profiles on master (cleanup-profiles action)
ansible.builtin.shell:
cmd: |
cd {{ airflow_master_dir }}
if [ -f .env ]; then
set -a && . ./.env && set +a
fi
./bin/ytops-client setup-profiles \
--policy {{ setup_policy }} \
--cleanup-all {% if profile_prefix is defined %}--profile-prefix {{ profile_prefix }}{% endif %}
register: cleanup_output
changed_when: false
when:
- action == "cleanup-profiles"
- inventory_hostname in groups['master']
delegate_to: "{{ groups['master'][0] }}"
run_once: true
- name: Display cleanup output
ansible.builtin.debug:
msg: "Cleanup output:"
when:
- action == "cleanup-profiles"
- cleanup_output is defined
- name: Show cleanup output lines
ansible.builtin.debug:
var: cleanup_output.stdout_lines
when:
- action == "cleanup-profiles"
- cleanup_output is defined
- name: Stop all stress test processes on all nodes (stop-all action)
block:
- name: Kill all tmux sessions starting with 'stress-'
ansible.builtin.shell:
cmd: |
for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
tmux kill-session -t "$session"
done || true
# Failsafe: kill any lingering tmux server processes for stress sessions
TMUX_PIDS_TO_KILL=$(ps aux | grep '[t]mux' | grep 'stress-' | grep -v 'grep' | grep -v 'ansible' | awk '{print $2}')
if [ -n "$TMUX_PIDS_TO_KILL" ]; then
kill -9 $TMUX_PIDS_TO_KILL >/dev/null 2>&1 || true
fi
ignore_errors: yes
changed_when: false
- name: Kill all ytops-client and related python processes
ansible.builtin.shell:
cmd: |
# Gracefully terminate processes by pattern
ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
sleep 1 # Wait for graceful shutdown
# Force kill any remaining processes
ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
ignore_errors: yes
changed_when: false
when: action == "stop-all"
- name: Stop processes on targeted nodes only (stop-nodes action)
block:
- name: Kill all tmux sessions starting with 'stress-' on this node
ansible.builtin.shell:
cmd: |
for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
tmux kill-session -t "$session"
done || true
# Failsafe: kill any lingering tmux server processes for stress sessions
TMUX_PIDS_TO_KILL=$(ps aux | grep '[t]mux' | grep 'stress-' | grep -v 'grep' | grep -v 'ansible' | awk '{print $2}')
if [ -n "$TMUX_PIDS_TO_KILL" ]; then
kill -9 $TMUX_PIDS_TO_KILL >/dev/null 2>&1 || true
fi
ignore_errors: yes
changed_when: false
- name: Kill all ytops-client and related python processes on this node
ansible.builtin.shell:
cmd: |
# Gracefully terminate processes by pattern
ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
sleep 1 # Wait for graceful shutdown
# Force kill any remaining processes
ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
ignore_errors: yes
changed_when: false
when: action == "stop-nodes"
- name: Restart monitoring and enforcer (restart-monitoring action)
block:
- name: Stop monitor process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "stress-monitor"
process_grep_pattern: "ytops-client.*profile.*list"
stop_process: true
- name: Stop enforcer process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "stress-enforcer"
process_grep_pattern: "ytops-client.*policy-enforcer.*{{ enforcer_policy }}"
stop_process: true
- name: Start monitor process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "stress-monitor"
working_dir: "{{ airflow_master_dir }}"
command_to_run: >
./bin/ytops-client profile list
--auth-env sim_auth
--download-env sim_download
--live
--no-blink
--show-reasons
process_grep_pattern: "ytops-client.*profile.*list"
start_process: true
- name: Start enforcer process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "stress-enforcer"
working_dir: "{{ airflow_master_dir }}"
command_to_run: >
./bin/ytops-client policy-enforcer
--policy {{ enforcer_policy }}
--live
process_grep_pattern: "ytops-client.*policy-enforcer.*{{ enforcer_policy }}"
start_process: true
when:
- action == "restart-monitoring"
- inventory_hostname == groups['master'][0]
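# NOTE: a hedged sketch of driving these action-based tasks from the control node. The playbook and
# inventory filenames are assumptions; action, profile_prefix, setup_policy, and enforcer_policy are
# the variables actually read above.
$ ansible-playbook playbook-stress-manage.yml -i inventory.stress -e action=cleanup-profiles -e profile_prefix=user1
$ ansible-playbook playbook-stress-manage.yml -i inventory.stress -e action=stop-all
$ ansible-playbook playbook-stress-manage.yml -i inventory.stress -e action=restart-monitoring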

View File

@ -0,0 +1,93 @@
---
- name: "STRESS-SETUP: Manage download simulation"
hosts: workers
gather_facts: no
vars:
tmux_session_download: "stress-download-{{ (profile_prefix | default('default')) | replace(',', '-') }}"
download_policy: "policies/11_direct_docker_download_simulation.yaml"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_worker_dir }}"
- name: Validate profile_prefix is provided when starting or stopping
ansible.builtin.fail:
msg: "profile_prefix is required when start_download=true or stop_download=true"
when:
- (start_download | default(false) | bool or stop_download | default(false) | bool)
- profile_prefix is not defined
- name: "Display policy being used for download simulation"
ansible.builtin.debug:
msg: "Using download simulation policy: {{ download_policy }}"
when: start_download | default(false) | bool
- name: Manage download simulation process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "{{ tmux_session_download }}"
working_dir: "{{ base_dir }}"
command_to_run: >
./bin/ytops-client stress-policy
--policy {{ download_policy }}
{% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %}
{% if download_min_seconds is defined %}--set 'settings.dummy_simulation_settings.download_min_seconds={{ download_min_seconds }}'{% endif %}
{% if download_max_seconds is defined %}--set 'settings.dummy_simulation_settings.download_max_seconds={{ download_max_seconds }}'{% endif %}
{% if profile_prefix is defined %}--set 'execution_control.worker_pools=[{"profile_prefix": "{{ profile_prefix }}", "workers": 1}]'{% endif %}
{% for setting in (extra_set_args | default('[]')) | from_yaml %}--set '{{ setting }}' {% endfor %}
--profile-prefix {{ profile_prefix }}
process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ download_policy }}.*--profile-prefix {{ profile_prefix }}"
start_process: "{{ start_download | default(false) | bool }}"
stop_process: "{{ stop_download | default(false) | bool }}"
check_status: "{{ vars.check_status | default(false) | bool }}"
- name: List active tmux sessions
ansible.builtin.shell:
cmd: tmux list-sessions 2>/dev/null || true
register: tmux_sessions
changed_when: false
- name: Display active sessions
ansible.builtin.debug:
msg: "Active tmux sessions on {{ inventory_hostname }}: {{ tmux_sessions.stdout_lines }}"
- name: Check tmux session output for errors
block:
- name: Wait for a moment for the process to start
ansible.builtin.pause:
seconds: 2
- name: Capture tmux pane content
ansible.builtin.shell:
cmd: "tmux capture-pane -p -t {{ tmux_session_download }}"
register: tmux_output
changed_when: false
ignore_errors: true
- name: Display tmux pane content if session exists
ansible.builtin.debug:
msg: "Initial output from tmux session '{{ tmux_session_download }}':"
when: tmux_output.rc == 0
- name: Show output lines if session exists
ansible.builtin.debug:
var: tmux_output.stdout_lines
when: tmux_output.rc == 0
- name: Report if session not found
ansible.builtin.debug:
msg: "Tmux session '{{ tmux_session_download }}' was not found. It may have exited immediately upon starting."
when: tmux_output.rc != 0
when: start_download | default(false) | bool
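# NOTE: a minimal usage sketch for this play. The inventory filename is an assumption; start_download,
# stop_download, profile_prefix, and the dummy timing overrides are the variables the play reads.
$ ansible-playbook playbook-stress-download-simulation.yml -i inventory.stress \
    -e start_download=true -e profile_prefix=user1 \
    -e download_min_seconds=0.5 -e download_max_seconds=2
# Stop the same session later.
$ ansible-playbook playbook-stress-download-simulation.yml -i inventory.stress \
    -e stop_download=true -e profile_prefix=user1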

View File

@ -3,12 +3,19 @@
hosts: all hosts: all
gather_facts: no gather_facts: no
vars_files: vars_files:
- "group_vars/all/generated_vars.stress.yml"
- "group_vars/all/vault.yml" - "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks: tasks:
- name: Define base directory for node - name: Define base directory for node
ansible.builtin.set_fact: ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}" base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Create .env file for stress test environment - name: Create .env file for stress test environment
ansible.builtin.template: ansible.builtin.template:
@ -18,15 +25,3 @@
group: "{{ deploy_group }}" group: "{{ deploy_group }}"
mode: '0644' mode: '0644'
become: yes become: yes
- name: Ensure REDIS_PORT is set in .env file
ansible.builtin.lineinfile:
path: "{{ base_dir }}/.env"
line: "REDIS_PORT={{ redis_port }}"
regexp: "^REDIS_PORT="
state: present
create: yes
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0644'
become: yes

View File

@ -0,0 +1,61 @@
---
- name: "STRESS-SETUP: Initialize Redis with profiles and policies"
hosts: master
gather_facts: no
vars:
setup_policy: "policies/6_profile_setup_policy.yaml"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Check if Redis is running
ansible.builtin.shell:
cmd: "redis-cli -h {{ hostvars[groups['master'][0]].ansible_host }} -p {{ redis_port }} {% if use_redis_password | default(true) | string | lower == 'true' %}-a {{ vault_redis_password }}{% endif %} ping 2>&1 | grep -q PONG"
register: redis_check
ignore_errors: yes
changed_when: false
- name: Ensure Redis is accessible
ansible.builtin.fail:
msg: "Redis is not accessible on master node. Please ensure Redis service is running on {{ hostvars[groups['master'][0]].ansible_host }}:{{ redis_port }}"
when: redis_check.rc != 0
- name: Stop any running ytops-client processes on master
ansible.builtin.shell:
cmd: ps aux | grep "[y]tops-client" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
changed_when: false
- name: "Display policy being used for Redis initialization"
ansible.builtin.debug:
msg: "Using setup policy: {{ setup_policy }}"
- name: Initialize Redis profiles and policies
ansible.builtin.shell:
cmd: |
cd {{ airflow_master_dir }}
./bin/ytops-client setup-profiles \
--policy {{ setup_policy }} \
--cleanup-all
environment:
REDIS_HOST: "{{ hostvars[groups['master'][0]].ansible_host }}"
REDIS_PORT: "{{ redis_port }}"
REDIS_PASSWORD: "{{ vault_redis_password if use_redis_password | default(true) | string | lower == 'true' else '' }}"
register: init_result
changed_when: init_result.rc == 0
- name: Display initialization result
ansible.builtin.debug:
msg: "Redis profile initialization completed successfully"
when: init_result.rc == 0
- name: Handle initialization failure
ansible.builtin.fail:
msg: "Failed to initialize Redis profiles: {{ init_result.stderr }}"
when: init_result.rc != 0

View File

@ -3,9 +3,15 @@
hosts: all hosts: all
gather_facts: yes gather_facts: yes
vars_files: vars_files:
- "group_vars/all/generated_vars.stress.yml"
- "group_vars/all/vault.yml" - "group_vars/all/vault.yml"
pre_tasks: pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
- name: Ensure python3-pip is installed - name: Ensure python3-pip is installed
block: block:
- name: Install prerequisites for managing repositories - name: Install prerequisites for managing repositories
@ -27,17 +33,22 @@
become: yes become: yes
tasks: tasks:
- name: Install required Python packages - name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Install required Python packages from requirements.txt
ansible.builtin.pip: ansible.builtin.pip:
name: requirements: "{{ base_dir }}/ytops_client/requirements.txt"
- python-dotenv extra_args: "--ignore-installed"
- aria2p become: yes
- tabulate environment:
- redis PIP_BREAK_SYSTEM_PACKAGES: "1"
- PyYAML
- aiothrift - name: Explicitly install the thrift package
- PySocks ansible.builtin.pip:
state: present name: thrift
extra_args: "--ignore-installed"
become: yes become: yes
environment: environment:
PIP_BREAK_SYSTEM_PACKAGES: "1" PIP_BREAK_SYSTEM_PACKAGES: "1"

View File

@ -0,0 +1,113 @@
---
- name: "STRESS-SETUP: Manage full worker lifecycle based on inventory"
hosts: workers
gather_facts: no
vars:
# Default action
action: "status" # Available actions: start, stop, status
tasks:
- name: "Start all configured generators and simulators"
when: action == "start"
block:
- name: "Set combined profile prefixes string"
ansible.builtin.set_fact:
combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
when: profile_prefixes is defined and profile_prefixes | length > 0
- name: "Start single auth generator for all profiles: {{ combined_prefixes | default('none') }}"
ansible.builtin.command: >-
ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
-i {{ inventory_file }}
--limit {{ inventory_hostname }}
-e "start_generator=true"
-e "profile_prefix={{ combined_prefixes }}"
{% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
{% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
{% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
{% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}
{% if create_download_tasks is defined %}-e "create_download_tasks={{ create_download_tasks }}"{% endif %}
{% if formats_to_download is defined %}-e "formats_to_download={{ formats_to_download }}"{% endif %}
delegate_to: localhost
changed_when: true
when: profile_prefixes is defined and profile_prefixes | length > 0
- name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
ansible.builtin.command: >-
ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-i {{ inventory_file }}
--limit {{ inventory_hostname }}
-e "start_download=true"
-e "profile_prefix={{ combined_prefixes }}"
{% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
{% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
{% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
{% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
delegate_to: localhost
changed_when: true
when: profile_prefixes is defined and profile_prefixes | length > 0
- name: "Stop all worker generators and simulators"
when: action == "stop"
block:
- name: Kill all tmux sessions starting with 'stress-' on this worker
ansible.builtin.shell:
cmd: |
for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
tmux kill-session -t "$session"
done || true
ignore_errors: yes
changed_when: false
- name: Kill all ytops-client processes on this worker
ansible.builtin.shell:
cmd: |
# Gracefully terminate
ps aux | grep "[y]tops-client.*stress-policy" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
sleep 0.5
# Force kill
ps aux | grep "[y]tops-client.*stress-policy" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
ignore_errors: yes
changed_when: false
- name: "Check status of all configured generators and simulators"
when: action == "status"
block:
- name: "Set combined profile prefixes string"
ansible.builtin.set_fact:
combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
when: profile_prefixes is defined and profile_prefixes | length > 0
- name: "Check single auth generator for all profiles: {{ combined_prefixes | default('none') }}"
ansible.builtin.command: >-
ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
-i {{ inventory_file }}
--limit {{ inventory_hostname }}
-e "check_status=true"
-e "profile_prefix={{ combined_prefixes }}"
delegate_to: localhost
changed_when: false
when: profile_prefixes is defined and profile_prefixes | length > 0
register: auth_status_check
- name: "Display auth generator status for {{ inventory_hostname }}"
ansible.builtin.debug:
var: auth_status_check.stdout_lines
when: auth_status_check is defined
- name: "Check single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
ansible.builtin.command: >-
ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-i {{ inventory_file }}
--limit {{ inventory_hostname }}
-e "check_status=true"
-e "profile_prefix={{ combined_prefixes }}"
delegate_to: localhost
changed_when: false
when: profile_prefixes is defined and profile_prefixes | length > 0
register: download_status_check
- name: "Display download simulator status for {{ inventory_hostname }}"
ansible.builtin.debug:
var: download_status_check.stdout_lines
when: download_status_check is defined
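# NOTE: a hedged example of driving the lifecycle play. The playbook filename is an assumption;
# action, profile_prefixes, and the optional dummy/timing overrides are the variables consumed above.
$ ansible-playbook playbook-stress-worker-lifecycle.yml -i inventory.stress -e action=start
$ ansible-playbook playbook-stress-worker-lifecycle.yml -i inventory.stress -e action=status
# Override the prefixes explicitly (lists must be passed to -e as JSON).
$ ansible-playbook playbook-stress-worker-lifecycle.yml -i inventory.stress \
    -e action=start -e '{"profile_prefixes": ["user1", "user2"]}'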

View File

@ -0,0 +1,111 @@
---
- name: "STRESS-SETUP: Manage tmux sessions for monitoring and enforcer"
hosts: master
gather_facts: no
vars:
tmux_session_monitor: "stress-monitor"
tmux_session_enforcer: "stress-enforcer"
enforcer_policy: "policies/8_unified_simulation_enforcer.yaml"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Manage monitor process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "{{ tmux_session_monitor }}"
working_dir: "{{ airflow_master_dir }}"
command_to_run: >
./bin/ytops-client profile list
--auth-env sim_auth
--download-env sim_download
--live
--no-blink
--show-reasons
process_grep_pattern: "ytops-client.*profile.*list"
start_process: "{{ start_monitor | default(false) | bool }}"
stop_process: "{{ stop_sessions | default(false) | bool or stop_monitor | default(false) | bool }}"
check_status: "{{ vars.check_status | default(false) | bool }}"
- name: Check monitor session output for errors
block:
- name: Wait for a moment for the process to start
ansible.builtin.pause:
seconds: 2
- name: Capture tmux pane content
ansible.builtin.shell:
cmd: "tmux capture-pane -p -t {{ tmux_session_monitor }}"
register: tmux_output
changed_when: false
ignore_errors: true
- name: Display tmux pane content if session exists
ansible.builtin.debug:
msg: "Initial output from tmux session '{{ tmux_session_monitor }}':"
when: tmux_output.rc == 0
- name: Show output lines if session exists
ansible.builtin.debug:
var: tmux_output.stdout_lines
when: tmux_output.rc == 0
- name: Report if session not found
ansible.builtin.debug:
msg: "Tmux session '{{ tmux_session_monitor }}' was not found."
when: tmux_output.rc != 0
when: start_monitor | default(false) | bool
- name: Manage enforcer process
ansible.builtin.include_tasks:
file: manage-processes-tasks.yml
vars:
tmux_session_name: "{{ tmux_session_enforcer }}"
working_dir: "{{ airflow_master_dir }}"
command_to_run: >
./bin/ytops-client policy-enforcer
--policy {{ enforcer_policy }}
--live
process_grep_pattern: "ytops-client.*policy-enforcer.*{{ enforcer_policy }}"
start_process: "{{ start_enforcer | default(false) | bool }}"
stop_process: "{{ stop_sessions | default(false) | bool or stop_enforcer | default(false) | bool }}"
check_status: "{{ vars.check_status | default(false) | bool }}"
- name: Check enforcer session output for errors
block:
- name: Wait for a moment for the process to start
ansible.builtin.pause:
seconds: 2
- name: Capture tmux pane content
ansible.builtin.shell:
cmd: "tmux capture-pane -p -t {{ tmux_session_enforcer }}"
register: tmux_output
changed_when: false
ignore_errors: true
- name: Display tmux pane content if session exists
ansible.builtin.debug:
msg: "Initial output from tmux session '{{ tmux_session_enforcer }}':"
when: tmux_output.rc == 0
- name: Show output lines if session exists
ansible.builtin.debug:
var: tmux_output.stdout_lines
when: tmux_output.rc == 0
- name: Report if session not found
ansible.builtin.debug:
msg: "Tmux session '{{ tmux_session_enforcer }}' was not found."
when: tmux_output.rc != 0
when: start_enforcer | default(false) | bool
- name: List active tmux sessions
ansible.builtin.shell:
cmd: tmux list-sessions 2>/dev/null || true
register: tmux_sessions
changed_when: false
- name: Display active sessions
ansible.builtin.debug:
msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"

View File

@ -2,13 +2,22 @@
- name: "STRESS-SETUP: Sync Local Code" - name: "STRESS-SETUP: Sync Local Code"
hosts: all hosts: all
gather_facts: no gather_facts: no
vars:
ytops_source_dir: "{{ playbook_dir }}/../ytops_client-source"
vars_files: vars_files:
- "group_vars/all/generated_vars.stress.yml"
- "group_vars/all/vault.yml" - "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks: tasks:
- name: Define base directory for node - name: Define base directory for node
ansible.builtin.set_fact: ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}" base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Ensure base directory exists for code sync - name: Ensure base directory exists for code sync
ansible.builtin.file: ansible.builtin.file:
@ -21,7 +30,7 @@
- name: Sync python packages and directories for stress testing - name: Sync python packages and directories for stress testing
ansible.posix.synchronize: ansible.posix.synchronize:
src: "../ytops_client-source/{{ item }}/" src: "{{ ytops_source_dir }}/{{ item }}/"
dest: "{{ base_dir }}/{{ item }}/" dest: "{{ base_dir }}/{{ item }}/"
rsync_opts: rsync_opts:
- "--delete" - "--delete"
@ -42,7 +51,7 @@
- name: Sync client utility scripts and configs - name: Sync client utility scripts and configs
ansible.posix.synchronize: ansible.posix.synchronize:
src: "../ytops_client-source/{{ item }}" src: "{{ ytops_source_dir }}/{{ item }}"
dest: "{{ base_dir }}/{{ item }}" dest: "{{ base_dir }}/{{ item }}"
perms: yes perms: yes
loop: loop:
@ -56,3 +65,4 @@
- "ytdlp.json" - "ytdlp.json"
become: yes become: yes
become_user: "{{ ansible_user }}" become_user: "{{ ansible_user }}"

View File

@ -0,0 +1,58 @@
---
- name: "STRESS-SETUP: Sync Policies and CLI Configs"
hosts: all
gather_facts: no
vars:
ytops_source_dir: "{{ playbook_dir }}/../ytops_client-source"
vars_files:
- "group_vars/all/vault.yml"
pre_tasks:
- name: Set inventory_env fact
ansible.builtin.set_fact:
inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
- name: Load environment-specific variables
ansible.builtin.include_vars: "{{ item }}"
with_fileglob:
- "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
tasks:
- name: Define base directory for node
ansible.builtin.set_fact:
base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
- name: Ensure policies directory exists
ansible.builtin.file:
path: "{{ base_dir }}/policies"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Sync policies directory only
ansible.posix.synchronize:
src: "{{ ytops_source_dir }}/policies/"
dest: "{{ base_dir }}/policies/"
rsync_opts:
- "--delete"
- "--exclude=.DS_Store"
- "--exclude=__pycache__"
- "--exclude='*.pyc'"
recursive: yes
perms: yes
become: yes
become_user: "{{ ansible_user }}"
- name: Sync client CLI config files
ansible.posix.synchronize:
src: "{{ ytops_source_dir }}/{{ item }}"
dest: "{{ base_dir }}/{{ item }}"
perms: yes
loop:
- "cli.auth.config"
- "cli.download.config"
become: yes
become_user: "{{ ansible_user }}"
- name: Display sync completion
ansible.builtin.debug:
msg: "Policies and CLI configs synced to {{ base_dir }}"

View File

@ -1,8 +1,16 @@
# This file is managed by Ansible for the stress test environment. # This file is managed by Ansible for the stress test environment.
# --- Network Settings --- # --- Network Settings ---
REDIS_HOST={{ hostvars[groups['airflow_master'][0]].ansible_host }} REDIS_HOST={{ hostvars[groups['master'][0]].ansible_host }}
REDIS_PORT={{ vault_redis_port }}
REDIS_PASSWORD={{ vault_redis_password }} REDIS_PASSWORD={{ vault_redis_password }}
# --- S3 Storage Configuration ---
S3_ACCESS_KEY={{ vault_s3_delivery_access_key_id }}
S3_SECRET_KEY={{ vault_s3_delivery_secret_access_key }}
S3_ENDPOINT={{ vault_s3_delivery_endpoint }}
S3_BUCKET={{ vault_s3_delivery_bucket }}
AWS_REGION={{ vault_s3_delivery_aws_region }}
# --- Account Manager Configuration --- # --- Account Manager Configuration ---
ACCOUNT_ACTIVE_DURATION_MIN=7 ACCOUNT_ACTIVE_DURATION_MIN=7
ACCOUNT_COOLDOWN_DURATION_MIN=30 ACCOUNT_COOLDOWN_DURATION_MIN=30

View File

@ -1,6 +1,7 @@
# Template for stress test master services # Template for stress test master services
name: "stress-services" name: "stress-services"
services: services:
{% if inventory_hostname in groups['master'] %}
redis: redis:
image: redis:7-alpine image: redis:7-alpine
container_name: stress-redis container_name: stress-redis
@ -26,7 +27,7 @@ services:
command: server /data --console-address ":9001" command: server /data --console-address ":9001"
networks: networks:
- {{ docker_network_name }} - {{ docker_network_name }}
{% endif %}
bgutil-provider: bgutil-provider:
image: brainicism/bgutil-ytdlp-pot-provider image: brainicism/bgutil-ytdlp-pot-provider
container_name: bgutil-provider container_name: bgutil-provider

View File

@ -11,7 +11,6 @@ services:
volumes: volumes:
- ./config_ssp_{{ proxy_config.local_port }}/:/etc/shadowsocks-rust/:ro - ./config_ssp_{{ proxy_config.local_port }}/:/etc/shadowsocks-rust/:ro
networks: networks:
- default
- {{ docker_network_name }} - {{ docker_network_name }}
{% endfor %} {% endfor %}

View File

@ -19,7 +19,7 @@ def generate_inventory(cluster_config, inventory_path):
f.write("# Edit cluster.yml and re-run the generator instead.\n\n") f.write("# Edit cluster.yml and re-run the generator instead.\n\n")
# Master group # Master group
f.write("[airflow_master]\n") f.write("[master]\n")
for hostname, config in cluster_config.get('master', {}).items(): for hostname, config in cluster_config.get('master', {}).items():
line = f"{hostname} ansible_host={config['ip']}" line = f"{hostname} ansible_host={config['ip']}"
if 'port' in config: if 'port' in config:
@ -29,7 +29,7 @@ def generate_inventory(cluster_config, inventory_path):
f.write("\n") f.write("\n")
# Workers group (handles case where workers are not defined) # Workers group (handles case where workers are not defined)
f.write("[airflow_workers]\n") f.write("[workers]\n")
for hostname, config in cluster_config.get('workers', {}).items(): for hostname, config in cluster_config.get('workers', {}).items():
line = f"{hostname} ansible_host={config['ip']}" line = f"{hostname} ansible_host={config['ip']}"
if 'port' in config: if 'port' in config:
@ -47,6 +47,11 @@ def generate_host_vars(cluster_config, host_vars_dir):
sys.exit(1) sys.exit(1)
master_ip = list(master_nodes.values())[0]['ip'] master_ip = list(master_nodes.values())[0]['ip']
# Get global vars for aliases
global_vars = cluster_config.get('global_vars', {})
airflow_master_dir = global_vars.get('airflow_master_dir')
airflow_worker_dir = global_vars.get('airflow_worker_dir')
# Get global proxy definitions # Get global proxy definitions
shadowsocks_proxies = cluster_config.get('shadowsocks_proxies', {}) shadowsocks_proxies = cluster_config.get('shadowsocks_proxies', {})
@ -58,6 +63,8 @@ def generate_host_vars(cluster_config, host_vars_dir):
# Per-node list of proxies to USE # Per-node list of proxies to USE
worker_proxies = config.get('proxies', []) worker_proxies = config.get('proxies', [])
profile_prefixes = config.get('profile_prefixes', [])
cleanup_settings = config.get('cleanup_settings')
with open(host_vars_file, 'w') as f: with open(host_vars_file, 'w') as f:
f.write("---\n") f.write("---\n")
@ -65,6 +72,13 @@ def generate_host_vars(cluster_config, host_vars_dir):
f.write(f"master_host_ip: {master_ip}\n") f.write(f"master_host_ip: {master_ip}\n")
f.write("redis_port: 52909\n") f.write("redis_port: 52909\n")
# Add node-specific directory aliases for template compatibility
# The master path is needed by all nodes for the .env template.
if airflow_master_dir:
f.write(f"airflow_master: \"{airflow_master_dir}\"\n")
if hostname in cluster_config.get('workers', {}) and airflow_worker_dir:
f.write(f"airflow_dl_worker: \"{airflow_worker_dir}\"\n")
# Write the global proxy definitions for deployment # Write the global proxy definitions for deployment
if shadowsocks_proxies: if shadowsocks_proxies:
f.write("shadowsocks_proxies:\n") f.write("shadowsocks_proxies:\n")
@ -81,6 +95,22 @@ def generate_host_vars(cluster_config, host_vars_dir):
for proxy in worker_proxies: for proxy in worker_proxies:
f.write(f" - \"{proxy}\"\n") f.write(f" - \"{proxy}\"\n")
# Write worker-specific profile prefixes
if profile_prefixes:
f.write("profile_prefixes:\n")
for prefix in profile_prefixes:
f.write(f" - \"{prefix}\"\n")
# Write worker-specific cleanup settings (overrides global)
if cleanup_settings:
f.write("cleanup_settings:\n")
if 'enabled' in cleanup_settings:
f.write(f" enabled: {str(cleanup_settings['enabled']).lower()}\n")
if 'mode' in cleanup_settings:
f.write(f" mode: \"{cleanup_settings['mode']}\"\n")
if 'max_age_seconds' in cleanup_settings:
f.write(f" max_age_seconds: {cleanup_settings['max_age_seconds']}\n")
def generate_group_vars(cluster_config, group_vars_path): def generate_group_vars(cluster_config, group_vars_path):
"""Generate group-level variables""" """Generate group-level variables"""
# Create parent directory if it doesn't exist # Create parent directory if it doesn't exist
@ -107,11 +137,11 @@ def generate_group_vars(cluster_config, group_vars_path):
generated_data = { generated_data = {
'master_host_ip': master_ip, 'master_host_ip': master_ip,
'redis_port': 52909, 'redis_port': 52909,
'external_access_ips': external_ips if external_ips else [], 'external_access_ips': external_ips if external_ips else []
'hostvars': all_nodes
} }
generated_data.update(global_vars) generated_data.update(global_vars)
with open(group_vars_path, 'w') as f: with open(group_vars_path, 'w') as f:
f.write("---\n") f.write("---\n")
f.write("# This file is auto-generated by tools/generate-inventory.py\n") f.write("# This file is auto-generated by tools/generate-inventory.py\n")

View File

@ -17,7 +17,8 @@ settings:
urls_file: "inputfiles/urls.rt300.txt" urls_file: "inputfiles/urls.rt300.txt"
# The save directory MUST be inside the docker_host_mount_path for the download # The save directory MUST be inside the docker_host_mount_path for the download
# simulation to be able to find the files. # simulation to be able to find the files.
save_info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation" # NOTE: This path is expected to be on an s3fs mount for cross-host communication.
save_info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
# Settings for controlling the behavior of dummy/simulation modes. # Settings for controlling the behavior of dummy/simulation modes.
# These values can be overridden at runtime with the --set flag. # These values can be overridden at runtime with the --set flag.
@ -88,6 +89,7 @@ direct_docker_cli_policy:
docker_container_mount_path: "/config" # The mount point inside the container docker_container_mount_path: "/config" # The mount point inside the container
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs. # Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
docker_host_cache_path: ".cache/direct_docker_simulation" docker_host_cache_path: ".cache/direct_docker_simulation"
# Path inside the container where the cache is mounted. Should match HOME/.cache # Path inside the container where the cache is mounted. Should match HOME/.cache
docker_container_cache_path: "/config/.cache" docker_container_cache_path: "/config/.cache"

View File

@ -15,7 +15,8 @@ settings:
# This directory should contain info.json files generated by an auth simulation, # This directory should contain info.json files generated by an auth simulation,
# like `10_direct_docker_auth_simulation`. # like `10_direct_docker_auth_simulation`.
# It MUST be inside the docker_host_mount_path. # It MUST be inside the docker_host_mount_path.
info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation" # NOTE: This path is expected to be on an s3fs mount for cross-host communication.
info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
#info_json_dir: "run/docker_mount/download_tasks" #info_json_dir: "run/docker_mount/download_tasks"
# Regex to extract the profile name from a task filename. The first capture # Regex to extract the profile name from a task filename. The first capture
# group is used. This is crucial for the task-first locking strategy. # group is used. This is crucial for the task-first locking strategy.
@ -30,6 +31,8 @@ execution_control:
workers: 1 workers: 1
- profile_prefix: "user2" - profile_prefix: "user2"
workers: 1 workers: 1
- profile_prefix: "user3"
workers: 1
# How long a worker should pause if it cannot find an available profile or task. # How long a worker should pause if it cannot find an available profile or task.
worker_polling_interval_seconds: 1 worker_polling_interval_seconds: 1
@ -86,6 +89,7 @@ direct_docker_cli_policy:
docker_container_mount_path: "/config" docker_container_mount_path: "/config"
# Path on the HOST where downloaded files will be saved. # Path on the HOST where downloaded files will be saved.
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
docker_host_download_path: "downloaded_media/direct_docker_simulation" docker_host_download_path: "downloaded_media/direct_docker_simulation"
# Path inside the CONTAINER where `docker_host_download_path` is mounted. # Path inside the CONTAINER where `docker_host_download_path` is mounted.
docker_container_download_path: "/downloads" docker_container_download_path: "/downloads"

View File

@ -1,27 +1,82 @@
# Policy: Queue-based Authentication Simulation via Direct Docker Exec # Policy: Queue-based Authentication Simulation
# #
# This policy simulates a continuous stream of info.json fetch requests using # This policy simulates a continuous stream of info.json fetch requests. It pulls
# the 'direct_docker_cli' mode. It pulls URLs from a Redis queue, creates a # URLs from a Redis queue and processes them, acting as the first stage in a
# temporary batch file, and then calls a yt-dlp command inside a running # two-stage simulation. The second stage (downloading) can be handled in one
# Docker container. # of two ways, configured below:
#
# --- WORKFLOW 1: Queue-Auth -> File-Download ---
# - This policy creates info.json files in a shared directory (`save_info_json_dir`).
# - A separate download simulation (e.g., policy 11_direct_docker_download_simulation.yaml)
# watches that directory, picks up the files, and performs the downloads.
# - To enable:
# - Set `create_download_tasks: false`
# - Ensure `save_info_json_dir` points to a shared path.
#
# --- WORKFLOW 2: Queue-Auth -> Queue-Download ---
# - This policy creates download *tasks* and pushes them to another Redis queue.
# - A separate download simulation (e.g., policy 13_queue_download_simulation.yaml)
# pulls tasks from that queue and performs the downloads.
# - To enable:
# - Set `create_download_tasks: true`
# - Configure `download_task_queue` to the correct queue name.
# - Use `download_task_granularity` to control if one task is created per-URL
# or per-format.
# #
name: 12_queue_auth_simulation name: 12_queue_auth_simulation
settings: settings:
mode: fetch_only mode: fetch_only
orchestration_mode: direct_docker_cli orchestration_mode: queue_auth
profile_mode: from_pool_with_lock profile_mode: from_pool_with_lock
# The save directory MUST be inside the docker_host_mount_path. # For Queue-Auth -> File-Download workflow: Directory to save generated info.json files.
save_info_json_dir: "run/docker_mount/fetched_info_jsons/queue_simulation" # A file-based download worker (e.g., policy 11) will watch this directory.
# This directory MUST be inside the docker_host_mount_path.
# NOTE: This path is expected to be on an s3fs mount for cross-host communication.
save_info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
execution_control: execution_control:
# Define worker pools for multiple user groups
worker_pools:
- profile_prefix: "user1"
workers: 1
- profile_prefix: "user2"
workers: 1
- profile_prefix: "user3"
workers: 1 workers: 1
# How long a worker should pause if it cannot find an available profile to lock. # How long a worker should pause if it cannot find an available profile to lock.
worker_polling_interval_seconds: 1 worker_polling_interval_seconds: 1
# No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability. # No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.
info_json_generation_policy: info_json_generation_policy:
profile_prefix: "user1" # This setting tells the auth worker how many download tasks will be generated
# per successful info.json. It is used to correctly increment the
# 'pending_downloads' counter on the auth profile.
# Can be an integer, or 'from_download_policy' to automatically count formats
# from the 'download_policy.formats' setting in this same policy file.
downloads_per_url: "from_download_policy"
# (For Queue-Download workflow) Controls how download tasks are created.
#
# "per_format": (Default) Creates one download task for EACH format specified in 'formats_to_download'.
# If `formats_to_download` is "140,299", two download tasks are created, and
# the 'pending_downloads' counter is incremented by 2.
#
# "per_url": Creates a SINGLE download task for the entire URL. The 'formats_to_download'
# string is passed to the download worker as the format selector, but 'pending_downloads'
# is only incremented by 1 for the whole URL.
#
# --- Current Setting ---
download_task_granularity: "per_format"
# --- Alternative Setting (commented out) ---
# download_task_granularity: "per_url"
# profile_prefix is now defined per-pool in execution_control.worker_pools
# However, for queue auth mode, we need a fallback prefix
profile_prefix: "user"
# This section is needed for the 'downloads_per_url: from_download_policy' setting.
# It should mirror the formats being used by the download simulation.
download_policy:
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
direct_docker_cli_policy: direct_docker_cli_policy:
# Which simulation environment's profiles to use for locking. # Which simulation environment's profiles to use for locking.
@ -50,6 +105,7 @@ direct_docker_cli_policy:
docker_container_mount_path: "/config" # The mount point inside the container docker_container_mount_path: "/config" # The mount point inside the container
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs. # Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
docker_host_cache_path: ".cache/queue_auth_simulation" docker_host_cache_path: ".cache/queue_auth_simulation"
# Path inside the container where the cache is mounted. Should match HOME/.cache # Path inside the container where the cache is mounted. Should match HOME/.cache
docker_container_cache_path: "/config/.cache" docker_container_cache_path: "/config/.cache"
@ -109,18 +165,47 @@ direct_docker_cli_policy:
# Template for renaming the final info.json. # Template for renaming the final info.json.
rename_file_template: "{video_id}-{profile_name}-{proxy}.info.json" rename_file_template: "{video_id}-{profile_name}-{proxy}.info.json"
# Settings for controlling the behavior of dummy/simulation modes.
# These values can be overridden at runtime with the --set flag.
dummy_simulation_settings:
# Timings for dummy auth simulation (per-URL delay in a batch)
auth_min_seconds: 0.1
auth_max_seconds: 0.5
auth_failure_rate: 0.0
auth_skipped_failure_rate: 0.0
# Timings for dummy download simulation (per-format download time)
download_min_seconds: 1.0
download_max_seconds: 3.0
download_failure_rate: 0.0
download_skipped_failure_rate: 0.0
queue_policy: queue_policy:
# Set to false to use legacy, unprefixed queue names (e.g., 'queue2_auth_inbox'). # Set to false to use legacy, unprefixed queue names (e.g., 'queue2_auth_inbox').
# Set to true (or omit) to use environment-prefixed names (e.g., 'sim_auth_queue2_auth_inbox'). # Set to true (or omit) to use environment-prefixed names (e.g., 'sim_auth_queue2_auth_inbox').
use_env_prefix: false use_env_prefix: false
# If specified, create download tasks for these formats # Queue to pull URLs from
# Can be "all", a specific format ID, or a list of format IDs input_queue: "queue2_auth_inbox"
formats_to_download: "140-dashy/140-dashy-0/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
# --- Download Handoff Configuration ---
# Set to 'true' for Queue-Auth -> Queue-Download workflow.
# Set to 'false' for Queue-Auth -> File-Download workflow.
create_download_tasks: false
# Queue to push download tasks to (if create_download_tasks is true)
download_task_queue: "queue2_dl_inbox"
# How many tasks a worker should pull from the queue at once. # How many tasks a worker should pull from the queue at once.
# This will become the batch size for the docker run. # This will become the batch size for the docker run.
batch_size: 25 batch_size: 5
# If specified, create download tasks for these formats
# Can be "all", a specific format ID, or a list of format IDs
# Defaults to the formats in download_policy.formats
# Example: formats_to_download: "140-dashy,299-dashy"
# Example: formats_to_download: "all"
# Example: formats_to_download: ["140-dashy", "299-dashy"]
formats_to_download: "from_download_policy"
simulation_parameters: simulation_parameters:
auth_env: "sim_auth" auth_env: "sim_auth"
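# NOTE: to illustrate the two workflows without editing this file, the relevant keys can be overridden
# at run time with --set, following the same convention the playbooks use for dummy_simulation_settings
# and worker_pools; treating these particular keys as --set targets is an assumption.
# WORKFLOW 1 (file handoff): create_download_tasks stays false, as configured above.
$ ./bin/ytops-client stress-policy --policy policies/12_queue_auth_simulation.yaml
# WORKFLOW 2 (queue handoff): switch the handoff and task granularity on the command line.
$ ./bin/ytops-client stress-policy --policy policies/12_queue_auth_simulation.yaml \
    --set 'queue_policy.create_download_tasks=true' \
    --set 'info_json_generation_policy.download_task_granularity=per_url'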

View File

@ -1,16 +1,22 @@
# Policy: Queue-based Download Simulation via Direct Docker Exec # Policy: Queue-based Download Simulation
# #
# This policy simulates a continuous stream of downloads using the # This policy simulates a continuous stream of downloads. It pulls download tasks
# 'direct_docker_cli' mode with `mode: download_only`. It pulls download # from a Redis queue, where each task typically contains a path to an info.json
# tasks from a Redis queue, each containing a path to an info.json file, # file and a format to download.
# and invokes a yt-dlp command inside a running Docker container to perform #
# the download. # This policy is designed to be the *second stage* of a two-stage simulation,
# consuming tasks produced by an authentication simulation like:
# - `12_queue_auth_simulation.yaml` (when configured for Queue-Download workflow)
#
# It does not matter to this policy whether the auth stage created tasks per-URL
# or per-format; this worker will simply process whatever task it receives from
# the `input_queue`.
# #
name: 13_queue_download_simulation name: 13_queue_download_simulation
settings: settings:
mode: download_only mode: download_only
orchestration_mode: direct_docker_cli orchestration_mode: queue_download
profile_mode: from_pool_with_lock profile_mode: from_pool_with_lock
# In queue mode, info_json_dir is not used to find tasks. # In queue mode, info_json_dir is not used to find tasks.
# However, the paths inside the download tasks must be accessible # However, the paths inside the download tasks must be accessible
@ -19,12 +25,19 @@ settings:
# can be specified in the download task. # can be specified in the download task.
execution_control: execution_control:
workers: 4 # Define worker pools for multiple user groups
worker_pools:
- profile_prefix: "user1"
workers: 1
- profile_prefix: "user2"
workers: 1
- profile_prefix: "user3"
workers: 1
# How long a worker should pause if it cannot find an available profile or task. # How long a worker should pause if it cannot find an available profile or task.
worker_polling_interval_seconds: 1 worker_polling_interval_seconds: 1
download_policy: download_policy:
profile_prefix: "user1" # profile_prefix is now defined per-pool in execution_control.worker_pools
# Default cooldown in seconds if not specified by the enforcer in Redis. # Default cooldown in seconds if not specified by the enforcer in Redis.
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy) # The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
# will always take precedence. This is a fallback. # will always take precedence. This is a fallback.
@ -65,6 +78,7 @@ direct_docker_cli_policy:
docker_container_mount_path: "/config" docker_container_mount_path: "/config"
# Path on the HOST where downloaded files will be saved. # Path on the HOST where downloaded files will be saved.
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
docker_host_download_path: "downloaded_media/queue_downloads" docker_host_download_path: "downloaded_media/queue_downloads"
# Path inside the CONTAINER where `docker_host_download_path` is mounted. # Path inside the CONTAINER where `docker_host_download_path` is mounted.
docker_container_download_path: "/downloads" docker_container_download_path: "/downloads"
@ -95,11 +109,36 @@ direct_docker_cli_policy:
- "Invalid data found when processing input" - "Invalid data found when processing input"
- "Error opening input files" - "Error opening input files"
# Settings for controlling the behavior of dummy/simulation modes.
# These values can be overridden at runtime with the --set flag.
dummy_simulation_settings:
# Timings for dummy download simulation (per-format download time)
download_min_seconds: 1.0
download_max_seconds: 3.0
download_failure_rate: 0.0
download_skipped_failure_rate: 0.0
queue_policy: queue_policy:
# Set to false to use legacy, unprefixed queue names (e.g., 'queue2_dl_inbox'). # Set to false to use legacy, unprefixed queue names (e.g., 'queue2_dl_inbox').
# Set to true (or omit) to use environment-prefixed names (e.g., 'sim_download_queue2_dl_inbox'). # Set to true (or omit) to use environment-prefixed names (e.g., 'sim_download_queue2_dl_inbox').
use_env_prefix: false use_env_prefix: false
# Queue to pull download tasks from
input_queue: "queue2_dl_inbox"
# Whether to report completion back to a queue
# Can be true (report all), false (report none), or "success_only"/"failure_only"
report_completion: true
# Queue to report completion to
completion_queue: "queue2_dl_completed"
# Queue to report failures to (always reported regardless of report_completion)
failure_queue: "queue2_dl_fail"
# Queue to report skipped tasks to
skipped_queue: "queue2_dl_skipped"
# How many tasks to process in a batch. For downloads, this should be 1, # How many tasks to process in a batch. For downloads, this should be 1,
# as each worker locks a profile for a single download task. # as each worker locks a profile for a single download task.
batch_size: 1 batch_size: 1
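# NOTE: similarly, a hedged sketch of scaling the consumer side at run time by overriding worker_pools,
# reusing the --set pattern shown in the download-simulation playbook.
$ ./bin/ytops-client stress-policy --policy policies/13_queue_download_simulation.yaml \
    --set 'execution_control.worker_pools=[{"profile_prefix": "user1", "workers": 2}]' \
    --set 'settings.dummy_simulation_settings.download_max_seconds=1.5'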

View File

@ -13,13 +13,13 @@ auth_profile_setup:
cleanup_before_run: true cleanup_before_run: true
pools: pools:
- prefix: "user1" - prefix: "user1"
proxy: "sslocal-rust-1092:1092" proxy: "sslocal-rust-1088:1088"
count: 3 count: 3
- prefix: "user2" - prefix: "user2"
proxy: "sslocal-rust-1092:1092" proxy: "sslocal-rust-1085:1085"
count: 3 count: 3
- prefix: "user3" - prefix: "user3"
proxy: "sslocal-rust-1092:1092" proxy: "sslocal-rust-1084:1084"
count: 3 count: 3
# --- Profile setup for the DOWNLOAD simulation --- # --- Profile setup for the DOWNLOAD simulation ---
@ -28,11 +28,11 @@ download_profile_setup:
cleanup_before_run: true cleanup_before_run: true
pools: pools:
- prefix: "user1" - prefix: "user1"
proxy: "sslocal-rust-1092:1092" proxy: "sslocal-rust-1088:1088"
count: 3 count: 3
- prefix: "user2" - prefix: "user2"
proxy: "sslocal-rust-1092:1092" proxy: "sslocal-rust-1085:1085"
count: 3 count: 3
- prefix: "user3" - prefix: "user3"
proxy: "sslocal-rust-1092:1092" proxy: "sslocal-rust-1084:1084"
count: 3 count: 3

View File

@ -16,13 +16,6 @@ import threading
import time import time
from urllib.parse import urljoin from urllib.parse import urljoin
try:
import aria2p
from aria2p.utils import human_readable_bytes
import yt_dlp
except ImportError:
print("aria2p or yt-dlp is not installed. Please install them with: pip install aria2p yt-dlp", file=sys.stderr)
sys.exit(1)
logger = logging.getLogger('download_aria_tool') logger = logging.getLogger('download_aria_tool')
@ -173,6 +166,14 @@ def parse_aria_args_to_options(args_str):
def main_download_aria(args): def main_download_aria(args):
"""Main logic for the 'download-aria' command.""" """Main logic for the 'download-aria' command."""
try:
import aria2p
from aria2p.utils import human_readable_bytes
import yt_dlp
except ImportError:
print("aria2p or yt-dlp is not installed. Please install them with: pip install aria2p yt-dlp", file=sys.stderr)
return 1
log_level = logging.DEBUG if args.verbose else logging.INFO log_level = logging.DEBUG if args.verbose else logging.INFO
# Reconfigure root logger to ensure our settings are applied. # Reconfigure root logger to ensure our settings are applied.
for handler in logging.root.handlers[:]: for handler in logging.root.handlers[:]:
@ -405,6 +406,7 @@ def main_download_aria(args):
def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None): def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None):
"""Handle downloading a single URL with aria2c.""" """Handle downloading a single URL with aria2c."""
import aria2p
if remote_dir: if remote_dir:
aria_options['dir'] = remote_dir aria_options['dir'] = remote_dir
logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...") logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...")
@ -532,6 +534,7 @@ def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, r
def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None): def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None):
"""Handle downloading fragmented formats with aria2c.""" """Handle downloading fragmented formats with aria2c."""
import aria2p
logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to download queue.") logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to download queue.")
fragment_base_url = target_format.get('fragment_base_url') fragment_base_url = target_format.get('fragment_base_url')
fragments = target_format['fragments'] fragments = target_format['fragments']

View File

@ -16,12 +16,6 @@ import sys
import time import time
from datetime import datetime from datetime import datetime
try:
import yt_dlp
from yt_dlp.utils import match_filter_func
except ImportError:
print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
sys.exit(1)
logger = logging.getLogger('download_native_py_tool') logger = logging.getLogger('download_native_py_tool')
@ -110,6 +104,8 @@ def _download_single_format(format_id, info_data, base_ydl_opts, args):
Returns a tuple: (success: bool, ytdlp_logger: YTDLPLogger) Returns a tuple: (success: bool, ytdlp_logger: YTDLPLogger)
""" """
import yt_dlp
# Deep copy info_data so we can modify it without affecting other downloads # Deep copy info_data so we can modify it without affecting other downloads
local_info_data = copy.deepcopy(info_data) local_info_data = copy.deepcopy(info_data)
@ -178,6 +174,13 @@ def _download_single_format(format_id, info_data, base_ydl_opts, args):
def main_download_native_py(args): def main_download_native_py(args):
"""Main logic for the 'download-native-py' command.""" """Main logic for the 'download-native-py' command."""
try:
import yt_dlp
from yt_dlp.utils import match_filter_func
except ImportError:
print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
return 1
# All logging should go to stderr to keep stdout clean for the final filename, or for binary data with --output-buffer. # All logging should go to stderr to keep stdout clean for the final filename, or for binary data with --output-buffer.
log_stream = sys.stderr log_stream = sys.stderr
log_level = logging.DEBUG if args.verbose else logging.INFO log_level = logging.DEBUG if args.verbose else logging.INFO

View File

@ -44,6 +44,7 @@ class PolicyEnforcer:
self.manager = manager self.manager = manager
self.dry_run = dry_run self.dry_run = dry_run
self.actions_taken_this_cycle = 0 self.actions_taken_this_cycle = 0
self._last_wait_log_message = ""
PROXY_REST_REASON = "Proxy resting" PROXY_REST_REASON = "Proxy resting"
@ -248,7 +249,19 @@ class PolicyEnforcer:
# This prevents a deadlock if all groups are just 'Working' but none are in the 'waiting_downloads' state yet. # This prevents a deadlock if all groups are just 'Working' but none are in the 'waiting_downloads' state yet.
if not is_any_group_idle and groups_currently_waiting: if not is_any_group_idle and groups_currently_waiting:
is_system_blocked_by_downloads = True is_system_blocked_by_downloads = True
logger.info(f"System is waiting for downloads to finish in groups: {groups_currently_waiting}. No new profiles will be activated until a group is free.") log_message = f"System is waiting for downloads to finish in groups: {groups_currently_waiting}. No new profiles will be activated until a group is free."
if log_message != self._last_wait_log_message:
if self._last_wait_log_message:
print(file=sys.stderr) # Newline if we were printing dots
logger.info(log_message)
self._last_wait_log_message = log_message
else:
print(".", end="", file=sys.stderr, flush=True)
else:
# If we are no longer blocked, reset the message tracker
if self._last_wait_log_message:
print(file=sys.stderr) # Newline to clean up after dots
self._last_wait_log_message = ""
if is_system_blocked_by_downloads: if is_system_blocked_by_downloads:
# When blocked, we only want to consider profiles that are in the 'waiting_downloads' state, # When blocked, we only want to consider profiles that are in the 'waiting_downloads' state,
@ -328,6 +341,7 @@ class PolicyEnforcer:
# The final list to check is the sorted ready profiles, followed by the not-ready ones. # The final list to check is the sorted ready profiles, followed by the not-ready ones.
not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', '')))) not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
profiles_to_check = sorted_ready_profiles + not_ready_profiles profiles_to_check = sorted_ready_profiles + not_ready_profiles
logger.debug(f"Activation candidates for 'least_loaded' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
else: # Default 'longest_idle' sort else: # Default 'longest_idle' sort
if strategy not in ['longest_idle']: if strategy not in ['longest_idle']:
@ -352,6 +366,7 @@ class PolicyEnforcer:
# The final list to check will process all ready profiles first, then wait for the not-ready ones. # The final list to check will process all ready profiles first, then wait for the not-ready ones.
profiles_to_check = ready_profiles + not_ready_profiles profiles_to_check = ready_profiles + not_ready_profiles
logger.debug(f"Activation candidates for 'longest_idle' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
# --- End New Sorting Logic --- # --- End New Sorting Logic ---
# --- New logic: Identify groups with waiting profiles --- # --- New logic: Identify groups with waiting profiles ---
@ -382,7 +397,7 @@ class PolicyEnforcer:
group_name = profile_to_group_map.get(profile_name) group_name = profile_to_group_map.get(profile_name)
# --- New check to prevent activating profiles from a waiting group --- # --- New check to prevent activating profiles from a waiting group ---
if group_name in waiting_group_names and profile.get('rest_reason') != 'waiting_downloads': if group_name in waiting_group_names:
logger.debug(f"Profile '{profile_name}' activation deferred because its group '{group_name}' is waiting for downloads to complete.") logger.debug(f"Profile '{profile_name}' activation deferred because its group '{group_name}' is waiting for downloads to complete.")
continue continue
# --- End new logic --- # --- End new logic ---

View File

@ -102,10 +102,23 @@ class ProfileManager:
self.redis.ping() self.redis.ping()
logger.info(f"Successfully connected to Redis.") logger.info(f"Successfully connected to Redis.")
logger.info(f"Using key prefix: {key_prefix}") logger.info(f"Using key prefix: {key_prefix}")
self._last_lock_warning = ""
except redis.exceptions.ConnectionError as e: except redis.exceptions.ConnectionError as e:
logger.error(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}") logger.error(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}")
sys.exit(1) sys.exit(1)
def _log_lock_warning(self, message: str):
"""Logs a lock-related warning, printing dots for repeated messages."""
if message == self._last_lock_warning:
# Use print to avoid logger formatting and newlines
print(".", end="", file=sys.stderr, flush=True)
else:
# If we were printing dots, start a new line before the new message
if self._last_lock_warning:
print(file=sys.stderr)
logger.warning(message)
self._last_lock_warning = message
def _profile_key(self, profile_name: str) -> str: def _profile_key(self, profile_name: str) -> str:
"""Get Redis key for a profile.""" """Get Redis key for a profile."""
return f"{self.key_prefix}profile:{profile_name}" return f"{self.key_prefix}profile:{profile_name}"
@ -288,11 +301,13 @@ class ProfileManager:
'tolerated_error_count': '0', 'tolerated_error_count': '0',
'download_count': '0', 'download_count': '0',
'download_error_count': '0', 'download_error_count': '0',
'predicted_download_count': '0',
'global_success_count': '0', 'global_success_count': '0',
'global_failure_count': '0', 'global_failure_count': '0',
'global_tolerated_error_count': '0', 'global_tolerated_error_count': '0',
'global_download_count': '0', 'global_download_count': '0',
'global_download_error_count': '0', 'global_download_error_count': '0',
'global_predicted_download_count': '0',
'lock_timestamp': '0', 'lock_timestamp': '0',
'lock_owner': '', 'lock_owner': '',
'rest_until': '0', 'rest_until': '0',
@ -328,10 +343,10 @@ class ProfileManager:
# Convert numeric fields # Convert numeric fields
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count', numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
'tolerated_error_count', 'download_count', 'download_error_count', 'tolerated_error_count', 'download_count', 'download_error_count', 'predicted_download_count',
'global_success_count', 'global_failure_count', 'global_success_count', 'global_failure_count',
'global_tolerated_error_count', 'global_download_count', 'global_tolerated_error_count', 'global_download_count',
'global_download_error_count', 'global_download_error_count', 'global_predicted_download_count',
'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at'] 'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at']
for field in numeric_fields: for field in numeric_fields:
if field in data: if field in data:
@ -388,10 +403,10 @@ class ProfileManager:
# --- End batch fetch --- # --- End batch fetch ---
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count', numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
'tolerated_error_count', 'download_count', 'download_error_count', 'tolerated_error_count', 'download_count', 'download_error_count', 'predicted_download_count',
'global_success_count', 'global_failure_count', 'global_success_count', 'global_failure_count',
'global_tolerated_error_count', 'global_download_count', 'global_tolerated_error_count', 'global_download_count',
'global_download_error_count', 'global_download_error_count', 'global_predicted_download_count',
'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at'] 'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at']
for i, data in enumerate(all_profile_data): for i, data in enumerate(all_profile_data):
@ -528,8 +543,12 @@ class ProfileManager:
return total_deleted return total_deleted
def record_activity(self, name: str, activity_type: str, def record_activity(self, name: str, activity_type: str,
timestamp: Optional[float] = None) -> bool: timestamp: Optional[float] = None, is_dummy: bool = False) -> bool:
"""Record activity (success/failure) for a profile.""" """
Record activity (success/failure) for a profile.
If is_dummy is True, the activity will NOT be recorded for the associated proxy,
preventing dummy failures from triggering proxy-level enforcer actions.
"""
if activity_type not in ['success', 'failure', 'tolerated_error', 'download', 'download_error']: if activity_type not in ['success', 'failure', 'tolerated_error', 'download', 'download_error']:
logger.error(f"Invalid activity type: {activity_type}") logger.error(f"Invalid activity type: {activity_type}")
return False return False
@ -558,7 +577,8 @@ class ProfileManager:
# Keep only last 1000 activities to prevent unbounded growth # Keep only last 1000 activities to prevent unbounded growth
self.redis.zremrangebyrank(activity_key, 0, -1001) self.redis.zremrangebyrank(activity_key, 0, -1001)
# Also record activity for the proxy # Also record activity for the proxy, BUT NOT for dummy activities.
if not is_dummy:
proxy_url = profile.get('proxy') proxy_url = profile.get('proxy')
if proxy_url: if proxy_url:
proxy_activity_key = self._proxy_activity_key(proxy_url, activity_type) proxy_activity_key = self._proxy_activity_key(proxy_url, activity_type)
@ -569,7 +589,36 @@ class ProfileManager:
pipe.execute() pipe.execute()
logger.debug(f"Recorded {activity_type} for proxy '{proxy_url}'") logger.debug(f"Recorded {activity_type} for proxy '{proxy_url}'")
logger.debug(f"Recorded {activity_type} for profile '{name}' at {ts}") log_msg = f"Recorded {activity_type} for profile '{name}' at {ts}"
if is_dummy:
log_msg += " (dummy, proxy activity skipped)"
logger.debug(log_msg)
return True
def record_predicted_downloads(self, name: str, count: int, is_dummy: bool = False) -> bool:
"""Records the number of download tasks predicted/created for a profile."""
if count <= 0:
return False
profile = self.get_profile(name)
if not profile:
logger.error(f"Profile '{name}' not found")
return False
ts = time.time()
# Update counters in profile
profile_key = self._profile_key(name)
self.redis.hincrby(profile_key, 'predicted_download_count', count)
self.redis.hincrby(profile_key, 'global_predicted_download_count', count)
# Update last_used
self.redis.hset(profile_key, 'last_used', str(ts))
log_msg = f"Recorded {count} predicted downloads for profile '{name}'"
if is_dummy:
log_msg += " (dummy, proxy activity skipped)"
logger.debug(log_msg)
return True return True
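A minimal sketch of how a caller might combine the two new calls. Only record_activity and record_predicted_downloads come from the class above; record_auth_result, its parameters, and the call pattern are hypothetical, and pm is assumed to be an already-constructed ProfileManager.

def record_auth_result(pm, profile_name, ok, expected_media_files=0, dummy=False):
    """Record one auth attempt and, on success, the downloads it should produce."""
    if ok:
        pm.record_activity(profile_name, 'success', is_dummy=dummy)
        if expected_media_files > 0:
            pm.record_predicted_downloads(profile_name, expected_media_files, is_dummy=dummy)
    else:
        # Dummy failures still count against the profile, but are not attributed
        # to its proxy, so proxy-level enforcer thresholds are unaffected.
        pm.record_activity(profile_name, 'failure', is_dummy=dummy)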
def get_activity_rate(self, name: str, activity_type: str, def get_activity_rate(self, name: str, activity_type: str,
@ -612,6 +661,7 @@ class ProfileManager:
'tolerated_error_count': '0', 'tolerated_error_count': '0',
'download_count': '0', 'download_count': '0',
'download_error_count': '0', 'download_error_count': '0',
'predicted_download_count': '0',
} }
self.redis.hset(profile_key, mapping=counters_to_reset) self.redis.hset(profile_key, mapping=counters_to_reset)
logger.info(f"Reset session counters for profile '{name}'.") logger.info(f"Reset session counters for profile '{name}'.")
@ -630,19 +680,21 @@ class ProfileManager:
total_tolerated_error = sum(int(p.get('global_tolerated_error_count', 0)) for p in profiles) total_tolerated_error = sum(int(p.get('global_tolerated_error_count', 0)) for p in profiles)
total_downloads = sum(int(p.get('global_download_count', 0)) for p in profiles) total_downloads = sum(int(p.get('global_download_count', 0)) for p in profiles)
total_download_errors = sum(int(p.get('global_download_error_count', 0)) for p in profiles) total_download_errors = sum(int(p.get('global_download_error_count', 0)) for p in profiles)
total_predicted_downloads = sum(int(p.get('global_predicted_download_count', 0)) for p in profiles)
return { return {
'total_success': total_success, 'total_success': total_success,
'total_failure': total_failure, 'total_failure': total_failure,
'total_tolerated_error': total_tolerated_error, 'total_tolerated_error': total_tolerated_error,
'total_downloads': total_downloads, 'total_downloads': total_downloads,
'total_download_errors': total_download_errors, 'total_download_errors': total_download_errors,
'total_predicted_downloads': total_predicted_downloads,
} }
def get_per_proxy_stats(self) -> Dict[str, Dict[str, Any]]: def get_per_proxy_stats(self) -> Dict[str, Dict[str, Any]]:
"""Get aggregated stats per proxy.""" """Get aggregated stats per proxy."""
profiles = self.list_profiles() profiles = self.list_profiles()
proxy_stats = collections.defaultdict(lambda: { proxy_stats = collections.defaultdict(lambda: {
'success': 0, 'failure': 0, 'tolerated_error': 0, 'downloads': 0, 'download_errors': 0, 'profiles': 0 'success': 0, 'failure': 0, 'tolerated_error': 0, 'downloads': 0, 'download_errors': 0, 'predicted_downloads': 0, 'profiles': 0
}) })
for p in profiles: for p in profiles:
proxy = p.get('proxy') proxy = p.get('proxy')
@ -652,6 +704,7 @@ class ProfileManager:
proxy_stats[proxy]['tolerated_error'] += int(p.get('global_tolerated_error_count', 0)) proxy_stats[proxy]['tolerated_error'] += int(p.get('global_tolerated_error_count', 0))
proxy_stats[proxy]['downloads'] += int(p.get('global_download_count', 0)) proxy_stats[proxy]['downloads'] += int(p.get('global_download_count', 0))
proxy_stats[proxy]['download_errors'] += int(p.get('global_download_error_count', 0)) proxy_stats[proxy]['download_errors'] += int(p.get('global_download_error_count', 0))
proxy_stats[proxy]['predicted_downloads'] += int(p.get('global_predicted_download_count', 0))
proxy_stats[proxy]['profiles'] += 1 proxy_stats[proxy]['profiles'] += 1
return dict(proxy_stats) return dict(proxy_stats)
@ -862,14 +915,14 @@ class ProfileManager:
# Original logic: find all active profiles, optionally filtered by prefix. # Original logic: find all active profiles, optionally filtered by prefix.
active_profiles = self.redis.zrange(self._state_key(ProfileState.ACTIVE.value), 0, -1) active_profiles = self.redis.zrange(self._state_key(ProfileState.ACTIVE.value), 0, -1)
if not active_profiles: if not active_profiles:
logger.warning("No active profiles available to lock.") self._log_lock_warning("No active profiles available to lock.")
self.redis.incr(self._failed_lock_attempts_key()) self.redis.incr(self._failed_lock_attempts_key())
return None return None
if profile_prefix: if profile_prefix:
profiles_to_check = [p for p in active_profiles if p.startswith(profile_prefix)] profiles_to_check = [p for p in active_profiles if p.startswith(profile_prefix)]
if not profiles_to_check: if not profiles_to_check:
logger.warning(f"No active profiles with prefix '{profile_prefix}' available to lock.") self._log_lock_warning(f"No active profiles with prefix '{profile_prefix}' available to lock.")
self.redis.incr(self._failed_lock_attempts_key()) self.redis.incr(self._failed_lock_attempts_key())
return None return None
else: else:
@ -883,9 +936,9 @@ class ProfileManager:
if not full_profiles: if not full_profiles:
if specific_profile_name: if specific_profile_name:
logger.warning(f"Profile '{specific_profile_name}' is not eligible for locking (e.g., not ACTIVE or missing).") self._log_lock_warning(f"Profile '{specific_profile_name}' is not eligible for locking (e.g., not ACTIVE or missing).")
else: else:
logger.warning("No active profiles available to lock after filtering.") self._log_lock_warning("No active profiles available to lock after filtering.")
self.redis.incr(self._failed_lock_attempts_key()) self.redis.incr(self._failed_lock_attempts_key())
return None return None
@ -898,7 +951,7 @@ class ProfileManager:
] ]
if not eligible_profiles: if not eligible_profiles:
logger.warning("No active profiles with an active proxy available to lock.") self._log_lock_warning("No active profiles with an active proxy available to lock.")
self.redis.incr(self._failed_lock_attempts_key()) self.redis.incr(self._failed_lock_attempts_key())
return None return None
@ -922,7 +975,7 @@ class ProfileManager:
if not current_state or current_state.upper() != ProfileState.ACTIVE.value: if not current_state or current_state.upper() != ProfileState.ACTIVE.value:
# Another process (enforcer) changed the state. Release lock and try next. # Another process (enforcer) changed the state. Release lock and try next.
self.redis.hdel(locks_key, name) self.redis.hdel(locks_key, name)
logger.warning(f"Aborted lock for '{name}'; state changed from ACTIVE to '{current_state}' during lock acquisition.") self._log_lock_warning(f"Aborted lock for '{name}'; state changed from ACTIVE to '{current_state}' during lock acquisition.")
continue continue
# State is still ACTIVE, proceed with locking. # State is still ACTIVE, proceed with locking.
@ -937,6 +990,7 @@ class ProfileManager:
sm.lock(owner=owner) sm.lock(owner=owner)
# The on_enter_locked action handles all Redis updates for the profile itself. # The on_enter_locked action handles all Redis updates for the profile itself.
# The logger messages are also in the action. # The logger messages are also in the action.
self._last_lock_warning = "" # Reset on successful lock
return self.get_profile(name) return self.get_profile(name)
except Exception as e: except Exception as e:
# This could be a TransitionNotAllowed error if the state changed, # This could be a TransitionNotAllowed error if the state changed,
@ -946,7 +1000,7 @@ class ProfileManager:
self.redis.hdel(locks_key, name) self.redis.hdel(locks_key, name)
continue continue
logger.warning("Could not lock any active profile (all may have been locked by other workers).") self._log_lock_warning("Could not lock any active profile (all may have been locked by other workers).")
self.redis.incr(self._failed_lock_attempts_key()) self.redis.incr(self._failed_lock_attempts_key())
return None return None
@ -1016,45 +1070,15 @@ class ProfileManager:
# the `on_enter` actions for the current state. We can suppress the initial transition # the `on_enter` actions for the current state. We can suppress the initial transition
# and set the state directly. # and set the state directly.
# WORKAROUND for older statemachine library: # By passing `initial_value`, we tell the state machine to start in the
# Instantiating the machine triggers an initial transition to ACTIVE, which wrongly updates Redis. # profile's actual current state from Redis, instead of executing the
# We let this happen, and then immediately correct the state if it was supposed to be something else. # default `initial=True` transition to ACTIVE. This prevents incorrect
sm = ProfileStateMachine(manager=self, profile_name=name) # state changes and logs during "hydration" of the state machine.
sm = ProfileStateMachine(
# The sm is now in ACTIVE state, and Redis has been updated. If the original state was manager=self,
# LOCKED, we must re-lock it to fix Redis and the state machine object so transitions work. profile_name=name,
if current_state_str == ProfileState.LOCKED.value: initial_value=current_state_str
lock_owner = profile.get('lock_owner', 're-lock-owner') )
try:
# This transition ensures the `on_enter_LOCKED` actions are run, making the
# state consistent in Redis and in the state machine object.
sm.lock(owner=lock_owner)
except Exception as e:
logger.error(f"Failed to re-lock profile '{name}' during state machine hydration: {e}")
# The state is now inconsistent, best to not return a broken machine.
return None
elif current_state_str != sm.current_state.value.upper():
# For any other state, we must manually fix both the state machine object and Redis,
# as the constructor wrongly transitioned to ACTIVE.
# 1. Force state on the machine object. This does not trigger actions.
target_state_obj = next((s for s in sm.states if s.value.upper() == current_state_str), None)
if not target_state_obj:
logger.error(f"Could not find state object for '{current_state_str}' during hydration of '{name}'.")
return None
sm.current_state = target_state_obj
# 2. Manually revert the state in Redis to what it should be.
profile_key = self._profile_key(name)
pipe = self.redis.pipeline()
pipe.hset(profile_key, 'state', current_state_str)
# Atomically move the profile from the incorrect ACTIVE index to the correct one.
# The constructor may have added it to ACTIVE without removing it from its original state index.
pipe.zrem(self._state_key(ProfileState.ACTIVE.value), name)
pipe.zadd(self._state_key(current_state_str), {name: profile.get('last_used', time.time())})
pipe.execute()
logger.debug(f"Corrected state for '{name}' to '{current_state_str}' in object and Redis during hydration.")
return sm return sm
def cleanup_stale_locks(self, max_lock_time_seconds: int) -> int: def cleanup_stale_locks(self, max_lock_time_seconds: int) -> int:
@ -1475,36 +1499,77 @@ def _render_profile_group_summary_table(manager, all_profiles, profile_groups_co
next_up_reason = f"least_loaded (load: {load}, {finish_str})" next_up_reason = f"least_loaded (load: {load}, {finish_str})"
elif profile_selection_strategy == 'longest_idle': elif profile_selection_strategy == 'longest_idle':
# Find the single longest idle profile across all groups # Find groups that don't have an active profile
ready_profiles = [] groups_without_active = []
for group in profile_groups_config:
profiles_in_group = group.get('profiles_in_group', [])
has_active = False
for p_name in profiles_in_group:
p = all_profiles_by_name.get(p_name)
if p and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
has_active = True
break
if not has_active:
groups_without_active.append(group)
# If all groups have active profiles, we need to find which one will be rotated next.
# For now, inspect the active profiles and find the one closest to rotation (highest request count).
if not groups_without_active:
# Find the active profile with the highest request count (closest to rotation)
active_profiles_info = []
for group in profile_groups_config: for group in profile_groups_config:
for p_name in group.get('profiles_in_group', []):
p = all_profiles_by_name.get(p_name)
if p and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
# Calculate total requests
total_reqs = (
p.get('success_count', 0) + p.get('failure_count', 0) +
p.get('tolerated_error_count', 0) +
p.get('download_count', 0) + p.get('download_error_count', 0)
)
active_profiles_info.append({
'profile': p,
'group': group,
'total_reqs': total_reqs,
'last_used': p.get('last_used', 0)
})
if active_profiles_info:
# Sort by total requests descending (highest first - closest to rotation)
# Then by last_used ascending (oldest first)
active_profiles_info.sort(key=lambda x: (-x['total_reqs'], x['last_used']))
# The first one is most likely to be rotated next. Determining which group
# would be activated after it would require re-running the activation logic,
# so the 'Next Up' indicator is intentionally left unset in this case.
pass
# Find the longest idle profile from groups without active profiles
ready_profiles = []
for group in groups_without_active:
for p_name in group.get('profiles_in_group', []): for p_name in group.get('profiles_in_group', []):
p = all_profiles_by_name.get(p_name) p = all_profiles_by_name.get(p_name)
if p and p['state'] in [ProfileState.RESTING.value, ProfileState.COOLDOWN.value] and p.get('rest_until', 0) <= now and p.get('rest_reason') != 'waiting_downloads': if p and p['state'] in [ProfileState.RESTING.value, ProfileState.COOLDOWN.value] and p.get('rest_until', 0) <= now and p.get('rest_reason') != 'waiting_downloads':
ready_profiles.append(p) ready_profiles.append((p, group))
if ready_profiles: if ready_profiles:
# Sort them according to the 'longest_idle' activation logic # Sort them according to the 'longest_idle' activation logic
unused_profiles = [p for p in ready_profiles if (p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)) == 0] unused_profiles = [(p, g) for p, g in ready_profiles if (p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)) == 0]
used_profiles = [p for p in ready_profiles if p not in unused_profiles] used_profiles = [(p, g) for p, g in ready_profiles if (p, g) not in unused_profiles]
unused_profiles.sort(key=lambda p: natural_sort_key(p.get('name', ''))) unused_profiles.sort(key=lambda x: natural_sort_key(x[0].get('name', '')))
used_profiles.sort(key=lambda p: (p.get('last_used', 0), natural_sort_key(p.get('name', '')))) used_profiles.sort(key=lambda x: (x[0].get('last_used', 0), natural_sort_key(x[0].get('name', ''))))
sorted_ready_profiles = unused_profiles + used_profiles sorted_ready_profiles = unused_profiles + used_profiles
if sorted_ready_profiles: if sorted_ready_profiles:
next_profile = sorted_ready_profiles[0] next_profile, next_group = sorted_ready_profiles[0]
# Find which group it belongs to next_up_group_name = next_group['name']
for group in profile_groups_config:
if next_profile['name'] in group.get('profiles_in_group', []):
next_up_group_name = group['name']
next_up_reason = profile_selection_strategy next_up_reason = profile_selection_strategy
if getattr(args, 'show_reasons', False): if getattr(args, 'show_reasons', False):
last_used_ts = next_profile.get('last_used', 0) last_used_ts = next_profile.get('last_used', 0)
idle_time_str = f"idle for {format_duration(time.time() - last_used_ts)}" if last_used_ts > 0 else "never used" idle_time_str = f"idle for {format_duration(time.time() - last_used_ts)}" if last_used_ts > 0 else "never used"
next_up_reason = f"longest_idle (via {next_profile['name']}, {idle_time_str})" next_up_reason = f"longest_idle (via {next_profile['name']}, {idle_time_str})"
break
# --- End new logic --- # --- End new logic ---
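A simplified, standalone sketch of the "Next Up" selection added above for the 'longest_idle' strategy (skip groups that already have an ACTIVE/LOCKED profile, then pick the never-used or longest-idle ready profile). The function name and the plain string sort are simplifications; the real code uses natural_sort_key and tuple pairs.

import time

def pick_next_group(groups, profiles_by_name, now=None):
    """Return (group_name, profile) expected to activate next, or None."""
    now = now or time.time()
    active_states = {'ACTIVE', 'LOCKED'}
    ready_states = {'RESTING', 'COOLDOWN'}

    def is_ready(p):
        return (p['state'] in ready_states
                and p.get('rest_until', 0) <= now
                and p.get('rest_reason') != 'waiting_downloads')

    def used_count(p):
        return sum(p.get(k, 0) for k in ('success_count', 'failure_count',
                                         'tolerated_error_count',
                                         'download_count', 'download_error_count'))

    candidates = []
    for group in groups:
        members = [profiles_by_name[n] for n in group.get('profiles_in_group', [])
                   if n in profiles_by_name]
        if any(p['state'] in active_states for p in members):
            continue  # group already has an active or locked profile
        candidates.extend((p, group) for p in members if is_ready(p))

    if not candidates:
        return None
    # Never-used profiles first, then by oldest last_used, then by name.
    candidates.sort(key=lambda pg: (used_count(pg[0]) > 0,
                                    pg[0].get('last_used', 0),
                                    pg[0]['name']))
    profile, group = candidates[0]
    return group['name'], profile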
for group in profile_groups_config: for group in profile_groups_config:
@ -1672,9 +1737,11 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
if is_auth_sim: if is_auth_sim:
row.extend([ row.extend([
p.get('success_count', 0), p.get('success_count', 0),
p.get('predicted_download_count', 0),
p.get('failure_count', 0), p.get('failure_count', 0),
p.get('tolerated_error_count', 0), p.get('tolerated_error_count', 0),
p.get('global_success_count', 0), p.get('global_success_count', 0),
p.get('global_predicted_download_count', 0),
p.get('global_failure_count', 0), p.get('global_failure_count', 0),
]) ])
else: # is_download_sim or unknown else: # is_download_sim or unknown
@ -1700,7 +1767,7 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
headers = ['Name', 'Proxy', 'State', 'Last Used'] headers = ['Name', 'Proxy', 'State', 'Last Used']
if is_auth_sim: if is_auth_sim:
headers.extend(['AuthOK', 'AuthFail', 'Skip.Err', 'Tot.AuthOK', 'Tot.AuthFail']) headers.extend(['AuthOK', 'DataPred', 'AuthFail', 'Skip.Err', 'Tot.AuthOK', 'Tot.DataPred', 'Tot.AuthFail'])
else: # is_download_sim or unknown else: # is_download_sim or unknown
headers.extend(['DataOK', 'DownFail', 'Skip.Err', 'Tot.DataOK', 'Tot.DownFail']) headers.extend(['DataOK', 'DownFail', 'Skip.Err', 'Tot.DataOK', 'Tot.DownFail'])
@ -1870,7 +1937,7 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
dl_success_rate = (dl_stats['total_downloads'] / total_dls * 100) if total_dls > 0 else 100 dl_success_rate = (dl_stats['total_downloads'] / total_dls * 100) if total_dls > 0 else 100
global_summary_str = ( global_summary_str = (
f"Auth: {total_reqs} reqs ({auth_stats['total_success']} OK, {auth_stats['total_failure']} Fail, {auth_stats['total_tolerated_error']} Tol.Err) | " f"Auth: {total_reqs} reqs ({auth_stats['total_success']} OK, {auth_stats.get('total_predicted_downloads', 0)} Tasks, {auth_stats['total_failure']} Fail, {auth_stats['total_tolerated_error']} Tol.Err) | "
f"OK Rate: {success_rate:.2f}% | " f"OK Rate: {success_rate:.2f}% | "
f"Failed Locks: {auth_failed_locks} || " f"Failed Locks: {auth_failed_locks} || "
f"Download: {total_dls} attempts ({dl_stats['total_downloads']} OK, {dl_stats['total_download_errors']} Fail) | " f"Download: {total_dls} attempts ({dl_stats['total_downloads']} OK, {dl_stats['total_download_errors']} Fail) | "
@ -1894,6 +1961,7 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
astats.get('profiles', 0), astats.get('profiles', 0),
dstats.get('profiles', 0), dstats.get('profiles', 0),
astats.get('success', 0), astats.get('success', 0),
astats.get('predicted_downloads', 0),
astats.get('failure', 0), astats.get('failure', 0),
astats.get('tolerated_error', 0), astats.get('tolerated_error', 0),
dstats.get('downloads', 0), dstats.get('downloads', 0),
@ -1916,7 +1984,7 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
if all_proxies: if all_proxies:
print("\n--- Per-Proxy Stats (Merged) ---", file=file) print("\n--- Per-Proxy Stats (Merged) ---", file=file)
proxy_headers = ['Proxy', 'State (A/D)', 'Profiles (A)', 'Profiles (D)', 'AuthOK', 'AuthFail', 'Skip.Err(A)', 'DataOK', 'DownFail', 'Skip.Err(D)'] proxy_headers = ['Proxy', 'State (A/D)', 'Profiles (A)', 'Profiles (D)', 'AuthOK', 'DataPred', 'AuthFail', 'Skip.Err(A)', 'DataOK', 'DownFail', 'Skip.Err(D)']
print(tabulate(proxy_table_data, headers=proxy_headers, tablefmt='grid'), file=file) print(tabulate(proxy_table_data, headers=proxy_headers, tablefmt='grid'), file=file)
print(f"\n--- Auth Simulation Profile Details ({args.auth_env}) ---", file=file) print(f"\n--- Auth Simulation Profile Details ({args.auth_env}) ---", file=file)

View File

@ -51,10 +51,31 @@ class ProfileStateMachine(StateMachine):
pause = active.to(paused) | locked.to(paused) | resting.to(paused) | cooldown.to(paused) pause = active.to(paused) | locked.to(paused) | resting.to(paused) | cooldown.to(paused)
def __init__(self, manager: ProfileManager, profile_name: str, *args, **kwargs): def __init__(self, manager: ProfileManager, profile_name: str, initial_value=None):
self.manager = manager self.manager = manager
self.profile_name = profile_name self.profile_name = profile_name
super().__init__(*args, **kwargs) # Call parent constructor with model=self
super().__init__(model=self)
# If initial_value is provided, set the current state without triggering transitions
if initial_value is not None:
# Convert to uppercase to match state values
initial_value = initial_value.upper()
# Check if we're not already in this state
# Compare case-insensitively to handle any discrepancies
if self.current_state.value.upper() != initial_value:
# Find the corresponding state object
target_state = None
for state in self.states:
# Compare both .value and .id case-insensitively
if state.value.upper() == initial_value or (hasattr(state, 'id') and state.id.upper() == initial_value):
target_state = state
break
if target_state:
# Set current state without triggering transitions
self.current_state = target_state
else:
# If state not found, log a warning but don't crash
logger.warning(f"Could not find state '{initial_value}' (case-insensitive) for profile '{profile_name}'. Keeping current state '{self.current_state.value}'.")
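A library-agnostic illustration of why hydration must bypass entry actions: entering a state normally runs side effects (here, a stand-in for the Redis writes), and adopting the persisted state directly avoids them. TinyMachine is a toy class for this sketch only, not python-statemachine's API.

class TinyMachine:
    """Toy state holder whose on_enter callback stands in for Redis updates."""
    STATES = ('ACTIVE', 'LOCKED', 'RESTING', 'COOLDOWN', 'PAUSED', 'BANNED')

    def __init__(self, initial_value=None, on_enter=lambda s: None):
        self.on_enter = on_enter
        if initial_value is not None and initial_value.upper() in self.STATES:
            # Hydration: adopt the persisted state directly, run no entry actions.
            self.current_state = initial_value.upper()
        else:
            # Fresh machine: take the default initial transition, with side effects.
            self.current_state = 'ACTIVE'
            self.on_enter('ACTIVE')

    def transition(self, new_state):
        self.current_state = new_state.upper()
        self.on_enter(self.current_state)

writes = []
fresh = TinyMachine(on_enter=writes.append)                 # records 'ACTIVE'
hydrated = TinyMachine('RESTING', on_enter=writes.append)   # records nothing
assert hydrated.current_state == 'RESTING' and writes == ['ACTIVE']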
# --- Action Methods --- # --- Action Methods ---

View File

@ -70,12 +70,14 @@ class QueueManager:
"""Returns the number of items in a queue.""" """Returns the number of items in a queue."""
return self.redis.llen(queue_name) return self.redis.llen(queue_name)
def populate(self, queue_name: str, file_path: str) -> int: def push_from_file(self, queue_name: str, file_path: str, wrap_key: Optional[str] = None) -> int:
"""Populates a queue from a file (text with one item per line, or JSON with an array of strings).""" """Populates a queue from a file (text with one item per line, or JSON with an array of items)."""
count = 0 count = 0
if file_path.lower().endswith('.json'): if file_path.lower().endswith('.json'):
logger.info("Detected JSON file. Attempting to parse as an array of strings.") if wrap_key:
logger.warning("--wrap-file-line-in-json is ignored for JSON files, as they are expected to contain complete items.")
logger.info("Detected JSON file. Attempting to parse as an array of items.")
try: try:
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
data = json.load(f) data = json.load(f)
@ -83,13 +85,21 @@ class QueueManager:
logger.error("JSON file must contain a list/array.") logger.error("JSON file must contain a list/array.")
return 0 return 0
items_to_add = [str(item).strip() for item in data if str(item).strip()] # Items can be strings or objects. If objects, they should be converted to JSON strings.
items_to_add = []
for item in data:
if isinstance(item, str):
items_to_add.append(item.strip())
else:
items_to_add.append(json.dumps(item))
items_to_add = [item for item in items_to_add if item]
pipe = self.redis.pipeline() pipe = self.redis.pipeline()
for item in items_to_add: for item in items_to_add:
pipe.rpush(queue_name, item) pipe.rpush(queue_name, item)
count += 1 count += 1
if count % 1000 == 0: if count > 0 and count % 1000 == 0:
pipe.execute() pipe.execute()
logger.info(f"Pushed {count} items...") logger.info(f"Pushed {count} items...")
pipe.execute() pipe.execute()
@ -105,9 +115,13 @@ class QueueManager:
for line in f: for line in f:
item = line.strip() item = line.strip()
if item: if item:
pipe.rpush(queue_name, item) if wrap_key:
payload = json.dumps({wrap_key: item})
else:
payload = item
pipe.rpush(queue_name, payload)
count += 1 count += 1
if count % 1000 == 0: if count > 0 and count % 1000 == 0:
pipe.execute() pipe.execute()
logger.info(f"Pushed {count} items...") logger.info(f"Pushed {count} items...")
pipe.execute() pipe.execute()
@ -118,6 +132,45 @@ class QueueManager:
logger.info(f"Finished. Pushed a total of {count} items to '{queue_name}'.") logger.info(f"Finished. Pushed a total of {count} items to '{queue_name}'.")
return count return count
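For reference, the payloads produced from a plain text file with and without a wrap key look roughly like this. This is a standalone reimplementation of the per-line behavior above, not the class itself; the 'url' key is just the example used in the parser help below.

import json

def line_to_payload(line, wrap_key=None):
    item = line.strip()
    if not item:
        return None
    return json.dumps({wrap_key: item}) if wrap_key else item

assert line_to_payload("https://example.com/v/abc\n") == "https://example.com/v/abc"
assert line_to_payload("https://example.com/v/abc\n", wrap_key="url") == '{"url": "https://example.com/v/abc"}'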
def push_generated(self, queue_name: str, prefix: str, count: int) -> int:
"""Pushes generated payloads to a queue."""
from datetime import datetime
timestamp = datetime.now().strftime('%Y%m%dt%H%M')
pipe = self.redis.pipeline()
pushed_count = 0
for i in range(count):
generated_value = f"{prefix}_{timestamp}_{i:04d}"
payload = json.dumps({"url": generated_value})
pipe.rpush(queue_name, payload)
pushed_count += 1
if pushed_count > 0 and pushed_count % 1000 == 0:
pipe.execute()
logger.info(f"Pushed {pushed_count} of {count} items...")
pipe.execute()
logger.info(f"Finished. Pushed a total of {pushed_count} items to '{queue_name}'.")
return pushed_count
def push_static(self, queue_name: str, payload: str, count: int) -> int:
"""Pushes a static payload multiple times to a queue."""
try:
json.loads(payload)
except json.JSONDecodeError:
logger.error(f"Invalid JSON in --payload-json: {payload}")
return 0
pipe = self.redis.pipeline()
pushed_count = 0
for _ in range(count):
pipe.rpush(queue_name, payload)
pushed_count += 1
if pushed_count > 0 and pushed_count % 1000 == 0:
pipe.execute()
logger.info(f"Pushed {pushed_count} of {count} items...")
pipe.execute()
logger.info(f"Finished. Pushed a total of {pushed_count} items to '{queue_name}'.")
return pushed_count
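A usage sketch for the two new push helpers, assuming qm is an already-constructed QueueManager and the queue name is a placeholder. The CLI comments use the flags defined in the 'push' sub-command below; the outer command path depends on how add_queue_manager_parser is mounted and is not shown in this diff.

# Roughly equivalent CLI calls:
#   ... push my_env_stress_inbox --generate-payload-prefix smoke --count 50
#   ... push my_env_stress_inbox --payload-json '{"url": "https://example.com/v/abc"}' --count 5
def seed_queue(qm, queue_name="my_env_stress_inbox"):
    # Pushes {"url": "smoke_<yyyymmddthhmm>_0000"} ... _0049
    qm.push_generated(queue_name, prefix="smoke", count=50)
    # Pushes the same static payload five times
    qm.push_static(queue_name, '{"url": "https://example.com/v/abc"}', count=5)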
def clear(self, queue_name: str, dump_path: Optional[str] = None) -> int: def clear(self, queue_name: str, dump_path: Optional[str] = None) -> int:
"""Clears a queue, optionally dumping its contents to a file.""" """Clears a queue, optionally dumping its contents to a file."""
size = self.redis.llen(queue_name) size = self.redis.llen(queue_name)
@ -174,10 +227,17 @@ def add_queue_manager_parser(subparsers):
peek_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.") peek_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.")
peek_parser.add_argument('--count', type=int, default=10, help='Number of items to show (default: 10)') peek_parser.add_argument('--count', type=int, default=10, help='Number of items to show (default: 10)')
# Populate command # Push command
populate_parser = subparsers.add_parser('populate', help='Populate a queue from a file (one item per line).', parents=[common_parser]) push_parser = subparsers.add_parser('push', help='Push items to a queue from a file, a generator, or a static payload.', parents=[common_parser])
populate_parser.add_argument('file_path', help='Path to the file containing items to add.') push_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.")
populate_parser.add_argument('--queue-name', help="Name of the queue to populate. Defaults to '<env>_stress_inbox'.") push_parser.add_argument('--count', type=int, default=1, help='Number of items to push (for --payload-json or --generate-payload-prefix).')
source_group = push_parser.add_mutually_exclusive_group(required=True)
source_group.add_argument('--from-file', dest='file_path', help='Path to a file containing items to add (one per line, or a JSON array).')
source_group.add_argument('--payload-json', help='A static JSON payload to push. Use with --count to push multiple times.')
source_group.add_argument('--generate-payload-prefix', help='Generate JSON payloads with a timestamp and counter. Example: {"url": "PREFIX_yyyymmddthhmm_0001"}. Use with --count.')
push_parser.add_argument('--wrap-file-line-in-json', metavar='KEY', help="For text files (--from-file), wrap each line in a JSON object with the specified key (e.g., 'url' -> {\"url\": \"line_content\"}).")
# Clear command # Clear command
clear_parser = subparsers.add_parser('clear', help='Clear a queue, optionally dumping its contents.', parents=[common_parser]) clear_parser = subparsers.add_parser('clear', help='Clear a queue, optionally dumping its contents.', parents=[common_parser])
@ -215,9 +275,9 @@ def main_queue_manager(args):
) )
# For commands that operate on a single queue, set a default name based on the environment if not provided. # For commands that operate on a single queue, set a default name based on the environment if not provided.
is_single_queue_command = args.queue_command in ['peek', 'populate', 'clear'] is_single_queue_command = args.queue_command in ['peek', 'push', 'clear']
if is_single_queue_command: if is_single_queue_command:
# `populate` uses an option (--queue-name), while `peek` and `clear` use a positional argument. # `push`, `peek` and `clear` use a positional argument for queue_name.
# We check for `queue_name` attribute and if it's falsy (None or empty string). # We check for `queue_name` attribute and if it's falsy (None or empty string).
if not getattr(args, 'queue_name', None): if not getattr(args, 'queue_name', None):
default_queue_name = f"{args.env}_stress_inbox" default_queue_name = f"{args.env}_stress_inbox"
@ -244,11 +304,21 @@ def main_queue_manager(args):
print(f"{i+1: >3}: {item}") print(f"{i+1: >3}: {item}")
return 0 return 0
elif args.queue_command == 'populate': elif args.queue_command == 'push':
if args.file_path:
if not os.path.exists(args.file_path): if not os.path.exists(args.file_path):
print(f"Error: File not found at '{args.file_path}'", file=sys.stderr) print(f"Error: File not found at '{args.file_path}'", file=sys.stderr)
return 1 return 1
manager.populate(args.queue_name, args.file_path) if args.count > 1:
logger.warning("--count is ignored when using --from-file.")
manager.push_from_file(args.queue_name, args.file_path, args.wrap_file_line_in_json)
elif args.payload_json:
manager.push_static(args.queue_name, args.payload_json, args.count)
elif args.generate_payload_prefix:
if args.count <= 0:
print("Error: --count must be 1 or greater for --generate-payload-prefix.", file=sys.stderr)
return 1
manager.push_generated(args.queue_name, args.generate_payload_prefix, args.count)
return 0 return 0
elif args.queue_command == 'clear': elif args.queue_command == 'clear':

View File

@ -11,6 +11,9 @@ aria2p
# For reading .env files for configuration # For reading .env files for configuration
python-dotenv==1.0.1 python-dotenv==1.0.1
# For 'direct_docker_cli' orchestration mode in stress-policy
docker
# For SOCKS proxy support in client tools # For SOCKS proxy support in client tools
PySocks PySocks

View File

@ -167,7 +167,7 @@ Overridable Policy Parameters via --set:
parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.') parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.') parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.')
parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)") parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
parser.add_argument('--profile-prefix', help="Shortcut to override the profile prefix for profile locking mode. Affects both auth and download stages.") parser.add_argument('--profile-prefix', '--user-prefix', dest='profile_prefix', help="Shortcut to override the profile prefix for profile locking mode. Affects both auth and download stages. Can be a comma-separated list.")
parser.add_argument('--start-from-url-index', type=int, help='Start processing from this line number (1-based) in the urls_file. Overrides saved state.') parser.add_argument('--start-from-url-index', type=int, help='Start processing from this line number (1-based) in the urls_file. Overrides saved state.')
parser.add_argument('--expire-time-shift-minutes', type=int, help="Consider URLs expiring in N minutes as expired. Overrides policy.") parser.add_argument('--expire-time-shift-minutes', type=int, help="Consider URLs expiring in N minutes as expired. Overrides policy.")

View File

@ -53,6 +53,7 @@ def run_direct_batch_worker(worker_id, policy, state_manager, args, profile_mana
os.makedirs(save_dir, exist_ok=True) os.makedirs(save_dir, exist_ok=True)
last_used_profile_name = None last_used_profile_name = None
last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set(): while not state_manager.shutdown_event.is_set():
locked_profile = None locked_profile = None
temp_batch_file = None temp_batch_file = None
@ -93,13 +94,24 @@ def run_direct_batch_worker(worker_id, policy, state_manager, args, profile_mana
if profiles_in_prefix: if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix) state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items())) states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
logger.info(f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix}*): {states_summary}. Pausing for {polling_interval}s.") base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix}*): {states_summary}."
else: else:
logger.info(f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix}'. Pausing for {polling_interval}s.") base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix}'."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stderr, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
logger.info(f"{base_log_msg} Pausing for {polling_interval}s.")
last_no_task_log_msg = base_log_msg
# --- End diagnostic logging --- # --- End diagnostic logging ---
time.sleep(polling_interval) time.sleep(polling_interval)
continue continue
if last_no_task_log_msg:
print(file=sys.stderr)
last_no_task_log_msg = ""
profile_name = locked_profile['name'] profile_name = locked_profile['name']
proxy_url = locked_profile['proxy'] proxy_url = locked_profile['proxy']

View File

@ -113,6 +113,7 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
return [] return []
last_used_profile_name = None last_used_profile_name = None
last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set(): while not state_manager.shutdown_event.is_set():
locked_profile = None locked_profile = None
temp_task_dir_host = None temp_task_dir_host = None
@ -120,11 +121,25 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
# --- Variables for robust finalization --- # --- Variables for robust finalization ---
url_batch_len = 0 url_batch_len = 0
batch_started = False batch_started = False
downloads_per_url = 0 # Default to 0, meaning no increment unless configured num_formats_per_url = 0
downloads_to_increment = 0
# --- # ---
try: try:
# 1. Lock a profile # 1. Lock a profile, trying any of the specified prefixes
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) locked_profile = None
prefixes_to_try = []
if profile_prefix:
prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
if prefixes_to_try:
random.shuffle(prefixes_to_try)
for prefix in prefixes_to_try:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
if locked_profile:
break
else:
# Fallback for empty/no prefix, which means lock any available profile
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
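The same "try each comma-separated prefix in random order" block is repeated again a few lines below after the avoid-reuse wait. A small helper (hypothetical name, not part of this commit) would keep the two call sites identical; it only uses the lock_profile(owner=..., profile_prefix=...) call already used here.

import random

def lock_from_prefixes(pm, owner_id, profile_prefix):
    """Try each comma-separated prefix in random order; fall back to any profile."""
    prefixes = [p.strip() for p in (profile_prefix or '').split(',') if p.strip()]
    if not prefixes:
        return pm.lock_profile(owner=owner_id, profile_prefix=None)
    random.shuffle(prefixes)
    for prefix in prefixes:
        profile = pm.lock_profile(owner=owner_id, profile_prefix=prefix)
        if profile:
            return profile
    return None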
# --- New logic to avoid immediate reuse --- # --- New logic to avoid immediate reuse ---
avoid_reuse = direct_policy.get('avoid_immediate_profile_reuse', False) avoid_reuse = direct_policy.get('avoid_immediate_profile_reuse', False)
@ -135,9 +150,21 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
wait_seconds = direct_policy.get('avoid_reuse_max_wait_seconds', 5) wait_seconds = direct_policy.get('avoid_reuse_max_wait_seconds', 5)
time.sleep(wait_seconds) time.sleep(wait_seconds)
# After waiting, try to lock again. # After waiting, try to lock again, from any of the available prefixes
logger.info(f"[Worker {worker_id}] Attempting to lock a new profile after waiting.") logger.info(f"[Worker {worker_id}] Attempting to lock a new profile after waiting.")
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) locked_profile = None
prefixes_to_try = []
if profile_prefix:
prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
if prefixes_to_try:
random.shuffle(prefixes_to_try)
for prefix in prefixes_to_try:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
if locked_profile:
break
else:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
if locked_profile and locked_profile['name'] == last_used_profile_name: if locked_profile and locked_profile['name'] == last_used_profile_name:
logger.warning(f"[Worker {worker_id}] Still locking the same profile '{locked_profile['name']}' after waiting. Proceeding to use it to avoid getting stuck.") logger.warning(f"[Worker {worker_id}] Still locking the same profile '{locked_profile['name']}' after waiting. Proceeding to use it to avoid getting stuck.")
@ -153,13 +180,24 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
if profiles_in_prefix: if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix) state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items())) states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
logger.info(f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s.") base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix or '*'}*): {states_summary}."
else: else:
logger.info(f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s.") base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix or '*'}'."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stderr, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
logger.info(f"{base_log_msg} Pausing for {polling_interval}s.")
last_no_task_log_msg = base_log_msg
# --- End diagnostic logging --- # --- End diagnostic logging ---
time.sleep(polling_interval) time.sleep(polling_interval)
continue continue
if last_no_task_log_msg:
print(file=sys.stderr)
last_no_task_log_msg = ""
profile_name = locked_profile['name'] profile_name = locked_profile['name']
proxy_url = locked_profile['proxy'] proxy_url = locked_profile['proxy']
@ -237,24 +275,33 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
url_batch_len = len(url_batch) url_batch_len = len(url_batch)
batch_started = True batch_started = True
# --- Calculate how many download tasks will be generated --- # --- Calculate how many download tasks will be generated to set the counter ---
# The "pending downloads" counter for an auth profile tracks the number of
# info.json files it has generated that are waiting to be processed.
# Each successful URL fetch creates one info.json file (one task).
# The number of actual media files downloaded from that info.json is
# irrelevant to the auth profile's counter.
downloads_per_url = 0 # Default to 0, meaning no increment unless configured
downloads_per_url_config = gen_policy.get('downloads_per_url') downloads_per_url_config = gen_policy.get('downloads_per_url')
if downloads_per_url_config: num_formats_per_url = 0 # Default to no increment
downloads_per_url = 1 # We just need a flag to enable the logic, the count is per-URL.
if downloads_per_url > 0: if downloads_per_url_config:
# Increment by the number of URLs in the batch. if isinstance(downloads_per_url_config, int):
downloads_to_increment = url_batch_len num_formats_per_url = downloads_per_url_config
profile_manager_instance.increment_pending_downloads(profile_name, downloads_to_increment) elif downloads_per_url_config == 'from_download_policy':
logger.info(f"[Worker {worker_id}] [{profile_name}] Preemptively incremented pending downloads by {downloads_to_increment} for the upcoming batch of {url_batch_len} URLs.") # Heuristic: count comma-separated groups in the format selector.
# This mirrors how yt-dlp processes multiple format downloads.
d_policy = policy.get('download_policy', {})
formats_str = d_policy.get('formats', '')
if formats_str:
# Each comma separates a group from which one format is downloaded.
num_formats_per_url = formats_str.count(',') + 1
else: else:
logger.warning(f"[Worker {worker_id}] [{profile_name}] 'downloads_per_url' is not configured. Pending downloads counter will not be incremented for this batch.") num_formats_per_url = 1 # fallback to 1 if formats is empty
elif downloads_per_url_config:
# Fallback for non-int, non-'from_download_policy' truthy values
num_formats_per_url = 1
if num_formats_per_url > 0:
downloads_to_increment = url_batch_len * num_formats_per_url
profile_manager_instance.increment_pending_downloads(profile_name, downloads_to_increment)
logger.info(f"[Worker {worker_id}] [{profile_name}] Preemptively incremented pending downloads by {downloads_to_increment} for a batch of {url_batch_len} URLs ({num_formats_per_url} format(s)/URL).")
else:
logger.warning(f"[Worker {worker_id}] [{profile_name}] 'downloads_per_url' is not configured or is zero. Pending downloads counter will not be incremented for this batch.")
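A standalone version of the per-URL format-count heuristic above. The comma rule follows the comment in this hunk (each comma-separated group yields one download); 'from_download_policy' is this repo's own keyword, and the function name and sample selector are illustrative.

def formats_per_url(downloads_per_url_config, formats_selector=""):
    """How many media files one successful info.json is expected to produce."""
    if not downloads_per_url_config:
        return 0                      # counter disabled
    if isinstance(downloads_per_url_config, int):
        return downloads_per_url_config
    if downloads_per_url_config == 'from_download_policy':
        # 'bv*+ba/b,worstaudio' -> 2 comma-separated groups -> 2 downloads per URL
        return formats_selector.count(',') + 1 if formats_selector else 1
    return 1                          # any other truthy value

assert formats_per_url(None) == 0
assert formats_per_url(3) == 3
assert formats_per_url('from_download_policy', 'bv*+ba/b,worstaudio') == 2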
end_idx = start_idx + len(url_batch) end_idx = start_idx + len(url_batch)
logger.info(f"[Worker {worker_id}] [{profile_name}] Processing batch of {len(url_batch)} URLs (lines {start_idx + 1}-{end_idx} from source).") logger.info(f"[Worker {worker_id}] [{profile_name}] Processing batch of {len(url_batch)} URLs (lines {start_idx + 1}-{end_idx} from source).")
@ -480,14 +527,15 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
logger.error(f"Error during immediate post-processing from log line: {e}") logger.error(f"Error during immediate post-processing from log line: {e}")
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
if post_processed_successfully: if post_processed_successfully:
live_success_count += 1 live_success_count += 1
logger.info(f"[Worker {worker_id}] [{profile_name}] Live success #{live_success_count} detected and post-processed.") logger.info(f"[Worker {worker_id}] [{profile_name}] Live success #{live_success_count} detected and post-processed.")
profile_manager_instance.record_activity(profile_name, 'success') profile_manager_instance.record_activity(profile_name, 'success', is_dummy=is_dummy)
else: else:
live_failure_count += 1 live_failure_count += 1
logger.error(f"[Worker {worker_id}] [{profile_name}] Post-processing failed for a successful fetch. Recording as failure.") logger.error(f"[Worker {worker_id}] [{profile_name}] Post-processing failed for a successful fetch. Recording as failure.")
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=is_dummy)
# --- End immediate post-processing --- # --- End immediate post-processing ---
return False return False
@ -495,9 +543,10 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
for pattern in fatal_error_patterns: for pattern in fatal_error_patterns:
if re.search(pattern, line, re.IGNORECASE): if re.search(pattern, line, re.IGNORECASE):
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1 live_failure_count += 1
logger.error(f"[Worker {worker_id}] [{profile_name}] Live FATAL error #{live_failure_count} detected from log: {line}") logger.error(f"[Worker {worker_id}] [{profile_name}] Live FATAL error #{live_failure_count} detected from log: {line}")
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=is_dummy)
if direct_policy.get('ban_on_fatal_error_in_batch'): if direct_policy.get('ban_on_fatal_error_in_batch'):
logger.warning(f"Banning profile '{profile_name}' immediately due to fatal error to stop container.") logger.warning(f"Banning profile '{profile_name}' immediately due to fatal error to stop container.")
profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during batch') profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during batch')
@ -512,16 +561,18 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
for pattern in tolerated_error_patterns: for pattern in tolerated_error_patterns:
if re.search(pattern, line, re.IGNORECASE): if re.search(pattern, line, re.IGNORECASE):
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
live_tolerated_count += 1 live_tolerated_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live TOLERATED error #{live_tolerated_count} detected from log: {line}") logger.warning(f"[Worker {worker_id}] [{profile_name}] Live TOLERATED error #{live_tolerated_count} detected from log: {line}")
profile_manager_instance.record_activity(profile_name, 'tolerated_error') profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
return False return False
# If it's an ERROR: line and not tolerated, it's a failure # If it's an ERROR: line and not tolerated, it's a failure
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1 live_failure_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live failure #{live_failure_count} detected from log: {line}") logger.warning(f"[Worker {worker_id}] [{profile_name}] Live failure #{live_failure_count} detected from log: {line}")
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=is_dummy)
return False return False
@ -544,14 +595,14 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
rand_val = random.random() rand_val = random.random()
if rand_val < auth_skipped_rate: if rand_val < auth_skipped_rate:
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating tolerated failure for {video_id}.") logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating tolerated failure for {video_id}.")
profile_manager_instance.record_activity(profile_name, 'tolerated_error') profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=True)
elif rand_val < (auth_skipped_rate + auth_failure_rate): elif rand_val < (auth_skipped_rate + auth_failure_rate):
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal failure for {video_id}.") logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal failure for {video_id}.")
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=True)
else: else:
# Success # Success
files_created += 1 files_created += 1
profile_manager_instance.record_activity(profile_name, 'success') profile_manager_instance.record_activity(profile_name, 'success', is_dummy=True)
# Create a dummy file in the temp output dir to simulate success # Create a dummy file in the temp output dir to simulate success
dummy_output_path = Path(temp_task_dir_host) / f"{video_id}.info.json" dummy_output_path = Path(temp_task_dir_host) / f"{video_id}.info.json"
@ -644,13 +695,14 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
finally: finally:
if locked_profile and batch_started: if locked_profile and batch_started:
# --- Reconcile pending downloads counter --- # --- Reconcile pending downloads counter ---
if downloads_per_url > 0: if downloads_to_increment > 0:
# We incremented by url_batch_len at the start. # We incremented at the start. Now we adjust for any URLs that failed to produce an info.json.
# Now we adjust for any URLs that failed to produce an info.json. initial_increment = downloads_to_increment
# The adjustment is the number of successes minus the number of attempts. actual_successes = live_success_count # This is count of successful info.jsons
initial_increment = url_batch_len
actual_successes = live_success_count expected_downloads_from_successes = actual_successes * num_formats_per_url
adjustment = actual_successes - initial_increment
adjustment = expected_downloads_from_successes - initial_increment
if adjustment != 0: if adjustment != 0:
# The adjustment will be negative, effectively decrementing the counter for each failure. # The adjustment will be negative, effectively decrementing the counter for each failure.
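A worked example of this reconciliation: 10 URLs with 2 formats each are pre-incremented as 20; if only 7 URLs produce an info.json, the counter is adjusted by 7 * 2 - 20 = -6. The helper below is a sketch of the arithmetic only, with hypothetical names.

def reconcile_adjustment(url_batch_len, num_formats_per_url, successful_infojsons):
    """Delta to apply to the pending-downloads counter after a batch finishes."""
    initial_increment = url_batch_len * num_formats_per_url
    expected_from_successes = successful_infojsons * num_formats_per_url
    return expected_from_successes - initial_increment  # <= 0; 0 means nothing to fix

assert reconcile_adjustment(10, 2, 7) == -6   # decrement pending downloads by 6
assert reconcile_adjustment(10, 2, 10) == 0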
@ -680,10 +732,21 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
if cooldown: if cooldown:
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.") logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.")
# For auth simulation, check if profile has pending downloads
# If so, don't apply cooldown to avoid conflicting with enforcer
profile_data = profile_manager_instance.get_profile(locked_profile['name'])
should_apply_cooldown = cooldown
if profile_data:
pending_downloads = profile_manager_instance.get_pending_downloads(locked_profile['name'])
rest_reason = profile_data.get('rest_reason')
if pending_downloads > 0 or rest_reason == 'waiting_downloads':
should_apply_cooldown = None
logger.info(f"[Worker {worker_id}] Auth profile '{locked_profile['name']}' has pending downloads or is waiting for downloads. Not applying cooldown.")
unlocked_successfully = profile_manager_instance.unlock_profile( unlocked_successfully = profile_manager_instance.unlock_profile(
locked_profile['name'], locked_profile['name'],
owner=owner_id, owner=owner_id,
rest_for_seconds=cooldown rest_for_seconds=should_apply_cooldown
) )
if not unlocked_successfully: if not unlocked_successfully:
logger.error(f"[Worker {worker_id}] FAILED to unlock profile '{locked_profile['name']}'. The profile may be stuck.") logger.error(f"[Worker {worker_id}] FAILED to unlock profile '{locked_profile['name']}'. The profile may be stuck.")
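The cooldown decision above, reduced to a pure function for clarity. Field names match the profile hash used in this diff; the function name is hypothetical and the assertions are illustrative.

def cooldown_to_apply(cooldown_seconds, pending_downloads, rest_reason):
    """Return the rest period to pass to unlock_profile, or None to skip cooldown."""
    if pending_downloads > 0 or rest_reason == 'waiting_downloads':
        # The enforcer owns this profile's resting state until its downloads
        # finish; applying a cooldown here would fight with it.
        return None
    return cooldown_seconds

assert cooldown_to_apply(300, 0, None) == 300
assert cooldown_to_apply(300, 4, None) is None
assert cooldown_to_apply(300, 0, 'waiting_downloads') is None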
@ -751,7 +814,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
return [] return []
no_task_streak = 0 no_task_streak = 0
last_used_profile_name = None last_no_task_log_msg = ""
task_counter = 0 task_counter = 0
while not state_manager.shutdown_event.is_set(): while not state_manager.shutdown_event.is_set():
locked_profile = None locked_profile = None
@ -760,6 +823,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
was_banned_by_parser = False was_banned_by_parser = False
task = None task = None
task_id = None task_id = None
downloads_processed_in_task = 0
try: try:
if no_task_streak > 0 and not queue_policy: # Polling only makes sense for file mode if no_task_streak > 0 and not queue_policy: # Polling only makes sense for file mode
polling_interval = exec_control.get('worker_polling_interval_seconds', 1) polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
@ -769,9 +833,18 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
if profiles_in_prefix: if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix) state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items())) states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
logger.info(f"[Worker {worker_id}] No tasks found or profiles available. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s. (Streak: {no_task_streak})") base_log_msg = f"[Worker {worker_id}] No tasks found or profiles available. Pool status ({profile_prefix or '*'}*): {states_summary}."
else: else:
logger.info(f"[Worker {worker_id}] No tasks found or profiles available. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s. (Streak: {no_task_streak})") base_log_msg = f"[Worker {worker_id}] No tasks found or profiles available. No profiles found with prefix '{profile_prefix or '*'}'."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stdout, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
full_log_msg = f"{base_log_msg} Pausing for {polling_interval}s. (Streak: {no_task_streak})"
logger.info(full_log_msg)
last_no_task_log_msg = base_log_msg
# --- End diagnostic logging --- # --- End diagnostic logging ---
time.sleep(polling_interval) time.sleep(polling_interval)
if state_manager.shutdown_event.is_set(): continue if state_manager.shutdown_event.is_set(): continue
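The repeat-suppressed idle logging used here (and again in the other workers further down) follows one pattern: print a dot while the same idle message repeats, and a newline plus the full message when it changes. A sketch of that pattern as a small helper; the choice of print/stderr mirrors the workers and is an implementation assumption:

    import sys

    class IdleLogSquelcher:
        def __init__(self, stream=sys.stderr):
            self.stream = stream
            self.last_msg = ""

        def log(self, base_msg, suffix=""):
            if base_msg == self.last_msg:
                print(".", end="", file=self.stream, flush=True)  # same situation as last time
                return
            if self.last_msg:
                print(file=self.stream)  # terminate the previous run of dots
            print(f"{base_msg}{suffix}", file=self.stream)
            self.last_msg = base_msg

        def reset(self):
            # Called once work is found again, so the next message starts on a clean line.
            if self.last_msg:
                print(file=self.stream)
                self.last_msg = ""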
@ -844,17 +917,27 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
if claimed_task_path_host: if claimed_task_path_host:
no_task_streak = 0 no_task_streak = 0
if last_no_task_log_msg:
print(file=sys.stderr)
last_no_task_log_msg = ""
auth_profile_name, auth_env = None, None auth_profile_name, auth_env = None, None
info_data = None info_data = None
# --- Read info.json content and metadata first --- # In queue mode, the task object is the primary source of truth for auth metadata.
# This check is conditional because in file-based mode, `task` will be None.
if task:
auth_profile_name = task.get('auth_profile_name')
auth_env = task.get('auth_env')
# --- Read info.json content and use its metadata as a fallback ---
try: try:
with open(claimed_task_path_host, 'r', encoding='utf-8') as f: with open(claimed_task_path_host, 'r', encoding='utf-8') as f:
info_data = json.load(f) info_data = json.load(f)
# This is critical for decrementing the counter in the finally block # Fallback to info.json metadata if not present in the task object.
if not auth_profile_name or not auth_env:
metadata = info_data.get('_ytops_metadata', {}) metadata = info_data.get('_ytops_metadata', {})
auth_profile_name = metadata.get('profile_name') auth_profile_name = auth_profile_name or metadata.get('profile_name')
auth_env = metadata.get('auth_env') auth_env = auth_env or metadata.get('auth_env')
except (IOError, json.JSONDecodeError) as e: except (IOError, json.JSONDecodeError) as e:
logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path_host.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.") logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path_host.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.")
continue # Skip to finally block to unlock profile continue # Skip to finally block to unlock profile
@ -874,7 +957,12 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
source_of_format = "task file" source_of_format = "task file"
if not format_selection: if not format_selection:
format_selection = d_policy.get('formats', '') format_selection = d_policy.get('formats', '')
source_of_format = "policy" source_of_format = "policy (download_policy.formats)"
if not format_selection:
ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
format_selection = ytdlp_config_overrides.get('format', '')
source_of_format = "policy (ytdlp_config_overrides.format)"
if not format_selection: if not format_selection:
logger.warning(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download.") logger.warning(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download.")
@ -907,17 +995,18 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
details = f"Dummy skipped failure for format {format_id}" details = f"Dummy skipped failure for format {format_id}"
error_type = "DummySkippedFailure" error_type = "DummySkippedFailure"
is_tolerated_error = True is_tolerated_error = True
profile_manager_instance.record_activity(profile_name, 'tolerated_error') profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=True)
elif should_fail_fatal: elif should_fail_fatal:
logger.warning(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure for format '{format_id}'.") logger.warning(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure for format '{format_id}'.")
details = f"Dummy fatal failure for format {format_id}" details = f"Dummy fatal failure for format {format_id}"
error_type = "DummyFailure" error_type = "DummyFailure"
profile_manager_instance.record_activity(profile_name, 'download_error') profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=True)
else: else:
logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success for format '{format_id}'.") logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success for format '{format_id}'.")
success = True success = True
details = f"Dummy success for format {format_id}" details = f"Dummy success for format {format_id}"
profile_manager_instance.record_activity(profile_name, 'download') profile_manager_instance.record_activity(profile_name, 'download', is_dummy=True)
downloads_processed_in_task += 1
event = { event = {
'type': 'direct_docker_download', 'type': 'direct_docker_download',
@ -963,7 +1052,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
else: else:
logger.warning(f"[Worker {worker_id}] [{profile_name}] Skipping task '{claimed_task_path_host.name}' because its URL is expired.") logger.warning(f"[Worker {worker_id}] [{profile_name}] Skipping task '{claimed_task_path_host.name}' because its URL is expired.")
profile_manager_instance.record_activity(profile_name, 'tolerated_error') profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
event = { event = {
'type': 'direct_docker_download', 'profile': profile_name, 'type': 'direct_docker_download', 'profile': profile_name,
@ -1066,23 +1155,23 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
def log_parser_callback(line): def log_parser_callback(line):
nonlocal live_success_count, live_failure_count, live_tolerated_count, was_banned_by_parser nonlocal live_success_count, live_failure_count, live_tolerated_count, was_banned_by_parser
-                    # Success is a high-priority check. Only record one success per task.
-                    if '[download] 100% of' in line or 'has already been downloaded' in line:
-                        with activity_lock:
-                            # Only count one success per task
-                            if live_success_count == 0:
-                                live_success_count += 1
-                                logger.info(f"[Worker {worker_id}] [{profile_name}] Live download success detected from log.")
-                                profile_manager_instance.record_activity(profile_name, 'download')
+                    # Success is a high-priority check.
+                    if '[download] 100% of' in line or 'has already been downloaded' in line:
+                        with activity_lock:
+                            is_dummy = args.dummy or args.dummy_batch
+                            live_success_count += 1
+                            logger.info(f"[Worker {worker_id}] [{profile_name}] Live download success #{live_success_count} detected from log.")
+                            profile_manager_instance.record_activity(profile_name, 'download', is_dummy=is_dummy)
return False return False
# Check for fatal patterns # Check for fatal patterns
for pattern in fatal_error_patterns: for pattern in fatal_error_patterns:
if re.search(pattern, line, re.IGNORECASE): if re.search(pattern, line, re.IGNORECASE):
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1 live_failure_count += 1
logger.error(f"[Worker {worker_id}] [{profile_name}] Live FATAL download error #{live_failure_count} detected from log: {line}") logger.error(f"[Worker {worker_id}] [{profile_name}] Live FATAL download error #{live_failure_count} detected from log: {line}")
profile_manager_instance.record_activity(profile_name, 'download_error') profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=is_dummy)
if direct_policy.get('ban_on_fatal_error_in_batch'): if direct_policy.get('ban_on_fatal_error_in_batch'):
logger.warning(f"Banning profile '{profile_name}' immediately due to fatal download error to stop container.") logger.warning(f"Banning profile '{profile_name}' immediately due to fatal download error to stop container.")
profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during download') profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during download')
@ -1098,16 +1187,18 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
for pattern in tolerated_error_patterns: for pattern in tolerated_error_patterns:
if re.search(pattern, line, re.IGNORECASE): if re.search(pattern, line, re.IGNORECASE):
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
live_tolerated_count += 1 live_tolerated_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live TOLERATED download error #{live_tolerated_count} detected from log: {line}") logger.warning(f"[Worker {worker_id}] [{profile_name}] Live TOLERATED download error #{live_tolerated_count} detected from log: {line}")
profile_manager_instance.record_activity(profile_name, 'tolerated_error') profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
return False return False
# If it's an ERROR: line and not tolerated, it's a failure # If it's an ERROR: line and not tolerated, it's a failure
with activity_lock: with activity_lock:
is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1 live_failure_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live download failure #{live_failure_count} detected from log: {line}") logger.warning(f"[Worker {worker_id}] [{profile_name}] Live download failure #{live_failure_count} detected from log: {line}")
profile_manager_instance.record_activity(profile_name, 'download_error') profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=is_dummy)
return False return False
@ -1149,11 +1240,11 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
final_outcome = "download" final_outcome = "download"
logger.warning(f"[Worker {worker_id}] [{profile_name}] No specific success/error log line matched, but exit code is 0. Assuming success, but this may indicate a parsing issue.") logger.warning(f"[Worker {worker_id}] [{profile_name}] No specific success/error log line matched, but exit code is 0. Assuming success, but this may indicate a parsing issue.")
# We record a success here as a fallback, in case the log parser missed it. # We record a success here as a fallback, in case the log parser missed it.
profile_manager_instance.record_activity(profile_name, 'download') profile_manager_instance.record_activity(profile_name, 'download', is_dummy=(args.dummy or args.dummy_batch))
else: else:
final_outcome = "download_error" final_outcome = "download_error"
logger.warning(f"[Worker {worker_id}] [{profile_name}] No specific error log line matched, but exit code was {retcode}. Recording a generic download_error.") logger.warning(f"[Worker {worker_id}] [{profile_name}] No specific error log line matched, but exit code was {retcode}. Recording a generic download_error.")
profile_manager_instance.record_activity(profile_name, 'download_error') profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=(args.dummy or args.dummy_batch))
# --- Airflow Directory Logic --- # --- Airflow Directory Logic ---
if success and d_policy.get('output_to_airflow_ready_dir'): if success and d_policy.get('output_to_airflow_ready_dir'):
@ -1231,6 +1322,9 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
event = { 'type': 'direct_docker_download', 'profile': profile_name, 'proxy_url': locked_profile['proxy'], 'success': success, 'details': event_details } event = { 'type': 'direct_docker_download', 'profile': profile_name, 'proxy_url': locked_profile['proxy'], 'success': success, 'details': event_details }
state_manager.log_event(event) state_manager.log_event(event)
# Store the number of detected successful downloads to use for decrementing the counter.
downloads_processed_in_task = live_success_count
logger.info(f"[Worker {worker_id}] [{profile_name}] Task processing complete. Worker will now unlock profile and attempt next task.") logger.info(f"[Worker {worker_id}] [{profile_name}] Task processing complete. Worker will now unlock profile and attempt next task.")
# 6. Clean up task file # 6. Clean up task file
@ -1274,25 +1368,34 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
auth_manager = get_auth_manager(profile_manager_instance, auth_env) auth_manager = get_auth_manager(profile_manager_instance, auth_env)
if auth_manager: if auth_manager:
try: try:
auth_manager.decrement_pending_downloads(auth_profile_name) # Fallback: if no downloads were counted (e.g., due to an early exit or parsing issue),
logger.info(f"[Worker {worker_id}] Decremented pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}'.") # but the task is being finalized, we must assume all potential downloads for this task
# are "processed" to prevent the auth profile from getting stuck.
if downloads_processed_in_task == 0:
logger.warning(f"[Worker {worker_id}] No downloads were counted for this task. Using policy to determine decrement count to avoid stuck profile.")
ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
formats_str = ytdlp_config_overrides.get('format', d_policy.get('formats', ''))
num_formats = formats_str.count(',') + 1 if formats_str else 1
downloads_processed_in_task = num_formats
logger.warning(f"[Worker {worker_id}] Decrementing by fallback count: {downloads_processed_in_task}")
if downloads_processed_in_task > 0:
new_count = auth_manager.increment_pending_downloads(auth_profile_name, -downloads_processed_in_task)
logger.info(f"[Worker {worker_id}] Decremented pending downloads for auth profile '{auth_profile_name}' by {downloads_processed_in_task} in env '{auth_env}'. New count: {new_count}")
except Exception as e: except Exception as e:
logger.error(f"[Worker {worker_id}] Failed to decrement pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}': {e}", exc_info=True) logger.error(f"[Worker {worker_id}] Failed to decrement pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}': {e}", exc_info=True)
else: else:
logger.error(f"[Worker {worker_id}] Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.") logger.error(f"[Worker {worker_id}] Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
else: else:
logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env in info.json metadata. Pending downloads counter will not be decremented. (Profile: {auth_profile_name}, Env: {auth_env})") logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env. Pending downloads counter will not be decremented. (Profile: {auth_profile_name}, Env: {auth_env}) Task had auth_profile_name: {task.get('auth_profile_name')}, auth_env: {task.get('auth_env')}")
if was_banned_by_parser: if was_banned_by_parser:
logger.info(f"[Worker {worker_id}] Profile '{locked_profile['name']}' was already banned by the log parser. Skipping unlock/cooldown.") logger.info(f"[Worker {worker_id}] Profile '{locked_profile['name']}' was already banned by the log parser. Skipping unlock/cooldown.")
else: else:
last_used_profile_name = locked_profile['name']
cooldown = None cooldown = None
# Only apply cooldown if a task was actually claimed and processed. # Only apply cooldown if a task was actually claimed and processed.
if claimed_task_path_host: if claimed_task_path_host:
# The enforcer is the single place where cooldown/policy changes are configured, because the
# enforcer can be restarted cheaply, while the stress-policy processes that handle auth and
# downloads simultaneously should not be restarted. In effect, a policy change propagates to
# every worker/machine without restarting each of them.
# DESIGN: The cooldown duration is not configured in the worker's policy. # DESIGN: The cooldown duration is not configured in the worker's policy.
# Instead, it is read from a central Redis key. This key is set by the # Instead, it is read from a central Redis key. This key is set by the
# policy-enforcer, making the enforcer the single source of truth for # policy-enforcer, making the enforcer the single source of truth for
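The DESIGN note above means the workers do not read a cooldown from their own policy file but from a value the policy-enforcer publishes centrally. A sketch of such a read path, assuming a redis-py style client and a purely illustrative key name (the real key is owned by the enforcer and is not shown in this hunk):

    import json

    def read_central_cooldown(redis_client, key="ytops:policy:unlock_cooldown_seconds"):
        """Return the enforcer-published cooldown in seconds, or None if unset.
        The key name and value format here are assumptions for illustration."""
        raw = redis_client.get(key)
        if raw is None:
            return None
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8", errors="replace")
        try:
            value = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return int(raw) if raw.isdigit() else None
        if isinstance(value, dict):
            return int(value.get("seconds", 0)) or None
        return int(value) or None

Because every worker re-reads the central value at unlock time, the enforcer can retune cooldowns without restarting the long-running auth and download workers.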

View File

@ -46,11 +46,14 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
no_task_streak = 0 no_task_streak = 0
last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set(): while not state_manager.shutdown_event.is_set():
locked_profile = None locked_profile = None
claimed_task_path = None claimed_task_path = None
auth_profile_name, auth_env = None, None # For finally block auth_profile_name, auth_env = None, None # For finally block
downloads_to_decrement = 0
formats_from_metadata, granularity_from_metadata = [], 'per_format'
try: try:
# 0. If no tasks were found, pause briefly. # 0. If no tasks were found, pause briefly.
if no_task_streak > 0: if no_task_streak > 0:
@ -61,9 +64,19 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
if profiles_in_prefix: if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix) state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items())) states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
logger.info(f"[Worker {worker_id}] No tasks found for available profiles. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s. (Streak: {no_task_streak})") base_log_msg = f"[Worker {worker_id}] No tasks found for available profiles. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s."
else: else:
logger.info(f"[Worker {worker_id}] No tasks found for available profiles. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s. (Streak: {no_task_streak})") base_log_msg = f"[Worker {worker_id}] No tasks found for available profiles. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stderr, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr) # Newline to clean up after dots
full_log_msg = f"{base_log_msg} (Streak: {no_task_streak})"
logger.info(full_log_msg)
last_no_task_log_msg = base_log_msg
# --- End diagnostic logging --- # --- End diagnostic logging ---
time.sleep(polling_interval) time.sleep(polling_interval)
if state_manager.shutdown_event.is_set(): continue if state_manager.shutdown_event.is_set(): continue
@ -83,6 +96,9 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
if claimed_task_path: if claimed_task_path:
no_task_streak = 0 # Reset streak no_task_streak = 0 # Reset streak
if last_no_task_log_msg:
print(file=sys.stderr) # Newline to clean up after dots
last_no_task_log_msg = ""
# --- Read metadata before processing/deleting file --- # --- Read metadata before processing/deleting file ---
try: try:
@ -91,6 +107,18 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
metadata = info_data.get('_ytops_metadata', {}) metadata = info_data.get('_ytops_metadata', {})
auth_profile_name = metadata.get('profile_name') auth_profile_name = metadata.get('profile_name')
auth_env = metadata.get('auth_env') auth_env = metadata.get('auth_env')
# Use .get() without a default to distinguish "not present" (None) from "present and empty" ([]).
raw_formats_from_metadata = metadata.get('formats_requested')
granularity_from_metadata = metadata.get('download_task_granularity', 'per_format')
# Normalize to a list for simulation logic, while preserving the original raw state for decrement logic.
formats_from_metadata = raw_formats_from_metadata
if formats_from_metadata is None:
formats_from_metadata = []
elif isinstance(formats_from_metadata, str):
formats_from_metadata = [f.strip() for f in formats_from_metadata.split(',')]
elif not isinstance(formats_from_metadata, list):
formats_from_metadata = []
except (IOError, json.JSONDecodeError) as e: except (IOError, json.JSONDecodeError) as e:
logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.") logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.")
continue # Skip to finally block to unlock profile continue # Skip to finally block to unlock profile
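The metadata handling above deliberately distinguishes a missing formats_requested key from an explicit empty list. A sketch of that normalization, reusing the same names:

    def normalize_formats_requested(raw_formats_from_metadata):
        """Return (raw_value, normalized_list); raw_value is None only when the
        key was absent, which later decides the fallback decrement count."""
        raw = raw_formats_from_metadata
        if raw is None:
            return None, []
        if isinstance(raw, str):
            return raw, [f.strip() for f in raw.split(',')]
        if isinstance(raw, list):
            return raw, raw
        return raw, []

    assert normalize_formats_requested(None) == (None, [])
    assert normalize_formats_requested("18, 140")[1] == ["18", "140"]
    assert normalize_formats_requested([]) == ([], [])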
@ -152,10 +180,43 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task '{claimed_task_path.name}'...") logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task '{claimed_task_path.name}'...")
if args.dummy or args.dummy_batch: if args.dummy or args.dummy_batch:
logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DIRECT DOWNLOAD ==========") logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DOCKER DOWNLOAD PER-FORMAT SIMULATION ==========")
logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path.name}") logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path.name}")
logger.info(f"[Worker {worker_id}] Would run command: {' '.join(shlex.quote(s) for s in cmd)}")
logger.info(f"[Worker {worker_id}] With environment: {custom_env}") formats_to_simulate = []
# Prioritize formats from the info.json metadata if the key was present
if raw_formats_from_metadata is not None:
formats_to_simulate = formats_from_metadata
logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for {len(formats_to_simulate)} formats from info.json: {formats_to_simulate}")
else:
# Fallback to policy file if metadata key is absent
formats_config = d_policy.get('formats')
if isinstance(formats_config, str):
formats_to_simulate = [f.strip() for f in formats_config.split(',')]
elif isinstance(formats_config, list):
formats_to_simulate = formats_config
if not formats_to_simulate and raw_formats_from_metadata is None:
logger.info(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download for backward compatibility.")
formats_to_simulate = ['dummy_format']
# For counting purposes, we should still decrement appropriately.
# Both granularities treat this backward-compatibility case as a single download.
downloads_to_decrement = 1
# If granularity was 'per_url', we only decrement by 1, even if multiple
# format selectors were passed (e.g., as one string).
if granularity_from_metadata == 'per_url':
downloads_to_decrement = 1
else: # per_format or not specified
if raw_formats_from_metadata is not None:
# If key was present, trust the count from metadata. Can be 0.
downloads_to_decrement = len(formats_from_metadata)
else:
# Key was absent. Decrement by the number of formats we are simulating
# from policy, or 1 as a final fallback for old files.
downloads_to_decrement = len(formats_to_simulate) if formats_to_simulate else 1
dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {}) dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
min_seconds = dummy_settings.get('download_min_seconds', 0.5) min_seconds = dummy_settings.get('download_min_seconds', 0.5)
@ -163,46 +224,39 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
failure_rate = dummy_settings.get('download_failure_rate', 0.0) failure_rate = dummy_settings.get('download_failure_rate', 0.0)
skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0) skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)
-    time.sleep(random.uniform(min_seconds, max_seconds))
-
-    rand_val = random.random()
-    should_fail_skipped = rand_val < skipped_rate
-    should_fail_fatal = not should_fail_skipped and rand_val < (skipped_rate + failure_rate)
-    success = False
-    details = ""
-    error_type = None
-    is_tolerated_error = False
-
-    if should_fail_skipped:
-        logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating skipped download failure for task '{claimed_task_path.name}'.")
-        details = "Dummy skipped failure"
-        error_type = "DummySkippedFailure"
-        is_tolerated_error = True
-        profile_manager_instance.record_activity(profile_name, 'tolerated_error')
-    elif should_fail_fatal:
-        logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal download failure for task '{claimed_task_path.name}'.")
-        details = "Dummy fatal failure"
-        error_type = "DummyFailure"
-        profile_manager_instance.record_activity(profile_name, 'download_error')
-    else:
-        logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating download success for task '{claimed_task_path.name}'.")
-        success = True
-        details = "Dummy success"
-        profile_manager_instance.record_activity(profile_name, 'download')
-
-    event = {
-        'type': 'direct_download',
-        'profile': profile_name,
-        'proxy_url': locked_profile['proxy'],
-        'success': success,
-        'details': details,
-        'error_type': error_type,
-        'is_tolerated_error': is_tolerated_error
-    }
-    state_manager.log_event(event)
-    logger.info(f"========== [Worker {worker_id}] END DUMMY DIRECT DOWNLOAD ==========")
+    for i, format_id in enumerate(formats_to_simulate):
+        logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for format '{format_id}'...")
+        sim_duration = random.uniform(min_seconds, max_seconds)
+        logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for {sim_duration:.2f}s (from policy range {min_seconds}-{max_seconds}s).")
+        time.sleep(sim_duration)
+
+        rand_val = random.random()
+        is_skipped = rand_val < skipped_rate
+        is_fatal = not is_skipped and rand_val < (skipped_rate + failure_rate)
+
+        if is_skipped:
+            logger.warning(f"[Worker {worker_id}] DUMMY: Simulating skipped download for format '{format_id}'.")
+            profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=True)
+        elif is_fatal:
+            logger.error(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure for format '{format_id}'.")
+            profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=True)
+        else: # success
+            logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success for format '{format_id}'.")
+            profile_manager_instance.record_activity(profile_name, 'download', is_dummy=True)
+
+    logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD SIMULATION ==========")
+
+    # Determine downloads_to_decrement based on metadata
+    if granularity_from_metadata == 'per_url':
+        downloads_to_decrement = 1
+    else: # per_format or not specified
+        if raw_formats_from_metadata is not None:
+            # If key was present, trust the count. It can be 0.
+            downloads_to_decrement = len(formats_from_metadata)
+        else:
+            # Key was absent, fall back to 1 for old info.jsons
+            downloads_to_decrement = 1
 else:
     # --- Real execution ---
     logger.info(f"[Worker {worker_id}] [{profile_name}] Running command: {' '.join(shlex.quote(s) for s in cmd)}")
     retcode, stdout, stderr = run_command(
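Each simulated per-format attempt in the hunk above is one weighted draw against the dummy settings. A sketch of just that sampling step, with the rates named as they appear in dummy_simulation_settings:

    import random

    def simulate_dummy_outcome(skipped_rate, failure_rate, rng=random):
        """Map a uniform draw onto the three outcomes used above:
        [0, skipped) -> tolerated_error, [skipped, skipped+failure) -> download_error, else download."""
        rand_val = rng.random()
        if rand_val < skipped_rate:
            return 'tolerated_error'
        if rand_val < skipped_rate + failure_rate:
            return 'download_error'
        return 'download'

    # Roughly 10% skipped, 20% fatal, 70% success over many draws.
    sample = [simulate_dummy_outcome(0.1, 0.2) for _ in range(1000)]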
@ -268,7 +322,13 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
if claimed_task_path and auth_profile_name and auth_env: if claimed_task_path and auth_profile_name and auth_env:
auth_manager = get_auth_manager(profile_manager_instance, auth_env) auth_manager = get_auth_manager(profile_manager_instance, auth_env)
if auth_manager: if auth_manager:
auth_manager.decrement_pending_downloads(auth_profile_name) if downloads_to_decrement > 0:
auth_manager.increment_pending_downloads(auth_profile_name, count=-downloads_to_decrement)
elif downloads_to_decrement == 0:
logger.warning(f"[Worker {worker_id}] `downloads_to_decrement` was zero. Pending downloads counter for '{auth_profile_name}' will not be changed.")
else:
# This shouldn't happen, but log it
logger.error(f"[Worker {worker_id}] `downloads_to_decrement` was negative: {downloads_to_decrement}")
else: else:
logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.") logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
elif claimed_task_path: elif claimed_task_path:

View File

@ -20,6 +20,7 @@ from copy import deepcopy
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple, Union from typing import Dict, List, Optional, Any, Tuple, Union
from urllib.parse import urlparse, parse_qs
from . import utils as sp_utils from . import utils as sp_utils
from .process_runners import run_command, run_docker_container, get_worker_id from .process_runners import run_command, run_docker_container, get_worker_id
@ -36,6 +37,7 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
exec_control = policy.get('execution_control', {}) exec_control = policy.get('execution_control', {})
gen_policy = policy.get('info_json_generation_policy', {}) gen_policy = policy.get('info_json_generation_policy', {})
queue_policy = policy.get('queue_policy', {}) queue_policy = policy.get('queue_policy', {})
task_granularity = gen_policy.get('download_task_granularity', 'per_format')
profile_prefix = gen_policy.get('profile_prefix') profile_prefix = gen_policy.get('profile_prefix')
if not profile_prefix: if not profile_prefix:
@ -54,35 +56,58 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
batch_size = queue_policy.get('batch_size', 1) batch_size = queue_policy.get('batch_size', 1)
logger.info(f"[Worker {worker_id}] Auth worker configured to process tasks in batches of {batch_size}.") logger.info(f"[Worker {worker_id}] Auth worker configured to process tasks in batches of {batch_size}.")
task_counter = 0 task_counter = 0
last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set(): while not state_manager.shutdown_event.is_set():
locked_profile = None locked_profile = None
tasks = [] tasks = []
try: try:
# 1. Lock a profile FIRST # 1. Get a batch of tasks from the queue FIRST
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) tasks = state_manager.get_auth_tasks_batch(batch_size)
if not tasks:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
base_log_msg = f"[Worker {worker_id}] No tasks available in queue, polling."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stdout, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
logger.info(base_log_msg)
last_no_task_log_msg = base_log_msg
time.sleep(polling_interval)
continue
if last_no_task_log_msg:
print(file=sys.stderr)
last_no_task_log_msg = ""
# 2. Lock a profile, trying any of the specified prefixes
locked_profile = None
prefixes_to_try = []
if profile_prefix:
prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
if prefixes_to_try:
random.shuffle(prefixes_to_try)
for prefix in prefixes_to_try:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
if locked_profile:
break
else:
# Fallback for empty/no prefix, which means lock any available profile
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
if not locked_profile: if not locked_profile:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1) polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.debug(f"[Worker {worker_id}] No profiles available to lock. Sleeping for {polling_interval}s.") logger.warning(f"[Worker {worker_id}] No profiles available for {len(tasks)} task(s). Re-queueing and sleeping for {polling_interval}s.")
state_manager.add_auth_tasks_batch(tasks)
time.sleep(polling_interval) time.sleep(polling_interval)
continue continue
profile_name = locked_profile['name'] profile_name = locked_profile['name']
proxy_url = locked_profile['proxy'] proxy_url = locked_profile['proxy']
# 2. Get a batch of tasks from the queue
tasks = state_manager.get_auth_tasks_batch(batch_size)
if not tasks:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.debug(f"[Worker {worker_id}] No tasks available for profile '{profile_name}'. Unlocking and sleeping for {polling_interval}s.")
# Unlock immediately since we have no work to do.
# No cooldown is applied here to make the profile available again quickly.
profile_manager_instance.unlock_profile(profile_name, owner=owner_id)
locked_profile = None # To prevent double-unlock in finally
time.sleep(polling_interval)
continue
logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' to process a batch of {len(tasks)} tasks.") logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' to process a batch of {len(tasks)} tasks.")
# 3. Process each task in the batch # 3. Process each task in the batch
@ -129,13 +154,34 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
stderr = "Dummy fatal failure" stderr = "Dummy fatal failure"
else: else:
success = True success = True
video_id = sp_utils.get_video_id(url) or f"dummy_{random.randint(1000, 9999)}" # In dummy mode, robustly extract ID from URL to avoid 'unknown_video_id'
try:
# First, try the robust library function
video_id = sp_utils.get_video_id(url)
if not video_id and url:
# Fallback parsing for dummy URLs like "...?v=dummy_0099"
parsed_url = urlparse(url)
video_id = parse_qs(parsed_url.query).get('v', [None])[0]
# Additional fallback for URLs like "youtube.com/watch?v=dummy_0028"
if not video_id and 'dummy_' in url:
dummy_match = re.search(r'dummy_\d+', url)
if dummy_match:
video_id = dummy_match.group(0)
except Exception:
video_id = None # Ensure video_id is None on parsing failure
# If all extraction methods fail, use the task_id as a reliable fallback.
if not video_id:
logger.debug(f"Could not extract video ID from URL '{url}', using task ID '{task_id}' for dummy data.")
video_id = task_id
info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True, 'formats': [{'format_id': '18'}, {'format_id': '140'}]} info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True, 'formats': [{'format_id': '18'}, {'format_id': '140'}]}
else: else:
client, req_params = state_manager.get_client_for_request(profile_name, gen_policy) client, req_params = state_manager.get_client_for_request(profile_name, gen_policy)
cmd = [ cmd = [
sys.executable, '-m', 'ytops_client.cli', 'get-info', sys.executable, '-m', 'ytops_client.cli', 'get-info',
'--client', client, '--profile', profile_name '--client', client or '', '--profile', profile_name
] ]
if proxy_url: if proxy_url:
cmd.extend(['--proxy', proxy_url]) cmd.extend(['--proxy', proxy_url])
@ -173,61 +219,143 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
if success and info_data: if success and info_data:
try: try:
auth_env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '') auth_env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
info_data['_ytops_metadata'] = { is_dummy_run = args.dummy or args.dummy_batch
# --- 1. Determine which formats will be requested ---
formats_to_download = queue_policy.get('formats_to_download')
requested_formats_list = []
if formats_to_download:
if task_granularity == 'per_url':
format_selector = formats_to_download
if isinstance(formats_to_download, list):
format_selector = ",".join(formats_to_download)
requested_formats_list = [format_selector]
else: # per_format
formats_source = formats_to_download
if formats_source == 'from_download_policy':
formats_source = policy.get('download_policy', {}).get('formats')
if formats_source == 'all':
requested_formats_list = [f['format_id'] for f in info_data.get('formats', [])]
elif isinstance(formats_source, list):
requested_formats_list = formats_source
elif isinstance(formats_source, str):
# A comma-separated string of format groups
requested_formats_list = [f.strip() for f in formats_source.split(',')]
elif formats_source:
# Handles integer format IDs or other non-string/list values
requested_formats_list = [str(formats_source)]
else:
# formats_source is None or empty
requested_formats_list = []
# --- 2. Create download tasks and save info.json(s) based on granularity ---
download_tasks_to_create = []
created_file_paths = []
if task_granularity == 'per_format' and requested_formats_list:
for i, format_id in enumerate(requested_formats_list):
# Deepcopy to avoid modifying the original info_data in the loop
task_info_data = deepcopy(info_data)
metadata = {
'profile_name': profile_name, 'proxy_url': proxy_url, 'profile_name': profile_name, 'proxy_url': proxy_url,
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(), 'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
'task_id': task_id, 'url': url, 'auth_env': auth_env_name 'task_id': task_id, 'url': url, 'auth_env': auth_env_name,
'formats_requested': [format_id], # This task is for ONE format
'download_task_granularity': task_granularity
} }
if is_dummy_run: metadata['_dummy'] = True
task_info_data['_ytops_metadata'] = metadata
final_path = None final_path = None
if save_dir: if save_dir:
video_id = info_data.get('id', 'unknown') video_id = task_info_data.get('id') or task_id or 'unknown_video_id'
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy' sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy'
new_name = f"{video_id}-{profile_name}-{sanitized_proxy}.json" # Sanitize format_id for use in filename
sanitized_format = re.sub(r'[^a-zA-Z0-9_-]', '_', format_id)
# Add index `i` to guarantee uniqueness even if sanitized format IDs clash
new_name = f"{video_id}-{profile_name}-{sanitized_proxy}-fmt{i}_{sanitized_format}.info.json"
final_path = os.path.join(save_dir, new_name)
with open(final_path, 'w', encoding='utf-8') as f:
json.dump(task_info_data, f, indent=2)
logger.info(f"[Worker {worker_id}] [{profile_name}] Saved format-specific info.json to '{final_path}'")
created_file_paths.append(final_path)
download_tasks_to_create.append({
'info_json_path': final_path, 'format_id': format_id,
'video_id': task_info_data.get('id'), 'url': url,
'auth_profile_name': profile_name, 'proxy_url': proxy_url,
'auth_env': auth_env_name,
'original_task': task
})
else: # per_url or default (including per_format with no formats)
metadata = {
'profile_name': profile_name, 'proxy_url': proxy_url,
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
'task_id': task_id, 'url': url, 'auth_env': auth_env_name,
'formats_requested': requested_formats_list,
'download_task_granularity': task_granularity
}
if is_dummy_run: metadata['_dummy'] = True
info_data['_ytops_metadata'] = metadata
final_path = None
if save_dir:
video_id = info_data.get('id') or task_id or 'unknown_video_id'
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy'
new_name = f"{video_id}-{profile_name}-{sanitized_proxy}.info.json"
final_path = os.path.join(save_dir, new_name) final_path = os.path.join(save_dir, new_name)
with open(final_path, 'w', encoding='utf-8') as f: with open(final_path, 'w', encoding='utf-8') as f:
json.dump(info_data, f, indent=2) json.dump(info_data, f, indent=2)
logger.info(f"[Worker {worker_id}] [{profile_name}] Saved info.json to '{final_path}'") logger.info(f"[Worker {worker_id}] [{profile_name}] Saved info.json to '{final_path}'")
-else:
-    # This case means auth-only (no downloads) and no save_dir specified.
-    # The info.json is not persisted.
-    logger.debug(f"[Worker {worker_id}] [{profile_name}] Auth-only task succeeded. No save_dir, so info.json is not saved.")
-
-profile_manager_instance.record_activity(profile_name, 'success')
-
-formats_to_download = queue_policy.get('formats_to_download')
-download_tasks = []
-if formats_to_download:
-    task_formats = []
-    if formats_to_download == 'all':
-        task_formats = [f['format_id'] for f in info_data.get('formats', [])]
-    elif isinstance(formats_to_download, list):
-        task_formats = formats_to_download
-    else:
-        task_formats = [str(formats_to_download)]
-    for format_id in task_formats:
-        download_tasks.append({
-            'info_json_path': final_path, 'format_id': format_id,
-            'video_id': info_data.get('id'), 'url': url,
-            'auth_profile_name': profile_name, 'proxy_url': proxy_url,
-            'auth_env': auth_env_name,
-            'original_task': task
-        })
-
-if download_tasks:
-    added_count = state_manager.add_download_tasks_batch(download_tasks)
-    logger.info(f"[Worker {worker_id}] [{profile_name}] Added {added_count} download tasks to queue")
-    profile_manager_instance.increment_pending_downloads(profile_name, count=added_count)
-
-state_manager.report_auth_success(task_id, {
-    "url": url, "video_id": info_data.get('id'), "profile_name": profile_name,
-    "proxy_url": proxy_url, "info_json_path": final_path,
-    "download_tasks_created": len(download_tasks)
-})
+        created_file_paths.append(final_path)
+
+    # For per_url, requested_formats_list should contain a single format selector string.
+    # The loop will run once, creating one task.
+    if requested_formats_list:
+        for format_selector in requested_formats_list:
+            download_tasks_to_create.append({
+                'info_json_path': final_path, 'format_id': format_selector,
+                'video_id': info_data.get('id'), 'url': url,
+                'auth_profile_name': profile_name, 'proxy_url': proxy_url,
+                'auth_env': auth_env_name,
+                'original_task': task
+            })
+    else: # Handle the case where requested_formats_list is empty
+        logger.debug(f"[Worker {worker_id}] [{profile_name}] No formats requested, no download tasks created.")
+
+profile_manager_instance.record_activity(profile_name, 'success', is_dummy=(args.dummy or args.dummy_batch))
+
+# --- 3. Dispatch download tasks ---
+create_download_tasks = queue_policy.get('create_download_tasks', True)
+if download_tasks_to_create:
+    num_tasks_to_process = 0
+    if create_download_tasks:
+        added_count = state_manager.add_download_tasks_batch(download_tasks_to_create)
+        logger.info(f"[Worker {worker_id}] [{profile_name}] Added {added_count} download tasks to queue")
+        num_tasks_to_process = added_count
+    else:
+        num_tasks = len(download_tasks_to_create)
+        logger.info(f"[Worker {worker_id}] [{profile_name}] File-based workflow: created {num_tasks} tasks (no queue tasks created)")
+        num_tasks_to_process = num_tasks
+
+    if num_tasks_to_process > 0:
+        profile_manager_instance.increment_pending_downloads(profile_name, count=num_tasks_to_process)
+        profile_manager_instance.record_predicted_downloads(profile_name, count=num_tasks_to_process, is_dummy=(args.dummy or args.dummy_batch))
+
+report_payload = {
+    "url": url, "video_id": info_data.get('id'), "profile_name": profile_name,
+    "proxy_url": proxy_url, "info_json_path": created_file_paths[0] if len(created_file_paths) == 1 else created_file_paths,
+    "download_tasks_created": len(download_tasks_to_create)
+}
+if is_dummy_run:
+    report_payload['_dummy'] = True
+state_manager.report_auth_success(task_id, report_payload)
except Exception as e: except Exception as e:
logger.error(f"[Worker {worker_id}] [{profile_name}] Error processing successful auth result: {e}", exc_info=True) logger.error(f"[Worker {worker_id}] [{profile_name}] Error processing successful auth result: {e}", exc_info=True)
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_auth_failure(task_id, {"error": f"Error processing info.json: {str(e)}", "url": url}) state_manager.report_auth_failure(task_id, {"error": f"Error processing info.json: {str(e)}", "url": url})
else: else:
is_bot_error = "Sign in to confirm you're not a bot" in stderr is_bot_error = "Sign in to confirm you're not a bot" in stderr
@ -240,19 +368,19 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
if is_unavailable or is_private or is_deleted or is_dummy_skipped: if is_unavailable or is_private or is_deleted or is_dummy_skipped:
reason = "Video unavailable" if is_unavailable else "Private video" if is_private else "Video removed" if is_deleted else "Dummy skipped" reason = "Video unavailable" if is_unavailable else "Private video" if is_private else "Video removed" if is_deleted else "Dummy skipped"
logger.warning(f"[Worker {worker_id}] [{profile_name}] Auth skipped for {url}: {reason}") logger.warning(f"[Worker {worker_id}] [{profile_name}] Auth skipped for {url}: {reason}")
profile_manager_instance.record_activity(profile_name, 'tolerated_error') profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_auth_skipped(task_id, {"url": url, "reason": reason, "stderr": stderr}) state_manager.report_auth_skipped(task_id, {"url": url, "reason": reason, "stderr": stderr})
else: else:
error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else "Dummy fatal failure" if "Dummy fatal failure" in stderr else f"Exit code {retcode}" error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else "Dummy fatal failure" if "Dummy fatal failure" in stderr else f"Exit code {retcode}"
logger.error(f"[Worker {worker_id}] [{profile_name}] Authentication failed ({error_type}): {url}") logger.error(f"[Worker {worker_id}] [{profile_name}] Authentication failed ({error_type}): {url}")
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_auth_failure(task_id, {"url": url, "error_type": error_type, "stderr": stderr, "exit_code": retcode}) state_manager.report_auth_failure(task_id, {"url": url, "error_type": error_type, "stderr": stderr, "exit_code": retcode})
except Exception as e: except Exception as e:
logger.error(f"[Worker {worker_id}] [{profile_name}] Unexpected error processing task {task_id}: {e}", exc_info=True) logger.error(f"[Worker {worker_id}] [{profile_name}] Unexpected error processing task {task_id}: {e}", exc_info=True)
if task_id: if task_id:
state_manager.report_auth_failure(task_id, {"error": f"Unexpected error: {str(e)}", "url": url or "unknown"}) state_manager.report_auth_failure(task_id, {"error": f"Unexpected error: {str(e)}", "url": url or "unknown"})
profile_manager_instance.record_activity(profile_name, 'failure') profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
finally: finally:
if temp_task_dir and os.path.exists(temp_task_dir): if temp_task_dir and os.path.exists(temp_task_dir):
shutil.rmtree(temp_task_dir) shutil.rmtree(temp_task_dir)
@ -262,7 +390,7 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
except Exception as e: except Exception as e:
logger.error(f"[Worker {worker_id}] Unexpected error in outer worker loop: {e}", exc_info=True) logger.error(f"[Worker {worker_id}] Unexpected error in outer worker loop: {e}", exc_info=True)
if locked_profile: if locked_profile:
profile_manager_instance.record_activity(locked_profile['name'], 'failure') profile_manager_instance.record_activity(locked_profile['name'], 'failure', is_dummy=(args.dummy or args.dummy_batch))
finally: finally:
if locked_profile: if locked_profile:
@ -276,16 +404,27 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
elif isinstance(val, int): elif isinstance(val, int):
cooldown = val cooldown = val
except (json.JSONDecodeError, TypeError): except (json.JSONDecodeError, TypeError):
if cooldown_config.isdigit(): if isinstance(cooldown_config, str) and cooldown_config.isdigit():
cooldown = int(cooldown_config) cooldown = int(cooldown_config)
if cooldown: # For auth simulation, check if profile has pending downloads
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.") # If so, don't apply cooldown to avoid conflicting with enforcer
profile_data = profile_manager_instance.get_profile(locked_profile['name'])
should_apply_cooldown = cooldown
if profile_data:
pending_downloads = profile_manager_instance.get_pending_downloads(locked_profile['name'])
rest_reason = profile_data.get('rest_reason')
if pending_downloads > 0 or rest_reason == 'waiting_downloads':
should_apply_cooldown = None
logger.info(f"[Worker {worker_id}] Auth profile '{locked_profile['name']}' has pending downloads or is waiting for downloads. Not applying cooldown.")
if should_apply_cooldown:
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {should_apply_cooldown}s.")
profile_manager_instance.unlock_profile( profile_manager_instance.unlock_profile(
locked_profile['name'], locked_profile['name'],
owner=owner_id, owner=owner_id,
rest_for_seconds=cooldown rest_for_seconds=should_apply_cooldown
) )
logger.info(f"[Worker {worker_id}] Queue auth worker exiting.") logger.info(f"[Worker {worker_id}] Queue auth worker exiting.")
@ -311,6 +450,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
logger.info(f"[Worker {worker_id}] Will save downloads to '{output_dir}'") logger.info(f"[Worker {worker_id}] Will save downloads to '{output_dir}'")
task_counter = 0 task_counter = 0
last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set(): while not state_manager.shutdown_event.is_set():
locked_profile = None locked_profile = None
@ -322,7 +462,14 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
task = state_manager.get_download_task() task = state_manager.get_download_task()
if not task: if not task:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1) polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.debug(f"[Worker {worker_id}] No download tasks available in queue. Sleeping for {polling_interval}s.") base_log_msg = f"[Worker {worker_id}] No download tasks available in queue."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stderr, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
logger.debug(f"{base_log_msg} Sleeping for {polling_interval}s.")
last_no_task_log_msg = base_log_msg
time.sleep(polling_interval) time.sleep(polling_interval)
continue continue
@ -377,20 +524,45 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
if specific_profile: if specific_profile:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, specific_profile_name=specific_profile) locked_profile = profile_manager_instance.lock_profile(owner=owner_id, specific_profile_name=specific_profile)
if not locked_profile: if not locked_profile:
logger.warning(f"[Worker {worker_id}] Could not lock specific profile '{specific_profile}'. Trying any profile with prefix.") logger.warning(f"[Worker {worker_id}] Could not lock specific profile '{specific_profile}'. Trying any profile from configured prefixes.")
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
else: if not locked_profile:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix) # Lock from any of the specified prefixes
prefixes_to_try = []
if profile_prefix:
prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
if prefixes_to_try:
random.shuffle(prefixes_to_try)
for prefix in prefixes_to_try:
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
if locked_profile:
break
if not locked_profile:
# Fallback for empty/no prefix, or if no profile from list was lockable
locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
if not locked_profile: if not locked_profile:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1) polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.warning(f"[Worker {worker_id}] No profiles available for task {task_id}. Re-queueing and sleeping for {polling_interval}s.") base_log_msg = f"[Worker {worker_id}] No profiles available, polling."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stderr, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
logger.info(f"{base_log_msg} Re-queueing task {task_id} and sleeping for {polling_interval}s.")
last_no_task_log_msg = base_log_msg
# Re-queue the task by adding it back to the inbox. # Re-queue the task by adding it back to the inbox.
state_manager.add_download_tasks_batch([task]) state_manager.add_download_tasks_batch([task])
# The 'in_progress' marker for this attempt will be cleaned up by the finally block. # The 'in_progress' marker for this attempt will be cleaned up by the finally block.
time.sleep(polling_interval) time.sleep(polling_interval)
continue continue
if last_no_task_log_msg:
print(file=sys.stderr)
last_no_task_log_msg = ""
profile_name = locked_profile['name'] profile_name = locked_profile['name']
proxy_url = locked_profile['proxy'] proxy_url = locked_profile['proxy']
logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' with proxy '{proxy_url}'") logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' with proxy '{proxy_url}'")
@@ -480,7 +652,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
except Exception as e:
logger.error(f"[Worker {worker_id}] Failed to move files to Airflow-ready directory: {e}", exc_info=True)
profile_manager_instance.record_activity(profile_name, 'download_success')
profile_manager_instance.record_activity(profile_name, 'download', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_download_success(task_id, {
"video_id": video_id, "url": url, "format_id": format_id,
"profile_name": profile_name, "proxy_url": proxy_url,
@@ -508,7 +680,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
if is_unavailable or is_format_error:
logger.warning(f"[Worker {worker_id}] Download skipped: {video_id or url} format {format_id}")
profile_manager_instance.record_activity(profile_name, 'tolerated_error')
profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_download_skipped(task_id, {
"video_id": video_id, "url": url, "format_id": format_id,
"reason": "Video unavailable" if is_unavailable else "Format not available", "stderr": stderr
@@ -516,23 +688,13 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
else:
error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else f"Exit code {retcode}"
logger.error(f"[Worker {worker_id}] Download failed ({error_type}): {video_id or url} format {format_id}")
profile_manager_instance.record_activity(profile_name, 'download_error')
profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_download_failure(task_id, {
"video_id": video_id, "url": url, "format_id": format_id,
"error_type": error_type, "stderr": stderr, "exit_code": retcode,
"original_task": task
})
# Decrement pending downloads counter on the original auth profile, regardless of outcome
if auth_profile_name and auth_env:
auth_manager = get_auth_manager(profile_manager_instance, auth_env)
if auth_manager:
auth_manager.decrement_pending_downloads(auth_profile_name)
else:
logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
elif task: # Only warn if we had a task but couldn't get metadata
logger.warning(f"Could not find auth profile name and/or auth_env in info.json metadata. Pending downloads counter will not be decremented.")
except Exception as e:
logger.error(f"[Worker {worker_id}] Unexpected error: {e}", exc_info=True)
if task and task_id:
@@ -543,9 +705,22 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
"error": f"Unexpected error: {str(e)}", "original_task": task
})
if locked_profile:
profile_manager_instance.record_activity(locked_profile['name'], 'download_error')
profile_manager_instance.record_activity(locked_profile['name'], 'download_error', is_dummy=(args.dummy or args.dummy_batch))
finally:
# Decrement pending downloads counter on the original auth profile, regardless of outcome.
# This is critical for releasing the auth profile from the 'waiting_downloads' state.
if task: # Only attempt to decrement if a task was actually pulled.
if auth_profile_name and auth_env:
auth_manager = get_auth_manager(profile_manager_instance, auth_env)
if auth_manager:
new_count = auth_manager.decrement_pending_downloads(auth_profile_name)
logger.info(f"[Worker {worker_id}] Decremented pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}'. New count: {new_count}")
else:
logger.error(f"[Worker {worker_id}] Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
else:
logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env in task metadata. Pending downloads counter will not be decremented.")
if locked_profile:
cooldown = None
cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds')
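The decrement now lives in the worker's finally block so the auth profile's pending-downloads counter is released even when the download path raises. A minimal sketch of the try/finally shape, with a hypothetical do_download callable and the auth manager's decrement_pending_downloads call standing in for the full worker:

import logging

logger = logging.getLogger(__name__)

def process_task(task, auth_manager, do_download):
    """Run a download and always release the pending-downloads slot,
    mirroring the try/finally structure of the queue download worker."""
    try:
        do_download(task)
    finally:
        # Runs on success, failure, or an unexpected exception, so the auth
        # profile cannot get stuck in the 'waiting_downloads' state.
        new_count = auth_manager.decrement_pending_downloads(task["auth_profile_name"])
        logger.info("Released pending download slot, new count: %s", new_count)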

View File

@@ -904,6 +904,22 @@ class StateManager:
return self.queue_provider.remove_in_progress(self.queue_provider.AUTH_PROGRESS, task_id)
def add_auth_task(self, task: Dict) -> bool:
"""Add an authentication task to the queue."""
if not self.queue_provider:
logger.error("Queue provider not initialized")
return False
return self.queue_provider.add_task(self.queue_provider.AUTH_INBOX, task)
def add_auth_tasks_batch(self, tasks: List[Dict]) -> int:
"""Add a batch of authentication tasks to the queue."""
if not self.queue_provider:
logger.error("Queue provider not initialized")
return 0
return self.queue_provider.add_tasks_batch(self.queue_provider.AUTH_INBOX, tasks)
def add_download_task(self, task: Dict) -> bool:
"""Add a download task to the queue."""
if not self.queue_provider:
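The new add_auth_task/add_auth_tasks_batch helpers mirror the existing download-task methods, so callers can seed the auth inbox directly. A hedged usage sketch, assuming an initialized StateManager and an illustrative task payload shape:

from typing import Dict, List

def seed_auth_inbox(state_manager, urls: List[str], prefix: str = "user") -> int:
    """Push one auth task per URL onto the auth inbox via the new batch helper.
    Returns the number of tasks actually enqueued (0 if the queue provider is missing)."""
    tasks: List[Dict] = [{"url": u, "profile_prefix": prefix} for u in urls]
    return state_manager.add_auth_tasks_batch(tasks)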

View File

@@ -40,6 +40,7 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
return []
no_task_streak = 0
last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set():
locked_profile = None
@@ -48,7 +49,15 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
# 0. If no tasks were found previously, pause briefly.
if no_task_streak > 0:
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.info(f"[Worker {worker_id}] No tasks found in previous attempt(s). Pausing for {polling_interval}s. (Streak: {no_task_streak})")
base_log_msg = f"[Worker {worker_id}] No available tasks found for any active profiles."
if base_log_msg == last_no_task_log_msg:
print(".", end="", file=sys.stderr, flush=True)
else:
if last_no_task_log_msg:
print(file=sys.stderr)
full_log_msg = f"{base_log_msg} Pausing for {polling_interval}s. (Streak: {no_task_streak})"
logger.info(full_log_msg)
last_no_task_log_msg = base_log_msg
time.sleep(polling_interval)
if state_manager.shutdown_event.is_set(): continue
@@ -60,9 +69,6 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
if not locked_profile:
# No task/profile combo was available.
no_task_streak += 1
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
logger.info(f"[Worker {worker_id}] No available tasks found for any active profiles. Pausing for {polling_interval}s.")
time.sleep(polling_interval)
continue
profile_name = locked_profile['name']
@@ -70,6 +76,9 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
# We have a task and a lock.
if claimed_task_path:
no_task_streak = 0 # Reset streak
if last_no_task_log_msg:
print(file=sys.stderr)
last_no_task_log_msg = ""
# 3. Process the task
try:
with open(claimed_task_path, 'r', encoding='utf-8') as f:

View File

@@ -319,7 +319,7 @@ def apply_overrides(policy, overrides):
return policy
def display_effective_policy(policy, name, sources=None, profile_names=None, original_workers_setting=None):
def display_effective_policy(policy, name, args, sources=None, profile_names=None, original_workers_setting=None):
"""Prints a human-readable summary of the effective policy."""
logger = logging.getLogger(__name__)
logger.info(f"--- Effective Policy: {name} ---")
@@ -328,6 +328,8 @@ def display_effective_policy(policy, name, sources=None, profile_names=None, ori
orchestration_mode = settings.get('orchestration_mode')
logger.info(f"Mode: {settings.get('mode', 'full_stack')}")
if args and args.profile_prefix:
logger.info(f"Profile Prefix (from CLI): {args.profile_prefix}")
if profile_names:
num_profiles = len(profile_names)
logger.info(f"Profiles found: {num_profiles}")

View File

@@ -188,7 +188,7 @@ def find_task_and_lock_profile(profile_manager, owner_id, profile_prefix, policy
if not info_json_dir:
return None, None
logger.info(f"[Worker {worker_id}] Scanning for tasks in '{info_json_dir}'...")
logger.debug(f"[Worker {worker_id}] Scanning for tasks in '{info_json_dir}'...")
# 1. Get all available task files and group by profile
try:
@@ -223,7 +223,21 @@ def find_task_and_lock_profile(profile_manager, owner_id, profile_prefix, policy
# 2. Get ACTIVE profiles from Redis.
active_profiles = profile_manager.list_profiles(state_filter='ACTIVE')
active_profile_names = {p['name'] for p in active_profiles if p['name'].startswith(profile_prefix or '')}
prefixes_to_check = []
if profile_prefix:
prefixes_to_check = [p.strip() for p in profile_prefix.split(',') if p.strip()]
active_profile_names = set()
if prefixes_to_check:
for p in active_profiles:
for prefix in prefixes_to_check:
if p['name'].startswith(prefix):
active_profile_names.add(p['name'])
break
else:
# If no prefixes are specified, consider all active profiles.
active_profile_names = {p['name'] for p in active_profiles}
# 3. Find profiles that are both ACTIVE and have tasks.
candidate_profiles = list(active_profile_names.intersection(tasks_by_profile.keys()))
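The prefix filter now accepts a comma-separated list instead of a single prefix, and an empty value matches all active profiles. A standalone sketch of that matching logic (function name is hypothetical):

from typing import Iterable, Optional, Set

def filter_by_prefixes(names: Iterable[str], profile_prefix: Optional[str]) -> Set[str]:
    """Return the names matching any comma-separated prefix; no prefix means match all."""
    prefixes = [p.strip() for p in (profile_prefix or "").split(",") if p.strip()]
    if not prefixes:
        return set(names)
    return {n for n in names if any(n.startswith(p) for p in prefixes)}

# e.g. filter_by_prefixes(["user1", "user2", "dl_a"], "user,dl_") keeps all three names.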
@@ -918,7 +932,8 @@ def process_info_json_cycle(path, content, policy, state_manager, args, running_
result['proxy_url'] = proxy_url
if profile_manager_instance and profile_name:
profile_manager_instance.record_activity(profile_name, 'tolerated_error')
is_dummy = args.dummy or args.dummy_batch
profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
state_manager.log_event(result)
results.append(result)
@@ -960,14 +975,15 @@ def process_info_json_cycle(path, content, policy, state_manager, args, running_
# --- Record activity on the DOWNLOAD profile that performed the work ---
if profile_manager_instance and profile_name:
is_dummy = args.dummy or args.dummy_batch
if result.get('success'):
profile_manager_instance.record_activity(profile_name, 'download')
profile_manager_instance.record_activity(profile_name, 'download', is_dummy=is_dummy)
elif result.get('error_type') == 'Cancelled':
pass # Do not record cancellations
elif result.get('is_tolerated_error'):
profile_manager_instance.record_activity(profile_name, 'tolerated_error')
profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
else:
profile_manager_instance.record_activity(profile_name, 'download_error')
profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=is_dummy)
state_manager.log_event(result)
results.append(result)
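Every record_activity call in this commit now forwards an is_dummy flag derived from --dummy/--dummy-batch, so dummy traffic can be separated from real traffic in profile statistics. A compact sketch of the dispatch shown in the hunk above, factored into a helper (the helper name is an assumption, the event names come from the diff):

def record_result_activity(profile_manager, profile_name, result, is_dummy):
    """Map a worker result onto a profile activity event, tagging dummy traffic."""
    if result.get("error_type") == "Cancelled":
        return  # cancellations are not recorded
    if result.get("success"):
        event = "download"
    elif result.get("is_tolerated_error"):
        event = "tolerated_error"
    else:
        event = "download_error"
    profile_manager.record_activity(profile_name, event, is_dummy=is_dummy)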

View File

@@ -495,7 +495,7 @@ def main_stress_policy(args):
exec_control['workers'] = calculated_workers
logger.info(f"Calculated 'auto' workers for throughput mode: {calculated_workers} (based on {len(matching_profiles)} profiles with prefix '{profile_prefix}').")
sp_utils.display_effective_policy(policy, policy_name, sources=[], original_workers_setting=original_workers_setting)
sp_utils.display_effective_policy(policy, policy_name, args, sources=[], original_workers_setting=original_workers_setting)
if args.dry_run: return 0
workers = exec_control.get('workers', 1)
@@ -578,15 +578,45 @@ def main_stress_policy(args):
logger.info(f"Starting/resuming from URL index {start_index + 1}.")
# The worker's get_next_url_batch will respect this starting index.
sp_utils.display_effective_policy(policy, policy_name, sources=urls_list)
sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
if args.dry_run: return 0
workers = exec_control.get('workers', 1)
worker_pools = exec_control.get('worker_pools', [])
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
if not worker_pools and exec_control.get('workers'):
futures = [
prefix = policy.get('info_json_generation_policy', {}).get('profile_prefix')
executor.submit(run_direct_batch_worker, i, policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
for i in range(workers)
]
if not worker_pools:
logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
return 1
if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
worker_specs = []
worker_id_counter = 0
for pool in worker_pools:
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue
for i in range(pool_workers):
assigned_prefix = prefixes[i % len(prefixes)]
worker_policy = deepcopy(policy)
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = assigned_prefix
worker_specs.append({
'func': run_direct_batch_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
})
worker_id_counter += 1
with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
# Wait for all workers to complete. They will exit their loops when no URLs are left.
concurrent.futures.wait(futures)
if shutdown_event.is_set():
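The single workers count is replaced by execution_control.worker_pools, where each pool carries its own worker count and a comma-separated profile_prefix; in the direct-batch path the prefixes are assigned round-robin across the pool's workers. A condensed sketch of that expansion under the pool shape shown above (run_worker and build_worker_specs are placeholders, not project functions):

from copy import deepcopy

def build_worker_specs(policy, exec_control, run_worker, cli_prefix=None):
    """Expand execution_control.worker_pools into per-worker (func, id, policy) specs."""
    pools = list(exec_control.get("worker_pools", []))
    if not pools and exec_control.get("workers"):
        # Backwards compatibility: a bare 'workers' count becomes one implicit pool.
        pools = [{"workers": exec_control["workers"], "profile_prefix": "user"}]
    if cli_prefix:
        for pool in pools:
            pool["profile_prefix"] = cli_prefix  # CLI --profile-prefix overrides pool prefixes
    specs = []
    for pool in pools:
        prefixes = [p.strip() for p in pool.get("profile_prefix", "").split(",") if p.strip()]
        if not prefixes:
            continue  # a pool without prefixes is skipped with a warning in the real code
        for i in range(pool.get("workers", 1)):
            worker_policy = deepcopy(policy)
            # Round-robin one prefix per worker, as in the direct-batch mode; the other
            # modes instead hand each worker the full comma-separated list.
            worker_policy.setdefault("info_json_generation_policy", {})["profile_prefix"] = prefixes[i % len(prefixes)]
            specs.append((run_worker, len(specs), worker_policy))
    return specs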
@@ -662,14 +692,46 @@ def main_stress_policy(args):
if start_index > 0:
logger.info(f"Starting/resuming from URL index {start_index + 1}.")
sp_utils.display_effective_policy(policy, policy_name, sources=urls_list)
sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
if args.dry_run: return 0
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
worker_pools = exec_control.get('worker_pools', [])
futures = [
if not worker_pools and exec_control.get('workers'):
executor.submit(run_direct_docker_worker, i, policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
prefix = policy.get('info_json_generation_policy', {}).get('profile_prefix')
for i in range(workers)
worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
]
if not worker_pools:
logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
return 1
if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
worker_specs = []
worker_id_counter = 0
for pool in worker_pools:
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue
# Each worker in the pool gets the full list of prefixes from the pool configuration.
for i in range(pool_workers):
worker_policy = deepcopy(policy)
# The worker functions will now handle a comma-separated list of prefixes.
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
worker_specs.append({
'func': run_direct_docker_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
})
worker_id_counter += 1
with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
# This worker runs until shutdown, like the download worker
shutdown_event.wait()
logger.info("Shutdown signal received, waiting for direct docker workers to finish...")
@@ -686,14 +748,46 @@ def main_stress_policy(args):
logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
return 1
sp_utils.display_effective_policy(policy, policy_name, sources=[])
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
worker_pools = exec_control.get('worker_pools', [])
futures = [
if not worker_pools and exec_control.get('workers'):
executor.submit(run_direct_docker_download_worker, i, policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
prefix = policy.get('download_policy', {}).get('profile_prefix')
for i in range(workers)
worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
]
if not worker_pools:
logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
return 1
if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
worker_specs = []
worker_id_counter = 0
for pool in worker_pools:
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue
# Each worker in the pool gets the full list of prefixes from the pool configuration.
for i in range(pool_workers):
worker_policy = deepcopy(policy)
# The worker functions will now handle a comma-separated list of prefixes.
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
worker_specs.append({
'func': run_direct_docker_download_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
})
worker_id_counter += 1
with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
# This worker runs until shutdown
shutdown_event.wait()
logger.info("Shutdown signal received, waiting for direct docker download workers to finish...")
@@ -726,15 +820,46 @@ def main_stress_policy(args):
logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
return 1
sp_utils.display_effective_policy(policy, policy_name, sources=[])
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
workers = exec_control.get('workers', 1)
worker_pools = exec_control.get('worker_pools', [])
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
if not worker_pools and exec_control.get('workers'):
futures = [
prefix = policy.get('download_policy', {}).get('profile_prefix')
executor.submit(run_direct_download_worker, i, policy, state_manager, args, download_manager, running_processes, process_lock)
worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
for i in range(workers)
]
if not worker_pools:
logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
return 1
if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
worker_specs = []
worker_id_counter = 0
for pool in worker_pools:
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue
# Each worker in the pool gets the full list of prefixes from the pool configuration.
for i in range(pool_workers):
worker_policy = deepcopy(policy)
# The worker functions will now handle a comma-separated list of prefixes.
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
worker_specs.append({
'func': run_direct_download_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
})
worker_id_counter += 1
with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
shutdown_event.wait()
logger.info("Shutdown signal received, waiting for direct download workers to finish...")
concurrent.futures.wait(futures)
@@ -786,6 +911,15 @@ def main_stress_policy(args):
logger.error(f"Failed to create save directory '{save_dir}': {e}")
return 1
# In dummy mode, do not populate the queue. Warn the user instead.
if args.dummy or args.dummy_batch:
input_queue = queue_policy.get('input_queue', 'queue2_auth_inbox')
logger.warning("--- Dummy Mode Notice ---")
logger.warning(f"Dummy mode is enabled. The tool will NOT automatically populate the queue.")
logger.warning(f"Please ensure the input queue '{input_queue}' contains tasks for the workers to process.")
logger.warning(f"You can populate it using another tool, for example: ./bin/ytops-client queue push {input_queue} --payload-json '{{\"url\":\"https://youtu.be/dQw4w9WgXcQ\"}}' --count 100")
logger.warning("-------------------------")
# Requeue failed tasks if requested
if args.requeue_failed:
requeued = state_manager.requeue_failed_auth_tasks(
@@ -793,15 +927,46 @@ def main_stress_policy(args):
)
logger.info(f"Requeued {requeued} failed authentication tasks.")
sp_utils.display_effective_policy(policy, policy_name, sources=[])
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
workers = exec_control.get('workers', 1)
worker_pools = exec_control.get('worker_pools', [])
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
if not worker_pools and exec_control.get('workers'):
futures = [
prefix = policy.get('info_json_generation_policy', {}).get('profile_prefix')
executor.submit(run_queue_auth_worker, i, policy, state_manager, args, auth_manager, running_processes, process_lock)
worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
for i in range(workers)
]
if not worker_pools:
logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
return 1
if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
worker_specs = []
worker_id_counter = 0
for pool in worker_pools:
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue
# Each worker in the pool gets the full list of prefixes from the pool configuration.
for i in range(pool_workers):
worker_policy = deepcopy(policy)
# The worker functions will now handle a comma-separated list of prefixes.
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
worker_specs.append({
'func': run_queue_auth_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, auth_manager, running_processes, process_lock)
})
worker_id_counter += 1
with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
shutdown_event.wait()
logger.info("Shutdown signal received, waiting for queue auth workers to finish...")
concurrent.futures.wait(futures)
@@ -843,6 +1008,17 @@ def main_stress_policy(args):
env_prefix=env_prefix
)
# In dummy mode, do not populate the queue. Warn the user instead.
if args.dummy or args.dummy_batch:
input_queue = queue_policy.get('input_queue', 'queue2_dl_inbox')
logger.warning("--- Dummy Mode Notice ---")
logger.warning(f"Dummy mode is enabled. The tool will NOT automatically populate the queue.")
logger.warning(f"Please ensure the input queue '{input_queue}' contains tasks for the workers to process.")
logger.warning("-------------------------")
# Get download policy
d_policy = policy.get('download_policy', {})
# Create output directory if specified
output_dir = d_policy.get('output_dir')
if output_dir:
@@ -860,15 +1036,46 @@ def main_stress_policy(args):
)
logger.info(f"Requeued {requeued} failed download tasks.")
sp_utils.display_effective_policy(policy, policy_name, sources=[])
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
workers = exec_control.get('workers', 1)
worker_pools = exec_control.get('worker_pools', [])
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
if not worker_pools and exec_control.get('workers'):
futures = [
prefix = policy.get('download_policy', {}).get('profile_prefix')
executor.submit(run_queue_download_worker, i, policy, state_manager, args, download_manager, running_processes, process_lock)
worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
for i in range(workers)
]
if not worker_pools:
logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
return 1
if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
worker_specs = []
worker_id_counter = 0
for pool in worker_pools:
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue
# Each worker in the pool gets the full list of prefixes from the pool configuration.
for i in range(pool_workers):
worker_policy = deepcopy(policy)
# The worker functions will now handle a comma-separated list of prefixes.
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
worker_specs.append({
'func': run_queue_download_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
})
worker_id_counter += 1
with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
shutdown_event.wait()
logger.info("Shutdown signal received, waiting for queue download workers to finish...")
concurrent.futures.wait(futures)
@@ -949,7 +1156,7 @@ def main_stress_policy(args):
)
logger.info(f"Requeued {requeued_auth} failed authentication tasks and {requeued_dl} failed download tasks.")
sp_utils.display_effective_policy(policy, policy_name, sources=[])
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
# Start both auth and download workers
@@ -1127,6 +1334,7 @@ def main_stress_policy(args):
sp_utils.display_effective_policy(
policy,
policy_name,
args,
sources=sources,
profile_names=None, # Profile grouping is removed
original_workers_setting=original_workers_setting
@@ -1254,12 +1462,22 @@ def main_stress_policy(args):
# If marking by rename is on, do that.
if settings.get('mark_processed_files'):
try:
processed_dir = settings.get('processed_files_dir')
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
new_path = source.parent / f"{source.name}.{timestamp}.processed"
new_filename = f"{source.name}.{timestamp}.processed"
if processed_dir:
dest_path = Path(processed_dir) / new_filename
dest_path.parent.mkdir(parents=True, exist_ok=True)
shutil.move(str(source), str(dest_path))
logger.info(f"Marked '{source.name}' as processed by moving to '{dest_path}'")
else:
# Fallback to old behavior: rename in place
new_path = source.parent / new_filename
source.rename(new_path)
logger.info(f"Marked '{source.name}' as processed by renaming to '{new_path.name}'")
except (IOError, OSError) as e:
logger.error(f"Failed to rename processed file '{source.name}': {e}")
logger.error(f"Failed to mark processed file '{source.name}': {e}")
# When using profile-aware mode, the file processing (including marking as
# processed) is handled inside process_profile_task.
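With processed_files_dir set, processed source files are moved into a dedicated directory; without it, the previous rename-in-place behaviour is kept as a fallback. A self-contained sketch of that logic as a helper (the function name is hypothetical):

import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional

def mark_processed(source: Path, processed_dir: Optional[str] = None) -> Path:
    """Mark a source file as processed: move it into processed_dir if configured,
    otherwise rename it in place with a timestamped '.processed' suffix."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    new_filename = f"{source.name}.{timestamp}.processed"
    if processed_dir:
        dest = Path(processed_dir) / new_filename
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(source), str(dest))
        return dest
    new_path = source.parent / new_filename
    source.rename(new_path)
    return new_path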