Use cluster.yaml as the single source of the profile list, fix single tmux worker naming, and update ytops to support workers without a profile list
This commit is contained in: parent bf12118b2b, commit 4fd9217c6d
@@ -108,12 +108,20 @@ ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.gree
 
 ### Profile Management
 
+The `cleanup-profiles` action can be used to remove profiles from Redis. By default, it cleans up "ungrouped" profiles.
+
 ```bash
-# Clean up all profiles
+# Perform a dry run of cleaning up ungrouped profiles
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "dry_run=true"
+
+# Clean up ungrouped profiles
 ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles"
 
-# Clean up specific profile prefix
-ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "profile_prefix=user1"
+# To clean up ALL profiles (destructive), set cleanup_mode=full
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "cleanup_mode=full"
+
+# You can specify a custom setup policy file for cleanup operations
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "setup_policy=policies/my_custom_setup_policy.yaml"
 ```
 
 ## Monitoring and Inspection
@@ -157,7 +165,19 @@ Then, from the jump host, you can sync code or policies to the cluster nodes:
 ansible-playbook ansible/playbook-stress-sync-code.yml -i ansible/inventory.green.ini
 
 # Sync only policies and CLI configs
-ansible-playbook ansible/playbook-stress-sync-configs.yml -i ansible/inventory.green.ini
+ansible-playbook ansible/playbook-stress-sync-policies.yml -i ansible/inventory.green.ini
+
+# To sync files from a custom source directory on the Ansible controller, use the 'source_base_dir' extra variable:
+ansible-playbook ansible/playbook-stress-sync-policies.yml -i ansible/inventory.green.ini -e "source_base_dir=/path/to/my-custom-source"
+```
+
+### Docker Image Updates
+
+To update the `yt-dlp` docker image used by download simulators, run the following playbook. This builds the image locally on each worker node.
+
+```bash
+# Build the yt-dlp docker image locally on each worker node
+ansible-playbook ansible/playbook-update-yt-dlp-docker.yml -i ansible/inventory.green.ini
 ```
 
 ### Adding a New Worker
@@ -225,7 +245,10 @@ ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.
 ### Restart Enforcer and Monitoring
 
 ```bash
-# Restart monitoring and enforcer on master
+# Restart monitoring and enforcer on master using default policies
 ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=restart-monitoring"
+
+# Restart using a custom enforcer policy
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=restart-monitoring" -e "enforcer_policy=policies/my_other_enforcer.yaml"
 ```
 
@@ -1,4 +1,35 @@
 ---
+# -------------------------------------------------------------------------------------------------
+# PHASE 0: Fix Python Dependencies
+# Ensures remote hosts have compatible Python libraries to prevent module failures.
+# -------------------------------------------------------------------------------------------------
+- name: "PHASE 0: Upgrade Python SSL libraries"
+  hosts: all
+  gather_facts: no
+  tasks:
+    - name: Attempt to upgrade pyOpenSSL, cryptography and urllib3
+      ansible.builtin.shell:
+        cmd: "pip3 install --upgrade pyOpenSSL cryptography urllib3"
+      become: yes
+      register: pip_upgrade_result
+      changed_when: "'Successfully installed' in pip_upgrade_result.stdout"
+      failed_when: false
+
+    - name: Retry upgrade with --break-system-packages on specific error
+      ansible.builtin.shell:
+        cmd: "pip3 install --upgrade pyOpenSSL cryptography urllib3 --break-system-packages"
+      become: yes
+      register: pip_upgrade_result_retry
+      changed_when: "'Successfully installed' in pip_upgrade_result_retry.stdout"
+      when: pip_upgrade_result.rc != 0 and 'externally-managed-environment' in pip_upgrade_result.stderr
+
+    - name: Fail if package upgrade did not succeed
+      ansible.builtin.fail:
+        msg: "Failed to upgrade Python packages after retry. Last error: {{ pip_upgrade_result_retry.stderr | default(pip_upgrade_result.stderr) }}"
+      when: >
+        pip_upgrade_result.rc != 0 and
+        (pip_upgrade_result_retry is not defined or pip_upgrade_result_retry.rc != 0)
+
 # This playbook provides a complete installation for fresh nodes.
 # It can install either master or worker roles, or both on the same machine.
 #
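The retry mirrors pip's PEP 668 behaviour: on distro-managed interpreters, `pip3` refuses to modify system site-packages and prints `externally-managed-environment`, and `--break-system-packages` (pip ≥ 23) overrides that. A plain-shell sketch of the same fallback, for reference (error-log path is illustrative):

```bash
if ! pip3 install --upgrade pyOpenSSL cryptography urllib3 2>/tmp/pip.err; then
    # Only escalate for the PEP 668 refusal, not for other failures
    if grep -q externally-managed-environment /tmp/pip.err; then
        pip3 install --upgrade pyOpenSSL cryptography urllib3 --break-system-packages
    fi
fi
```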
@@ -55,57 +86,10 @@
 
 # -------------------------------------------------------------------------------------------------
 # PHASE 4: Build yt-dlp Docker Image
-# Builds the yt-dlp container from bin/ directory
+# Builds the yt-dlp container on each worker node using the dedicated playbook.
 # -------------------------------------------------------------------------------------------------
-- name: "PHASE 4: Build yt-dlp Docker image"
-  hosts: all
-  gather_facts: no
-  vars_files:
-    - "group_vars/all/vault.yml"
-  pre_tasks:
-    - name: Set inventory_env fact
-      ansible.builtin.set_fact:
-        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
-    - name: Load environment-specific variables
-      ansible.builtin.include_vars: "{{ item }}"
-      with_fileglob:
-        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
-  tasks:
-    - name: Define base directory for node
-      ansible.builtin.set_fact:
-        base_dir: "{{ airflow_master_dir if (inventory_hostname in groups['master'] and not (install_worker | default(false) | bool)) else airflow_worker_dir }}"
-
-    - name: Ensure bin directory exists
-      ansible.builtin.file:
-        path: "{{ base_dir }}/bin"
-        state: directory
-        owner: "{{ ansible_user }}"
-        group: "{{ deploy_group }}"
-        mode: '0755'
-      become: yes
-
-    - name: Check if Dockerfile exists in bin directory
-      ansible.builtin.stat:
-        path: "{{ base_dir }}/bin/Dockerfile"
-      register: dockerfile_stat
-
-    - name: Build yt-dlp Docker image if Dockerfile exists
-      community.docker.docker_image:
-        name: yt-dlp-custom
-        tag: latest
-        source: build
-        build:
-          path: "{{ base_dir }}/bin"
-          pull: yes
-        state: present
-        force_source: yes
-      become: yes
-      when: dockerfile_stat.stat.exists
-
-    - name: Display message if Dockerfile not found
-      ansible.builtin.debug:
-        msg: "Dockerfile not found at {{ base_dir }}/bin/Dockerfile - skipping yt-dlp image build"
-      when: not dockerfile_stat.stat.exists
+- name: "PHASE 4: Build yt-dlp Docker image on workers"
+  import_playbook: playbook-update-yt-dlp-docker.yml
 
 # -------------------------------------------------------------------------------------------------
 # PHASE 5: Sync Code and Install Dependencies
@@ -148,6 +132,12 @@
       with_fileglob:
         - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
+    - name: Install redis-tools
+      ansible.builtin.apt:
+        name: redis-tools
+        state: present
+      become: yes
+
     - name: Configure system performance and kernel settings
       ansible.builtin.copy:
         src: "configs/etc/sysctl.d/99-system-limits.conf"
@@ -174,7 +164,7 @@
 
     - name: Template Docker Compose file for master services
       ansible.builtin.template:
-        src: templates/docker-compose.stress-master.j2
+        src: docker-compose.stress-master.j2
         dest: "{{ airflow_master_dir }}/docker-compose.stress.yml"
         owner: "{{ ansible_user }}"
         group: "{{ deploy_group }}"
@@ -184,9 +174,7 @@
     - name: Stop and remove existing containers before starting services
       ansible.builtin.shell:
         cmd: |
-          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=bgutil-provider" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
       become: yes
       changed_when: false
       ignore_errors: yes
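A note on the format strings above: doubled braces (`{{{{.ID}}}}`) are Python `str.format` escaping, not Jinja2, so they do not survive Ansible's templating; wrapping the Go-template placeholder in a Jinja string literal, `{{ '{{.ID}}' }}`, emits the literal braces Docker expects. The rendered command on the host comes out roughly as:

```bash
# Docker receives the literal {{.ID}} placeholder, so each matching
# container's ID is printed and force-removed.
docker ps -a --filter "name=bgutil-provider" --format "{{.ID}}" | xargs -r docker rm -f
```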
@@ -200,6 +188,14 @@
         remove_orphans: true
       become: yes
 
+    - name: Wait for Redis service to be ready
+      ansible.builtin.wait_for:
+        host: localhost
+        port: "{{ redis_port }}"
+        delay: 5
+        timeout: 60
+      delegate_to: "{{ inventory_hostname }}"
+
     - name: Wait for MinIO service to be ready
       ansible.builtin.wait_for:
         host: "{{ hostvars[inventory_hostname].ansible_host }}"
@@ -240,7 +236,7 @@
       register: mc_mb_result
       failed_when: >
         mc_mb_result.rc != 0 and
-        "already exists" not in mc_mb_result.stderr
+        "already own it" not in mc_mb_result.stderr
       changed_when: mc_mb_result.rc == 0
       environment:
         HOME: "/home/{{ ansible_user }}"
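The whitelist string changes because `mc mb` on a bucket you created reports an error along the lines of "you already own it" (S3's BucketAlreadyOwnedByYou), rather than "already exists". A quick way to observe this, with an alias and bucket name chosen only for illustration:

```bash
# Re-creating a bucket you own prints an error containing "already own it";
# the failed_when above treats that case as success.
mc mb local/stress-bucket || true
```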
@@ -264,7 +260,7 @@
   tasks:
     - name: Template Docker Compose file for worker services
       ansible.builtin.template:
-        src: templates/docker-compose.stress-master.j2
+        src: docker-compose.stress-master.j2
         dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
         owner: "{{ ansible_user }}"
         group: "{{ deploy_group }}"
@@ -274,9 +270,9 @@
     - name: Stop and remove existing containers before starting services
       ansible.builtin.shell:
         cmd: |
-          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=bgutil-provider" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
+          docker ps -a --filter "name=redis-stress" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
+          docker ps -a --filter "name=minio-stress" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
       become: yes
       changed_when: false
       ignore_errors: yes
ansible/playbook-stress-cleanup-info-jsons.yml (new file, 41 lines)
@@ -0,0 +1,41 @@
+---
+- name: "STRESS-CLEANUP: Remove info.json task files from workers"
+  hosts: workers
+  gather_facts: no
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
+  tasks:
+    - name: Define the directory to be cleaned
+      ansible.builtin.set_fact:
+        target_dir: "{{ airflow_worker_dir }}/run/docker_mount/info_json_tasks/direct_docker_simulation"
+
+    - name: "Display directory being cleaned"
+      ansible.builtin.debug:
+        msg: "Cleaning directory: {{ target_dir }} on {{ inventory_hostname }}"
+
+    - name: Remove the info_json_tasks directory
+      ansible.builtin.file:
+        path: "{{ target_dir }}"
+        state: absent
+      become: yes
+
+    - name: Recreate the info_json_tasks directory
+      ansible.builtin.file:
+        path: "{{ target_dir }}"
+        state: directory
+        owner: "{{ ansible_user }}"
+        group: "{{ deploy_group }}"
+        mode: '0755'
+      become: yes
+
+    - name: "Display cleanup completion"
+      ansible.builtin.debug:
+        msg: "Successfully cleaned and recreated {{ target_dir }} on {{ inventory_hostname }}"
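A typical invocation of this new playbook, using the same inventory file as the other commands in this repo:

```bash
# Wipe and recreate the info.json task directory on every worker
ansible-playbook ansible/playbook-stress-cleanup-info-jsons.yml -i ansible/inventory.green.ini
```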
@@ -1,10 +1,11 @@
 ---
 - name: "STRESS-SETUP: Unified control for stress test processes"
-  hosts: all
+  hosts: "{{ 'master' if action in master_only_actions else 'all' }}"
   gather_facts: no
   vars:
     # Default action is status check
     action: "status"
+    graceful_shutdown_timeout_seconds: 30
     setup_policy: "policies/6_profile_setup_policy.yaml"
     enforcer_policy: "policies/8_unified_simulation_enforcer.yaml"
     master_only_actions:
@@ -80,9 +81,7 @@
           if [ -f .env ]; then
             set -a && . ./.env && set +a
           fi
-          timeout 10 ./bin/ytops-client profile list \
-            --auth-env sim_auth \
-            --download-env sim_download 2>&1
+          timeout 10 ./bin/ytops-client profile list 2>&1
       register: profile_list_output
       changed_when: false
       when:
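The `set -a && . ./.env && set +a` prologue used throughout these shell tasks auto-exports every variable the `.env` file assigns, so child processes such as `ytops-client` inherit them:

```bash
set -a        # mark all subsequently assigned variables for export
. ./.env      # source KEY=VALUE pairs from the working directory
set +a        # turn auto-export back off
```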
@@ -132,7 +131,12 @@
 
     - name: "Display policy being used for profile cleanup"
       ansible.builtin.debug:
-        msg: "Using setup policy for cleanup: {{ setup_policy }}"
+        msg: >-
+          {% if cleanup_mode | default('ungrouped') == 'full' %}
+          Performing a FULL cleanup of all profiles using policy: {{ setup_policy }}
+          {% else %}
+          Cleaning up UNGROUPED profiles using policy: {{ setup_policy }}{% if dry_run | default(false) %} (DRY RUN){% endif %}
+          {% endif %}
       when:
         - action == "cleanup-profiles"
         - inventory_hostname in groups['master']
@@ -146,9 +150,15 @@
           if [ -f .env ]; then
             set -a && . ./.env && set +a
           fi
+          {% if cleanup_mode | default('ungrouped') == 'full' %}
           ./bin/ytops-client setup-profiles \
             --policy {{ setup_policy }} \
-            --cleanup-all {% if profile_prefix is defined %}--profile-prefix {{ profile_prefix }}{% endif %}
+            --cleanup-all
+          {% else %}
+          ./bin/ytops-client profile cleanup-ungrouped \
+            --policy-file {{ setup_policy }} \
+            {% if dry_run | default(false) %}--dry-run{% endif %}
+          {% endif %}
       register: cleanup_output
       changed_when: false
       when:
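For reference, the templated command above expands to one of two client calls (shown with the playbook's default policy path):

```bash
# cleanup_mode=full -- destructive, removes all profiles:
./bin/ytops-client setup-profiles --policy policies/6_profile_setup_policy.yaml --cleanup-all

# default 'ungrouped' mode with dry_run=true -- reports what would be removed:
./bin/ytops-client profile cleanup-ungrouped --policy-file policies/6_profile_setup_policy.yaml --dry-run
```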
@@ -172,8 +182,34 @@
         - cleanup_output is defined
 
     - name: Stop all stress test processes on all nodes (stop-all action)
+      vars:
+        process_pattern: "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)|[p]ython.*ytops"
       block:
-        - name: Kill all tmux sessions starting with 'stress-'
+        - name: "Get PIDs of running stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}'"
+          register: pids_to_kill
+          changed_when: false
+
+        - name: "Gracefully terminate stress test processes"
+          ansible.builtin.shell:
+            cmd: "kill {{ pids_to_kill.stdout_lines | join(' ') }}"
+          when: pids_to_kill.stdout | length > 0
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Wait for graceful shutdown"
+          ansible.builtin.pause:
+            seconds: "{{ graceful_shutdown_timeout_seconds }}"
+          when: pids_to_kill.stdout | length > 0
+
+        - name: "Force kill any lingering stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true"
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Kill all stress-related tmux sessions as a failsafe"
           ansible.builtin.shell:
             cmd: |
               for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
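This task ladder is the classic graceful-then-forceful shutdown. In plain shell it amounts to the following (process pattern abbreviated and timeout inlined for illustration):

```bash
PIDS=$(ps aux | grep -E '[y]tops-client' | grep -v ansible | awk '{print $2}')
[ -n "$PIDS" ] && kill $PIDS          # ask nicely (SIGTERM)
sleep 30                              # graceful_shutdown_timeout_seconds
ps aux | grep -E '[y]tops-client' | grep -v ansible | awk '{print $2}' \
  | xargs -r kill -9                  # force anything still alive
```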
@@ -186,27 +222,37 @@
               fi
           ignore_errors: yes
           changed_when: false
-
-        - name: Kill all ytops-client and related python processes
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate processes by pattern
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-
-              sleep 1 # Wait for graceful shutdown
-
-              # Force kill any remaining processes
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
 
       when: action == "stop-all"
 
     - name: Stop processes on targeted nodes only (stop-nodes action)
+      vars:
+        process_pattern: "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)|[p]ython.*ytops"
       block:
-        - name: Kill all tmux sessions starting with 'stress-' on this node
+        - name: "Get PIDs of running stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}'"
+          register: pids_to_kill
+          changed_when: false
+
+        - name: "Gracefully terminate stress test processes"
+          ansible.builtin.shell:
+            cmd: "kill {{ pids_to_kill.stdout_lines | join(' ') }}"
+          when: pids_to_kill.stdout | length > 0
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Wait for graceful shutdown"
+          ansible.builtin.pause:
+            seconds: "{{ graceful_shutdown_timeout_seconds }}"
+          when: pids_to_kill.stdout | length > 0
+
+        - name: "Force kill any lingering stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true"
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Kill all stress-related tmux sessions as a failsafe"
           ansible.builtin.shell:
             cmd: |
               for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
@@ -219,22 +265,6 @@
               fi
           ignore_errors: yes
           changed_when: false
-
-        - name: Kill all ytops-client and related python processes on this node
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate processes by pattern
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-
-              sleep 1 # Wait for graceful shutdown
-
-              # Force kill any remaining processes
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
-
       when: action == "stop-nodes"
 
     - name: Restart monitoring and enforcer (restart-monitoring action)
@@ -263,8 +293,6 @@
             working_dir: "{{ airflow_master_dir }}"
             command_to_run: >
               ./bin/ytops-client profile list
-              --auth-env sim_auth
-              --download-env sim_download
               --live
               --no-blink
               --show-reasons
@@ -3,7 +3,7 @@
   hosts: workers
   gather_facts: no
   vars:
-    tmux_session_download: "stress-download-{{ (profile_prefix | default('default')) | replace(',', '-') }}"
+    tmux_session_download: "{{ 'stress-download-worker-' + (worker_num | string) if (profile_prefix | default('') == 'worker' and worker_num is defined) else 'stress-download-' + (profile_prefix | default('default')) | replace(',', '-') }}"
     download_policy: "policies/11_direct_docker_download_simulation.yaml"
   vars_files:
     - "group_vars/all/vault.yml"
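With this change the session name depends on how the simulator is launched; the values below are illustrative renderings of the template:

```bash
# profile_prefix=worker, worker_num=2  ->  session "stress-download-worker-2"
# profile_prefix=user1,user2           ->  session "stress-download-user1-user2"
tmux list-sessions -F '#{session_name}'   # one session per running simulator
```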
@@ -41,13 +41,14 @@
         command_to_run: >
           ./bin/ytops-client stress-policy
           --policy {{ download_policy }}
-          {% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %}
+          {# {% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %} #}
           {% if download_min_seconds is defined %}--set 'settings.dummy_simulation_settings.download_min_seconds={{ download_min_seconds }}'{% endif %}
           {% if download_max_seconds is defined %}--set 'settings.dummy_simulation_settings.download_max_seconds={{ download_max_seconds }}'{% endif %}
-          {% if profile_prefix is defined %}--set 'execution_control.worker_pools=[{"profile_prefix": "{{ profile_prefix }}", "workers": 1}]'{% endif %}
+          {# --set 'execution_control.worker_pools=[{"profile_prefix": "{% if profile_prefix == 'worker' %}{{ display_prefix }}{% else %}{{ profile_prefix }}{% endif %}", "workers": 1}]' #}
           {% for setting in (extra_set_args | default('[]')) | from_yaml %}--set '{{ setting }}' {% endfor %}
-          --profile-prefix {{ profile_prefix }}
-        process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ download_policy }}.*--profile-prefix {{ profile_prefix }}"
+          {% if profile_prefix == 'worker' %}--workers 1{% endif %}
+          --profile-prefix {% if profile_prefix == 'worker' %}{{ display_prefix }}{% else %}{{ profile_prefix }}{% endif %}
+        process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ download_policy }}{% if profile_prefix == 'worker' %}.*--workers 1{% endif %}.*--profile-prefix {% if profile_prefix == 'worker' %}{{ display_prefix }}{% else %}{{ profile_prefix }}{% endif %}"
         start_process: "{{ start_download | default(false) | bool }}"
         stop_process: "{{ stop_download | default(false) | bool }}"
         check_status: "{{ vars.check_status | default(false) | bool }}"
@@ -15,16 +15,28 @@
       with_fileglob:
         - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
-    - name: Check if Redis is running
+    - name: Wait for Redis to be available
+      ansible.builtin.wait_for:
+        host: localhost
+        port: "{{ redis_port }}"
+        timeout: 60
+        delay: 5
+      register: redis_port_check
+      ignore_errors: yes
+
+    - name: Check if Redis is running and responding to commands
       ansible.builtin.shell:
-        cmd: "redis-cli -h {{ hostvars[groups['master'][0]].ansible_host }} -p {{ redis_port }} {% if use_redis_password | default(true) | string | lower == 'true' %}-a {{ vault_redis_password }}{% endif %} ping 2>&1 | grep -q PONG"
+        cmd: "redis-cli -h localhost -p {{ redis_port }} {% if use_redis_password | default(true) | string | lower == 'true' %}-a {{ vault_redis_password }}{% endif %} ping 2>&1 | grep -q PONG"
       register: redis_check
       ignore_errors: yes
       changed_when: false
+      retries: 3
+      delay: 5
+      until: redis_check.rc == 0
 
     - name: Ensure Redis is accessible
      ansible.builtin.fail:
-        msg: "Redis is not accessible on master node. Please ensure Redis service is running on {{ hostvars[groups['master'][0]].ansible_host }}:{{ redis_port }}"
+        msg: "Redis is not accessible on master node. Please ensure Redis service is running on localhost:{{ redis_port }}"
       when: redis_check.rc != 0
 
     - name: Stop any running ytops-client processes on master
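The equivalent manual probe from the master node, for troubleshooting (the port shown assumes `redis_port` resolves to Redis's default 6379):

```bash
# Expect PONG once the container is up; -a is only needed when a password is set.
redis-cli -h localhost -p 6379 -a "$REDIS_PASSWORD" ping
```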
@@ -44,7 +56,7 @@
           --policy {{ setup_policy }} \
           --cleanup-all
       environment:
-        REDIS_HOST: "{{ hostvars[groups['master'][0]].ansible_host }}"
+        REDIS_HOST: "localhost"
         REDIS_PORT: "{{ redis_port }}"
         REDIS_PASSWORD: "{{ vault_redis_password if use_redis_password | default(true) | string | lower == 'true' else '' }}"
       register: init_result
@@ -37,18 +37,52 @@
       ansible.builtin.set_fact:
         base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
 
-    - name: Install required Python packages from requirements.txt
+    - name: Attempt to install required Python packages from requirements.txt
+      ansible.builtin.pip:
+        requirements: "{{ base_dir }}/ytops_client/requirements.txt"
+        extra_args: "--ignore-installed"
+      become: yes
+      register: pip_reqs_result
+      failed_when: false
+
+    - name: Retry installing requirements with break-system-packages
       ansible.builtin.pip:
         requirements: "{{ base_dir }}/ytops_client/requirements.txt"
         extra_args: "--ignore-installed"
       become: yes
       environment:
         PIP_BREAK_SYSTEM_PACKAGES: "1"
+      register: pip_reqs_result_retry
+      when: pip_reqs_result.failed and 'externally-managed-environment' in pip_reqs_result.msg
 
-    - name: Explicitly install the thrift package
+    - name: Fail if requirements installation did not succeed
+      ansible.builtin.fail:
+        msg: "Failed to install requirements after retry. Last error: {{ pip_reqs_result_retry.msg | default(pip_reqs_result.msg) }}"
+      when: >
+        pip_reqs_result.failed and
+        (pip_reqs_result_retry is not defined or pip_reqs_result_retry.failed)
+
+    - name: Attempt to explicitly install the thrift package
+      ansible.builtin.pip:
+        name: thrift
+        extra_args: "--ignore-installed"
+      become: yes
+      register: pip_thrift_result
+      failed_when: false
+
+    - name: Retry installing thrift with break-system-packages
       ansible.builtin.pip:
         name: thrift
         extra_args: "--ignore-installed"
       become: yes
       environment:
         PIP_BREAK_SYSTEM_PACKAGES: "1"
+      register: pip_thrift_result_retry
+      when: pip_thrift_result.failed and 'externally-managed-environment' in pip_thrift_result.msg
+
+    - name: Fail if thrift installation did not succeed
+      ansible.builtin.fail:
+        msg: "Failed to install thrift after retry. Last error: {{ pip_thrift_result_retry.msg | default(pip_thrift_result.msg) }}"
+      when: >
+        pip_thrift_result.failed and
+        (pip_thrift_result_retry is not defined or pip_thrift_result_retry.failed)
@@ -4,9 +4,15 @@
   gather_facts: no
   vars:
     # Default action
-    action: "status" # Available actions: start, stop, status, start-auth, stop-auth, start-download, stop-download, stop-generator
+    action: "status" # Available actions: start, stop, status, start-auth, stop-auth, start-download, stop-download
+    graceful_shutdown_timeout_seconds: 30
 
   tasks:
+    - name: "Ensure profile_prefixes is a flat list of all prefixes from profile_pools"
+      ansible.builtin.set_fact:
+        profile_prefixes: "{{ profile_pools | map(attribute='prefixes') | flatten }}"
+      when: profile_pools is defined
+
     - name: "Start all configured generators and simulators"
       when: action == "start"
       block:
@@ -25,7 +31,7 @@
           --limit {{ inventory_hostname }}
           -e "start_generator=true"
           -e "profile_prefix={{ combined_prefixes }}"
-          {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+          -e "dummy_batch={{ dummy_batch | default(false) }}"
           {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
           {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
           {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}
@@ -42,7 +48,7 @@
           --limit {{ inventory_hostname }}
           -e "start_generator=true"
           -e "profile_prefix={{ item }}"
-          {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+          -e "dummy_batch={{ dummy_batch | default(false) }}"
           {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
           {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
           {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}
@@ -56,42 +62,14 @@
             label: "profile: {{ item }}"
           when: auth_workers_per_profile | default(0) | int > 0
 
-        - name: "Start download simulator(s)"
-          when: profile_prefixes is defined and profile_prefixes | length > 0
-          block:
-            - name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ combined_prefixes }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              when: (download_workers_per_profile | default(0) | int == 0) and (download_workers_total | default(0) | int > 0)
-
-        - name: "Start parallel download simulators for each profile"
-          ansible.builtin.command: >-
-            ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-            -i {{ inventory_file }}
-            --limit {{ inventory_hostname }}
-            -e "start_download=true"
-            -e "profile_prefix={{ item }}"
-            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-            {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-            {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-            {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-          delegate_to: localhost
-          changed_when: true
-          loop: "{{ profile_prefixes }}"
-          loop_control:
-            loop_var: item
-            label: "profile: {{ item }}"
-          when: download_workers_per_profile | default(0) | int > 0
+        - name: "WORKAROUND: Align download worker config with auth worker config to bypass inventory bug"
+          ansible.builtin.set_fact:
+            download_workers_total: "{{ auth_workers_total | default(0) }}"
+            download_workers_per_profile: "{{ auth_workers_per_profile | default(0) }}"
+
+        - name: "Start download simulator(s)"
+          ansible.builtin.include_tasks: tasks/start-download-simulators.yml
+          when: profile_prefixes is defined and profile_prefixes | length > 0
 
     - name: "Start only auth generators on workers"
       when: action == "start-auth"
@@ -111,7 +89,7 @@
           --limit {{ inventory_hostname }}
           -e "start_generator=true"
           -e "profile_prefix={{ combined_prefixes }}"
-          {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+          -e "dummy_batch={{ dummy_batch | default(false) }}"
           {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
           {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
           {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}
@@ -128,7 +106,7 @@
           --limit {{ inventory_hostname }}
           -e "start_generator=true"
           -e "profile_prefix={{ item }}"
-          {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+          -e "dummy_batch={{ dummy_batch | default(false) }}"
          {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
           {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
           {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}
@@ -145,112 +123,90 @@
     - name: "Start only download simulators on workers"
       when: action == "start-download"
       block:
+        - name: "WORKAROUND: Align download worker config with auth worker config to bypass inventory bug"
+          ansible.builtin.set_fact:
+            download_workers_total: "{{ auth_workers_total | default(0) }}"
+            download_workers_per_profile: "{{ auth_workers_per_profile | default(0) }}"
+
         - name: "Set combined profile prefixes string"
           ansible.builtin.set_fact:
             combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
           when: profile_prefixes is defined and profile_prefixes | length > 0
 
         - name: "Start download simulator(s)"
+          ansible.builtin.include_tasks: tasks/start-download-simulators.yml
           when: profile_prefixes is defined and profile_prefixes | length > 0
-          block:
-            - name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ combined_prefixes }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              when: (download_workers_per_profile | default(0) | int == 0) and (download_workers_total | default(0) | int > 0)
-
-            - name: "Start parallel download simulators for each profile"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ item }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              loop: "{{ profile_prefixes }}"
-              loop_control:
-                loop_var: item
-                label: "profile: {{ item }}"
-              when: download_workers_per_profile | default(0) | int > 0
 
-    - name: "Stop only auth generators on workers (via playbook call)"
-      when: action == "stop-generator"
+    - name: "Stop only auth generators on workers"
+      when: action == "stop-auth"
       block:
         - name: "Set combined profile prefixes string"
           ansible.builtin.set_fact:
             combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
           when: profile_prefixes is defined and profile_prefixes | length > 0
 
-        - name: "Stop single auth generator for profiles: {{ combined_prefixes | default('none') }}"
-          ansible.builtin.command: >-
-            ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
-            -i {{ inventory_file }}
-            --limit {{ inventory_hostname }}
-            -e "stop_generator=true"
-            -e "profile_prefix={{ combined_prefixes }}"
-          delegate_to: localhost
-          changed_when: true
+        - name: "Gracefully stop auth generator(s) via playbook call"
           when: profile_prefixes is defined and profile_prefixes | length > 0
-
-    - name: "Stop only auth generators on workers"
-      when: action == "stop-auth"
-      block:
-        - name: Kill all auth generator tmux sessions on this worker
-          ansible.builtin.shell:
-            cmd: |
-              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-auth-"); do
-                tmux kill-session -t "$session"
-              done || true
-          ignore_errors: yes
-          changed_when: false
-
-        - name: Kill all ytops-client auth generator processes on this worker
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate
-              ps aux | grep "[y]tops-client.*stress-policy.*12_queue_auth_simulation" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              sleep 0.5
-              # Force kill
-              ps aux | grep "[y]tops-client.*stress-policy.*12_queue_auth_simulation" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
+          block:
+            - name: "Stop single auth generator for all profiles: {{ combined_prefixes | default('none') }}"
+              ansible.builtin.command: >-
+                ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
+                -i {{ inventory_file }}
+                --limit {{ inventory_hostname }}
+                -e "stop_generator=true"
+                -e "profile_prefix={{ combined_prefixes }}"
+              delegate_to: localhost
+              changed_when: true
+              when: (auth_workers_per_profile | default(0) | int == 0) and (auth_workers_total | default(0) | int > 0)
+
+            - name: "Stop parallel auth generators for each profile"
+              ansible.builtin.command: >-
+                ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
+                -i {{ inventory_file }}
+                --limit {{ inventory_hostname }}
+                -e "stop_generator=true"
+                -e "profile_prefix={{ item }}"
+              delegate_to: localhost
+              changed_when: true
+              loop: "{{ profile_prefixes }}"
+              loop_control:
+                loop_var: item
+                label: "profile: {{ item }}"
+              when: auth_workers_per_profile | default(0) | int > 0
 
     - name: "Stop only download simulators on workers"
       when: action == "stop-download"
       block:
-        - name: Kill all download simulator tmux sessions on this worker
+        - name: "WORKAROUND: Align download worker config with auth worker config to bypass inventory bug"
+          ansible.builtin.set_fact:
+            download_workers_total: "{{ auth_workers_total | default(0) }}"
+            download_workers_per_profile: "{{ auth_workers_per_profile | default(0) }}"
+
+        - name: "Set combined profile prefixes string"
+          ansible.builtin.set_fact:
+            combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
+          when: profile_prefixes is defined and profile_prefixes | length > 0
+
+        - name: "Stop single download simulator group"
           ansible.builtin.shell:
             cmd: |
-              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-download-"); do
+              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E '^stress-download-worker-[0-9]+$'); do
                 tmux kill-session -t "$session"
               done || true
+          when: download_workers_total | default(0) | int > 0
+          changed_when: true
           ignore_errors: yes
-          changed_when: false
 
-        - name: Kill all ytops-client download simulator processes on this worker
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate
-              ps aux | grep "[y]tops-client.*stress-policy.*11_direct_docker_download_simulation" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              sleep 0.5
-              # Force kill
-              ps aux | grep "[y]tops-client.*stress-policy.*11_direct_docker_download_simulation" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
+        - name: "Stop parallel download simulators for each profile"
+          ansible.builtin.command: "tmux kill-session -t stress-download-{{ item }}"
+          loop: "{{ profile_prefixes }}"
+          loop_control:
+            loop_var: item
+            label: "profile: {{ item }}"
+          when: (download_workers_total | default(0) | int == 0) and (download_workers_per_profile | default(0) | int > 0)
+          changed_when: true
+          ignore_errors: yes
 
     - name: "Stop all worker generators and simulators"
       when: action == "stop"
@@ -3,7 +3,7 @@
   hosts: all
   gather_facts: no
   vars:
-    ytops_source_dir: "{{ playbook_dir }}/../ytops_client-source"
+    ytops_source_dir: "{{ source_base_dir | default(playbook_dir + '/../ytops_client-source') }}"
   vars_files:
     - "group_vars/all/vault.yml"
   pre_tasks:
ansible/playbook-update-yt-dlp-docker.yml (new file, 30 lines)
@@ -0,0 +1,30 @@
+---
+- name: "STRESS-SETUP: Build and push yt-dlp docker image"
+  hosts: workers
+  gather_facts: no
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
+  tasks:
+    - name: "Build and push yt-dlp image on worker"
+      ansible.builtin.shell:
+        cmd: |
+          cd {{ airflow_worker_dir }}
+          if [ -f .env ]; then
+            set -a && . ./.env && set +a
+          fi
+          ./bin/build-yt-dlp-image
+      register: build_output
+      changed_when: true
+
+    - name: "Display build output"
+      ansible.builtin.debug:
+        var: build_output.stdout_lines
+      when: build_output.stdout_lines is defined
105
ansible/tasks/start-download-simulators.yml
Normal file
105
ansible/tasks/start-download-simulators.yml
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
---
- name: "Scale down excess download workers if necessary"
  when: (download_workers_per_profile | default(0) | int == 0) and (download_workers_total | default(0) | int > 0)
  block:
    - name: "Find running download simulator tmux sessions for this group"
      ansible.builtin.shell:
        cmd: "tmux list-sessions -F '#{session_name}' 2>/dev/null | grep -E '^stress-download-worker-[0-9]+$' || true"
      register: running_sessions
      changed_when: false
      ignore_errors: yes

    - name: "Identify excess download simulator sessions to stop"
      ansible.builtin.set_fact:
        excess_sessions_to_stop: "{{ excess_sessions_to_stop | default([]) + [item] }}"
      vars:
        worker_num_str: "{{ item | regex_replace('^stress-download-worker-', '') }}"
      when: worker_num_str is match('^[0-9]+$') and (worker_num_str | int > (download_workers_total | int))
      loop: "{{ running_sessions.stdout_lines }}"
      loop_control:
        label: "Identifying excess session: {{ item }}"

    - name: "Get PIDs for excess download workers"
      ansible.builtin.shell:
        cmd: |
          PANE_PID=$(tmux list-panes -s "{{ item }}" -F '#{pane_pid}' | head -n 1)
          if [ -n "$PANE_PID" ]; then
            pgrep -P "$PANE_PID" || true
          fi
      register: excess_pids_raw
      loop: "{{ excess_sessions_to_stop | default([]) }}"
      changed_when: false
      ignore_errors: true

    - name: "Set fact for PIDs to kill"
      ansible.builtin.set_fact:
        pids_to_kill_gracefully: "{{ excess_pids_raw.results | map(attribute='stdout') | reject('==', '') | list }}"

    - name: "Gracefully terminate excess download workers"
      ansible.builtin.shell:
        cmd: "kill {{ item }}"
      loop: "{{ pids_to_kill_gracefully | default([]) }}"
      when: pids_to_kill_gracefully is defined and pids_to_kill_gracefully | length > 0
      ignore_errors: true
      changed_when: false

    - name: "Wait for graceful shutdown of excess workers"
      ansible.builtin.pause:
        seconds: "{{ graceful_shutdown_timeout_seconds }}"
      when: pids_to_kill_gracefully is defined and pids_to_kill_gracefully | length > 0

    - name: "Force kill any lingering excess workers"
      ansible.builtin.shell:
        cmd: "kill -9 {{ item }}"
      loop: "{{ pids_to_kill_gracefully | default([]) }}"
      when: pids_to_kill_gracefully is defined and pids_to_kill_gracefully | length > 0
      ignore_errors: true
      changed_when: false

    - name: "Kill tmux sessions for excess workers"
      ansible.builtin.shell:
        cmd: "tmux kill-session -t {{ item }}"
      loop: "{{ excess_sessions_to_stop | default([]) }}"
      when: excess_sessions_to_stop is defined and excess_sessions_to_stop | length > 0
      ignore_errors: true
      changed_when: false

- name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
  ansible.builtin.command: >-
    ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
    -i {{ inventory_file }}
    --limit {{ inventory_hostname }}
    -e "start_download=true"
    -e "profile_prefix=worker"
    -e "display_prefix={{ combined_prefixes }}"
    -e "worker_num={{ worker_num }}"
    {% if dummy_batch | default(false) %}-e "dummy_batch=true"{% endif %}
    {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
    {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
    {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
  delegate_to: localhost
  changed_when: true
  when: download_workers_total | default(0) | int > 0
  loop: "{{ range(1, (download_workers_total | default(1) | int) + 1) | list }}"
  loop_control:
    loop_var: worker_num
    label: "worker {{ worker_num }}"

- name: "Start parallel download simulators for each profile"
  ansible.builtin.command: >-
    ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
    -i {{ inventory_file }}
    --limit {{ inventory_hostname }}
    -e "start_download=true"
    -e "profile_prefix={{ item }}"
    {% if dummy_batch | default(false) %}-e "dummy_batch=true"{% endif %}
    {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
    {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
    {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
  delegate_to: localhost
  changed_when: true
  loop: "{{ profile_prefixes }}"
  loop_control:
    loop_var: item
    label: "profile: {{ item }}"
  when: (download_workers_total | default(0) | int == 0) and (download_workers_per_profile | default(0) | int > 0)
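The file implements two mutually exclusive scaling modes. A condensed sketch of the decision rule, assuming exactly one of the two variables is set per host (the per-profile session naming here is illustrative; only the numbered `stress-download-worker-N` names appear in the tasks above):

```python
# download_workers_total > 0: N numbered workers that share all profiles.
# download_workers_per_profile > 0: one simulator per profile prefix.
def plan_workers(download_workers_total: int, download_workers_per_profile: int,
                 profile_prefixes: list) -> list:
    if download_workers_total > 0:
        # Single-pool mode: numbered tmux sessions, all profiles shared.
        return [f"stress-download-worker-{n}" for n in range(1, download_workers_total + 1)]
    if download_workers_per_profile > 0:
        # Per-profile mode: one simulator per profile prefix (hypothetical naming).
        return [f"stress-download-worker-{prefix}" for prefix in profile_prefixes]
    return []

print(plan_workers(3, 0, []))          # ['stress-download-worker-1', ..., '-3']
print(plan_workers(0, 1, ["user31"]))  # ['stress-download-worker-user31']
```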
@ -15,3 +15,7 @@ AWS_REGION={{ vault_s3_delivery_aws_region }}
ACCOUNT_ACTIVE_DURATION_MIN=7
ACCOUNT_COOLDOWN_DURATION_MIN=30
STRESS_POLICY_INBOX_QUEUE=dev_stress_inbox

# --- Stress Test Environment Names ---
YTOPS_AUTH_ENV={{ stress_auth_env | default('sim_auth') }}
YTOPS_DOWNLOAD_ENV={{ stress_download_env | default('sim_download') }}
@ -121,20 +121,214 @@ def generate_policy(cluster_config, output_path):
    print(f"Successfully generated profile setup policy at: {output_path}")


def generate_enforcer_policy(cluster_config, output_path):
    """Generate the enforcer policy file."""
    all_workers = cluster_config.get('workers', {})

    enforcement_pools = []
    for worker_name, worker_config in sorted(all_workers.items()):
        all_prefixes = []
        for pool in worker_config.get('profile_pools', []):
            all_prefixes.extend(pool.get('prefixes', []))

        if not all_prefixes:
            continue

        pool_entry = OrderedDict([
            ('name', f"server_{worker_name}"),
            ('profile_group_patterns', sorted(list(set(all_prefixes)))),
            ('max_active_profiles', 1)
        ])
        enforcement_pools.append(pool_entry)

    with open(output_path, 'w') as f:
        f.write("# Policy for the unified simulation enforcer.\n")
        f.write("# This file is used by `bin/ytops-client policy-enforcer --live` to manage\n")
        f.write("# both the authentication and download simulation environments from a single process.\n\n")
        f.write("# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!\n")
        f.write("# !!! DO NOT EDIT. Your changes will be overwritten. !!!\n")
        f.write("# !!! Edit cluster.green.yml and re-run the generator instead. !!!\n\n")

        f.write("simulation_parameters:\n")
        f.write("  # --- Common Redis settings for all tools ---\n")
        f.write("  # The enforcer will connect to two different Redis environments (key prefixes)\n")
        f.write("  # based on these settings, applying the corresponding policies to each.\n")
        f.write('  env_file: ".env"\n')
        f.write('  auth_env: "sim_auth"\n')
        f.write('  download_env: "sim_download"\n')
        f.write(" \n")
        f.write("  # How often the enforcer should wake up and apply all policies.\n")
        f.write("  interval_seconds: 2\n\n")

        f.write("# --- Common & Pool-specific Settings ---\n")
        f.write("# Common settings are applied to all profile groups discovered via the pools below.\n")
        f.write("# A pool can optionally override these settings by defining its own 'group_settings' block.\n")
        f.write("common_group_settings:\n")
        f.write("  auth:\n")
        f.write("    max_active_profiles: 1\n")
        f.write("    rotate_after_requests: 5\n")
        f.write("    rest_duration_minutes_on_rotation: 0.20\n")
        f.write("    wait_download_finish_per_group: true\n")
        f.write("    max_wait_for_downloads_minutes: 240\n")
        f.write("  download:\n")
        f.write("    max_active_profiles: 1\n")
        f.write("    rotate_after_requests: 0\n")
        f.write("    rest_duration_minutes_on_rotation: 0.2\n\n")

        f.write("# Defines pools of profile groups with their own concurrency limits.\n")
        f.write("enforcement_pools:\n")

        for pool in enforcement_pools:
            f.write(f'  - name: "{pool["name"]}"\n')
            patterns_str = ", ".join([f'"{p}"' for p in pool['profile_group_patterns']])
            f.write(f'    profile_group_patterns: [{patterns_str}]\n')
            f.write(f'    max_active_profiles: {pool["max_active_profiles"]}\n')

    rest_of_file = """
# --- Policies for the Authentication Simulation ---
auth_policy_enforcer_config:

  # Ban if 2 failures occur within a 1-minute window.
  #ban_on_failures: 2
  #ban_on_failures_window_minutes: 1

  # The standard rest policy is disabled, as rotation is handled by the profile group.

  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour.
  rate_limit_requests: 0
  rate_limit_window_minutes: 60
  rate_limit_rest_duration_minutes: 5

  rest_after_requests: 0
  rest_duration_minutes: 10

  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The settings below should be configured to respect these limits.

  # New setting for load balancing across profile groups.
  # "longest_idle": Activates the profile that has been idle the longest across all groups (based on last_used time).
  #                 This is a global FIFO strategy that effectively cycles through profiles regardless of their group.
  # "least_loaded": Prioritizes activating a profile from the group with the fewest pending downloads.
  #                 If multiple groups have zero pending downloads, it acts as a FIFO queue, activating
  #                 the one that finished its last download batch the earliest. This is useful when you want
  #                 to ensure a group finishes its entire workload before another group starts.
  profile_selection_strategy: "longest_idle"

  # The 'global_max_active_profiles' setting is now superseded by the per-pool limits
  # defined in the 'enforcement_pools' section.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.

  proxy_work_minutes: 0
  proxy_rest_duration_minutes: 0

  # Global maximum time a proxy can be active before being rested, regardless of
  # other rules. Acts as a safety net. Set to 0 to disable.
  max_global_proxy_active_minutes: 0
  rest_duration_on_max_active: 10

  # Proxy-level ban on failure burst is disabled.
  proxy_ban_on_failures: 0
  proxy_ban_window_minutes: 2

  # Clean up locks held for more than 16 minutes (960s) to prevent stuck workers.
  # This should be longer than the docker container timeout (15m).
  unlock_stale_locks_after_seconds: 960

  # A short post-task cooldown for auth simulation profiles. When a batch is finished,
  # the profile is put into COOLDOWN briefly. This prevents a worker from immediately
  # re-locking the same profile, giving the policy enforcer a window to perform rotation.
  unlock_cooldown_seconds: 0

# --- Cross-simulation synchronization ---
# This section is simplified because the link between auth and download profiles
# is now defined in the `profile_group_definitions`.
cross_simulation_sync:
  # Which states to synchronize from auth to download.
  sync_states:
    - "BANNED"
  # If true, a BANNED state on an auth profile will force the download profile to also be BANNED.
  enforce_auth_lead: true
  # CRITICAL: Ensures the correct download profile GROUP is active.
  sync_active_profile: true
  # When an auth profile is in the 'waiting_downloads' state, ensure the matching download profile is active.
  sync_waiting_downloads: true

# --- Policies for the Download Simulation ---
download_policy_enforcer_config:

  # Ban if 1 failure occurs within a 1-minute window.
  ban_on_failures: 1
  ban_on_failures_window_minutes: 1

  # Standard rest policy is disabled in favor of group rotation.

  # New rate limit policy to enforce requests-per-hour limits.
  # For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
  rate_limit_requests: 280
  rate_limit_window_minutes: 60
  rate_limit_rest_duration_minutes: 5
  rest_after_requests: 0
  rest_duration_minutes: 20

  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The settings below should be configured to respect these limits.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.

  # Time-based proxy rules are disabled.
  proxy_work_minutes: 0
  proxy_rest_duration_minutes: 10

  # Global maximum time a proxy can be active before being rested, regardless of
  # other rules. Acts as a safety net. Set to 0 to disable.
  max_global_proxy_active_minutes: 0
  rest_duration_on_max_active: 10

  # Proxy-level ban on failure burst is disabled.
  proxy_ban_on_failures: 3
  proxy_ban_window_minutes: 1

  # Clean up download locks held for more than 16 minutes (960s) to allow for long downloads.
  # This should be longer than the docker container timeout (15m).
  unlock_stale_locks_after_seconds: 960

  # After a profile is used for a download, unlock it but put it in COOLDOWN
  # state for 2-3s. This is enforced by the worker, which reads this config from Redis.
  unlock_cooldown_seconds: [2, 3]
"""
    with open(output_path, 'a') as f:
        f.write(rest_of_file)

    print(f"Successfully generated enforcer policy at: {output_path}")

def main():
    if len(sys.argv) != 3:
    if len(sys.argv) < 3 or len(sys.argv) > 4:
        print("Usage: ./tools/generate-profile-setup-policy.py <cluster-config-file> <output-policy-file>")
        print("Usage: ./tools/generate-profile-setup-policy.py <cluster-config-file> <output-profile-policy-file> [<output-enforcer-policy-file>]")
        sys.exit(1)

    config_path = sys.argv[1]
    output_path = sys.argv[2]
    profile_output_path = sys.argv[2]

    if not os.path.exists(config_path):
        print(f"Error: Cluster configuration file not found at '{config_path}'", file=sys.stderr)
        sys.exit(1)

    cluster_config = load_cluster_config(config_path)
    generate_policy(cluster_config, output_path)
    generate_policy(cluster_config, profile_output_path)

    if len(sys.argv) == 4:
        enforcer_output_path = sys.argv[3]
        generate_enforcer_policy(cluster_config, enforcer_output_path)


if __name__ == "__main__":
    main()
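The enforcer-policy half of the generator reduces to a small transformation from the cluster config to pool entries. A self-contained illustration of that step, with values mirroring the `dl003`/`dl006` examples used elsewhere in this commit (the literal config dict here is illustrative):

```python
from collections import OrderedDict

cluster_config = {
    'workers': {
        'dl003': {'profile_pools': [{'prefixes': ['user31', 'user32']}]},
        'dl006': {'profile_pools': [{'prefixes': ['user61', 'user62']}]},
    }
}

pools = []
for worker_name, worker_config in sorted(cluster_config['workers'].items()):
    prefixes = []
    for pool in worker_config.get('profile_pools', []):
        prefixes.extend(pool.get('prefixes', []))
    if prefixes:
        pools.append(OrderedDict([
            ('name', f"server_{worker_name}"),
            ('profile_group_patterns', sorted(set(prefixes))),
            ('max_active_profiles', 1),
        ]))

# pools[0] -> name 'server_dl003', patterns ['user31', 'user32'], max_active 1
```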
@ -205,6 +205,18 @@ queue_policy:
  # Example: formats_to_download: "all"
  # Example: formats_to_download: ["140-dashy", "299-dashy"]
  formats_to_download: "from_download_policy"

  # Whether to report completion back to a queue. Always reported for auth.
  report_completion: true

  # Queue to report completion to
  completion_queue: "queue2_auth_completed"

  # Queue to report failures to
  failure_queue: "queue2_auth_fail"

  # Queue to report skipped tasks to
  skipped_queue: "queue2_auth_skipped"

simulation_parameters:
  auth_env: "sim_auth"
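With these settings, every finished task is reported to one of three queues depending on its outcome. A hedged sketch of the routing a worker might apply (the helper itself is illustrative; the queue names come from the policy above):

```python
def completion_queue_for(outcome: str) -> str:
    """Map a task outcome to the reporting queue configured in queue_policy."""
    return {
        'completed': 'queue2_auth_completed',
        'failed': 'queue2_auth_fail',
        'skipped': 'queue2_auth_skipped',
    }[outcome]

print(completion_queue_for('failed'))  # queue2_auth_fail
```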
@ -13,22 +13,29 @@ simulation_parameters:
  # How often the enforcer should wake up and apply all policies.
  interval_seconds: 2

# --- Dynamic Profile Group Templates ---
# The policy enforcer will find all profile prefixes matching a pattern in Redis
# and apply the settings from the matching template. This avoids having to list
# every profile group manually.
# NOTE: The policy enforcer tool must be updated to support this format.
profile_group_templates:
  - pattern: "user*"
    auth:
      max_active_profiles: 1
      rotate_after_requests: 5
      rest_duration_minutes_on_rotation: 0.20
      wait_download_finish_per_group: true
      max_wait_for_downloads_minutes: 240
    download:
      rotate_after_requests: 0
      rest_duration_minutes_on_rotation: 0.2
# --- Common & Pool-specific Settings ---
# Common settings are applied to all profile groups discovered via the pools below.
# A pool can optionally override these settings by defining its own 'group_settings' block.
common_group_settings:
  auth:
    max_active_profiles: 1
    rotate_after_requests: 5
    rest_duration_minutes_on_rotation: 0.20
    wait_download_finish_per_group: true
    max_wait_for_downloads_minutes: 240
  download:
    max_active_profiles: 1
    rotate_after_requests: 0
    rest_duration_minutes_on_rotation: 0.2

# Defines pools of profile groups with their own concurrency limits.
enforcement_pools:
  - name: "server_dl003_pool"
    profile_group_patterns: ["user31", "user32"]
    max_active_profiles: 1
  - name: "server_dl006_pool"
    profile_group_patterns: ["user61", "user62"]
    max_active_profiles: 1

# --- Policies for the Authentication Simulation ---
auth_policy_enforcer_config:
@ -62,9 +69,8 @@ auth_policy_enforcer_config:
  #                 to ensure a group finishes its entire workload before another group starts.
  profile_selection_strategy: "longest_idle"

  # Enforce a total limit of active profiles across all groups defined below.
  # Set to 1 to ensure only one group's profile is active at any time.
  global_max_active_profiles: 1
  # The 'global_max_active_profiles' setting is now superseded by the per-pool limits
  # defined in the 'enforcement_pools' section.

  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
  # The enforcer logic should be updated to read from there.
@ -106,6 +112,7 @@ cross_simulation_sync:

# --- Policies for the Download Simulation ---
download_policy_enforcer_config:

  # Ban if 1 failure occurs within a 1-minute window.
  ban_on_failures: 1
  ban_on_failures_window_minutes: 1
@ -68,6 +68,9 @@ class PolicyEnforcer:
        all_profiles_list = self.manager.list_profiles()
        all_profiles_map = {p['name']: p for p in all_profiles_list}

        # Sync profile states from their assigned proxy's state (e.g., if proxy is BANNED, ban profile).
        self.enforce_proxy_state_on_profiles(all_profiles_list, all_profiles_map)

        # Apply profile group policies (rotation, max_active). This will modify the local `all_profiles_map`.
        self.enforce_profile_group_policies(getattr(args, 'profile_groups', []), all_profiles_map, args)
@ -197,12 +200,26 @@ class PolicyEnforcer:
            live_active_counts[group_name] = count
        logger.debug(f"Initial live active counts: {live_active_counts}")

        # --- New Global Max Active Logic ---
        # --- New Enforcement Pool and Global Max Active Logic ---
        enforcement_pools = getattr(args, 'enforcement_pools', [])
        live_pool_active_counts = {}
        if enforcement_pools:
            for i, pool in enumerate(enforcement_pools):
                pool_name = pool.get('name', f'pool_{i}')
                live_pool_active_counts[pool_name] = 0

            for group_name, count in live_active_counts.items():
                group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
                pool_name = group_policy.get('pool_name')
                if pool_name:
                    live_pool_active_counts[pool_name] = live_pool_active_counts.get(pool_name, 0) + count
            logger.debug(f"Initial live pool active counts: {live_pool_active_counts}")

        global_max_active = getattr(args, 'global_max_active_profiles', 0)
        live_global_active_count = sum(live_active_counts.values())
        if global_max_active > 0:
            logger.debug(f"Enforcing global max active profiles limit of {global_max_active}. Current global active: {live_global_active_count}")
        # --- End New Global Logic ---
        # --- End New Logic ---

        # --- End group logic setup ---
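The pool-accounting step above rolls per-group active counts up into per-pool counts via each group's `pool_name` assignment. Condensed into a runnable fragment (the group and count values are illustrative):

```python
profile_groups = [
    {'name': 'user31', 'pool_name': 'server_dl003_pool'},
    {'name': 'user32', 'pool_name': 'server_dl003_pool'},
]
live_active_counts = {'user31': 1, 'user32': 0}

live_pool_active_counts = {}
for group_name, count in live_active_counts.items():
    # Find the group's policy entry and attribute its count to its pool.
    group = next((g for g in profile_groups if g['name'] == group_name), {})
    pool = group.get('pool_name')
    if pool:
        live_pool_active_counts[pool] = live_pool_active_counts.get(pool, 0) + count

print(live_pool_active_counts)  # {'server_dl003_pool': 1}
```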
@ -278,7 +295,7 @@ class PolicyEnforcer:
        # --- End new logic ---

        # --- New Sorting Logic based on Profile Selection Strategy ---
        strategy = getattr(args, 'profile_selection_strategy', 'longest_idle')
        strategy = getattr(args, 'profile_selection_strategy', None)
        if strategy == 'least_loaded' and profile_groups:
            logger.debug("Applying 'least_loaded' profile selection strategy.")
            # Separate profiles that are ready from those that are not
@ -345,10 +362,8 @@ class PolicyEnforcer:
            profiles_to_check = sorted_ready_profiles + not_ready_profiles
            logger.debug(f"Activation candidates for 'least_loaded' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")

        else: # Default 'longest_idle' sort
            if strategy not in ['longest_idle']:
                logger.warning(f"Unknown or unhandled profile_selection_strategy '{strategy}'. Defaulting to 'longest_idle'.")
        elif strategy == 'longest_idle':
            logger.debug("Applying 'longest_idle' profile selection strategy.")

            # Separate profiles that are ready to be activated from those still resting.
            # A profile waiting for downloads is NOT considered ready for activation.
            ready_profiles = [
@ -369,6 +384,11 @@ class PolicyEnforcer:
            # The final list to check will process all ready profiles first, then wait for the not-ready ones.
            profiles_to_check = ready_profiles + not_ready_profiles
            logger.debug(f"Activation candidates for 'longest_idle' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
        else: # Default sort (no strategy)
            if strategy: # Log a warning if an unknown strategy was provided
                logger.warning(f"Unknown profile_selection_strategy '{strategy}'. Using default FIFO sort by rest time.")
            # Default to a simple FIFO sort based on when their rest period ends.
            profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
        # --- End New Sorting Logic ---

        # --- New logic: Identify groups with waiting profiles ---
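The three candidate orderings, sketched side by side on simplified profile dicts. This is a minimal illustration, not the enforcer's exact data model; `natural_sort_key` stands in for the module's own helper, and the field names are assumptions:

```python
import re

def natural_sort_key(s):
    # Split digit runs out so 'user2' sorts before 'user10'.
    return [int(t) if t.isdigit() else t for t in re.split(r'(\d+)', s)]

profiles = [
    {'name': 'user31_2', 'rest_until': 10, 'last_used': 5, 'pending': 0},
    {'name': 'user31_1', 'rest_until': 10, 'last_used': 2, 'pending': 3},
]

# 'longest_idle': global FIFO on last_used (idle the longest goes first).
longest_idle = sorted(profiles, key=lambda p: p['last_used'])

# 'least_loaded': fewest pending downloads first (per-group in the real code,
# simplified here to per-profile pending counts).
least_loaded = sorted(profiles, key=lambda p: p['pending'])

# No strategy: plain FIFO on when the rest period ends, name as tie-breaker.
default_fifo = sorted(profiles, key=lambda p: (p['rest_until'], natural_sort_key(p['name'])))
```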
@ -490,13 +510,30 @@ class PolicyEnforcer:
            profile_name = profile['name']
            group_name = profile_to_group_map.get(profile_name)

            # --- New Global Max Active Check ---
            # This check prevents NEW profiles (in RESTING state) from becoming active if the global limit is reached.
            # It allows COOLDOWN profiles to become active, as they are already part of the active count.
            if global_max_active > 0 and live_global_active_count >= global_max_active and profile['state'] == ProfileState.RESTING.value:
                logger.debug(f"Profile '{profile_name}' rest ended, but global max active limit ({global_max_active}) has been reached. Deferring activation.")
                continue
            # --- End New Global Check ---
            # --- New Pool and Global Max Active Check ---
            is_new_activation = profile['state'] == ProfileState.RESTING.value
            if is_new_activation:
                # Check pool limits first
                if enforcement_pools and group_name:
                    group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
                    pool_name = group_policy.get('pool_name')
                    pool_config = None
                    for i, p in enumerate(enforcement_pools):
                        if p.get('name', f'pool_{i}') == pool_name:
                            pool_config = p
                            break
                    if pool_config:
                        pool_max_active = pool_config.get('max_active_profiles', 0)
                        current_pool_active = live_pool_active_counts.get(pool_name, 0)
                        if pool_max_active > 0 and current_pool_active >= pool_max_active:
                            logger.debug(f"Profile '{profile_name}' rest ended, but pool '{pool_name}' max active limit ({pool_max_active}) has been reached. Deferring activation.")
                            continue

                # Then check global limit if it's still configured (for backward compatibility)
                if global_max_active > 0 and live_global_active_count >= global_max_active:
                    logger.debug(f"Profile '{profile_name}' rest ended, but global max active limit ({global_max_active}) has been reached. Deferring activation.")
                    continue
            # --- End New Check ---

            # --- Group-aware unrest check ---
            if group_name:
@ -577,11 +614,13 @@ class PolicyEnforcer:
                    continue # Skip activation for this profile
            # --- End group check ---

            # Before activating, ensure the profile's proxy is not resting.
            # Before activating, ensure the profile's proxy is not resting or banned.
            proxy_url = profile.get('proxy')
            if proxy_url:
                proxy_state_data = proxy_states.get(proxy_url, {})
                if proxy_state_data.get('state') == ProfileState.RESTING.value:
                proxy_state = proxy_state_data.get('state')

                if proxy_state == ProfileState.RESTING.value:
                    logger.debug(f"Profile '{profile['name']}' rest period ended, but its proxy '{proxy_url}' is still resting. Deferring activation.")

                    # Update reason for clarity in the UI when a profile is blocked by its proxy.
@ -595,11 +634,33 @@ class PolicyEnforcer:
                        all_profiles_map[profile_name]['rest_reason'] = new_reason

                    continue # Do not activate this profile yet.

                elif proxy_state == ProfileState.BANNED.value and profile['state'] != ProfileState.BANNED.value:
                    # This profile is about to be activated, but its proxy is banned. Ban it.
                    reason = f"Proxy '{proxy_url}' is BANNED"
                    logger.warning(f"Banning profile '{profile['name']}' because its proxy is banned: {reason}")
                    self.actions_taken_this_cycle += 1
                    if not self.dry_run:
                        sm = self.manager.get_state_machine(profile_name)
                        if sm:
                            sm.ban(reason=reason)
                    # Update local map
                    all_profiles_map[profile_name]['state'] = ProfileState.BANNED.value
                    all_profiles_map[profile_name]['reason'] = reason
                    continue # Skip activation; it is now banned.

            # Update group counter BEFORE making any changes, so subsequent checks in this cycle use the updated count
            # Update group and pool counters BEFORE making any changes, so subsequent checks in this cycle use the updated count
            if group_name and profile['state'] == ProfileState.RESTING.value:
                # For RESTING profiles, they're becoming active, so increment the count
                live_active_counts[group_name] = live_active_counts.get(group_name, 0) + 1

                # Also increment the pool counter
                if enforcement_pools:
                    group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
                    pool_name = group_policy.get('pool_name')
                    if pool_name:
                        live_pool_active_counts[pool_name] = live_pool_active_counts.get(pool_name, 0) + 1

            # Also increment the global counter
            if global_max_active > 0:
                live_global_active_count += 1
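The activation path now applies two stacked admission checks. Reduced to a single gate function under stated assumptions (names and the dict shapes are illustrative, mirroring the counters above):

```python
def may_activate(pool_name, live_pool_counts, pool_limits, global_count, global_max):
    """Return True when a RESTING profile may become active."""
    # Pool limit first: a pool limit of 0 means "no pool limit".
    if pool_name and pool_limits.get(pool_name, 0) > 0:
        if live_pool_counts.get(pool_name, 0) >= pool_limits[pool_name]:
            return False  # pool is at capacity; defer activation
    # Then the legacy global limit, kept for backward compatibility.
    if global_max > 0 and global_count >= global_max:
        return False  # global limit reached; defer activation
    return True

print(may_activate('server_dl003_pool', {'server_dl003_pool': 1},
                   {'server_dl003_pool': 1}, 1, 0))  # False: pool is full
```

COOLDOWN profiles bypass the gate by design: they are already included in the active counts, so re-activating them does not raise concurrency.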
@ -903,20 +964,68 @@ class PolicyEnforcer:
            if num_active_or_locked == 0:
                logger.debug(f"Group '{group_name}' has no active profiles. `enforce_unrest_policy` will attempt to activate one.")

        # --- 4. Global Self-Healing: Enforce global_max_active_profiles ---
        # This runs after all per-group healing and ensures the global limit is respected.
        # --- 4. Pool and Global Self-Healing ---
        enforcement_pools = getattr(args, 'enforcement_pools', [])
        if enforcement_pools:
            for i, pool in enumerate(enforcement_pools):
                pool_name = pool.get('name', f'pool_{i}')
                pool_max_active = pool.get('max_active_profiles', 0)
                if not pool_max_active or pool_max_active <= 0:
                    continue

                # Get all profile names belonging to this pool
                pool_profile_names = set()
                for group in profile_groups:
                    if group.get('pool_name') == pool_name:
                        prefix = group.get('prefix')
                        if prefix:
                            for p_name in all_profiles_map:
                                if p_name.startswith(prefix):
                                    pool_profile_names.add(p_name)

                # Get current active count for this pool from our local map
                current_pool_active = [
                    p for name, p in all_profiles_map.items()
                    if name in pool_profile_names and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]
                ]

                num_pool_active = len(current_pool_active)
                if num_pool_active > pool_max_active:
                    logger.warning(f"Pool Healing ('{pool_name}'): Found {num_pool_active} active profiles, but pool max is {pool_max_active}. Resting excess.")

                    profiles_that_can_be_rested = [p for p in current_pool_active if p['state'] == ProfileState.ACTIVE.value]
                    profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True)
                    profiles_that_can_be_rested.sort(key=lambda p: (
                        p.get('success_count', 0) + p.get('failure_count', 0) +
                        p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)
                    ), reverse=True)

                    num_to_rest = num_pool_active - pool_max_active
                    for profile in profiles_that_can_be_rested[:num_to_rest]:
                        logger.warning(f"Pool Healing ('{pool_name}'): Resting profile '{profile['name']}'.")
                        self.actions_taken_this_cycle += 1
                        if not self.dry_run:
                            sm = self.manager.get_state_machine(profile['name'])
                            if sm:
                                sm.rest(reason=f"Pool '{pool_name}' max_active healing", duration_minutes=0.02)

                        all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value
                        all_profiles_map[profile['name']]['rest_reason'] = f"Pool '{pool_name}' max_active healing"
                        all_profiles_map[profile['name']]['rest_until'] = time.time() + (0.02 * 60)

        # For backward compatibility, also enforce global_max_active_profiles if it is set.
        global_max_active = getattr(args, 'global_max_active_profiles', 0)
        if global_max_active > 0:
            # Get all profiles managed by any group
            all_grouped_profiles = set()
            for group in profile_groups:
                profiles_in_group = set()
                if 'profiles' in group:
                    profiles_in_group = set(group['profiles'])
                elif 'prefix' in group:
                    prefix = group['prefix']
                    profiles_in_group = {p['name'] for p in all_profiles_list if p['name'].startswith(prefix)}
                all_grouped_profiles.update(profiles_in_group)
                if 'prefix' in group:
                    prefix = group['prefix']
                    for p_name in all_profiles_map:
                        if p_name.startswith(prefix):
                            all_grouped_profiles.add(p_name)
                elif 'profiles' in group:
                    all_grouped_profiles.update(group['profiles'])

            # Get current active count across all groups from our local map
            current_global_active = [
@ -928,29 +1037,23 @@ class PolicyEnforcer:
            if num_global_active > global_max_active:
                logger.warning(f"Global Healing: Found {num_global_active} active profiles across all groups, but global max is {global_max_active}. Resting excess.")

                # We can only rest profiles that are in the ACTIVE state, not LOCKED.
                profiles_that_can_be_rested = [p for p in current_global_active if p['state'] == ProfileState.ACTIVE.value]
                # Sort to determine which profiles to rest, using the same logic as per-group healing.
                profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True) # Higher name first
                profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True)
                profiles_that_can_be_rested.sort(key=lambda p: (
                    p.get('success_count', 0) + p.get('failure_count', 0) +
                    p.get('tolerated_error_count', 0) +
                    p.get('download_count', 0) + p.get('download_error_count', 0)
                ), reverse=True) # Most requests first
                ), reverse=True)

                num_to_rest = num_global_active - global_max_active
                profiles_to_rest = profiles_that_can_be_rested[:num_to_rest]
                for profile in profiles_to_rest:
                for profile in profiles_that_can_be_rested[:num_to_rest]:
                    logger.warning(f"Global Healing: Resting profile '{profile['name']}'.")
                    self.actions_taken_this_cycle += 1
                    if not self.dry_run:
                        sm = self.manager.get_state_machine(profile['name'])
                        if sm:
                            # Rest for a minimal duration to prevent immediate re-activation in the same cycle.
                            sm.rest(reason="Global max_active healing", duration_minutes=0.02) # ~1.2 seconds
                            sm.rest(reason="Global max_active healing", duration_minutes=0.02)

                    # Update local map to reflect the change for this cycle
                    all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value
                    all_profiles_map[profile['name']]['rest_reason'] = "Global max_active healing"
                    all_profiles_map[profile['name']]['rest_until'] = time.time() + (0.02 * 60)
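Both healing passes pick which ACTIVE profiles to rest by sorting candidates busiest-first over their cumulative request counters, so the profiles that have done the most work are rested before the quieter ones. A minimal sketch of that ordering (field values are illustrative):

```python
def total_requests(p: dict) -> int:
    """Sum of the request counters the healing sort keys on."""
    return (p.get('success_count', 0) + p.get('failure_count', 0) +
            p.get('tolerated_error_count', 0) +
            p.get('download_count', 0) + p.get('download_error_count', 0))

active = [
    {'name': 'user31_0', 'success_count': 40},
    {'name': 'user31_1', 'success_count': 7},
]
active.sort(key=total_requests, reverse=True)  # busiest first

# With max_active_profiles = 1 and two active, one profile must be rested:
excess = active[:len(active) - 1]
print([p['name'] for p in excess])  # ['user31_0']
```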
@ -1062,6 +1165,10 @@ class PolicyEnforcer:
        for proxy_url, state_data in proxy_states.items():
            state = state_data.get('state', ProfileState.ACTIVE.value)

            if state == ProfileState.BANNED.value:
                logger.debug(f"Proxy '{proxy_url}' is BANNED. Skipping work/rest cycle enforcement.")
                continue

            # Un-rest logic
            if state == ProfileState.RESTING.value:
                rest_until = state_data.get('rest_until', 0)
@ -1257,6 +1364,60 @@ class PolicyEnforcer:
            return True # Indicates action was taken
        return False

    def enforce_proxy_state_on_profiles(self, all_profiles_list, all_profiles_map):
        """
        Enforces the state of a proxy onto all profiles that use it.
        - If a proxy is BANNED, any non-banned profile using it will be banned.
        - If a proxy is RESTING, any ACTIVE profile using it will be rested.
        This is a safeguard that runs after all proxy state changes and before profile
        state logic.
        """
        unique_proxies = sorted(list(set(p['proxy'] for p in all_profiles_list if p.get('proxy'))))
        if not unique_proxies:
            return

        proxy_states = self.manager.get_proxy_states(unique_proxies)

        for profile in all_profiles_list:
            proxy_url = profile.get('proxy')
            if not proxy_url:
                continue

            proxy_state_data = proxy_states.get(proxy_url)
            if not proxy_state_data:
                continue

            proxy_state = proxy_state_data.get('state')
            profile_name = profile['name']

            if proxy_state == ProfileState.BANNED.value and profile['state'] != ProfileState.BANNED.value:
                reason = f"Proxy '{proxy_url}' is BANNED"
                logger.warning(f"Banning profile '{profile_name}' because its proxy is banned: {reason}")
                self.actions_taken_this_cycle += 1
                if not self.dry_run:
                    sm = self.manager.get_state_machine(profile_name)
                    if sm:
                        sm.ban(reason=reason)
                # Update local map for consistency in this cycle
                all_profiles_map[profile_name]['state'] = ProfileState.BANNED.value
                all_profiles_map[profile_name]['reason'] = reason

            elif proxy_state == ProfileState.RESTING.value and profile['state'] == ProfileState.ACTIVE.value:
                logger.info(f"Resting profile '{profile_name}' because its proxy '{proxy_url}' is resting.")
                self.actions_taken_this_cycle += 1
                if not self.dry_run:
                    # Rest it for as long as the proxy is resting.
                    proxy_rest_until = proxy_state_data.get('rest_until', 0)
                    duration_minutes = max(0, (proxy_rest_until - time.time()) / 60)
                    sm = self.manager.get_state_machine(profile_name)
                    if sm:
                        sm.rest(reason=self.PROXY_REST_REASON, duration_minutes=duration_minutes)
                # Update local map for consistency in this cycle
                proxy_rest_until = proxy_state_data.get('rest_until', 0)
                all_profiles_map[profile_name]['state'] = ProfileState.RESTING.value
                all_profiles_map[profile_name]['rest_reason'] = self.PROXY_REST_REASON
                all_profiles_map[profile_name]['rest_until'] = proxy_rest_until


def add_policy_enforcer_parser(subparsers):
    """Adds the parser for the 'policy-enforcer' command."""
    parser = subparsers.add_parser(
@ -1421,6 +1582,10 @@ def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=False):

    logger.debug("Syncing active profiles from Auth to Download simulation...")

    # Get all download proxy states once for efficiency
    all_dl_proxies = sorted(list(set(p['proxy'] for p in all_download_profiles.values() if p.get('proxy'))))
    all_dl_proxy_states = download_manager.get_proxy_states(all_dl_proxies)

    # Get profiles that should be active in the download simulation
    target_active_download_profiles = set()
@ -1465,6 +1630,13 @@ def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=False):
                logger.warning(f"Auth profile '{target_profile_name}' needs an active download profile, but no corresponding download profile found.")
                continue

            # Check proxy state before activating
            proxy_url = download_profile.get('proxy')
            proxy_state = all_dl_proxy_states.get(proxy_url, {}).get('state')
            if proxy_state in [ProfileState.BANNED.value, ProfileState.RESTING.value]:
                logger.debug(f"Sync: Deferring activation of download profile '{target_profile_name}' because its proxy '{proxy_url}' is {proxy_state}.")
                continue

            if download_profile['state'] not in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
                is_from_cooldown = download_profile['state'] == ProfileState.COOLDOWN.value
                log_msg_suffix = " (from COOLDOWN)" if is_from_cooldown else ""
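The sync-side guard in isolation: activation is skipped while the download profile's proxy is BANNED or RESTING, using the proxy map fetched once per cycle. A minimal sketch, assuming plain-string state values as in the checks above:

```python
BLOCKED_PROXY_STATES = {'BANNED', 'RESTING'}

def can_sync_activate(profile: dict, proxy_states: dict) -> bool:
    """True when the profile's proxy does not block cross-sim activation."""
    state = proxy_states.get(profile.get('proxy'), {}).get('state')
    return state not in BLOCKED_PROXY_STATES

print(can_sync_activate({'proxy': 'http://p1'}, {'http://p1': {'state': 'RESTING'}}))  # False
```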
@ -1476,24 +1648,12 @@ def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=False):
                    sm.activate(profile=download_profile)

    # --- Group-Aware Deactivation ---
    # Identify the target download groups based on target_active_download_profiles.
    # CRITICAL FIX: Directly map individual profiles to their groups instead of relying on name patterns.
    target_download_groups = set()
    for target_profile_name in target_active_download_profiles:
        group_info = dl_profile_to_group.get(target_profile_name)
        if group_info:
            target_download_groups.add(group_info['name'])

    logger.debug(f"Target download groups for this sync cycle: {target_download_groups}")

    # Deactivate any download profiles that are active but are not in a target group
    # Deactivate any download profiles that are active but are not the target profile for their group.
    # This ensures that if auth wants 'user1_1' to be active, the currently active 'user1_0' is rested first.
    for dl_profile_name, dl_profile in all_download_profiles.items():
        if dl_profile['state'] == ProfileState.ACTIVE.value:
            group_info = dl_profile_to_group.get(dl_profile_name)
            # If the profile is in a group, and that group is NOT a target group, rest it.
            if group_info and group_info['name'] not in target_download_groups:
                logger.info(f"Syncing active state: Resting download profile '{dl_profile_name}' as its group '{group_info['name']}' is no longer active.")
            if dl_profile_name not in target_active_download_profiles:
                logger.info(f"Syncing active state: Resting download profile '{dl_profile_name}' as it is no longer the target active profile for its group.")
            if not dry_run:
                sm = download_manager.get_state_machine(dl_profile_name)
                if sm:
@ -1539,9 +1699,9 @@ def main_policy_enforcer(args):
        'unlock_stale_locks_after_seconds': 120,
        'unlock_cooldown_seconds': 0,
        'max_global_proxy_active_minutes': 0, 'rest_duration_on_max_active': 10,
        'profile_selection_strategy': 'longest_idle',
        'profile_selection_strategy': None,
        'global_max_active_profiles': 0,
        'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': []
        'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': [], 'enforcement_pools': []
    }

    sim_params = policy.get('simulation_parameters', {})
@ -1586,10 +1746,90 @@ def main_policy_enforcer(args):

        logger.info(f"Setting up enforcer for {sim_type} simulation...")

        # --- Dynamic Profile Group Discovery ---
        profile_group_templates = policy.get('profile_group_templates')
        # Check if templates exist and if the config block doesn't already have groups (CLI overrides take precedence)
        if profile_group_templates and 'profile_groups' not in policy_config:
        # --- Hybrid Profile Group Discovery (Static + Dynamic) ---
        common_group_settings = policy.get('common_group_settings', {})
        enforcement_pools = policy.get('enforcement_pools')
        profile_group_templates = policy.get('profile_group_templates') # For backward compatibility

        # Start with any statically defined groups from the policy.
        final_profile_groups = policy_config.get('profile_groups', [])

        # If enforcement_pools are defined, discover dynamic groups and merge them.
        if enforcement_pools:
            logger.info(f"Found 'enforcement_pools'. Discovering dynamic profile groups to merge with static ones for {sim_type}...")

            # Determine key_prefix to connect to the right Redis env
            policy_env = sim_params.get(env_policy_key)
            default_policy_env = sim_params.get('env')
            effective_env = env_cli_arg or args.env or policy_env or default_policy_env or 'dev'
            if args.key_prefix: temp_key_prefix = args.key_prefix
            elif args.legacy: temp_key_prefix = 'profile_mgmt_'
            else: temp_key_prefix = f"{effective_env}_profile_mgmt_"

            try:
                temp_manager = ProfileManager(redis_host, redis_port, redis_password, temp_key_prefix, redis_db)
                all_profiles = temp_manager.list_profiles()
                found_prefixes = set(p['name'].rsplit('_', 1)[0] for p in all_profiles)

                if not found_prefixes:
                    logger.warning(f"Dynamic discovery found no profile prefixes for env '{effective_env}'.")
                else:
                    logger.info(f"Discovered {len(found_prefixes)} unique profile prefixes: {sorted(list(found_prefixes))}")

                    dynamically_generated_groups = []
                    # Match discovered prefixes against patterns in each enforcement pool
                    for i, pool in enumerate(enforcement_pools):
                        pool_name = pool.get('name', f'pool_{i}')
                        pool_patterns = pool.get('profile_group_patterns', [])
                        # If a pool has no patterns, it's just for defining concurrency for static groups. Skip discovery.
                        if not pool_patterns:
                            continue

                        # Merge common settings with any pool-specific overrides
                        group_settings_template = deepcopy(common_group_settings)
                        pool_specific_settings = pool.get('group_settings', {})

                        # A simple way to deep merge the two levels (auth/download)
                        auth_settings = group_settings_template.get('auth', {})
                        auth_settings.update(pool_specific_settings.get('auth', {}))
                        group_settings_template['auth'] = auth_settings

                        download_settings = group_settings_template.get('download', {})
                        download_settings.update(pool_specific_settings.get('download', {}))
                        group_settings_template['download'] = download_settings

                        for prefix in sorted(list(found_prefixes)):
                            for pattern in pool_patterns:
                                if fnmatch.fnmatch(prefix, pattern):
                                    sim_settings = group_settings_template.get(sim_type.lower())
                                    if not sim_settings:
                                        logger.debug(f"Pool '{pool_name}' has no settings for '{sim_type}'. Skipping for prefix '{prefix}'.")
                                        continue

                                    new_group = deepcopy(sim_settings)
                                    new_group['prefix'] = prefix
                                    new_group['name'] = prefix
                                    new_group['pool_name'] = pool_name

                                    dynamically_generated_groups.append(new_group)
                                    logger.debug(f"Assigned prefix '{prefix}' to pool '{pool_name}' for {sim_type} simulation.")
                                    break

                    if dynamically_generated_groups:
                        logger.info(f"Merging {len(final_profile_groups)} static group(s) with {len(dynamically_generated_groups)} discovered dynamic group(s).")
                        final_profile_groups.extend(dynamically_generated_groups)

            except Exception as e:
                logger.error(f"Failed during dynamic profile group discovery: {e}", exc_info=args.verbose)

            # Update the policy_config with the final merged list and the pool definitions
            policy_config['profile_groups'] = final_profile_groups
            # CRITICAL: Deepcopy enforcement_pools to prevent modification in one simulation
            # from affecting the other, since the policy object is shared.
            policy_config['enforcement_pools'] = deepcopy(enforcement_pools)

        # For backward compatibility with the old template format
        elif profile_group_templates and 'profile_groups' not in policy_config:
            logger.info(f"Found 'profile_group_templates'. Discovering profile groups dynamically for {sim_type}...")

            # Determine key_prefix to connect to the right Redis env (logic duplicated from below)
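The core of the discovery step is a glob match of each discovered profile prefix (derived from profile names via `rsplit('_', 1)[0]`, so `user31_0` yields `user31`) against each pool's patterns, first match winning. Isolated into a runnable fragment with illustrative values:

```python
import fnmatch

found_prefixes = {'user31', 'user32', 'user61'}
enforcement_pools = [
    {'name': 'server_dl003_pool', 'profile_group_patterns': ['user3*']},
    {'name': 'server_dl006_pool', 'profile_group_patterns': ['user6*']},
]

assignments = {}
for prefix in sorted(found_prefixes):
    for pool in enforcement_pools:
        # First pool whose patterns glob-match the prefix claims it.
        if any(fnmatch.fnmatch(prefix, pat) for pat in pool['profile_group_patterns']):
            assignments[prefix] = pool['name']
            break

print(assignments)
# {'user31': 'server_dl003_pool', 'user32': 'server_dl003_pool', 'user61': 'server_dl006_pool'}
```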
@ -1639,6 +1879,21 @@ def main_policy_enforcer(args):
                policy_config['profile_groups'] = generated_groups
            except Exception as e:
                logger.error(f"Failed during dynamic profile group discovery: {e}", exc_info=args.verbose)

        # In the download simulation, the active profiles are dictated entirely by the
        # cross-simulation sync logic. We must disable the download enforcer's own
        # concurrency limits (max_active_profiles) to prevent it from "healing"
        # profiles that the sync logic has correctly activated.
        if sim_type == 'Download':
            logger.info("Disabling max_active_profiles limits for Download simulation. Active profiles will be managed by cross-sim sync.")
            if 'profile_groups' in policy_config:
                for group in policy_config['profile_groups']:
                    group['max_active_profiles'] = 0
            if 'enforcement_pools' in policy_config:
                for pool in policy_config['enforcement_pools']:
                    pool['max_active_profiles'] = 0
            # Also disable the global limit for the download simulation.
            policy_config['global_max_active_profiles'] = 0

        config = Config(args, policy_config, code_defaults)
@ -18,8 +18,10 @@ import time
from datetime import datetime
from typing import Dict, List, Optional, Any
import collections
import fnmatch

import redis
import yaml

from .profile_statemachine import ProfileState, ProfileStateMachine
@ -199,8 +201,8 @@ class ProfileManager:

        # When decrementing, ensure the counter exists to avoid creating negative counters from stray calls.
        if count < 0 and not self.redis.exists(key):
            logger.warning(f"Attempted to decrement pending downloads for '{profile_name}' by {abs(count)}, but no counter exists. No action taken.")
            logger.warning(f"Attempted to decrement pending downloads for '{profile_name}' by {abs(count)}, but no counter exists. This can happen in a race condition. Assuming task is complete and counter is zero.")
            return None
            return 0

        new_value = self.redis.incrby(key, count)
@ -225,8 +227,8 @@ class ProfileManager:
|
|||||||
|
|
||||||
# Only decrement if the key exists. This prevents stray calls from creating negative counters.
|
# Only decrement if the key exists. This prevents stray calls from creating negative counters.
|
||||||
if not self.redis.exists(key):
|
if not self.redis.exists(key):
|
||||||
logger.warning(f"Attempted to decrement pending downloads for '{profile_name}', but no counter exists. No action taken.")
|
logger.warning(f"Attempted to decrement pending downloads for '{profile_name}', but no counter exists. This can happen in a race condition. Assuming task is complete and counter is zero.")
|
||||||
return None
|
return 0
|
||||||
|
|
||||||
new_value = self.redis.decr(key)
|
new_value = self.redis.decr(key)
|
||||||
|
|
||||||
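The race the new warning describes can be illustrated with a toy stand-in for Redis (a plain dict here; the real code uses `EXISTS`/`DECR` against Redis, so this is only a sketch of the guard's intent):

```python
counters = {}  # stand-in for Redis; a key exists only while work is pending

def decrement_pending(profile_name):
    key = f"pending_downloads:{profile_name}"
    if key not in counters:
        # Stray or duplicate completion signal: treat the counter as already
        # zero instead of creating a negative counter.
        return 0
    counters[key] -= 1
    if counters[key] <= 0:
        counters.pop(key)  # drop the key once all pending work is done
        return 0
    return counters[key]

counters['pending_downloads:user1'] = 1
print(decrement_pending('user1'))  # 0: the real completion
print(decrement_pending('user1'))  # 0: late duplicate; the guard prevents -1
```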
@@ -444,7 +446,12 @@ class ProfileManager:
             logger.error(f"Invalid state: {new_state}")
             return False
 
-        sm = self.get_state_machine(name)
+        profile = self.get_profile(name)
+        if not profile:
+            # get_profile already logs an error if the profile is not found.
+            return False
+
+        sm = self.get_state_machine(name, profile=profile)
         if not sm:
             return False  # get_state_machine logs the error
 
@@ -453,14 +460,16 @@ class ProfileManager:
             return True
 
         try:
+            # Pass the profile object to the transition methods for context,
+            # which is consistent with the policy enforcer's usage.
             if new_state == ProfileState.ACTIVE.value:
-                sm.activate()
+                sm.activate(profile=profile)
             elif new_state == ProfileState.BANNED.value:
-                sm.ban(reason=reason)
+                sm.ban(reason=reason, profile=profile)
             elif new_state == ProfileState.RESTING.value:
-                sm.rest(reason=reason)
+                sm.rest(reason=reason, profile=profile)
             elif new_state == ProfileState.PAUSED.value:
-                sm.pause(reason=reason)
+                sm.pause(reason=reason, profile=profile)
             # LOCKED and COOLDOWN are not handled here as they are special transitions
             # from lock_profile and unlock_profile, and should not be set directly.
             elif new_state in [ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]:
@@ -720,30 +729,51 @@ class ProfileManager:
         logger.info(f"Deleted {deleted_count} global counter key(s).")
         return deleted_count
 
-    def set_proxy_state(self, proxy_url: str, state: str, rest_duration_minutes: Optional[int] = None) -> bool:
+    def set_proxy_state(self, proxy_url: str, state: str, rest_duration_minutes: Optional[int] = None, reason: Optional[str] = None) -> bool:
         """Set the state of a proxy and propagates it to associated profiles."""
-        if state not in [ProfileState.ACTIVE.value, ProfileState.RESTING.value]:
-            logger.error(f"Invalid proxy state: {state}. Only ACTIVE and RESTING are supported for proxies.")
+        if state not in [ProfileState.ACTIVE.value, ProfileState.RESTING.value, ProfileState.BANNED.value]:
+            logger.error(f"Invalid proxy state: {state}. Only ACTIVE, RESTING, and BANNED are supported for proxies.")
             return False
 
         proxy_key = self._proxy_state_key(proxy_url)
         now = time.time()
         updates = {'state': state}
+        if reason:
+            updates['reason'] = reason
+        else:
+            # Clear reason if not provided
+            updates['reason'] = ''
+
         rest_until = 0
         if state == ProfileState.RESTING.value:
-            if not rest_duration_minutes or rest_duration_minutes <= 0:
+            if rest_duration_minutes is None:
                 logger.error("rest_duration_minutes is required when setting proxy state to RESTING.")
                 return False
-            rest_until = now + rest_duration_minutes * 60
+
+            if rest_duration_minutes == -1:
+                # Use a very large number for "indefinite" to avoid special cases later.
+                # 10 years should be sufficient.
+                rest_until = now + (10 * 365 * 24 * 60 * 60)
+            elif rest_duration_minutes > 0:
+                rest_until = now + rest_duration_minutes * 60
+            else:
+                logger.error("rest_duration_minutes must be positive, or -1 for indefinite.")
+                return False
+
             updates['rest_until'] = str(rest_until)
             updates['work_start_timestamp'] = '0'  # Clear work start time
-        else:  # ACTIVE
+        elif state in [ProfileState.ACTIVE.value, ProfileState.BANNED.value]:
             updates['rest_until'] = '0'
-            updates['work_start_timestamp'] = str(now)
+            if state == ProfileState.ACTIVE.value:
+                updates['work_start_timestamp'] = str(now)
+            else:  # BANNED
+                updates['work_start_timestamp'] = '0'
 
         self.redis.hset(proxy_key, mapping=updates)
-        logger.info(f"Set proxy '{proxy_url}' state to {state}.")
+        log_msg = f"Set proxy '{proxy_url}' state to {state}."
+        if reason:
+            log_msg += f" Reason: {reason}"
+        logger.info(log_msg)
 
         # Now, update associated profiles
         profiles_on_proxy = self.list_profiles(proxy_filter=proxy_url)
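Pulled out of context, the RESTING duration handling reduces to the following (times in seconds; `-1` maps to roughly ten years, which the code treats as indefinite):

```python
import time

def compute_rest_until(rest_duration_minutes):
    """Mirrors the RESTING branch above: -1 means 'indefinite' (~10 years)."""
    now = time.time()
    if rest_duration_minutes == -1:
        return now + 10 * 365 * 24 * 60 * 60
    if rest_duration_minutes > 0:
        return now + rest_duration_minutes * 60
    raise ValueError("rest_duration_minutes must be positive, or -1 for indefinite")

print(compute_rest_until(30) - time.time())   # ~1800 seconds
print(compute_rest_until(-1) - time.time())   # ~315360000 seconds (~10 years)
```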
@@ -751,16 +781,31 @@ class ProfileManager:
             return True
 
         if state == ProfileState.RESTING.value:
-            logger.info(f"Propagating RESTING state to profiles on proxy '{proxy_url}'.")
+            propagate_reason = reason or "Proxy resting"
+            logger.info(f"Propagating RESTING state to profiles on proxy '{proxy_url}'. Reason: {propagate_reason}")
             for profile in profiles_on_proxy:
                 if profile['state'] == ProfileState.ACTIVE.value:
-                    self.update_profile_state(profile['name'], ProfileState.RESTING.value, "Proxy resting")
+                    self.update_profile_state(profile['name'], ProfileState.RESTING.value, propagate_reason)
                     self.update_profile_field(profile['name'], 'rest_until', str(rest_until))
-        elif state == ProfileState.ACTIVE.value:
-            logger.info(f"Propagating ACTIVE state to profiles on proxy '{proxy_url}'.")
+        elif state == ProfileState.BANNED.value:
+            propagate_reason = reason or "Proxy banned"
+            logger.info(f"Propagating BANNED state to profiles on proxy '{proxy_url}'. Reason: {propagate_reason}")
             for profile in profiles_on_proxy:
-                if profile['state'] == ProfileState.RESTING.value and profile.get('rest_reason') == "Proxy resting":
-                    self.update_profile_state(profile['name'], ProfileState.ACTIVE.value, "Proxy activated")
+                if profile['state'] != ProfileState.BANNED.value:
+                    self.update_profile_state(profile['name'], ProfileState.BANNED.value, propagate_reason)
+        elif state == ProfileState.ACTIVE.value:
+            propagate_reason = reason or "Proxy activated"
+            logger.info(f"Propagating ACTIVE state to profiles on proxy '{proxy_url}'. Reason: {propagate_reason}")
+            for profile in profiles_on_proxy:
+                # Check for proxy-related reasons in both rest_reason and ban_reason
+                proxy_related_reason = False
+                if profile.get('rest_reason', '').startswith("Proxy "):
+                    proxy_related_reason = True
+                if profile.get('ban_reason', '').startswith("Proxy "):
+                    proxy_related_reason = True
+
+                if (profile['state'] in [ProfileState.RESTING.value, ProfileState.BANNED.value]) and proxy_related_reason:
+                    self.update_profile_state(profile['name'], ProfileState.ACTIVE.value, propagate_reason)
 
         return True
 
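The reactivation filter only wakes profiles that were parked by a proxy-level action. A standalone sketch with invented profile records (plain strings stand in for `ProfileState` values):

```python
# Profiles rested or banned with a "Proxy ..." reason were parked by a proxy
# action and get woken; profiles banned for their own reasons are left alone.
profiles = [
    {'name': 'user1_0', 'state': 'RESTING', 'rest_reason': 'Proxy resting'},
    {'name': 'user1_1', 'state': 'BANNED',  'ban_reason': 'Proxy banned'},
    {'name': 'user1_2', 'state': 'BANNED',  'ban_reason': 'bot check'},
]

for profile in profiles:
    proxy_related = (profile.get('rest_reason', '').startswith('Proxy ')
                     or profile.get('ban_reason', '').startswith('Proxy '))
    if profile['state'] in ('RESTING', 'BANNED') and proxy_related:
        print(f"reactivating {profile['name']}")  # user1_0 and user1_1 only
```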
@@ -1162,8 +1207,8 @@ def add_profile_manager_parser(subparsers):
 
     # List command
     list_parser = subparsers.add_parser('list', help='List profiles', parents=[common_parser])
-    list_parser.add_argument('--auth-env', help='Environment name for the Auth simulation monitor. Use with --download-env for a merged view.')
-    list_parser.add_argument('--download-env', help='Environment name for the Download simulation monitor. Use with --auth-env for a merged view.')
+    list_parser.add_argument('--auth-env', default=None, help='Environment name for the Auth simulation monitor. Use with --download-env for a merged view. Defaults to YTOPS_AUTH_ENV env var.')
+    list_parser.add_argument('--download-env', default=None, help='Environment name for the Download simulation monitor. Use with --auth-env for a merged view. Defaults to YTOPS_DOWNLOAD_ENV env var.')
     list_parser.add_argument('--separate-views', action='store_true', help='In dual-monitor mode, show two separate reports instead of a single merged view.')
     list_parser.add_argument('--rest-after-requests', type=int, help='(For display) Show countdown to rest based on this request limit.')
     list_parser.add_argument('--state', help='Filter by state')
@@ -1177,20 +1222,24 @@ def add_profile_manager_parser(subparsers):
     list_parser.add_argument('--no-blink', action='store_true', help='Use ANSI escape codes for smoother screen updates in --live mode (experimental).')
     list_parser.add_argument('--interval-seconds', type=int, default=5, help='When in --live mode, how often to refresh in seconds. Default: 5.')
     list_parser.add_argument('--hide-active-state', action='store_true', help="Display 'ACTIVE' state as blank for cleaner UI.")
+    list_parser.add_argument('--hide-ungrouped', action='store_true', help="Hide profiles that do not belong to any configured profile group (e.g., old profiles after a config change). Shown by default.")
 
     # Get command
     get_parser = subparsers.add_parser('get', help='Get profile details', parents=[common_parser])
     get_parser.add_argument('name', help='Profile name')
 
     # Set proxy state command
-    set_proxy_state_parser = subparsers.add_parser('set-proxy-state', help='Set the state of a proxy and propagate to its profiles.', parents=[common_parser])
-    set_proxy_state_parser.add_argument('proxy_url', help='Proxy URL')
-    set_proxy_state_parser.add_argument('state', choices=['ACTIVE', 'RESTING'], help='New state for the proxy')
-    set_proxy_state_parser.add_argument('--duration-minutes', type=int, help='Duration for the RESTING state')
+    set_proxy_state_parser = subparsers.add_parser('set-proxy-state', help='Set the state of a proxy (or proxies) and propagate to its profiles.', parents=[common_parser])
+    set_proxy_state_parser.add_argument('proxy_urls', help='Proxy URL, or comma-separated list of URLs')
+    set_proxy_state_parser.add_argument('state', choices=['ACTIVE', 'RESTING', 'BANNED'], help='New state for the proxy')
+    set_proxy_state_parser.add_argument('--duration-minutes', type=int, help='Duration for the RESTING state. Use -1 for indefinite rest.')
+    set_proxy_state_parser.add_argument('--reason', help='Reason for the state change. Propagated to profiles.')
+    set_proxy_state_parser.add_argument('--auth-env', default=None, help='Target the Auth simulation environment. Can be used with --download-env. Defaults to YTOPS_AUTH_ENV env var.')
+    set_proxy_state_parser.add_argument('--download-env', default=None, help='Target the Download simulation environment. Can be used with --auth-env. Defaults to YTOPS_DOWNLOAD_ENV env var.')
 
     # Update state command
-    update_state_parser = subparsers.add_parser('update-state', help='Update profile state', parents=[common_parser])
-    update_state_parser.add_argument('name', help='Profile name')
+    update_state_parser = subparsers.add_parser('update-state', help='Update profile state for one or more profiles.', parents=[common_parser])
+    update_state_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
     update_state_parser.add_argument('state', choices=ProfileState.values(),
                                      help='New state')
     update_state_parser.add_argument('--reason', help='Reason for state change (especially for BAN)')
@@ -1202,21 +1251,21 @@ def add_profile_manager_parser(subparsers):
     update_field_parser.add_argument('value', help='New value')
 
     # Pause command (convenience)
-    pause_parser = subparsers.add_parser('pause', help=f'Pause a profile (sets state to {ProfileState.PAUSED.value}).', parents=[common_parser])
-    pause_parser.add_argument('name', help='Profile name')
+    pause_parser = subparsers.add_parser('pause', help=f'Pause one or more profiles (sets state to {ProfileState.PAUSED.value}).', parents=[common_parser])
+    pause_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
 
     # Activate command (convenience)
-    activate_parser = subparsers.add_parser('activate', help=f'Activate a profile (sets state to {ProfileState.ACTIVE.value}). Useful for resuming a PAUSED profile or fixing a stale LOCKED one.', parents=[common_parser])
-    activate_parser.add_argument('name', help='Profile name')
+    activate_parser = subparsers.add_parser('activate', help=f'Activate one or more profiles (sets state to {ProfileState.ACTIVE.value}). Useful for resuming a PAUSED profile or fixing a stale LOCKED one.', parents=[common_parser])
+    activate_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
 
     # Ban command (convenience)
-    ban_parser = subparsers.add_parser('ban', help=f'Ban a profile (sets state to {ProfileState.BANNED.value}).', parents=[common_parser])
-    ban_parser.add_argument('name', help='Profile name')
+    ban_parser = subparsers.add_parser('ban', help=f'Ban one or more profiles (sets state to {ProfileState.BANNED.value}).', parents=[common_parser])
+    ban_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
     ban_parser.add_argument('--reason', required=True, help='Reason for ban')
 
     # Unban command (convenience)
-    unban_parser = subparsers.add_parser('unban', help=f'Unban a profile (sets state to {ProfileState.ACTIVE.value} and resets session counters).', parents=[common_parser])
-    unban_parser.add_argument('name', help='Profile name')
+    unban_parser = subparsers.add_parser('unban', help=f'Unban one or more profiles (sets state to {ProfileState.ACTIVE.value} and resets session counters).', parents=[common_parser])
+    unban_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
 
     # Delete command
     delete_parser = subparsers.add_parser('delete', help='Delete a profile', parents=[common_parser])
@@ -1228,6 +1277,15 @@ def add_profile_manager_parser(subparsers):
     delete_all_parser = subparsers.add_parser('delete-all', help='(Destructive) Delete all profiles and data under the current key prefix.', parents=[common_parser])
     delete_all_parser.add_argument('--confirm', action='store_true', help='Confirm this highly destructive action (required)')
 
+    # Cleanup ungrouped command
+    cleanup_parser = subparsers.add_parser('cleanup-ungrouped', help='(Safe) Sync profiles in Redis with the setup policy (create missing, delete extra).', parents=[common_parser])
+    cleanup_parser.add_argument('--policy-file', required=True, help='Path to the profile setup policy YAML file (e.g., policies/6_profile_setup_policy.yaml).')
+    cleanup_parser.add_argument('--auth-env', default=None, help="Environment name for the Auth simulation to clean. Defaults to YTOPS_AUTH_ENV env var.")
+    cleanup_parser.add_argument('--download-env', default=None, help="Environment name for the Download simulation to clean. Defaults to YTOPS_DOWNLOAD_ENV env var.")
+    cleanup_parser.add_argument('--dry-run', action='store_true', help="Only show which profiles would be created or deleted, don't actually change them.")
+    cleanup_parser.add_argument('--no-create-missing', action='store_true', help="Only delete ungrouped profiles, do not create profiles missing from Redis.")
+    cleanup_parser.add_argument('--disallow-cleanup-active-downloads', action='store_true', help="In paired auth/download cleanup, PREVENTS deleting a profile if its download side is ACTIVE, even if the auth side is idle. Cleanup of active downloads is allowed by default.")
+
     # Reset global counters command
     reset_global_parser = subparsers.add_parser('reset-global-counters', help='Reset global counters (e.g., failed_lock_attempts).', parents=[common_parser])
 
@@ -1675,6 +1733,19 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
         return
 
     profiles = manager.list_profiles(args.state, args.proxy)
 
+    if getattr(args, 'hide_ungrouped', False):
+        all_grouped_profile_names = set()
+        for group in profile_groups_config:
+            for p_name in group.get('profiles_in_group', []):
+                all_grouped_profile_names.add(p_name)
+
+        original_count = len(profiles)
+        profiles = [p for p in profiles if p.get('name') in all_grouped_profile_names]
+        filtered_count = original_count - len(profiles)
+        if filtered_count > 0 and not args.live:
+            print(f"NOTE: {filtered_count} ungrouped profiles were hidden via --hide-ungrouped.", file=sys.stderr)
+
     if not profiles:
         print("No profiles found matching the criteria.", file=file)
         return
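The `--hide-ungrouped` filter in isolation, with invented data:

```python
# Group config and profile list are made up; the real values come from Redis.
profile_groups_config = [
    {'name': 'g1', 'profiles_in_group': ['user1_0', 'user1_1']},
]
profiles = [{'name': 'user1_0'}, {'name': 'user1_1'}, {'name': 'old_profile_7'}]

grouped = {name for g in profile_groups_config
           for name in g.get('profiles_in_group', [])}
visible = [p for p in profiles if p.get('name') in grouped]
hidden = len(profiles) - len(visible)
print([p['name'] for p in visible], f"({hidden} hidden)")
# ['user1_0', 'user1_1'] (1 hidden)
```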
@@ -1832,11 +1903,14 @@ def _render_simulation_view(title, manager, args, file=sys.stdout):
 
     profile_groups_config = _build_profile_groups_config(manager, profiles)
 
-    profile_selection_strategy = manager.get_config('profile_selection_strategy')
-    if profile_selection_strategy:
-        print(f"Profile Selection Strategy: {profile_selection_strategy}", file=file)
-    _render_profile_group_summary_table(manager, profiles, profile_groups_config, args, file=file)
+    # The group summary table is only relevant for the Auth simulation, which has
+    # selection strategies and rotation policies.
+    is_auth_sim = 'Auth' in title
+    if is_auth_sim:
+        profile_selection_strategy = manager.get_config('profile_selection_strategy')
+        if profile_selection_strategy:
+            print(f"Profile Selection Strategy: {profile_selection_strategy}", file=file)
+        _render_profile_group_summary_table(manager, profiles, profile_groups_config, args, file=file)
 
     failed_lock_attempts = manager.get_failed_lock_attempts()
     global_stats = manager.get_global_stats()
@@ -1997,7 +2071,6 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
         _render_activation_history_table(auth_manager, file=file)
 
     print(f"\n--- Download Simulation Profile Details ({args.download_env}) ---", file=file)
-    _render_profile_group_summary_table(download_manager, dl_profiles, dl_groups_config, args, file=file)
     _render_profile_details_table(download_manager, args, "Download", dl_groups_config, file=file)
     if args.show_activation_history:
         _render_activation_history_table(download_manager, file=file)
@@ -2010,6 +2083,8 @@ def _print_profile_list(manager, args, title="Profile Status"):
         return _render_simulation_view(title, manager, args, file=sys.stdout)
 
 
+
+
 def main_profile_manager(args):
     """Main dispatcher for 'profile' command."""
     if load_dotenv:
@@ -2026,6 +2101,13 @@ def main_profile_manager(args):
             print(f"ERROR: The specified --env-file was not found: {args.env_file}", file=sys.stderr)
             return 1
 
+    # After loading .env, populate any args that were not provided on the CLI
+    # This is necessary because argparse `default=os.getenv(...)` runs before `load_dotenv`.
+    if hasattr(args, 'auth_env') and args.auth_env is None:
+        args.auth_env = os.getenv('YTOPS_AUTH_ENV')
+    if hasattr(args, 'download_env') and args.download_env is None:
+        args.download_env = os.getenv('YTOPS_DOWNLOAD_ENV')
+
     if args.redis_host is None:
         args.redis_host = os.getenv('REDIS_HOST', os.getenv('MASTER_HOST_IP', 'localhost'))
     if args.redis_port is None:
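The backfill is needed because argparse evaluates `default=` expressions at parse time, before `load_dotenv()` has populated `os.environ`. A minimal sketch of the ordering problem and the fix:

```python
import os

# What argparse produced: --auth-env was not given, so its default (None)
# stuck, even though a .env file would have supplied YTOPS_AUTH_ENV.
args_auth_env = None

os.environ['YTOPS_AUTH_ENV'] = 'sim_auth'  # stands in for load_dotenv()

# Backfill after the .env file is loaded, as the diff above does.
if args_auth_env is None:
    args_auth_env = os.getenv('YTOPS_AUTH_ENV')
print(args_auth_env)  # sim_auth
```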
@@ -2190,8 +2272,46 @@ def main_profile_manager(args):
             sys.stdout.flush()
 
     elif args.profile_command == 'set-proxy-state':
-        success = manager.set_proxy_state(args.proxy_url, args.state, args.duration_minutes)
-        return 0 if success else 1
+        proxy_urls = [p.strip() for p in args.proxy_urls.split(',') if p.strip()]
+        if not proxy_urls:
+            print("Error: No proxy URLs provided.", file=sys.stderr)
+            return 1
+
+        envs_to_process = []
+        if args.auth_env:
+            envs_to_process.append(args.auth_env)
+        if args.download_env:
+            envs_to_process.append(args.download_env)
+
+        if envs_to_process:
+            # If --auth-env or --download-env are used, operate on them.
+            all_success = True
+            for env_name in set(envs_to_process):
+                # When operating on specific envs, derive prefix from env name, ignoring --legacy and --key-prefix.
+                # This aligns with the behavior of the 'list' command in dual-mode.
+                key_prefix_for_env = f"{env_name}_profile_mgmt_"
+                print(f"--- Setting proxy state for environment: {env_name} (prefix: {key_prefix_for_env}) ---", file=sys.stderr)
+                env_manager = ProfileManager(
+                    redis_host=args.redis_host,
+                    redis_port=args.redis_port,
+                    redis_password=args.redis_password,
+                    key_prefix=key_prefix_for_env,
+                    redis_db=args.redis_db
+                )
+                for proxy_url in proxy_urls:
+                    success = env_manager.set_proxy_state(proxy_url, args.state, args.duration_minutes, args.reason)
+                    if not success:
+                        all_success = False
+            return 0 if all_success else 1
+        else:
+            # Fallback to the single manager created with --env, --legacy, or --key-prefix.
+            # This maintains backward compatibility and handles single-environment cases.
+            all_success = True
+            for proxy_url in proxy_urls:
+                success = manager.set_proxy_state(proxy_url, args.state, args.duration_minutes, args.reason)
+                if not success:
+                    all_success = False
+            return 0 if all_success else 1
 
     elif args.profile_command == 'get':
         profile = manager.get_profile(args.name)
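A standalone sketch of the dispatch above: split the comma-separated proxy list, then derive one Redis key prefix per targeted environment (the environment names and proxy URLs below are invented):

```python
raw = "socks5://p1:1080, socks5://p2:1080"  # invented proxy URLs
proxy_urls = [p.strip() for p in raw.split(',') if p.strip()]

for env_name in ('sim_auth', 'sim_download'):  # invented env names
    key_prefix = f"{env_name}_profile_mgmt_"   # same derivation as the diff
    for proxy_url in proxy_urls:
        print(f"[{key_prefix}] set {proxy_url} -> RESTING")
```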
@@ -2226,31 +2346,211 @@ def main_profile_manager(args):
         return 0
 
     elif args.profile_command == 'update-state':
-        success = manager.update_profile_state(args.name, args.state, args.reason or '')
-        return 0 if success else 1
+        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
+        if not names_or_patterns:
+            print("Error: No profile names or patterns provided.", file=sys.stderr)
+            return 1
+
+        all_profiles = manager.list_profiles()
+        all_profile_names = {p['name'] for p in all_profiles}
+
+        profiles_to_update = set()
+        for item in names_or_patterns:
+            if '*' in item or '?' in item:
+                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
+                if not matched_profiles:
+                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
+                profiles_to_update.update(matched_profiles)
+            else:
+                if item in all_profile_names:
+                    profiles_to_update.add(item)
+                else:
+                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)
+
+        if not profiles_to_update:
+            print("No matching profiles found to update.", file=sys.stderr)
+            return 1
+
+        print(f"The following {len(profiles_to_update)} profiles will be updated to state '{args.state}':")
+        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
+        confirm = input("Are you sure you want to proceed? (y/N): ")
+        if confirm.lower() != 'y':
+            print("Aborted.")
+            return 1
+
+        all_success = True
+        for name in sorted(list(profiles_to_update), key=natural_sort_key):
+            success = manager.update_profile_state(name, args.state, args.reason or '')
+            if not success:
+                all_success = False
+        return 0 if all_success else 1
 
     elif args.profile_command == 'update-field':
         success = manager.update_profile_field(args.name, args.field, args.value)
         return 0 if success else 1
 
     elif args.profile_command == 'pause':
-        success = manager.update_profile_state(args.name, ProfileState.PAUSED.value, 'Manual pause')
-        return 0 if success else 1
+        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
+        if not names_or_patterns:
+            print("Error: No profile names or patterns provided.", file=sys.stderr)
+            return 1
+
+        all_profiles = manager.list_profiles()
+        all_profile_names = {p['name'] for p in all_profiles}
+
+        profiles_to_update = set()
+        for item in names_or_patterns:
+            if '*' in item or '?' in item:
+                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
+                if not matched_profiles:
+                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
+                profiles_to_update.update(matched_profiles)
+            else:
+                if item in all_profile_names:
+                    profiles_to_update.add(item)
+                else:
+                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)
+
+        if not profiles_to_update:
+            print("No matching profiles found to update.", file=sys.stderr)
+            return 1
+
+        print(f"The following {len(profiles_to_update)} profiles will be PAUSED:")
+        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
+        confirm = input("Are you sure you want to proceed? (y/N): ")
+        if confirm.lower() != 'y':
+            print("Aborted.")
+            return 1
+
+        all_success = True
+        for name in sorted(list(profiles_to_update), key=natural_sort_key):
+            success = manager.update_profile_state(name, ProfileState.PAUSED.value, 'Manual pause')
+            if not success:
+                all_success = False
+        return 0 if all_success else 1
 
     elif args.profile_command == 'activate':
-        success = manager.update_profile_state(args.name, ProfileState.ACTIVE.value, 'Manual activation')
-        return 0 if success else 1
+        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
+        if not names_or_patterns:
+            print("Error: No profile names or patterns provided.", file=sys.stderr)
+            return 1
+
+        all_profiles = manager.list_profiles()
+        all_profile_names = {p['name'] for p in all_profiles}
+
+        profiles_to_update = set()
+        for item in names_or_patterns:
+            if '*' in item or '?' in item:
+                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
+                if not matched_profiles:
+                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
+                profiles_to_update.update(matched_profiles)
+            else:
+                if item in all_profile_names:
+                    profiles_to_update.add(item)
+                else:
+                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)
+
+        if not profiles_to_update:
+            print("No matching profiles found to update.", file=sys.stderr)
+            return 1
+
+        print(f"The following {len(profiles_to_update)} profiles will be ACTIVATED:")
+        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
+        confirm = input("Are you sure you want to proceed? (y/N): ")
+        if confirm.lower() != 'y':
+            print("Aborted.")
+            return 1
+
+        all_success = True
+        for name in sorted(list(profiles_to_update), key=natural_sort_key):
+            success = manager.update_profile_state(name, ProfileState.ACTIVE.value, 'Manual activation')
+            if not success:
+                all_success = False
+        return 0 if all_success else 1
 
     elif args.profile_command == 'ban':
-        success = manager.update_profile_state(args.name, ProfileState.BANNED.value, args.reason)
-        return 0 if success else 1
+        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
+        if not names_or_patterns:
+            print("Error: No profile names or patterns provided.", file=sys.stderr)
+            return 1
+
+        all_profiles = manager.list_profiles()
+        all_profile_names = {p['name'] for p in all_profiles}
+
+        profiles_to_update = set()
+        for item in names_or_patterns:
+            if '*' in item or '?' in item:
+                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
+                if not matched_profiles:
+                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
+                profiles_to_update.update(matched_profiles)
+            else:
+                if item in all_profile_names:
+                    profiles_to_update.add(item)
+                else:
+                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)
+
+        if not profiles_to_update:
+            print("No matching profiles found to update.", file=sys.stderr)
+            return 1
+
+        print(f"The following {len(profiles_to_update)} profiles will be BANNED:")
+        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
+        confirm = input("Are you sure you want to proceed? (y/N): ")
+        if confirm.lower() != 'y':
+            print("Aborted.")
+            return 1
+
+        all_success = True
+        for name in sorted(list(profiles_to_update), key=natural_sort_key):
+            success = manager.update_profile_state(name, ProfileState.BANNED.value, args.reason)
+            if not success:
+                all_success = False
+        return 0 if all_success else 1
 
     elif args.profile_command == 'unban':
-        # First activate, then reset session counters. The ban reason is cleared by update_profile_state.
-        success = manager.update_profile_state(args.name, ProfileState.ACTIVE.value, 'Manual unban')
-        if success:
-            manager.reset_profile_counters(args.name)
-        return 0 if success else 1
+        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
+        if not names_or_patterns:
+            print("Error: No profile names or patterns provided.", file=sys.stderr)
+            return 1
+
+        all_profiles = manager.list_profiles()
+        all_profile_names = {p['name'] for p in all_profiles}
+
+        profiles_to_update = set()
+        for item in names_or_patterns:
+            if '*' in item or '?' in item:
+                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
+                if not matched_profiles:
+                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
+                profiles_to_update.update(matched_profiles)
+            else:
+                if item in all_profile_names:
+                    profiles_to_update.add(item)
+                else:
+                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)
+
+        if not profiles_to_update:
+            print("No matching profiles found to update.", file=sys.stderr)
+            return 1
+
+        print(f"The following {len(profiles_to_update)} profiles will be UNBANNED:")
+        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
+        confirm = input("Are you sure you want to proceed? (y/N): ")
+        if confirm.lower() != 'y':
+            print("Aborted.")
+            return 1
+
+        all_success = True
+        for name in sorted(list(profiles_to_update), key=natural_sort_key):
+            # First activate, then reset session counters. The ban reason is cleared by update_profile_state.
+            success = manager.update_profile_state(name, ProfileState.ACTIVE.value, 'Manual unban')
+            if success:
+                manager.reset_profile_counters(name)
+            if not success:
+                all_success = False
+        return 0 if all_success else 1
 
     elif args.profile_command == 'delete':
         if not args.confirm:
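All five bulk commands (`update-state`, `pause`, `activate`, `ban`, `unban`) share the same selection logic. Extracted as a standalone helper (profile names invented):

```python
import fnmatch

def expand_selection(spec, all_profile_names):
    """Expand a comma-separated list of names and '*'/'?' patterns."""
    selected = set()
    for item in (n.strip() for n in spec.split(',') if n.strip()):
        if '*' in item or '?' in item:
            selected.update(n for n in all_profile_names if fnmatch.fnmatch(n, item))
        elif item in all_profile_names:
            selected.add(item)
    return selected

names = {'user31_0', 'user31_1', 'user32_0'}
print(sorted(expand_selection('user31_*', names)))           # ['user31_0', 'user31_1']
print(sorted(expand_selection('user32_0,user31_1', names)))  # ['user31_1', 'user32_0']
```

The interactive confirmation in each handler then operates on this expanded set.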
@@ -2267,6 +2567,9 @@ def main_profile_manager(args):
         print(f"Deleted {deleted_count} key(s) with prefix '{manager.key_prefix}'.")
         return 0
 
+    elif args.profile_command == 'cleanup-ungrouped':
+        return _main_cleanup_ungrouped(args)
+
     elif args.profile_command == 'reset-global-counters':
         manager.reset_global_counters()
         return 0
@@ -2310,3 +2613,286 @@ def main_profile_manager(args):
         return 0
 
     return 1  # Should not be reached
+
+
+def _main_cleanup_ungrouped(args):
+    """Handler for the 'cleanup-ungrouped' command."""
+    try:
+        with open(args.policy_file, 'r', encoding='utf-8') as f:
+            policy = yaml.safe_load(f)
+    except (IOError, yaml.YAMLError) as e:
+        print(f"Error: Could not read or parse policy file '{args.policy_file}': {e}", file=sys.stderr)
+        return 1
+
+    common_pools = policy.get('common_pools', [])
+    if not common_pools:
+        print(f"Warning: No 'common_pools' found in '{args.policy_file}'. Nothing to do.", file=sys.stderr)
+        return 0
+
+    desired_profiles = {}  # name -> proxy
+    for pool in common_pools:
+        prefixes = pool.get('prefixes', [])
+        count = pool.get('count', 0)
+        proxy = pool.get('proxy')
+        if not proxy:
+            print(f"Error: Pool with prefixes {prefixes} is missing a 'proxy' definition.", file=sys.stderr)
+            return 1
+        for prefix in prefixes:
+            for i in range(count):
+                name = f"{prefix}_{i}"
+                desired_profiles[name] = proxy
+
+    desired_profile_names = set(desired_profiles.keys())
+    print(f"Loaded setup policy. Found {len(desired_profile_names)} desired profiles across {len(common_pools)} pools.")
+
+    total_deleted = 0
+    total_skipped = 0
+    total_created = 0
+    total_updated = 0
+
+    # --- Paired Cleanup Mode ---
+    if args.auth_env and args.download_env:
+        print("\n--- Running in Paired Cleanup Mode ---")
+        if not args.disallow_cleanup_active_downloads:
+            print("Cleanup of active download profiles is ENABLED (default). Auth profiles will be checked for idleness.")
+            print("If an auth profile is idle, its corresponding download profile will be removed regardless of its state.")
+        else:
+            print("Cleanup of active download profiles is DISABLED. Both auth and download profiles must be idle to be removed.")
+        if args.dry_run:
+            print("--- DRY RUN MODE: No changes will be made. ---")
+
+        def _create_cleanup_manager(env_name):
+            key_prefix = f"{env_name}_profile_mgmt_"
+            if args.legacy: key_prefix = 'profile_mgmt_'
+            if args.key_prefix: key_prefix = args.key_prefix
+            return ProfileManager(
+                redis_host=args.redis_host, redis_port=args.redis_port,
+                redis_password=args.redis_password, key_prefix=key_prefix,
+                redis_db=args.redis_db
+            )
+
+        auth_manager = _create_cleanup_manager(args.auth_env)
+        download_manager = _create_cleanup_manager(args.download_env)
+
+        auth_profiles_map = {p['name']: p for p in auth_manager.list_profiles()}
+        download_profiles_map = {p['name']: p for p in download_manager.list_profiles()}
+
+        # --- Create Missing Profiles ---
+        auth_missing = desired_profile_names - set(auth_profiles_map.keys())
+        download_missing = desired_profile_names - set(download_profiles_map.keys())
+
+        if auth_missing or download_missing:
+            if args.no_create_missing:
+                if auth_missing: print(f"Found {len(auth_missing)} missing profiles in '{args.auth_env}', creation disabled via --no-create-missing.")
+                if download_missing: print(f"Found {len(download_missing)} missing profiles in '{args.download_env}', creation disabled via --no-create-missing.")
+            else:
+                if auth_missing:
+                    print(f"Found {len(auth_missing)} missing profiles in '{args.auth_env}'. Creating them...")
+                    for name in sorted(list(auth_missing), key=natural_sort_key):
+                        proxy = desired_profiles.get(name)
+                        print(f" - {'[DRY RUN] Would create' if args.dry_run else 'Creating'} profile '{name}' with proxy '{proxy}' in '{args.auth_env}'")
+                        if not args.dry_run:
+                            if auth_manager.create_profile(name, proxy): total_created += 1
+                if download_missing:
+                    print(f"Found {len(download_missing)} missing profiles in '{args.download_env}'. Creating them...")
+                    for name in sorted(list(download_missing), key=natural_sort_key):
+                        proxy = desired_profiles.get(name)
+                        print(f" - {'[DRY RUN] Would create' if args.dry_run else 'Creating'} profile '{name}' with proxy '{proxy}' in '{args.download_env}'")
+                        if not args.dry_run:
+                            if download_manager.create_profile(name, proxy): total_created += 1
+
+                # Refresh maps after creation
+                auth_profiles_map = {p['name']: p for p in auth_manager.list_profiles()}
+                download_profiles_map = {p['name']: p for p in download_manager.list_profiles()}
+
+        # --- Update Proxies for Existing Profiles ---
+        auth_existing_policy_names = set(auth_profiles_map.keys()) & desired_profile_names
+        if auth_existing_policy_names:
+            print(f"\nChecking for proxy updates in '{args.auth_env}'...")
+            for name in sorted(list(auth_existing_policy_names), key=natural_sort_key):
+                current_proxy = auth_profiles_map[name].get('proxy')
+                desired_proxy = desired_profiles.get(name)
+                if current_proxy != desired_proxy and desired_proxy:
+                    print(f" - {'[DRY RUN] Would update' if args.dry_run else 'Updating'} proxy for profile '{name}': from '{current_proxy}' to '{desired_proxy}'")
+                    if not args.dry_run:
+                        if auth_manager.update_profile_field(name, 'proxy', desired_proxy): total_updated += 1
+
+        download_existing_policy_names = set(download_profiles_map.keys()) & desired_profile_names
+        if download_existing_policy_names:
+            print(f"\nChecking for proxy updates in '{args.download_env}'...")
+            for name in sorted(list(download_existing_policy_names), key=natural_sort_key):
+                current_proxy = download_profiles_map[name].get('proxy')
+                desired_proxy = desired_profiles.get(name)
+                if current_proxy != desired_proxy and desired_proxy:
+                    print(f" - {'[DRY RUN] Would update' if args.dry_run else 'Updating'} proxy for profile '{name}': from '{current_proxy}' to '{desired_proxy}'")
+                    if not args.dry_run:
+                        if download_manager.update_profile_field(name, 'proxy', desired_proxy): total_updated += 1
+
+        # --- Delete Ungrouped Profiles ---
+        auth_ungrouped = set(auth_profiles_map.keys()) - desired_profile_names
+        download_ungrouped = set(download_profiles_map.keys()) - desired_profile_names
+        all_ungrouped = sorted(list(auth_ungrouped | download_ungrouped), key=natural_sort_key)
+
+        if not all_ungrouped:
+            print("\nNo ungrouped profiles found in either environment to clean up.")
+        else:
+            print(f"\nFound {len(all_ungrouped)} ungrouped profile(s) across both environments to consider for deletion.")
+            for name in all_ungrouped:
+                auth_profile = auth_profiles_map.get(name)
+                download_profile = download_profiles_map.get(name)
+
+                can_delete_auth, auth_skip_reasons = False, []
+                if auth_profile:
+                    state = auth_profile.get('state', 'UNKNOWN')
+                    pending_dls = auth_manager.get_pending_downloads(name)
+                    rest_reason = auth_profile.get('rest_reason')
+
+                    is_safe = True
+                    if state in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
+                        is_safe = False; auth_skip_reasons.append(f"auth is '{state}'")
+                    if pending_dls > 0:
+                        is_safe = False; auth_skip_reasons.append(f"auth has {pending_dls} pending DLs")
+                    if rest_reason == 'waiting_downloads':
+                        is_safe = False; auth_skip_reasons.append("auth is 'waiting_downloads'")
+                    if is_safe: can_delete_auth = True
+
+                can_delete_download, download_skip_reasons = False, []
+                if download_profile:
+                    state = download_profile.get('state', 'UNKNOWN')
+                    is_safe = state not in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]
+                    if not is_safe: download_skip_reasons.append(f"download is '{state}'")
+                    if is_safe: can_delete_download = True
+
+                if auth_profile and not can_delete_auth:
+                    print(f" - SKIPPING '{name}' because its auth profile is busy ({', '.join(auth_skip_reasons)}).")
+                    total_skipped += 1
+                    continue
+
+                was_download_busy = download_profile and not can_delete_download
+                if auth_profile and can_delete_auth and not args.disallow_cleanup_active_downloads:
+                    can_delete_download = True  # The override rule
+
+                if (auth_profile and can_delete_auth) or (download_profile and can_delete_download):
+                    msgs = []
+                    if auth_profile and can_delete_auth: msgs.append(f"Auth (State: {auth_profile.get('state', 'N/A')})")
+                    if download_profile and can_delete_download:
+                        dl_msg = f"Download (State: {download_profile.get('state', 'N/A')})"
+                        if was_download_busy: dl_msg += " <-- DELETING ACTIVE"
+                        msgs.append(dl_msg)
+
+                    print(f" - Deleting '{name}': {'; '.join(msgs)}")
+                    if not args.dry_run:
+                        if auth_profile and can_delete_auth: auth_manager.delete_profile(name)
+                        if download_profile and can_delete_download: download_manager.delete_profile(name)
+                    total_deleted += 1
+                else:
+                    print(f" - SKIPPING '{name}' because its download profile is busy ({', '.join(download_skip_reasons)}) and no idle auth profile exists to override.")
+                    total_skipped += 1
+    # --- Single Environment Cleanup Mode ---
+    else:
+        if args.dry_run:
+            print("--- DRY RUN MODE: No changes will be made. ---")
+
+        envs_to_clean = []
+        if args.auth_env: envs_to_clean.append(args.auth_env)
+        if args.download_env: envs_to_clean.append(args.download_env)
+
+        if not envs_to_clean:
+            if args.env: envs_to_clean.append(args.env)
+            else:
+                print("Error: You must specify at least one environment to clean up (e.g., --auth-env sim_auth, --download-env sim_download, or --env dev).", file=sys.stderr)
+                return 1
+
+        for env_name in envs_to_clean:
+            print(f"\n--- Cleaning environment: {env_name} ---")
+
+            key_prefix = f"{env_name}_profile_mgmt_"
+            if args.legacy: key_prefix = 'profile_mgmt_'
+            if args.key_prefix: key_prefix = args.key_prefix
+
+            manager = ProfileManager(
+                redis_host=args.redis_host, redis_port=args.redis_port,
+                redis_password=args.redis_password, key_prefix=key_prefix,
+                redis_db=args.redis_db
+            )
+
+            current_profiles_list = manager.list_profiles()
+            current_profiles_map = {p['name']: p for p in current_profiles_list}
+            current_profile_names = set(current_profiles_map.keys())
+            ungrouped_names = current_profile_names - desired_profile_names
+            missing_names = desired_profile_names - current_profile_names
+            existing_policy_names = current_profile_names & desired_profile_names
+
+            # --- Update Proxies for Existing Profiles ---
+            profiles_updated_in_env = 0
+            if existing_policy_names:
+                print("Checking for proxy updates for existing profiles...")
+                for name in sorted(list(existing_policy_names), key=natural_sort_key):
+                    current_proxy = current_profiles_map[name].get('proxy')
+                    desired_proxy = desired_profiles.get(name)
+                    if current_proxy != desired_proxy and desired_proxy:
+                        print(f" - {'[DRY RUN] Would update' if args.dry_run else 'Updating'} proxy for profile '{name}': from '{current_proxy}' to '{desired_proxy}'")
+                        if not args.dry_run:
+                            if manager.update_profile_field(name, 'proxy', desired_proxy):
+                                profiles_updated_in_env += 1
+                total_updated += profiles_updated_in_env
+
+            profiles_created_in_env = 0
+            if missing_names:
+                print(f"Found {len(missing_names)} profile(s) defined in the policy that do not exist in Redis.")
+                if args.no_create_missing:
+                    print("Creation of missing profiles is disabled via --no-create-missing.")
+                else:
+                    if not args.dry_run:
+                        print("Creating missing profiles...")
+
+                    for name in sorted(list(missing_names), key=natural_sort_key):
+                        proxy = desired_profiles.get(name)
+                        print(f" - {'[DRY RUN] Would create' if args.dry_run else 'Creating'} profile '{name}' with proxy '{proxy}'")
+                        if not args.dry_run:
+                            if proxy:
+                                if manager.create_profile(name, proxy):
+                                    profiles_created_in_env += 1
+                            else:
+                                # This should not happen due to the check at the start
+                                print(f" - SKIPPING '{name}' because its proxy could not be determined from the policy.", file=sys.stderr)
+                    total_created += profiles_created_in_env
+
+            if not ungrouped_names:
+                print("No ungrouped profiles found to clean up.")
+                if profiles_created_in_env == 0:
+                    continue
+
+            print(f"Found {len(ungrouped_names)} ungrouped profile(s) to consider for deletion.")
+            profiles_to_check = [p for p in current_profiles_list if p['name'] in ungrouped_names]
+
+            for profile in sorted(profiles_to_check, key=lambda p: natural_sort_key(p['name'])):
+                name = profile['name']
+                state = profile.get('state', 'UNKNOWN')
+                pending_dls = manager.get_pending_downloads(name)
+
+                is_safe, reasons = True, []
+                if state in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
+                    is_safe = False; reasons.append(f"is in '{state}' state")
+                if pending_dls > 0:
+                    is_safe = False; reasons.append(f"has {pending_dls} pending download(s)")
+                if profile.get('rest_reason') == 'waiting_downloads':
+                    is_safe = False; reasons.append("is in 'waiting_downloads' state")
+
+                if is_safe:
+                    print(f" - Deleting '{name}' (State: {state}, Pending DLs: {pending_dls})")
+                    if not args.dry_run: manager.delete_profile(name)
+                    total_deleted += 1
+                else:
+                    print(f" - SKIPPING '{name}' because it {', '.join(reasons)}.")
+                    total_skipped += 1
+
+    print("\n--- Cleanup Summary ---")
+    print(f"Total profiles created: {total_created}")
+    print(f"Total profiles updated: {total_updated}")
+    print(f"Total profiles deleted: {total_deleted}")
+    print(f"Total profiles skipped (still active or has pending work): {total_skipped}")
+    if total_skipped > 0:
+        print("Run the cleanup command again later to remove the skipped profiles once they are idle.")
+
+    return 0
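The desired-profile set is derived mechanically from `common_pools`: every prefix is crossed with `range(count)`. A standalone sketch with an invented pool definition:

```python
# Invented pool definition; a real one comes from the setup policy YAML.
common_pools = [
    {'prefixes': ['user1', 'user2'], 'count': 2, 'proxy': 'socks5://proxy-a:1080'},
]

desired_profiles = {}  # name -> proxy
for pool in common_pools:
    for prefix in pool.get('prefixes', []):
        for i in range(pool.get('count', 0)):
            desired_profiles[f"{prefix}_{i}"] = pool['proxy']

print(sorted(desired_profiles))  # ['user1_0', 'user1_1', 'user2_0', 'user2_1']
```

Everything in Redis but not in this set is "ungrouped" and becomes a deletion candidate; everything in this set but not in Redis is "missing" and becomes a creation candidate.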
|
|||||||
@ -4,6 +4,7 @@ Redis Queue Management CLI Tool for yt-ops-client.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@ -23,6 +24,15 @@ except ImportError:
|
|||||||
print("'tabulate' library not found. Please install it with: pip install tabulate", file=sys.stderr)
|
print("'tabulate' library not found. Please install it with: pip install tabulate", file=sys.stderr)
|
||||||
tabulate = None
|
tabulate = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
except ImportError:
|
||||||
|
print("PyYAML is not installed. Please install it with: pip install PyYAML", file=sys.stderr)
|
||||||
|
yaml = None
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import fnmatch
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
@@ -31,9 +41,53 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 
+def _find_configured_queues(policies_dir="policies", env=None):
+    """Scans YAML files in a directory to find configured queue names."""
+    if not yaml:
+        return set()
+
+    expected_queues = set()
+    policies_path = Path(policies_dir)
+    if not policies_path.is_dir():
+        logger.debug(f"Policies directory '{policies_dir}' not found, cannot find expected queues.")
+        return set()
+
+    for policy_file in policies_path.glob("*.yaml"):
+        try:
+            with open(policy_file, 'r', encoding='utf-8') as f:
+                policy_data = yaml.safe_load(f)
+
+            if not isinstance(policy_data, dict):
+                continue
+
+            queue_policy = policy_data.get('queue_policy')
+            if not isinstance(queue_policy, dict):
+                continue
+
+            use_prefix = queue_policy.get('use_env_prefix', True)
+            prefix = ""
+            if use_prefix and env:
+                prefix = f"{env}_"
+
+            for key, value in queue_policy.items():
+                if key.endswith('_queue') and isinstance(value, str):
+                    expected_queues.add(f"{prefix}{value}")
+        except (IOError, yaml.YAMLError) as e:
+            logger.debug(f"Could not parse policy {policy_file} to find queues: {e}")
+            continue
+    return expected_queues
+
+
 class QueueManager:
     """Manages Redis lists (queues)."""
 
+    def _push_state_key(self, queue_name: str, file_path: str) -> str:
+        """Get Redis key for storing the last pushed index for a given queue and file."""
+        # Use a hash of the absolute file path to create a consistent, safe key.
+        abs_path = os.path.abspath(file_path)
+        path_hash = hashlib.sha256(abs_path.encode()).hexdigest()
+        return f"ytops_client:queue_push_state:{queue_name}:{path_hash}"
+
     def __init__(self, redis_host='localhost', redis_port=6379, redis_password=None):
         """Initialize Redis connection."""
         logger.info(f"Attempting to connect to Redis at {redis_host}:{redis_port}...")
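For reference, `_find_configured_queues` only inspects top-level `queue_policy` keys ending in `_queue`. A minimal sketch of the expected policy shape and the derived names, using an inline YAML string instead of the `policies/` directory (the snippet and its key values are illustrative assumptions, not a real policy file):

```python
import yaml  # PyYAML

# Hypothetical policy snippet mirroring the keys the scan looks for.
POLICY = """
queue_policy:
  use_env_prefix: true
  inbox_queue: stress_inbox
  result_queue: stress_results
"""

def derive_queue_names(policy_text, env=None):
    queue_policy = yaml.safe_load(policy_text).get('queue_policy', {})
    prefix = f"{env}_" if queue_policy.get('use_env_prefix', True) and env else ""
    return {f"{prefix}{v}" for k, v in queue_policy.items()
            if k.endswith('_queue') and isinstance(v, str)}

print(derive_queue_names(POLICY, env="green"))
# {'green_stress_inbox', 'green_stress_results'} (set order may vary)
```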
@@ -70,9 +124,28 @@ class QueueManager:
         """Returns the number of items in a queue."""
         return self.redis.llen(queue_name)
 
-    def push_from_file(self, queue_name: str, file_path: str, wrap_key: Optional[str] = None) -> int:
+    def push_from_file(self, queue_name: str, file_path: str, wrap_key: Optional[str] = None, limit: Optional[int] = None, start_index: Optional[int] = None, auto_shift: bool = False) -> int:
         """Populates a queue from a file (text with one item per line, or JSON with an array of items)."""
         count = 0
+
+        # --- State management for file position ---
+        state_key = None
+        current_start_index = 0  # 0-based index
+
+        if auto_shift:
+            state_key = self._push_state_key(queue_name, file_path)
+            last_index_str = self.redis.get(state_key)
+            if last_index_str:
+                current_start_index = int(last_index_str)
+                logger.info(f"Auto-shift enabled. Resuming from line {current_start_index + 1}.")
+        elif start_index is not None:
+            # CLI provides 1-based index, convert to 0-based.
+            current_start_index = max(0, start_index - 1)
+            logger.info(f"Starting from line {current_start_index + 1} as requested.")
+        # ---
+
+        items_to_add = []
+        total_items_in_file = 0
+
         if file_path.lower().endswith('.json'):
             if wrap_key:
@@ -85,25 +158,23 @@ class QueueManager:
                     logger.error("JSON file must contain a list/array.")
                     return 0
 
+                total_items_in_file = len(data)
+                if current_start_index >= total_items_in_file:
+                    logger.info(f"Start index ({current_start_index + 1}) is past the end of the file ({total_items_in_file} items). Nothing to push.")
+                    return 0
+
+                items_to_process = data[current_start_index:]
+                if limit is not None and limit >= 0:
+                    items_to_process = items_to_process[:limit]
+
                 # Items can be strings or objects. If objects, they should be converted to JSON strings.
-                items_to_add = []
-                for item in data:
+                for item in items_to_process:
                     if isinstance(item, str):
                         items_to_add.append(item.strip())
                     else:
                         items_to_add.append(json.dumps(item))
 
                 items_to_add = [item for item in items_to_add if item]
-
-                pipe = self.redis.pipeline()
-                for item in items_to_add:
-                    pipe.rpush(queue_name, item)
-                    count += 1
-                    if count > 0 and count % 1000 == 0:
-                        pipe.execute()
-                        logger.info(f"Pushed {count} items...")
-                pipe.execute()
-
            except (IOError, json.JSONDecodeError) as e:
                 logger.error(f"Failed to read or parse JSON file '{file_path}': {e}")
                 return 0
@@ -111,24 +182,51 @@ class QueueManager:
             logger.info("Reading items from text file (one per line).")
             try:
                 with open(file_path, 'r', encoding='utf-8') as f:
-                    pipe = self.redis.pipeline()
-                    for line in f:
-                        item = line.strip()
-                        if item:
-                            if wrap_key:
-                                payload = json.dumps({wrap_key: item})
-                            else:
-                                payload = item
-                            pipe.rpush(queue_name, payload)
-                            count += 1
-                            if count > 0 and count % 1000 == 0:
-                                pipe.execute()
-                                logger.info(f"Pushed {count} items...")
-                    pipe.execute()
+                    all_lines = f.readlines()
+
+                total_items_in_file = len(all_lines)
+                if current_start_index >= total_items_in_file:
+                    logger.info(f"Start index ({current_start_index + 1}) is past the end of the file ({total_items_in_file} lines). Nothing to push.")
+                    return 0
+
+                lines_to_process = all_lines[current_start_index:]
+                if limit is not None and limit >= 0:
+                    lines_to_process = lines_to_process[:limit]
+
+                for line in lines_to_process:
+                    item = line.strip()
+                    if item:
+                        if wrap_key:
+                            payload = json.dumps({wrap_key: item})
+                        else:
+                            payload = item
+                        items_to_add.append(payload)
+
             except IOError as e:
                 logger.error(f"Failed to read file '{file_path}': {e}")
                 return 0
+
+        if items_to_add:
+            pipe = self.redis.pipeline()
+            for item in items_to_add:
+                pipe.rpush(queue_name, item)
+                count += 1
+                if count > 0 and count % 1000 == 0:
+                    pipe.execute()
+                    logger.info(f"Pushed {count} of {len(items_to_add)} items...")
+            pipe.execute()
+
+        if auto_shift and state_key:
+            new_index = current_start_index + count
+            # Don't save a new index if we've reached the end of the file.
+            # This allows re-running the command to start from the beginning again.
+            if new_index >= total_items_in_file:
+                self.redis.delete(state_key)
+                logger.info(f"Auto-shift: Reached end of file. Cleared saved position for '{os.path.basename(file_path)}'. Next run will start from the beginning.")
+            else:
+                self.redis.set(state_key, new_index)
+                logger.info(f"Auto-shift: Saved next start position for '{os.path.basename(file_path)}' as line {new_index + 1}.")
+
         logger.info(f"Finished. Pushed a total of {count} items to '{queue_name}'.")
         return count
 
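The auto-shift bookkeeping reduces to: read a saved 0-based index, push from there, then either advance the marker or clear it at end of file. A minimal sketch of that state machine with a plain dict standing in for Redis (names and the `push_window` helper are hypothetical; empty-line filtering is omitted for brevity):

```python
state = {}  # stands in for the Redis key ytops_client:queue_push_state:<queue>:<hash>

def push_window(lines, key, limit=None):
    start = state.get(key, 0)
    batch = lines[start:]
    if limit is not None and limit >= 0:
        batch = batch[:limit]
    new_index = start + len(batch)
    if new_index >= len(lines):
        state.pop(key, None)   # end of file: next run restarts from the top
    else:
        state[key] = new_index
    return batch

lines = ["url1", "url2", "url3", "url4", "url5"]
print(push_window(lines, "q1", limit=2))  # ['url1', 'url2']
print(push_window(lines, "q1", limit=2))  # ['url3', 'url4']
print(push_window(lines, "q1", limit=2))  # ['url5'] -> marker cleared
print(push_window(lines, "q1", limit=2))  # ['url1', 'url2'] again
```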
@@ -230,7 +328,11 @@ def add_queue_manager_parser(subparsers):
     # Push command
     push_parser = subparsers.add_parser('push', help='Push items to a queue from a file, a generator, or a static payload.', parents=[common_parser])
     push_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.")
-    push_parser.add_argument('--count', type=int, default=1, help='Number of items to push (for --payload-json or --generate-payload-prefix).')
+    push_parser.add_argument('--count', type=int, default=None, help='Number of items to push. For --from-file, limits the number of lines pushed. For other sources, specifies how many items to generate/push (defaults to 1).')
+
+    shift_group = push_parser.add_mutually_exclusive_group()
+    shift_group.add_argument('--start', type=int, help='For --from-file, start pushing from this line number (1-based).')
+    shift_group.add_argument('--auto-shift', action='store_true', help="For --from-file, automatically resume from where the last push left off. State is stored in Redis.")
 
     source_group = push_parser.add_mutually_exclusive_group(required=True)
     source_group.add_argument('--from-file', dest='file_path', help='Path to a file containing items to add (one per line, or a JSON array).')
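`--start` and `--auto-shift` are mutually exclusive by construction, while `--count` now doubles as a line limit for `--from-file`. A quick standalone check of how argparse enforces that (a sketch, not the tool's actual parser):

```python
import argparse

parser = argparse.ArgumentParser(prog='push-demo')
parser.add_argument('--count', type=int, default=None)
shift_group = parser.add_mutually_exclusive_group()
shift_group.add_argument('--start', type=int)
shift_group.add_argument('--auto-shift', action='store_true')

print(parser.parse_args(['--auto-shift', '--count', '100']))
# Namespace(count=100, start=None, auto_shift=True)

# parser.parse_args(['--start', '5', '--auto-shift'])
# -> error: argument --auto-shift: not allowed with argument --start
```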
@@ -285,10 +387,26 @@ def main_queue_manager(args):
         print(f"INFO: No queue name specified, defaulting to '{default_queue_name}' based on --env='{args.env}'.", file=sys.stderr)
 
     if args.queue_command == 'list':
-        queues = manager.list_queues(args.pattern)
+        queues_from_redis = manager.list_queues(args.pattern)
+
+        # Discover queues from policy files
+        expected_queues_from_policies = _find_configured_queues(env=args.env)
+
+        # Merge Redis results with policy-defined queues
+        all_queues_map = {q['name']: q for q in queues_from_redis}
+
+        for q_name in expected_queues_from_policies:
+            if q_name not in all_queues_map:
+                # Only add if it matches the pattern filter
+                if fnmatch.fnmatch(q_name, args.pattern):
+                    all_queues_map[q_name] = {'name': q_name, 'size': 0}
+
+        queues = sorted(list(all_queues_map.values()), key=lambda x: x['name'])
+
         if not queues:
             print(f"No queues found matching pattern '{args.pattern}'.")
             return 0
 
         if tabulate:
             print(tabulate(queues, headers='keys', tablefmt='grid'))
         else:
|
|||||||
if not os.path.exists(args.file_path):
|
if not os.path.exists(args.file_path):
|
||||||
print(f"Error: File not found at '{args.file_path}'", file=sys.stderr)
|
print(f"Error: File not found at '{args.file_path}'", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
if args.count > 1:
|
manager.push_from_file(
|
||||||
logger.warning("--count is ignored when using --from-file.")
|
args.queue_name,
|
||||||
manager.push_from_file(args.queue_name, args.file_path, args.wrap_file_line_in_json)
|
args.file_path,
|
||||||
|
args.wrap_file_line_in_json,
|
||||||
|
limit=args.count,
|
||||||
|
start_index=args.start,
|
||||||
|
auto_shift=args.auto_shift
|
||||||
|
)
|
||||||
elif args.payload_json:
|
elif args.payload_json:
|
||||||
manager.push_static(args.queue_name, args.payload_json, args.count)
|
count = args.count if args.count is not None else 1
|
||||||
|
manager.push_static(args.queue_name, args.payload_json, count)
|
||||||
elif args.generate_payload_prefix:
|
elif args.generate_payload_prefix:
|
||||||
if args.count <= 0:
|
count = args.count if args.count is not None else 1
|
||||||
|
if count <= 0:
|
||||||
print("Error: --count must be 1 or greater for --generate-payload-prefix.", file=sys.stderr)
|
print("Error: --count must be 1 or greater for --generate-payload-prefix.", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
manager.push_generated(args.queue_name, args.generate_payload_prefix, args.count)
|
manager.push_generated(args.queue_name, args.generate_payload_prefix, count)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
elif args.queue_command == 'clear':
|
elif args.queue_command == 'clear':
|
||||||
|
|||||||
@@ -167,6 +167,7 @@ Overridable Policy Parameters via --set:
     parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
     parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.')
     parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
+    parser.add_argument('--workers', type=int, help='Shortcut to override the total number of workers, capping any discovery logic.')
     parser.add_argument('--profile-prefix', '--user-prefix', dest='profile_prefix', help="Shortcut to override the profile prefix for profile locking mode. Affects both auth and download stages. Can be a comma-separated list.")
     parser.add_argument('--start-from-url-index', type=int, help='Start processing from this line number (1-based) in the urls_file. Overrides saved state.')
    parser.add_argument('--expire-time-shift-minutes', type=int, help="Consider URLs expiring in N minutes as expired. Overrides policy.")
@@ -952,23 +952,41 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
                     failure_rate = dummy_settings.get('download_failure_rate', 0.0)
                     skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)
 
-                    # In dummy mode, prioritize the format from the task file, then from the policy.
-                    format_selection = info_data.get('_ytops_download_format')
-                    source_of_format = "task file"
-                    if not format_selection:
-                        format_selection = d_policy.get('formats', '')
-                        source_of_format = "policy (download_policy.formats)"
-
-                    if not format_selection:
-                        ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
-                        format_selection = ytdlp_config_overrides.get('format', '')
-                        source_of_format = "policy (ytdlp_config_overrides.format)"
-
-                    if not format_selection:
+                    # In dummy mode, prioritize the format from the task metadata.
+                    formats_to_test = None
+                    source_of_format = "unknown"
+
+                    # Prioritize format from task metadata, which supports per-format and per-url tasks.
+                    metadata = info_data.get('_ytops_metadata', {})
+                    formats_requested = metadata.get('formats_requested')
+                    if formats_requested is not None:
+                        formats_to_test = formats_requested
+                        source_of_format = "task file metadata (_ytops_metadata.formats_requested)"
+
+                    if formats_to_test is None:
+                        # Fallback for older task formats or different workflows
+                        format_selection_str = info_data.get('_ytops_download_format')
+                        if format_selection_str:
+                            source_of_format = "task file (_ytops_download_format)"
+                            formats_to_test = [f.strip() for f in format_selection_str.split(',') if f.strip()]
+
+                    if formats_to_test is None:
+                        format_selection_str = d_policy.get('formats', '')
+                        if format_selection_str:
+                            source_of_format = "policy (download_policy.formats)"
+                            formats_to_test = [f.strip() for f in format_selection_str.split(',') if f.strip()]
+
+                    if formats_to_test is None:
+                        ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
+                        format_selection_str = ytdlp_config_overrides.get('format', '')
+                        if format_selection_str:
+                            source_of_format = "policy (ytdlp_config_overrides.format)"
+                            formats_to_test = [f.strip() for f in format_selection_str.split(',') if f.strip()]
+
+                    if formats_to_test is None:
                         logger.warning(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download.")
                         formats_to_test = ['dummy_format']
                     else:
-                        formats_to_test = [f.strip() for f in format_selection.split(',') if f.strip()]
                         logger.info(f"[Worker {worker_id}] DUMMY: Simulating downloads for formats (from {source_of_format}): {', '.join(formats_to_test)}")
 
                     for format_id in formats_to_test:
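The dummy-mode format resolution is a four-step fallback chain; the first source that yields a value wins, and the winning source is recorded for the log line. A condensed sketch of the same precedence with hypothetical inputs (the source labels are shortened here):

```python
def resolve_formats(info_data, d_policy, ytdlp_overrides):
    """Return (formats_to_test, source_of_format) from the first source that matches."""
    split = lambda s: [f.strip() for f in s.split(',') if f.strip()]
    meta = info_data.get('_ytops_metadata', {})
    if meta.get('formats_requested') is not None:
        return meta['formats_requested'], 'task metadata'
    if info_data.get('_ytops_download_format'):
        return split(info_data['_ytops_download_format']), 'task file'
    if d_policy.get('formats'):
        return split(d_policy['formats']), 'download_policy.formats'
    if ytdlp_overrides.get('format'):
        return split(ytdlp_overrides['format']), 'ytdlp_config_overrides.format'
    return ['dummy_format'], 'default'

print(resolve_formats({'_ytops_download_format': '137,140'}, {}, {}))
# (['137', '140'], 'task file')
```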
@@ -1022,14 +1040,47 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
 
                     logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD SIMULATION ==========")
 
-                    # In dummy mode, we just rename the file to processed and continue to the finally block.
+                    # --- Airflow Directory Logic (Dummy Mode) ---
+                    success = downloads_processed_in_task > 0
+                    if success and d_policy.get('output_to_airflow_ready_dir'):
+                        try:
+                            video_id = info_data.get('id')
+                            if not video_id:
+                                logger.error(f"[{profile_name}] DUMMY: Could not find video ID in '{claimed_task_path_host.name}' for moving files.")
+                            else:
+                                # --- Prepare destination directory ---
+                                now = datetime.now()
+                                rounded_minute = (now.minute // 10) * 10
+                                timestamp_str = now.strftime('%Y%m%dT%H') + f"{rounded_minute:02d}"
+
+                                base_path = d_policy.get('airflow_ready_dir_base_path', 'downloadfiles/videos/ready')
+                                if not os.path.isabs(base_path):
+                                    base_path = os.path.join(sp_utils._PROJECT_ROOT, base_path)
+                                final_dir_base = os.path.join(base_path, timestamp_str)
+                                final_dir_path = os.path.join(final_dir_base, video_id)
+                                os.makedirs(final_dir_path, exist_ok=True)
+
+                                # --- Copy info.json ---
+                                new_info_json_name = f"info_{video_id}.json"
+                                dest_info_json_path = os.path.join(final_dir_path, new_info_json_name)
+                                if not os.path.exists(dest_info_json_path):
+                                    shutil.copy(str(claimed_task_path_host), dest_info_json_path)
+                                    logger.info(f"[{profile_name}] DUMMY: Copied info.json to {dest_info_json_path}")
+                        except Exception as e:
+                            logger.error(f"[{profile_name}] DUMMY: Failed during post-download processing for Airflow: {e}", exc_info=True)
+
+                    # In dummy mode, we handle file cleanup and continue to the finally block.
                     try:
-                        base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
-                        processed_path = Path(f"{base_path_str}.processed")
-                        claimed_task_path_host.rename(processed_path)
-                        logger.debug(f"DUMMY MODE: Renamed processed task file to '{processed_path.name}'.")
+                        if d_policy.get('remove_source_info_json'):
+                            claimed_task_path_host.unlink()
+                            logger.debug(f"DUMMY MODE: Removed processed task file '{claimed_task_path_host.name}'.")
+                        else:
+                            base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
+                            processed_path = Path(f"{base_path_str}.processed")
+                            claimed_task_path_host.rename(processed_path)
+                            logger.debug(f"DUMMY MODE: Renamed processed task file to '{processed_path.name}'.")
                     except (OSError, IndexError) as e:
-                        logger.error(f"DUMMY MODE: Failed to rename processed task file '{claimed_task_path_host}': {e}")
+                        logger.error(f"DUMMY MODE: Failed to clean up processed task file '{claimed_task_path_host}': {e}")
 
                     continue # Skip to finally block
 
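The ready-directory layout buckets output by 10-minute windows, e.g. `.../ready/20240501T1230/<video_id>/`. A sketch of just the timestamp arithmetic:

```python
from datetime import datetime

def ready_bucket(now=None):
    # Round the minute down to the nearest multiple of 10.
    now = now or datetime.now()
    rounded_minute = (now.minute // 10) * 10
    return now.strftime('%Y%m%dT%H') + f"{rounded_minute:02d}"

print(ready_bucket(datetime(2024, 5, 1, 12, 38)))  # 20240501T1230
```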
@@ -1329,15 +1380,19 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
 
             # 6. Clean up task file
             if not queue_policy:
-                # File-based mode: rename to .processed
+                # File-based mode: rename to .processed or remove
                 try:
-                    # The claimed_task_path_host has a .LOCKED suffix, remove it before adding .processed
-                    base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
-                    processed_path = Path(f"{base_path_str}.processed")
-                    claimed_task_path_host.rename(processed_path)
-                    logger.debug(f"[{sp_utils.get_display_name(claimed_task_path_host)}] Renamed processed task file to '{processed_path.name}'.")
+                    if success and d_policy.get('remove_source_info_json'):
+                        claimed_task_path_host.unlink()
+                        logger.debug(f"[{sp_utils.get_display_name(claimed_task_path_host)}] Removed processed task file.")
+                    else:
+                        # The claimed_task_path_host has a .LOCKED suffix, remove it before adding .processed
+                        base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
+                        processed_path = Path(f"{base_path_str}.processed")
+                        claimed_task_path_host.rename(processed_path)
+                        logger.debug(f"[{sp_utils.get_display_name(claimed_task_path_host)}] Renamed processed task file to '{processed_path.name}'.")
                 except (OSError, IndexError) as e:
-                    logger.error(f"Failed to rename processed task file '{claimed_task_path_host}': {e}")
+                    logger.error(f"Failed to clean up processed task file '{claimed_task_path_host}': {e}")
             elif d_policy.get('rename_source_info_json_on_success'):
                 # Queue-based mode: respect rename policy
                 source_path_to_rename = task.get('info_json_path')
@@ -1372,11 +1427,25 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
             # but the task is being finalized, we must assume all potential downloads for this task
             # are "processed" to prevent the auth profile from getting stuck.
             if downloads_processed_in_task == 0:
-                logger.warning(f"[Worker {worker_id}] No downloads were counted for this task. Using policy to determine decrement count to avoid stuck profile.")
-                ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
-                formats_str = ytdlp_config_overrides.get('format', d_policy.get('formats', ''))
-                num_formats = formats_str.count(',') + 1 if formats_str else 1
-                downloads_processed_in_task = num_formats
+                logger.warning(f"[Worker {worker_id}] No downloads were counted for this task. Using task metadata to determine decrement count to avoid stuck profile.")
+
+                decrement_count = 1 # Default to 1 to be safe
+                metadata = info_data.get('_ytops_metadata', {})
+                granularity = metadata.get('download_task_granularity')
+                formats_requested = metadata.get('formats_requested') # Can be None
+
+                if granularity == 'per_format':
+                    # Each task file represents one format group, so decrement by 1.
+                    decrement_count = 1
+                elif granularity == 'per_url':
+                    # The task file represents all formats for a URL.
+                    decrement_count = len(formats_requested) if formats_requested else 1
+                else:
+                    # No granularity info, this may be an older task file.
+                    # Assume it's a single download task.
+                    decrement_count = 1
+
+                downloads_processed_in_task = decrement_count
                 logger.warning(f"[Worker {worker_id}] Decrementing by fallback count: {downloads_processed_in_task}")
 
             if downloads_processed_in_task > 0:
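The fallback decrement depends on how the task file was generated: a `per_format` task accounts for one download, while a `per_url` task stands for every requested format. A sketch of that decision table in isolation (hypothetical metadata values):

```python
def fallback_decrement(metadata):
    granularity = metadata.get('download_task_granularity')
    formats_requested = metadata.get('formats_requested')
    if granularity == 'per_url':
        # One task covers all requested formats for the URL.
        return len(formats_requested) if formats_requested else 1
    # 'per_format' tasks and legacy task files both count as one download.
    return 1

print(fallback_decrement({'download_task_granularity': 'per_url',
                          'formats_requested': ['137', '140']}))  # 2
print(fallback_decrement({}))  # 1
```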
File diff suppressed because it is too large
@@ -130,6 +130,47 @@ process_lock = threading.Lock()
 logger = logging.getLogger('stress_policy_tool')
 
 
+def _discover_worker_pools(discovery_config, manager_for_discovery):
+    """
+    Discovers worker pools by scanning profile prefixes in Redis.
+    Returns a list of worker pool configurations or None on error.
+    """
+    discovery_pattern = discovery_config.get('profile_prefix_pattern')
+    workers_per_group = discovery_config.get('workers_per_profile_group', 1)
+
+    if not discovery_pattern:
+        logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
+        return None
+
+    logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
+    try:
+        all_profiles = manager_for_discovery.list_profiles()
+        found_prefixes = set()
+        for profile in all_profiles:
+            profile_name = profile['name']
+            if fnmatch.fnmatch(profile_name, discovery_pattern):
+                # Assuming standard name format like 'user31_001', extract 'user31'
+                prefix = profile_name.rsplit('_', 1)[0]
+                found_prefixes.add(prefix)
+
+        if not found_prefixes:
+            logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
+            return []
+        else:
+            worker_pools = []
+            for prefix in sorted(list(found_prefixes)):
+                worker_pools.append({
+                    'profile_prefix': prefix,
+                    'workers': workers_per_group
+                })
+            logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
+            logger.info("Note: Profile group discovery runs once at startup. A restart is required to detect new profile groups.")
+            return worker_pools
+    except Exception as e:
+        logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
+        return None
+
+
 def main_stress_policy(args):
     """Main logic for the 'stress-policy' command."""
     if args.list_policies:
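Discovery collapses profile names such as `user31_001` to their prefix `user31` and emits one pool per prefix. The grouping step on its own, without Redis (inputs are illustrative):

```python
import fnmatch

profile_names = ['user31_001', 'user31_002', 'user32_001', 'other_001']
pattern = 'user3*'
workers_per_group = 2

# Collapse matching names to their prefix (text before the final underscore).
prefixes = sorted({name.rsplit('_', 1)[0]
                   for name in profile_names
                   if fnmatch.fnmatch(name, pattern)})
pools = [{'profile_prefix': p, 'workers': workers_per_group} for p in prefixes]
print(pools)
# [{'profile_prefix': 'user31', 'workers': 2}, {'profile_prefix': 'user32', 'workers': 2}]
```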
@@ -362,10 +403,21 @@ def main_stress_policy(args):
         logger.info(f"\nSignal {signum} received, shutting down gracefully...")
         shutdown_event.set()
+
+        # Propagate signal to child processes to allow them to shut down gracefully.
+        with process_lock:
+            if running_processes:
+                logger.info(f"Propagating signal to {len(running_processes)} running subprocess(es)...")
+                for p in running_processes:
+                    try:
+                        # Send the same signal to the entire process group.
+                        os.killpg(os.getpgid(p.pid), signum)
+                    except (ProcessLookupError, PermissionError):
+                        pass # Process already finished or we lack permissions
+
         # Save state immediately to prevent loss on interrupt.
         logger.info("Attempting to save state before shutdown...")
         state_manager.close()
-        logger.info("Shutdown requested. Allowing in-progress tasks to complete. No new tasks will be started. Press Ctrl+C again to force exit.")
+        logger.info("Shutdown requested. Signalling in-progress tasks to terminate gracefully. No new tasks will be started. Press Ctrl+C again to force exit.")
     else:
         logger.info("Second signal received, forcing exit.")
         # On second signal, forcefully terminate subprocesses.
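Forwarding the signal with `os.killpg` only behaves as intended if each subprocess runs in its own process group (for example, launched with `start_new_session=True`); otherwise `os.getpgid(p.pid)` can resolve to the orchestrator's own group and the signal loops back. A minimal POSIX-only sketch (assumes a `sleep` binary is available):

```python
import os
import signal
import subprocess
import time

# start_new_session=True gives the child its own process group (POSIX).
p = subprocess.Popen(['sleep', '60'], start_new_session=True)
time.sleep(0.2)

os.killpg(os.getpgid(p.pid), signal.SIGTERM)  # signal the whole group
print(p.wait())  # -15 (terminated by SIGTERM)
```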
@@ -579,6 +631,7 @@ def main_stress_policy(args):
             logger.info(f"Starting/resuming from URL index {start_index + 1}.")
             # The worker's get_next_url_batch will respect this starting index.
 
+        logger.info(f"Task source file: {os.path.abspath(urls_file)}")
         sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
         if args.dry_run: return 0
 
@@ -586,13 +639,8 @@ def main_stress_policy(args):
         worker_pools = exec_control.get('worker_pools', [])
         discovery_config = exec_control.get('worker_pool_discovery')
 
-        if discovery_config:
-            if worker_pools:
-                logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-            discovery_pattern = discovery_config.get('profile_prefix_pattern')
-            workers_per_group = discovery_config.get('workers_per_profile_group', 1)
-
+        if discovery_config and not worker_pools:
+            logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
             direct_policy = policy.get('direct_batch_cli_policy', {})
             use_env = direct_policy.get('use_profile_env', 'auth')
             manager_for_discovery = profile_managers.get(use_env)
@@ -601,35 +649,10 @@ def main_stress_policy(args):
                 logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
                 return 1
 
-            if not discovery_pattern:
-                logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-                return 1
-
-            logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-            try:
-                all_profiles = manager_for_discovery.list_profiles()
-                found_prefixes = set()
-                for profile in all_profiles:
-                    profile_name = profile['name']
-                    if fnmatch.fnmatch(profile_name, discovery_pattern):
-                        # Assuming standard name format like 'user31_001', extract 'user31'
-                        prefix = profile_name.rsplit('_', 1)[0]
-                        found_prefixes.add(prefix)
-
-                if not found_prefixes:
-                    logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-                    worker_pools = []
-                else:
-                    worker_pools = []
-                    for prefix in sorted(list(found_prefixes)):
-                        worker_pools.append({
-                            'profile_prefix': prefix,
-                            'workers': workers_per_group
-                        })
-                    logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-            except Exception as e:
-                logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-                return 1
+            discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+            if discovered_pools is None:
+                return 1 # An error occurred during discovery
+            worker_pools = discovered_pools
 
         if not worker_pools and exec_control.get('workers'):
             # Fallback for legacy 'workers: N' config
@@ -641,29 +664,65 @@ def main_stress_policy(args):
             return 1
 
         if args.profile_prefix:
-            logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+            logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
+            cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
+
+            original_pool_count = len(worker_pools)
+            filtered_pools = []
             for pool in worker_pools:
-                pool['profile_prefix'] = args.profile_prefix
+                pool_prefixes_str = pool.get('profile_prefix', '')
+                if not pool_prefixes_str:
+                    continue
+                pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
+                if pool_prefixes.intersection(cli_prefixes):
+                    filtered_pools.append(pool)
+
+            if len(filtered_pools) < original_pool_count:
+                logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
+
+            worker_pools = filtered_pools
+
+            if not worker_pools:
+                logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")
+
         worker_specs = []
-        worker_id_counter = 0
-        for pool in worker_pools:
-            pool_workers = pool.get('workers', 1)
-            prefix_str = pool.get('profile_prefix', '')
-            prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
-            if not prefixes:
-                logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
-                continue
-            for i in range(pool_workers):
-                assigned_prefix = prefixes[i % len(prefixes)]
-                worker_policy = deepcopy(policy)
-                worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = assigned_prefix
-                worker_specs.append({
-                    'func': run_direct_batch_worker,
-                    'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
-                })
-                worker_id_counter += 1
+        if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
+            logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
+            all_prefixes = []
+            for pool in worker_pools:
+                prefix_str = pool.get('profile_prefix', '')
+                if prefix_str:
+                    all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
+            final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
+            logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
+
+            worker_policy = deepcopy(policy)
+            worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = final_prefix_str
+            worker_specs.append({
+                'func': run_direct_batch_worker,
+                'args': (0, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
+            })
+        else:
+            worker_id_counter = 0
+            for pool in worker_pools:
+                if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                    break
+                pool_workers = pool.get('workers', 1)
+                prefix_str = pool.get('profile_prefix', '')
+                if not prefix_str:
+                    logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+                    continue
+
+                for i in range(pool_workers):
+                    if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                        break
+                    worker_policy = deepcopy(policy)
+                    worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
+                    worker_specs.append({
+                        'func': run_direct_batch_worker,
+                        'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
+                    })
+                    worker_id_counter += 1
 
         with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
             futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
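With `--workers 1` the discovered groups collapse into one comma-separated prefix list handed to a single worker; with a higher cap, pools are consumed in order until the cap is hit. The aggregation step in isolation (pool contents are illustrative):

```python
worker_pools = [{'profile_prefix': 'user31', 'workers': 1},
                {'profile_prefix': 'user32,user33', 'workers': 1}]

all_prefixes = []
for pool in worker_pools:
    prefix_str = pool.get('profile_prefix', '')
    if prefix_str:
        all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())

# Deduplicate and sort so the single worker gets a stable prefix list.
final_prefix_str = ','.join(sorted(set(all_prefixes)))
print(final_prefix_str)  # user31,user32,user33
```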
@@ -742,6 +801,7 @@ def main_stress_policy(args):
         if start_index > 0:
             logger.info(f"Starting/resuming from URL index {start_index + 1}.")
 
+        logger.info(f"Task source file: {os.path.abspath(urls_file)}")
         sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
         if args.dry_run: return 0
 
@@ -749,50 +809,20 @@ def main_stress_policy(args):
         worker_pools = exec_control.get('worker_pools', [])
         discovery_config = exec_control.get('worker_pool_discovery')
 
-        if discovery_config:
-            if worker_pools:
-                logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-            discovery_pattern = discovery_config.get('profile_prefix_pattern')
-            workers_per_group = discovery_config.get('workers_per_profile_group', 1)
-
+        if discovery_config and not worker_pools:
+            logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
             direct_policy = policy.get('direct_docker_cli_policy', {})
             use_env = direct_policy.get('use_profile_env', 'auth' if mode == 'fetch_only' else 'download')
             manager_for_discovery = profile_managers.get(use_env)
 
             if not manager_for_discovery:
                 logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
                 return 1
 
-            if not discovery_pattern:
-                logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-                return 1
-
-            logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-            try:
-                all_profiles = manager_for_discovery.list_profiles()
-                found_prefixes = set()
-                for profile in all_profiles:
-                    profile_name = profile['name']
-                    if fnmatch.fnmatch(profile_name, discovery_pattern):
-                        # Assuming standard name format like 'user31_001', extract 'user31'
-                        prefix = profile_name.rsplit('_', 1)[0]
-                        found_prefixes.add(prefix)
-
-                if not found_prefixes:
-                    logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-                    worker_pools = []
-                else:
-                    worker_pools = []
-                    for prefix in sorted(list(found_prefixes)):
-                        worker_pools.append({
-                            'profile_prefix': prefix,
-                            'workers': workers_per_group
-                        })
-                    logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-            except Exception as e:
-                logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-                return 1
+            discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+            if discovered_pools is None:
+                return 1 # An error occurred
+            worker_pools = discovered_pools
 
         if not worker_pools and exec_control.get('workers'):
             # Fallback for legacy 'workers: N' config
@@ -804,30 +834,67 @@ def main_stress_policy(args):
             return 1
 
         if args.profile_prefix:
-            logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+            logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
+            cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
+
+            original_pool_count = len(worker_pools)
+            filtered_pools = []
             for pool in worker_pools:
-                pool['profile_prefix'] = args.profile_prefix
+                pool_prefixes_str = pool.get('profile_prefix', '')
+                if not pool_prefixes_str:
+                    continue
+                pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
+                if pool_prefixes.intersection(cli_prefixes):
+                    filtered_pools.append(pool)
+
+            if len(filtered_pools) < original_pool_count:
+                logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
+
+            worker_pools = filtered_pools
+
+            if not worker_pools:
+                logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")
+
         worker_specs = []
-        worker_id_counter = 0
-        for pool in worker_pools:
-            pool_workers = pool.get('workers', 1)
-            prefix_str = pool.get('profile_prefix', '')
-            prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
-            if not prefixes:
-                logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
-                continue
-
-            # Each worker in the pool gets the full list of prefixes from the pool configuration.
-            for i in range(pool_workers):
-                worker_policy = deepcopy(policy)
-                # The worker functions will now handle a comma-separated list of prefixes.
-                worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
-                worker_specs.append({
-                    'func': run_direct_docker_worker,
-                    'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
-                })
-                worker_id_counter += 1
+        if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
+            logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
+            all_prefixes = []
+            for pool in worker_pools:
+                prefix_str = pool.get('profile_prefix', '')
+                if prefix_str:
+                    all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
+            final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
+            logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
+
+            worker_policy = deepcopy(policy)
+            worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = final_prefix_str
+            worker_specs.append({
+                'func': run_direct_docker_worker,
+                'args': (0, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
+            })
+        else:
+            worker_id_counter = 0
+            for pool in worker_pools:
+                if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                    break
+                pool_workers = pool.get('workers', 1)
+                prefix_str = pool.get('profile_prefix', '')
+                if not prefix_str:
+                    logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+                    continue
+
+                # Each worker in the pool gets the full list of prefixes from the pool configuration.
+                for i in range(pool_workers):
+                    if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                        break
+                    worker_policy = deepcopy(policy)
+                    # The worker functions will now handle a comma-separated list of prefixes.
+                    worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
+                    worker_specs.append({
+                        'func': run_direct_docker_worker,
+                        'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
+                    })
+                    worker_id_counter += 1
 
         with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
             futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
@@ -847,6 +914,7 @@ def main_stress_policy(args):
             logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
             return 1
 
+        logger.info(f"Task source directory: {os.path.abspath(info_json_dir)}")
         sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
         if args.dry_run: return 0
 
@@ -854,50 +922,20 @@ def main_stress_policy(args):
         worker_pools = exec_control.get('worker_pools', [])
         discovery_config = exec_control.get('worker_pool_discovery')
 
-        if discovery_config:
-            if worker_pools:
-                logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-            discovery_pattern = discovery_config.get('profile_prefix_pattern')
-            workers_per_group = discovery_config.get('workers_per_profile_group', 1)
-
+        if discovery_config and not worker_pools:
+            logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
             direct_policy = policy.get('direct_docker_cli_policy', {})
             use_env = direct_policy.get('use_profile_env', 'auth' if mode == 'fetch_only' else 'download')
             manager_for_discovery = profile_managers.get(use_env)
 
             if not manager_for_discovery:
                 logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
                 return 1
 
-            if not discovery_pattern:
-                logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-                return 1
-
-            logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-            try:
-                all_profiles = manager_for_discovery.list_profiles()
-                found_prefixes = set()
-                for profile in all_profiles:
-                    profile_name = profile['name']
-                    if fnmatch.fnmatch(profile_name, discovery_pattern):
-                        # Assuming standard name format like 'user31_001', extract 'user31'
-                        prefix = profile_name.rsplit('_', 1)[0]
-                        found_prefixes.add(prefix)
-
-                if not found_prefixes:
-                    logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-                    worker_pools = []
-                else:
-                    worker_pools = []
-                    for prefix in sorted(list(found_prefixes)):
-                        worker_pools.append({
-                            'profile_prefix': prefix,
-                            'workers': workers_per_group
-                        })
-                    logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-            except Exception as e:
-                logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-                return 1
+            discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+            if discovered_pools is None:
+                return 1 # An error occurred
+            worker_pools = discovered_pools
 
         if not worker_pools and exec_control.get('workers'):
             # Fallback for legacy 'workers: N' config
@ -909,30 +947,67 @@ def main_stress_policy(args):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.profile_prefix:
|
if args.profile_prefix:
|
||||||
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
|
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
|
||||||
|
cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
|
||||||
|
|
||||||
|
original_pool_count = len(worker_pools)
|
||||||
|
filtered_pools = []
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
-            pool['profile_prefix'] = args.profile_prefix
+            pool_prefixes_str = pool.get('profile_prefix', '')
+            if not pool_prefixes_str:
+                continue
+            pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
+            if pool_prefixes.intersection(cli_prefixes):
+                filtered_pools.append(pool)
+
+        if len(filtered_pools) < original_pool_count:
+            logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
+
+        worker_pools = filtered_pools
+
+        if not worker_pools:
+            logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")

     worker_specs = []
-    worker_id_counter = 0
-    for pool in worker_pools:
-        pool_workers = pool.get('workers', 1)
-        prefix_str = pool.get('profile_prefix', '')
-        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
-        if not prefixes:
-            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
-            continue
-
-        # Each worker in the pool gets the full list of prefixes from the pool configuration.
-        for i in range(pool_workers):
-            worker_policy = deepcopy(policy)
-            # The worker functions will now handle a comma-separated list of prefixes.
-            worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
-            worker_specs.append({
-                'func': run_direct_docker_download_worker,
-                'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
-            })
-            worker_id_counter += 1
+    if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
+        logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
+        all_prefixes = []
+        for pool in worker_pools:
+            prefix_str = pool.get('profile_prefix', '')
+            if prefix_str:
+                all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
+        final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
+        logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
+
+        worker_policy = deepcopy(policy)
+        worker_policy.setdefault('download_policy', {})['profile_prefix'] = final_prefix_str
+        worker_specs.append({
+            'func': run_direct_docker_download_worker,
+            'args': (0, worker_policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
+        })
+    else:
+        worker_id_counter = 0
+        for pool in worker_pools:
+            if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                break
+            pool_workers = pool.get('workers', 1)
+            prefix_str = pool.get('profile_prefix', '')
+            if not prefix_str:
+                logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+                continue
+
+            # Each worker in the pool gets the full list of prefixes from the pool configuration.
+            for i in range(pool_workers):
+                if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                    break
+                worker_policy = deepcopy(policy)
+                # The worker functions will now handle a comma-separated list of prefixes.
+                worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
+                worker_specs.append({
+                    'func': run_direct_docker_download_worker,
+                    'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
+                })
+                worker_id_counter += 1

     with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
         futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
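Read together, the hunk above does two things: it filters `worker_pools` down to the pools whose `profile_prefix` intersects the CLI `--profile-prefix`, and, when `--workers 1` is given with several pools, it collapses all prefixes onto a single worker. A minimal, runnable sketch of both behaviors, assuming pool dicts shaped like the `worker_pools` entries in cluster.yaml (the pool values here are illustrative):

```python
# Illustrative sketch only: mirrors the filtering/aggregation in the hunk above.
def filter_pools_by_cli_prefix(worker_pools, cli_prefix):
    """Keep pools whose comma-separated profile_prefix intersects the CLI prefixes."""
    cli_prefixes = {p.strip() for p in cli_prefix.split(',') if p.strip()}
    filtered = []
    for pool in worker_pools:
        pool_prefixes = {p.strip() for p in pool.get('profile_prefix', '').split(',') if p.strip()}
        if pool_prefixes & cli_prefixes:
            filtered.append(pool)
    return filtered

def aggregate_prefixes(worker_pools):
    """Single-worker mode: merge every pool's prefixes into one sorted CSV string."""
    all_prefixes = set()
    for pool in worker_pools:
        all_prefixes.update(p.strip() for p in pool.get('profile_prefix', '').split(',') if p.strip())
    return ','.join(sorted(all_prefixes))

pools = [{'profile_prefix': 'user31', 'workers': 2},
         {'profile_prefix': 'user32,user33', 'workers': 1}]
print(filter_pools_by_cli_prefix(pools, 'user32'))  # -> [{'profile_prefix': 'user32,user33', 'workers': 1}]
print(aggregate_prefixes(pools))                    # -> 'user31,user32,user33'
```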
@@ -968,6 +1043,7 @@ def main_stress_policy(args):
         logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
         return 1

+    logger.info(f"Task source directory: {os.path.abspath(info_json_dir)}")
     sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
     if args.dry_run: return 0

@@ -975,48 +1051,18 @@ def main_stress_policy(args):
     worker_pools = exec_control.get('worker_pools', [])
     discovery_config = exec_control.get('worker_pool_discovery')

-    if discovery_config:
-        if worker_pools:
-            logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-        discovery_pattern = discovery_config.get('profile_prefix_pattern')
-        workers_per_group = discovery_config.get('workers_per_profile_group', 1)
-
+    if discovery_config and not worker_pools:
+        logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
         manager_for_discovery = profile_managers.get('download')

         if not manager_for_discovery:
             logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
             return 1

-        if not discovery_pattern:
-            logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-            return 1
-
-        logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-        try:
-            all_profiles = manager_for_discovery.list_profiles()
-            found_prefixes = set()
-            for profile in all_profiles:
-                profile_name = profile['name']
-                if fnmatch.fnmatch(profile_name, discovery_pattern):
-                    # Assuming standard name format like 'user31_001', extract 'user31'
-                    prefix = profile_name.rsplit('_', 1)[0]
-                    found_prefixes.add(prefix)
-
-            if not found_prefixes:
-                logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-                worker_pools = []
-            else:
-                worker_pools = []
-                for prefix in sorted(list(found_prefixes)):
-                    worker_pools.append({
-                        'profile_prefix': prefix,
-                        'workers': workers_per_group
-                    })
-                logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-        except Exception as e:
-            logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-            return 1
+        discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+        if discovered_pools is None:
+            return 1  # An error occurred
+        worker_pools = discovered_pools

     if not worker_pools and exec_control.get('workers'):
         # Fallback for legacy 'workers: N' config
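Note that `_discover_worker_pools` itself is not shown in this hunk. A plausible sketch of the extracted helper, reconstructed from the inline logic removed above; the real helper elsewhere in this commit may differ in signature and details:

```python
import fnmatch
import logging

logger = logging.getLogger(__name__)

def _discover_worker_pools(discovery_config, manager):
    """Return discovered pool dicts, [] if nothing matched, or None on error."""
    pattern = discovery_config.get('profile_prefix_pattern')
    workers_per_group = discovery_config.get('workers_per_profile_group', 1)
    if not pattern:
        logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
        return None
    try:
        found_prefixes = set()
        for profile in manager.list_profiles():
            name = profile['name']
            if fnmatch.fnmatch(name, pattern):
                # Assuming the standard name format like 'user31_001', extract 'user31'.
                found_prefixes.add(name.rsplit('_', 1)[0])
        if not found_prefixes:
            logger.warning(f"No profiles matched pattern '{pattern}'. No workers will be started.")
            return []
        return [{'profile_prefix': prefix, 'workers': workers_per_group}
                for prefix in sorted(found_prefixes)]
    except Exception as e:
        logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
        return None
```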
@@ -1028,30 +1074,67 @@ def main_stress_policy(args):
         return 1

     if args.profile_prefix:
-        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
+        cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
+
+        original_pool_count = len(worker_pools)
+        filtered_pools = []
         for pool in worker_pools:
-            pool['profile_prefix'] = args.profile_prefix
+            pool_prefixes_str = pool.get('profile_prefix', '')
+            if not pool_prefixes_str:
+                continue
+            pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
+            if pool_prefixes.intersection(cli_prefixes):
+                filtered_pools.append(pool)
+
+        if len(filtered_pools) < original_pool_count:
+            logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
+
+        worker_pools = filtered_pools
+
+        if not worker_pools:
+            logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")

     worker_specs = []
-    worker_id_counter = 0
-    for pool in worker_pools:
-        pool_workers = pool.get('workers', 1)
-        prefix_str = pool.get('profile_prefix', '')
-        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
-        if not prefixes:
-            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
-            continue
-
-        # Each worker in the pool gets the full list of prefixes from the pool configuration.
-        for i in range(pool_workers):
-            worker_policy = deepcopy(policy)
-            # The worker functions will now handle a comma-separated list of prefixes.
-            worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
-            worker_specs.append({
-                'func': run_direct_download_worker,
-                'args': (worker_id_counter, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
-            })
-            worker_id_counter += 1
+    if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
+        logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
+        all_prefixes = []
+        for pool in worker_pools:
+            prefix_str = pool.get('profile_prefix', '')
+            if prefix_str:
+                all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
+        final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
+        logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
+
+        worker_policy = deepcopy(policy)
+        worker_policy.setdefault('download_policy', {})['profile_prefix'] = final_prefix_str
+        worker_specs.append({
+            'func': run_direct_download_worker,
+            'args': (0, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
+        })
+    else:
+        worker_id_counter = 0
+        for pool in worker_pools:
+            if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                break
+            pool_workers = pool.get('workers', 1)
+            prefix_str = pool.get('profile_prefix', '')
+            if not prefix_str:
+                logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+                continue
+
+            # Each worker in the pool gets the full list of prefixes from the pool configuration.
+            for i in range(pool_workers):
+                if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                    break
+                worker_policy = deepcopy(policy)
+                # The worker functions will now handle a comma-separated list of prefixes.
+                worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
+                worker_specs.append({
+                    'func': run_direct_download_worker,
+                    'args': (worker_id_counter, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
+                })
+                worker_id_counter += 1

     with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
         futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
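In both branches the hunk ends by handing the accumulated `worker_specs` to a thread pool. A self-contained toy version of that dispatch pattern; the worker function and its arguments below are placeholders, not the real signatures:

```python
import concurrent.futures

def demo_worker(worker_id, prefix):
    # Placeholder for run_direct_download_worker and friends.
    return f"worker {worker_id} handles prefixes: {prefix}"

worker_specs = [
    {'func': demo_worker, 'args': (0, 'user31')},
    {'func': demo_worker, 'args': (1, 'user32,user33')},
]

with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
    futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
    for future in concurrent.futures.as_completed(futures):
        print(future.result())
```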
@@ -1129,48 +1212,18 @@ def main_stress_policy(args):
     worker_pools = exec_control.get('worker_pools', [])
     discovery_config = exec_control.get('worker_pool_discovery')

-    if discovery_config:
-        if worker_pools:
-            logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-        discovery_pattern = discovery_config.get('profile_prefix_pattern')
-        workers_per_group = discovery_config.get('workers_per_profile_group', 1)
-
+    if discovery_config and not worker_pools:
+        logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
         manager_for_discovery = profile_managers.get('auth')

         if not manager_for_discovery:
             logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
             return 1

-        if not discovery_pattern:
-            logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-            return 1
-
-        logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-        try:
-            all_profiles = manager_for_discovery.list_profiles()
-            found_prefixes = set()
-            for profile in all_profiles:
-                profile_name = profile['name']
-                if fnmatch.fnmatch(profile_name, discovery_pattern):
-                    # Assuming standard name format like 'user31_001', extract 'user31'
-                    prefix = profile_name.rsplit('_', 1)[0]
-                    found_prefixes.add(prefix)
-
-            if not found_prefixes:
-                logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-                worker_pools = []
-            else:
-                worker_pools = []
-                for prefix in sorted(list(found_prefixes)):
-                    worker_pools.append({
-                        'profile_prefix': prefix,
-                        'workers': workers_per_group
-                    })
-                logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-        except Exception as e:
-            logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-            return 1
+        discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+        if discovered_pools is None:
+            return 1  # An error occurred
+        worker_pools = discovered_pools

     if not worker_pools and exec_control.get('workers'):
         # Fallback for legacy 'workers: N' config
@@ -1189,6 +1242,8 @@ def main_stress_policy(args):
     worker_specs = []
     worker_id_counter = 0
     for pool in worker_pools:
+        if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+            break
        pool_workers = pool.get('workers', 1)
        prefix_str = pool.get('profile_prefix', '')
        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
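This check, repeated inside the per-worker loop in the next hunk, caps the total number of specs at the CLI `--workers` value. A compact sketch of the capping behavior, assuming the same pool shape as in the earlier sketches:

```python
def build_specs_capped(worker_pools, max_workers=None):
    """Stop creating worker specs once the CLI-requested total is reached."""
    specs = []
    for pool in worker_pools:
        if max_workers is not None and len(specs) >= max_workers:
            break
        for _ in range(pool.get('workers', 1)):
            if max_workers is not None and len(specs) >= max_workers:
                break
            specs.append(pool.get('profile_prefix', ''))
    return specs

# A pool asking for 3 workers yields only 2 specs when --workers 2 is passed.
assert build_specs_capped([{'workers': 3, 'profile_prefix': 'user31'}], max_workers=2) == ['user31', 'user31']
```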
@@ -1198,6 +1253,8 @@ def main_stress_policy(args):

        # Each worker in the pool gets the full list of prefixes from the pool configuration.
        for i in range(pool_workers):
+            if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                break
            worker_policy = deepcopy(policy)
            # The worker functions will now handle a comma-separated list of prefixes.
            worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
@@ -1285,48 +1342,18 @@ def main_stress_policy(args):
     worker_pools = exec_control.get('worker_pools', [])
     discovery_config = exec_control.get('worker_pool_discovery')

-    if discovery_config:
-        if worker_pools:
-            logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-        discovery_pattern = discovery_config.get('profile_prefix_pattern')
-        workers_per_group = discovery_config.get('workers_per_profile_group', 1)
-
+    if discovery_config and not worker_pools:
+        logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
         manager_for_discovery = profile_managers.get('download')

         if not manager_for_discovery:
             logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
             return 1

-        if not discovery_pattern:
-            logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-            return 1
-
-        logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-        try:
-            all_profiles = manager_for_discovery.list_profiles()
-            found_prefixes = set()
-            for profile in all_profiles:
-                profile_name = profile['name']
-                if fnmatch.fnmatch(profile_name, discovery_pattern):
-                    # Assuming standard name format like 'user31_001', extract 'user31'
-                    prefix = profile_name.rsplit('_', 1)[0]
-                    found_prefixes.add(prefix)
-
-            if not found_prefixes:
-                logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-                worker_pools = []
-            else:
-                worker_pools = []
-                for prefix in sorted(list(found_prefixes)):
-                    worker_pools.append({
-                        'profile_prefix': prefix,
-                        'workers': workers_per_group
-                    })
-                logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-        except Exception as e:
-            logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-            return 1
+        discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+        if discovered_pools is None:
+            return 1  # An error occurred
+        worker_pools = discovered_pools

     if not worker_pools and exec_control.get('workers'):
         # Fallback for legacy 'workers: N' config
@@ -1345,6 +1372,8 @@ def main_stress_policy(args):
     worker_specs = []
     worker_id_counter = 0
     for pool in worker_pools:
+        if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+            break
        pool_workers = pool.get('workers', 1)
        prefix_str = pool.get('profile_prefix', '')
        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
@@ -1354,6 +1383,8 @@ def main_stress_policy(args):

        # Each worker in the pool gets the full list of prefixes from the pool configuration.
        for i in range(pool_workers):
+            if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+                break
            worker_policy = deepcopy(policy)
            # The worker functions will now handle a comma-separated list of prefixes.
            worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str