Use cluster.yaml as the single source of the profile list, fix single tmux worker naming, and update ytops to support workers without a profile list
Commit: 4fd9217c6d
Parent: bf12118b2b
@@ -108,12 +108,20 @@ ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini
 ### Profile Management
 
+The `cleanup-profiles` action can be used to remove profiles from Redis. By default, it cleans up "ungrouped" profiles.
+
 ```bash
-# Clean up all profiles
+# Perform a dry run of cleaning up ungrouped profiles
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "dry_run=true"
+
+# Clean up ungrouped profiles
 ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles"
 
-# Clean up specific profile prefix
-ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "profile_prefix=user1"
+# To clean up ALL profiles (destructive), set cleanup_mode=full
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "cleanup_mode=full"
+
+# You can specify a custom setup policy file for cleanup operations
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "setup_policy=policies/my_custom_setup_policy.yaml"
 ```
 
 ## Monitoring and Inspection

@@ -157,7 +165,19 @@ Then, from the jump host, you can sync code or policies to the cluster nodes:
 ansible-playbook ansible/playbook-stress-sync-code.yml -i ansible/inventory.green.ini
 
 # Sync only policies and CLI configs
-ansible-playbook ansible/playbook-stress-sync-configs.yml -i ansible/inventory.green.ini
+ansible-playbook ansible/playbook-stress-sync-policies.yml -i ansible/inventory.green.ini
+
+# To sync files from a custom source directory on the Ansible controller, use the 'source_base_dir' extra variable:
+ansible-playbook ansible/playbook-stress-sync-policies.yml -i ansible/inventory.green.ini -e "source_base_dir=/path/to/my-custom-source"
+```
+
+### Docker Image Updates
+
+To update the `yt-dlp` docker image used by download simulators, run the following playbook. This builds the image locally on each worker node.
+
+```bash
+# Build the yt-dlp docker image locally on each worker node
+ansible-playbook ansible/playbook-update-yt-dlp-docker.yml -i ansible/inventory.green.ini
 ```
 
 ### Adding a New Worker

@@ -225,7 +245,10 @@ ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini
 ### Restart Enforcer and Monitoring
 
 ```bash
-# Restart monitoring and enforcer on master
+# Restart monitoring and enforcer on master using default policies
 ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=restart-monitoring"
+
+# Restart using a custom enforcer policy
+ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=restart-monitoring" -e "enforcer_policy=policies/my_other_enforcer.yaml"
 ```
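For reference, a minimal sketch (not part of the commit) of how the documented extra-vars map onto the underlying `ytops-client` calls, mirroring the Jinja branching added to the control playbook later in this diff; the helper function name and its defaults are illustrative only.

```python
# Illustrative only: mirrors the cleanup_mode / dry_run branching in playbook-stress-control.yml.
def cleanup_command(setup_policy="policies/6_profile_setup_policy.yaml",
                    cleanup_mode="ungrouped",
                    dry_run=False):
    if cleanup_mode == "full":
        # Destructive path: wipes all profiles covered by the setup policy.
        return f"./bin/ytops-client setup-profiles --policy {setup_policy} --cleanup-all"
    # Default path: only ungrouped profiles, optionally as a dry run.
    cmd = f"./bin/ytops-client profile cleanup-ungrouped --policy-file {setup_policy}"
    return cmd + (" --dry-run" if dry_run else "")

print(cleanup_command(dry_run=True))
print(cleanup_command(cleanup_mode="full"))
```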
@@ -1,4 +1,35 @@
 ---
+# -------------------------------------------------------------------------------------------------
+# PHASE 0: Fix Python Dependencies
+# Ensures remote hosts have compatible Python libraries to prevent module failures.
+# -------------------------------------------------------------------------------------------------
+- name: "PHASE 0: Upgrade Python SSL libraries"
+  hosts: all
+  gather_facts: no
+  tasks:
+    - name: Attempt to upgrade pyOpenSSL, cryptography and urllib3
+      ansible.builtin.shell:
+        cmd: "pip3 install --upgrade pyOpenSSL cryptography urllib3"
+      become: yes
+      register: pip_upgrade_result
+      changed_when: "'Successfully installed' in pip_upgrade_result.stdout"
+      failed_when: false
+
+    - name: Retry upgrade with --break-system-packages on specific error
+      ansible.builtin.shell:
+        cmd: "pip3 install --upgrade pyOpenSSL cryptography urllib3 --break-system-packages"
+      become: yes
+      register: pip_upgrade_result_retry
+      changed_when: "'Successfully installed' in pip_upgrade_result_retry.stdout"
+      when: pip_upgrade_result.rc != 0 and 'externally-managed-environment' in pip_upgrade_result.stderr
+
+    - name: Fail if package upgrade did not succeed
+      ansible.builtin.fail:
+        msg: "Failed to upgrade Python packages after retry. Last error: {{ pip_upgrade_result_retry.stderr | default(pip_upgrade_result.stderr) }}"
+      when: >
+        pip_upgrade_result.rc != 0 and
+        (pip_upgrade_result_retry is not defined or pip_upgrade_result_retry.rc != 0)
+
 # This playbook provides a complete installation for fresh nodes.
 # It can install either master or worker roles, or both on the same machine.
 #

@@ -55,57 +86,10 @@
 
 # -------------------------------------------------------------------------------------------------
 # PHASE 4: Build yt-dlp Docker Image
-# Builds the yt-dlp container from bin/ directory
+# Builds the yt-dlp container on each worker node using the dedicated playbook.
 # -------------------------------------------------------------------------------------------------
-- name: "PHASE 4: Build yt-dlp Docker image"
-  hosts: all
-  gather_facts: no
-  vars_files:
-    - "group_vars/all/vault.yml"
-  pre_tasks:
-    - name: Set inventory_env fact
-      ansible.builtin.set_fact:
-        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
-    - name: Load environment-specific variables
-      ansible.builtin.include_vars: "{{ item }}"
-      with_fileglob:
-        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
-  tasks:
-    - name: Define base directory for node
-      ansible.builtin.set_fact:
-        base_dir: "{{ airflow_master_dir if (inventory_hostname in groups['master'] and not (install_worker | default(false) | bool)) else airflow_worker_dir }}"
-
-    - name: Ensure bin directory exists
-      ansible.builtin.file:
-        path: "{{ base_dir }}/bin"
-        state: directory
-        owner: "{{ ansible_user }}"
-        group: "{{ deploy_group }}"
-        mode: '0755'
-      become: yes
-
-    - name: Check if Dockerfile exists in bin directory
-      ansible.builtin.stat:
-        path: "{{ base_dir }}/bin/Dockerfile"
-      register: dockerfile_stat
-
-    - name: Build yt-dlp Docker image if Dockerfile exists
-      community.docker.docker_image:
-        name: yt-dlp-custom
-        tag: latest
-        source: build
-        build:
-          path: "{{ base_dir }}/bin"
-          pull: yes
-        state: present
-        force_source: yes
-      become: yes
-      when: dockerfile_stat.stat.exists
-
-    - name: Display message if Dockerfile not found
-      ansible.builtin.debug:
-        msg: "Dockerfile not found at {{ base_dir }}/bin/Dockerfile - skipping yt-dlp image build"
-      when: not dockerfile_stat.stat.exists
+- name: "PHASE 4: Build yt-dlp Docker image on workers"
+  import_playbook: playbook-update-yt-dlp-docker.yml
 
 # -------------------------------------------------------------------------------------------------
 # PHASE 5: Sync Code and Install Dependencies

@@ -148,6 +132,12 @@
       with_fileglob:
         - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
+    - name: Install redis-tools
+      ansible.builtin.apt:
+        name: redis-tools
+        state: present
+      become: yes
+
     - name: Configure system performance and kernel settings
       ansible.builtin.copy:
         src: "configs/etc/sysctl.d/99-system-limits.conf"

@@ -174,7 +164,7 @@
 
     - name: Template Docker Compose file for master services
       ansible.builtin.template:
-        src: templates/docker-compose.stress-master.j2
+        src: docker-compose.stress-master.j2
         dest: "{{ airflow_master_dir }}/docker-compose.stress.yml"
         owner: "{{ ansible_user }}"
         group: "{{ deploy_group }}"

@@ -184,9 +174,7 @@
     - name: Stop and remove existing containers before starting services
       ansible.builtin.shell:
        cmd: |
-          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=bgutil-provider" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
       become: yes
       changed_when: false
       ignore_errors: yes

@@ -200,6 +188,14 @@
         remove_orphans: true
       become: yes
 
+    - name: Wait for Redis service to be ready
+      ansible.builtin.wait_for:
+        host: localhost
+        port: "{{ redis_port }}"
+        delay: 5
+        timeout: 60
+      delegate_to: "{{ inventory_hostname }}"
+
     - name: Wait for MinIO service to be ready
       ansible.builtin.wait_for:
         host: "{{ hostvars[inventory_hostname].ansible_host }}"

@@ -240,7 +236,7 @@
       register: mc_mb_result
       failed_when: >
         mc_mb_result.rc != 0 and
-        "already exists" not in mc_mb_result.stderr
+        "already own it" not in mc_mb_result.stderr
       changed_when: mc_mb_result.rc == 0
       environment:
         HOME: "/home/{{ ansible_user }}"

@@ -264,7 +260,7 @@
   tasks:
     - name: Template Docker Compose file for worker services
      ansible.builtin.template:
-        src: templates/docker-compose.stress-master.j2
+        src: docker-compose.stress-master.j2
         dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
         owner: "{{ ansible_user }}"
         group: "{{ deploy_group }}"

@@ -274,9 +270,9 @@
     - name: Stop and remove existing containers before starting services
       ansible.builtin.shell:
         cmd: |
-          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
-          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=bgutil-provider" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
+          docker ps -a --filter "name=redis-stress" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
+          docker ps -a --filter "name=minio-stress" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f
       become: yes
       changed_when: false
       ignore_errors: yes
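The `--format "{{ '{{.ID}}' }}"` change above is about templating: the task is rendered by Jinja2 before the shell sees it, so the Go-template placeholder `{{.ID}}` has to be produced as a literal string by a Jinja expression. A minimal sketch of the rendering, assuming the `jinja2` package (which Ansible itself depends on) is installed:

```python
# Illustrative only: shows what Ansible's Jinja2 rendering produces for the new --format argument.
from jinja2 import Template

rendered = Template(
    """docker ps -a --filter "name=bgutil-provider" --format "{{ '{{.ID}}' }}" | xargs -r docker rm -f"""
).render()
print(rendered)
# docker ps -a --filter "name=bgutil-provider" --format "{{.ID}}" | xargs -r docker rm -f
```

The earlier `{{{{.ID}}}}` spelling looks like printf-style brace doubling, which Jinja2 does not treat as an escape; quoting the placeholder inside a Jinja expression is the usual way to emit it verbatim.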
ansible/playbook-stress-cleanup-info-jsons.yml (new file, 41 lines)
@@ -0,0 +1,41 @@
+---
+- name: "STRESS-CLEANUP: Remove info.json task files from workers"
+  hosts: workers
+  gather_facts: no
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
+  tasks:
+    - name: Define the directory to be cleaned
+      ansible.builtin.set_fact:
+        target_dir: "{{ airflow_worker_dir }}/run/docker_mount/info_json_tasks/direct_docker_simulation"
+
+    - name: "Display directory being cleaned"
+      ansible.builtin.debug:
+        msg: "Cleaning directory: {{ target_dir }} on {{ inventory_hostname }}"
+
+    - name: Remove the info_json_tasks directory
+      ansible.builtin.file:
+        path: "{{ target_dir }}"
+        state: absent
+      become: yes
+
+    - name: Recreate the info_json_tasks directory
+      ansible.builtin.file:
+        path: "{{ target_dir }}"
+        state: directory
+        owner: "{{ ansible_user }}"
+        group: "{{ deploy_group }}"
+        mode: '0755'
+      become: yes
+
+    - name: "Display cleanup completion"
+      ansible.builtin.debug:
+        msg: "Successfully cleaned and recreated {{ target_dir }} on {{ inventory_hostname }}"
@@ -1,10 +1,11 @@
 ---
 - name: "STRESS-SETUP: Unified control for stress test processes"
-  hosts: all
+  hosts: "{{ 'master' if action in master_only_actions else 'all' }}"
   gather_facts: no
   vars:
     # Default action is status check
     action: "status"
+    graceful_shutdown_timeout_seconds: 30
     setup_policy: "policies/6_profile_setup_policy.yaml"
     enforcer_policy: "policies/8_unified_simulation_enforcer.yaml"
     master_only_actions:

@@ -80,9 +81,7 @@
           if [ -f .env ]; then
             set -a && . ./.env && set +a
           fi
-          timeout 10 ./bin/ytops-client profile list \
-            --auth-env sim_auth \
-            --download-env sim_download 2>&1
+          timeout 10 ./bin/ytops-client profile list 2>&1
       register: profile_list_output
       changed_when: false
       when:

@@ -132,7 +131,12 @@
 
     - name: "Display policy being used for profile cleanup"
       ansible.builtin.debug:
-        msg: "Using setup policy for cleanup: {{ setup_policy }}"
+        msg: >-
+          {% if cleanup_mode | default('ungrouped') == 'full' %}
+          Performing a FULL cleanup of all profiles using policy: {{ setup_policy }}
+          {% else %}
+          Cleaning up UNGROUPED profiles using policy: {{ setup_policy }}{% if dry_run | default(false) %} (DRY RUN){% endif %}
+          {% endif %}
       when:
         - action == "cleanup-profiles"
         - inventory_hostname in groups['master']

@@ -146,9 +150,15 @@
           if [ -f .env ]; then
             set -a && . ./.env && set +a
           fi
+          {% if cleanup_mode | default('ungrouped') == 'full' %}
           ./bin/ytops-client setup-profiles \
             --policy {{ setup_policy }} \
-            --cleanup-all {% if profile_prefix is defined %}--profile-prefix {{ profile_prefix }}{% endif %}
+            --cleanup-all
+          {% else %}
+          ./bin/ytops-client profile cleanup-ungrouped \
+            --policy-file {{ setup_policy }} \
+            {% if dry_run | default(false) %}--dry-run{% endif %}
+          {% endif %}
       register: cleanup_output
       changed_when: false
       when:

@@ -172,8 +182,34 @@
         - cleanup_output is defined
 
     - name: Stop all stress test processes on all nodes (stop-all action)
+      vars:
+        process_pattern: "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)|[p]ython.*ytops"
       block:
-        - name: Kill all tmux sessions starting with 'stress-'
+        - name: "Get PIDs of running stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}'"
+          register: pids_to_kill
+          changed_when: false
+
+        - name: "Gracefully terminate stress test processes"
+          ansible.builtin.shell:
+            cmd: "kill {{ pids_to_kill.stdout_lines | join(' ') }}"
+          when: pids_to_kill.stdout | length > 0
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Wait for graceful shutdown"
+          ansible.builtin.pause:
+            seconds: "{{ graceful_shutdown_timeout_seconds }}"
+          when: pids_to_kill.stdout | length > 0
+
+        - name: "Force kill any lingering stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true"
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Kill all stress-related tmux sessions as a failsafe"
           ansible.builtin.shell:
             cmd: |
               for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do

@@ -186,27 +222,37 @@
               fi
           ignore_errors: yes
           changed_when: false
 
-        - name: Kill all ytops-client and related python processes
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate processes by pattern
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-
-              sleep 1 # Wait for graceful shutdown
-
-              # Force kill any remaining processes
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
-
       when: action == "stop-all"
 
     - name: Stop processes on targeted nodes only (stop-nodes action)
+      vars:
+        process_pattern: "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)|[p]ython.*ytops"
       block:
-        - name: Kill all tmux sessions starting with 'stress-' on this node
+        - name: "Get PIDs of running stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}'"
+          register: pids_to_kill
+          changed_when: false
+
+        - name: "Gracefully terminate stress test processes"
+          ansible.builtin.shell:
+            cmd: "kill {{ pids_to_kill.stdout_lines | join(' ') }}"
+          when: pids_to_kill.stdout | length > 0
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Wait for graceful shutdown"
+          ansible.builtin.pause:
+            seconds: "{{ graceful_shutdown_timeout_seconds }}"
+          when: pids_to_kill.stdout | length > 0
+
+        - name: "Force kill any lingering stress test processes"
+          ansible.builtin.shell:
+            cmd: "ps aux | grep -E '{{ process_pattern }}' | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true"
+          ignore_errors: true
+          changed_when: false
+
+        - name: "Kill all stress-related tmux sessions as a failsafe"
           ansible.builtin.shell:
             cmd: |
               for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do

@@ -219,22 +265,6 @@
               fi
           ignore_errors: yes
           changed_when: false
 
-        - name: Kill all ytops-client and related python processes on this node
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate processes by pattern
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-
-              sleep 1 # Wait for graceful shutdown
-
-              # Force kill any remaining processes
-              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
-
       when: action == "stop-nodes"
 
     - name: Restart monitoring and enforcer (restart-monitoring action)

@@ -263,8 +293,6 @@
         working_dir: "{{ airflow_master_dir }}"
         command_to_run: >
           ./bin/ytops-client profile list
-          --auth-env sim_auth
-          --download-env sim_download
           --live
          --no-blink
          --show-reasons
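The stop-all / stop-nodes rework above replaces the inline kill scripts with a shared `process_pattern` plus a graceful-kill, pause, force-kill sequence. A small sketch (not from the repo) of what that pattern does and does not match against `ps aux` output:

```python
# Illustrative only: exercises the process_pattern regex from the playbook against made-up ps lines.
import re

pattern = r"[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)|[p]ython.*ytops"
ps_lines = [
    "user  101  ./bin/ytops-client stress-policy --policy policies/11_direct_docker_download_simulation.yaml",
    "user  102  ./bin/ytops-client profile list --live",
    "user  103  python3 -m ytops_client.worker",           # hypothetical module path, for illustration
    "user  104  grep -E [y]tops-client.*(profile.*list|policy-enforcer|stress-policy)",
]
for line in ps_lines:
    print(bool(re.search(pattern, line)), line)
# True, True, True, False
```

The `[y]` and `[p]` character classes keep the `grep` process itself out of the match set, which is why the same pattern can be reused safely inside `ps aux | grep -E ...` pipelines.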
@@ -3,7 +3,7 @@
   hosts: workers
   gather_facts: no
   vars:
-    tmux_session_download: "stress-download-{{ (profile_prefix | default('default')) | replace(',', '-') }}"
+    tmux_session_download: "{{ 'stress-download-worker-' + (worker_num | string) if (profile_prefix | default('') == 'worker' and worker_num is defined) else 'stress-download-' + (profile_prefix | default('default')) | replace(',', '-') }}"
     download_policy: "policies/11_direct_docker_download_simulation.yaml"
   vars_files:
     - "group_vars/all/vault.yml"

@@ -41,13 +41,14 @@
         command_to_run: >
           ./bin/ytops-client stress-policy
           --policy {{ download_policy }}
-          {% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %}
+          {# {% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %} #}
           {% if download_min_seconds is defined %}--set 'settings.dummy_simulation_settings.download_min_seconds={{ download_min_seconds }}'{% endif %}
           {% if download_max_seconds is defined %}--set 'settings.dummy_simulation_settings.download_max_seconds={{ download_max_seconds }}'{% endif %}
-          {% if profile_prefix is defined %}--set 'execution_control.worker_pools=[{"profile_prefix": "{{ profile_prefix }}", "workers": 1}]'{% endif %}
+          {# --set 'execution_control.worker_pools=[{"profile_prefix": "{% if profile_prefix == 'worker' %}{{ display_prefix }}{% else %}{{ profile_prefix }}{% endif %}", "workers": 1}]' #}
           {% for setting in (extra_set_args | default('[]')) | from_yaml %}--set '{{ setting }}' {% endfor %}
-          --profile-prefix {{ profile_prefix }}
-        process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ download_policy }}.*--profile-prefix {{ profile_prefix }}"
+          {% if profile_prefix == 'worker' %}--workers 1{% endif %}
+          --profile-prefix {% if profile_prefix == 'worker' %}{{ display_prefix }}{% else %}{{ profile_prefix }}{% endif %}
+        process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ download_policy }}{% if profile_prefix == 'worker' %}.*--workers 1{% endif %}.*--profile-prefix {% if profile_prefix == 'worker' %}{{ display_prefix }}{% else %}{{ profile_prefix }}{% endif %}"
         start_process: "{{ start_download | default(false) | bool }}"
         stop_process: "{{ stop_download | default(false) | bool }}"
         check_status: "{{ vars.check_status | default(false) | bool }}"
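A sketch (not from the repo) of how the new `tmux_session_download` expression resolves, mirroring the Jinja conditional above in plain Python; the example prefixes are made up:

```python
# Illustrative only: plain-Python mirror of the tmux_session_download Jinja expression.
def tmux_session_download(profile_prefix="default", worker_num=None):
    if profile_prefix == "worker" and worker_num is not None:
        return f"stress-download-worker-{worker_num}"
    return "stress-download-" + profile_prefix.replace(",", "-")

print(tmux_session_download("worker", 2))      # stress-download-worker-2
print(tmux_session_download("user1,user2"))    # stress-download-user1-user2
```

This is the naming fix referenced in the commit message: pooled workers get `stress-download-worker-<N>`, which is exactly what the new `tasks/start-download-simulators.yml` and the stop actions grep for.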
@@ -15,16 +15,28 @@
       with_fileglob:
         - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
-    - name: Check if Redis is running
+    - name: Wait for Redis to be available
+      ansible.builtin.wait_for:
+        host: localhost
+        port: "{{ redis_port }}"
+        timeout: 60
+        delay: 5
+      register: redis_port_check
+      ignore_errors: yes
+
+    - name: Check if Redis is running and responding to commands
       ansible.builtin.shell:
-        cmd: "redis-cli -h {{ hostvars[groups['master'][0]].ansible_host }} -p {{ redis_port }} {% if use_redis_password | default(true) | string | lower == 'true' %}-a {{ vault_redis_password }}{% endif %} ping 2>&1 | grep -q PONG"
+        cmd: "redis-cli -h localhost -p {{ redis_port }} {% if use_redis_password | default(true) | string | lower == 'true' %}-a {{ vault_redis_password }}{% endif %} ping 2>&1 | grep -q PONG"
       register: redis_check
       ignore_errors: yes
       changed_when: false
+      retries: 3
+      delay: 5
+      until: redis_check.rc == 0
 
     - name: Ensure Redis is accessible
       ansible.builtin.fail:
-        msg: "Redis is not accessible on master node. Please ensure Redis service is running on {{ hostvars[groups['master'][0]].ansible_host }}:{{ redis_port }}"
+        msg: "Redis is not accessible on master node. Please ensure Redis service is running on localhost:{{ redis_port }}"
       when: redis_check.rc != 0
 
     - name: Stop any running ytops-client processes on master

@@ -44,7 +56,7 @@
           --policy {{ setup_policy }} \
           --cleanup-all
       environment:
-        REDIS_HOST: "{{ hostvars[groups['master'][0]].ansible_host }}"
+        REDIS_HOST: "localhost"
         REDIS_PORT: "{{ redis_port }}"
         REDIS_PASSWORD: "{{ vault_redis_password if use_redis_password | default(true) | string | lower == 'true' else '' }}"
       register: init_result
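The play now targets Redis on localhost, waits for the TCP port, and then retries `redis-cli ping` up to three times before failing. A rough Python sketch of the same two-stage readiness idea, assuming the conventional Redis port 6379 for `redis_port`:

```python
# Illustrative only: first stage of the readiness check (TCP port), mirroring wait_for's
# delay/timeout semantics; the playbook then retries a redis-cli PING on top of this.
import socket, time

def wait_for_port(host, port, timeout=60, delay=5):
    time.sleep(delay)                      # mirrors wait_for's initial delay
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return True
        except OSError:
            time.sleep(1)
    return False

print(wait_for_port("localhost", 6379))    # 6379 is an assumed default for redis_port
```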
@@ -37,18 +37,52 @@
       ansible.builtin.set_fact:
         base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
 
-    - name: Install required Python packages from requirements.txt
+    - name: Attempt to install required Python packages from requirements.txt
+      ansible.builtin.pip:
+        requirements: "{{ base_dir }}/ytops_client/requirements.txt"
+        extra_args: "--ignore-installed"
+      become: yes
+      register: pip_reqs_result
+      failed_when: false
+
+    - name: Retry installing requirements with break-system-packages
       ansible.builtin.pip:
         requirements: "{{ base_dir }}/ytops_client/requirements.txt"
         extra_args: "--ignore-installed"
       become: yes
       environment:
         PIP_BREAK_SYSTEM_PACKAGES: "1"
+      register: pip_reqs_result_retry
+      when: pip_reqs_result.failed and 'externally-managed-environment' in pip_reqs_result.msg
 
-    - name: Explicitly install the thrift package
+    - name: Fail if requirements installation did not succeed
+      ansible.builtin.fail:
+        msg: "Failed to install requirements after retry. Last error: {{ pip_reqs_result_retry.msg | default(pip_reqs_result.msg) }}"
+      when: >
+        pip_reqs_result.failed and
+        (pip_reqs_result_retry is not defined or pip_reqs_result_retry.failed)
+
+    - name: Attempt to explicitly install the thrift package
+      ansible.builtin.pip:
+        name: thrift
+        extra_args: "--ignore-installed"
+      become: yes
+      register: pip_thrift_result
+      failed_when: false
+
+    - name: Retry installing thrift with break-system-packages
       ansible.builtin.pip:
         name: thrift
         extra_args: "--ignore-installed"
       become: yes
       environment:
         PIP_BREAK_SYSTEM_PACKAGES: "1"
+      register: pip_thrift_result_retry
+      when: pip_thrift_result.failed and 'externally-managed-environment' in pip_thrift_result.msg
+
+    - name: Fail if thrift installation did not succeed
+      ansible.builtin.fail:
+        msg: "Failed to install thrift after retry. Last error: {{ pip_thrift_result_retry.msg | default(pip_thrift_result.msg) }}"
+      when: >
+        pip_thrift_result.failed and
+        (pip_thrift_result_retry is not defined or pip_thrift_result_retry.failed)
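The requirements and thrift tasks above encode a try-then-retry pattern around PEP 668 hosts: only when pip refuses with "externally-managed-environment" is the install repeated with `PIP_BREAK_SYSTEM_PACKAGES=1`. A standalone sketch of the same decision logic, not taken from the repo:

```python
# Illustrative only: subprocess version of the attempt/retry logic used in the playbook tasks.
import os, subprocess

def pip_install(args):
    first = subprocess.run(["pip3", "install", *args], capture_output=True, text=True)
    if first.returncode == 0 or "externally-managed-environment" not in first.stderr:
        return first
    # Retry only for the specific PEP 668 refusal, matching the tasks' when: condition.
    env = dict(os.environ, PIP_BREAK_SYSTEM_PACKAGES="1")
    return subprocess.run(["pip3", "install", *args], capture_output=True, text=True, env=env)

result = pip_install(["--upgrade", "--ignore-installed", "thrift"])
print(result.returncode)
```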
@@ -4,9 +4,15 @@
   gather_facts: no
   vars:
     # Default action
-    action: "status" # Available actions: start, stop, status, start-auth, stop-auth, start-download, stop-download, stop-generator
+    action: "status" # Available actions: start, stop, status, start-auth, stop-auth, start-download, stop-download
+    graceful_shutdown_timeout_seconds: 30
 
   tasks:
+    - name: "Ensure profile_prefixes is a flat list of all prefixes from profile_pools"
+      ansible.builtin.set_fact:
+        profile_prefixes: "{{ profile_pools | map(attribute='prefixes') | flatten }}"
+      when: profile_pools is defined
+
     - name: "Start all configured generators and simulators"
       when: action == "start"
       block:

@@ -25,7 +31,7 @@
             --limit {{ inventory_hostname }}
             -e "start_generator=true"
             -e "profile_prefix={{ combined_prefixes }}"
-            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+            -e "dummy_batch={{ dummy_batch | default(false) }}"
             {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
             {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
             {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}

@@ -42,7 +48,7 @@
             --limit {{ inventory_hostname }}
             -e "start_generator=true"
             -e "profile_prefix={{ item }}"
-            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+            -e "dummy_batch={{ dummy_batch | default(false) }}"
             {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
             {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
             {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}

@@ -56,42 +62,14 @@
             label: "profile: {{ item }}"
           when: auth_workers_per_profile | default(0) | int > 0
 
-        - name: "Start download simulator(s)"
-          when: profile_prefixes is defined and profile_prefixes | length > 0
-          block:
-            - name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ combined_prefixes }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              when: (download_workers_per_profile | default(0) | int == 0) and (download_workers_total | default(0) | int > 0)
-
-            - name: "Start parallel download simulators for each profile"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ item }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              loop: "{{ profile_prefixes }}"
-              loop_control:
-                loop_var: item
-                label: "profile: {{ item }}"
-              when: download_workers_per_profile | default(0) | int > 0
+        - name: "WORKAROUND: Align download worker config with auth worker config to bypass inventory bug"
+          ansible.builtin.set_fact:
+            download_workers_total: "{{ auth_workers_total | default(0) }}"
+            download_workers_per_profile: "{{ auth_workers_per_profile | default(0) }}"
+
+        - name: "Start download simulator(s)"
+          ansible.builtin.include_tasks: tasks/start-download-simulators.yml
+          when: profile_prefixes is defined and profile_prefixes | length > 0
 
     - name: "Start only auth generators on workers"
       when: action == "start-auth"

@@ -111,7 +89,7 @@
             --limit {{ inventory_hostname }}
             -e "start_generator=true"
             -e "profile_prefix={{ combined_prefixes }}"
-            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+            -e "dummy_batch={{ dummy_batch | default(false) }}"
             {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
             {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
             {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}

@@ -128,7 +106,7 @@
             --limit {{ inventory_hostname }}
             -e "start_generator=true"
             -e "profile_prefix={{ item }}"
-            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
+            -e "dummy_batch={{ dummy_batch | default(false) }}"
             {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
             {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
             {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}

@@ -145,57 +123,33 @@
     - name: "Start only download simulators on workers"
       when: action == "start-download"
       block:
+        - name: "WORKAROUND: Align download worker config with auth worker config to bypass inventory bug"
+          ansible.builtin.set_fact:
+            download_workers_total: "{{ auth_workers_total | default(0) }}"
+            download_workers_per_profile: "{{ auth_workers_per_profile | default(0) }}"
+
         - name: "Set combined profile prefixes string"
           ansible.builtin.set_fact:
             combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
           when: profile_prefixes is defined and profile_prefixes | length > 0
 
         - name: "Start download simulator(s)"
+          ansible.builtin.include_tasks: tasks/start-download-simulators.yml
           when: profile_prefixes is defined and profile_prefixes | length > 0
-          block:
-            - name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ combined_prefixes }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              when: (download_workers_per_profile | default(0) | int == 0) and (download_workers_total | default(0) | int > 0)
-
-            - name: "Start parallel download simulators for each profile"
-              ansible.builtin.command: >-
-                ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
-                -i {{ inventory_file }}
-                --limit {{ inventory_hostname }}
-                -e "start_download=true"
-                -e "profile_prefix={{ item }}"
-                {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
-                {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
-                {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
-                {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
-              delegate_to: localhost
-              changed_when: true
-              loop: "{{ profile_prefixes }}"
-              loop_control:
-                loop_var: item
-                label: "profile: {{ item }}"
-              when: download_workers_per_profile | default(0) | int > 0
 
-    - name: "Stop only auth generators on workers (via playbook call)"
-      when: action == "stop-generator"
+    - name: "Stop only auth generators on workers"
+      when: action == "stop-auth"
       block:
         - name: "Set combined profile prefixes string"
           ansible.builtin.set_fact:
             combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
           when: profile_prefixes is defined and profile_prefixes | length > 0
 
-        - name: "Stop single auth generator for profiles: {{ combined_prefixes | default('none') }}"
+        - name: "Gracefully stop auth generator(s) via playbook call"
+          when: profile_prefixes is defined and profile_prefixes | length > 0
+          block:
+            - name: "Stop single auth generator for all profiles: {{ combined_prefixes | default('none') }}"
              ansible.builtin.command: >-
                ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
                -i {{ inventory_file }}

@@ -204,53 +158,55 @@
                -e "profile_prefix={{ combined_prefixes }}"
              delegate_to: localhost
              changed_when: true
-              when: profile_prefixes is defined and profile_prefixes | length > 0
+              when: (auth_workers_per_profile | default(0) | int == 0) and (auth_workers_total | default(0) | int > 0)
 
-    - name: "Stop only auth generators on workers"
-      when: action == "stop-auth"
-      block:
-        - name: Kill all auth generator tmux sessions on this worker
-          ansible.builtin.shell:
-            cmd: |
-              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-auth-"); do
-                tmux kill-session -t "$session"
-              done || true
-          ignore_errors: yes
-          changed_when: false
-
-        - name: Kill all ytops-client auth generator processes on this worker
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate
-              ps aux | grep "[y]tops-client.*stress-policy.*12_queue_auth_simulation" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              sleep 0.5
-              # Force kill
-              ps aux | grep "[y]tops-client.*stress-policy.*12_queue_auth_simulation" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
-          ignore_errors: yes
-          changed_when: false
+            - name: "Stop parallel auth generators for each profile"
+              ansible.builtin.command: >-
+                ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
+                -i {{ inventory_file }}
+                --limit {{ inventory_hostname }}
+                -e "stop_generator=true"
+                -e "profile_prefix={{ item }}"
+              delegate_to: localhost
+              changed_when: true
+              loop: "{{ profile_prefixes }}"
+              loop_control:
+                loop_var: item
+                label: "profile: {{ item }}"
+              when: auth_workers_per_profile | default(0) | int > 0
 
     - name: "Stop only download simulators on workers"
       when: action == "stop-download"
       block:
-        - name: Kill all download simulator tmux sessions on this worker
+        - name: "WORKAROUND: Align download worker config with auth worker config to bypass inventory bug"
+          ansible.builtin.set_fact:
+            download_workers_total: "{{ auth_workers_total | default(0) }}"
+            download_workers_per_profile: "{{ auth_workers_per_profile | default(0) }}"
+
+        - name: "Set combined profile prefixes string"
+          ansible.builtin.set_fact:
+            combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
+          when: profile_prefixes is defined and profile_prefixes | length > 0
+
+        - name: "Stop single download simulator group"
           ansible.builtin.shell:
             cmd: |
-              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-download-"); do
+              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E '^stress-download-worker-[0-9]+$'); do
                 tmux kill-session -t "$session"
                done || true
+          when: download_workers_total | default(0) | int > 0
+          changed_when: true
           ignore_errors: yes
-          changed_when: false
 
-        - name: Kill all ytops-client download simulator processes on this worker
-          ansible.builtin.shell:
-            cmd: |
-              # Gracefully terminate
-              ps aux | grep "[y]tops-client.*stress-policy.*11_direct_docker_download_simulation" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
-              sleep 0.5
-              # Force kill
-              ps aux | grep "[y]tops-client.*stress-policy.*11_direct_docker_download_simulation" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
+        - name: "Stop parallel download simulators for each profile"
+          ansible.builtin.command: "tmux kill-session -t stress-download-{{ item }}"
+          loop: "{{ profile_prefixes }}"
+          loop_control:
+            loop_var: item
+            label: "profile: {{ item }}"
+          when: (download_workers_total | default(0) | int == 0) and (download_workers_per_profile | default(0) | int > 0)
+          changed_when: true
           ignore_errors: yes
-          changed_when: false
 
     - name: "Stop all worker generators and simulators"
       when: action == "stop"
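The new `profile_prefixes` fact is what lets workers run without an explicit profile list: the prefixes are derived from the per-worker `profile_pools` now defined in cluster.yaml. A small sketch of the equivalent of `profile_pools | map(attribute='prefixes') | flatten`; the pool layout shown is an assumption, only the `prefixes` attribute comes from the diff:

```python
# Illustrative only: plain-Python equivalent of the Ansible map/flatten filters used in the set_fact.
profile_pools = [
    {"prefixes": ["user1", "user2"]},   # example pool, invented for illustration
    {"prefixes": ["user3"]},
]

profile_prefixes = [prefix for pool in profile_pools for prefix in pool["prefixes"]]
print(profile_prefixes)   # ['user1', 'user2', 'user3']
```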
@@ -3,7 +3,7 @@
   hosts: all
   gather_facts: no
   vars:
-    ytops_source_dir: "{{ playbook_dir }}/../ytops_client-source"
+    ytops_source_dir: "{{ source_base_dir | default(playbook_dir + '/../ytops_client-source') }}"
   vars_files:
     - "group_vars/all/vault.yml"
   pre_tasks:
ansible/playbook-update-yt-dlp-docker.yml (new file, 30 lines)
@@ -0,0 +1,30 @@
+---
+- name: "STRESS-SETUP: Build and push yt-dlp docker image"
+  hosts: workers
+  gather_facts: no
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
+  tasks:
+    - name: "Build and push yt-dlp image on worker"
+      ansible.builtin.shell:
+        cmd: |
+          cd {{ airflow_worker_dir }}
+          if [ -f .env ]; then
+            set -a && . ./.env && set +a
+          fi
+          ./bin/build-yt-dlp-image
+      register: build_output
+      changed_when: true
+
+    - name: "Display build output"
+      ansible.builtin.debug:
+        var: build_output.stdout_lines
+      when: build_output.stdout_lines is defined
105
ansible/tasks/start-download-simulators.yml
Normal file
105
ansible/tasks/start-download-simulators.yml
Normal file
@ -0,0 +1,105 @@
---
- name: "Scale down excess download workers if necessary"
  when: (download_workers_per_profile | default(0) | int == 0) and (download_workers_total | default(0) | int > 0)
  block:
    - name: "Find running download simulator tmux sessions for this group"
      ansible.builtin.shell:
        cmd: "tmux list-sessions -F '#{session_name}' 2>/dev/null | grep -E '^stress-download-worker-[0-9]+$' || true"
      register: running_sessions
      changed_when: false
      ignore_errors: yes

    - name: "Identify excess download simulator sessions to stop"
      ansible.builtin.set_fact:
        excess_sessions_to_stop: "{{ excess_sessions_to_stop | default([]) + [item] }}"
      vars:
        worker_num_str: "{{ item | regex_replace('^stress-download-worker-', '') }}"
      when: worker_num_str is number and (worker_num_str | int > (download_workers_total | int))
      loop: "{{ running_sessions.stdout_lines }}"
      loop_control:
        label: "Identifying excess session: {{ item }}"

    - name: "Get PIDs for excess download workers"
      ansible.builtin.shell:
        cmd: |
          PANE_PID=$(tmux list-panes -s "{{ item }}" -F '#{pane_pid}' | head -n 1)
          if [ -n "$PANE_PID" ]; then
            pgrep -P "$PANE_PID" || true
          fi
      register: excess_pids_raw
      loop: "{{ excess_sessions_to_stop | default([]) }}"
      changed_when: false
      ignore_errors: true

    - name: "Set fact for PIDs to kill"
      ansible.builtin.set_fact:
        pids_to_kill_gracefully: "{{ excess_pids_raw.results | map(attribute='stdout') | reject('==', '') | list }}"

    - name: "Gracefully terminate excess download workers"
      ansible.builtin.shell:
        cmd: "kill {{ item }}"
      loop: "{{ pids_to_kill_gracefully | default([]) }}"
      when: pids_to_kill_gracefully is defined and pids_to_kill_gracefully | length > 0
      ignore_errors: true
      changed_when: false

    - name: "Wait for graceful shutdown of excess workers"
      ansible.builtin.pause:
        seconds: "{{ graceful_shutdown_timeout_seconds }}"
      when: pids_to_kill_gracefully is defined and pids_to_kill_gracefully | length > 0

    - name: "Force kill any lingering excess workers"
      ansible.builtin.shell:
        cmd: "kill -9 {{ item }}"
      loop: "{{ pids_to_kill_gracefully | default([]) }}"
      when: pids_to_kill_gracefully is defined and pids_to_kill_gracefully | length > 0
      ignore_errors: true
      changed_when: false

    - name: "Kill tmux sessions for excess workers"
      ansible.builtin.shell:
        cmd: "tmux kill-session -t {{ item }}"
      loop: "{{ excess_sessions_to_stop | default([]) }}"
      when: excess_sessions_to_stop is defined and excess_sessions_to_stop | length > 0
      ignore_errors: true
      changed_when: false

- name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
  ansible.builtin.command: >-
    ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
    -i {{ inventory_file }}
    --limit {{ inventory_hostname }}
    -e "start_download=true"
    -e "profile_prefix=worker"
    -e "display_prefix={{ combined_prefixes }}"
    -e "worker_num={{ worker_num }}"
    {% if dummy_batch | default(false) %}-e "dummy_batch=true"{% endif %}
    {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
    {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
    {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
  delegate_to: localhost
  changed_when: true
  when: download_workers_total | default(0) | int > 0
  loop: "{{ range(1, (download_workers_total | default(1) | int) + 1) | list }}"
  loop_control:
    loop_var: worker_num
    label: "worker {{ worker_num }}"

- name: "Start parallel download simulators for each profile"
  ansible.builtin.command: >-
    ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
    -i {{ inventory_file }}
    --limit {{ inventory_hostname }}
    -e "start_download=true"
    -e "profile_prefix={{ item }}"
    {% if dummy_batch | default(false) %}-e "dummy_batch=true"{% endif %}
    {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
    {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
    {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
  delegate_to: localhost
  changed_when: true
  loop: "{{ profile_prefixes }}"
  loop_control:
    loop_var: item
    label: "profile: {{ item }}"
  when: (download_workers_total | default(0) | int == 0) and (download_workers_per_profile | default(0) | int > 0)
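These tasks are designed to be included from a per-worker play, switching between the "single worker for all profiles" and "one worker per profile" branches based on the worker's variables. The sketch below shows one way they could be wired up; the play and host group names are illustrative assumptions, only the variable names (`download_workers_total`, `download_workers_per_profile`, `profile_prefixes`, `combined_prefixes`) come from the task file itself.

```yaml
# Minimal sketch of including the new task file from a hypothetical worker play.
- hosts: download_workers
  tasks:
    - name: "Start download simulators for this worker"
      ansible.builtin.include_tasks: tasks/start-download-simulators.yml
      vars:
        download_workers_total: 2          # single shared-profile workers (tmux sessions)
        download_workers_per_profile: 0    # set > 0 to start one worker per prefix instead
        profile_prefixes: ["user31", "user32"]
        combined_prefixes: "user31,user32"
```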
@ -15,3 +15,7 @@ AWS_REGION={{ vault_s3_delivery_aws_region }}
 ACCOUNT_ACTIVE_DURATION_MIN=7
 ACCOUNT_COOLDOWN_DURATION_MIN=30
 STRESS_POLICY_INBOX_QUEUE=dev_stress_inbox
+
+# --- Stress Test Environment Names ---
+YTOPS_AUTH_ENV={{ stress_auth_env | default('sim_auth') }}
+YTOPS_DOWNLOAD_ENV={{ stress_download_env | default('sim_download') }}
@ -121,20 +121,214 @@ def generate_policy(cluster_config, output_path):
     print(f"Successfully generated profile setup policy at: {output_path}")
 
 
+def generate_enforcer_policy(cluster_config, output_path):
+    """Generate the enforcer policy file."""
+    all_workers = cluster_config.get('workers', {})
+
+    enforcement_pools = []
+    for worker_name, worker_config in sorted(all_workers.items()):
+        all_prefixes = []
+        for pool in worker_config.get('profile_pools', []):
+            all_prefixes.extend(pool.get('prefixes', []))
+
+        if not all_prefixes:
+            continue
+
+        pool_entry = OrderedDict([
+            ('name', f"server_{worker_name}"),
+            ('profile_group_patterns', sorted(list(set(all_prefixes)))),
+            ('max_active_profiles', 1)
+        ])
+        enforcement_pools.append(pool_entry)
+
+    with open(output_path, 'w') as f:
+        f.write("# Policy for the unified simulation enforcer.\n")
+        f.write("# This file is used by `bin/ytops-client policy-enforcer --live` to manage\n")
+        f.write("# both the authentication and download simulation environments from a single process.\n\n")
+        f.write("# !!! THIS FILE IS AUTO-GENERATED by tools/generate-profile-setup-policy.py !!!\n")
+        f.write("# !!! DO NOT EDIT. Your changes will be overwritten. !!!\n")
+        f.write("# !!! Edit cluster.green.yml and re-run the generator instead. !!!\n\n")
+
+        f.write("simulation_parameters:\n")
+        f.write("  # --- Common Redis settings for all tools ---\n")
+        f.write("  # The enforcer will connect to two different Redis environments (key prefixes)\n")
+        f.write("  # based on these settings, applying the corresponding policies to each.\n")
+        f.write('  env_file: ".env"\n')
+        f.write('  auth_env: "sim_auth"\n')
+        f.write('  download_env: "sim_download"\n')
+        f.write(" \n")
+        f.write("  # How often the enforcer should wake up and apply all policies.\n")
+        f.write("  interval_seconds: 2\n\n")
+
+        f.write("# --- Common & Pool-specific Settings ---\n")
+        f.write("# Common settings are applied to all profile groups discovered via the pools below.\n")
+        f.write("# A pool can optionally override these settings by defining its own 'group_settings' block.\n")
+        f.write("common_group_settings:\n")
+        f.write("  auth:\n")
+        f.write("    max_active_profiles: 1\n")
+        f.write("    rotate_after_requests: 5\n")
+        f.write("    rest_duration_minutes_on_rotation: 0.20\n")
+        f.write("    wait_download_finish_per_group: true\n")
+        f.write("    max_wait_for_downloads_minutes: 240\n")
+        f.write("  download:\n")
+        f.write("    max_active_profiles: 1\n")
+        f.write("    rotate_after_requests: 0\n")
+        f.write("    rest_duration_minutes_on_rotation: 0.2\n\n")
+
+        f.write("# Defines pools of profile groups with their own concurrency limits.\n")
+        f.write("enforcement_pools:\n")
+
+        for pool in enforcement_pools:
+            f.write(f'  - name: "{pool["name"]}"\n')
+            patterns_str = ", ".join([f'"{p}"' for p in pool['profile_group_patterns']])
+            f.write(f'    profile_group_patterns: [{patterns_str}]\n')
+            f.write(f'    max_active_profiles: {pool["max_active_profiles"]}\n')
+
+    rest_of_file = """
+# --- Policies for the Authentication Simulation ---
+auth_policy_enforcer_config:
+
+  # Ban if 2 failures occur within a 1-minute window.
+  #ban_on_failures: 2
+  #ban_on_failures_window_minutes: 1
+
+  # The standard rest policy is disabled, as rotation is handled by the profile group.
+
+  # New rate limit policy to enforce requests-per-hour limits.
+  # For guest sessions, the limit is ~300 videos/hour.
+  rate_limit_requests: 0
+  rate_limit_window_minutes: 60
+  rate_limit_rest_duration_minutes: 5
+
+  rest_after_requests: 0
+  rest_duration_minutes: 10
+
+  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
+  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
+  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
+  # The settings below should be configured to respect these limits.
+
+  # New setting for load balancing across profile groups.
+  # "longest_idle": Activates the profile that has been idle the longest across all groups (based on last_used time).
+  #                 This is a global FIFO strategy that effectively cycles through profiles regardless of their group.
+  # "least_loaded": Prioritizes activating a profile from the group with the fewest pending downloads.
+  #                 If multiple groups have zero pending downloads, it acts as a FIFO queue, activating
+  #                 the one that finished its last download batch the earliest. This is useful when you want
+  #                 to ensure a group finishes its entire workload before another group starts.
+  profile_selection_strategy: "longest_idle"
+
+  # The 'global_max_active_profiles' setting is now superseded by the per-pool limits
+  # defined in the 'enforcement_pools' section.
+
+  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
+  # The enforcer logic should be updated to read from there.
+
+  proxy_work_minutes: 0
+  proxy_rest_duration_minutes: 0
+
+  # Global maximum time a proxy can be active before being rested, regardless of
+  # other rules. Acts as a safety net. Set to 0 to disable.
+  max_global_proxy_active_minutes: 0
+  rest_duration_on_max_active: 10
+
+  # Proxy-level ban on failure burst is disabled.
+  proxy_ban_on_failures: 0
+  proxy_ban_window_minutes: 2
+
+  # Clean up locks held for more than 16 minutes (960s) to prevent stuck workers.
+  # This should be longer than the docker container timeout (15m).
+  unlock_stale_locks_after_seconds: 960
+
+  # A short post-task cooldown for auth simulation profiles. When a batch is finished,
+  # the profile is put into COOLDOWN briefly. This prevents a worker from immediately
+  # re-locking the same profile, giving the policy enforcer a window to perform rotation.
+  unlock_cooldown_seconds: 0
+
+# --- Cross-simulation synchronization ---
+# This section is simplified because the link between auth and download profiles
+# is now defined in the `profile_group_definitions`.
+cross_simulation_sync:
+  # Which states to synchronize from auth to download.
+  sync_states:
+    - "BANNED"
+  # If true, a BANNED state on an auth profile will force the download profile to also be BANNED.
+  enforce_auth_lead: true
+  # CRITICAL: Ensures the correct download profile GROUP is active.
+  sync_active_profile: true
+  # When an auth profile is in the 'waiting_downloads' state, ensure the matching download profile is active.
+  sync_waiting_downloads: true
+
+# --- Policies for the Download Simulation ---
+download_policy_enforcer_config:
+
+  # Ban if 1 failure occurs within a 1-minute window.
+  ban_on_failures: 1
+  ban_on_failures_window_minutes: 1
+
+  # Standard rest policy is disabled in favor of group rotation.
+
+  # New rate limit policy to enforce requests-per-hour limits.
+  # For guest sessions, the limit is ~300 videos/hour. We set it slightly lower to be safe.
+  rate_limit_requests: 280
+  rate_limit_window_minutes: 60
+  rate_limit_rest_duration_minutes: 5
+  rest_after_requests: 0
+  rest_duration_minutes: 20
+
+  # NOTE on Rate Limits: With the default yt-dlp settings, the rate limit for guest
+  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
+  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
+  # The settings below should be configured to respect these limits.
+
+  # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
+  # The enforcer logic should be updated to read from there.
+
+  # Time-based proxy rules are disabled.
+  proxy_work_minutes: 0
+  proxy_rest_duration_minutes: 10
+
+  # Global maximum time a proxy can be active before being rested, regardless of
+  # other rules. Acts as a safety net. Set to 0 to disable.
+  max_global_proxy_active_minutes: 0
+  rest_duration_on_max_active: 10
+
+  # Proxy-level ban on failure burst is disabled.
+  proxy_ban_on_failures: 3
+  proxy_ban_window_minutes: 1
+
+  # Clean up download locks held for more than 16 minutes (960s) to allow for long downloads.
+  # This should be longer than the docker container timeout (15m).
+  unlock_stale_locks_after_seconds: 960
+
+  # After a profile is used for a download, unlock it but put it in COOLDOWN
+  # state for 2-3s. This is enforced by the worker, which reads this config from Redis.
+  unlock_cooldown_seconds: [2, 3]
+"""
+    with open(output_path, 'a') as f:
+        f.write(rest_of_file)
+
+    print(f"Successfully generated enforcer policy at: {output_path}")
+
+
 def main():
-    if len(sys.argv) != 3:
+    if len(sys.argv) < 3 or len(sys.argv) > 4:
-        print("Usage: ./tools/generate-profile-setup-policy.py <cluster-config-file> <output-policy-file>")
+        print("Usage: ./tools/generate-profile-setup-policy.py <cluster-config-file> <output-profile-policy-file> [<output-enforcer-policy-file>]")
         sys.exit(1)
 
     config_path = sys.argv[1]
-    output_path = sys.argv[2]
+    profile_output_path = sys.argv[2]
 
     if not os.path.exists(config_path):
         print(f"Error: Cluster configuration file not found at '{config_path}'", file=sys.stderr)
         sys.exit(1)
 
     cluster_config = load_cluster_config(config_path)
-    generate_policy(cluster_config, output_path)
+    generate_policy(cluster_config, profile_output_path)
+
+    if len(sys.argv) == 4:
+        enforcer_output_path = sys.argv[3]
+        generate_enforcer_policy(cluster_config, enforcer_output_path)
 
 
 if __name__ == "__main__":
     main()
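For a cluster file that defines, say, a worker `dl003` with profile prefixes `user31` and `user32`, the generator above would emit an `enforcement_pools` entry along the lines of the sketch below. The worker name and prefixes are illustrative; only the structure follows `generate_enforcer_policy`.

```yaml
# Illustrative fragment of the generated enforcer policy for one worker (names are examples).
enforcement_pools:
  - name: "server_dl003"
    profile_group_patterns: ["user31", "user32"]
    max_active_profiles: 1
```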
@ -206,6 +206,18 @@ queue_policy:
   # Example: formats_to_download: ["140-dashy", "299-dashy"]
   formats_to_download: "from_download_policy"
+
+  # Whether to report completion back to a queue. Always reported for auth.
+  report_completion: true
+
+  # Queue to report completion to
+  completion_queue: "queue2_auth_completed"
+
+  # Queue to report failures to
+  failure_queue: "queue2_auth_fail"
+
+  # Queue to report skipped tasks to
+  skipped_queue: "queue2_auth_skipped"
 
 simulation_parameters:
   auth_env: "sim_auth"
   download_env: "sim_download"
@ -13,13 +13,10 @@ simulation_parameters:
   # How often the enforcer should wake up and apply all policies.
   interval_seconds: 2
 
-# --- Dynamic Profile Group Templates ---
-# The policy enforcer will find all profile prefixes matching a pattern in Redis
-# and apply the settings from the matching template. This avoids having to list
-# every profile group manually.
-# NOTE: The policy enforcer tool must be updated to support this format.
-profile_group_templates:
-  - pattern: "user*"
+# --- Common & Pool-specific Settings ---
+# Common settings are applied to all profile groups discovered via the pools below.
+# A pool can optionally override these settings by defining its own 'group_settings' block.
+common_group_settings:
   auth:
     max_active_profiles: 1
     rotate_after_requests: 5
@ -27,9 +24,19 @@ profile_group_templates:
     wait_download_finish_per_group: true
     max_wait_for_downloads_minutes: 240
   download:
+    max_active_profiles: 1
     rotate_after_requests: 0
     rest_duration_minutes_on_rotation: 0.2
 
+# Defines pools of profile groups with their own concurrency limits.
+enforcement_pools:
+  - name: "server_dl003_pool"
+    profile_group_patterns: ["user31", "user32"]
+    max_active_profiles: 1
+  - name: "server_dl006_pool"
+    profile_group_patterns: ["user61", "user62"]
+    max_active_profiles: 1
+
 # --- Policies for the Authentication Simulation ---
 auth_policy_enforcer_config:
 
@ -62,9 +69,8 @@ auth_policy_enforcer_config:
   #                 to ensure a group finishes its entire workload before another group starts.
   profile_selection_strategy: "longest_idle"
 
-  # Enforce a total limit of active profiles across all groups defined below.
-  # Set to 1 to ensure only one group's profile is active at any time.
-  global_max_active_profiles: 1
+  # The 'global_max_active_profiles' setting is now superseded by the per-pool limits
+  # defined in the 'enforcement_pools' section.
 
   # The 'profile_groups' section is now inherited from 'profile_group_definitions' above.
   # The enforcer logic should be updated to read from there.
@ -106,6 +112,7 @@ cross_simulation_sync:
 
 # --- Policies for the Download Simulation ---
 download_policy_enforcer_config:
+
   # Ban if 1 failure occurs within a 1-minute window.
   ban_on_failures: 1
   ban_on_failures_window_minutes: 1
@ -68,6 +68,9 @@ class PolicyEnforcer:
         all_profiles_list = self.manager.list_profiles()
         all_profiles_map = {p['name']: p for p in all_profiles_list}
 
+        # Sync profile states from their assigned proxy's state (e.g., if proxy is BANNED, ban profile).
+        self.enforce_proxy_state_on_profiles(all_profiles_list, all_profiles_map)
+
         # Apply profile group policies (rotation, max_active). This will modify the local `all_profiles_map`.
         self.enforce_profile_group_policies(getattr(args, 'profile_groups', []), all_profiles_map, args)
 
@ -197,12 +200,26 @@ class PolicyEnforcer:
             live_active_counts[group_name] = count
         logger.debug(f"Initial live active counts: {live_active_counts}")
 
-        # --- New Global Max Active Logic ---
+        # --- New Enforcement Pool and Global Max Active Logic ---
+        enforcement_pools = getattr(args, 'enforcement_pools', [])
+        live_pool_active_counts = {}
+        if enforcement_pools:
+            for i, pool in enumerate(enforcement_pools):
+                pool_name = pool.get('name', f'pool_{i}')
+                live_pool_active_counts[pool_name] = 0
+
+            for group_name, count in live_active_counts.items():
+                group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
+                pool_name = group_policy.get('pool_name')
+                if pool_name:
+                    live_pool_active_counts[pool_name] = live_pool_active_counts.get(pool_name, 0) + count
+            logger.debug(f"Initial live pool active counts: {live_pool_active_counts}")
+
         global_max_active = getattr(args, 'global_max_active_profiles', 0)
         live_global_active_count = sum(live_active_counts.values())
         if global_max_active > 0:
             logger.debug(f"Enforcing global max active profiles limit of {global_max_active}. Current global active: {live_global_active_count}")
-        # --- End New Global Logic ---
+        # --- End New Logic ---
 
         # --- End group logic setup ---
 
@ -278,7 +295,7 @@ class PolicyEnforcer:
         # --- End new logic ---
 
         # --- New Sorting Logic based on Profile Selection Strategy ---
-        strategy = getattr(args, 'profile_selection_strategy', 'longest_idle')
+        strategy = getattr(args, 'profile_selection_strategy', None)
         if strategy == 'least_loaded' and profile_groups:
             logger.debug("Applying 'least_loaded' profile selection strategy.")
             # Separate profiles that are ready from those that are not
@ -345,10 +362,8 @@ class PolicyEnforcer:
             profiles_to_check = sorted_ready_profiles + not_ready_profiles
             logger.debug(f"Activation candidates for 'least_loaded' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
 
-        else: # Default 'longest_idle' sort
-            if strategy not in ['longest_idle']:
-                logger.warning(f"Unknown or unhandled profile_selection_strategy '{strategy}'. Defaulting to 'longest_idle'.")
+        elif strategy == 'longest_idle':
+            logger.debug("Applying 'longest_idle' profile selection strategy.")
 
             # Separate profiles that are ready to be activated from those still resting.
             # A profile waiting for downloads is NOT considered ready for activation.
             ready_profiles = [
@ -369,6 +384,11 @@ class PolicyEnforcer:
             # The final list to check will process all ready profiles first, then wait for the not-ready ones.
             profiles_to_check = ready_profiles + not_ready_profiles
             logger.debug(f"Activation candidates for 'longest_idle' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
+        else: # Default sort (no strategy)
+            if strategy: # Log a warning if an unknown strategy was provided
+                logger.warning(f"Unknown profile_selection_strategy '{strategy}'. Using default FIFO sort by rest time.")
+            # Default to a simple FIFO sort based on when their rest period ends.
+            profiles_to_check.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
         # --- End New Sorting Logic ---
 
         # --- New logic: Identify groups with waiting profiles ---
@ -490,13 +510,30 @@ class PolicyEnforcer:
             profile_name = profile['name']
             group_name = profile_to_group_map.get(profile_name)
 
-            # --- New Global Max Active Check ---
-            # This check prevents NEW profiles (in RESTING state) from becoming active if the global limit is reached.
-            # It allows COOLDOWN profiles to become active, as they are already part of the active count.
-            if global_max_active > 0 and live_global_active_count >= global_max_active and profile['state'] == ProfileState.RESTING.value:
+            # --- New Pool and Global Max Active Check ---
+            is_new_activation = profile['state'] == ProfileState.RESTING.value
+            if is_new_activation:
+                # Check pool limits first
+                if enforcement_pools and group_name:
+                    group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
+                    pool_name = group_policy.get('pool_name')
+                    pool_config = None
+                    for i, p in enumerate(enforcement_pools):
+                        if p.get('name', f'pool_{i}') == pool_name:
+                            pool_config = p
+                            break
+                    if pool_config:
+                        pool_max_active = pool_config.get('max_active_profiles', 0)
+                        current_pool_active = live_pool_active_counts.get(pool_name, 0)
+                        if pool_max_active > 0 and current_pool_active >= pool_max_active:
+                            logger.debug(f"Profile '{profile_name}' rest ended, but pool '{pool_name}' max active limit ({pool_max_active}) has been reached. Deferring activation.")
+                            continue
+
+                # Then check global limit if it's still configured (for backward compatibility)
+                if global_max_active > 0 and live_global_active_count >= global_max_active:
                     logger.debug(f"Profile '{profile_name}' rest ended, but global max active limit ({global_max_active}) has been reached. Deferring activation.")
                     continue
-            # --- End New Global Check ---
+            # --- End New Check ---
 
             # --- Group-aware unrest check ---
             if group_name:
@ -577,11 +614,13 @@ class PolicyEnforcer:
                     continue # Skip activation for this profile
             # --- End group check ---
 
-            # Before activating, ensure the profile's proxy is not resting.
+            # Before activating, ensure the profile's proxy is not resting or banned.
             proxy_url = profile.get('proxy')
             if proxy_url:
                 proxy_state_data = proxy_states.get(proxy_url, {})
-                if proxy_state_data.get('state') == ProfileState.RESTING.value:
+                proxy_state = proxy_state_data.get('state')
+
+                if proxy_state == ProfileState.RESTING.value:
                     logger.debug(f"Profile '{profile['name']}' rest period ended, but its proxy '{proxy_url}' is still resting. Deferring activation.")
 
                     # Update reason for clarity in the UI when a profile is blocked by its proxy.
@ -596,10 +635,32 @@ class PolicyEnforcer:
 
                     continue # Do not activate this profile yet.
 
-            # Update group counter BEFORE making any changes, so subsequent checks in this cycle use the updated count
+                elif proxy_state == ProfileState.BANNED.value and profile['state'] != ProfileState.BANNED.value:
+                    # This profile is about to be activated, but its proxy is banned. Ban it.
+                    reason = f"Proxy '{proxy_url}' is BANNED"
+                    logger.warning(f"Banning profile '{profile['name']}' because its proxy is banned: {reason}")
+                    self.actions_taken_this_cycle += 1
+                    if not self.dry_run:
+                        sm = self.manager.get_state_machine(profile_name)
+                        if sm:
+                            sm.ban(reason=reason)
+                    # Update local map
+                    all_profiles_map[profile_name]['state'] = ProfileState.BANNED.value
+                    all_profiles_map[profile_name]['reason'] = reason
+                    continue # Skip activation; it is now banned.
+
+            # Update group and pool counters BEFORE making any changes, so subsequent checks in this cycle use the updated count
             if group_name and profile['state'] == ProfileState.RESTING.value:
                 # For RESTING profiles, they're becoming active, so increment the count
                 live_active_counts[group_name] = live_active_counts.get(group_name, 0) + 1
 
+                # Also increment the pool counter
+                if enforcement_pools:
+                    group_policy = next((g for g in profile_groups if g.get('name') == group_name), {})
+                    pool_name = group_policy.get('pool_name')
+                    if pool_name:
+                        live_pool_active_counts[pool_name] = live_pool_active_counts.get(pool_name, 0) + 1
+
                 # Also increment the global counter
                 if global_max_active > 0:
                     live_global_active_count += 1
@ -903,20 +964,68 @@ class PolicyEnforcer:
             if num_active_or_locked == 0:
                 logger.debug(f"Group '{group_name}' has no active profiles. `enforce_unrest_policy` will attempt to activate one.")
 
-        # --- 4. Global Self-Healing: Enforce global_max_active_profiles ---
-        # This runs after all per-group healing and ensures the global limit is respected.
+        # --- 4. Pool and Global Self-Healing ---
+        enforcement_pools = getattr(args, 'enforcement_pools', [])
+        if enforcement_pools:
+            for i, pool in enumerate(enforcement_pools):
+                pool_name = pool.get('name', f'pool_{i}')
+                pool_max_active = pool.get('max_active_profiles', 0)
+                if not pool_max_active or pool_max_active <= 0:
+                    continue
+
+                # Get all profile names belonging to this pool
+                pool_profile_names = set()
+                for group in profile_groups:
+                    if group.get('pool_name') == pool_name:
+                        prefix = group.get('prefix')
+                        if prefix:
+                            for p_name in all_profiles_map:
+                                if p_name.startswith(prefix):
+                                    pool_profile_names.add(p_name)
+
+                # Get current active count for this pool from our local map
+                current_pool_active = [
+                    p for name, p in all_profiles_map.items()
+                    if name in pool_profile_names and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]
+                ]
+
+                num_pool_active = len(current_pool_active)
+                if num_pool_active > pool_max_active:
+                    logger.warning(f"Pool Healing ('{pool_name}'): Found {num_pool_active} active profiles, but pool max is {pool_max_active}. Resting excess.")
+
+                    profiles_that_can_be_rested = [p for p in current_pool_active if p['state'] == ProfileState.ACTIVE.value]
+                    profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True)
+                    profiles_that_can_be_rested.sort(key=lambda p: (
+                        p.get('success_count', 0) + p.get('failure_count', 0) +
+                        p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)
+                    ), reverse=True)
+
+                    num_to_rest = num_pool_active - pool_max_active
+                    for profile in profiles_that_can_be_rested[:num_to_rest]:
+                        logger.warning(f"Pool Healing ('{pool_name}'): Resting profile '{profile['name']}'.")
+                        self.actions_taken_this_cycle += 1
+                        if not self.dry_run:
+                            sm = self.manager.get_state_machine(profile['name'])
+                            if sm:
+                                sm.rest(reason=f"Pool '{pool_name}' max_active healing", duration_minutes=0.02)
+
+                        all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value
+                        all_profiles_map[profile['name']]['rest_reason'] = f"Pool '{pool_name}' max_active healing"
+                        all_profiles_map[profile['name']]['rest_until'] = time.time() + (0.02 * 60)
+
+        # For backward compatibility, also enforce global_max_active_profiles if it is set.
         global_max_active = getattr(args, 'global_max_active_profiles', 0)
         if global_max_active > 0:
             # Get all profiles managed by any group
             all_grouped_profiles = set()
             for group in profile_groups:
-                profiles_in_group = set()
-                if 'profiles' in group:
-                    profiles_in_group = set(group['profiles'])
-                elif 'prefix' in group:
+                if 'prefix' in group:
                     prefix = group['prefix']
-                    profiles_in_group = {p['name'] for p in all_profiles_list if p['name'].startswith(prefix)}
-                all_grouped_profiles.update(profiles_in_group)
+                    for p_name in all_profiles_map:
+                        if p_name.startswith(prefix):
+                            all_grouped_profiles.add(p_name)
+                elif 'profiles' in group:
+                    all_grouped_profiles.update(group['profiles'])
 
             # Get current active count across all groups from our local map
             current_global_active = [
@ -928,29 +1037,23 @@ class PolicyEnforcer:
             if num_global_active > global_max_active:
                 logger.warning(f"Global Healing: Found {num_global_active} active profiles across all groups, but global max is {global_max_active}. Resting excess.")
 
-                # We can only rest profiles that are in the ACTIVE state, not LOCKED.
                 profiles_that_can_be_rested = [p for p in current_global_active if p['state'] == ProfileState.ACTIVE.value]
-                # Sort to determine which profiles to rest, using the same logic as per-group healing.
-                profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True) # Higher name first
+                profiles_that_can_be_rested.sort(key=lambda p: natural_sort_key(p.get('name', '')), reverse=True)
                 profiles_that_can_be_rested.sort(key=lambda p: (
                     p.get('success_count', 0) + p.get('failure_count', 0) +
                     p.get('tolerated_error_count', 0) +
                     p.get('download_count', 0) + p.get('download_error_count', 0)
-                ), reverse=True) # Most requests first
+                ), reverse=True)
 
                 num_to_rest = num_global_active - global_max_active
-                profiles_to_rest = profiles_that_can_be_rested[:num_to_rest]
-                for profile in profiles_to_rest:
+                for profile in profiles_that_can_be_rested[:num_to_rest]:
                     logger.warning(f"Global Healing: Resting profile '{profile['name']}'.")
                     self.actions_taken_this_cycle += 1
                     if not self.dry_run:
                         sm = self.manager.get_state_machine(profile['name'])
                         if sm:
-                            # Rest for a minimal duration to prevent immediate re-activation in the same cycle.
-                            sm.rest(reason="Global max_active healing", duration_minutes=0.02) # ~1.2 seconds
+                            sm.rest(reason="Global max_active healing", duration_minutes=0.02)
 
-                    # Update local map to reflect the change for this cycle
                     all_profiles_map[profile['name']]['state'] = ProfileState.RESTING.value
                     all_profiles_map[profile['name']]['rest_reason'] = "Global max_active healing"
                    all_profiles_map[profile['name']]['rest_until'] = time.time() + (0.02 * 60)
@ -1062,6 +1165,10 @@ class PolicyEnforcer:
         for proxy_url, state_data in proxy_states.items():
             state = state_data.get('state', ProfileState.ACTIVE.value)
 
+            if state == ProfileState.BANNED.value:
+                logger.debug(f"Proxy '{proxy_url}' is BANNED. Skipping work/rest cycle enforcement.")
+                continue
+
             # Un-rest logic
             if state == ProfileState.RESTING.value:
                 rest_until = state_data.get('rest_until', 0)
@ -1257,6 +1364,60 @@ class PolicyEnforcer:
             return True # Indicates action was taken
         return False
 
+    def enforce_proxy_state_on_profiles(self, all_profiles_list, all_profiles_map):
+        """
+        Enforces the state of a proxy onto all profiles that use it.
+        - If a proxy is BANNED, any non-banned profile using it will be banned.
+        - If a proxy is RESTING, any ACTIVE profile using it will be rested.
+        This is a safeguard that runs after all proxy state changes and before profile
+        state logic.
+        """
+        unique_proxies = sorted(list(set(p['proxy'] for p in all_profiles_list if p.get('proxy'))))
+        if not unique_proxies:
+            return
+
+        proxy_states = self.manager.get_proxy_states(unique_proxies)
+
+        for profile in all_profiles_list:
+            proxy_url = profile.get('proxy')
+            if not proxy_url:
+                continue
+
+            proxy_state_data = proxy_states.get(proxy_url)
+            if not proxy_state_data:
+                continue
+
+            proxy_state = proxy_state_data.get('state')
+            profile_name = profile['name']
+
+            if proxy_state == ProfileState.BANNED.value and profile['state'] != ProfileState.BANNED.value:
+                reason = f"Proxy '{proxy_url}' is BANNED"
+                logger.warning(f"Banning profile '{profile_name}' because its proxy is banned: {reason}")
+                self.actions_taken_this_cycle += 1
+                if not self.dry_run:
+                    sm = self.manager.get_state_machine(profile_name)
+                    if sm:
+                        sm.ban(reason=reason)
+                # Update local map for consistency in this cycle
+                all_profiles_map[profile_name]['state'] = ProfileState.BANNED.value
+                all_profiles_map[profile_name]['reason'] = reason
+
+            elif proxy_state == ProfileState.RESTING.value and profile['state'] == ProfileState.ACTIVE.value:
+                logger.info(f"Resting profile '{profile_name}' because its proxy '{proxy_url}' is resting.")
+                self.actions_taken_this_cycle += 1
+                if not self.dry_run:
+                    # Rest it for as long as the proxy is resting.
+                    proxy_rest_until = proxy_state_data.get('rest_until', 0)
+                    duration_minutes = max(0, (proxy_rest_until - time.time()) / 60)
+                    sm = self.manager.get_state_machine(profile_name)
+                    if sm:
+                        sm.rest(reason=self.PROXY_REST_REASON, duration_minutes=duration_minutes)
+                # Update local map for consistency in this cycle
+                proxy_rest_until = proxy_state_data.get('rest_until', 0)
+                all_profiles_map[profile_name]['state'] = ProfileState.RESTING.value
+                all_profiles_map[profile_name]['rest_reason'] = self.PROXY_REST_REASON
+                all_profiles_map[profile_name]['rest_until'] = proxy_rest_until
+
 def add_policy_enforcer_parser(subparsers):
     """Adds the parser for the 'policy-enforcer' command."""
     parser = subparsers.add_parser(
@ -1421,6 +1582,10 @@ def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=F
 
     logger.debug("Syncing active profiles from Auth to Download simulation...")
 
+    # Get all download proxy states once for efficiency
+    all_dl_proxies = sorted(list(set(p['proxy'] for p in all_download_profiles.values() if p.get('proxy'))))
+    all_dl_proxy_states = download_manager.get_proxy_states(all_dl_proxies)
+
     # Get profiles that should be active in the download simulation
     target_active_download_profiles = set()
 
@ -1465,6 +1630,13 @@ def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=F
             logger.warning(f"Auth profile '{target_profile_name}' needs an active download profile, but no corresponding download profile found.")
             continue
 
+        # Check proxy state before activating
+        proxy_url = download_profile.get('proxy')
+        proxy_state = all_dl_proxy_states.get(proxy_url, {}).get('state')
+        if proxy_state in [ProfileState.BANNED.value, ProfileState.RESTING.value]:
+            logger.debug(f"Sync: Deferring activation of download profile '{target_profile_name}' because its proxy '{proxy_url}' is {proxy_state}.")
+            continue
+
         if download_profile['state'] not in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
             is_from_cooldown = download_profile['state'] == ProfileState.COOLDOWN.value
             log_msg_suffix = " (from COOLDOWN)" if is_from_cooldown else ""
@ -1476,24 +1648,12 @@ def sync_cross_simulation(auth_manager, download_manager, sync_config, dry_run=F
                     sm.activate(profile=download_profile)
 
     # --- Group-Aware Deactivation ---
-    # Identify the target download groups based on target_active_download_profiles.
-    # CRITICAL FIX: Directly map individual profiles to their groups instead of relying on name patterns.
-    target_download_groups = set()
-
-    for target_profile_name in target_active_download_profiles:
-        group_info = dl_profile_to_group.get(target_profile_name)
-        if group_info:
-            target_download_groups.add(group_info['name'])
-
-    logger.debug(f"Target download groups for this sync cycle: {target_download_groups}")
-
-    # Deactivate any download profiles that are active but are not in a target group
+    # Deactivate any download profiles that are active but are not the target profile for their group.
+    # This ensures that if auth wants 'user1_1' to be active, the currently active 'user1_0' is rested first.
     for dl_profile_name, dl_profile in all_download_profiles.items():
         if dl_profile['state'] == ProfileState.ACTIVE.value:
-            group_info = dl_profile_to_group.get(dl_profile_name)
-            # If the profile is in a group, and that group is NOT a target group, rest it.
-            if group_info and group_info['name'] not in target_download_groups:
-                logger.info(f"Syncing active state: Resting download profile '{dl_profile_name}' as its group '{group_info['name']}' is no longer active.")
+            if dl_profile_name not in target_active_download_profiles:
+                logger.info(f"Syncing active state: Resting download profile '{dl_profile_name}' as it is no longer the target active profile for its group.")
                 if not dry_run:
                     sm = download_manager.get_state_machine(dl_profile_name)
                     if sm:
@ -1539,9 +1699,9 @@ def main_policy_enforcer(args):
         'unlock_stale_locks_after_seconds': 120,
         'unlock_cooldown_seconds': 0,
         'max_global_proxy_active_minutes': 0, 'rest_duration_on_max_active': 10,
-        'profile_selection_strategy': 'longest_idle',
+        'profile_selection_strategy': None,
         'global_max_active_profiles': 0,
-        'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': []
+        'interval_seconds': 60, 'proxy_groups': [], 'profile_groups': [], 'enforcement_pools': []
     }
 
     sim_params = policy.get('simulation_parameters', {})
@ -1586,10 +1746,90 @@ def main_policy_enforcer(args):
 
         logger.info(f"Setting up enforcer for {sim_type} simulation...")
 
-        # --- Dynamic Profile Group Discovery ---
-        profile_group_templates = policy.get('profile_group_templates')
-        # Check if templates exist and if the config block doesn't already have groups (CLI overrides take precedence)
-        if profile_group_templates and 'profile_groups' not in policy_config:
+        # --- Hybrid Profile Group Discovery (Static + Dynamic) ---
+        common_group_settings = policy.get('common_group_settings', {})
+        enforcement_pools = policy.get('enforcement_pools')
+        profile_group_templates = policy.get('profile_group_templates') # For backward compatibility
+
+        # Start with any statically defined groups from the policy.
+        final_profile_groups = policy_config.get('profile_groups', [])
+
+        # If enforcement_pools are defined, discover dynamic groups and merge them.
+        if enforcement_pools:
+            logger.info(f"Found 'enforcement_pools'. Discovering dynamic profile groups to merge with static ones for {sim_type}...")
+
+            # Determine key_prefix to connect to the right Redis env
+            policy_env = sim_params.get(env_policy_key)
+            default_policy_env = sim_params.get('env')
+            effective_env = env_cli_arg or args.env or policy_env or default_policy_env or 'dev'
+            if args.key_prefix: temp_key_prefix = args.key_prefix
+            elif args.legacy: temp_key_prefix = 'profile_mgmt_'
+            else: temp_key_prefix = f"{effective_env}_profile_mgmt_"
+
+            try:
+                temp_manager = ProfileManager(redis_host, redis_port, redis_password, temp_key_prefix, redis_db)
+                all_profiles = temp_manager.list_profiles()
+                found_prefixes = set(p['name'].rsplit('_', 1)[0] for p in all_profiles)
+
+                if not found_prefixes:
+                    logger.warning(f"Dynamic discovery found no profile prefixes for env '{effective_env}'.")
+                else:
+                    logger.info(f"Discovered {len(found_prefixes)} unique profile prefixes: {sorted(list(found_prefixes))}")
+
+                    dynamically_generated_groups = []
+                    # Match discovered prefixes against patterns in each enforcement pool
+                    for i, pool in enumerate(enforcement_pools):
+                        pool_name = pool.get('name', f'pool_{i}')
+                        pool_patterns = pool.get('profile_group_patterns', [])
+                        # If a pool has no patterns, it's just for defining concurrency for static groups. Skip discovery.
+                        if not pool_patterns:
+                            continue
+
+                        # Merge common settings with any pool-specific overrides
+                        group_settings_template = deepcopy(common_group_settings)
+                        pool_specific_settings = pool.get('group_settings', {})
+
+                        # A simple way to deep merge the two levels (auth/download)
+                        auth_settings = group_settings_template.get('auth', {})
+                        auth_settings.update(pool_specific_settings.get('auth', {}))
+                        group_settings_template['auth'] = auth_settings
+
+                        download_settings = group_settings_template.get('download', {})
+                        download_settings.update(pool_specific_settings.get('download', {}))
+                        group_settings_template['download'] = download_settings
+
+                        for prefix in sorted(list(found_prefixes)):
+                            for pattern in pool_patterns:
+                                if fnmatch.fnmatch(prefix, pattern):
+                                    sim_settings = group_settings_template.get(sim_type.lower())
+                                    if not sim_settings:
+                                        logger.debug(f"Pool '{pool_name}' has no settings for '{sim_type}'. Skipping for prefix '{prefix}'.")
+                                        continue
+
+                                    new_group = deepcopy(sim_settings)
+                                    new_group['prefix'] = prefix
+                                    new_group['name'] = prefix
+                                    new_group['pool_name'] = pool_name
+
+                                    dynamically_generated_groups.append(new_group)
+                                    logger.debug(f"Assigned prefix '{prefix}' to pool '{pool_name}' for {sim_type} simulation.")
+                                    break
+
+                    if dynamically_generated_groups:
+                        logger.info(f"Merging {len(final_profile_groups)} static group(s) with {len(dynamically_generated_groups)} discovered dynamic group(s).")
+                        final_profile_groups.extend(dynamically_generated_groups)
+
+            except Exception as e:
+                logger.error(f"Failed during dynamic profile group discovery: {e}", exc_info=args.verbose)
+
+            # Update the policy_config with the final merged list and the pool definitions
+            policy_config['profile_groups'] = final_profile_groups
+            # CRITICAL: Deepcopy enforcement_pools to prevent modification in one simulation
+            # from affecting the other, since the policy object is shared.
+            policy_config['enforcement_pools'] = deepcopy(enforcement_pools)
+
+        # For backward compatibility with the old template format
+        elif profile_group_templates and 'profile_groups' not in policy_config:
             logger.info(f"Found 'profile_group_templates'. Discovering profile groups dynamically for {sim_type}...")
 
             # Determine key_prefix to connect to the right Redis env (logic duplicated from below)
@ -1640,6 +1880,21 @@ def main_policy_enforcer(args):
             except Exception as e:
                 logger.error(f"Failed during dynamic profile group discovery: {e}", exc_info=args.verbose)
 
+        # In the download simulation, the active profiles are dictated entirely by the
+        # cross-simulation sync logic. We must disable the download enforcer's own
+        # concurrency limits (max_active_profiles) to prevent it from "healing"
+        # profiles that the sync logic has correctly activated.
+        if sim_type == 'Download':
+            logger.info("Disabling max_active_profiles limits for Download simulation. Active profiles will be managed by cross-sim sync.")
+            if 'profile_groups' in policy_config:
+                for group in policy_config['profile_groups']:
+                    group['max_active_profiles'] = 0
+            if 'enforcement_pools' in policy_config:
+                for pool in policy_config['enforcement_pools']:
+                    pool['max_active_profiles'] = 0
+            # Also disable the global limit for the download simulation.
+            policy_config['global_max_active_profiles'] = 0
+
         config = Config(args, policy_config, code_defaults)
 
         # Determine the effective environment name with correct precedence:
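The merge step above means a pool can override only the settings it cares about while inheriting the rest from `common_group_settings`. A minimal sketch of such an override is shown below; the pool name, patterns, and values are illustrative, not taken from the repository's policies.

```yaml
# Illustrative pool-level override; unspecified keys fall back to common_group_settings.
enforcement_pools:
  - name: "server_dl003"
    profile_group_patterns: ["user31", "user32"]
    max_active_profiles: 1
    group_settings:
      auth:
        rotate_after_requests: 10   # override just this key for the auth simulation
```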
@ -18,8 +18,10 @@ import time
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, List, Optional, Any
|
from typing import Dict, List, Optional, Any
|
||||||
import collections
|
import collections
|
||||||
|
import fnmatch
|
||||||
|
|
||||||
import redis
|
import redis
|
||||||
|
import yaml
|
||||||
|
|
||||||
from .profile_statemachine import ProfileState, ProfileStateMachine
|
from .profile_statemachine import ProfileState, ProfileStateMachine
|
||||||
|
|
||||||
@ -199,8 +201,8 @@ class ProfileManager:
|
|||||||
|
|
||||||
# When decrementing, ensure the counter exists to avoid creating negative counters from stray calls.
|
# When decrementing, ensure the counter exists to avoid creating negative counters from stray calls.
|
||||||
if count < 0 and not self.redis.exists(key):
|
if count < 0 and not self.redis.exists(key):
|
||||||
logger.warning(f"Attempted to decrement pending downloads for '{profile_name}' by {abs(count)}, but no counter exists. No action taken.")
|
logger.warning(f"Attempted to decrement pending downloads for '{profile_name}' by {abs(count)}, but no counter exists. This can happen in a race condition. Assuming task is complete and counter is zero.")
|
||||||
return None
|
return 0
|
||||||
|
|
||||||
new_value = self.redis.incrby(key, count)
|
new_value = self.redis.incrby(key, count)
|
||||||
|
|
||||||
@ -225,8 +227,8 @@ class ProfileManager:

        # Only decrement if the key exists. This prevents stray calls from creating negative counters.
        if not self.redis.exists(key):
            logger.warning(f"Attempted to decrement pending downloads for '{profile_name}', but no counter exists. No action taken.")
            logger.warning(f"Attempted to decrement pending downloads for '{profile_name}', but no counter exists. This can happen in a race condition. Assuming task is complete and counter is zero.")
            return None
            return 0

        new_value = self.redis.decr(key)

@ -444,7 +446,12 @@ class ProfileManager:
            logger.error(f"Invalid state: {new_state}")
            return False

        sm = self.get_state_machine(name)
        profile = self.get_profile(name)
        if not profile:
            # get_profile already logs an error if the profile is not found.
            return False

        sm = self.get_state_machine(name, profile=profile)
        if not sm:
            return False # get_state_machine logs the error

@ -453,14 +460,16 @@ class ProfileManager:
            return True

        try:
            # Pass the profile object to the transition methods for context,
            # which is consistent with the policy enforcer's usage.
            if new_state == ProfileState.ACTIVE.value:
                sm.activate()
                sm.activate(profile=profile)
            elif new_state == ProfileState.BANNED.value:
                sm.ban(reason=reason)
                sm.ban(reason=reason, profile=profile)
            elif new_state == ProfileState.RESTING.value:
                sm.rest(reason=reason)
                sm.rest(reason=reason, profile=profile)
            elif new_state == ProfileState.PAUSED.value:
                sm.pause(reason=reason)
                sm.pause(reason=reason, profile=profile)
            # LOCKED and COOLDOWN are not handled here as they are special transitions
            # from lock_profile and unlock_profile, and should not be set directly.
            elif new_state in [ProfileState.LOCKED.value, ProfileState.COOLDOWN.value]:
@ -720,30 +729,51 @@ class ProfileManager:
        logger.info(f"Deleted {deleted_count} global counter key(s).")
        return deleted_count

    def set_proxy_state(self, proxy_url: str, state: str, rest_duration_minutes: Optional[int] = None) -> bool:
    def set_proxy_state(self, proxy_url: str, state: str, rest_duration_minutes: Optional[int] = None, reason: Optional[str] = None) -> bool:
        """Set the state of a proxy and propagates it to associated profiles."""
        if state not in [ProfileState.ACTIVE.value, ProfileState.RESTING.value]:
        if state not in [ProfileState.ACTIVE.value, ProfileState.RESTING.value, ProfileState.BANNED.value]:
            logger.error(f"Invalid proxy state: {state}. Only ACTIVE and RESTING are supported for proxies.")
            logger.error(f"Invalid proxy state: {state}. Only ACTIVE, RESTING, and BANNED are supported for proxies.")
            return False

        proxy_key = self._proxy_state_key(proxy_url)
        now = time.time()
        updates = {'state': state}
        if reason:
            updates['reason'] = reason
        else:
            # Clear reason if not provided
            updates['reason'] = ''

        rest_until = 0
        if state == ProfileState.RESTING.value:
            if not rest_duration_minutes or rest_duration_minutes <= 0:
            if rest_duration_minutes is None:
                logger.error("rest_duration_minutes is required when setting proxy state to RESTING.")
                return False

            if rest_duration_minutes == -1:
                # Use a very large number for "indefinite" to avoid special cases later.
                # 10 years should be sufficient.
                rest_until = now + (10 * 365 * 24 * 60 * 60)
            elif rest_duration_minutes > 0:
                rest_until = now + rest_duration_minutes * 60
            else:
                logger.error("rest_duration_minutes must be positive, or -1 for indefinite.")
                return False

            updates['rest_until'] = str(rest_until)
            updates['work_start_timestamp'] = '0' # Clear work start time
        else: # ACTIVE
        elif state in [ProfileState.ACTIVE.value, ProfileState.BANNED.value]:
            updates['rest_until'] = '0'
            if state == ProfileState.ACTIVE.value:
                updates['work_start_timestamp'] = str(now)
            else: # BANNED
                updates['work_start_timestamp'] = '0'

        self.redis.hset(proxy_key, mapping=updates)
        logger.info(f"Set proxy '{proxy_url}' state to {state}.")
        log_msg = f"Set proxy '{proxy_url}' state to {state}."
        if reason:
            log_msg += f" Reason: {reason}"
        logger.info(log_msg)

        # Now, update associated profiles
        profiles_on_proxy = self.list_profiles(proxy_filter=proxy_url)
@ -751,16 +781,31 @@ class ProfileManager:
            return True

        if state == ProfileState.RESTING.value:
            logger.info(f"Propagating RESTING state to profiles on proxy '{proxy_url}'.")
            propagate_reason = reason or "Proxy resting"
            logger.info(f"Propagating RESTING state to profiles on proxy '{proxy_url}'. Reason: {propagate_reason}")
            for profile in profiles_on_proxy:
                if profile['state'] == ProfileState.ACTIVE.value:
                    self.update_profile_state(profile['name'], ProfileState.RESTING.value, "Proxy resting")
                    self.update_profile_state(profile['name'], ProfileState.RESTING.value, propagate_reason)
                    self.update_profile_field(profile['name'], 'rest_until', str(rest_until))
        elif state == ProfileState.ACTIVE.value:
        elif state == ProfileState.BANNED.value:
            logger.info(f"Propagating ACTIVE state to profiles on proxy '{proxy_url}'.")
            propagate_reason = reason or "Proxy banned"
            logger.info(f"Propagating BANNED state to profiles on proxy '{proxy_url}'. Reason: {propagate_reason}")
            for profile in profiles_on_proxy:
                if profile['state'] == ProfileState.RESTING.value and profile.get('rest_reason') == "Proxy resting":
                if profile['state'] != ProfileState.BANNED.value:
                    self.update_profile_state(profile['name'], ProfileState.ACTIVE.value, "Proxy activated")
                    self.update_profile_state(profile['name'], ProfileState.BANNED.value, propagate_reason)
        elif state == ProfileState.ACTIVE.value:
            propagate_reason = reason or "Proxy activated"
            logger.info(f"Propagating ACTIVE state to profiles on proxy '{proxy_url}'. Reason: {propagate_reason}")
            for profile in profiles_on_proxy:
                # Check for proxy-related reasons in both rest_reason and ban_reason
                proxy_related_reason = False
                if profile.get('rest_reason', '').startswith("Proxy "):
                    proxy_related_reason = True
                if profile.get('ban_reason', '').startswith("Proxy "):
                    proxy_related_reason = True

                if (profile['state'] in [ProfileState.RESTING.value, ProfileState.BANNED.value]) and proxy_related_reason:
                    self.update_profile_state(profile['name'], ProfileState.ACTIVE.value, propagate_reason)

        return True

@ -1162,8 +1207,8 @@ def add_profile_manager_parser(subparsers):

    # List command
    list_parser = subparsers.add_parser('list', help='List profiles', parents=[common_parser])
    list_parser.add_argument('--auth-env', help='Environment name for the Auth simulation monitor. Use with --download-env for a merged view.')
    list_parser.add_argument('--auth-env', default=None, help='Environment name for the Auth simulation monitor. Use with --download-env for a merged view. Defaults to YTOPS_AUTH_ENV env var.')
    list_parser.add_argument('--download-env', help='Environment name for the Download simulation monitor. Use with --auth-env for a merged view.')
    list_parser.add_argument('--download-env', default=None, help='Environment name for the Download simulation monitor. Use with --auth-env for a merged view. Defaults to YTOPS_DOWNLOAD_ENV env var.')
    list_parser.add_argument('--separate-views', action='store_true', help='In dual-monitor mode, show two separate reports instead of a single merged view.')
    list_parser.add_argument('--rest-after-requests', type=int, help='(For display) Show countdown to rest based on this request limit.')
    list_parser.add_argument('--state', help='Filter by state')
@ -1177,20 +1222,24 @@ def add_profile_manager_parser(subparsers):
    list_parser.add_argument('--no-blink', action='store_true', help='Use ANSI escape codes for smoother screen updates in --live mode (experimental).')
    list_parser.add_argument('--interval-seconds', type=int, default=5, help='When in --live mode, how often to refresh in seconds. Default: 5.')
    list_parser.add_argument('--hide-active-state', action='store_true', help="Display 'ACTIVE' state as blank for cleaner UI.")
    list_parser.add_argument('--hide-ungrouped', action='store_true', help="Hide profiles that do not belong to any configured profile group (e.g., old profiles after a config change). Shown by default.")

    # Get command
    get_parser = subparsers.add_parser('get', help='Get profile details', parents=[common_parser])
    get_parser.add_argument('name', help='Profile name')

    # Set proxy state command
    set_proxy_state_parser = subparsers.add_parser('set-proxy-state', help='Set the state of a proxy and propagate to its profiles.', parents=[common_parser])
    set_proxy_state_parser = subparsers.add_parser('set-proxy-state', help='Set the state of a proxy (or proxies) and propagate to its profiles.', parents=[common_parser])
    set_proxy_state_parser.add_argument('proxy_url', help='Proxy URL')
    set_proxy_state_parser.add_argument('proxy_urls', help='Proxy URL, or comma-separated list of URLs')
    set_proxy_state_parser.add_argument('state', choices=['ACTIVE', 'RESTING'], help='New state for the proxy')
    set_proxy_state_parser.add_argument('state', choices=['ACTIVE', 'RESTING', 'BANNED'], help='New state for the proxy')
    set_proxy_state_parser.add_argument('--duration-minutes', type=int, help='Duration for the RESTING state')
    set_proxy_state_parser.add_argument('--duration-minutes', type=int, help='Duration for the RESTING state. Use -1 for indefinite rest.')
    set_proxy_state_parser.add_argument('--reason', help='Reason for the state change. Propagated to profiles.')
    set_proxy_state_parser.add_argument('--auth-env', default=None, help='Target the Auth simulation environment. Can be used with --download-env. Defaults to YTOPS_AUTH_ENV env var.')
    set_proxy_state_parser.add_argument('--download-env', default=None, help='Target the Download simulation environment. Can be used with --auth-env. Defaults to YTOPS_DOWNLOAD_ENV env var.')

    # Update state command
    update_state_parser = subparsers.add_parser('update-state', help='Update profile state', parents=[common_parser])
    update_state_parser = subparsers.add_parser('update-state', help='Update profile state for one or more profiles.', parents=[common_parser])
    update_state_parser.add_argument('name', help='Profile name')
    update_state_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
    update_state_parser.add_argument('state', choices=ProfileState.values(),
                                     help='New state')
    update_state_parser.add_argument('--reason', help='Reason for state change (especially for BAN)')
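Assuming the profile CLI is exposed roughly as `ytops-client profile ...` (the exact entry point is an assumption and may differ in your checkout), the new proxy controls can be driven like this:

```bash
# Entry point assumed; flags and positional order match the parser above.
# Rest two proxies indefinitely and record a reason:
ytops-client profile set-proxy-state "http://proxy1:8080,http://proxy2:8080" RESTING --duration-minutes -1 --reason "ISP maintenance"

# Ban a proxy in both simulation environments at once:
ytops-client profile set-proxy-state "http://proxy1:8080" BANNED --reason "403 storm" --auth-env sim_auth --download-env sim_download
```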
@ -1202,21 +1251,21 @@ def add_profile_manager_parser(subparsers):
    update_field_parser.add_argument('value', help='New value')

    # Pause command (convenience)
    pause_parser = subparsers.add_parser('pause', help=f'Pause a profile (sets state to {ProfileState.PAUSED.value}).', parents=[common_parser])
    pause_parser = subparsers.add_parser('pause', help=f'Pause one or more profiles (sets state to {ProfileState.PAUSED.value}).', parents=[common_parser])
    pause_parser.add_argument('name', help='Profile name')
    pause_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')

    # Activate command (convenience)
    activate_parser = subparsers.add_parser('activate', help=f'Activate a profile (sets state to {ProfileState.ACTIVE.value}). Useful for resuming a PAUSED profile or fixing a stale LOCKED one.', parents=[common_parser])
    activate_parser = subparsers.add_parser('activate', help=f'Activate one or more profiles (sets state to {ProfileState.ACTIVE.value}). Useful for resuming a PAUSED profile or fixing a stale LOCKED one.', parents=[common_parser])
    activate_parser.add_argument('name', help='Profile name')
    activate_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')

    # Ban command (convenience)
    ban_parser = subparsers.add_parser('ban', help=f'Ban a profile (sets state to {ProfileState.BANNED.value}).', parents=[common_parser])
    ban_parser = subparsers.add_parser('ban', help=f'Ban one or more profiles (sets state to {ProfileState.BANNED.value}).', parents=[common_parser])
    ban_parser.add_argument('name', help='Profile name')
    ban_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')
    ban_parser.add_argument('--reason', required=True, help='Reason for ban')

    # Unban command (convenience)
    unban_parser = subparsers.add_parser('unban', help=f'Unban a profile (sets state to {ProfileState.ACTIVE.value} and resets session counters).', parents=[common_parser])
    unban_parser = subparsers.add_parser('unban', help=f'Unban one or more profiles (sets state to {ProfileState.ACTIVE.value} and resets session counters).', parents=[common_parser])
    unban_parser.add_argument('name', help='Profile name')
    unban_parser.add_argument('names', help='Profile name, comma-separated list of names, or a pattern with wildcards (e.g., "user31_*")')

    # Delete command
    delete_parser = subparsers.add_parser('delete', help='Delete a profile', parents=[common_parser])
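With the positional argument now accepting comma-separated lists and wildcard patterns, bulk operations become one-liners. Hypothetical invocations (same entry-point assumption as above; the affected profiles are listed and a confirmation is requested before anything changes):

```bash
# Pause every profile matching a pattern:
ytops-client profile pause "user31_*"

# Ban an explicit set of profiles with a mandatory reason:
ytops-client profile ban "user1_0,user1_1" --reason "Captcha loop"

# Unban them later (also resets session counters):
ytops-client profile unban "user1_0,user1_1"
```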
@ -1228,6 +1277,15 @@ def add_profile_manager_parser(subparsers):
    delete_all_parser = subparsers.add_parser('delete-all', help='(Destructive) Delete all profiles and data under the current key prefix.', parents=[common_parser])
    delete_all_parser.add_argument('--confirm', action='store_true', help='Confirm this highly destructive action (required)')

    # Cleanup ungrouped command
    cleanup_parser = subparsers.add_parser('cleanup-ungrouped', help='(Safe) Sync profiles in Redis with the setup policy (create missing, delete extra).', parents=[common_parser])
    cleanup_parser.add_argument('--policy-file', required=True, help='Path to the profile setup policy YAML file (e.g., policies/6_profile_setup_policy.yaml).')
    cleanup_parser.add_argument('--auth-env', default=None, help="Environment name for the Auth simulation to clean. Defaults to YTOPS_AUTH_ENV env var.")
    cleanup_parser.add_argument('--download-env', default=None, help="Environment name for the Download simulation to clean. Defaults to YTOPS_DOWNLOAD_ENV env var.")
    cleanup_parser.add_argument('--dry-run', action='store_true', help="Only show which profiles would be created or deleted, don't actually change them.")
    cleanup_parser.add_argument('--no-create-missing', action='store_true', help="Only delete ungrouped profiles, do not create profiles missing from Redis.")
    cleanup_parser.add_argument('--disallow-cleanup-active-downloads', action='store_true', help="In paired auth/download cleanup, PREVENTS deleting a profile if its download side is ACTIVE, even if the auth side is idle. Cleanup of active downloads is allowed by default.")

    # Reset global counters command
    reset_global_parser = subparsers.add_parser('reset-global-counters', help='Reset global counters (e.g., failed_lock_attempts).', parents=[common_parser])

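A typical workflow is to preview the sync first and then apply it. Hypothetical invocations (entry point assumed as above; the policy path matches the example given in the help text):

```bash
# Preview what a sync against the setup policy would do:
ytops-client profile cleanup-ungrouped --policy-file policies/6_profile_setup_policy.yaml --dry-run --auth-env sim_auth --download-env sim_download

# Apply it, but only delete extras, never create missing profiles:
ytops-client profile cleanup-ungrouped --policy-file policies/6_profile_setup_policy.yaml --no-create-missing --auth-env sim_auth --download-env sim_download
```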
@ -1675,6 +1733,19 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
        return

    profiles = manager.list_profiles(args.state, args.proxy)

    if getattr(args, 'hide_ungrouped', False):
        all_grouped_profile_names = set()
        for group in profile_groups_config:
            for p_name in group.get('profiles_in_group', []):
                all_grouped_profile_names.add(p_name)

        original_count = len(profiles)
        profiles = [p for p in profiles if p.get('name') in all_grouped_profile_names]
        filtered_count = original_count - len(profiles)
        if filtered_count > 0 and not args.live:
            print(f"NOTE: {filtered_count} ungrouped profiles were hidden via --hide-ungrouped.", file=sys.stderr)

    if not profiles:
        print("No profiles found matching the criteria.", file=file)
        return
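Together with the new flag, a merged live view that ignores leftover profiles could be requested like this (entry point assumed as above; environment names are examples):

```bash
ytops-client profile list --auth-env sim_auth --download-env sim_download --live --hide-ungrouped
```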
@ -1832,10 +1903,13 @@ def _render_simulation_view(title, manager, args, file=sys.stdout):

    profile_groups_config = _build_profile_groups_config(manager, profiles)

    # The group summary table is only relevant for the Auth simulation, which has
    # selection strategies and rotation policies.
    is_auth_sim = 'Auth' in title
    if is_auth_sim:
        profile_selection_strategy = manager.get_config('profile_selection_strategy')
        if profile_selection_strategy:
            print(f"Profile Selection Strategy: {profile_selection_strategy}", file=file)

        _render_profile_group_summary_table(manager, profiles, profile_groups_config, args, file=file)

    failed_lock_attempts = manager.get_failed_lock_attempts()
@ -1997,7 +2071,6 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
        _render_activation_history_table(auth_manager, file=file)

    print(f"\n--- Download Simulation Profile Details ({args.download_env}) ---", file=file)
    _render_profile_group_summary_table(download_manager, dl_profiles, dl_groups_config, args, file=file)
    _render_profile_details_table(download_manager, args, "Download", dl_groups_config, file=file)
    if args.show_activation_history:
        _render_activation_history_table(download_manager, file=file)
@ -2010,6 +2083,8 @@ def _print_profile_list(manager, args, title="Profile Status"):
        return _render_simulation_view(title, manager, args, file=sys.stdout)


def main_profile_manager(args):
    """Main dispatcher for 'profile' command."""
    if load_dotenv:
@ -2026,6 +2101,13 @@ def main_profile_manager(args):
            print(f"ERROR: The specified --env-file was not found: {args.env_file}", file=sys.stderr)
            return 1

    # After loading .env, populate any args that were not provided on the CLI
    # This is necessary because argparse `default=os.getenv(...)` runs before `load_dotenv`.
    if hasattr(args, 'auth_env') and args.auth_env is None:
        args.auth_env = os.getenv('YTOPS_AUTH_ENV')
    if hasattr(args, 'download_env') and args.download_env is None:
        args.download_env = os.getenv('YTOPS_DOWNLOAD_ENV')

    if args.redis_host is None:
        args.redis_host = os.getenv('REDIS_HOST', os.getenv('MASTER_HOST_IP', 'localhost'))
    if args.redis_port is None:
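Because these fallbacks run after `load_dotenv`, an `.env` file passed via `--env-file` can now supply the environment names as well as the Redis connection details. A minimal illustrative file (variable names are taken from the code above; the values are placeholders):

```bash
cat <<'EOF' > .env
YTOPS_AUTH_ENV=sim_auth
YTOPS_DOWNLOAD_ENV=sim_download
REDIS_HOST=10.0.0.5
MASTER_HOST_IP=10.0.0.5
EOF
```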
@ -2190,8 +2272,46 @@ def main_profile_manager(args):
            sys.stdout.flush()

    elif args.profile_command == 'set-proxy-state':
        success = manager.set_proxy_state(args.proxy_url, args.state, args.duration_minutes)
        proxy_urls = [p.strip() for p in args.proxy_urls.split(',') if p.strip()]
        return 0 if success else 1
        if not proxy_urls:
            print("Error: No proxy URLs provided.", file=sys.stderr)
            return 1

        envs_to_process = []
        if args.auth_env:
            envs_to_process.append(args.auth_env)
        if args.download_env:
            envs_to_process.append(args.download_env)

        if envs_to_process:
            # If --auth-env or --download-env are used, operate on them.
            all_success = True
            for env_name in set(envs_to_process):
                # When operating on specific envs, derive prefix from env name, ignoring --legacy and --key-prefix.
                # This aligns with the behavior of the 'list' command in dual-mode.
                key_prefix_for_env = f"{env_name}_profile_mgmt_"
                print(f"--- Setting proxy state for environment: {env_name} (prefix: {key_prefix_for_env}) ---", file=sys.stderr)
                env_manager = ProfileManager(
                    redis_host=args.redis_host,
                    redis_port=args.redis_port,
                    redis_password=args.redis_password,
                    key_prefix=key_prefix_for_env,
                    redis_db=args.redis_db
                )
                for proxy_url in proxy_urls:
                    success = env_manager.set_proxy_state(proxy_url, args.state, args.duration_minutes, args.reason)
                    if not success:
                        all_success = False
            return 0 if all_success else 1
        else:
            # Fallback to the single manager created with --env, --legacy, or --key-prefix.
            # This maintains backward compatibility and handles single-environment cases.
            all_success = True
            for proxy_url in proxy_urls:
                success = manager.set_proxy_state(proxy_url, args.state, args.duration_minutes, args.reason)
                if not success:
                    all_success = False
            return 0 if all_success else 1

    elif args.profile_command == 'get':
        profile = manager.get_profile(args.name)
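Since the per-environment key prefix is always `<env>_profile_mgmt_`, the proxy and profile hashes touched by this command can be inspected directly with standard Redis tooling (the environment name below is just an example):

```bash
# List a sample of keys under the download simulation's prefix.
redis-cli --scan --pattern 'sim_download_profile_mgmt_*' | head
```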
@ -2226,31 +2346,211 @@ def main_profile_manager(args):
        return 0

    elif args.profile_command == 'update-state':
        success = manager.update_profile_state(args.name, args.state, args.reason or '')
        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
        return 0 if success else 1
        if not names_or_patterns:
            print("Error: No profile names or patterns provided.", file=sys.stderr)
            return 1

        all_profiles = manager.list_profiles()
        all_profile_names = {p['name'] for p in all_profiles}

        profiles_to_update = set()
        for item in names_or_patterns:
            if '*' in item or '?' in item:
                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
                if not matched_profiles:
                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
                profiles_to_update.update(matched_profiles)
            else:
                if item in all_profile_names:
                    profiles_to_update.add(item)
                else:
                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)

        if not profiles_to_update:
            print("No matching profiles found to update.", file=sys.stderr)
            return 1

        print(f"The following {len(profiles_to_update)} profiles will be updated to state '{args.state}':")
        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
        confirm = input("Are you sure you want to proceed? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted.")
            return 1

        all_success = True
        for name in sorted(list(profiles_to_update), key=natural_sort_key):
            success = manager.update_profile_state(name, args.state, args.reason or '')
            if not success:
                all_success = False
        return 0 if all_success else 1

    elif args.profile_command == 'update-field':
        success = manager.update_profile_field(args.name, args.field, args.value)
        return 0 if success else 1

    elif args.profile_command == 'pause':
        success = manager.update_profile_state(args.name, ProfileState.PAUSED.value, 'Manual pause')
        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
        return 0 if success else 1
        if not names_or_patterns:
            print("Error: No profile names or patterns provided.", file=sys.stderr)
            return 1

        all_profiles = manager.list_profiles()
        all_profile_names = {p['name'] for p in all_profiles}

        profiles_to_update = set()
        for item in names_or_patterns:
            if '*' in item or '?' in item:
                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
                if not matched_profiles:
                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
                profiles_to_update.update(matched_profiles)
            else:
                if item in all_profile_names:
                    profiles_to_update.add(item)
                else:
                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)

        if not profiles_to_update:
            print("No matching profiles found to update.", file=sys.stderr)
            return 1

        print(f"The following {len(profiles_to_update)} profiles will be PAUSED:")
        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
        confirm = input("Are you sure you want to proceed? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted.")
            return 1

        all_success = True
        for name in sorted(list(profiles_to_update), key=natural_sort_key):
            success = manager.update_profile_state(name, ProfileState.PAUSED.value, 'Manual pause')
            if not success:
                all_success = False
        return 0 if all_success else 1

    elif args.profile_command == 'activate':
        success = manager.update_profile_state(args.name, ProfileState.ACTIVE.value, 'Manual activation')
        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
        return 0 if success else 1
        if not names_or_patterns:
            print("Error: No profile names or patterns provided.", file=sys.stderr)
            return 1

        all_profiles = manager.list_profiles()
        all_profile_names = {p['name'] for p in all_profiles}

        profiles_to_update = set()
        for item in names_or_patterns:
            if '*' in item or '?' in item:
                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
                if not matched_profiles:
                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
                profiles_to_update.update(matched_profiles)
            else:
                if item in all_profile_names:
                    profiles_to_update.add(item)
                else:
                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)

        if not profiles_to_update:
            print("No matching profiles found to update.", file=sys.stderr)
            return 1

        print(f"The following {len(profiles_to_update)} profiles will be ACTIVATED:")
        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
        confirm = input("Are you sure you want to proceed? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted.")
            return 1

        all_success = True
        for name in sorted(list(profiles_to_update), key=natural_sort_key):
            success = manager.update_profile_state(name, ProfileState.ACTIVE.value, 'Manual activation')
            if not success:
                all_success = False
        return 0 if all_success else 1

    elif args.profile_command == 'ban':
        success = manager.update_profile_state(args.name, ProfileState.BANNED.value, args.reason)
        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
        return 0 if success else 1
        if not names_or_patterns:
            print("Error: No profile names or patterns provided.", file=sys.stderr)
            return 1

        all_profiles = manager.list_profiles()
        all_profile_names = {p['name'] for p in all_profiles}

        profiles_to_update = set()
        for item in names_or_patterns:
            if '*' in item or '?' in item:
                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
                if not matched_profiles:
                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
                profiles_to_update.update(matched_profiles)
            else:
                if item in all_profile_names:
                    profiles_to_update.add(item)
                else:
                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)

        if not profiles_to_update:
            print("No matching profiles found to update.", file=sys.stderr)
            return 1

        print(f"The following {len(profiles_to_update)} profiles will be BANNED:")
        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
        confirm = input("Are you sure you want to proceed? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted.")
            return 1

        all_success = True
        for name in sorted(list(profiles_to_update), key=natural_sort_key):
            success = manager.update_profile_state(name, ProfileState.BANNED.value, args.reason)
            if not success:
                all_success = False
        return 0 if all_success else 1

    elif args.profile_command == 'unban':
        names_or_patterns = [n.strip() for n in args.names.split(',') if n.strip()]
        if not names_or_patterns:
            print("Error: No profile names or patterns provided.", file=sys.stderr)
            return 1

        all_profiles = manager.list_profiles()
        all_profile_names = {p['name'] for p in all_profiles}

        profiles_to_update = set()
        for item in names_or_patterns:
            if '*' in item or '?' in item:
                matched_profiles = {name for name in all_profile_names if fnmatch.fnmatch(name, item)}
                if not matched_profiles:
                    print(f"Warning: Pattern '{item}' did not match any profiles.", file=sys.stderr)
                profiles_to_update.update(matched_profiles)
            else:
                if item in all_profile_names:
                    profiles_to_update.add(item)
                else:
                    print(f"Warning: Profile '{item}' not found.", file=sys.stderr)

        if not profiles_to_update:
            print("No matching profiles found to update.", file=sys.stderr)
            return 1

        print(f"The following {len(profiles_to_update)} profiles will be UNBANNED:")
        print(", ".join(sorted(list(profiles_to_update), key=natural_sort_key)))
        confirm = input("Are you sure you want to proceed? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted.")
            return 1

        all_success = True
        for name in sorted(list(profiles_to_update), key=natural_sort_key):
            # First activate, then reset session counters. The ban reason is cleared by update_profile_state.
            success = manager.update_profile_state(args.name, ProfileState.ACTIVE.value, 'Manual unban')
            success = manager.update_profile_state(name, ProfileState.ACTIVE.value, 'Manual unban')
            if success:
                manager.reset_profile_counters(args.name)
                manager.reset_profile_counters(name)
            return 0 if success else 1
            if not success:
                all_success = False
        return 0 if all_success else 1

    elif args.profile_command == 'delete':
        if not args.confirm:
@ -2267,6 +2567,9 @@ def main_profile_manager(args):
        print(f"Deleted {deleted_count} key(s) with prefix '{manager.key_prefix}'.")
        return 0

    elif args.profile_command == 'cleanup-ungrouped':
        return _main_cleanup_ungrouped(args)

    elif args.profile_command == 'reset-global-counters':
        manager.reset_global_counters()
        return 0
@ -2310,3 +2613,286 @@ def main_profile_manager(args):
        return 0

    return 1 # Should not be reached


def _main_cleanup_ungrouped(args):
    """Handler for the 'cleanup-ungrouped' command."""
    try:
        with open(args.policy_file, 'r', encoding='utf-8') as f:
            policy = yaml.safe_load(f)
    except (IOError, yaml.YAMLError) as e:
        print(f"Error: Could not read or parse policy file '{args.policy_file}': {e}", file=sys.stderr)
        return 1

    common_pools = policy.get('common_pools', [])
    if not common_pools:
        print(f"Warning: No 'common_pools' found in '{args.policy_file}'. Nothing to do.", file=sys.stderr)
        return 0

    desired_profiles = {}  # name -> proxy
    for pool in common_pools:
        prefixes = pool.get('prefixes', [])
        count = pool.get('count', 0)
        proxy = pool.get('proxy')
        if not proxy:
            print(f"Error: Pool with prefixes {prefixes} is missing a 'proxy' definition.", file=sys.stderr)
            return 1
        for prefix in prefixes:
            for i in range(count):
                name = f"{prefix}_{i}"
                desired_profiles[name] = proxy

    desired_profile_names = set(desired_profiles.keys())
    print(f"Loaded setup policy. Found {len(desired_profile_names)} desired profiles across {len(common_pools)} pools.")

    total_deleted = 0
    total_skipped = 0
    total_created = 0
    total_updated = 0

    # --- Paired Cleanup Mode ---
    if args.auth_env and args.download_env:
        print("\n--- Running in Paired Cleanup Mode ---")
        if not args.disallow_cleanup_active_downloads:
            print("Cleanup of active download profiles is ENABLED (default). Auth profiles will be checked for idleness.")
            print("If an auth profile is idle, its corresponding download profile will be removed regardless of its state.")
        else:
            print("Cleanup of active download profiles is DISABLED. Both auth and download profiles must be idle to be removed.")
        if args.dry_run:
            print("--- DRY RUN MODE: No changes will be made. ---")

        def _create_cleanup_manager(env_name):
            key_prefix = f"{env_name}_profile_mgmt_"
            if args.legacy: key_prefix = 'profile_mgmt_'
            if args.key_prefix: key_prefix = args.key_prefix
            return ProfileManager(
                redis_host=args.redis_host, redis_port=args.redis_port,
                redis_password=args.redis_password, key_prefix=key_prefix,
                redis_db=args.redis_db
            )

        auth_manager = _create_cleanup_manager(args.auth_env)
        download_manager = _create_cleanup_manager(args.download_env)

        auth_profiles_map = {p['name']: p for p in auth_manager.list_profiles()}
        download_profiles_map = {p['name']: p for p in download_manager.list_profiles()}

        # --- Create Missing Profiles ---
        auth_missing = desired_profile_names - set(auth_profiles_map.keys())
        download_missing = desired_profile_names - set(download_profiles_map.keys())

        if auth_missing or download_missing:
            if args.no_create_missing:
                if auth_missing: print(f"Found {len(auth_missing)} missing profiles in '{args.auth_env}', creation disabled via --no-create-missing.")
                if download_missing: print(f"Found {len(download_missing)} missing profiles in '{args.download_env}', creation disabled via --no-create-missing.")
            else:
                if auth_missing:
                    print(f"Found {len(auth_missing)} missing profiles in '{args.auth_env}'. Creating them...")
                    for name in sorted(list(auth_missing), key=natural_sort_key):
                        proxy = desired_profiles.get(name)
                        print(f" - {'[DRY RUN] Would create' if args.dry_run else 'Creating'} profile '{name}' with proxy '{proxy}' in '{args.auth_env}'")
                        if not args.dry_run:
                            if auth_manager.create_profile(name, proxy): total_created += 1
                if download_missing:
                    print(f"Found {len(download_missing)} missing profiles in '{args.download_env}'. Creating them...")
                    for name in sorted(list(download_missing), key=natural_sort_key):
                        proxy = desired_profiles.get(name)
                        print(f" - {'[DRY RUN] Would create' if args.dry_run else 'Creating'} profile '{name}' with proxy '{proxy}' in '{args.download_env}'")
                        if not args.dry_run:
                            if download_manager.create_profile(name, proxy): total_created += 1

                # Refresh maps after creation
                auth_profiles_map = {p['name']: p for p in auth_manager.list_profiles()}
                download_profiles_map = {p['name']: p for p in download_manager.list_profiles()}

        # --- Update Proxies for Existing Profiles ---
        auth_existing_policy_names = set(auth_profiles_map.keys()) & desired_profile_names
        if auth_existing_policy_names:
            print(f"\nChecking for proxy updates in '{args.auth_env}'...")
            for name in sorted(list(auth_existing_policy_names), key=natural_sort_key):
                current_proxy = auth_profiles_map[name].get('proxy')
                desired_proxy = desired_profiles.get(name)
                if current_proxy != desired_proxy and desired_proxy:
                    print(f" - {'[DRY RUN] Would update' if args.dry_run else 'Updating'} proxy for profile '{name}': from '{current_proxy}' to '{desired_proxy}'")
                    if not args.dry_run:
                        if auth_manager.update_profile_field(name, 'proxy', desired_proxy): total_updated += 1

        download_existing_policy_names = set(download_profiles_map.keys()) & desired_profile_names
        if download_existing_policy_names:
            print(f"\nChecking for proxy updates in '{args.download_env}'...")
            for name in sorted(list(download_existing_policy_names), key=natural_sort_key):
                current_proxy = download_profiles_map[name].get('proxy')
                desired_proxy = desired_profiles.get(name)
                if current_proxy != desired_proxy and desired_proxy:
                    print(f" - {'[DRY RUN] Would update' if args.dry_run else 'Updating'} proxy for profile '{name}': from '{current_proxy}' to '{desired_proxy}'")
                    if not args.dry_run:
                        if download_manager.update_profile_field(name, 'proxy', desired_proxy): total_updated += 1

        # --- Delete Ungrouped Profiles ---
        auth_ungrouped = set(auth_profiles_map.keys()) - desired_profile_names
        download_ungrouped = set(download_profiles_map.keys()) - desired_profile_names
        all_ungrouped = sorted(list(auth_ungrouped | download_ungrouped), key=natural_sort_key)

        if not all_ungrouped:
            print("\nNo ungrouped profiles found in either environment to clean up.")
        else:
            print(f"\nFound {len(all_ungrouped)} ungrouped profile(s) across both environments to consider for deletion.")
            for name in all_ungrouped:
                auth_profile = auth_profiles_map.get(name)
                download_profile = download_profiles_map.get(name)

                can_delete_auth, auth_skip_reasons = False, []
                if auth_profile:
                    state = auth_profile.get('state', 'UNKNOWN')
                    pending_dls = auth_manager.get_pending_downloads(name)
                    rest_reason = auth_profile.get('rest_reason')

                    is_safe = True
                    if state in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
                        is_safe = False; auth_skip_reasons.append(f"auth is '{state}'")
                    if pending_dls > 0:
                        is_safe = False; auth_skip_reasons.append(f"auth has {pending_dls} pending DLs")
                    if rest_reason == 'waiting_downloads':
                        is_safe = False; auth_skip_reasons.append("auth is 'waiting_downloads'")
                    if is_safe: can_delete_auth = True

                can_delete_download, download_skip_reasons = False, []
                if download_profile:
                    state = download_profile.get('state', 'UNKNOWN')
                    is_safe = state not in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]
                    if not is_safe: download_skip_reasons.append(f"download is '{state}'")
                    if is_safe: can_delete_download = True

                if auth_profile and not can_delete_auth:
                    print(f" - SKIPPING '{name}' because its auth profile is busy ({', '.join(auth_skip_reasons)}).")
                    total_skipped += 1
                    continue

                was_download_busy = download_profile and not can_delete_download
                if auth_profile and can_delete_auth and not args.disallow_cleanup_active_downloads:
                    can_delete_download = True # The override rule

                if (auth_profile and can_delete_auth) or (download_profile and can_delete_download):
                    msgs = []
                    if auth_profile and can_delete_auth: msgs.append(f"Auth (State: {auth_profile.get('state', 'N/A')})")
                    if download_profile and can_delete_download:
                        dl_msg = f"Download (State: {download_profile.get('state', 'N/A')})"
                        if was_download_busy: dl_msg += " <-- DELETING ACTIVE"
                        msgs.append(dl_msg)

                    print(f" - Deleting '{name}': {'; '.join(msgs)}")
                    if not args.dry_run:
                        if auth_profile and can_delete_auth: auth_manager.delete_profile(name)
                        if download_profile and can_delete_download: download_manager.delete_profile(name)
                    total_deleted += 1
                else:
                    print(f" - SKIPPING '{name}' because its download profile is busy ({', '.join(download_skip_reasons)}) and no idle auth profile exists to override.")
                    total_skipped += 1
    # --- Single Environment Cleanup Mode ---
    else:
        if args.dry_run:
            print("--- DRY RUN MODE: No changes will be made. ---")

        envs_to_clean = []
        if args.auth_env: envs_to_clean.append(args.auth_env)
        if args.download_env: envs_to_clean.append(args.download_env)

        if not envs_to_clean:
            if args.env: envs_to_clean.append(args.env)
            else:
                print("Error: You must specify at least one environment to clean up (e.g., --auth-env sim_auth, --download-env sim_download, or --env dev).", file=sys.stderr)
                return 1

        for env_name in envs_to_clean:
            print(f"\n--- Cleaning environment: {env_name} ---")

            key_prefix = f"{env_name}_profile_mgmt_"
            if args.legacy: key_prefix = 'profile_mgmt_'
            if args.key_prefix: key_prefix = args.key_prefix

            manager = ProfileManager(
                redis_host=args.redis_host, redis_port=args.redis_port,
                redis_password=args.redis_password, key_prefix=key_prefix,
                redis_db=args.redis_db
            )

            current_profiles_list = manager.list_profiles()
            current_profiles_map = {p['name']: p for p in current_profiles_list}
            current_profile_names = set(current_profiles_map.keys())
            ungrouped_names = current_profile_names - desired_profile_names
            missing_names = desired_profile_names - current_profile_names
            existing_policy_names = current_profile_names & desired_profile_names

            # --- Update Proxies for Existing Profiles ---
            profiles_updated_in_env = 0
            if existing_policy_names:
                print("Checking for proxy updates for existing profiles...")
                for name in sorted(list(existing_policy_names), key=natural_sort_key):
                    current_proxy = current_profiles_map[name].get('proxy')
                    desired_proxy = desired_profiles.get(name)
                    if current_proxy != desired_proxy and desired_proxy:
                        print(f" - {'[DRY RUN] Would update' if args.dry_run else 'Updating'} proxy for profile '{name}': from '{current_proxy}' to '{desired_proxy}'")
                        if not args.dry_run:
                            if manager.update_profile_field(name, 'proxy', desired_proxy):
                                profiles_updated_in_env += 1
            total_updated += profiles_updated_in_env

            profiles_created_in_env = 0
            if missing_names:
                print(f"Found {len(missing_names)} profile(s) defined in the policy that do not exist in Redis.")
                if args.no_create_missing:
                    print("Creation of missing profiles is disabled via --no-create-missing.")
                else:
                    if not args.dry_run:
                        print("Creating missing profiles...")

                    for name in sorted(list(missing_names), key=natural_sort_key):
                        proxy = desired_profiles.get(name)
                        print(f" - {'[DRY RUN] Would create' if args.dry_run else 'Creating'} profile '{name}' with proxy '{proxy}'")
                        if not args.dry_run:
                            if proxy:
                                if manager.create_profile(name, proxy):
                                    profiles_created_in_env += 1
                            else:
                                # This should not happen due to the check at the start
                                print(f" - SKIPPING '{name}' because its proxy could not be determined from the policy.", file=sys.stderr)
            total_created += profiles_created_in_env

            if not ungrouped_names:
                print("No ungrouped profiles found to clean up.")
                if profiles_created_in_env == 0:
                    continue

            print(f"Found {len(ungrouped_names)} ungrouped profile(s) to consider for deletion.")
            profiles_to_check = [p for p in current_profiles_list if p['name'] in ungrouped_names]

            for profile in sorted(profiles_to_check, key=lambda p: natural_sort_key(p['name'])):
                name = profile['name']
                state = profile.get('state', 'UNKNOWN')
                pending_dls = manager.get_pending_downloads(name)

                is_safe, reasons = True, []
                if state in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
                    is_safe = False; reasons.append(f"is in '{state}' state")
                if pending_dls > 0:
                    is_safe = False; reasons.append(f"has {pending_dls} pending download(s)")
                if profile.get('rest_reason') == 'waiting_downloads':
                    is_safe = False; reasons.append("is in 'waiting_downloads' state")

                if is_safe:
                    print(f" - Deleting '{name}' (State: {state}, Pending DLs: {pending_dls})")
                    if not args.dry_run: manager.delete_profile(name)
                    total_deleted += 1
                else:
                    print(f" - SKIPPING '{name}' because it {', '.join(reasons)}.")
                    total_skipped += 1

    print("\n--- Cleanup Summary ---")
    print(f"Total profiles created: {total_created}")
    print(f"Total profiles updated: {total_updated}")
    print(f"Total profiles deleted: {total_deleted}")
    print(f"Total profiles skipped (still active or has pending work): {total_skipped}")
    if total_skipped > 0:
        print("Run the cleanup command again later to remove the skipped profiles once they are idle.")

    return 0
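The parsing above expects a `common_pools` list whose entries carry `prefixes`, `count`, and `proxy`, and it derives profile names as `<prefix>_<i>` for `i` from 0 to `count - 1`. A minimal illustrative setup policy (proxy URLs, prefixes, and counts below are made up):

```bash
cat <<'EOF' > policies/6_profile_setup_policy.yaml
common_pools:
  - prefixes: [user1, user2]
    count: 3
    proxy: "http://proxy1:8080"
  - prefixes: [user31]
    count: 5
    proxy: "http://proxy2:8080"
EOF
# This policy describes user1_0..user1_2, user2_0..user2_2 and user31_0..user31_4.
```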
@ -4,6 +4,7 @@ Redis Queue Management CLI Tool for yt-ops-client.
"""

import argparse
import hashlib
import json
import logging
import os
@ -23,6 +24,15 @@ except ImportError:
    print("'tabulate' library not found. Please install it with: pip install tabulate", file=sys.stderr)
    tabulate = None

try:
    import yaml
except ImportError:
    print("PyYAML is not installed. Please install it with: pip install PyYAML", file=sys.stderr)
    yaml = None

from pathlib import Path
import fnmatch

# Configure logging
logging.basicConfig(
    level=logging.INFO,
@ -31,9 +41,53 @@ logging.basicConfig(
logger = logging.getLogger(__name__)


def _find_configured_queues(policies_dir="policies", env=None):
    """Scans YAML files in a directory to find configured queue names."""
    if not yaml:
        return set()

    expected_queues = set()
    policies_path = Path(policies_dir)
    if not policies_path.is_dir():
        logger.debug(f"Policies directory '{policies_dir}' not found, cannot find expected queues.")
        return set()

    for policy_file in policies_path.glob("*.yaml"):
        try:
            with open(policy_file, 'r', encoding='utf-8') as f:
                policy_data = yaml.safe_load(f)

            if not isinstance(policy_data, dict):
                continue

            queue_policy = policy_data.get('queue_policy')
            if not isinstance(queue_policy, dict):
                continue

            use_prefix = queue_policy.get('use_env_prefix', True)
            prefix = ""
            if use_prefix and env:
                prefix = f"{env}_"

            for key, value in queue_policy.items():
                if key.endswith('_queue') and isinstance(value, str):
                    expected_queues.add(f"{prefix}{value}")
        except (IOError, yaml.YAMLError) as e:
            logger.debug(f"Could not parse policy {policy_file} to find queues: {e}")
            continue
    return expected_queues


class QueueManager:
    """Manages Redis lists (queues)."""

    def _push_state_key(self, queue_name: str, file_path: str) -> str:
        """Get Redis key for storing the last pushed index for a given queue and file."""
        # Use a hash of the absolute file path to create a consistent, safe key.
        abs_path = os.path.abspath(file_path)
        path_hash = hashlib.sha256(abs_path.encode()).hexdigest()
        return f"ytops_client:queue_push_state:{queue_name}:{path_hash}"

    def __init__(self, redis_host='localhost', redis_port=6379, redis_password=None):
        """Initialize Redis connection."""
        logger.info(f"Attempting to connect to Redis at {redis_host}:{redis_port}...")
@ -70,10 +124,29 @@ class QueueManager:
|
|||||||
"""Returns the number of items in a queue."""
|
"""Returns the number of items in a queue."""
|
||||||
return self.redis.llen(queue_name)
|
return self.redis.llen(queue_name)
|
||||||
|
|
||||||
def push_from_file(self, queue_name: str, file_path: str, wrap_key: Optional[str] = None) -> int:
|
def push_from_file(self, queue_name: str, file_path: str, wrap_key: Optional[str] = None, limit: Optional[int] = None, start_index: Optional[int] = None, auto_shift: bool = False) -> int:
|
||||||
"""Populates a queue from a file (text with one item per line, or JSON with an array of items)."""
|
"""Populates a queue from a file (text with one item per line, or JSON with an array of items)."""
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
|
# --- State management for file position ---
|
||||||
|
state_key = None
|
||||||
|
current_start_index = 0 # 0-based index
|
||||||
|
|
||||||
|
if auto_shift:
|
||||||
|
state_key = self._push_state_key(queue_name, file_path)
|
||||||
|
last_index_str = self.redis.get(state_key)
|
||||||
|
if last_index_str:
|
||||||
|
current_start_index = int(last_index_str)
|
||||||
|
logger.info(f"Auto-shift enabled. Resuming from line {current_start_index + 1}.")
|
||||||
|
elif start_index is not None:
|
||||||
|
# CLI provides 1-based index, convert to 0-based.
|
||||||
|
current_start_index = max(0, start_index - 1)
|
||||||
|
logger.info(f"Starting from line {current_start_index + 1} as requested.")
|
||||||
|
# ---
|
||||||
|
|
||||||
|
items_to_add = []
|
||||||
|
total_items_in_file = 0
|
||||||
|
|
||||||
if file_path.lower().endswith('.json'):
|
if file_path.lower().endswith('.json'):
|
||||||
if wrap_key:
|
if wrap_key:
|
||||||
logger.warning("--wrap-file-line-in-json is ignored for JSON files, as they are expected to contain complete items.")
|
logger.warning("--wrap-file-line-in-json is ignored for JSON files, as they are expected to contain complete items.")
|
||||||
@ -85,25 +158,23 @@ class QueueManager:
|
|||||||
logger.error("JSON file must contain a list/array.")
|
logger.error("JSON file must contain a list/array.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
total_items_in_file = len(data)
|
||||||
|
if current_start_index >= total_items_in_file:
|
||||||
|
logger.info(f"Start index ({current_start_index + 1}) is past the end of the file ({total_items_in_file} items). Nothing to push.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
items_to_process = data[current_start_index:]
|
||||||
|
if limit is not None and limit >= 0:
|
||||||
|
items_to_process = items_to_process[:limit]
|
||||||
|
|
||||||
# Items can be strings or objects. If objects, they should be converted to JSON strings.
|
# Items can be strings or objects. If objects, they should be converted to JSON strings.
|
||||||
items_to_add = []
|
for item in items_to_process:
|
||||||
for item in data:
|
|
||||||
if isinstance(item, str):
|
if isinstance(item, str):
|
||||||
items_to_add.append(item.strip())
|
items_to_add.append(item.strip())
|
||||||
else:
|
else:
|
||||||
items_to_add.append(json.dumps(item))
|
items_to_add.append(json.dumps(item))
|
||||||
|
|
||||||
items_to_add = [item for item in items_to_add if item]
|
items_to_add = [item for item in items_to_add if item]
|
||||||
|
|
||||||
pipe = self.redis.pipeline()
|
|
||||||
for item in items_to_add:
|
|
||||||
pipe.rpush(queue_name, item)
|
|
||||||
count += 1
|
|
||||||
if count > 0 and count % 1000 == 0:
|
|
||||||
pipe.execute()
|
|
||||||
logger.info(f"Pushed {count} items...")
|
|
||||||
pipe.execute()
|
|
||||||
|
|
||||||
except (IOError, json.JSONDecodeError) as e:
|
except (IOError, json.JSONDecodeError) as e:
|
||||||
logger.error(f"Failed to read or parse JSON file '{file_path}': {e}")
|
logger.error(f"Failed to read or parse JSON file '{file_path}': {e}")
|
||||||
return 0
|
return 0
|
||||||
@ -111,24 +182,51 @@ class QueueManager:
|
|||||||
logger.info("Reading items from text file (one per line).")
|
logger.info("Reading items from text file (one per line).")
|
||||||
try:
|
try:
|
||||||
with open(file_path, 'r', encoding='utf-8') as f:
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
pipe = self.redis.pipeline()
|
all_lines = f.readlines()
|
||||||
for line in f:
|
|
||||||
|
total_items_in_file = len(all_lines)
|
||||||
|
if current_start_index >= total_items_in_file:
|
||||||
|
logger.info(f"Start index ({current_start_index + 1}) is past the end of the file ({total_items_in_file} lines). Nothing to push.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
lines_to_process = all_lines[current_start_index:]
|
||||||
|
if limit is not None and limit >= 0:
|
||||||
|
lines_to_process = lines_to_process[:limit]
|
||||||
|
|
||||||
|
for line in lines_to_process:
|
||||||
item = line.strip()
|
item = line.strip()
|
||||||
if item:
|
if item:
|
||||||
if wrap_key:
|
if wrap_key:
|
||||||
payload = json.dumps({wrap_key: item})
|
payload = json.dumps({wrap_key: item})
|
||||||
else:
|
else:
|
||||||
payload = item
|
payload = item
|
||||||
pipe.rpush(queue_name, payload)
|
items_to_add.append(payload)
|
||||||
count += 1
|
|
||||||
if count > 0 and count % 1000 == 0:
|
|
||||||
pipe.execute()
|
|
||||||
logger.info(f"Pushed {count} items...")
|
|
||||||
pipe.execute()
|
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logger.error(f"Failed to read file '{file_path}': {e}")
|
logger.error(f"Failed to read file '{file_path}': {e}")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
if items_to_add:
|
||||||
|
pipe = self.redis.pipeline()
|
||||||
|
for item in items_to_add:
|
||||||
|
pipe.rpush(queue_name, item)
|
||||||
|
count += 1
|
||||||
|
if count > 0 and count % 1000 == 0:
|
||||||
|
pipe.execute()
|
||||||
|
logger.info(f"Pushed {count} of {len(items_to_add)} items...")
|
||||||
|
pipe.execute()
|
||||||
|
|
||||||
|
if auto_shift and state_key:
|
||||||
|
new_index = current_start_index + count
|
||||||
|
# Don't save a new index if we've reached the end of the file.
|
||||||
|
# This allows re-running the command to start from the beginning again.
|
||||||
|
if new_index >= total_items_in_file:
|
||||||
|
self.redis.delete(state_key)
|
||||||
|
logger.info(f"Auto-shift: Reached end of file. Cleared saved position for '{os.path.basename(file_path)}'. Next run will start from the beginning.")
|
||||||
|
else:
|
||||||
|
self.redis.set(state_key, new_index)
|
||||||
|
logger.info(f"Auto-shift: Saved next start position for '{os.path.basename(file_path)}' as line {new_index + 1}.")
|
||||||
|
|
||||||
logger.info(f"Finished. Pushed a total of {count} items to '{queue_name}'.")
|
logger.info(f"Finished. Pushed a total of {count} items to '{queue_name}'.")
|
||||||
return count
|
return count
|
||||||
|
|
||||||
@@ -230,7 +328,11 @@ def add_queue_manager_parser(subparsers):
     # Push command
     push_parser = subparsers.add_parser('push', help='Push items to a queue from a file, a generator, or a static payload.', parents=[common_parser])
     push_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.")
-    push_parser.add_argument('--count', type=int, default=1, help='Number of items to push (for --payload-json or --generate-payload-prefix).')
+    push_parser.add_argument('--count', type=int, default=None, help='Number of items to push. For --from-file, limits the number of lines pushed. For other sources, specifies how many items to generate/push (defaults to 1).')
+
+    shift_group = push_parser.add_mutually_exclusive_group()
+    shift_group.add_argument('--start', type=int, help='For --from-file, start pushing from this line number (1-based).')
+    shift_group.add_argument('--auto-shift', action='store_true', help="For --from-file, automatically resume from where the last push left off. State is stored in Redis.")
+
     source_group = push_parser.add_mutually_exclusive_group(required=True)
     source_group.add_argument('--from-file', dest='file_path', help='Path to a file containing items to add (one per line, or a JSON array).')
@@ -285,10 +387,26 @@ def main_queue_manager(args):
         print(f"INFO: No queue name specified, defaulting to '{default_queue_name}' based on --env='{args.env}'.", file=sys.stderr)
 
     if args.queue_command == 'list':
-        queues = manager.list_queues(args.pattern)
+        queues_from_redis = manager.list_queues(args.pattern)
+
+        # Discover queues from policy files
+        expected_queues_from_policies = _find_configured_queues(env=args.env)
+
+        # Merge Redis results with policy-defined queues
+        all_queues_map = {q['name']: q for q in queues_from_redis}
+
+        for q_name in expected_queues_from_policies:
+            if q_name not in all_queues_map:
+                # Only add if it matches the pattern filter
+                if fnmatch.fnmatch(q_name, args.pattern):
+                    all_queues_map[q_name] = {'name': q_name, 'size': 0}
+
+        queues = sorted(list(all_queues_map.values()), key=lambda x: x['name'])
+
         if not queues:
             print(f"No queues found matching pattern '{args.pattern}'.")
             return 0
 
         if tabulate:
            print(tabulate(queues, headers='keys', tablefmt='grid'))
        else:
@@ -309,16 +427,23 @@ def main_queue_manager(args):
            if not os.path.exists(args.file_path):
                print(f"Error: File not found at '{args.file_path}'", file=sys.stderr)
                return 1
-            if args.count > 1:
-                logger.warning("--count is ignored when using --from-file.")
-            manager.push_from_file(args.queue_name, args.file_path, args.wrap_file_line_in_json)
+            manager.push_from_file(
+                args.queue_name,
+                args.file_path,
+                args.wrap_file_line_in_json,
+                limit=args.count,
+                start_index=args.start,
+                auto_shift=args.auto_shift
+            )
        elif args.payload_json:
-            manager.push_static(args.queue_name, args.payload_json, args.count)
+            count = args.count if args.count is not None else 1
+            manager.push_static(args.queue_name, args.payload_json, count)
        elif args.generate_payload_prefix:
-            if args.count <= 0:
+            count = args.count if args.count is not None else 1
+            if count <= 0:
                print("Error: --count must be 1 or greater for --generate-payload-prefix.", file=sys.stderr)
                return 1
-            manager.push_generated(args.queue_name, args.generate_payload_prefix, args.count)
+            manager.push_generated(args.queue_name, args.generate_payload_prefix, count)
        return 0

    elif args.queue_command == 'clear':
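The hunk above wires the new file-push options into the queue manager CLI. A minimal sketch of how they might be combined on the command line; the CLI entry point and the queue/file names below are placeholders for illustration, only the `--from-file`, `--count`, `--start`, and `--auto-shift` flags come from this commit:

```bash
# Hypothetical invocation shape; <queue-manager-cli> stands in for the real entry point.
# Push the next 100 lines from urls.txt, resuming where the last push left off.
<queue-manager-cli> push my_env_stress_inbox --from-file urls.txt --count 100 --auto-shift

# Ignore the saved position and push 50 lines starting explicitly at line 501 (1-based).
<queue-manager-cli> push my_env_stress_inbox --from-file urls.txt --count 50 --start 501
```
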
@@ -167,6 +167,7 @@ Overridable Policy Parameters via --set:
     parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
     parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.')
     parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
+    parser.add_argument('--workers', type=int, help='Shortcut to override the total number of workers, capping any discovery logic.')
     parser.add_argument('--profile-prefix', '--user-prefix', dest='profile_prefix', help="Shortcut to override the profile prefix for profile locking mode. Affects both auth and download stages. Can be a comma-separated list.")
     parser.add_argument('--start-from-url-index', type=int, help='Start processing from this line number (1-based) in the urls_file. Overrides saved state.')
     parser.add_argument('--expire-time-shift-minutes', type=int, help="Consider URLs expiring in N minutes as expired. Overrides policy.")
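The `--workers` shortcut added above caps the worker count without editing the policy file. A hedged example of combining it with the existing overrides; the tool entry point is a placeholder and not taken from this commit, while the flags and the `execution_control.worker_polling_interval_seconds` key appear elsewhere in the diff:

```bash
# Hypothetical invocation shape; <stress-policy-tool> stands in for the real entry point.
<stress-policy-tool> --workers 5 --profile-prefix user1 \
  --set execution_control.worker_polling_interval_seconds=2
```
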
@@ -952,23 +952,41 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
             failure_rate = dummy_settings.get('download_failure_rate', 0.0)
             skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)
 
-            # In dummy mode, prioritize the format from the task file, then from the policy.
-            format_selection = info_data.get('_ytops_download_format')
-            source_of_format = "task file"
-            if not format_selection:
-                format_selection = d_policy.get('formats', '')
+            # In dummy mode, prioritize the format from the task metadata.
+            formats_to_test = None
+            source_of_format = "unknown"
+            # Prioritize format from task metadata, which supports per-format and per-url tasks.
+            metadata = info_data.get('_ytops_metadata', {})
+            formats_requested = metadata.get('formats_requested')
+            if formats_requested is not None:
+                formats_to_test = formats_requested
+                source_of_format = "task file metadata (_ytops_metadata.formats_requested)"
+
+            if formats_to_test is None:
+                # Fallback for older task formats or different workflows
+                format_selection_str = info_data.get('_ytops_download_format')
+                if format_selection_str:
+                    source_of_format = "task file (_ytops_download_format)"
+                    formats_to_test = [f.strip() for f in format_selection_str.split(',') if f.strip()]
+
+            if formats_to_test is None:
+                format_selection_str = d_policy.get('formats', '')
+                if format_selection_str:
                     source_of_format = "policy (download_policy.formats)"
+                    formats_to_test = [f.strip() for f in format_selection_str.split(',') if f.strip()]
 
-            if not format_selection:
+            if formats_to_test is None:
                 ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
-                format_selection = ytdlp_config_overrides.get('format', '')
+                format_selection_str = ytdlp_config_overrides.get('format', '')
+                if format_selection_str:
                     source_of_format = "policy (ytdlp_config_overrides.format)"
+                    formats_to_test = [f.strip() for f in format_selection_str.split(',') if f.strip()]
 
-            if not format_selection:
+            if formats_to_test is None:
                 logger.warning(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download.")
                 formats_to_test = ['dummy_format']
             else:
-                formats_to_test = [f.strip() for f in format_selection.split(',') if f.strip()]
             logger.info(f"[Worker {worker_id}] DUMMY: Simulating downloads for formats (from {source_of_format}): {', '.join(formats_to_test)}")
 
             for format_id in formats_to_test:
@@ -1022,14 +1040,47 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
 
             logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD SIMULATION ==========")
 
-            # In dummy mode, we just rename the file to processed and continue to the finally block.
+            # --- Airflow Directory Logic (Dummy Mode) ---
+            success = downloads_processed_in_task > 0
+            if success and d_policy.get('output_to_airflow_ready_dir'):
                 try:
+                    video_id = info_data.get('id')
+                    if not video_id:
+                        logger.error(f"[{profile_name}] DUMMY: Could not find video ID in '{claimed_task_path_host.name}' for moving files.")
+                    else:
+                        # --- Prepare destination directory ---
+                        now = datetime.now()
+                        rounded_minute = (now.minute // 10) * 10
+                        timestamp_str = now.strftime('%Y%m%dT%H') + f"{rounded_minute:02d}"
+
+                        base_path = d_policy.get('airflow_ready_dir_base_path', 'downloadfiles/videos/ready')
+                        if not os.path.isabs(base_path):
+                            base_path = os.path.join(sp_utils._PROJECT_ROOT, base_path)
+                        final_dir_base = os.path.join(base_path, timestamp_str)
+                        final_dir_path = os.path.join(final_dir_base, video_id)
+                        os.makedirs(final_dir_path, exist_ok=True)
+
+                        # --- Copy info.json ---
+                        new_info_json_name = f"info_{video_id}.json"
+                        dest_info_json_path = os.path.join(final_dir_path, new_info_json_name)
+                        if not os.path.exists(dest_info_json_path):
+                            shutil.copy(str(claimed_task_path_host), dest_info_json_path)
+                            logger.info(f"[{profile_name}] DUMMY: Copied info.json to {dest_info_json_path}")
+                except Exception as e:
+                    logger.error(f"[{profile_name}] DUMMY: Failed during post-download processing for Airflow: {e}", exc_info=True)
+
+            # In dummy mode, we handle file cleanup and continue to the finally block.
+            try:
+                if d_policy.get('remove_source_info_json'):
+                    claimed_task_path_host.unlink()
+                    logger.debug(f"DUMMY MODE: Removed processed task file '{claimed_task_path_host.name}'.")
+                else:
                     base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
                     processed_path = Path(f"{base_path_str}.processed")
                     claimed_task_path_host.rename(processed_path)
                     logger.debug(f"DUMMY MODE: Renamed processed task file to '{processed_path.name}'.")
             except (OSError, IndexError) as e:
-                logger.error(f"DUMMY MODE: Failed to rename processed task file '{claimed_task_path_host}': {e}")
+                logger.error(f"DUMMY MODE: Failed to clean up processed task file '{claimed_task_path_host}': {e}")
 
             continue # Skip to finally block
 
@@ -1329,15 +1380,19 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
 
             # 6. Clean up task file
             if not queue_policy:
-                # File-based mode: rename to .processed
+                # File-based mode: rename to .processed or remove
                 try:
+                    if success and d_policy.get('remove_source_info_json'):
+                        claimed_task_path_host.unlink()
+                        logger.debug(f"[{sp_utils.get_display_name(claimed_task_path_host)}] Removed processed task file.")
+                    else:
                         # The claimed_task_path_host has a .LOCKED suffix, remove it before adding .processed
                         base_path_str = str(claimed_task_path_host).rsplit('.LOCKED.', 1)[0]
                         processed_path = Path(f"{base_path_str}.processed")
                         claimed_task_path_host.rename(processed_path)
                         logger.debug(f"[{sp_utils.get_display_name(claimed_task_path_host)}] Renamed processed task file to '{processed_path.name}'.")
                 except (OSError, IndexError) as e:
-                    logger.error(f"Failed to rename processed task file '{claimed_task_path_host}': {e}")
+                    logger.error(f"Failed to clean up processed task file '{claimed_task_path_host}': {e}")
             elif d_policy.get('rename_source_info_json_on_success'):
                 # Queue-based mode: respect rename policy
                 source_path_to_rename = task.get('info_json_path')
@@ -1372,11 +1427,25 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
             # but the task is being finalized, we must assume all potential downloads for this task
             # are "processed" to prevent the auth profile from getting stuck.
             if downloads_processed_in_task == 0:
-                logger.warning(f"[Worker {worker_id}] No downloads were counted for this task. Using policy to determine decrement count to avoid stuck profile.")
-                ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
-                formats_str = ytdlp_config_overrides.get('format', d_policy.get('formats', ''))
-                num_formats = formats_str.count(',') + 1 if formats_str else 1
-                downloads_processed_in_task = num_formats
+                logger.warning(f"[Worker {worker_id}] No downloads were counted for this task. Using task metadata to determine decrement count to avoid stuck profile.")
+
+                decrement_count = 1 # Default to 1 to be safe
+                metadata = info_data.get('_ytops_metadata', {})
+                granularity = metadata.get('download_task_granularity')
+                formats_requested = metadata.get('formats_requested') # Can be None
+
+                if granularity == 'per_format':
+                    # Each task file represents one format group, so decrement by 1.
+                    decrement_count = 1
+                elif granularity == 'per_url':
+                    # The task file represents all formats for a URL.
+                    decrement_count = len(formats_requested) if formats_requested else 1
+                else:
+                    # No granularity info, this may be an older task file.
+                    # Assume it's a single download task.
+                    decrement_count = 1
+
+                downloads_processed_in_task = decrement_count
                 logger.warning(f"[Worker {worker_id}] Decrementing by fallback count: {downloads_processed_in_task}")
 
             if downloads_processed_in_task > 0:

@@ -30,192 +30,18 @@ from .queue_provider import RedisQueueProvider
 logger = logging.getLogger(__name__)
 
 
-def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manager_instance, running_processes, process_lock):
-    """Worker function for processing authentication tasks from a queue in batches."""
-    owner_id = f"queue-auth-worker-{worker_id}"
+def _process_single_auth_result(success, info_data, stderr, retcode, task, task_id, profile_name, proxy_url, worker_id, policy, state_manager, args, profile_manager_instance, info_json_path=None):
+    """
+    Helper to process the result of a single authentication attempt, whether it was
+    part of a batch or a single run.
+    """
     settings = policy.get('settings', {})
-    exec_control = policy.get('execution_control', {})
-    gen_policy = policy.get('info_json_generation_policy', {})
     queue_policy = policy.get('queue_policy', {})
+    gen_policy = policy.get('info_json_generation_policy', {})
     task_granularity = gen_policy.get('download_task_granularity', 'per_format')
 
-    profile_prefix = gen_policy.get('profile_prefix')
-    if not profile_prefix:
-        logger.error(f"[Worker {worker_id}] Queue auth mode requires 'info_json_generation_policy.profile_prefix'. Worker exiting.")
-        return []
-
     save_dir = settings.get('save_info_json_dir')
-    if not save_dir and queue_policy.get('formats_to_download'):
-        save_dir = os.path.join('run', 'stress_policy', 'info_jsons')
-        logger.info(f"[Worker {worker_id}] 'formats_to_download' is set and 'save_info_json_dir' is not, defaulting to '{save_dir}'")
-
-    if save_dir:
-        os.makedirs(save_dir, exist_ok=True)
-        logger.info(f"[Worker {worker_id}] Will save info.json files to '{save_dir}'")
-
-    batch_size = queue_policy.get('batch_size', 1)
-    logger.info(f"[Worker {worker_id}] Auth worker configured to process tasks in batches of {batch_size}.")
-    task_counter = 0
-    last_no_task_log_msg = ""
-
-    while not state_manager.shutdown_event.is_set():
-        locked_profile = None
-        tasks = []
-
-        try:
-            # 1. Get a batch of tasks from the queue FIRST
-            tasks = state_manager.get_auth_tasks_batch(batch_size)
-            if not tasks:
-                polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
-                base_log_msg = f"[Worker {worker_id}] No tasks available in queue, polling."
-                if base_log_msg == last_no_task_log_msg:
-                    print(".", end="", file=sys.stdout, flush=True)
-                else:
-                    if last_no_task_log_msg:
-                        print(file=sys.stdout)
-                    logger.info(base_log_msg)
-                    last_no_task_log_msg = base_log_msg
-                time.sleep(polling_interval)
-                continue
-
-            if last_no_task_log_msg:
-                print(file=sys.stderr)
-                last_no_task_log_msg = ""
-
-            # 2. Lock a profile, trying any of the specified prefixes
-            locked_profile = None
-            prefixes_to_try = []
-            if profile_prefix:
-                prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
-
-            if prefixes_to_try:
-                random.shuffle(prefixes_to_try)
-                for prefix in prefixes_to_try:
-                    locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
-                    if locked_profile:
-                        break
-            else:
-                # Fallback for empty/no prefix, which means lock any available profile
-                locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
-
-            if not locked_profile:
-                polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
-                logger.warning(f"[Worker {worker_id}] No profiles available for {len(tasks)} task(s). Re-queueing and sleeping for {polling_interval}s.")
-                state_manager.add_auth_tasks_batch(tasks)
-                time.sleep(polling_interval)
-                continue
-
-            profile_name = locked_profile['name']
-            proxy_url = locked_profile['proxy']
-
-            logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' to process a batch of {len(tasks)} tasks.")
-
-            # 3. Process each task in the batch
-            for task in tasks:
-                if state_manager.shutdown_event.is_set():
-                    logger.info(f"[Worker {worker_id}] Shutdown requested, stopping batch processing for profile '{profile_name}'.")
-                    break
-
-                temp_task_dir = None
-                task_id = None
-                url = None
-                try:
-                    temp_task_dir = tempfile.mkdtemp(prefix=f"queue-auth-{worker_id}-")
-                    task_id = task.get('id') or task.get('task_id')
-                    if not task_id:
-                        task_id = f"task_{worker_id}_{task_counter}"
-                        task_counter += 1
-                        task['task_id'] = task_id
 
     url = task.get('url')
-                    if not url:
-                        logger.error(f"[Worker {worker_id}] Task {task_id} has no URL. Skipping.")
-                        state_manager.report_auth_skipped(task_id, {"error": "No URL in task", "task": task})
-                        continue
-
-                    logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task {task_id}: {url}")
-                    state_manager.mark_auth_in_progress(task_id, owner_id)
-
-                    # --- Main processing logic for a single task ---
-                    success, info_data, stderr, retcode = False, None, "", 0
-                    if args.dummy or args.dummy_batch:
-                        logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Simulating auth for {url}")
-                        dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
-                        min_seconds = dummy_settings.get('auth_min_seconds', 0.1)
-                        max_seconds = dummy_settings.get('auth_max_seconds', 0.5)
-                        failure_rate = args.dummy_auth_failure_rate or dummy_settings.get('auth_failure_rate', 0.0)
-                        skipped_rate = args.dummy_auth_skipped_failure_rate or dummy_settings.get('auth_skipped_failure_rate', 0.0)
-                        time.sleep(random.uniform(min_seconds, max_seconds))
-
-                        rand_val = random.random()
-                        if rand_val < skipped_rate:
-                            stderr = "Dummy skipped failure"
-                        elif rand_val < (skipped_rate + failure_rate):
-                            stderr = "Dummy fatal failure"
-                        else:
-                            success = True
-                            # In dummy mode, robustly extract ID from URL to avoid 'unknown_video_id'
-                            try:
-                                # First, try the robust library function
-                                video_id = sp_utils.get_video_id(url)
-                                if not video_id and url:
-                                    # Fallback parsing for dummy URLs like "...?v=dummy_0099"
-                                    parsed_url = urlparse(url)
-                                    video_id = parse_qs(parsed_url.query).get('v', [None])[0]
-
-                                    # Additional fallback for URLs like "youtube.com/watch?v=dummy_0028"
-                                    if not video_id and 'dummy_' in url:
-                                        dummy_match = re.search(r'dummy_\d+', url)
-                                        if dummy_match:
-                                            video_id = dummy_match.group(0)
-                            except Exception:
-                                video_id = None # Ensure video_id is None on parsing failure
-
-                            # If all extraction methods fail, use the task_id as a reliable fallback.
-                            if not video_id:
-                                logger.debug(f"Could not extract video ID from URL '{url}', using task ID '{task_id}' for dummy data.")
-                                video_id = task_id
-
-                            info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True, 'formats': [{'format_id': '18'}, {'format_id': '140'}]}
-                    else:
-                        client, req_params = state_manager.get_client_for_request(profile_name, gen_policy)
-                        cmd = [
-                            sys.executable, '-m', 'ytops_client.cli', 'get-info',
-                            '--client', client or '', '--profile', profile_name
-                        ]
-                        if proxy_url:
-                            cmd.extend(['--proxy', proxy_url])
-                        if req_params:
-                            cmd.extend(['--request-params', json.dumps(req_params)])
-                        extra_args = gen_policy.get('extra_args')
-                        if extra_args:
-                            cmd.extend(shlex.split(extra_args))
-
-                        # The URL must be the last positional argument
-                        cmd.append(url)
-
-                        logger.info(f"[Worker {worker_id}] Running command: {' '.join(shlex.quote(s) for s in cmd)}")
-                        retcode, stdout, stderr = run_command(
-                            cmd, running_processes, process_lock, stream_output=args.verbose,
-                            stream_prefix=f"[Worker {worker_id} | get-info] "
-                        )
-                        success = (retcode == 0)
-                        if success:
-                            info_json_path = next((line.strip() for line in stdout.strip().split('\n') if line.endswith('.json') and os.path.exists(line.strip())), None)
-                            if info_json_path:
-                                try:
-                                    with open(info_json_path, 'r', encoding='utf-8') as f:
-                                        info_data = json.load(f)
-                                except (IOError, json.JSONDecodeError) as e:
-                                    logger.error(f"[Worker {worker_id}] Failed to read/parse info.json from get-info: {e}")
-                                    success = False
-                                    stderr += f"\nFailed to read/parse info.json: {e}"
-                            else:
-                                logger.error(f"[Worker {worker_id}] Command succeeded but no info.json path found in output.")
-                                success = False
-                                stderr += "\nNo info.json path in output"
-
-                    # --- Result processing for a single task ---
     if success and info_data:
         try:
             auth_env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
@@ -360,13 +186,14 @@ def run_queue_auth_worker(worker_id, policy, state_manage
     else:
         is_bot_error = "Sign in to confirm you're not a bot" in stderr
         is_timeout_error = "Read timed out" in stderr
+        is_proxy_error = "ProxyError" in stderr or "Unable to connect to proxy" in stderr
         is_unavailable = "This video is unavailable" in stderr or "Video unavailable" in stderr
         is_private = "This video is private" in stderr or "Private video" in stderr
         is_deleted = "This video has been removed" in stderr
         is_dummy_skipped = "Dummy skipped failure" in stderr
 
-        if is_unavailable or is_private or is_deleted or is_dummy_skipped:
-            reason = "Video unavailable" if is_unavailable else "Private video" if is_private else "Video removed" if is_deleted else "Dummy skipped"
+        if is_unavailable or is_private or is_deleted or is_dummy_skipped or is_proxy_error:
+            reason = "Video unavailable" if is_unavailable else "Private video" if is_private else "Video removed" if is_deleted else "Proxy error" if is_proxy_error else "Dummy skipped"
             logger.warning(f"[Worker {worker_id}] [{profile_name}] Auth skipped for {url}: {reason}")
             profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
             state_manager.report_auth_skipped(task_id, {"url": url, "reason": reason, "stderr": stderr})
@@ -376,14 +203,309 @@ def run_queue_auth_worker(worker_id, policy, state_manage
             profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
             state_manager.report_auth_failure(task_id, {"url": url, "error_type": error_type, "stderr": stderr, "exit_code": retcode})
 
+
+def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manager_instance, running_processes, process_lock):
+    """Worker function for processing authentication tasks from a queue in batches."""
+    owner_id = f"queue-auth-worker-{worker_id}"
+    settings = policy.get('settings', {})
+    exec_control = policy.get('execution_control', {})
+    gen_policy = policy.get('info_json_generation_policy', {})
+    queue_policy = policy.get('queue_policy', {})
+    docker_policy = policy.get('direct_docker_cli_policy', {})
+    task_granularity = gen_policy.get('download_task_granularity', 'per_format')
+
+    profile_prefix = gen_policy.get('profile_prefix')
+    if not profile_prefix:
+        logger.error(f"[Worker {worker_id}] Queue auth mode requires 'info_json_generation_policy.profile_prefix'. Worker exiting.")
+        return []
+
+    save_dir = settings.get('save_info_json_dir')
+    if not save_dir and queue_policy.get('formats_to_download'):
+        save_dir = os.path.join('run', 'stress_policy', 'info_jsons')
+        logger.info(f"[Worker {worker_id}] 'formats_to_download' is set and 'save_info_json_dir' is not, defaulting to '{save_dir}'")
+
+    if save_dir:
+        os.makedirs(save_dir, exist_ok=True)
+        logger.info(f"[Worker {worker_id}] Will save info.json files to '{save_dir}'")
+
+    batch_size = queue_policy.get('batch_size', 1)
+    logger.info(f"[Worker {worker_id}] Auth worker configured to process tasks in batches of {batch_size}.")
+    task_counter = 0
+    last_no_task_log_msg = ""
+
+    while not state_manager.shutdown_event.is_set():
+        locked_profile = None
+        tasks = []
+
+        try:
+            # 1. Get a batch of tasks from the queue FIRST
+            raw_tasks = state_manager.get_auth_tasks_batch(batch_size)
+            if not raw_tasks:
+                polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
+                base_log_msg = f"[Worker {worker_id}] No tasks available in queue, polling."
+                if base_log_msg == last_no_task_log_msg:
+                    print(".", end="", file=sys.stdout, flush=True)
+                else:
+                    if last_no_task_log_msg:
+                        print(file=sys.stdout)
+                    logger.info(base_log_msg)
+                    last_no_task_log_msg = base_log_msg
+                time.sleep(polling_interval)
+                continue
+
+            # Normalize tasks to handle both raw URL strings and JSON objects
+            tasks = []
+            for raw_task in raw_tasks:
+                if isinstance(raw_task, str):
+                    tasks.append({'url': raw_task})
+                elif isinstance(raw_task, dict) and raw_task.get('url'):
+                    tasks.append(raw_task)
+                else:
+                    logger.warning(f"[Worker {worker_id}] Skipping invalid task of type {type(raw_task)}: {raw_task}")
+
+            if not tasks:
+                logger.warning(f"[Worker {worker_id}] All tasks in batch were invalid. Waiting for next batch.")
+                time.sleep(1)
+                continue
+
+            if last_no_task_log_msg:
+                print(file=sys.stderr)
+                last_no_task_log_msg = ""
+
+            # 2. Lock a profile, trying any of the specified prefixes
+            locked_profile = None
+            prefixes_to_try = []
+            if profile_prefix:
+                prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
+
+            if prefixes_to_try:
+                random.shuffle(prefixes_to_try)
+                for prefix in prefixes_to_try:
+                    locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
+                    if locked_profile:
+                        break
+            else:
+                # Fallback for empty/no prefix, which means lock any available profile
+                locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
+
+            if not locked_profile:
+                polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
+                logger.warning(f"[Worker {worker_id}] No profiles available for {len(tasks)} task(s). Re-queueing and sleeping for {polling_interval}s.")
+                state_manager.add_auth_tasks_batch(tasks)
+                time.sleep(polling_interval)
+                continue
+
+            profile_name = locked_profile['name']
+            proxy_url = locked_profile['proxy']
+
+            logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' to process a batch of {len(tasks)} tasks.")
+
+            # 3. Process the entire batch
+            if args.dummy or args.dummy_batch:
+                # Dummy mode is not optimized for batching, so we process tasks individually.
+                logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Processing {len(tasks)} tasks individually.")
+                for task in tasks:
+                    if state_manager.shutdown_event.is_set():
+                        break
+
+                    task_id = task.get('id') or task.get('task_id') or f"task_{worker_id}_{task_counter}"
+                    url = task.get('url')
+
+                    # Dummy simulation logic copied from single-task implementation
+                    logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY MODE: Simulating auth for {url}")
+                    dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
+                    min_seconds = dummy_settings.get('auth_min_seconds', 0.1)
+                    max_seconds = dummy_settings.get('auth_max_seconds', 0.5)
+                    failure_rate = args.dummy_auth_failure_rate or dummy_settings.get('auth_failure_rate', 0.0)
+                    skipped_rate = args.dummy_auth_skipped_failure_rate or dummy_settings.get('auth_skipped_failure_rate', 0.0)
+                    time.sleep(random.uniform(min_seconds, max_seconds))
+
+                    success, info_data, stderr, retcode = False, None, "", 0
+                    rand_val = random.random()
+                    if rand_val < skipped_rate:
+                        stderr = "Dummy skipped failure"
+                    elif rand_val < (skipped_rate + failure_rate):
+                        stderr = "Dummy fatal failure"
+                    else:
+                        success = True
+                        video_id = sp_utils.get_video_id(url) or task_id
+                        info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True, 'formats': [{'format_id': '18'}, {'format_id': '140'}]}
+
+                    _process_single_auth_result(
+                        success, info_data, stderr, retcode, task, task_id, profile_name, proxy_url,
+                        worker_id, policy, state_manager, args, profile_manager_instance
+                    )
+            else:
+                # BATCH MODE: Process all tasks in a single Docker run
+                batch_temp_dir = None
+                try:
+                    # Create a temporary directory for this batch's files
+                    host_mount_path = docker_policy.get('docker_host_mount_path')
+                    temp_parent_dir = host_mount_path if host_mount_path and os.path.isdir(host_mount_path) else None
+                    batch_temp_dir = tempfile.mkdtemp(prefix=f"queue-auth-batch-{worker_id}-", dir=temp_parent_dir)
+                    task_dir_name = os.path.basename(batch_temp_dir)
+                    task_dir_container = os.path.join(docker_policy.get('docker_container_mount_path'), task_dir_name)
+
+                    # Map URLs to their original tasks for post-processing
+                    url_to_task_map = {task.get('url'): task for task in tasks if task.get('url')}
+
+                    # Create a batch file with all URLs
+                    batch_file_host_path = os.path.join(batch_temp_dir, 'batch_urls.txt')
+                    with open(batch_file_host_path, 'w', encoding='utf-8') as f:
+                        for url in url_to_task_map.keys():
+                            f.write(f"{url}\n")
+
+                    # Mark all tasks in the batch as in-progress
+                    for task in tasks:
+                        task_id = task.get('id') or task.get('task_id')
+                        if not task_id:
+                            # Generate a temporary, worker-unique ID if none exists.
+                            task_id = f"task_{worker_id}_{int(time.time()*1000)}_{task_counter}"
+                            task['task_id'] = task_id # Store it back for later stages
+                            task_counter += 1
+                            logger.warning(f"[Worker {worker_id}] Task missing 'id' or 'task_id'. Generated a temporary one: {task_id}. Task content: {task}")
+                        state_manager.mark_auth_in_progress(task_id, owner_id)
+
+                    # Docker batch processing logic
+                    image_name = docker_policy.get('docker_image_name')
+                    if not image_name:
+                        raise ValueError("'direct_docker_cli_policy.docker_image_name' is not defined.")
+
+                    network_name = docker_policy.get('docker_network_name')
+                    container_mount_path = docker_policy.get('docker_container_mount_path')
+                    host_cache_path = docker_policy.get('docker_host_cache_path')
+                    container_cache_path = docker_policy.get('docker_container_cache_path')
+
+                    volumes = {}
+                    if host_mount_path and container_mount_path:
+                        volumes[str(Path(host_mount_path).resolve())] = {'bind': container_mount_path, 'mode': 'rw'}
+                    if host_cache_path and container_cache_path:
+                        Path(host_cache_path).mkdir(parents=True, exist_ok=True)
+                        volumes[str(Path(host_cache_path).resolve())] = {'bind': container_cache_path, 'mode': 'rw'}
+
+                    environment = {}
+                    if host_cache_path and container_cache_path:
+                        environment['XDG_CACHE_HOME'] = container_cache_path
+
+                    # --- Build config file content ---
+                    base_config_content = ""
+                    base_config_file = docker_policy.get('ytdlp_config_file')
+                    if base_config_file:
+                        config_path_to_read = Path(base_config_file)
+                        if not config_path_to_read.exists():
+                            config_path_to_read = Path(sp_utils._PROJECT_ROOT) / base_config_file
+
+                        if config_path_to_read.exists():
+                            try:
+                                with open(config_path_to_read, 'r', encoding='utf-8') as f:
+                                    base_config_content = f.read()
+                            except IOError as e:
+                                logger.error(f"[Worker {worker_id}] Could not read ytdlp_config_file '{config_path_to_read}': {e}")
+                        else:
+                            logger.error(f"[Worker {worker_id}] Could not find ytdlp_config_file: '{base_config_file}'")
+
+                    config_overrides = docker_policy.get('ytdlp_config_overrides', {}).copy()
+                    config_overrides['proxy'] = proxy_url
+                    config_overrides['batch-file'] = os.path.join(task_dir_container, 'batch_urls.txt')
+                    config_overrides['output'] = os.path.join(task_dir_container, '%(id)s')
+
+                    overrides_content = sp_utils._config_dict_to_flags_file_content(config_overrides)
+                    raw_args_from_policy = docker_policy.get('ytdlp_raw_args', [])
+                    raw_args_content = '\n'.join(raw_args_from_policy)
+
+                    config_content = f"{base_config_content.strip()}\n\n# --- Overrides from policy ---\n{overrides_content}"
+                    if raw_args_content:
+                        config_content += f"\n\n# --- Raw args from policy ---\n{raw_args_content}"
+
+                    # Write the combined config to a file in the temp batch dir
+                    ytdlp_config_dir_host = os.path.join(batch_temp_dir, 'yt-dlp')
+                    os.makedirs(ytdlp_config_dir_host, exist_ok=True)
+                    temp_config_file_host = os.path.join(ytdlp_config_dir_host, 'config')
+                    with open(temp_config_file_host, 'w', encoding='utf-8') as f:
+                        f.write(config_content)
+
+                    # The command tells yt-dlp exactly where to find the config file we created.
+                    cmd = ['yt-dlp', '--config-locations', os.path.join(task_dir_container, 'yt-dlp/config')]
+
+                    processed_urls = set()
+                    activity_lock = threading.Lock()
+
+                    def log_parser_callback(line):
+                        # Not thread-safe, but only one thread writes to this set
+                        nonlocal processed_urls
+
+                        if '[info] Writing video metadata as JSON to:' in line:
+                            try:
+                                path_match = re.search(r"Writing video metadata as JSON to: '?([^']+)'?$", line)
+                                if not path_match:
+                                    path_match = re.search(r"Writing video metadata as JSON to: (.*)$", line)
+
+                                if path_match:
+                                    container_file_path = path_match.group(1).strip()
+                                    if container_file_path.startswith(container_mount_path):
+                                        relative_path = os.path.relpath(container_file_path, container_mount_path)
+                                        host_file_path = os.path.join(host_mount_path, relative_path)
+
+                                        # Wait for file to exist
+                                        for _ in range(5):
+                                            if os.path.exists(host_file_path): break
+                                            time.sleep(0.1)
+
+                                        if os.path.exists(host_file_path):
+                                            with open(host_file_path, 'r', encoding='utf-8') as f:
+                                                info_data = json.load(f)
+
+                                            original_url = info_data.get('original_url') or info_data.get('webpage_url')
+                                            if original_url in url_to_task_map:
+                                                with activity_lock:
+                                                    if original_url not in processed_urls:
+                                                        processed_urls.add(original_url)
+                                                        task = url_to_task_map[original_url]
+                                                        task_id = task.get('id') or task.get('task_id')
+                                                        _process_single_auth_result(
+                                                            True, info_data, "", 0, task, task_id, profile_name, proxy_url,
+                                                            worker_id, policy, state_manager, args, profile_manager_instance,
+                                                            info_json_path=host_file_path
+                                                        )
+                                            else:
+                                                logger.warning(f"[Worker {worker_id}] Could not map info.json '{Path(host_file_path).name}' with URL '{original_url}' to an original task. Skipping.")
+                            except (IOError, json.JSONDecodeError, KeyError) as e:
+                                logger.error(f"[Worker {worker_id}] Error during live post-processing from log line: {e}")
+                        return False # Don't stop the container
+
+                    logger.info(f"[Worker {worker_id}] [{profile_name}] Running Docker container for batch of {len(tasks)} URLs.")
+                    retcode, stdout, stderr, _ = run_docker_container(
+                        image_name=image_name, command=cmd, volumes=volumes,
+                        environment=environment, network_name=network_name,
+                        stream_prefix=f"[Worker {worker_id} | Docker] ",
+                        log_callback=log_parser_callback
+                    )
+
+                    # --- Post-processing for failed URLs ---
+                    unprocessed_urls = set(url_to_task_map.keys()) - processed_urls
+                    if unprocessed_urls:
+                        logger.warning(f"[Worker {worker_id}] [{profile_name}] {len(unprocessed_urls)} URLs from the batch did not produce an info.json and will be marked as failed.")
+                        for url in unprocessed_urls:
+                            task = url_to_task_map[url]
+                            task_id = task.get('id') or task.get('task_id')
+                            _process_single_auth_result(
+                                False, None, stderr, retcode, task, task_id, profile_name, proxy_url,
+                                worker_id, policy, state_manager, args, profile_manager_instance
+                            )
+
                 except Exception as e:
-                    logger.error(f"[Worker {worker_id}] [{profile_name}] Unexpected error processing task {task_id}: {e}", exc_info=True)
-                    if task_id:
-                        state_manager.report_auth_failure(task_id, {"error": f"Unexpected error: {str(e)}", "url": url or "unknown"})
-                        profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
+                    logger.error(f"[Worker {worker_id}] [{profile_name}] Unexpected error during batch processing: {e}", exc_info=True)
+                    for task in tasks:
+                        task_id = task.get('id') or task.get('task_id')
+                        _process_single_auth_result(
+                            False, None, f"Unexpected batch error: {str(e)}", -1, task, task_id, profile_name, proxy_url,
+                            worker_id, policy, state_manager, args, profile_manager_instance
+                        )
                 finally:
-                    if temp_task_dir and os.path.exists(temp_task_dir):
-                        shutil.rmtree(temp_task_dir)
+                    if batch_temp_dir and os.path.exists(batch_temp_dir):
+                        shutil.rmtree(batch_temp_dir)
+                    for task in tasks:
+                        task_id = task.get('id') or task.get('task_id')
                         if task_id:
                             state_manager.remove_auth_in_progress(task_id)
 
@ -437,24 +559,41 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
settings = policy.get('settings', {})
exec_control = policy.get('execution_control', {})
d_policy = policy.get('download_policy', {})
direct_policy = policy.get('direct_docker_cli_policy', {})
queue_policy = policy.get('queue_policy', {})

profile_prefix = d_policy.get('profile_prefix')
if not profile_prefix:
logger.error(f"[Worker {worker_id}] Queue download mode requires 'download_policy.profile_prefix'. Worker exiting.")
logger.error(f"[Worker {worker_id}] Queue download mode requires a profile prefix, but none was provided to the worker. Check 'execution_control' in policy. Worker exiting.")
return []

output_dir = d_policy.get('output_dir')
# --- Docker specific config for download worker ---
if output_dir:
image_name = direct_policy.get('docker_image_name')
os.makedirs(output_dir, exist_ok=True)
host_mount_path = direct_policy.get('docker_host_mount_path')
logger.info(f"[Worker {worker_id}] Will save downloads to '{output_dir}'")
container_mount_path = direct_policy.get('docker_container_mount_path')
host_download_path = direct_policy.get('docker_host_download_path')
container_download_path = direct_policy.get('docker_container_download_path')
network_name = direct_policy.get('docker_network_name')

if not all([image_name, host_mount_path, container_mount_path, host_download_path, container_download_path]):
logger.error(f"[Worker {worker_id}] Queue docker download mode requires all docker_* keys in 'direct_docker_cli_policy'. Worker exiting.")
return []

try:
os.makedirs(host_mount_path, exist_ok=True)
os.makedirs(host_download_path, exist_ok=True)
except OSError as e:
logger.error(f"[Worker {worker_id}] Could not create required host directories: {e}. Worker exiting.")
return []

task_counter = 0
last_no_task_log_msg = ""

while not state_manager.shutdown_event.is_set():
locked_profile = None
temp_task_dir = None
temp_task_dir = None # Used by dummy mode
temp_config_dir_host = None # Used by docker mode
was_banned_by_parser = False
task = None
auth_profile_name, auth_env = None, None
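For orientation, here is a minimal sketch of the `direct_docker_cli_policy` block this worker reads, expressed as a Python dict. The key names mirror the `.get(...)` calls in the hunk above; the values (image tag, paths, network name) are illustrative assumptions, not defaults shipped with the project.

```python
# Illustrative only: key names mirror the .get() calls above; values are assumptions.
direct_docker_cli_policy_example = {
    "docker_image_name": "yt-dlp:latest",                  # assumed local image tag
    "docker_host_mount_path": "/srv/stress/tasks",         # host dir holding info.json tasks
    "docker_container_mount_path": "/tasks",               # read-only bind target in container
    "docker_host_download_path": "/srv/stress/downloads",  # host dir receiving media files
    "docker_container_download_path": "/downloads",        # writable bind target in container
    "docker_network_name": "stress-net",                   # optional docker network
}

# The worker refuses to start unless all of the docker_* keys above are present:
required = ["docker_image_name", "docker_host_mount_path", "docker_container_mount_path",
            "docker_host_download_path", "docker_container_download_path"]
assert all(direct_docker_cli_policy_example.get(k) for k in required)
```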
@ -594,105 +733,132 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
# race conditions between workers processing different formats for the
# same video. The files can be cleaned up manually between runs.
else:
cmd = [
retcode = -1; stdout = ""; stderr = ""; stop_reason = None
sys.executable, '-m', 'ytops_client.cli', 'download', 'py',
live_success_count, live_failure_count, live_tolerated_count = 0, 0, 0
'--load-info-json', info_json_path,
'-f', format_id
]
if proxy_url:
cmd.extend(['--proxy', proxy_url])
if output_dir:
cmd.extend(['--output-dir', output_dir])
extra_args = d_policy.get('extra_args')
if extra_args:
cmd.extend(shlex.split(extra_args))

logger.info(f"[Worker {worker_id}] Running command: {' '.join(shlex.quote(s) for s in cmd)}")
# --- Pre-flight checks ---
retcode, stdout, stderr = run_command(
if d_policy.get('check_url_expiration', True):
cmd, running_processes, process_lock, stream_output=args.verbose,
try:
stream_prefix=f"[Worker {worker_id} | download] "
with open(info_json_path, 'r', encoding='utf-8') as f: info_data = json.load(f)
first_format = next((f for f in info_data.get('formats', []) if 'url' in f), None)
if first_format:
status, _ = sp_utils.check_url_expiry(first_format['url'], d_policy.get('expire_time_shift_minutes', 0))
if status == 'expired': stderr = "Download URL is expired"
except (IOError, json.JSONDecodeError) as e: stderr = f"Could not read info.json: {e}"

if not stderr:
relative_task_path = os.path.relpath(os.path.abspath(info_json_path), host_mount_path)
task_path_container = os.path.join(container_mount_path, relative_task_path)
temp_config_dir_host = tempfile.mkdtemp(prefix=f"docker-dl-config-{worker_id}-", dir=host_mount_path)
config_dir_container = os.path.join(container_mount_path, os.path.basename(temp_config_dir_host))

base_config_content = ""
base_config_file = direct_policy.get('ytdlp_config_file')
if base_config_file:
config_path_to_read = Path(base_config_file)
if not config_path_to_read.exists(): config_path_to_read = Path(sp_utils._PROJECT_ROOT) / base_config_file
if config_path_to_read.exists():
try:
with open(config_path_to_read, 'r', encoding='utf-8') as f: base_config_content = f.read()
except IOError as e: logger.error(f"[Worker {worker_id}] Could not read ytdlp_config_file '{config_path_to_read}': {e}")

config_overrides = direct_policy.get('ytdlp_config_overrides', {}).copy()
config_overrides.update({
'proxy': proxy_url, 'load-info-json': task_path_container,
'output': os.path.join(container_download_path, '%(id)s.f%(format_id)s.%(ext)s'),
'format': format_id, 'no-cache-dir': True
})
overrides_content = sp_utils._config_dict_to_flags_file_content(config_overrides)
raw_args_content = '\n'.join(direct_policy.get('ytdlp_raw_args', []))
config_content = f"{base_config_content.strip()}\n\n# --- Overrides ---\n{overrides_content}\n{raw_args_content}"

ytdlp_config_dir_host = os.path.join(temp_config_dir_host, 'yt-dlp')
os.makedirs(ytdlp_config_dir_host, exist_ok=True)
with open(os.path.join(ytdlp_config_dir_host, 'config'), 'w', encoding='utf-8') as f: f.write(config_content)

volumes = {
os.path.abspath(host_mount_path): {'bind': container_mount_path, 'mode': 'ro'},
os.path.abspath(host_download_path): {'bind': container_download_path, 'mode': 'rw'}
}
command = ['yt-dlp', '--config-locations', os.path.join(config_dir_container, 'yt-dlp/config')]
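The config file handed to the container is assembled by `sp_utils._config_dict_to_flags_file_content`, whose implementation is not part of this diff. A hypothetical equivalent is sketched below to show the shape of output it is expected to produce: yt-dlp long-form options, one per line, which yt-dlp also accepts inside a config file.

```python
# Hypothetical stand-in for sp_utils._config_dict_to_flags_file_content (not shown in
# this diff); it renders a dict of yt-dlp options as long-form flags, one per line.
def config_dict_to_flags(overrides: dict) -> str:
    lines = []
    for key, value in overrides.items():
        if value is None or value is False:
            continue                          # unset options are omitted
        if value is True:
            lines.append(f"--{key}")          # boolean switch, e.g. --no-cache-dir
        else:
            lines.append(f"--{key} {value}")  # valued option, e.g. --format 137
    return "\n".join(lines)


print(config_dict_to_flags({"format": "137", "no-cache-dir": True, "proxy": None}))
# --format 137
# --no-cache-dir
```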
activity_lock = threading.Lock()
tolerated_error_patterns = direct_policy.get('tolerated_error_patterns', [])
fatal_error_patterns = direct_policy.get('fatal_error_patterns', [])

def log_parser_callback(line):
nonlocal live_success_count, live_failure_count, live_tolerated_count, was_banned_by_parser
if '[download] 100% of' in line or 'has already been downloaded' in line:
with activity_lock: live_success_count += 1; profile_manager_instance.record_activity(profile_name, 'download', is_dummy=False)
return False
for pattern in fatal_error_patterns:
if re.search(pattern, line, re.IGNORECASE):
with activity_lock:
live_failure_count += 1; profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=False)
if direct_policy.get('ban_on_fatal_error_in_batch'):
profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during download')
was_banned_by_parser = True; return True
return False
if 'ERROR:' not in line: return False
for pattern in tolerated_error_patterns:
if re.search(pattern, line, re.IGNORECASE):
with activity_lock: live_tolerated_count += 1; profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=False)
return False
with activity_lock: live_failure_count += 1; profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=False)
return False

retcode, stdout, stderr, stop_reason = run_docker_container(
image_name=image_name, command=command, volumes=volumes, stream_prefix=f"[Worker {worker_id} | docker-ytdlp] ",
network_name=network_name, log_callback=log_parser_callback, profile_manager=profile_manager_instance,
profile_name=profile_name, environment={}
)
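A standalone sketch of the classification order `log_parser_callback` applies to container output: success markers first, then fatal patterns, then tolerated patterns for `ERROR:` lines, with a generic failure fallback. The pattern lists here are made-up examples, not values from any shipped policy.

```python
import re

# Illustrative pattern lists (assumptions, not project defaults).
FATAL = [r"Sign in to confirm", r"HTTP Error 403"]
TOLERATED = [r"Video unavailable", r"Requested format is not available"]

def classify(line: str) -> str:
    # Order mirrors log_parser_callback above: success, fatal, then ERROR-only checks.
    if "[download] 100% of" in line or "has already been downloaded" in line:
        return "success"
    if any(re.search(p, line, re.IGNORECASE) for p in FATAL):
        return "fatal"
    if "ERROR:" not in line:
        return "ignore"
    if any(re.search(p, line, re.IGNORECASE) for p in TOLERATED):
        return "tolerated"
    return "failure"

assert classify("[download] 100% of 10.00MiB in 00:01") == "success"
assert classify("ERROR: [youtube] abc: Video unavailable") == "tolerated"
```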
success = (retcode == 0)
if success:
success = live_success_count > 0 or (retcode == 0 and not stop_reason and live_failure_count == 0)
for line in stdout.strip().split('\n'):
if os.path.exists(line.strip()):
downloaded_filepath = line.strip()
break

if success:
# --- Airflow Integration ---
downloaded_filepath = None
if d_policy.get('output_to_airflow_ready_dir'):
base_path = d_policy.get('airflow_ready_dir_base_path')
if not base_path:
logger.error(f"[Worker {worker_id}] 'output_to_airflow_ready_dir' is true but 'airflow_ready_dir_base_path' is not set. Skipping Airflow output.")
logger.error(f"[Worker {worker_id}] 'output_to_airflow_ready_dir' is true but 'airflow_ready_dir_base_path' is not set.")
else:
try:
# Create a unique, timestamped directory for the video
ts = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S_%f')
final_dir = os.path.join(base_path, ts + '_' + (video_id or 'unknown_video'))
os.makedirs(final_dir, exist_ok=True)

# Copy info.json to avoid a race condition where multiple download workers
# for different formats try to move the same source file.
if info_json_path and os.path.exists(info_json_path):
final_info_json_path = os.path.join(final_dir, os.path.basename(info_json_path))
shutil.copy(info_json_path, os.path.join(final_dir, os.path.basename(info_json_path)))
shutil.copy(info_json_path, final_info_json_path)
logger.info(f"[Worker {worker_id}] Copied info.json to Airflow-ready dir: {final_info_json_path}")
# Update the path for reporting to point to the new copy
info_json_path = final_info_json_path

# Move downloaded file (if not in dummy mode)
downloaded_files = [f for f in os.listdir(host_download_path) if not f.endswith(('.part', '.ytdl'))]
if downloaded_filepath and os.path.exists(downloaded_filepath):
if not downloaded_files: logger.warning(f"No media files found in '{host_download_path}' for video '{video_id}'.")
final_media_path = os.path.join(final_dir, os.path.basename(downloaded_filepath))
shutil.move(downloaded_filepath, final_media_path)
logger.info(f"[Worker {worker_id}] Moved media file to Airflow-ready dir: {final_media_path}")
# Update the path for reporting
downloaded_filepath = final_media_path

except Exception as e:
for filename in downloaded_files:
logger.error(f"[Worker {worker_id}] Failed to move files to Airflow-ready directory: {e}", exc_info=True)
final_media_path = os.path.join(final_dir, filename)
shutil.move(os.path.join(host_download_path, filename), final_media_path)
if downloaded_filepath is None: downloaded_filepath = final_media_path
except Exception as e: logger.error(f"Failed to move files to Airflow-ready dir: {e}", exc_info=True)

profile_manager_instance.record_activity(profile_name, 'download', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_download_success(task_id, {
"video_id": video_id, "url": url, "format_id": format_id,
"video_id": video_id, "url": url, "format_id": format_id, "profile_name": profile_name, "proxy_url": proxy_url,
"profile_name": profile_name, "proxy_url": proxy_url,
"downloaded_filepath": downloaded_filepath, "info_json_path": info_json_path
})
logger.info(f"[Worker {worker_id}] Download successful: {video_id or url} format {format_id}")

# --- Rename source info.json to mark as processed ---
if d_policy.get('rename_source_info_json_on_success'):
# Use the original path from the task, as the `info_json_path` variable
source_path = task.get('info_json_path')
# may have been updated by the Airflow logic to point to a copy.
if source_path and os.path.exists(source_path):
source_path_to_rename = task.get('info_json_path')
try: shutil.move(source_path, source_path + ".processed")
if source_path_to_rename and os.path.exists(source_path_to_rename):
except Exception as e: logger.warning(f"Could not rename source info.json '{source_path}': {e}")
try:
processed_path = source_path_to_rename + ".processed"
shutil.move(source_path_to_rename, processed_path)
logger.info(f"[Worker {worker_id}] Renamed source info.json to '{processed_path}'")
except Exception as e:
logger.warning(f"[Worker {worker_id}] Could not rename source info.json '{source_path_to_rename}': {e}")
else:
is_bot_error = "Sign in to confirm you're not a bot" in stderr
is_unavailable = "unavailable" in stderr.lower() or "expired" in stderr.lower()
is_timeout_error = "Read timed out" in stderr
is_unavailable = "This video is unavailable" in stderr or "Video unavailable" in stderr
is_format_error = "requested format not available" in stderr

if is_unavailable or is_format_error:
logger.warning(f"[Worker {worker_id}] Download skipped: {video_id or url} format {format_id}")
reason = "Video/URL unavailable or expired" if is_unavailable else "Format not available"
profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_download_skipped(task_id, {"video_id": video_id, "url": url, "format_id": format_id, "reason": reason, "stderr": stderr})
state_manager.report_download_skipped(task_id, {
"video_id": video_id, "url": url, "format_id": format_id,
"reason": "Video unavailable" if is_unavailable else "Format not available", "stderr": stderr
})
else:
error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else f"Exit code {retcode}"
logger.error(f"[Worker {worker_id}] Download failed ({error_type}): {video_id or url} format {format_id}")
profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=(args.dummy or args.dummy_batch))
state_manager.report_download_failure(task_id, {
"video_id": video_id, "url": url, "format_id": format_id,
"video_id": video_id, "url": url, "format_id": format_id, "error_type": f"Exit code {retcode}",
"error_type": error_type, "stderr": stderr, "exit_code": retcode,
"stderr": stderr, "exit_code": retcode, "original_task": task
"original_task": task
})

except Exception as e:
@ -722,6 +888,9 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env in task metadata. Pending downloads counter will not be decremented.")

if locked_profile:
if was_banned_by_parser:
logger.info(f"[Worker {worker_id}] Profile '{locked_profile['name']}' was banned by log parser. Skipping unlock.")
else:
cooldown = None
cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds')
if cooldown_config:
@ -732,7 +901,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
elif isinstance(val, int):
cooldown = val
except (json.JSONDecodeError, TypeError):
if cooldown_config.isdigit():
if isinstance(cooldown_config, str) and cooldown_config.isdigit():
cooldown = int(cooldown_config)

if cooldown:
@ -747,6 +916,9 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
if temp_task_dir and os.path.exists(temp_task_dir):
shutil.rmtree(temp_task_dir)

if temp_config_dir_host and os.path.exists(temp_config_dir_host):
shutil.rmtree(temp_config_dir_host)

if task and task_id:
state_manager.remove_download_in_progress(task_id)
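The `unlock_cooldown_seconds` value read from the profile manager may arrive as an int, a numeric string, or a JSON payload; the hunk above only shows the tail of that parsing, so the sketch below reconstructs the general idea under the assumption that the JSON form carries a representative field such as `min`.

```python
import json

def parse_cooldown(raw):
    """Illustrative parser: int, numeric string, or JSON (shape assumed, not from the diff)."""
    if raw is None:
        return None
    if isinstance(raw, int):
        return raw
    try:
        val = json.loads(raw)
        if isinstance(val, dict):
            return val.get("min")   # assumption: pick a representative field
        if isinstance(val, int):
            return val
    except (json.JSONDecodeError, TypeError):
        # Fall back to plain numeric strings, mirroring the isdigit() guard above.
        if isinstance(raw, str) and raw.isdigit():
            return int(raw)
    return None

assert parse_cooldown("45") == 45
assert parse_cooldown(30) == 30
```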
|
|
||||||
|
|||||||
@ -130,6 +130,47 @@ process_lock = threading.Lock()
logger = logging.getLogger('stress_policy_tool')

def _discover_worker_pools(discovery_config, manager_for_discovery):
"""
Discovers worker pools by scanning profile prefixes in Redis.
Returns a list of worker pool configurations or None on error.
"""
discovery_pattern = discovery_config.get('profile_prefix_pattern')
workers_per_group = discovery_config.get('workers_per_profile_group', 1)

if not discovery_pattern:
logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
return None

logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
try:
all_profiles = manager_for_discovery.list_profiles()
found_prefixes = set()
for profile in all_profiles:
profile_name = profile['name']
if fnmatch.fnmatch(profile_name, discovery_pattern):
# Assuming standard name format like 'user31_001', extract 'user31'
prefix = profile_name.rsplit('_', 1)[0]
found_prefixes.add(prefix)

if not found_prefixes:
logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
return []
else:
worker_pools = []
for prefix in sorted(list(found_prefixes)):
worker_pools.append({
'profile_prefix': prefix,
'workers': workers_per_group
})
logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
logger.info("Note: Profile group discovery runs once at startup. A restart is required to detect new profile groups.")
return worker_pools
except Exception as e:
logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
return None


def main_stress_policy(args):
"""Main logic for the 'stress-policy' command."""
if args.list_policies:
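A usage sketch of the discovery rule implemented by `_discover_worker_pools`: profile names are matched against a glob pattern with `fnmatch` and grouped by the text before their last underscore. The profile names below are invented for illustration.

```python
import fnmatch

# Sample profile names and pattern are illustrative.
profiles = ["user31_001", "user31_002", "user32_001", "qa_profile_007"]
pattern = "user*"

prefixes = sorted({name.rsplit("_", 1)[0] for name in profiles if fnmatch.fnmatch(name, pattern)})
worker_pools = [{"profile_prefix": p, "workers": 1} for p in prefixes]

print(worker_pools)
# [{'profile_prefix': 'user31', 'workers': 1}, {'profile_prefix': 'user32', 'workers': 1}]
```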
@ -362,10 +403,21 @@ def main_stress_policy(args):
logger.info(f"\nSignal {signum} received, shutting down gracefully...")
shutdown_event.set()

# Propagate signal to child processes to allow them to shut down gracefully.
with process_lock:
if running_processes:
logger.info(f"Propagating signal to {len(running_processes)} running subprocess(es)...")
for p in running_processes:
try:
# Send the same signal to the entire process group.
os.killpg(os.getpgid(p.pid), signum)
except (ProcessLookupError, PermissionError):
pass # Process already finished or we lack permissions

# Save state immediately to prevent loss on interrupt.
logger.info("Attempting to save state before shutdown...")
state_manager.close()
logger.info("Shutdown requested. Allowing in-progress tasks to complete. No new tasks will be started. Press Ctrl+C again to force exit.")
logger.info("Shutdown requested. Signalling in-progress tasks to terminate gracefully. No new tasks will be started. Press Ctrl+C again to force exit.")
else:
logger.info("Second signal received, forcing exit.")
# On second signal, forcefully terminate subprocesses.
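A minimal, POSIX-only sketch of the propagation pattern added above: subprocesses started in their own process group can be signalled as a group with `os.killpg` without touching the parent. The `sleep` command stands in for a real worker subprocess; names here are illustrative, not taken from the project.

```python
import os
import signal
import subprocess

# Children get their own process group so killpg targets only them.
procs = [subprocess.Popen(["sleep", "60"], start_new_session=True) for _ in range(2)]

def forward(signum):
    for p in procs:
        try:
            os.killpg(os.getpgid(p.pid), signum)   # signal the whole child group
        except (ProcessLookupError, PermissionError):
            pass                                   # already gone, or not ours to signal

forward(signal.SIGTERM)
for p in procs:
    p.wait()
```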
@ -579,6 +631,7 @@ def main_stress_policy(args):
logger.info(f"Starting/resuming from URL index {start_index + 1}.")
# The worker's get_next_url_batch will respect this starting index.

logger.info(f"Task source file: {os.path.abspath(urls_file)}")
sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
if args.dry_run: return 0
@ -586,13 +639,8 @@ def main_stress_policy(args):
worker_pools = exec_control.get('worker_pools', [])
discovery_config = exec_control.get('worker_pool_discovery')

if discovery_config:
if discovery_config and not worker_pools:
if worker_pools:
logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")

discovery_pattern = discovery_config.get('profile_prefix_pattern')
workers_per_group = discovery_config.get('workers_per_profile_group', 1)

direct_policy = policy.get('direct_batch_cli_policy', {})
use_env = direct_policy.get('use_profile_env', 'auth')
manager_for_discovery = profile_managers.get(use_env)
@ -601,35 +649,10 @@ def main_stress_policy(args):
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
return 1

if not discovery_pattern:
discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
if discovered_pools is None:
return 1
return 1 # An error occurred during discovery
worker_pools = discovered_pools
logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
try:
all_profiles = manager_for_discovery.list_profiles()
found_prefixes = set()
for profile in all_profiles:
profile_name = profile['name']
if fnmatch.fnmatch(profile_name, discovery_pattern):
# Assuming standard name format like 'user31_001', extract 'user31'
prefix = profile_name.rsplit('_', 1)[0]
found_prefixes.add(prefix)

if not found_prefixes:
logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
worker_pools = []
else:
worker_pools = []
for prefix in sorted(list(found_prefixes)):
worker_pools.append({
'profile_prefix': prefix,
'workers': workers_per_group
})
logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
except Exception as e:
logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
return 1

if not worker_pools and exec_control.get('workers'):
# Fallback for legacy 'workers: N' config
@ -641,24 +664,60 @@ def main_stress_policy(args):
return 1

if args.profile_prefix:
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}

original_pool_count = len(worker_pools)
filtered_pools = []
for pool in worker_pools:
pool['profile_prefix'] = args.profile_prefix
pool_prefixes_str = pool.get('profile_prefix', '')
if not pool_prefixes_str:
continue
pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
if pool_prefixes.intersection(cli_prefixes):
filtered_pools.append(pool)

if len(filtered_pools) < original_pool_count:
logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")

worker_pools = filtered_pools

if not worker_pools:
logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")

worker_specs = []
if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
all_prefixes = []
for pool in worker_pools:
prefix_str = pool.get('profile_prefix', '')
if prefix_str:
all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
logger.info(f"Single worker will manage profile groups: {final_prefix_str}")

worker_policy = deepcopy(policy)
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = final_prefix_str
worker_specs.append({
'func': run_direct_batch_worker,
'args': (0, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
})
else:
worker_id_counter = 0
for pool in worker_pools:
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
break
pool_workers = pool.get('workers', 1)
prefix_str = pool.get('profile_prefix', '')
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
if not prefix_str:
if not prefixes:
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
continue

for i in range(pool_workers):
assigned_prefix = prefixes[i % len(prefixes)]
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
break
worker_policy = deepcopy(policy)
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = assigned_prefix
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
worker_specs.append({
'func': run_direct_batch_worker,
'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
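A compact illustration of the two `--profile-prefix` behaviours introduced in this hunk: pools are kept only if their prefixes intersect the CLI value, and in single-worker mode the surviving prefixes are folded into one comma-separated string. The pool data below is illustrative.

```python
# Illustrative pools and CLI value (assumptions, not project data).
worker_pools = [
    {"profile_prefix": "user31", "workers": 2},
    {"profile_prefix": "user32,user33", "workers": 1},
    {"profile_prefix": "qa", "workers": 1},
]
cli_prefixes = {"user31", "user33"}

# Keep a pool if any of its prefixes matches one given on the CLI.
filtered = [
    pool for pool in worker_pools
    if {p.strip() for p in pool["profile_prefix"].split(",")} & cli_prefixes
]
assert [p["profile_prefix"] for p in filtered] == ["user31", "user32,user33"]

# Single worker (--workers 1): aggregate every prefix from the surviving pools.
all_prefixes = sorted({p.strip() for pool in filtered for p in pool["profile_prefix"].split(",")})
final_prefix_str = ",".join(all_prefixes)
assert final_prefix_str == "user31,user32,user33"
```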
@ -742,6 +801,7 @@ def main_stress_policy(args):
if start_index > 0:
logger.info(f"Starting/resuming from URL index {start_index + 1}.")

logger.info(f"Task source file: {os.path.abspath(urls_file)}")
sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
if args.dry_run: return 0
@ -749,13 +809,8 @@ def main_stress_policy(args):
|
|||||||
worker_pools = exec_control.get('worker_pools', [])
|
worker_pools = exec_control.get('worker_pools', [])
|
||||||
discovery_config = exec_control.get('worker_pool_discovery')
|
discovery_config = exec_control.get('worker_pool_discovery')
|
||||||
|
|
||||||
if discovery_config:
|
if discovery_config and not worker_pools:
|
||||||
if worker_pools:
|
logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
|
||||||
logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
|
|
||||||
|
|
||||||
discovery_pattern = discovery_config.get('profile_prefix_pattern')
|
|
||||||
workers_per_group = discovery_config.get('workers_per_profile_group', 1)
|
|
||||||
|
|
||||||
direct_policy = policy.get('direct_docker_cli_policy', {})
|
direct_policy = policy.get('direct_docker_cli_policy', {})
|
||||||
use_env = direct_policy.get('use_profile_env', 'auth' if mode == 'fetch_only' else 'download')
|
use_env = direct_policy.get('use_profile_env', 'auth' if mode == 'fetch_only' else 'download')
|
||||||
manager_for_discovery = profile_managers.get(use_env)
|
manager_for_discovery = profile_managers.get(use_env)
|
||||||
@ -764,35 +819,10 @@ def main_stress_policy(args):
|
|||||||
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
|
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not discovery_pattern:
|
discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
|
||||||
logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
|
if discovered_pools is None:
|
||||||
return 1
|
return 1 # An error occurred
|
||||||
|
worker_pools = discovered_pools
|
||||||
logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
|
|
||||||
try:
|
|
||||||
all_profiles = manager_for_discovery.list_profiles()
|
|
||||||
found_prefixes = set()
|
|
||||||
for profile in all_profiles:
|
|
||||||
profile_name = profile['name']
|
|
||||||
if fnmatch.fnmatch(profile_name, discovery_pattern):
|
|
||||||
# Assuming standard name format like 'user31_001', extract 'user31'
|
|
||||||
prefix = profile_name.rsplit('_', 1)[0]
|
|
||||||
found_prefixes.add(prefix)
|
|
||||||
|
|
||||||
if not found_prefixes:
|
|
||||||
logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
|
|
||||||
worker_pools = []
|
|
||||||
else:
|
|
||||||
worker_pools = []
|
|
||||||
for prefix in sorted(list(found_prefixes)):
|
|
||||||
worker_pools.append({
|
|
||||||
'profile_prefix': prefix,
|
|
||||||
'workers': workers_per_group
|
|
||||||
})
|
|
||||||
logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not worker_pools and exec_control.get('workers'):
|
if not worker_pools and exec_control.get('workers'):
|
||||||
# Fallback for legacy 'workers: N' config
|
# Fallback for legacy 'workers: N' config
|
||||||
@ -804,22 +834,59 @@ def main_stress_policy(args):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.profile_prefix:
|
if args.profile_prefix:
|
||||||
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
|
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
|
||||||
|
cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
|
||||||
|
|
||||||
|
original_pool_count = len(worker_pools)
|
||||||
|
filtered_pools = []
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
pool['profile_prefix'] = args.profile_prefix
|
pool_prefixes_str = pool.get('profile_prefix', '')
|
||||||
|
if not pool_prefixes_str:
|
||||||
|
continue
|
||||||
|
pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
|
||||||
|
if pool_prefixes.intersection(cli_prefixes):
|
||||||
|
filtered_pools.append(pool)
|
||||||
|
|
||||||
|
if len(filtered_pools) < original_pool_count:
|
||||||
|
logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
|
||||||
|
|
||||||
|
worker_pools = filtered_pools
|
||||||
|
|
||||||
|
if not worker_pools:
|
||||||
|
logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")
|
||||||
|
|
||||||
worker_specs = []
|
worker_specs = []
|
||||||
|
if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
|
||||||
|
logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
|
||||||
|
all_prefixes = []
|
||||||
|
for pool in worker_pools:
|
||||||
|
prefix_str = pool.get('profile_prefix', '')
|
||||||
|
if prefix_str:
|
||||||
|
all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
|
||||||
|
final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
|
||||||
|
logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
|
||||||
|
|
||||||
|
worker_policy = deepcopy(policy)
|
||||||
|
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = final_prefix_str
|
||||||
|
worker_specs.append({
|
||||||
|
'func': run_direct_docker_worker,
|
||||||
|
'args': (0, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
|
||||||
|
})
|
||||||
|
else:
|
||||||
worker_id_counter = 0
|
worker_id_counter = 0
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
|
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
|
||||||
|
break
|
||||||
pool_workers = pool.get('workers', 1)
|
pool_workers = pool.get('workers', 1)
|
||||||
prefix_str = pool.get('profile_prefix', '')
|
prefix_str = pool.get('profile_prefix', '')
|
||||||
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
|
if not prefix_str:
|
||||||
if not prefixes:
|
|
||||||
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
|
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Each worker in the pool gets the full list of prefixes from the pool configuration.
|
# Each worker in the pool gets the full list of prefixes from the pool configuration.
|
||||||
for i in range(pool_workers):
|
for i in range(pool_workers):
|
||||||
|
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
|
||||||
|
break
|
||||||
worker_policy = deepcopy(policy)
|
worker_policy = deepcopy(policy)
|
||||||
# The worker functions will now handle a comma-separated list of prefixes.
|
# The worker functions will now handle a comma-separated list of prefixes.
|
||||||
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
|
worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
|
||||||
@ -847,6 +914,7 @@ def main_stress_policy(args):
logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
return 1

logger.info(f"Task source directory: {os.path.abspath(info_json_dir)}")
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
@ -854,13 +922,8 @@ def main_stress_policy(args):
|
|||||||
worker_pools = exec_control.get('worker_pools', [])
|
worker_pools = exec_control.get('worker_pools', [])
|
||||||
discovery_config = exec_control.get('worker_pool_discovery')
|
discovery_config = exec_control.get('worker_pool_discovery')
|
||||||
|
|
||||||
if discovery_config:
|
if discovery_config and not worker_pools:
|
||||||
if worker_pools:
|
logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
|
||||||
logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
|
|
||||||
|
|
||||||
discovery_pattern = discovery_config.get('profile_prefix_pattern')
|
|
||||||
workers_per_group = discovery_config.get('workers_per_profile_group', 1)
|
|
||||||
|
|
||||||
direct_policy = policy.get('direct_docker_cli_policy', {})
|
direct_policy = policy.get('direct_docker_cli_policy', {})
|
||||||
use_env = direct_policy.get('use_profile_env', 'auth' if mode == 'fetch_only' else 'download')
|
use_env = direct_policy.get('use_profile_env', 'auth' if mode == 'fetch_only' else 'download')
|
||||||
manager_for_discovery = profile_managers.get(use_env)
|
manager_for_discovery = profile_managers.get(use_env)
|
||||||
@ -869,35 +932,10 @@ def main_stress_policy(args):
|
|||||||
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
|
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not discovery_pattern:
|
discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
|
||||||
logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
|
if discovered_pools is None:
|
||||||
return 1
|
return 1 # An error occurred
|
||||||
|
worker_pools = discovered_pools
|
||||||
logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
|
|
||||||
try:
|
|
||||||
all_profiles = manager_for_discovery.list_profiles()
|
|
||||||
found_prefixes = set()
|
|
||||||
for profile in all_profiles:
|
|
||||||
profile_name = profile['name']
|
|
||||||
if fnmatch.fnmatch(profile_name, discovery_pattern):
|
|
||||||
# Assuming standard name format like 'user31_001', extract 'user31'
|
|
||||||
prefix = profile_name.rsplit('_', 1)[0]
|
|
||||||
found_prefixes.add(prefix)
|
|
||||||
|
|
||||||
if not found_prefixes:
|
|
||||||
logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
|
|
||||||
worker_pools = []
|
|
||||||
else:
|
|
||||||
worker_pools = []
|
|
||||||
for prefix in sorted(list(found_prefixes)):
|
|
||||||
worker_pools.append({
|
|
||||||
'profile_prefix': prefix,
|
|
||||||
'workers': workers_per_group
|
|
||||||
})
|
|
||||||
logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not worker_pools and exec_control.get('workers'):
|
if not worker_pools and exec_control.get('workers'):
|
||||||
# Fallback for legacy 'workers: N' config
|
# Fallback for legacy 'workers: N' config
|
||||||
@ -909,22 +947,59 @@ def main_stress_policy(args):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.profile_prefix:
|
if args.profile_prefix:
|
||||||
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
|
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
|
||||||
|
cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
|
||||||
|
|
||||||
|
original_pool_count = len(worker_pools)
|
||||||
|
filtered_pools = []
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
pool['profile_prefix'] = args.profile_prefix
|
pool_prefixes_str = pool.get('profile_prefix', '')
|
||||||
|
if not pool_prefixes_str:
|
||||||
|
continue
|
||||||
|
pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
|
||||||
|
if pool_prefixes.intersection(cli_prefixes):
|
||||||
|
filtered_pools.append(pool)
|
||||||
|
|
||||||
|
if len(filtered_pools) < original_pool_count:
|
||||||
|
logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
|
||||||
|
|
||||||
|
worker_pools = filtered_pools
|
||||||
|
|
||||||
|
if not worker_pools:
|
||||||
|
logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")
|
||||||
|
|
||||||
worker_specs = []
|
worker_specs = []
|
||||||
|
if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
|
||||||
|
logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
|
||||||
|
all_prefixes = []
|
||||||
|
for pool in worker_pools:
|
||||||
|
prefix_str = pool.get('profile_prefix', '')
|
||||||
|
if prefix_str:
|
||||||
|
all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
|
||||||
|
final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
|
||||||
|
logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
|
||||||
|
|
||||||
|
worker_policy = deepcopy(policy)
|
||||||
|
worker_policy.setdefault('download_policy', {})['profile_prefix'] = final_prefix_str
|
||||||
|
worker_specs.append({
|
||||||
|
'func': run_direct_docker_download_worker,
|
||||||
|
'args': (0, worker_policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
|
||||||
|
})
|
||||||
|
else:
|
||||||
worker_id_counter = 0
|
worker_id_counter = 0
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
|
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
|
||||||
|
break
|
||||||
pool_workers = pool.get('workers', 1)
|
pool_workers = pool.get('workers', 1)
|
||||||
prefix_str = pool.get('profile_prefix', '')
|
prefix_str = pool.get('profile_prefix', '')
|
||||||
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
|
if not prefix_str:
|
||||||
if not prefixes:
|
|
||||||
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
|
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Each worker in the pool gets the full list of prefixes from the pool configuration.
|
# Each worker in the pool gets the full list of prefixes from the pool configuration.
|
||||||
for i in range(pool_workers):
|
for i in range(pool_workers):
|
||||||
|
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
|
||||||
|
break
|
||||||
worker_policy = deepcopy(policy)
|
worker_policy = deepcopy(policy)
|
||||||
# The worker functions will now handle a comma-separated list of prefixes.
|
# The worker functions will now handle a comma-separated list of prefixes.
|
||||||
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
|
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
|
||||||
@ -968,6 +1043,7 @@ def main_stress_policy(args):
logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
return 1

logger.info(f"Task source directory: {os.path.abspath(info_json_dir)}")
sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
if args.dry_run: return 0
@ -975,48 +1051,18 @@ def main_stress_policy(args):
|
|||||||
worker_pools = exec_control.get('worker_pools', [])
|
worker_pools = exec_control.get('worker_pools', [])
|
||||||
discovery_config = exec_control.get('worker_pool_discovery')
|
discovery_config = exec_control.get('worker_pool_discovery')
|
||||||
|
|
||||||
if discovery_config:
|
if discovery_config and not worker_pools:
|
||||||
if worker_pools:
|
logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
|
||||||
logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
|
|
||||||
|
|
||||||
discovery_pattern = discovery_config.get('profile_prefix_pattern')
|
|
||||||
workers_per_group = discovery_config.get('workers_per_profile_group', 1)
|
|
||||||
|
|
||||||
manager_for_discovery = profile_managers.get('download')
|
manager_for_discovery = profile_managers.get('download')
|
||||||
|
|
||||||
if not manager_for_discovery:
|
if not manager_for_discovery:
|
||||||
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
|
logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not discovery_pattern:
|
discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
|
||||||
logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
|
if discovered_pools is None:
|
||||||
return 1
|
return 1 # An error occurred
|
||||||
|
worker_pools = discovered_pools
|
||||||
logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
|
|
||||||
try:
|
|
||||||
all_profiles = manager_for_discovery.list_profiles()
|
|
||||||
found_prefixes = set()
|
|
||||||
for profile in all_profiles:
|
|
||||||
profile_name = profile['name']
|
|
||||||
if fnmatch.fnmatch(profile_name, discovery_pattern):
|
|
||||||
# Assuming standard name format like 'user31_001', extract 'user31'
|
|
||||||
prefix = profile_name.rsplit('_', 1)[0]
|
|
||||||
found_prefixes.add(prefix)
|
|
||||||
|
|
||||||
if not found_prefixes:
|
|
||||||
logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
|
|
||||||
worker_pools = []
|
|
||||||
else:
|
|
||||||
worker_pools = []
|
|
||||||
for prefix in sorted(list(found_prefixes)):
|
|
||||||
worker_pools.append({
|
|
||||||
'profile_prefix': prefix,
|
|
||||||
'workers': workers_per_group
|
|
||||||
})
|
|
||||||
logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not worker_pools and exec_control.get('workers'):
|
if not worker_pools and exec_control.get('workers'):
|
||||||
# Fallback for legacy 'workers: N' config
|
# Fallback for legacy 'workers: N' config
|
||||||
@ -1028,22 +1074,59 @@ def main_stress_policy(args):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.profile_prefix:
|
if args.profile_prefix:
|
||||||
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
|
logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set. Filtering worker pools to match this prefix.")
|
||||||
|
cli_prefixes = {p.strip() for p in args.profile_prefix.split(',') if p.strip()}
|
||||||
|
|
||||||
|
original_pool_count = len(worker_pools)
|
||||||
|
filtered_pools = []
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
pool['profile_prefix'] = args.profile_prefix
|
pool_prefixes_str = pool.get('profile_prefix', '')
|
||||||
|
if not pool_prefixes_str:
|
||||||
|
continue
|
||||||
|
pool_prefixes = {p.strip() for p in pool_prefixes_str.split(',') if p.strip()}
|
||||||
|
if pool_prefixes.intersection(cli_prefixes):
|
||||||
|
filtered_pools.append(pool)
|
||||||
|
|
||||||
|
if len(filtered_pools) < original_pool_count:
|
||||||
|
logger.info(f"Filtered {original_pool_count - len(filtered_pools)} pool(s) out based on --profile-prefix.")
|
||||||
|
|
||||||
|
worker_pools = filtered_pools
|
||||||
|
|
||||||
|
if not worker_pools:
|
||||||
|
logger.warning(f"After filtering with --profile-prefix '{args.profile_prefix}', no worker pools remain.")
|
||||||
|
|
||||||
worker_specs = []
|
worker_specs = []
|
||||||
|
if hasattr(args, 'workers') and args.workers == 1 and len(worker_pools) > 1:
|
||||||
|
logger.info("Single worker mode: aggregating all discovered profile groups for the worker.")
|
||||||
|
all_prefixes = []
|
||||||
|
for pool in worker_pools:
|
||||||
|
prefix_str = pool.get('profile_prefix', '')
|
||||||
|
if prefix_str:
|
||||||
|
all_prefixes.extend(p.strip() for p in prefix_str.split(',') if p.strip())
|
||||||
|
final_prefix_str = ','.join(sorted(list(set(all_prefixes))))
|
||||||
|
logger.info(f"Single worker will manage profile groups: {final_prefix_str}")
|
||||||
|
|
||||||
|
worker_policy = deepcopy(policy)
|
||||||
|
worker_policy.setdefault('download_policy', {})['profile_prefix'] = final_prefix_str
|
||||||
|
worker_specs.append({
|
||||||
|
'func': run_direct_download_worker,
|
||||||
|
'args': (0, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
|
||||||
|
})
|
||||||
|
else:
|
||||||
worker_id_counter = 0
|
worker_id_counter = 0
|
||||||
for pool in worker_pools:
|
for pool in worker_pools:
|
||||||
|
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
|
||||||
|
break
|
||||||
pool_workers = pool.get('workers', 1)
|
pool_workers = pool.get('workers', 1)
|
||||||
prefix_str = pool.get('profile_prefix', '')
|
prefix_str = pool.get('profile_prefix', '')
|
||||||
prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
|
if not prefix_str:
|
||||||
if not prefixes:
|
|
||||||
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
|
logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Each worker in the pool gets the full list of prefixes from the pool configuration.
|
# Each worker in the pool gets the full list of prefixes from the pool configuration.
|
||||||
for i in range(pool_workers):
|
for i in range(pool_workers):
|
||||||
|
if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
|
||||||
|
break
|
||||||
worker_policy = deepcopy(policy)
|
worker_policy = deepcopy(policy)
|
||||||
# The worker functions will now handle a comma-separated list of prefixes.
|
# The worker functions will now handle a comma-separated list of prefixes.
|
||||||
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
|
worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
|
||||||
@ -1129,48 +1212,18 @@ def main_stress_policy(args):
 worker_pools = exec_control.get('worker_pools', [])
 discovery_config = exec_control.get('worker_pool_discovery')
 
-if discovery_config:
-    if worker_pools:
-        logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-    discovery_pattern = discovery_config.get('profile_prefix_pattern')
-    workers_per_group = discovery_config.get('workers_per_profile_group', 1)
+if discovery_config and not worker_pools:
+    logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
 
     manager_for_discovery = profile_managers.get('auth')
 
     if not manager_for_discovery:
         logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
         return 1
 
-    if not discovery_pattern:
-        logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-        return 1
-
-    logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-    try:
-        all_profiles = manager_for_discovery.list_profiles()
-        found_prefixes = set()
-        for profile in all_profiles:
-            profile_name = profile['name']
-            if fnmatch.fnmatch(profile_name, discovery_pattern):
-                # Assuming standard name format like 'user31_001', extract 'user31'
-                prefix = profile_name.rsplit('_', 1)[0]
-                found_prefixes.add(prefix)
-
-        if not found_prefixes:
-            logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-            worker_pools = []
-        else:
-            worker_pools = []
-            for prefix in sorted(list(found_prefixes)):
-                worker_pools.append({
-                    'profile_prefix': prefix,
-                    'workers': workers_per_group
-                })
-            logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-    except Exception as e:
-        logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-        return 1
+    discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+    if discovered_pools is None:
+        return 1 # An error occurred
+    worker_pools = discovered_pools
 
 if not worker_pools and exec_control.get('workers'):
     # Fallback for legacy 'workers: N' config
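The inline discovery logic removed in this hunk is factored out into `_discover_worker_pools`. Its body is not part of this diff; based on the code it replaces and the `None`-on-error check at the call site, it presumably looks roughly like the sketch below (the exact signature, docstring, and log wording are assumptions):

```python
import fnmatch
import logging

logger = logging.getLogger(__name__)


def _discover_worker_pools(discovery_config, profile_manager):
    """Group Redis profiles matching a glob pattern into worker pool definitions.

    Returns a list of pool dicts (possibly empty), or None if discovery fails.
    """
    discovery_pattern = discovery_config.get('profile_prefix_pattern')
    workers_per_group = discovery_config.get('workers_per_profile_group', 1)

    if not discovery_pattern:
        logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
        return None

    logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
    try:
        all_profiles = profile_manager.list_profiles()
        found_prefixes = set()
        for profile in all_profiles:
            profile_name = profile['name']
            if fnmatch.fnmatch(profile_name, discovery_pattern):
                # Assuming standard name format like 'user31_001', extract 'user31'
                prefix = profile_name.rsplit('_', 1)[0]
                found_prefixes.add(prefix)

        if not found_prefixes:
            logger.warning(f"Worker pool discovery found no profiles matching pattern "
                           f"'{discovery_pattern}'. No workers will be started.")
            return []

        worker_pools = [{'profile_prefix': prefix, 'workers': workers_per_group}
                        for prefix in sorted(found_prefixes)]
        logger.info(f"Discovered {len(found_prefixes)} profile groups, "
                    f"creating {workers_per_group} worker(s) for each.")
        return worker_pools
    except Exception as e:
        logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
        return None
```

Returning an empty list when nothing matches, rather than `None`, lets the caller distinguish "no matching profiles" from a hard failure, which is what the `if discovered_pools is None: return 1` check relies on.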
@ -1189,6 +1242,8 @@ def main_stress_policy(args):
 worker_specs = []
 worker_id_counter = 0
 for pool in worker_pools:
+    if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+        break
     pool_workers = pool.get('workers', 1)
     prefix_str = pool.get('profile_prefix', '')
     prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
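The two inserted lines cap the total number of worker specs at `args.workers` whenever that attribute is set. A self-contained sketch of the capping behaviour; the `--workers` flag name and the spec dict shape are assumptions inferred from the attribute name:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--workers', type=int, default=None,
                    help='Optional hard cap on the total number of workers started.')
args = parser.parse_args(['--workers', '3'])

worker_pools = [{'profile_prefix': 'user1', 'workers': 2},
                {'profile_prefix': 'user2', 'workers': 4}]

worker_specs = []
for pool in worker_pools:
    # Stop creating further pools once the global cap is reached.
    if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
        break
    for i in range(pool.get('workers', 1)):
        # The same guard inside the inner loop stops mid-pool.
        if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
            break
        worker_specs.append({'profile_prefix': pool['profile_prefix'], 'index': i})

assert len(worker_specs) == 3  # capped despite 6 workers being configured
```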
@ -1198,6 +1253,8 @@ def main_stress_policy(args):
 
     # Each worker in the pool gets the full list of prefixes from the pool configuration.
     for i in range(pool_workers):
+        if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+            break
         worker_policy = deepcopy(policy)
         # The worker functions will now handle a comma-separated list of prefixes.
         worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
@ -1285,48 +1342,18 @@ def main_stress_policy(args):
 worker_pools = exec_control.get('worker_pools', [])
 discovery_config = exec_control.get('worker_pool_discovery')
 
-if discovery_config:
-    if worker_pools:
-        logger.warning("Both 'worker_pools' and 'worker_pool_discovery' are defined. 'worker_pool_discovery' will take precedence.")
-
-    discovery_pattern = discovery_config.get('profile_prefix_pattern')
-    workers_per_group = discovery_config.get('workers_per_profile_group', 1)
+if discovery_config and not worker_pools:
+    logger.info("Explicit 'worker_pools' not defined. Using 'worker_pool_discovery' as a fallback.")
 
     manager_for_discovery = profile_managers.get('download')
 
     if not manager_for_discovery:
         logger.error(f"Could not determine profile manager for worker pool discovery in mode '{orchestration_mode}/{mode}'.")
         return 1
 
-    if not discovery_pattern:
-        logger.error("'worker_pool_discovery' is missing required key 'profile_prefix_pattern'.")
-        return 1
-
-    logger.info(f"Discovering worker pools from profile prefixes matching '{discovery_pattern}'...")
-    try:
-        all_profiles = manager_for_discovery.list_profiles()
-        found_prefixes = set()
-        for profile in all_profiles:
-            profile_name = profile['name']
-            if fnmatch.fnmatch(profile_name, discovery_pattern):
-                # Assuming standard name format like 'user31_001', extract 'user31'
-                prefix = profile_name.rsplit('_', 1)[0]
-                found_prefixes.add(prefix)
-
-        if not found_prefixes:
-            logger.warning(f"Worker pool discovery found no profiles matching pattern '{discovery_pattern}'. No workers will be started.")
-            worker_pools = []
-        else:
-            worker_pools = []
-            for prefix in sorted(list(found_prefixes)):
-                worker_pools.append({
-                    'profile_prefix': prefix,
-                    'workers': workers_per_group
-                })
-            logger.info(f"Discovered {len(found_prefixes)} profile groups, creating {workers_per_group} worker(s) for each: {', '.join(sorted(list(found_prefixes)))}")
-    except Exception as e:
-        logger.error(f"Failed to discover profile groups from Redis: {e}", exc_info=True)
-        return 1
+    discovered_pools = _discover_worker_pools(discovery_config, manager_for_discovery)
+    if discovered_pools is None:
+        return 1 # An error occurred
+    worker_pools = discovered_pools
 
 if not worker_pools and exec_control.get('workers'):
     # Fallback for legacy 'workers: N' config
@ -1345,6 +1372,8 @@ def main_stress_policy(args):
 worker_specs = []
 worker_id_counter = 0
 for pool in worker_pools:
+    if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+        break
     pool_workers = pool.get('workers', 1)
     prefix_str = pool.get('profile_prefix', '')
     prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
@ -1354,6 +1383,8 @@ def main_stress_policy(args):
 
     # Each worker in the pool gets the full list of prefixes from the pool configuration.
     for i in range(pool_workers):
+        if hasattr(args, 'workers') and args.workers is not None and len(worker_specs) >= args.workers:
+            break
         worker_policy = deepcopy(policy)
         # The worker functions will now handle a comma-separated list of prefixes.
         worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
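Taken together, the repeated hunks read the same `execution_control` keys in every mode: explicit `worker_pools` take precedence, `worker_pool_discovery` is consulted only when no pools are listed, and a bare `workers: N` remains as a legacy fallback. A hypothetical parsed `execution_control` section illustrating the three styles (the key names come from the diff; the values and the glob pattern are examples only):

```python
# Style 1: explicit pools; each entry may carry a comma-separated profile_prefix.
execution_control_explicit = {
    'worker_pools': [
        {'profile_prefix': 'user1,user2', 'workers': 2},
        {'profile_prefix': 'user3', 'workers': 1},
    ],
}

# Style 2: discovery fallback, used only when 'worker_pools' is absent or empty.
execution_control_discovery = {
    'worker_pool_discovery': {
        'profile_prefix_pattern': 'user*',   # glob matched against profile names in Redis
        'workers_per_profile_group': 1,      # workers created per discovered prefix
    },
}

# Style 3: legacy form, honoured only when neither of the above yields any pools.
execution_control_legacy = {'workers': 4}
```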