Update Ansible so auth and downloads run on the dl machines, while the enforcer runs on master
parent db78171281
commit 7a514ab8ce
@@ -1,7 +1,5 @@
 vault_redis_password: "rOhTAIlTFFylXsjhqwxnYxDChFc"
-vault_postgres_password: "pgdb_pwd_A7bC2xY9zE1wV5uP"
+vault_redis_port: 52909
-vault_airflow_admin_password: "2r234sdfrt3q454arq45q355"
-vault_flower_password: "dO4eXm7UkF81OdMvT8E2tIKFtPYPCzyzwlcZ4RyOmCsmG4qzrNFqM5sNTOT9"
 vault_vnc_password: "vnc_pwd_Z5xW8cV2bN4mP7lK"
 vault_ss_password_1: "UCUAR7vRO/u9Zo71nfA13c+/b1MCiJpfZJo+EmEBCfA="
 vault_ss_password_2: "tgtQcfjJp/A3F01g4woO0bEQoxij3CAOK/iR1OTPuF4="
102  ansible/manage-processes-tasks.yml  Normal file
@@ -0,0 +1,102 @@
---
# This file is managed by include_tasks and is not a standalone playbook.
# It provides generic tasks for managing a process within a tmux session.
# Required variables:
# - tmux_session_name: The name of the tmux session.
# - working_dir: The directory where the command should be executed.
# - command_to_run: The command to execute inside the tmux session.
# - process_grep_pattern: A regex pattern to identify the process for pkill/status check.
#
# Optional variables (control flags):
# - start_process: (bool) Set to true to start the process. Default: false.
# - stop_process: (bool) Set to true to stop the process. Default: false.
# - check_status: (bool) Set to true to check and display the process status. Default: false.

- name: Ensure tmux is installed
  ansible.builtin.apt:
    name: tmux
    state: present
  become: yes
  run_once: true # No need to run this on every include

- name: Stop existing tmux session
  ansible.builtin.shell:
    cmd: "tmux kill-session -t {{ tmux_session_name }} 2>/dev/null || true"
  when: stop_process | default(false) | bool

- name: Kill any orphaned processes
  ansible.builtin.shell:
    cmd: |
      PIDS=$(ps aux | grep -E "{{ process_grep_pattern }}" | grep -v "grep" | awk '{print $2}')
      if [ -n "$PIDS" ]; then
        kill $PIDS >/dev/null 2>&1 || true
        sleep 0.5
        kill -9 $PIDS >/dev/null 2>&1 || true
      fi
  changed_when: false
  when: stop_process | default(false) | bool

- name: Stop existing process before starting (makes start idempotent)
  block:
    - name: Stop existing tmux session
      ansible.builtin.shell:
        cmd: "tmux kill-session -t {{ tmux_session_name }} 2>/dev/null || true"

    - name: Kill any orphaned processes
      ansible.builtin.shell:
        cmd: |
          PIDS=$(ps aux | grep -E "{{ process_grep_pattern }}" | grep -v "grep" | awk '{print $2}')
          if [ -n "$PIDS" ]; then
            kill $PIDS >/dev/null 2>&1 || true
            sleep 0.5
            kill -9 $PIDS >/dev/null 2>&1 || true
          fi
      changed_when: false
  when: start_process | default(false) | bool

- name: Ensure client script is executable
  ansible.builtin.file:
    path: "{{ working_dir }}/bin/ytops-client"
    mode: "a+x"
  when: start_process | default(false) | bool

- name: Display command for tmux session
  ansible.builtin.debug:
    msg: "Command for tmux session '{{ tmux_session_name }}': {{ command_to_run }}"
  when: start_process | default(false) | bool

- name: Start process in tmux session
  ansible.builtin.shell:
    cmd: |
      cd {{ working_dir }}
      # The command is wrapped with a final sleep to keep the tmux session alive for debugging even if the process fails.
      # The actual command is run in a subshell (...) to prevent 'exec' from terminating the parent shell.
      tmux new-session -d -s {{ tmux_session_name }} \
        "if [ -f .env ]; then set -a; . ./.env; set +a; fi; \
        COMMAND_TO_RUN='{{ command_to_run | replace("'", "'\\''") }}'; \
        echo '>>> Running command:'; \
        echo "\$COMMAND_TO_RUN"; \
        echo '---'; \
        (eval "\$COMMAND_TO_RUN") ; \
        echo; echo '---'; echo 'Process exited. This tmux session will remain open for debugging.'; echo 'You can attach with: tmux attach -t {{ tmux_session_name }}'; \
        sleep 3600"
  when: start_process | default(false) | bool

- name: Check process status
  ansible.builtin.shell:
    cmd: |
      echo "Process status on {{ inventory_hostname }} for session '{{ tmux_session_name }}':"
      if tmux has-session -t {{ tmux_session_name }} 2>/dev/null; then
        echo " - Tmux session '{{ tmux_session_name }}' is running"
        ps aux | grep -E "{{ process_grep_pattern }}" | grep -v grep || echo " - No matching process found"
      else
        echo " - Tmux session '{{ tmux_session_name }}' is NOT running"
      fi
  register: status_check
  changed_when: false
  when: check_status | default(false) | bool

- name: Display status
  ansible.builtin.debug:
    msg: "{{ status_check.stdout_lines }}"
  when: check_status | default(false) | bool
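The task file above is driven entirely by the variables documented in its header comment. A minimal usage sketch may help when reading the playbooks that include it; the working directory and command below are invented for illustration and are not part of this commit. The single-quote escaping in the start task means `command_to_run` may itself contain quotes, as in this example:

```yaml
# Hypothetical caller of manage-processes-tasks.yml (illustration only).
- name: Start the download simulator in a tmux session
  ansible.builtin.include_tasks: manage-processes-tasks.yml
  vars:
    start_process: true
    tmux_session_name: "stress-download-user1"
    working_dir: "/opt/stress"                                       # assumed path
    command_to_run: "./bin/ytops-client download --profile 'user1'"  # assumed CLI arguments
    process_grep_pattern: "ytops-client download"
```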
219  ansible/playbook-README.md  Normal file
@@ -0,0 +1,219 @@
# Ansible Playbooks Documentation

This document provides an overview of all available playbooks, their purpose, and how to use them operationally.

## Table of Contents

1. [Deployment Workflow](#deployment-workflow)
2. [Initial Setup Playbooks](#initial-setup-playbooks)
3. [Operational Playbooks](#operational-playbooks)
4. [Monitoring and Inspection](#monitoring-and-inspection)
5. [Common Operations](#common-operations)

## Deployment Workflow

The typical deployment workflow follows these steps:

1. **Initial Setup**: Configure machines, deploy proxies, install dependencies
2. **Master Setup**: Configure Redis, MinIO, and other infrastructure services
3. **Worker Setup**: Configure auth generators and download simulators
4. **Operational Management**: Start/stop processes, monitor status, clean up profiles

## Initial Setup Playbooks

For a complete, automated installation on fresh nodes, you can use the main `full-install` playbook:

```bash
# Run the complete installation process on all nodes
ansible-playbook ansible/playbook-full-install.yml -i ansible/inventory.green.ini
```

The steps below are for running each part of the installation manually.

### Base Installation

```bash
# Deploy base system requirements to all nodes
ansible-playbook ansible/playbook-base-system.yml -i ansible/inventory.green.ini
```

### Proxy Deployment

```bash
# Deploy shadowsocks proxies to all nodes (as defined in cluster config)
ansible-playbook ansible/playbook-proxies.yml -i ansible/inventory.green.ini
```

### Code, Environment, and Dependencies

```bash
# Sync code to all nodes
ansible-playbook ansible/playbook-stress-sync-code.yml -i ansible/inventory.green.ini

# Generate .env files for all nodes
ansible-playbook ansible/playbook-stress-generate-env.yml -i ansible/inventory.green.ini

# Install dependencies on all nodes
ansible-playbook ansible/playbook-stress-install-deps.yml -i ansible/inventory.green.ini
```

## Operational Playbooks

### Master Node Services

Redis and MinIO are deployed as Docker containers during the initial setup (`playbook-full-install.yml`).

To start the policy enforcer and monitoring tmux sessions on the master node:

```bash
# Start policy enforcer and monitoring on master
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "start_enforcer=true start_monitor=true"
```

To stop processes on the master node:

```bash
# Stop ONLY the policy enforcer
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "stop_enforcer=true"

# Stop ONLY the monitor
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "stop_monitor=true"

# Stop BOTH the enforcer and monitor
ansible-playbook ansible/playbook-stress-manage-processes.yml -i ansible/inventory.green.ini -e "stop_sessions=true"
```

### Worker Node Processes

```bash
# Start auth generators and download simulators on all workers
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=start"

# Stop all processes on workers
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=stop"

# Check status of all worker processes
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=status"
```

### Profile Management

```bash
# Clean up all profiles
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles"

# Clean up a specific profile prefix
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=cleanup-profiles" -e "profile_prefix=user1"
```

## Monitoring and Inspection

### Status Checks

```bash
# Check status of all processes on all nodes
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=status"

# Check enforcer status on master
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=check-enforcer"

# Check profile status
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=profile-status"
```

### Log Inspection

```bash
# View tmux session output on a specific node
ssh user@hostname
tmux attach -t stress-auth-user1,user2      # For auth generator
tmux attach -t stress-download-user1,user2  # For download simulator
tmux attach -t stress-enforcer              # For policy enforcer on master
```

## Common Operations

### Code and Policy Updates

First, sync your local changes to the jump host from your development machine:

```bash
# Sync project to jump host
./tools/sync-to-jump.sh
```

Then, from the jump host, you can sync code or policies to the cluster nodes:

```bash
# Sync all application code (Python sources, scripts, etc.)
ansible-playbook ansible/playbook-stress-sync-code.yml -i ansible/inventory.green.ini

# Sync only policies and CLI configs
ansible-playbook ansible/playbook-stress-sync-configs.yml -i ansible/inventory.green.ini
```

### Adding a New Worker

1. Update `cluster.green.yml` with the new worker definition:

```yaml
workers:
  new-worker:
    ip: x.x.x.x
    port: 22
    profile_prefixes:
      - "user4"
    proxies:
      - "sslocal-rust-1090"
```

2. Regenerate inventory:

```bash
./tools/generate-inventory.py cluster.green.yml
```

3. Run the full installation playbook, limiting it to the new worker:

```bash
ansible-playbook ansible/playbook-full-install.yml -i ansible/inventory.green.ini --limit new-worker
```

4. Start processes on the new worker:

```bash
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=start" --limit new-worker
```

### Removing a Worker

1. Stop all processes on the worker:

```bash
ansible-playbook ansible/playbook-stress-lifecycle.yml -i ansible/inventory.green.ini -e "action=stop" --limit worker-to-remove
```

2. Remove the worker from `cluster.green.yml`.

3. Regenerate inventory:

```bash
./tools/generate-inventory.py cluster.green.yml
```

### Emergency Stop All

```bash
# Stop all processes on all nodes
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=stop-all"
```

### Stopping Specific Nodes

To stop processes on a specific worker or group of workers, use the `stop-nodes` action and limit the playbook run.

```bash
# Stop all processes on a single worker (e.g., dl003)
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=stop-nodes" --limit dl003

# Stop all processes on all workers
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=stop-nodes" --limit workers
```

### Restart Enforcer and Monitoring

```bash
# Restart monitoring and enforcer on master
ansible-playbook ansible/playbook-stress-control.yml -i ansible/inventory.green.ini -e "action=restart-monitoring"
```
@@ -3,9 +3,15 @@
   hosts: all
   gather_facts: yes
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml" # Assumes generate-inventory.py was run with cluster.stress.yml
     - "group_vars/all/vault.yml"
   pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
     - name: Announce base system setup
       ansible.builtin.debug:
         msg: "Starting base system setup on {{ inventory_hostname }}"
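Several hunks in this commit repeat the same `inventory_env` pre_tasks pattern. A small illustration of the filter chain, assuming the playbook is run with `-i ansible/inventory.green.ini` (the filename here is only an example, not something the commit pins down):

```yaml
# Illustration only: "ansible/inventory.green.ini" | basename -> "inventory.green.ini",
# | splitext | first -> "inventory.green", | replace('inventory.', '') -> "green",
# so the with_fileglob picks up group_vars/all/generated_vars.green.yml.
- name: Show derived inventory_env (hypothetical debug task)
  ansible.builtin.debug:
    msg: "{{ 'ansible/inventory.green.ini' | basename | splitext | first | replace('inventory.', '') }}"
```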
@@ -20,12 +20,19 @@
   hosts: all
   gather_facts: no
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml"
     - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
     - name: Define base directory for node
       ansible.builtin.set_fact:
-        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}"
+        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

     - name: Ensure base directories and subdirectories exist
       ansible.builtin.file:
@@ -41,12 +48,12 @@
         - "run/docker_mount/fetched_info_jsons"
       become: yes

-- name: "PHASE 2.2: Import playbook to install Python dependencies"
-  import_playbook: playbook-stress-install-deps.yml
-
-- name: "PHASE 2.3: Import playbook to sync local code"
+- name: "PHASE 2.2: Import playbook to sync local code"
   import_playbook: playbook-stress-sync-code.yml

+- name: "PHASE 2.3: Import playbook to install Python dependencies"
+  import_playbook: playbook-stress-install-deps.yml
+
 # -------------------------------------------------------------------------------------------------
 # PHASE 3: Environment and Service Configuration
 # Generates the .env file and starts the role-specific services on master and workers.
@@ -55,11 +62,17 @@
   import_playbook: playbook-stress-generate-env.yml

 - name: "PHASE 3.2: Master Node Services Setup"
-  hosts: airflow_master
+  hosts: master
   gather_facts: no
+  vars:
+    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml"
     - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
     - name: Configure system performance and kernel settings
       ansible.builtin.copy:
@@ -94,6 +107,16 @@
         mode: '0644'
       become: yes

+    - name: Stop and remove existing containers before starting services
+      ansible.builtin.shell:
+        cmd: |
+          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+      become: yes
+      changed_when: false
+      ignore_errors: yes
+
     - name: Start master services (Redis, MinIO)
       community.docker.docker_compose_v2:
         project_src: "{{ airflow_master_dir }}"
@@ -148,16 +171,63 @@
       environment:
         HOME: "/home/{{ ansible_user }}"

-- name: "PHASE 3.3: Shared Storage Setup (s3fs)"
-  hosts: airflow_master:airflow_workers
+- name: "PHASE 3.2a: Worker Node Services Setup"
+  hosts: workers
   gather_facts: no
+  vars:
+    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml"
     - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
+  tasks:
+    - name: Template Docker Compose file for worker services
+      ansible.builtin.template:
+        src: templates/docker-compose.stress-master.j2
+        dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
+        owner: "{{ ansible_user }}"
+        group: "{{ deploy_group }}"
+        mode: '0644'
+      become: yes
+
+    - name: Stop and remove existing containers before starting services
+      ansible.builtin.shell:
+        cmd: |
+          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
+      become: yes
+      changed_when: false
+      ignore_errors: yes
+
+    - name: Start worker services
+      community.docker.docker_compose_v2:
+        project_src: "{{ airflow_worker_dir }}"
+        files:
+          - docker-compose.stress.yml
+        state: present
+        remove_orphans: true
+      become: yes
+
+- name: "PHASE 3.3: Shared Storage Setup (s3fs)"
+  hosts: master:workers
+  gather_facts: no
+  vars:
+    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
     - name: Define base directory for node
       ansible.builtin.set_fact:
-        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}"
+        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

     - name: Mount S3 buckets via s3fs
       block:
@@ -176,14 +246,124 @@
             mode: '0600'
           become: yes

+        - name: Check if mount points are already mounted
+          ansible.builtin.shell:
+            cmd: "mount | grep -q '{{ item.path }}'"
+          loop:
+            - { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
+            - { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
+          register: mount_check
+          changed_when: false
+          failed_when: false
+
+        - name: Ensure mount point directories exist (only if not mounted)
+          ansible.builtin.file:
+            path: "{{ item.item.path }}"
+            state: directory
+            owner: "{{ ansible_user }}"
+            group: "{{ deploy_group }}"
+            mode: '0755'
+          loop: "{{ mount_check.results }}"
+          when: item.rc != 0
+          become: yes
+
         - name: Mount S3 buckets for stress testing
           ansible.posix.mount:
             src: "s3fs#{{ item.bucket }}"
             path: "{{ item.path }}"
             fstype: fuse
-            opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['airflow_master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
+            opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
             state: mounted
           loop:
            - { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
            - { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
           become: yes
+
+- name: "PHASE 3.4: Import playbook to initialize Redis profiles"
+  import_playbook: playbook-stress-init-redis.yml
+
+# -------------------------------------------------------------------------------------------------
+# PHASE 4: Monitoring and Management Services Setup
+# Starts monitoring, enforcer, and simulation processes.
+# -------------------------------------------------------------------------------------------------
+- name: "PHASE 4.1: Import playbook to manage monitoring and enforcer processes"
+  import_playbook: playbook-stress-manage-processes.yml
+
+- name: "PHASE 4.2: Start monitoring and enforcer services"
+  hosts: master
+  gather_facts: no
+  vars:
+    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    start_monitor: true
+    start_enforcer: true
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
+  tasks:
+    - name: Ensure tmux is installed
+      ansible.builtin.apt:
+        name: tmux
+        state: present
+      become: yes
+
+    - name: Start profile monitoring in tmux session
+      ansible.builtin.shell:
+        cmd: |
+          cd {{ airflow_master_dir }}
+          tmux new-session -d -s stress-monitor \
+            "set -a && . ./.env && set +a && \
+            ./bin/ytops-client profile list \
+            --auth-env sim_auth \
+            --download-env sim_download \
+            --live \
+            --no-blink \
+            --show-reasons"
+      when: start_monitor | default(false) | bool
+
+    - name: Start policy enforcer in tmux session
+      ansible.builtin.shell:
+        cmd: |
+          cd {{ airflow_master_dir }}
+          tmux new-session -d -s stress-enforcer \
+            "set -a && . ./.env && set +a && \
+            ./bin/ytops-client policy-enforcer \
+            --policy policies/8_unified_simulation_enforcer.yaml \
+            --live"
+      when: start_enforcer | default(false) | bool
+
+    - name: List active tmux sessions
+      ansible.builtin.shell:
+        cmd: tmux list-sessions
+      register: tmux_sessions
+      changed_when: false
+
+    - name: Display active sessions
+      ansible.builtin.debug:
+        msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"
+
+# -------------------------------------------------------------------------------------------------
+# PHASE 5: Simulation Workload Generation (Optional - can be run manually)
+# These playbooks are available but not automatically started by default.
+# -------------------------------------------------------------------------------------------------
+- name: "PHASE 5.1: Note about simulation workload generation"
+  hosts: localhost
+  gather_facts: no
+  tasks:
+    - name: Display note about simulation playbooks
+      ansible.builtin.debug:
+        msg: |
+          Simulation workload generation playbooks are available:
+          - ansible/playbook-stress-auth-generator.yml
+          - ansible/playbook-stress-download-simulation.yml
+
+          To start simulations manually, run:
+          ansible-playbook ansible/playbook-stress-auth-generator.yml \
+            -e "start_generator=true dummy_batch=true auth_min_seconds=2 auth_max_seconds=3"
+
+          ansible-playbook ansible/playbook-stress-download-simulation.yml \
+            -e "start_download=true profile_prefix=user1 download_min_seconds=2 download_max_seconds=5" \
+            --limit airflow_workers[0]
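The mount-point check added above registers a looped command result and the follow-up task iterates over `mount_check.results`, which is why it reads `item.item.path` and `item.rc`. A sketch of the structure being relied on, with invented paths and return codes:

```yaml
# Illustration only: each element of mount_check.results keeps the original
# loop entry under "item" alongside the command's result fields.
# mount_check:
#   results:
#     - item: { bucket: 'stress-inputs', path: '/opt/stress/inputfiles' }  # path is a made-up example
#       rc: 1    # non-zero -> not mounted -> the directory gets created
#     - item: { bucket: 'stress-jsons', path: '/opt/stress/run/docker_mount/fetched_info_jsons' }
#       rc: 0    # already mounted -> the directory task is skipped
```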
404  ansible/playbook-full-install.yml  Normal file
@@ -0,0 +1,404 @@
---
# This playbook provides a complete installation for fresh nodes.
# It can install either master or worker roles, or both on the same machine.
#
# Usage examples:
# # Install everything on all nodes
# ansible-playbook ansible/playbook-full-install.yml
#
# # Install only on workers
# ansible-playbook ansible/playbook-full-install.yml --limit workers
#
# # Install only on master
# ansible-playbook ansible/playbook-full-install.yml --limit master
#
# # Install both roles on a single machine
# ansible-playbook ansible/playbook-full-install.yml --limit specific-host -e "install_master=true install_worker=true"

# -------------------------------------------------------------------------------------------------
# PHASE 1: Base System Configuration
# Ensures all nodes have the necessary base packages, user configurations, and Docker installed.
# -------------------------------------------------------------------------------------------------
- name: "PHASE 1: Import base system setup playbook"
  import_playbook: playbook-base-system.yml

# -------------------------------------------------------------------------------------------------
# PHASE 2: Generate Environment Configuration
# Creates .env files needed by all subsequent steps
# -------------------------------------------------------------------------------------------------
- name: "PHASE 2: Generate .env configuration"
  import_playbook: playbook-stress-generate-env.yml

# -------------------------------------------------------------------------------------------------
# PHASE 3: Docker Network Setup
# Ensures the shared Docker network exists before building containers
# -------------------------------------------------------------------------------------------------
- name: "PHASE 3: Ensure Docker network exists"
  hosts: all
  gather_facts: no
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Create shared Docker network
      community.docker.docker_network:
        name: "{{ docker_network_name }}"
        driver: bridge
      become: yes

# -------------------------------------------------------------------------------------------------
# PHASE 4: Build yt-dlp Docker Image
# Builds the yt-dlp container from bin/ directory
# -------------------------------------------------------------------------------------------------
- name: "PHASE 4: Build yt-dlp Docker image"
  hosts: all
  gather_facts: no
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if (inventory_hostname in groups['master'] and not (install_worker | default(false) | bool)) else airflow_worker_dir }}"

    - name: Ensure bin directory exists
      ansible.builtin.file:
        path: "{{ base_dir }}/bin"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: yes

    - name: Check if Dockerfile exists in bin directory
      ansible.builtin.stat:
        path: "{{ base_dir }}/bin/Dockerfile"
      register: dockerfile_stat

    - name: Build yt-dlp Docker image if Dockerfile exists
      community.docker.docker_image:
        name: yt-dlp-custom
        tag: latest
        source: build
        build:
          path: "{{ base_dir }}/bin"
          pull: yes
        state: present
        force_source: yes
      become: yes
      when: dockerfile_stat.stat.exists

    - name: Display message if Dockerfile not found
      ansible.builtin.debug:
        msg: "Dockerfile not found at {{ base_dir }}/bin/Dockerfile - skipping yt-dlp image build"
      when: not dockerfile_stat.stat.exists

# -------------------------------------------------------------------------------------------------
# PHASE 5: Sync Code and Install Dependencies
# Copies application code and installs Python dependencies
# -------------------------------------------------------------------------------------------------
- name: "PHASE 5.1: Sync application code"
  import_playbook: playbook-stress-sync-code.yml

- name: "PHASE 5.2: Install Python dependencies"
  import_playbook: playbook-stress-install-deps.yml

# -------------------------------------------------------------------------------------------------
# PHASE 6: Deploy Shadowsocks Proxies
# Configures and starts proxy services
# -------------------------------------------------------------------------------------------------
- name: "PHASE 6: Deploy proxy services"
  import_playbook: playbook-proxies.yml

# -------------------------------------------------------------------------------------------------
# PHASE 7: Install bgutils
# Note: Currently bgutils is deployed on master via docker-compose
# -------------------------------------------------------------------------------------------------
- name: "PHASE 7: Install bgutils"
  import_playbook: playbook-install-bgutils.yml

# -------------------------------------------------------------------------------------------------
# PHASE 8: Master-Specific Services Setup
# Starts Redis, MinIO, and other master-only services
# -------------------------------------------------------------------------------------------------
- name: "PHASE 8: Master Node Services Setup"
  hosts: master
  gather_facts: no
  vars:
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Configure system performance and kernel settings
      ansible.builtin.copy:
        src: "configs/etc/sysctl.d/99-system-limits.conf"
        dest: "/etc/sysctl.d/99-system-limits.conf"
        owner: root
        group: root
        mode: '0644'
      become: yes
      register: sysctl_config_copy

    - name: Apply sysctl settings
      ansible.builtin.command: sysctl --system
      become: yes
      when: sysctl_config_copy.changed

    - name: Ensure MinIO data directory exists
      ansible.builtin.file:
        path: "{{ airflow_master_dir }}/minio-data"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: yes

    - name: Template Docker Compose file for master services
      ansible.builtin.template:
        src: templates/docker-compose.stress-master.j2
        dest: "{{ airflow_master_dir }}/docker-compose.stress.yml"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
      become: yes

    - name: Stop and remove existing containers before starting services
      ansible.builtin.shell:
        cmd: |
          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
      become: yes
      changed_when: false
      ignore_errors: yes

    - name: Start master services (Redis, MinIO)
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_master_dir }}"
        files:
          - docker-compose.stress.yml
        state: present
        remove_orphans: true
      become: yes

    - name: Wait for MinIO service to be ready
      ansible.builtin.wait_for:
        host: "{{ hostvars[inventory_hostname].ansible_host }}"
        port: 9000
        delay: 5
        timeout: 60
      delegate_to: localhost

    - name: Download MinIO Client (mc) if not present
      ansible.builtin.command:
        cmd: wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc
        creates: /usr/local/bin/mc
      become: yes

    - name: Ensure MinIO Client (mc) is executable
      ansible.builtin.file:
        path: /usr/local/bin/mc
        mode: '0755'
      become: yes

    - name: Configure mc alias for local MinIO
      ansible.builtin.command: >
        mc alias set local http://localhost:9000 {{ vault_s3_access_key_id }} {{ vault_s3_secret_access_key }}
      become: yes
      become_user: "{{ ansible_user }}"
      changed_when: false
      environment:
        HOME: "/home/{{ ansible_user }}"

    - name: Ensure S3 buckets exist in MinIO using mc
      ansible.builtin.command: >
        mc mb local/{{ item }}
      loop:
        - "stress-inputs"
        - "stress-jsons"
      become: yes
      become_user: "{{ ansible_user }}"
      register: mc_mb_result
      failed_when: >
        mc_mb_result.rc != 0 and
        "already exists" not in mc_mb_result.stderr
      changed_when: mc_mb_result.rc == 0
      environment:
        HOME: "/home/{{ ansible_user }}"

# -------------------------------------------------------------------------------------------------
# PHASE 9: Worker-Specific Services Setup
# Starts worker-only services if needed
# -------------------------------------------------------------------------------------------------
- name: "PHASE 9: Worker Node Services Setup"
  hosts: workers
  gather_facts: no
  vars:
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Template Docker Compose file for worker services
      ansible.builtin.template:
        src: templates/docker-compose.stress-master.j2
        dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
      become: yes

    - name: Stop and remove existing containers before starting services
      ansible.builtin.shell:
        cmd: |
          docker ps -a --filter "name=bgutil-provider" --format "{{{{.ID}}}}" | xargs -r docker rm -f
          docker ps -a --filter "name=redis-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
          docker ps -a --filter "name=minio-stress" --format "{{{{.ID}}}}" | xargs -r docker rm -f
      become: yes
      changed_when: false
      ignore_errors: yes

    - name: Start worker services
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_worker_dir }}"
        files:
          - docker-compose.stress.yml
        state: present
        remove_orphans: true
      become: yes

# -------------------------------------------------------------------------------------------------
# PHASE 10: Shared Storage Setup (s3fs)
# Mounts S3 buckets on all nodes
# -------------------------------------------------------------------------------------------------
- name: "PHASE 10: Shared Storage Setup (s3fs)"
  hosts: master:workers
  gather_facts: no
  vars:
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Mount S3 buckets via s3fs
      block:
        - name: Install s3fs for mounting S3 buckets
          ansible.builtin.apt:
            name: s3fs
            state: present
          become: yes

        - name: Configure s3fs credentials
          ansible.builtin.copy:
            content: "{{ vault_s3_access_key_id }}:{{ vault_s3_secret_access_key }}"
            dest: "/home/{{ ansible_user }}/.passwd-s3fs"
            owner: "{{ ansible_user }}"
            group: "{{ deploy_group }}"
            mode: '0600'
          become: yes

        - name: Check if mount points are already mounted
          ansible.builtin.shell:
            cmd: "mount | grep -q '{{ item.path }}'"
          loop:
            - { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
            - { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
          register: mount_check
          changed_when: false
          failed_when: false

        - name: Ensure mount point directories exist (only if not mounted)
          ansible.builtin.file:
            path: "{{ item.item.path }}"
            state: directory
            owner: "{{ ansible_user }}"
            group: "{{ deploy_group }}"
            mode: '0755'
          loop: "{{ mount_check.results }}"
          when: item.rc != 0
          become: yes

        - name: Mount S3 buckets for stress testing
          ansible.posix.mount:
            src: "s3fs#{{ item.bucket }}"
            path: "{{ item.path }}"
            fstype: fuse
            opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
            state: mounted
          loop:
            - { bucket: 'stress-inputs', path: '{{ base_dir }}/inputfiles' }
            - { bucket: 'stress-jsons', path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons' }
          become: yes

# -------------------------------------------------------------------------------------------------
# PHASE 11: Initialize Redis (Master Only)
# Sets up profiles and policies in Redis
# -------------------------------------------------------------------------------------------------
- name: "PHASE 11: Initialize Redis profiles"
  import_playbook: playbook-stress-init-redis.yml

# -------------------------------------------------------------------------------------------------
# PHASE 12: Final Status and Next Steps
# -------------------------------------------------------------------------------------------------
- name: "PHASE 12: Installation Complete"
  hosts: localhost
  gather_facts: no
  tasks:
    - name: Display installation completion message
      ansible.builtin.debug:
        msg: |
          ========================================
          Full installation complete!
          ========================================

          Next steps:

          1. Start monitoring and enforcer (on master):
             ansible-playbook ansible/playbook-stress-manage-processes.yml \
               -e "start_monitor=true start_enforcer=true"

          2. Start auth generator (on master):
             ansible-playbook ansible/playbook-stress-auth-generator.yml \
               -e "start_generator=true dummy_batch=true auth_min_seconds=2 auth_max_seconds=3"

          3. Start download simulation (on workers):
             ansible-playbook ansible/playbook-stress-download-simulation.yml \
               -e "start_download=true profile_prefix=user1 download_min_seconds=2 download_max_seconds=5" \
               --limit workers

          4. Check status:
             ansible-playbook ansible/playbook-stress-control.yml -e "action=status"

          5. Monitor profiles:
             ansible-playbook ansible/playbook-stress-control.yml -e "action=profile-status"
18  ansible/playbook-install-bgutils.yml  Normal file
@@ -0,0 +1,18 @@
---
- name: "UTIL-SETUP: Install and configure bgutils container on workers"
  hosts: workers
  gather_facts: yes
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Note that bgutil-provider is now on master
      ansible.builtin.debug:
        msg: "The bgutil-provider service is now deployed on the master node via docker-compose and is no longer deployed on workers."
165  ansible/playbook-install-cleanup-media.yml  Normal file
@@ -0,0 +1,165 @@
---
- name: "UTIL-SETUP: Install media cleanup script and configure cron"
  hosts: all
  gather_facts: yes
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Ensure cleanup script directory exists
      ansible.builtin.file:
        path: "{{ base_dir }}/bin"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: yes

    - name: Copy cleanup_media.py script
      ansible.builtin.copy:
        src: "{{ playbook_dir }}/../yt-ops-services-debug/cleanup_media.py"
        dest: "{{ base_dir }}/bin/cleanup_media.py"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: yes

    - name: Install s5cmd for S3 uploads
      block:
        - name: Check if s5cmd is already installed
          ansible.builtin.stat:
            path: /usr/local/bin/s5cmd
          register: s5cmd_binary

        - name: Download and install s5cmd
          block:
            - name: Create temporary directory for s5cmd download
              ansible.builtin.tempfile:
                state: directory
                suffix: s5cmd
              register: s5cmd_temp_dir

            - name: Download s5cmd
              ansible.builtin.get_url:
                url: "https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz"
                dest: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
                mode: '0644'

            - name: Extract s5cmd
              ansible.builtin.unarchive:
                src: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
                dest: "{{ s5cmd_temp_dir.path }}"
                remote_src: yes

            - name: Install s5cmd to /usr/local/bin
              ansible.builtin.copy:
                src: "{{ s5cmd_temp_dir.path }}/s5cmd"
                dest: /usr/local/bin/s5cmd
                mode: '0755'
                remote_src: yes

            - name: Clean up temporary directory
              ansible.builtin.file:
                path: "{{ s5cmd_temp_dir.path }}"
                state: absent
          when: not s5cmd_binary.stat.exists
      become: yes

    - name: Ensure log directory exists
      ansible.builtin.file:
        path: "/var/log"
        state: directory
        owner: root
        group: root
        mode: '0755'
      become: yes

    - name: Create wrapper script to source .env before running cleanup
      ansible.builtin.copy:
        content: |
          #!/bin/bash
          # Wrapper script to run cleanup_media.py with environment variables from .env

          set -e

          BASE_DIR="{{ base_dir }}"

          # Source .env file if it exists
          if [ -f "${BASE_DIR}/.env" ]; then
            set -a
            source "${BASE_DIR}/.env"
            set +a
          fi

          # Determine cleanup mode based on environment variable or default
          CLEANUP_MODE="${CLEANUP_MODE:-{{ cleanup_settings.mode | default('s3-upload') }}}"

          # Run cleanup script
          cd "${BASE_DIR}"

          if [ "$CLEANUP_MODE" = "s3-upload" ]; then
            # S3 upload mode - uploads to S3 then deletes
            exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
              --target-dir "${BASE_DIR}/run" \
              --target-dir "${BASE_DIR}/downloadfiles" \
              --max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
              --log-file /var/log/cleanup_media.log \
              --s3-upload \
              --s3-bucket "${S3_BUCKET:-stress-media-archive}" \
              --s3-prefix "archived-media/$(hostname)" \
              --s5cmd-path /usr/local/bin/s5cmd
          else
            # Simple cleanup mode - just truncate and rename
            exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
              --target-dir "${BASE_DIR}/run" \
              --target-dir "${BASE_DIR}/downloadfiles" \
              --max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
              --log-file /var/log/cleanup_media.log
          fi
        dest: "{{ base_dir }}/bin/cleanup_media_wrapper.sh"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: yes

    - name: Configure cron job for media cleanup
      ansible.builtin.cron:
        name: "Media cleanup - {{ base_dir }}"
        minute: "0"
        hour: "*"
        job: "{{ base_dir }}/bin/cleanup_media_wrapper.sh 2>&1 | logger -t cleanup_media"
        user: "{{ ansible_user }}"
        state: "{{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}"
      become: yes

    - name: Display installation summary
      ansible.builtin.debug:
        msg: |
          Media cleanup script installed successfully on {{ inventory_hostname }}

          Configuration from cluster.green.yml:
          - Base directory: {{ base_dir }}
          - Enabled: {{ cleanup_settings.enabled | default(true) }}
          - Cleanup mode: {{ cleanup_settings.mode | default('s-upload') }}
          - Max age: {{ cleanup_settings.max_age_seconds | default(3600) }} seconds
          - Cron schedule: Every hour (0 * * * *)
          - Cron job state: {{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}
          - Log file: /var/log/cleanup_media.log

          Note: You can override the cleanup mode for a single run by setting the
          CLEANUP_MODE environment variable before executing the wrapper script.
          e.g., CLEANUP_MODE=cleanup {{ base_dir }}/bin/cleanup_media_wrapper.sh

          To view logs:
          tail -f /var/log/cleanup_media.log
@ -1,7 +1,17 @@
 ---
 - name: Deploy Shadowsocks-Rust Proxy Configurations
-  hosts: all
+  hosts: workers
   gather_facts: yes
+  vars_files:
+    - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
     - name: Deploy Shadowsocks-Rust proxy services
       block:
@ -55,11 +65,18 @@
           path: /srv/shadowsocks-rust/docker-compose.yaml
           state: absent

-      - name: Force stop and remove known proxy containers to prevent conflicts
-        community.docker.docker_container:
-          name: "{{ item.key }}"
-          state: absent
+      - name: Find and stop any container using the target proxy ports
+        ansible.builtin.shell:
+          cmd: |
+            container_id=$(docker ps -aq --filter "publish={{ item.value.local_port }}")
+            if [ -n "$container_id" ]; then
+              echo "Found container ${container_id} using port {{ item.value.local_port }}. Stopping and removing it."
+              docker stop "${container_id}" >/dev/null 2>&1 || true
+              docker rm -f "${container_id}" >/dev/null 2>&1 || true
+            fi
         loop: "{{ shadowsocks_proxies | dict2items }}"
+        register: stop_conflicting_containers
+        changed_when: "'Stopping and removing it' in stop_conflicting_containers.stdout"
        loop_control:
          label: "{{ item.key }}"

95
ansible/playbook-stress-auth-generator.yml
Normal file
@ -0,0 +1,95 @@
---
- name: "STRESS-SETUP: Manage auth simulation generator"
  hosts: workers
  gather_facts: no
  vars:
    tmux_session_auth_gen: "stress-auth-{{ (profile_prefix | default('default')) | replace(',', '-') }}"
    auth_policy: "policies/12_queue_auth_simulation.yaml"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_worker_dir }}"

    - name: Validate profile_prefix is provided when starting or stopping
      ansible.builtin.fail:
        msg: "profile_prefix is required when start_generator=true or stop_generator=true"
      when:
        - (start_generator | default(false) | bool or stop_generator | default(false) | bool)
        - profile_prefix is not defined

    - name: "Display policy being used for auth generator"
      ansible.builtin.debug:
        msg: "Using auth generator policy: {{ auth_policy }}"
      when: start_generator | default(false) | bool

    - name: Manage auth generator process
      ansible.builtin.include_tasks:
        file: manage-processes-tasks.yml
      vars:
        tmux_session_name: "{{ tmux_session_auth_gen }}"
        working_dir: "{{ base_dir }}"
        command_to_run: >
          ./bin/ytops-client stress-policy
          --policy {{ auth_policy }}
          {% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %}
          {% if auth_min_seconds is defined %}--set 'settings.dummy_simulation_settings.auth_min_seconds={{ auth_min_seconds }}'{% endif %}
          {% if auth_max_seconds is defined %}--set 'settings.dummy_simulation_settings.auth_max_seconds={{ auth_max_seconds }}'{% endif %}
          {% if batch_size is defined %}--set 'queue_policy.batch_size={{ batch_size }}'{% endif %}
          {% if create_download_tasks is defined %}--set 'queue_policy.create_download_tasks={{ create_download_tasks }}'{% endif %}
          {% if formats_to_download is defined %}--set 'queue_policy.formats_to_download={{ formats_to_download }}'{% endif %}
          {% if profile_prefix is defined %}--set 'execution_control.worker_pools=[{"profile_prefix": "{{ profile_prefix }}", "workers": 1}]'{% endif %}
          --profile-prefix {{ profile_prefix }}
        process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ auth_policy }}.*--profile-prefix {{ profile_prefix }}"
        start_process: "{{ start_generator | default(false) | bool }}"
        stop_process: "{{ stop_generator | default(false) | bool }}"
        check_status: "{{ vars.check_status | default(false) | bool }}"

    - name: List active tmux sessions
      ansible.builtin.shell:
        cmd: tmux list-sessions 2>/dev/null || true
      register: tmux_sessions
      changed_when: false

    - name: Display active sessions
      ansible.builtin.debug:
        msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"

    - name: Check tmux session output for errors
      block:
        - name: Wait for a moment for the process to start
          ansible.builtin.pause:
            seconds: 2

        - name: Capture tmux pane content
          ansible.builtin.shell:
            cmd: "tmux capture-pane -p -t {{ tmux_session_auth_gen }}"
          register: tmux_output
          changed_when: false
          ignore_errors: true

        - name: Display tmux pane content if session exists
          ansible.builtin.debug:
            msg: "Initial output from tmux session '{{ tmux_session_auth_gen }}':"
          when: tmux_output.rc == 0

        - name: Show output lines if session exists
          ansible.builtin.debug:
            var: tmux_output.stdout_lines
          when: tmux_output.rc == 0

        - name: Report if session not found
          ansible.builtin.debug:
            msg: "Tmux session '{{ tmux_session_auth_gen }}' was not found. It may have exited immediately upon starting."
          when: tmux_output.rc != 0
      when: start_generator | default(false) | bool
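For reference, a typical invocation of this playbook from the ansible/ directory could look as follows. This is only a sketch: the inventory filename inventory.stress.yml is an assumption, while the extra-vars names come straight from the playbook above.

    # Start the auth generator for one profile pool on a worker (assumed inventory path)
    ansible-playbook -i inventory.stress.yml playbook-stress-auth-generator.yml \
      -e start_generator=true -e profile_prefix=user1

    # Stop the same generator again
    ansible-playbook -i inventory.stress.yml playbook-stress-auth-generator.yml \
      -e stop_generator=true -e profile_prefix=user1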
288
ansible/playbook-stress-control.yml
Normal file
@ -0,0 +1,288 @@
---
- name: "STRESS-SETUP: Unified control for stress test processes"
  hosts: all
  gather_facts: no
  vars:
    # Default action is status check
    action: "status"
    setup_policy: "policies/6_profile_setup_policy.yaml"
    enforcer_policy: "policies/8_unified_simulation_enforcer.yaml"
    master_only_actions:
      - "check-enforcer"
      - "profile-status"
      - "run-command"
      - "cleanup-profiles"
      - "restart-monitoring"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
      when: action not in master_only_actions or inventory_hostname in groups['master']
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
      when: action not in master_only_actions or inventory_hostname in groups['master']
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
      when: action not in master_only_actions or inventory_hostname in groups['master']

    - name: Check running processes (status action)
      ansible.builtin.shell:
        cmd: |
          echo "=== Process status on {{ inventory_hostname }} ==="
          echo "1. Tmux sessions:"
          tmux list-sessions 2>/dev/null || echo "No tmux sessions"
          echo ""
          echo "2. ytops-client processes:"
          ps aux | grep -E "ytops-client.*(profile|policy-enforcer|stress-policy)" | grep -v grep || echo "No ytops-client processes"
          echo ""
          echo "3. Python processes related to stress test:"
          ps aux | grep -E "python.*(ytops|stress)" | grep -v grep || echo "No related python processes"
      register: status_output
      changed_when: false
      when: action == "status"

    - name: Display status
      ansible.builtin.debug:
        msg: "{{ status_output.stdout_lines }}"
      when: action == "status"

    - name: Check enforcer status (check-enforcer action)
      block:
        - name: Check for enforcer tmux session and process
          ansible.builtin.shell:
            cmd: |
              echo "Enforcer status on {{ inventory_hostname }}:"
              if tmux has-session -t stress-enforcer 2>/dev/null; then
                echo " - Tmux session 'stress-enforcer' is RUNNING."
                ps aux | grep -E "ytops-client.*policy-enforcer" | grep -v grep || echo " - WARNING: Tmux session exists, but no matching process found."
              else
                echo " - Tmux session 'stress-enforcer' is NOT RUNNING."
              fi
          register: enforcer_status
          changed_when: false
        - name: Display enforcer status
          ansible.builtin.debug:
            msg: "{{ enforcer_status.stdout_lines }}"
      when: action == "check-enforcer"
      delegate_to: "{{ groups['master'][0] }}"
      run_once: true

    - name: Run ytops-client profile list on master (profile-status action)
      ansible.builtin.shell:
        cmd: |
          cd {{ airflow_master_dir }}
          if [ -f .env ]; then
            set -a && . ./.env && set +a
          fi
          timeout 10 ./bin/ytops-client profile list \
            --auth-env sim_auth \
            --download-env sim_download 2>&1
      register: profile_list_output
      changed_when: false
      when:
        - action == "profile-status"
        - inventory_hostname in groups['master']
      delegate_to: "{{ groups['master'][0] }}"
      run_once: true

    - name: Show profile list output lines
      ansible.builtin.debug:
        var: profile_list_output.stdout_lines
      when:
        - action == "profile-status"
        - profile_list_output is defined
        - inventory_hostname in groups['master']

    - name: Run custom ytops-client command on master (run-command action)
      ansible.builtin.shell:
        cmd: |
          cd {{ airflow_master_dir }}
          if [ -f .env ]; then
            set -a && . ./.env && set +a
          fi
          {{ command_to_run }}
      register: command_output
      changed_when: false
      when:
        - action == "run-command"
        - inventory_hostname in groups['master']
        - command_to_run is defined
      delegate_to: "{{ groups['master'][0] }}"
      run_once: true

    - name: Display command output
      ansible.builtin.debug:
        msg: "Command output:"
      when:
        - action == "run-command"
        - command_output is defined

    - name: Show command output lines
      ansible.builtin.debug:
        var: command_output.stdout_lines
      when:
        - action == "run-command"
        - command_output is defined

    - name: "Display policy being used for profile cleanup"
      ansible.builtin.debug:
        msg: "Using setup policy for cleanup: {{ setup_policy }}"
      when:
        - action == "cleanup-profiles"
        - inventory_hostname in groups['master']
      delegate_to: "{{ groups['master'][0] }}"
      run_once: true

    - name: Cleanup profiles on master (cleanup-profiles action)
      ansible.builtin.shell:
        cmd: |
          cd {{ airflow_master_dir }}
          if [ -f .env ]; then
            set -a && . ./.env && set +a
          fi
          ./bin/ytops-client setup-profiles \
            --policy {{ setup_policy }} \
            --cleanup-all {% if profile_prefix is defined %}--profile-prefix {{ profile_prefix }}{% endif %}
      register: cleanup_output
      changed_when: false
      when:
        - action == "cleanup-profiles"
        - inventory_hostname in groups['master']
      delegate_to: "{{ groups['master'][0] }}"
      run_once: true

    - name: Display cleanup output
      ansible.builtin.debug:
        msg: "Cleanup output:"
      when:
        - action == "cleanup-profiles"
        - cleanup_output is defined

    - name: Show cleanup output lines
      ansible.builtin.debug:
        var: cleanup_output.stdout_lines
      when:
        - action == "cleanup-profiles"
        - cleanup_output is defined

    - name: Stop all stress test processes on all nodes (stop-all action)
      block:
        - name: Kill all tmux sessions starting with 'stress-'
          ansible.builtin.shell:
            cmd: |
              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
                tmux kill-session -t "$session"
              done || true
              # Failsafe: kill any lingering tmux server processes for stress sessions
              TMUX_PIDS_TO_KILL=$(ps aux | grep '[t]mux' | grep 'stress-' | grep -v 'grep' | grep -v 'ansible' | awk '{print $2}')
              if [ -n "$TMUX_PIDS_TO_KILL" ]; then
                kill -9 $TMUX_PIDS_TO_KILL >/dev/null 2>&1 || true
              fi
          ignore_errors: yes
          changed_when: false

        - name: Kill all ytops-client and related python processes
          ansible.builtin.shell:
            cmd: |
              # Gracefully terminate processes by pattern
              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true

              sleep 1 # Wait for graceful shutdown

              # Force kill any remaining processes
              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
          ignore_errors: yes
          changed_when: false
      when: action == "stop-all"

    - name: Stop processes on targeted nodes only (stop-nodes action)
      block:
        - name: Kill all tmux sessions starting with 'stress-' on this node
          ansible.builtin.shell:
            cmd: |
              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
                tmux kill-session -t "$session"
              done || true
              # Failsafe: kill any lingering tmux server processes for stress sessions
              TMUX_PIDS_TO_KILL=$(ps aux | grep '[t]mux' | grep 'stress-' | grep -v 'grep' | grep -v 'ansible' | awk '{print $2}')
              if [ -n "$TMUX_PIDS_TO_KILL" ]; then
                kill -9 $TMUX_PIDS_TO_KILL >/dev/null 2>&1 || true
              fi
          ignore_errors: yes
          changed_when: false

        - name: Kill all ytops-client and related python processes on this node
          ansible.builtin.shell:
            cmd: |
              # Gracefully terminate processes by pattern
              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true

              sleep 1 # Wait for graceful shutdown

              # Force kill any remaining processes
              ps aux | grep -E "[y]tops-client.*(profile.*list|policy-enforcer|stress-policy)" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
              ps aux | grep -E "[p]ython.*ytops" | grep -v ansible | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
          ignore_errors: yes
          changed_when: false
      when: action == "stop-nodes"

    - name: Restart monitoring and enforcer (restart-monitoring action)
      block:
        - name: Stop monitor process
          ansible.builtin.include_tasks:
            file: manage-processes-tasks.yml
          vars:
            tmux_session_name: "stress-monitor"
            process_grep_pattern: "ytops-client.*profile.*list"
            stop_process: true

        - name: Stop enforcer process
          ansible.builtin.include_tasks:
            file: manage-processes-tasks.yml
          vars:
            tmux_session_name: "stress-enforcer"
            process_grep_pattern: "ytops-client.*policy-enforcer.*{{ enforcer_policy }}"
            stop_process: true

        - name: Start monitor process
          ansible.builtin.include_tasks:
            file: manage-processes-tasks.yml
          vars:
            tmux_session_name: "stress-monitor"
            working_dir: "{{ airflow_master_dir }}"
            command_to_run: >
              ./bin/ytops-client profile list
              --auth-env sim_auth
              --download-env sim_download
              --live
              --no-blink
              --show-reasons
            process_grep_pattern: "ytops-client.*profile.*list"
            start_process: true

        - name: Start enforcer process
          ansible.builtin.include_tasks:
            file: manage-processes-tasks.yml
          vars:
            tmux_session_name: "stress-enforcer"
            working_dir: "{{ airflow_master_dir }}"
            command_to_run: >
              ./bin/ytops-client policy-enforcer
              --policy {{ enforcer_policy }}
              --live
            process_grep_pattern: "ytops-client.*policy-enforcer.*{{ enforcer_policy }}"
            start_process: true
      when:
        - action == "restart-monitoring"
        - inventory_hostname == groups['master'][0]
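As a usage sketch (the inventory path inventory.stress.yml is an assumption; the action values are exactly the ones handled by the tasks above):

    # Default action: show tmux sessions and ytops-client processes on every node
    ansible-playbook -i inventory.stress.yml playbook-stress-control.yml -e action=status

    # Master-only actions
    ansible-playbook -i inventory.stress.yml playbook-stress-control.yml -e action=check-enforcer
    ansible-playbook -i inventory.stress.yml playbook-stress-control.yml \
      -e action=cleanup-profiles -e profile_prefix=user1

    # Stop everything on all nodes
    ansible-playbook -i inventory.stress.yml playbook-stress-control.yml -e action=stop-all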
93
ansible/playbook-stress-download-simulation.yml
Normal file
@ -0,0 +1,93 @@
---
- name: "STRESS-SETUP: Manage download simulation"
  hosts: workers
  gather_facts: no
  vars:
    tmux_session_download: "stress-download-{{ (profile_prefix | default('default')) | replace(',', '-') }}"
    download_policy: "policies/11_direct_docker_download_simulation.yaml"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_worker_dir }}"

    - name: Validate profile_prefix is provided when starting or stopping
      ansible.builtin.fail:
        msg: "profile_prefix is required when start_download=true or stop_download=true"
      when:
        - (start_download | default(false) | bool or stop_download | default(false) | bool)
        - profile_prefix is not defined

    - name: "Display policy being used for download simulation"
      ansible.builtin.debug:
        msg: "Using download simulation policy: {{ download_policy }}"
      when: start_download | default(false) | bool

    - name: Manage download simulation process
      ansible.builtin.include_tasks:
        file: manage-processes-tasks.yml
      vars:
        tmux_session_name: "{{ tmux_session_download }}"
        working_dir: "{{ base_dir }}"
        command_to_run: >
          ./bin/ytops-client stress-policy
          --policy {{ download_policy }}
          {% if dummy_batch | default(true) | bool %}--dummy-batch{% endif %}
          {% if download_min_seconds is defined %}--set 'settings.dummy_simulation_settings.download_min_seconds={{ download_min_seconds }}'{% endif %}
          {% if download_max_seconds is defined %}--set 'settings.dummy_simulation_settings.download_max_seconds={{ download_max_seconds }}'{% endif %}
          {% if profile_prefix is defined %}--set 'execution_control.worker_pools=[{"profile_prefix": "{{ profile_prefix }}", "workers": 1}]'{% endif %}
          {% for setting in (extra_set_args | default('[]')) | from_yaml %}--set '{{ setting }}' {% endfor %}
          --profile-prefix {{ profile_prefix }}
        process_grep_pattern: "ytops-client.*stress-policy.*--policy {{ download_policy }}.*--profile-prefix {{ profile_prefix }}"
        start_process: "{{ start_download | default(false) | bool }}"
        stop_process: "{{ stop_download | default(false) | bool }}"
        check_status: "{{ vars.check_status | default(false) | bool }}"

    - name: List active tmux sessions
      ansible.builtin.shell:
        cmd: tmux list-sessions 2>/dev/null || true
      register: tmux_sessions
      changed_when: false

    - name: Display active sessions
      ansible.builtin.debug:
        msg: "Active tmux sessions on {{ inventory_hostname }}: {{ tmux_sessions.stdout_lines }}"

    - name: Check tmux session output for errors
      block:
        - name: Wait for a moment for the process to start
          ansible.builtin.pause:
            seconds: 2

        - name: Capture tmux pane content
          ansible.builtin.shell:
            cmd: "tmux capture-pane -p -t {{ tmux_session_download }}"
          register: tmux_output
          changed_when: false
          ignore_errors: true

        - name: Display tmux pane content if session exists
          ansible.builtin.debug:
            msg: "Initial output from tmux session '{{ tmux_session_download }}':"
          when: tmux_output.rc == 0

        - name: Show output lines if session exists
          ansible.builtin.debug:
            var: tmux_output.stdout_lines
          when: tmux_output.rc == 0

        - name: Report if session not found
          ansible.builtin.debug:
            msg: "Tmux session '{{ tmux_session_download }}' was not found. It may have exited immediately upon starting."
          when: tmux_output.rc != 0
      when: start_download | default(false) | bool
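A corresponding invocation sketch (inventory path assumed; the variable names are taken from the playbook above, and the duration overrides feed the policy's dummy_simulation_settings):

    # Start the download simulator for one profile pool, overriding the simulated durations
    ansible-playbook -i inventory.stress.yml playbook-stress-download-simulation.yml \
      -e start_download=true -e profile_prefix=user1 \
      -e download_min_seconds=5 -e download_max_seconds=20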
@ -3,12 +3,19 @@
   hosts: all
   gather_facts: no
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml"
     - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
     - name: Define base directory for node
       ansible.builtin.set_fact:
-        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}"
+        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

     - name: Create .env file for stress test environment
       ansible.builtin.template:
@ -18,15 +25,3 @@
         group: "{{ deploy_group }}"
         mode: '0644'
       become: yes
-
-    - name: Ensure REDIS_PORT is set in .env file
-      ansible.builtin.lineinfile:
-        path: "{{ base_dir }}/.env"
-        line: "REDIS_PORT={{ redis_port }}"
-        regexp: "^REDIS_PORT="
-        state: present
-        create: yes
-        owner: "{{ ansible_user }}"
-        group: "{{ deploy_group }}"
-        mode: '0644'
-      become: yes
61
ansible/playbook-stress-init-redis.yml
Normal file
@ -0,0 +1,61 @@
---
- name: "STRESS-SETUP: Initialize Redis with profiles and policies"
  hosts: master
  gather_facts: no
  vars:
    setup_policy: "policies/6_profile_setup_policy.yaml"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Check if Redis is running
      ansible.builtin.shell:
        cmd: "redis-cli -h {{ hostvars[groups['master'][0]].ansible_host }} -p {{ redis_port }} {% if use_redis_password | default(true) | string | lower == 'true' %}-a {{ vault_redis_password }}{% endif %} ping 2>&1 | grep -q PONG"
      register: redis_check
      ignore_errors: yes
      changed_when: false

    - name: Ensure Redis is accessible
      ansible.builtin.fail:
        msg: "Redis is not accessible on master node. Please ensure Redis service is running on {{ hostvars[groups['master'][0]].ansible_host }}:{{ redis_port }}"
      when: redis_check.rc != 0

    - name: Stop any running ytops-client processes on master
      ansible.builtin.shell:
        cmd: ps aux | grep "[y]tops-client" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
      changed_when: false

    - name: "Display policy being used for Redis initialization"
      ansible.builtin.debug:
        msg: "Using setup policy: {{ setup_policy }}"

    - name: Initialize Redis profiles and policies
      ansible.builtin.shell:
        cmd: |
          cd {{ airflow_master_dir }}
          ./bin/ytops-client setup-profiles \
            --policy {{ setup_policy }} \
            --cleanup-all
      environment:
        REDIS_HOST: "{{ hostvars[groups['master'][0]].ansible_host }}"
        REDIS_PORT: "{{ redis_port }}"
        REDIS_PASSWORD: "{{ vault_redis_password if use_redis_password | default(true) | string | lower == 'true' else '' }}"
      register: init_result
      changed_when: init_result.rc == 0

    - name: Display initialization result
      ansible.builtin.debug:
        msg: "Redis profile initialization completed successfully"
      when: init_result.rc == 0

    - name: Handle initialization failure
      ansible.builtin.fail:
        msg: "Failed to initialize Redis profiles: {{ init_result.stderr }}"
      when: init_result.rc != 0
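A usage sketch (inventory path assumed): this playbook targets only the master group and wipes and re-seeds the profile data, so it is normally run once before a test.

    ansible-playbook -i inventory.stress.yml playbook-stress-init-redis.yml

    # If the Redis instance runs without a password, the connection check and
    # environment can be relaxed via the flag the playbook already supports:
    ansible-playbook -i inventory.stress.yml playbook-stress-init-redis.yml -e use_redis_password=false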
@ -3,9 +3,15 @@
   hosts: all
   gather_facts: yes
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml"
     - "group_vars/all/vault.yml"
   pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
     - name: Ensure python3-pip is installed
       block:
         - name: Install prerequisites for managing repositories
@ -27,17 +33,22 @@
       become: yes

   tasks:
-    - name: Install required Python packages
-      ansible.builtin.pip:
-        name:
-          - python-dotenv
-          - aria2p
-          - tabulate
-          - redis
-          - PyYAML
-          - aiothrift
-          - PySocks
-        state: present
+    - name: Define base directory for node
+      ansible.builtin.set_fact:
+        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"
+
+    - name: Install required Python packages from requirements.txt
+      ansible.builtin.pip:
+        requirements: "{{ base_dir }}/ytops_client/requirements.txt"
+        extra_args: "--ignore-installed"
+      become: yes
+      environment:
+        PIP_BREAK_SYSTEM_PACKAGES: "1"
+
+    - name: Explicitly install the thrift package
+      ansible.builtin.pip:
+        name: thrift
+        extra_args: "--ignore-installed"
       become: yes
       environment:
         PIP_BREAK_SYSTEM_PACKAGES: "1"
113
ansible/playbook-stress-lifecycle.yml
Normal file
@ -0,0 +1,113 @@
---
- name: "STRESS-SETUP: Manage full worker lifecycle based on inventory"
  hosts: workers
  gather_facts: no
  vars:
    # Default action
    action: "status" # Available actions: start, stop, status

  tasks:
    - name: "Start all configured generators and simulators"
      when: action == "start"
      block:
        - name: "Set combined profile prefixes string"
          ansible.builtin.set_fact:
            combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
          when: profile_prefixes is defined and profile_prefixes | length > 0

        - name: "Start single auth generator for all profiles: {{ combined_prefixes | default('none') }}"
          ansible.builtin.command: >-
            ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
            -i {{ inventory_file }}
            --limit {{ inventory_hostname }}
            -e "start_generator=true"
            -e "profile_prefix={{ combined_prefixes }}"
            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
            {% if auth_min_seconds is defined %}-e "auth_min_seconds={{ auth_min_seconds }}"{% endif %}
            {% if auth_max_seconds is defined %}-e "auth_max_seconds={{ auth_max_seconds }}"{% endif %}
            {% if batch_size is defined %}-e "batch_size={{ batch_size }}"{% endif %}
            {% if create_download_tasks is defined %}-e "create_download_tasks={{ create_download_tasks }}"{% endif %}
            {% if formats_to_download is defined %}-e "formats_to_download={{ formats_to_download }}"{% endif %}
          delegate_to: localhost
          changed_when: true
          when: profile_prefixes is defined and profile_prefixes | length > 0

        - name: "Start single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
          ansible.builtin.command: >-
            ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
            -i {{ inventory_file }}
            --limit {{ inventory_hostname }}
            -e "start_download=true"
            -e "profile_prefix={{ combined_prefixes }}"
            {% if dummy_batch is defined %}-e "dummy_batch={{ dummy_batch }}"{% endif %}
            {% if download_min_seconds is defined %}-e "download_min_seconds={{ download_min_seconds }}"{% endif %}
            {% if download_max_seconds is defined %}-e "download_max_seconds={{ download_max_seconds }}"{% endif %}
            {% if extra_set_args is defined %}-e 'extra_set_args={{ extra_set_args | to_json }}'{% endif %}
          delegate_to: localhost
          changed_when: true
          when: profile_prefixes is defined and profile_prefixes | length > 0

    - name: "Stop all worker generators and simulators"
      when: action == "stop"
      block:
        - name: Kill all tmux sessions starting with 'stress-' on this worker
          ansible.builtin.shell:
            cmd: |
              for session in $(tmux list-sessions -F "#{session_name}" 2>/dev/null | grep -E "^stress-"); do
                tmux kill-session -t "$session"
              done || true
          ignore_errors: yes
          changed_when: false

        - name: Kill all ytops-client processes on this worker
          ansible.builtin.shell:
            cmd: |
              # Gracefully terminate
              ps aux | grep "[y]tops-client.*stress-policy" | awk '{print $2}' | xargs kill >/dev/null 2>&1 || true
              sleep 0.5
              # Force kill
              ps aux | grep "[y]tops-client.*stress-policy" | awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 || true
          ignore_errors: yes
          changed_when: false

    - name: "Check status of all configured generators and simulators"
      when: action == "status"
      block:
        - name: "Set combined profile prefixes string"
          ansible.builtin.set_fact:
            combined_prefixes: "{{ profile_prefixes | default([]) | join(',') }}"
          when: profile_prefixes is defined and profile_prefixes | length > 0

        - name: "Check single auth generator for all profiles: {{ combined_prefixes | default('none') }}"
          ansible.builtin.command: >-
            ansible-playbook {{ playbook_dir }}/playbook-stress-auth-generator.yml
            -i {{ inventory_file }}
            --limit {{ inventory_hostname }}
            -e "check_status=true"
            -e "profile_prefix={{ combined_prefixes }}"
          delegate_to: localhost
          changed_when: false
          when: profile_prefixes is defined and profile_prefixes | length > 0
          register: auth_status_check

        - name: "Display auth generator status for {{ inventory_hostname }}"
          ansible.builtin.debug:
            var: auth_status_check.stdout_lines
          when: auth_status_check is defined

        - name: "Check single download simulator for all profiles: {{ combined_prefixes | default('none') }}"
          ansible.builtin.command: >-
            ansible-playbook {{ playbook_dir }}/playbook-stress-download-simulation.yml
            -i {{ inventory_file }}
            --limit {{ inventory_hostname }}
            -e "check_status=true"
            -e "profile_prefix={{ combined_prefixes }}"
          delegate_to: localhost
          changed_when: false
          when: profile_prefixes is defined and profile_prefixes | length > 0
          register: download_status_check

        - name: "Display download simulator status for {{ inventory_hostname }}"
          ansible.builtin.debug:
            var: download_status_check.stdout_lines
          when: download_status_check is defined
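A usage sketch (inventory path assumed). The per-worker profile_prefixes list is expected to come from the generated host_vars (see the inventory-generator changes later in this commit), so only the action normally needs to be passed on the command line:

    ansible-playbook -i inventory.stress.yml playbook-stress-lifecycle.yml -e action=start
    ansible-playbook -i inventory.stress.yml playbook-stress-lifecycle.yml -e action=status
    ansible-playbook -i inventory.stress.yml playbook-stress-lifecycle.yml -e action=stop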
111
ansible/playbook-stress-manage-processes.yml
Normal file
@ -0,0 +1,111 @@
---
- name: "STRESS-SETUP: Manage tmux sessions for monitoring and enforcer"
  hosts: master
  gather_facts: no
  vars:
    tmux_session_monitor: "stress-monitor"
    tmux_session_enforcer: "stress-enforcer"
    enforcer_policy: "policies/8_unified_simulation_enforcer.yaml"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Manage monitor process
      ansible.builtin.include_tasks:
        file: manage-processes-tasks.yml
      vars:
        tmux_session_name: "{{ tmux_session_monitor }}"
        working_dir: "{{ airflow_master_dir }}"
        command_to_run: >
          ./bin/ytops-client profile list
          --auth-env sim_auth
          --download-env sim_download
          --live
          --no-blink
          --show-reasons
        process_grep_pattern: "ytops-client.*profile.*list"
        start_process: "{{ start_monitor | default(false) | bool }}"
        stop_process: "{{ stop_sessions | default(false) | bool or stop_monitor | default(false) | bool }}"
        check_status: "{{ vars.check_status | default(false) | bool }}"

    - name: Check monitor session output for errors
      block:
        - name: Wait for a moment for the process to start
          ansible.builtin.pause:
            seconds: 2
        - name: Capture tmux pane content
          ansible.builtin.shell:
            cmd: "tmux capture-pane -p -t {{ tmux_session_monitor }}"
          register: tmux_output
          changed_when: false
          ignore_errors: true
        - name: Display tmux pane content if session exists
          ansible.builtin.debug:
            msg: "Initial output from tmux session '{{ tmux_session_monitor }}':"
          when: tmux_output.rc == 0
        - name: Show output lines if session exists
          ansible.builtin.debug:
            var: tmux_output.stdout_lines
          when: tmux_output.rc == 0
        - name: Report if session not found
          ansible.builtin.debug:
            msg: "Tmux session '{{ tmux_session_monitor }}' was not found."
          when: tmux_output.rc != 0
      when: start_monitor | default(false) | bool

    - name: Manage enforcer process
      ansible.builtin.include_tasks:
        file: manage-processes-tasks.yml
      vars:
        tmux_session_name: "{{ tmux_session_enforcer }}"
        working_dir: "{{ airflow_master_dir }}"
        command_to_run: >
          ./bin/ytops-client policy-enforcer
          --policy {{ enforcer_policy }}
          --live
        process_grep_pattern: "ytops-client.*policy-enforcer.*{{ enforcer_policy }}"
        start_process: "{{ start_enforcer | default(false) | bool }}"
        stop_process: "{{ stop_sessions | default(false) | bool or stop_enforcer | default(false) | bool }}"
        check_status: "{{ vars.check_status | default(false) | bool }}"

    - name: Check enforcer session output for errors
      block:
        - name: Wait for a moment for the process to start
          ansible.builtin.pause:
            seconds: 2
        - name: Capture tmux pane content
          ansible.builtin.shell:
            cmd: "tmux capture-pane -p -t {{ tmux_session_enforcer }}"
          register: tmux_output
          changed_when: false
          ignore_errors: true
        - name: Display tmux pane content if session exists
          ansible.builtin.debug:
            msg: "Initial output from tmux session '{{ tmux_session_enforcer }}':"
          when: tmux_output.rc == 0
        - name: Show output lines if session exists
          ansible.builtin.debug:
            var: tmux_output.stdout_lines
          when: tmux_output.rc == 0
        - name: Report if session not found
          ansible.builtin.debug:
            msg: "Tmux session '{{ tmux_session_enforcer }}' was not found."
          when: tmux_output.rc != 0
      when: start_enforcer | default(false) | bool

    - name: List active tmux sessions
      ansible.builtin.shell:
        cmd: tmux list-sessions 2>/dev/null || true
      register: tmux_sessions
      changed_when: false

    - name: Display active sessions
      ansible.builtin.debug:
        msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"
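A usage sketch for the master-side monitor and enforcer sessions (inventory path assumed; the flag names are the playbook's own):

    # Start both tmux sessions on the master
    ansible-playbook -i inventory.stress.yml playbook-stress-manage-processes.yml \
      -e start_monitor=true -e start_enforcer=true

    # Tear both sessions down again
    ansible-playbook -i inventory.stress.yml playbook-stress-manage-processes.yml \
      -e stop_sessions=true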
@ -2,13 +2,22 @@
 - name: "STRESS-SETUP: Sync Local Code"
   hosts: all
   gather_facts: no
+  vars:
+    ytops_source_dir: "{{ playbook_dir }}/../ytops_client-source"
   vars_files:
-    - "group_vars/all/generated_vars.stress.yml"
     - "group_vars/all/vault.yml"
+  pre_tasks:
+    - name: Set inventory_env fact
+      ansible.builtin.set_fact:
+        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
+    - name: Load environment-specific variables
+      ansible.builtin.include_vars: "{{ item }}"
+      with_fileglob:
+        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
   tasks:
     - name: Define base directory for node
       ansible.builtin.set_fact:
-        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['airflow_master'] else airflow_worker_dir }}"
+        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

     - name: Ensure base directory exists for code sync
       ansible.builtin.file:
@ -21,7 +30,7 @@

     - name: Sync python packages and directories for stress testing
       ansible.posix.synchronize:
-        src: "../ytops_client-source/{{ item }}/"
+        src: "{{ ytops_source_dir }}/{{ item }}/"
         dest: "{{ base_dir }}/{{ item }}/"
         rsync_opts:
           - "--delete"
@ -42,7 +51,7 @@

     - name: Sync client utility scripts and configs
       ansible.posix.synchronize:
-        src: "../ytops_client-source/{{ item }}"
+        src: "{{ ytops_source_dir }}/{{ item }}"
         dest: "{{ base_dir }}/{{ item }}"
         perms: yes
       loop:
@ -56,3 +65,4 @@
         - "ytdlp.json"
       become: yes
       become_user: "{{ ansible_user }}"
+
58
ansible/playbook-stress-sync-policies.yml
Normal file
@ -0,0 +1,58 @@
---
- name: "STRESS-SETUP: Sync Policies and CLI Configs"
  hosts: all
  gather_facts: no
  vars:
    ytops_source_dir: "{{ playbook_dir }}/../ytops_client-source"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Ensure policies directory exists
      ansible.builtin.file:
        path: "{{ base_dir }}/policies"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: yes

    - name: Sync policies directory only
      ansible.posix.synchronize:
        src: "{{ ytops_source_dir }}/policies/"
        dest: "{{ base_dir }}/policies/"
        rsync_opts:
          - "--delete"
          - "--exclude=.DS_Store"
          - "--exclude=__pycache__"
          - "--exclude='*.pyc'"
        recursive: yes
        perms: yes
      become: yes
      become_user: "{{ ansible_user }}"

    - name: Sync client CLI config files
      ansible.posix.synchronize:
        src: "{{ ytops_source_dir }}/{{ item }}"
        dest: "{{ base_dir }}/{{ item }}"
        perms: yes
      loop:
        - "cli.auth.config"
        - "cli.download.config"
      become: yes
      become_user: "{{ ansible_user }}"

    - name: Display sync completion
      ansible.builtin.debug:
        msg: "Policies and CLI configs synced to {{ base_dir }}"
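A usage sketch (inventory path assumed): since only the policies directory and the two CLI config files are synced, this is the quick way to push policy edits to all nodes without a full code sync.

    ansible-playbook -i inventory.stress.yml playbook-stress-sync-policies.yml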
@ -1,8 +1,16 @@
 # This file is managed by Ansible for the stress test environment.
 # --- Network Settings ---
-REDIS_HOST={{ hostvars[groups['airflow_master'][0]].ansible_host }}
+REDIS_HOST={{ hostvars[groups['master'][0]].ansible_host }}
+REDIS_PORT={{ vault_redis_port }}
 REDIS_PASSWORD={{ vault_redis_password }}

+# --- S3 Storage Configuration ---
+S3_ACCESS_KEY={{ vault_s3_delivery_access_key_id }}
+S3_SECRET_KEY={{ vault_s3_delivery_secret_access_key }}
+S3_ENDPOINT={{ vault_s3_delivery_endpoint }}
+S3_BUCKET={{ vault_s3_delivery_bucket }}
+AWS_REGION={{ vault_s3_delivery_aws_region }}
+
 # --- Account Manager Configuration ---
 ACCOUNT_ACTIVE_DURATION_MIN=7
 ACCOUNT_COOLDOWN_DURATION_MIN=30
@ -1,6 +1,7 @@
 # Template for stress test master services
 name: "stress-services"
 services:
+{% if inventory_hostname in groups['master'] %}
   redis:
     image: redis:7-alpine
     container_name: stress-redis
@ -26,7 +27,7 @@ services:
     command: server /data --console-address ":9001"
     networks:
       - {{ docker_network_name }}
-
+{% endif %}
   bgutil-provider:
     image: brainicism/bgutil-ytdlp-pot-provider
     container_name: bgutil-provider
@ -11,7 +11,6 @@ services:
     volumes:
       - ./config_ssp_{{ proxy_config.local_port }}/:/etc/shadowsocks-rust/:ro
     networks:
-      - default
       - {{ docker_network_name }}
 {% endfor %}

@ -19,7 +19,7 @@ def generate_inventory(cluster_config, inventory_path):
         f.write("# Edit cluster.yml and re-run the generator instead.\n\n")

         # Master group
-        f.write("[airflow_master]\n")
+        f.write("[master]\n")
         for hostname, config in cluster_config.get('master', {}).items():
             line = f"{hostname} ansible_host={config['ip']}"
             if 'port' in config:
@ -29,7 +29,7 @@ def generate_inventory(cluster_config, inventory_path):
         f.write("\n")

         # Workers group (handles case where workers are not defined)
-        f.write("[airflow_workers]\n")
+        f.write("[workers]\n")
         for hostname, config in cluster_config.get('workers', {}).items():
             line = f"{hostname} ansible_host={config['ip']}"
             if 'port' in config:
@ -47,6 +47,11 @@ def generate_host_vars(cluster_config, host_vars_dir):
         sys.exit(1)
     master_ip = list(master_nodes.values())[0]['ip']

+    # Get global vars for aliases
+    global_vars = cluster_config.get('global_vars', {})
+    airflow_master_dir = global_vars.get('airflow_master_dir')
+    airflow_worker_dir = global_vars.get('airflow_worker_dir')
+
     # Get global proxy definitions
     shadowsocks_proxies = cluster_config.get('shadowsocks_proxies', {})

@ -58,6 +63,8 @@ def generate_host_vars(cluster_config, host_vars_dir):

         # Per-node list of proxies to USE
         worker_proxies = config.get('proxies', [])
+        profile_prefixes = config.get('profile_prefixes', [])
+        cleanup_settings = config.get('cleanup_settings')

         with open(host_vars_file, 'w') as f:
             f.write("---\n")
@ -65,6 +72,13 @@ def generate_host_vars(cluster_config, host_vars_dir):
             f.write(f"master_host_ip: {master_ip}\n")
             f.write("redis_port: 52909\n")

+            # Add node-specific directory aliases for template compatibility
+            # The master path is needed by all nodes for the .env template.
+            if airflow_master_dir:
+                f.write(f"airflow_master: \"{airflow_master_dir}\"\n")
+            if hostname in cluster_config.get('workers', {}) and airflow_worker_dir:
+                f.write(f"airflow_dl_worker: \"{airflow_worker_dir}\"\n")
+
             # Write the global proxy definitions for deployment
             if shadowsocks_proxies:
                 f.write("shadowsocks_proxies:\n")
@ -81,6 +95,22 @@ def generate_host_vars(cluster_config, host_vars_dir):
                 for proxy in worker_proxies:
                     f.write(f" - \"{proxy}\"\n")

+            # Write worker-specific profile prefixes
+            if profile_prefixes:
+                f.write("profile_prefixes:\n")
+                for prefix in profile_prefixes:
+                    f.write(f" - \"{prefix}\"\n")
+
+            # Write worker-specific cleanup settings (overrides global)
+            if cleanup_settings:
+                f.write("cleanup_settings:\n")
+                if 'enabled' in cleanup_settings:
+                    f.write(f" enabled: {str(cleanup_settings['enabled']).lower()}\n")
+                if 'mode' in cleanup_settings:
+                    f.write(f" mode: \"{cleanup_settings['mode']}\"\n")
+                if 'max_age_seconds' in cleanup_settings:
+                    f.write(f" max_age_seconds: {cleanup_settings['max_age_seconds']}\n")
+
 def generate_group_vars(cluster_config, group_vars_path):
     """Generate group-level variables"""
     # Create parent directory if it doesn't exist
@ -107,11 +137,11 @@ def generate_group_vars(cluster_config, group_vars_path):
     generated_data = {
         'master_host_ip': master_ip,
         'redis_port': 52909,
-        'external_access_ips': external_ips if external_ips else [],
-        'hostvars': all_nodes
+        'external_access_ips': external_ips if external_ips else []
     }
     generated_data.update(global_vars)

+
     with open(group_vars_path, 'w') as f:
         f.write("---\n")
         f.write("# This file is auto-generated by tools/generate-inventory.py\n")
@ -17,7 +17,8 @@ settings:
|
|||||||
urls_file: "inputfiles/urls.rt300.txt"
|
urls_file: "inputfiles/urls.rt300.txt"
|
||||||
# The save directory MUST be inside the docker_host_mount_path for the download
|
# The save directory MUST be inside the docker_host_mount_path for the download
|
||||||
# simulation to be able to find the files.
|
# simulation to be able to find the files.
|
||||||
save_info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
|
# NOTE: This path is expected to be on an s3fs mount for cross-host communication.
|
||||||
|
save_info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
|
||||||
|
|
||||||
# Settings for controlling the behavior of dummy/simulation modes.
|
# Settings for controlling the behavior of dummy/simulation modes.
|
||||||
# These values can be overridden at runtime with the --set flag.
|
# These values can be overridden at runtime with the --set flag.
|
||||||
@ -88,6 +89,7 @@ direct_docker_cli_policy:
|
|||||||
docker_container_mount_path: "/config" # The mount point inside the container
|
docker_container_mount_path: "/config" # The mount point inside the container
|
||||||
|
|
||||||
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
|
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
|
||||||
|
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
|
||||||
docker_host_cache_path: ".cache/direct_docker_simulation"
|
docker_host_cache_path: ".cache/direct_docker_simulation"
|
||||||
# Path inside the container where the cache is mounted. Should match HOME/.cache
|
# Path inside the container where the cache is mounted. Should match HOME/.cache
|
||||||
docker_container_cache_path: "/config/.cache"
|
docker_container_cache_path: "/config/.cache"
|
||||||
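The host/container path pairs in this policy (the shared mount and the cache, plus the analogous download pair in the download policies) translate directly into bind mounts, which is why the save directory must live inside the mount path while the cache should stay on a local disk. A minimal Python sketch of that mapping, assuming the policy file has been loaded into a dict containing the keys named above (the function name is illustrative):

def docker_volume_args(policy: dict) -> list[str]:
    """Build docker -v arguments from the direct_docker_cli_policy path pairs.

    The shared mount (s3fs-backed when handing files to another host) and the
    cache (fast local disk) remain two separate bind mounts.
    """
    pairs = [
        ("docker_host_mount_path", "docker_container_mount_path"),
        ("docker_host_cache_path", "docker_container_cache_path"),
    ]
    args: list[str] = []
    for host_key, container_key in pairs:
        if host_key in policy and container_key in policy:
            args += ["-v", f"{policy[host_key]}:{policy[container_key]}"]
    return args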
|
|||||||
@ -15,7 +15,8 @@ settings:
|
|||||||
# This directory should contain info.json files generated by an auth simulation,
|
# This directory should contain info.json files generated by an auth simulation,
|
||||||
# like `10_direct_docker_auth_simulation`.
|
# like `10_direct_docker_auth_simulation`.
|
||||||
# It MUST be inside the docker_host_mount_path.
|
# It MUST be inside the docker_host_mount_path.
|
||||||
info_json_dir: "run/docker_mount/fetched_info_jsons/direct_docker_simulation"
|
# NOTE: This path is expected to be on an s3fs mount for cross-host communication.
|
||||||
|
info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
|
||||||
#info_json_dir: "run/docker_mount/download_tasks"
|
#info_json_dir: "run/docker_mount/download_tasks"
|
||||||
# Regex to extract the profile name from a task filename. The first capture
|
# Regex to extract the profile name from a task filename. The first capture
|
||||||
# group is used. This is crucial for the task-first locking strategy.
|
# group is used. This is crucial for the task-first locking strategy.
|
||||||
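The task-first locking strategy mentioned above boils down to: choose a task file first, derive the profile name from its filename, then try to lock exactly that profile. A hedged sketch of the selection step, where the regex comes from the policy setting and the helper name is illustrative:

import os
import re

def next_task(info_json_dir: str, profile_name_regex: str):
    """Scan the shared info_json_dir and return (task_path, profile_name) or (None, None)."""
    pattern = re.compile(profile_name_regex)
    for fname in sorted(os.listdir(info_json_dir)):
        if not fname.endswith(".info.json"):
            continue
        match = pattern.search(fname)
        if match:
            # The first capture group holds the profile name, as described above.
            return os.path.join(info_json_dir, fname), match.group(1)
    return None, None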
@ -30,6 +31,8 @@ execution_control:
|
|||||||
workers: 1
|
workers: 1
|
||||||
- profile_prefix: "user2"
|
- profile_prefix: "user2"
|
||||||
workers: 1
|
workers: 1
|
||||||
|
- profile_prefix: "user3"
|
||||||
|
workers: 1
|
||||||
# How long a worker should pause if it cannot find an available profile or task.
|
# How long a worker should pause if it cannot find an available profile or task.
|
||||||
worker_polling_interval_seconds: 1
|
worker_polling_interval_seconds: 1
|
||||||
|
|
||||||
@ -86,6 +89,7 @@ direct_docker_cli_policy:
|
|||||||
docker_container_mount_path: "/config"
|
docker_container_mount_path: "/config"
|
||||||
|
|
||||||
# Path on the HOST where downloaded files will be saved.
|
# Path on the HOST where downloaded files will be saved.
|
||||||
|
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
|
||||||
docker_host_download_path: "downloaded_media/direct_docker_simulation"
|
docker_host_download_path: "downloaded_media/direct_docker_simulation"
|
||||||
# Path inside the CONTAINER where `docker_host_download_path` is mounted.
|
# Path inside the CONTAINER where `docker_host_download_path` is mounted.
|
||||||
docker_container_download_path: "/downloads"
|
docker_container_download_path: "/downloads"
|
||||||
|
|||||||
@ -1,27 +1,82 @@
|
|||||||
# Policy: Queue-based Authentication Simulation via Direct Docker Exec
|
# Policy: Queue-based Authentication Simulation
|
||||||
#
|
#
|
||||||
# This policy simulates a continuous stream of info.json fetch requests using
|
# This policy simulates a continuous stream of info.json fetch requests. It pulls
|
||||||
# the 'direct_docker_cli' mode. It pulls URLs from a Redis queue, creates a
|
# URLs from a Redis queue and processes them, acting as the first stage in a
|
||||||
# temporary batch file, and then calls a yt-dlp command inside a running
|
# two-stage simulation. The second stage (downloading) can be handled in one
|
||||||
# Docker container.
|
# of two ways, configured below:
|
||||||
|
#
|
||||||
|
# --- WORKFLOW 1: Queue-Auth -> File-Download ---
|
||||||
|
# - This policy creates info.json files in a shared directory (`save_info_json_dir`).
|
||||||
|
# - A separate download simulation (e.g., policy 11_direct_docker_download_simulation.yaml)
|
||||||
|
# watches that directory, picks up the files, and performs the downloads.
|
||||||
|
# - To enable:
|
||||||
|
# - Set `create_download_tasks: false`
|
||||||
|
# - Ensure `save_info_json_dir` points to a shared path.
|
||||||
|
#
|
||||||
|
# --- WORKFLOW 2: Queue-Auth -> Queue-Download ---
|
||||||
|
# - This policy creates download *tasks* and pushes them to another Redis queue.
|
||||||
|
# - A separate download simulation (e.g., policy 13_queue_download_simulation.yaml)
|
||||||
|
# pulls tasks from that queue and performs the downloads.
|
||||||
|
# - To enable:
|
||||||
|
# - Set `create_download_tasks: true`
|
||||||
|
# - Configure `download_task_queue` to the correct queue name.
|
||||||
|
# - Use `download_task_granularity` to control if one task is created per-URL
|
||||||
|
# or per-format.
|
||||||
#
|
#
|
||||||
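The two workflows differ only in where the auth result goes once an info.json has been produced. A minimal sketch of that branch, assuming the queue_policy keys below are loaded into a dict and a Redis client is available; the `id` field and the function name are illustrative rather than taken from the repository:

import json
import os

def hand_off_result(info_json_payload: dict, policy: dict, redis_client) -> None:
    """Route an auth result to the configured second stage."""
    if policy.get("create_download_tasks"):
        # WORKFLOW 2: Queue-Auth -> Queue-Download
        # Push a download task for a queue-download worker (e.g. policy 13).
        redis_client.rpush(policy["download_task_queue"], json.dumps(info_json_payload))
    else:
        # WORKFLOW 1: Queue-Auth -> File-Download
        # Drop the info.json into the shared directory watched by a
        # file-based download simulation (e.g. policy 11).
        out_dir = policy["save_info_json_dir"]
        os.makedirs(out_dir, exist_ok=True)
        path = os.path.join(out_dir, f"{info_json_payload['id']}.info.json")  # filename scheme illustrative
        with open(path, "w") as fh:
            json.dump(info_json_payload, fh)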
name: 12_queue_auth_simulation
|
name: 12_queue_auth_simulation
|
||||||
|
|
||||||
settings:
|
settings:
|
||||||
mode: fetch_only
|
mode: fetch_only
|
||||||
orchestration_mode: direct_docker_cli
|
orchestration_mode: queue_auth
|
||||||
profile_mode: from_pool_with_lock
|
profile_mode: from_pool_with_lock
|
||||||
# The save directory MUST be inside the docker_host_mount_path.
|
# For Queue-Auth -> File-Download workflow: Directory to save generated info.json files.
|
||||||
save_info_json_dir: "run/docker_mount/fetched_info_jsons/queue_simulation"
|
# A file-based download worker (e.g., policy 11) will watch this directory.
|
||||||
|
# This directory MUST be inside the docker_host_mount_path.
|
||||||
|
# NOTE: This path is expected to be on an s3fs mount for cross-host communication.
|
||||||
|
save_info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
|
||||||
|
|
||||||
execution_control:
|
execution_control:
|
||||||
|
# Define worker pools for multiple user groups
|
||||||
|
worker_pools:
|
||||||
|
- profile_prefix: "user1"
|
||||||
|
workers: 1
|
||||||
|
- profile_prefix: "user2"
|
||||||
|
workers: 1
|
||||||
|
- profile_prefix: "user3"
|
||||||
workers: 1
|
workers: 1
|
||||||
# How long a worker should pause if it cannot find an available profile to lock.
|
# How long a worker should pause if it cannot find an available profile to lock.
|
||||||
worker_polling_interval_seconds: 1
|
worker_polling_interval_seconds: 1
|
||||||
# No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.
|
# No sleep between tasks; throughput is controlled by yt-dlp performance and profile availability.
|
||||||
|
|
||||||
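One straightforward reading of `worker_pools` is a fixed number of workers per profile prefix, each restricted to locking profiles carrying that prefix. A sketch under that assumption; the use of threads and the callable name are illustrative, and the real runner may use processes instead:

import threading

def start_worker_pools(worker_pools: list[dict], worker_fn) -> list[threading.Thread]:
    """Spawn one worker per 'workers' entry, bound to its pool's profile_prefix."""
    threads = []
    for pool in worker_pools:                      # e.g. {"profile_prefix": "user1", "workers": 1}
        for i in range(pool["workers"]):
            t = threading.Thread(
                target=worker_fn,
                args=(pool["profile_prefix"],),
                name=f"{pool['profile_prefix']}-{i}",
                daemon=True,
            )
            t.start()
            threads.append(t)
    return threads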
info_json_generation_policy:
|
info_json_generation_policy:
|
||||||
profile_prefix: "user1"
|
# This setting tells the auth worker how many download tasks will be generated
|
||||||
|
# per successful info.json. It is used to correctly increment the
|
||||||
|
# 'pending_downloads' counter on the auth profile.
|
||||||
|
# Can be an integer, or 'from_download_policy' to automatically count formats
|
||||||
|
# from the 'download_policy.formats' setting in this same policy file.
|
||||||
|
downloads_per_url: "from_download_policy"
|
||||||
|
# (For Queue-Download workflow) Controls how download tasks are created.
|
||||||
|
#
|
||||||
|
# "per_format": (Default) Creates one download task for EACH format specified in 'formats_to_download'.
|
||||||
|
# If `formats_to_download` is "140,299", two download tasks are created, and
|
||||||
|
# the 'pending_downloads' counter is incremented by 2.
|
||||||
|
#
|
||||||
|
# "per_url": Creates a SINGLE download task for the entire URL. The 'formats_to_download'
|
||||||
|
# string is passed to the download worker as the format selector, but 'pending_downloads'
|
||||||
|
# is only incremented by 1 for the whole URL.
|
||||||
|
#
|
||||||
|
# --- Current Setting ---
|
||||||
|
download_task_granularity: "per_format"
|
||||||
|
# --- Alternative Setting (commented out) ---
|
||||||
|
# download_task_granularity: "per_url"
|
||||||
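To make the counting concrete: with `downloads_per_url: "from_download_policy"` the increment is the number of comma-separated format groups in `download_policy.formats`, and `download_task_granularity` decides whether that same number of tasks is actually pushed. A hedged sketch; the function and task-field names are illustrative:

def predicted_downloads(formats: str) -> int:
    """Each comma-separated group in download_policy.formats counts as one pending download."""
    return len([grp for grp in formats.split(",") if grp.strip()])

def build_download_tasks(info_json_path: str, formats: str, granularity: str) -> list[dict]:
    """Task construction for the two granularities described above."""
    if granularity == "per_format":
        # One task per format group; pending_downloads is incremented once per task.
        return [{"info_json": info_json_path, "format": grp.strip()}
                for grp in formats.split(",") if grp.strip()]
    # "per_url": a single task carrying the whole selector string;
    # pending_downloads is incremented by 1 for the URL.
    return [{"info_json": info_json_path, "format": formats}]

# With the download_policy.formats value below (two comma-separated groups),
# predicted_downloads() returns 2, matching the two "per_format" tasks.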
|
# profile_prefix is now defined per-pool in execution_control.worker_pools
|
||||||
|
# However, for queue auth mode, we need a fallback prefix
|
||||||
|
profile_prefix: "user"
|
||||||
|
|
||||||
|
# This section is needed for the 'downloads_per_url: from_download_policy' setting.
|
||||||
|
# It should mirror the formats being used by the download simulation.
|
||||||
|
download_policy:
|
||||||
|
formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140"
|
||||||
|
|
||||||
direct_docker_cli_policy:
|
direct_docker_cli_policy:
|
||||||
# Which simulation environment's profiles to use for locking.
|
# Which simulation environment's profiles to use for locking.
|
||||||
@ -50,6 +105,7 @@ direct_docker_cli_policy:
|
|||||||
docker_container_mount_path: "/config" # The mount point inside the container
|
docker_container_mount_path: "/config" # The mount point inside the container
|
||||||
|
|
||||||
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
|
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
|
||||||
|
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
|
||||||
docker_host_cache_path: ".cache/queue_auth_simulation"
|
docker_host_cache_path: ".cache/queue_auth_simulation"
|
||||||
# Path inside the container where the cache is mounted. Should match HOME/.cache
|
# Path inside the container where the cache is mounted. Should match HOME/.cache
|
||||||
docker_container_cache_path: "/config/.cache"
|
docker_container_cache_path: "/config/.cache"
|
||||||
@ -109,18 +165,47 @@ direct_docker_cli_policy:
|
|||||||
# Template for renaming the final info.json.
|
# Template for renaming the final info.json.
|
||||||
rename_file_template: "{video_id}-{profile_name}-{proxy}.info.json"
|
rename_file_template: "{video_id}-{profile_name}-{proxy}.info.json"
|
||||||
|
|
||||||
|
# Settings for controlling the behavior of dummy/simulation modes.
|
||||||
|
# These values can be overridden at runtime with the --set flag.
|
||||||
|
dummy_simulation_settings:
|
||||||
|
# Timings for dummy auth simulation (per-URL delay in a batch)
|
||||||
|
auth_min_seconds: 0.1
|
||||||
|
auth_max_seconds: 0.5
|
||||||
|
auth_failure_rate: 0.0
|
||||||
|
auth_skipped_failure_rate: 0.0
|
||||||
|
# Timings for dummy download simulation (per-format download time)
|
||||||
|
download_min_seconds: 1.0
|
||||||
|
download_max_seconds: 3.0
|
||||||
|
download_failure_rate: 0.0
|
||||||
|
download_skipped_failure_rate: 0.0
|
||||||
|
|
||||||
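The dummy settings above are just delay bounds and failure probabilities. A sketch of how a dummy auth attempt might consume them; the outcome labels mirror the activity types used elsewhere, but the function itself is illustrative:

import random
import time

def dummy_auth_attempt(settings: dict) -> str:
    """Sleep a per-URL delay, then roll the configured failure rates."""
    time.sleep(random.uniform(settings["auth_min_seconds"], settings["auth_max_seconds"]))
    roll = random.random()
    if roll < settings["auth_failure_rate"]:
        return "failure"
    if roll < settings["auth_failure_rate"] + settings["auth_skipped_failure_rate"]:
        return "tolerated_error"
    return "success"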
queue_policy:
|
queue_policy:
|
||||||
# Set to false to use legacy, unprefixed queue names (e.g., 'queue2_auth_inbox').
|
# Set to false to use legacy, unprefixed queue names (e.g., 'queue2_auth_inbox').
|
||||||
# Set to true (or omit) to use environment-prefixed names (e.g., 'sim_auth_queue2_auth_inbox').
|
# Set to true (or omit) to use environment-prefixed names (e.g., 'sim_auth_queue2_auth_inbox').
|
||||||
use_env_prefix: false
|
use_env_prefix: false
|
||||||
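The `use_env_prefix` switch only changes how queue names are built; roughly (illustrative helper):

def queue_name(base: str, env: str, use_env_prefix: bool) -> str:
    """'queue2_auth_inbox' stays as-is, or becomes e.g. 'sim_auth_queue2_auth_inbox'."""
    return f"{env}_{base}" if use_env_prefix else base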
|
|
||||||
# If specified, create download tasks for these formats
|
# Queue to pull URLs from
|
||||||
# Can be "all", a specific format ID, or a list of format IDs
|
input_queue: "queue2_auth_inbox"
|
||||||
formats_to_download: "140-dashy/140-dashy-0/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
|
|
||||||
|
# --- Download Handoff Configuration ---
|
||||||
|
# Set to 'true' for Queue-Auth -> Queue-Download workflow.
|
||||||
|
# Set to 'false' for Queue-Auth -> File-Download workflow.
|
||||||
|
create_download_tasks: false
|
||||||
|
|
||||||
|
# Queue to push download tasks to (if create_download_tasks is true)
|
||||||
|
download_task_queue: "queue2_dl_inbox"
|
||||||
|
|
||||||
# How many tasks a worker should pull from the queue at once.
|
# How many tasks a worker should pull from the queue at once.
|
||||||
# This will become the batch size for the docker run.
|
# This will become the batch size for the docker run.
|
||||||
batch_size: 25
|
batch_size: 5
|
||||||
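Pulling a batch is a bounded series of pops, and the resulting list becomes the batch file for one docker run. A minimal sketch with an illustrative helper name and a non-blocking LPOP:

def pull_url_batch(redis_client, queue: str, batch_size: int) -> list[str]:
    """Pop up to batch_size URLs from the auth inbox for one batch run."""
    urls: list[str] = []
    for _ in range(batch_size):
        item = redis_client.lpop(queue)
        if item is None:
            break
        urls.append(item.decode() if isinstance(item, bytes) else item)
    return urls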
|
|
||||||
|
# If specified, create download tasks for these formats
|
||||||
|
# Can be "all", a specific format ID, or a list of format IDs
|
||||||
|
# Defaults to the formats in download_policy.formats
|
||||||
|
# Example: formats_to_download: "140-dashy,299-dashy"
|
||||||
|
# Example: formats_to_download: "all"
|
||||||
|
# Example: formats_to_download: ["140-dashy", "299-dashy"]
|
||||||
|
formats_to_download: "from_download_policy"
|
||||||
|
|
||||||
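Because `formats_to_download` accepts several shapes, a worker has to normalise it before creating tasks. A hedged sketch of that normalisation; treating "all" as a marker to be expanded later from the fetched formats is an assumption:

def resolve_formats_to_download(value, download_policy_formats: str) -> list[str]:
    """Normalise the accepted shapes into a list of format selectors."""
    if value in (None, "from_download_policy"):
        value = download_policy_formats              # fall back to download_policy.formats
    if isinstance(value, list):
        return [str(v) for v in value]               # e.g. ["140-dashy", "299-dashy"]
    if value == "all":
        return ["all"]                               # expanded per URL once formats are known
    return [grp.strip() for grp in str(value).split(",") if grp.strip()]  # "140-dashy,299-dashy"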
simulation_parameters:
|
simulation_parameters:
|
||||||
auth_env: "sim_auth"
|
auth_env: "sim_auth"
|
||||||
|
|||||||
@ -1,16 +1,22 @@
|
|||||||
# Policy: Queue-based Download Simulation via Direct Docker Exec
|
# Policy: Queue-based Download Simulation
|
||||||
#
|
#
|
||||||
# This policy simulates a continuous stream of downloads using the
|
# This policy simulates a continuous stream of downloads. It pulls download tasks
|
||||||
# 'direct_docker_cli' mode with `mode: download_only`. It pulls download
|
# from a Redis queue, where each task typically contains a path to an info.json
|
||||||
# tasks from a Redis queue, each containing a path to an info.json file,
|
# file and a format to download.
|
||||||
# and invokes a yt-dlp command inside a running Docker container to perform
|
#
|
||||||
# the download.
|
# This policy is designed to be the *second stage* of a two-stage simulation,
|
||||||
|
# consuming tasks produced by an authentication simulation like:
|
||||||
|
# - `12_queue_auth_simulation.yaml` (when configured for Queue-Download workflow)
|
||||||
|
#
|
||||||
|
# It does not matter to this policy whether the auth stage created tasks per-URL
|
||||||
|
# or per-format; this worker will simply process whatever task it receives from
|
||||||
|
# the `input_queue`.
|
||||||
#
|
#
|
||||||
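Operationally, this second stage is a blocking poll loop: pull a task, lock a profile, download, report, unlock. A hedged sketch of one worker; the task fields and the helper callables are assumptions, while the queue names come from the queue_policy further down this file:

import json
import time

def download_worker(redis_client, lock_profile, run_download, release_profile,
                    input_queue: str = "queue2_dl_inbox", poll_seconds: float = 1.0) -> None:
    """Consume download tasks produced by the auth stage."""
    while True:
        item = redis_client.lpop(input_queue)
        if item is None:
            time.sleep(poll_seconds)                 # worker_polling_interval_seconds
            continue
        task = json.loads(item)                      # e.g. {"info_json": "...", "format": "140-dashy"}
        profile = lock_profile()
        if profile is None:
            redis_client.rpush(input_queue, item)    # nothing lockable right now: requeue the task
            time.sleep(poll_seconds)
            continue
        try:
            ok = run_download(task, profile)
            # Reporting is simplified here; see queue_policy below for the full rules.
            redis_client.rpush("queue2_dl_completed" if ok else "queue2_dl_fail", item)
        finally:
            release_profile(profile)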
name: 13_queue_download_simulation
|
name: 13_queue_download_simulation
|
||||||
|
|
||||||
settings:
|
settings:
|
||||||
mode: download_only
|
mode: download_only
|
||||||
orchestration_mode: direct_docker_cli
|
orchestration_mode: queue_download
|
||||||
profile_mode: from_pool_with_lock
|
profile_mode: from_pool_with_lock
|
||||||
# In queue mode, info_json_dir is not used to find tasks.
|
# In queue mode, info_json_dir is not used to find tasks.
|
||||||
# However, the paths inside the download tasks must be accessible
|
# However, the paths inside the download tasks must be accessible
|
||||||
@ -19,12 +25,19 @@ settings:
|
|||||||
# can be specified in the download task.
|
# can be specified in the download task.
|
||||||
|
|
||||||
execution_control:
|
execution_control:
|
||||||
workers: 4
|
# Define worker pools for multiple user groups
|
||||||
|
worker_pools:
|
||||||
|
- profile_prefix: "user1"
|
||||||
|
workers: 1
|
||||||
|
- profile_prefix: "user2"
|
||||||
|
workers: 1
|
||||||
|
- profile_prefix: "user3"
|
||||||
|
workers: 1
|
||||||
# How long a worker should pause if it cannot find an available profile or task.
|
# How long a worker should pause if it cannot find an available profile or task.
|
||||||
worker_polling_interval_seconds: 1
|
worker_polling_interval_seconds: 1
|
||||||
|
|
||||||
download_policy:
|
download_policy:
|
||||||
profile_prefix: "user1"
|
# profile_prefix is now defined per-pool in execution_control.worker_pools
|
||||||
# Default cooldown in seconds if not specified by the enforcer in Redis.
|
# Default cooldown in seconds if not specified by the enforcer in Redis.
|
||||||
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
|
# The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
|
||||||
# will always take precedence. This is a fallback.
|
# will always take precedence. This is a fallback.
|
||||||
@ -65,6 +78,7 @@ direct_docker_cli_policy:
|
|||||||
docker_container_mount_path: "/config"
|
docker_container_mount_path: "/config"
|
||||||
|
|
||||||
# Path on the HOST where downloaded files will be saved.
|
# Path on the HOST where downloaded files will be saved.
|
||||||
|
# NOTE: This path should be on a fast, local disk, NOT on s3fs.
|
||||||
docker_host_download_path: "downloaded_media/queue_downloads"
|
docker_host_download_path: "downloaded_media/queue_downloads"
|
||||||
# Path inside the CONTAINER where `docker_host_download_path` is mounted.
|
# Path inside the CONTAINER where `docker_host_download_path` is mounted.
|
||||||
docker_container_download_path: "/downloads"
|
docker_container_download_path: "/downloads"
|
||||||
@ -95,11 +109,36 @@ direct_docker_cli_policy:
|
|||||||
- "Invalid data found when processing input"
|
- "Invalid data found when processing input"
|
||||||
- "Error opening input files"
|
- "Error opening input files"
|
||||||
|
|
||||||
|
# Settings for controlling the behavior of dummy/simulation modes.
|
||||||
|
# These values can be overridden at runtime with the --set flag.
|
||||||
|
dummy_simulation_settings:
|
||||||
|
# Timings for dummy download simulation (per-format download time)
|
||||||
|
download_min_seconds: 1.0
|
||||||
|
download_max_seconds: 3.0
|
||||||
|
download_failure_rate: 0.0
|
||||||
|
download_skipped_failure_rate: 0.0
|
||||||
|
|
||||||
queue_policy:
|
queue_policy:
|
||||||
# Set to false to use legacy, unprefixed queue names (e.g., 'queue2_dl_inbox').
|
# Set to false to use legacy, unprefixed queue names (e.g., 'queue2_dl_inbox').
|
||||||
# Set to true (or omit) to use environment-prefixed names (e.g., 'sim_download_queue2_dl_inbox').
|
# Set to true (or omit) to use environment-prefixed names (e.g., 'sim_download_queue2_dl_inbox').
|
||||||
use_env_prefix: false
|
use_env_prefix: false
|
||||||
|
|
||||||
|
# Queue to pull download tasks from
|
||||||
|
input_queue: "queue2_dl_inbox"
|
||||||
|
|
||||||
|
# Whether to report completion back to a queue
|
||||||
|
# Can be true (report all), false (report none), or "success_only"/"failure_only"
|
||||||
|
report_completion: true
|
||||||
|
|
||||||
|
# Queue to report completion to
|
||||||
|
completion_queue: "queue2_dl_completed"
|
||||||
|
|
||||||
|
# Queue to report failures to (always reported regardless of report_completion)
|
||||||
|
failure_queue: "queue2_dl_fail"
|
||||||
|
|
||||||
|
# Queue to report skipped tasks to
|
||||||
|
skipped_queue: "queue2_dl_skipped"
|
||||||
|
|
||||||
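The reporting rules above reduce to a few branches. This is one plausible reading; the exact interplay of 'success_only'/'failure_only' with the always-reported failure queue is an assumption:

def report_outcome(redis_client, queue_policy: dict, task_json: str, outcome: str) -> None:
    """outcome is one of 'success', 'failure', 'skipped'."""
    if outcome == "failure":
        # Failures are always reported, regardless of report_completion.
        redis_client.rpush(queue_policy["failure_queue"], task_json)
        return
    if outcome == "skipped":
        redis_client.rpush(queue_policy["skipped_queue"], task_json)
        return
    mode = queue_policy.get("report_completion", True)
    if mode is True or mode == "success_only":
        redis_client.rpush(queue_policy["completion_queue"], task_json)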
# How many tasks to process in a batch. For downloads, this should be 1,
|
# How many tasks to process in a batch. For downloads, this should be 1,
|
||||||
# as each worker locks a profile for a single download task.
|
# as each worker locks a profile for a single download task.
|
||||||
batch_size: 1
|
batch_size: 1
|
||||||
|
|||||||
@ -13,13 +13,13 @@ auth_profile_setup:
|
|||||||
cleanup_before_run: true
|
cleanup_before_run: true
|
||||||
pools:
|
pools:
|
||||||
- prefix: "user1"
|
- prefix: "user1"
|
||||||
proxy: "sslocal-rust-1092:1092"
|
proxy: "sslocal-rust-1088:1088"
|
||||||
count: 3
|
count: 3
|
||||||
- prefix: "user2"
|
- prefix: "user2"
|
||||||
proxy: "sslocal-rust-1092:1092"
|
proxy: "sslocal-rust-1085:1085"
|
||||||
count: 3
|
count: 3
|
||||||
- prefix: "user3"
|
- prefix: "user3"
|
||||||
proxy: "sslocal-rust-1092:1092"
|
proxy: "sslocal-rust-1084:1084"
|
||||||
count: 3
|
count: 3
|
||||||
|
|
||||||
# --- Profile setup for the DOWNLOAD simulation ---
|
# --- Profile setup for the DOWNLOAD simulation ---
|
||||||
@ -28,11 +28,11 @@ download_profile_setup:
|
|||||||
cleanup_before_run: true
|
cleanup_before_run: true
|
||||||
pools:
|
pools:
|
||||||
- prefix: "user1"
|
- prefix: "user1"
|
||||||
proxy: "sslocal-rust-1092:1092"
|
proxy: "sslocal-rust-1088:1088"
|
||||||
count: 3
|
count: 3
|
||||||
- prefix: "user2"
|
- prefix: "user2"
|
||||||
proxy: "sslocal-rust-1092:1092"
|
proxy: "sslocal-rust-1085:1085"
|
||||||
count: 3
|
count: 3
|
||||||
- prefix: "user3"
|
- prefix: "user3"
|
||||||
proxy: "sslocal-rust-1092:1092"
|
proxy: "sslocal-rust-1084:1084"
|
||||||
count: 3
|
count: 3
|
||||||
|
|||||||
@ -16,13 +16,6 @@ import threading
|
|||||||
import time
|
import time
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
try:
|
|
||||||
import aria2p
|
|
||||||
from aria2p.utils import human_readable_bytes
|
|
||||||
import yt_dlp
|
|
||||||
except ImportError:
|
|
||||||
print("aria2p or yt-dlp is not installed. Please install them with: pip install aria2p yt-dlp", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
logger = logging.getLogger('download_aria_tool')
|
logger = logging.getLogger('download_aria_tool')
|
||||||
|
|
||||||
@ -173,6 +166,14 @@ def parse_aria_args_to_options(args_str):
|
|||||||
|
|
||||||
def main_download_aria(args):
|
def main_download_aria(args):
|
||||||
"""Main logic for the 'download-aria' command."""
|
"""Main logic for the 'download-aria' command."""
|
||||||
|
try:
|
||||||
|
import aria2p
|
||||||
|
from aria2p.utils import human_readable_bytes
|
||||||
|
import yt_dlp
|
||||||
|
except ImportError:
|
||||||
|
print("aria2p or yt-dlp is not installed. Please install them with: pip install aria2p yt-dlp", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
log_level = logging.DEBUG if args.verbose else logging.INFO
|
log_level = logging.DEBUG if args.verbose else logging.INFO
|
||||||
# Reconfigure root logger to ensure our settings are applied.
|
# Reconfigure root logger to ensure our settings are applied.
|
||||||
for handler in logging.root.handlers[:]:
|
for handler in logging.root.handlers[:]:
|
||||||
@ -405,6 +406,7 @@ def main_download_aria(args):
|
|||||||
|
|
||||||
def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None):
|
def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, remote_dir=None):
|
||||||
"""Handle downloading a single URL with aria2c."""
|
"""Handle downloading a single URL with aria2c."""
|
||||||
|
import aria2p
|
||||||
if remote_dir:
|
if remote_dir:
|
||||||
aria_options['dir'] = remote_dir
|
aria_options['dir'] = remote_dir
|
||||||
logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...")
|
logger.info(f"Adding download for format '{args.format}' with URL: {url[:70]}...")
|
||||||
@ -532,6 +534,7 @@ def download_url_aria(args, api, url, filename, aria_options, timeout_seconds, r
|
|||||||
|
|
||||||
def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None):
|
def download_fragments_aria(args, api, target_format, filename, aria_options, timeout_seconds, remote_dir=None):
|
||||||
"""Handle downloading fragmented formats with aria2c."""
|
"""Handle downloading fragmented formats with aria2c."""
|
||||||
|
import aria2p
|
||||||
logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to download queue.")
|
logger.info(f"Format '{args.format}' is fragmented. Adding all fragments to download queue.")
|
||||||
fragment_base_url = target_format.get('fragment_base_url')
|
fragment_base_url = target_format.get('fragment_base_url')
|
||||||
fragments = target_format['fragments']
|
fragments = target_format['fragments']
|
||||||
|
|||||||
@ -16,12 +16,6 @@ import sys
|
|||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
try:
|
|
||||||
import yt_dlp
|
|
||||||
from yt_dlp.utils import match_filter_func
|
|
||||||
except ImportError:
|
|
||||||
print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
logger = logging.getLogger('download_native_py_tool')
|
logger = logging.getLogger('download_native_py_tool')
|
||||||
|
|
||||||
@ -110,6 +104,8 @@ def _download_single_format(format_id, info_data, base_ydl_opts, args):
|
|||||||
|
|
||||||
Returns a tuple: (success: bool, ytdlp_logger: YTDLPLogger)
|
Returns a tuple: (success: bool, ytdlp_logger: YTDLPLogger)
|
||||||
"""
|
"""
|
||||||
|
import yt_dlp
|
||||||
|
|
||||||
# Deep copy info_data so we can modify it without affecting other downloads
|
# Deep copy info_data so we can modify it without affecting other downloads
|
||||||
local_info_data = copy.deepcopy(info_data)
|
local_info_data = copy.deepcopy(info_data)
|
||||||
|
|
||||||
@ -178,6 +174,13 @@ def _download_single_format(format_id, info_data, base_ydl_opts, args):
|
|||||||
|
|
||||||
def main_download_native_py(args):
|
def main_download_native_py(args):
|
||||||
"""Main logic for the 'download-native-py' command."""
|
"""Main logic for the 'download-native-py' command."""
|
||||||
|
try:
|
||||||
|
import yt_dlp
|
||||||
|
from yt_dlp.utils import match_filter_func
|
||||||
|
except ImportError:
|
||||||
|
print("yt-dlp is not installed. Please install it with: pip install yt-dlp", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
# All logging should go to stderr to keep stdout clean for the final filename, or for binary data with --output-buffer.
|
# All logging should go to stderr to keep stdout clean for the final filename, or for binary data with --output-buffer.
|
||||||
log_stream = sys.stderr
|
log_stream = sys.stderr
|
||||||
log_level = logging.DEBUG if args.verbose else logging.INFO
|
log_level = logging.DEBUG if args.verbose else logging.INFO
|
||||||
|
|||||||
@ -44,6 +44,7 @@ class PolicyEnforcer:
|
|||||||
self.manager = manager
|
self.manager = manager
|
||||||
self.dry_run = dry_run
|
self.dry_run = dry_run
|
||||||
self.actions_taken_this_cycle = 0
|
self.actions_taken_this_cycle = 0
|
||||||
|
self._last_wait_log_message = ""
|
||||||
|
|
||||||
PROXY_REST_REASON = "Proxy resting"
|
PROXY_REST_REASON = "Proxy resting"
|
||||||
|
|
||||||
@ -248,7 +249,19 @@ class PolicyEnforcer:
|
|||||||
# This prevents a deadlock if all groups are just 'Working' but none are in the 'waiting_downloads' state yet.
|
# This prevents a deadlock if all groups are just 'Working' but none are in the 'waiting_downloads' state yet.
|
||||||
if not is_any_group_idle and groups_currently_waiting:
|
if not is_any_group_idle and groups_currently_waiting:
|
||||||
is_system_blocked_by_downloads = True
|
is_system_blocked_by_downloads = True
|
||||||
logger.info(f"System is waiting for downloads to finish in groups: {groups_currently_waiting}. No new profiles will be activated until a group is free.")
|
log_message = f"System is waiting for downloads to finish in groups: {groups_currently_waiting}. No new profiles will be activated until a group is free."
|
||||||
|
if log_message != self._last_wait_log_message:
|
||||||
|
if self._last_wait_log_message:
|
||||||
|
print(file=sys.stderr) # Newline if we were printing dots
|
||||||
|
logger.info(log_message)
|
||||||
|
self._last_wait_log_message = log_message
|
||||||
|
else:
|
||||||
|
print(".", end="", file=sys.stderr, flush=True)
|
||||||
|
else:
|
||||||
|
# If we are no longer blocked, reset the message tracker
|
||||||
|
if self._last_wait_log_message:
|
||||||
|
print(file=sys.stderr) # Newline to clean up after dots
|
||||||
|
self._last_wait_log_message = ""
|
||||||
|
|
||||||
if is_system_blocked_by_downloads:
|
if is_system_blocked_by_downloads:
|
||||||
# When blocked, we only want to consider profiles that are in the 'waiting_downloads' state,
|
# When blocked, we only want to consider profiles that are in the 'waiting_downloads' state,
|
||||||
@ -328,6 +341,7 @@ class PolicyEnforcer:
|
|||||||
# The final list to check is the sorted ready profiles, followed by the not-ready ones.
|
# The final list to check is the sorted ready profiles, followed by the not-ready ones.
|
||||||
not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
|
not_ready_profiles.sort(key=lambda p: (p.get('rest_until', 0), natural_sort_key(p.get('name', ''))))
|
||||||
profiles_to_check = sorted_ready_profiles + not_ready_profiles
|
profiles_to_check = sorted_ready_profiles + not_ready_profiles
|
||||||
|
logger.debug(f"Activation candidates for 'least_loaded' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
|
||||||
|
|
||||||
else: # Default 'longest_idle' sort
|
else: # Default 'longest_idle' sort
|
||||||
if strategy not in ['longest_idle']:
|
if strategy not in ['longest_idle']:
|
||||||
@ -352,6 +366,7 @@ class PolicyEnforcer:
|
|||||||
|
|
||||||
# The final list to check will process all ready profiles first, then wait for the not-ready ones.
|
# The final list to check will process all ready profiles first, then wait for the not-ready ones.
|
||||||
profiles_to_check = ready_profiles + not_ready_profiles
|
profiles_to_check = ready_profiles + not_ready_profiles
|
||||||
|
logger.debug(f"Activation candidates for 'longest_idle' strategy (first 10): {[p['name'] for p in profiles_to_check[:10]]}")
|
||||||
# --- End New Sorting Logic ---
|
# --- End New Sorting Logic ---
|
||||||
|
|
||||||
# --- New logic: Identify groups with waiting profiles ---
|
# --- New logic: Identify groups with waiting profiles ---
|
||||||
@ -382,7 +397,7 @@ class PolicyEnforcer:
|
|||||||
group_name = profile_to_group_map.get(profile_name)
|
group_name = profile_to_group_map.get(profile_name)
|
||||||
|
|
||||||
# --- New check to prevent activating profiles from a waiting group ---
|
# --- New check to prevent activating profiles from a waiting group ---
|
||||||
if group_name in waiting_group_names and profile.get('rest_reason') != 'waiting_downloads':
|
if group_name in waiting_group_names:
|
||||||
logger.debug(f"Profile '{profile_name}' activation deferred because its group '{group_name}' is waiting for downloads to complete.")
|
logger.debug(f"Profile '{profile_name}' activation deferred because its group '{group_name}' is waiting for downloads to complete.")
|
||||||
continue
|
continue
|
||||||
# --- End new logic ---
|
# --- End new logic ---
|
||||||
|
|||||||
@ -102,10 +102,23 @@ class ProfileManager:
|
|||||||
self.redis.ping()
|
self.redis.ping()
|
||||||
logger.info(f"Successfully connected to Redis.")
|
logger.info(f"Successfully connected to Redis.")
|
||||||
logger.info(f"Using key prefix: {key_prefix}")
|
logger.info(f"Using key prefix: {key_prefix}")
|
||||||
|
self._last_lock_warning = ""
|
||||||
except redis.exceptions.ConnectionError as e:
|
except redis.exceptions.ConnectionError as e:
|
||||||
logger.error(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}")
|
logger.error(f"Failed to connect to Redis at {redis_host}:{redis_port}: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
def _log_lock_warning(self, message: str):
|
||||||
|
"""Logs a lock-related warning, printing dots for repeated messages."""
|
||||||
|
if message == self._last_lock_warning:
|
||||||
|
# Use print to avoid logger formatting and newlines
|
||||||
|
print(".", end="", file=sys.stderr, flush=True)
|
||||||
|
else:
|
||||||
|
# If we were printing dots, start a new line before the new message
|
||||||
|
if self._last_lock_warning:
|
||||||
|
print(file=sys.stderr)
|
||||||
|
logger.warning(message)
|
||||||
|
self._last_lock_warning = message
|
||||||
|
|
||||||
def _profile_key(self, profile_name: str) -> str:
|
def _profile_key(self, profile_name: str) -> str:
|
||||||
"""Get Redis key for a profile."""
|
"""Get Redis key for a profile."""
|
||||||
return f"{self.key_prefix}profile:{profile_name}"
|
return f"{self.key_prefix}profile:{profile_name}"
|
||||||
@ -288,11 +301,13 @@ class ProfileManager:
|
|||||||
'tolerated_error_count': '0',
|
'tolerated_error_count': '0',
|
||||||
'download_count': '0',
|
'download_count': '0',
|
||||||
'download_error_count': '0',
|
'download_error_count': '0',
|
||||||
|
'predicted_download_count': '0',
|
||||||
'global_success_count': '0',
|
'global_success_count': '0',
|
||||||
'global_failure_count': '0',
|
'global_failure_count': '0',
|
||||||
'global_tolerated_error_count': '0',
|
'global_tolerated_error_count': '0',
|
||||||
'global_download_count': '0',
|
'global_download_count': '0',
|
||||||
'global_download_error_count': '0',
|
'global_download_error_count': '0',
|
||||||
|
'global_predicted_download_count': '0',
|
||||||
'lock_timestamp': '0',
|
'lock_timestamp': '0',
|
||||||
'lock_owner': '',
|
'lock_owner': '',
|
||||||
'rest_until': '0',
|
'rest_until': '0',
|
||||||
@ -328,10 +343,10 @@ class ProfileManager:
|
|||||||
|
|
||||||
# Convert numeric fields
|
# Convert numeric fields
|
||||||
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
|
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
|
||||||
'tolerated_error_count', 'download_count', 'download_error_count',
|
'tolerated_error_count', 'download_count', 'download_error_count', 'predicted_download_count',
|
||||||
'global_success_count', 'global_failure_count',
|
'global_success_count', 'global_failure_count',
|
||||||
'global_tolerated_error_count', 'global_download_count',
|
'global_tolerated_error_count', 'global_download_count',
|
||||||
'global_download_error_count',
|
'global_download_error_count', 'global_predicted_download_count',
|
||||||
'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at']
|
'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at']
|
||||||
for field in numeric_fields:
|
for field in numeric_fields:
|
||||||
if field in data:
|
if field in data:
|
||||||
@ -388,10 +403,10 @@ class ProfileManager:
|
|||||||
# --- End batch fetch ---
|
# --- End batch fetch ---
|
||||||
|
|
||||||
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
|
numeric_fields = ['created_at', 'last_used', 'success_count', 'failure_count',
|
||||||
'tolerated_error_count', 'download_count', 'download_error_count',
|
'tolerated_error_count', 'download_count', 'download_error_count', 'predicted_download_count',
|
||||||
'global_success_count', 'global_failure_count',
|
'global_success_count', 'global_failure_count',
|
||||||
'global_tolerated_error_count', 'global_download_count',
|
'global_tolerated_error_count', 'global_download_count',
|
||||||
'global_download_error_count',
|
'global_download_error_count', 'global_predicted_download_count',
|
||||||
'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at']
|
'lock_timestamp', 'rest_until', 'last_rest_timestamp', 'wait_started_at']
|
||||||
|
|
||||||
for i, data in enumerate(all_profile_data):
|
for i, data in enumerate(all_profile_data):
|
||||||
@ -528,8 +543,12 @@ class ProfileManager:
|
|||||||
return total_deleted
|
return total_deleted
|
||||||
|
|
||||||
def record_activity(self, name: str, activity_type: str,
|
def record_activity(self, name: str, activity_type: str,
|
||||||
timestamp: Optional[float] = None) -> bool:
|
timestamp: Optional[float] = None, is_dummy: bool = False) -> bool:
|
||||||
"""Record activity (success/failure) for a profile."""
|
"""
|
||||||
|
Record activity (success/failure) for a profile.
|
||||||
|
If is_dummy is True, the activity will NOT be recorded for the associated proxy,
|
||||||
|
preventing dummy failures from triggering proxy-level enforcer actions.
|
||||||
|
"""
|
||||||
if activity_type not in ['success', 'failure', 'tolerated_error', 'download', 'download_error']:
|
if activity_type not in ['success', 'failure', 'tolerated_error', 'download', 'download_error']:
|
||||||
logger.error(f"Invalid activity type: {activity_type}")
|
logger.error(f"Invalid activity type: {activity_type}")
|
||||||
return False
|
return False
|
||||||
@ -558,7 +577,8 @@ class ProfileManager:
|
|||||||
# Keep only last 1000 activities to prevent unbounded growth
|
# Keep only last 1000 activities to prevent unbounded growth
|
||||||
self.redis.zremrangebyrank(activity_key, 0, -1001)
|
self.redis.zremrangebyrank(activity_key, 0, -1001)
|
||||||
|
|
||||||
# Also record activity for the proxy
|
# Also record activity for the proxy, BUT NOT for dummy activities.
|
||||||
|
if not is_dummy:
|
||||||
proxy_url = profile.get('proxy')
|
proxy_url = profile.get('proxy')
|
||||||
if proxy_url:
|
if proxy_url:
|
||||||
proxy_activity_key = self._proxy_activity_key(proxy_url, activity_type)
|
proxy_activity_key = self._proxy_activity_key(proxy_url, activity_type)
|
||||||
@ -569,7 +589,36 @@ class ProfileManager:
|
|||||||
pipe.execute()
|
pipe.execute()
|
||||||
logger.debug(f"Recorded {activity_type} for proxy '{proxy_url}'")
|
logger.debug(f"Recorded {activity_type} for proxy '{proxy_url}'")
|
||||||
|
|
||||||
logger.debug(f"Recorded {activity_type} for profile '{name}' at {ts}")
|
log_msg = f"Recorded {activity_type} for profile '{name}' at {ts}"
|
||||||
|
if is_dummy:
|
||||||
|
log_msg += " (dummy, proxy activity skipped)"
|
||||||
|
logger.debug(log_msg)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def record_predicted_downloads(self, name: str, count: int, is_dummy: bool = False) -> bool:
|
||||||
|
"""Records the number of download tasks predicted/created for a profile."""
|
||||||
|
if count <= 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
profile = self.get_profile(name)
|
||||||
|
if not profile:
|
||||||
|
logger.error(f"Profile '{name}' not found")
|
||||||
|
return False
|
||||||
|
|
||||||
|
ts = time.time()
|
||||||
|
|
||||||
|
# Update counters in profile
|
||||||
|
profile_key = self._profile_key(name)
|
||||||
|
self.redis.hincrby(profile_key, 'predicted_download_count', count)
|
||||||
|
self.redis.hincrby(profile_key, 'global_predicted_download_count', count)
|
||||||
|
|
||||||
|
# Update last_used
|
||||||
|
self.redis.hset(profile_key, 'last_used', str(ts))
|
||||||
|
|
||||||
|
log_msg = f"Recorded {count} predicted downloads for profile '{name}'"
|
||||||
|
if is_dummy:
|
||||||
|
log_msg += " (dummy, proxy activity skipped)"
|
||||||
|
logger.debug(log_msg)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_activity_rate(self, name: str, activity_type: str,
|
def get_activity_rate(self, name: str, activity_type: str,
|
||||||
@ -612,6 +661,7 @@ class ProfileManager:
|
|||||||
'tolerated_error_count': '0',
|
'tolerated_error_count': '0',
|
||||||
'download_count': '0',
|
'download_count': '0',
|
||||||
'download_error_count': '0',
|
'download_error_count': '0',
|
||||||
|
'predicted_download_count': '0',
|
||||||
}
|
}
|
||||||
self.redis.hset(profile_key, mapping=counters_to_reset)
|
self.redis.hset(profile_key, mapping=counters_to_reset)
|
||||||
logger.info(f"Reset session counters for profile '{name}'.")
|
logger.info(f"Reset session counters for profile '{name}'.")
|
||||||
@ -630,19 +680,21 @@ class ProfileManager:
|
|||||||
total_tolerated_error = sum(int(p.get('global_tolerated_error_count', 0)) for p in profiles)
|
total_tolerated_error = sum(int(p.get('global_tolerated_error_count', 0)) for p in profiles)
|
||||||
total_downloads = sum(int(p.get('global_download_count', 0)) for p in profiles)
|
total_downloads = sum(int(p.get('global_download_count', 0)) for p in profiles)
|
||||||
total_download_errors = sum(int(p.get('global_download_error_count', 0)) for p in profiles)
|
total_download_errors = sum(int(p.get('global_download_error_count', 0)) for p in profiles)
|
||||||
|
total_predicted_downloads = sum(int(p.get('global_predicted_download_count', 0)) for p in profiles)
|
||||||
return {
|
return {
|
||||||
'total_success': total_success,
|
'total_success': total_success,
|
||||||
'total_failure': total_failure,
|
'total_failure': total_failure,
|
||||||
'total_tolerated_error': total_tolerated_error,
|
'total_tolerated_error': total_tolerated_error,
|
||||||
'total_downloads': total_downloads,
|
'total_downloads': total_downloads,
|
||||||
'total_download_errors': total_download_errors,
|
'total_download_errors': total_download_errors,
|
||||||
|
'total_predicted_downloads': total_predicted_downloads,
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_per_proxy_stats(self) -> Dict[str, Dict[str, Any]]:
|
def get_per_proxy_stats(self) -> Dict[str, Dict[str, Any]]:
|
||||||
"""Get aggregated stats per proxy."""
|
"""Get aggregated stats per proxy."""
|
||||||
profiles = self.list_profiles()
|
profiles = self.list_profiles()
|
||||||
proxy_stats = collections.defaultdict(lambda: {
|
proxy_stats = collections.defaultdict(lambda: {
|
||||||
'success': 0, 'failure': 0, 'tolerated_error': 0, 'downloads': 0, 'download_errors': 0, 'profiles': 0
|
'success': 0, 'failure': 0, 'tolerated_error': 0, 'downloads': 0, 'download_errors': 0, 'predicted_downloads': 0, 'profiles': 0
|
||||||
})
|
})
|
||||||
for p in profiles:
|
for p in profiles:
|
||||||
proxy = p.get('proxy')
|
proxy = p.get('proxy')
|
||||||
@ -652,6 +704,7 @@ class ProfileManager:
|
|||||||
proxy_stats[proxy]['tolerated_error'] += int(p.get('global_tolerated_error_count', 0))
|
proxy_stats[proxy]['tolerated_error'] += int(p.get('global_tolerated_error_count', 0))
|
||||||
proxy_stats[proxy]['downloads'] += int(p.get('global_download_count', 0))
|
proxy_stats[proxy]['downloads'] += int(p.get('global_download_count', 0))
|
||||||
proxy_stats[proxy]['download_errors'] += int(p.get('global_download_error_count', 0))
|
proxy_stats[proxy]['download_errors'] += int(p.get('global_download_error_count', 0))
|
||||||
|
proxy_stats[proxy]['predicted_downloads'] += int(p.get('global_predicted_download_count', 0))
|
||||||
proxy_stats[proxy]['profiles'] += 1
|
proxy_stats[proxy]['profiles'] += 1
|
||||||
return dict(proxy_stats)
|
return dict(proxy_stats)
|
||||||
|
|
||||||
@ -862,14 +915,14 @@ class ProfileManager:
|
|||||||
# Original logic: find all active profiles, optionally filtered by prefix.
|
# Original logic: find all active profiles, optionally filtered by prefix.
|
||||||
active_profiles = self.redis.zrange(self._state_key(ProfileState.ACTIVE.value), 0, -1)
|
active_profiles = self.redis.zrange(self._state_key(ProfileState.ACTIVE.value), 0, -1)
|
||||||
if not active_profiles:
|
if not active_profiles:
|
||||||
logger.warning("No active profiles available to lock.")
|
self._log_lock_warning("No active profiles available to lock.")
|
||||||
self.redis.incr(self._failed_lock_attempts_key())
|
self.redis.incr(self._failed_lock_attempts_key())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if profile_prefix:
|
if profile_prefix:
|
||||||
profiles_to_check = [p for p in active_profiles if p.startswith(profile_prefix)]
|
profiles_to_check = [p for p in active_profiles if p.startswith(profile_prefix)]
|
||||||
if not profiles_to_check:
|
if not profiles_to_check:
|
||||||
logger.warning(f"No active profiles with prefix '{profile_prefix}' available to lock.")
|
self._log_lock_warning(f"No active profiles with prefix '{profile_prefix}' available to lock.")
|
||||||
self.redis.incr(self._failed_lock_attempts_key())
|
self.redis.incr(self._failed_lock_attempts_key())
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
@ -883,9 +936,9 @@ class ProfileManager:
|
|||||||
|
|
||||||
if not full_profiles:
|
if not full_profiles:
|
||||||
if specific_profile_name:
|
if specific_profile_name:
|
||||||
logger.warning(f"Profile '{specific_profile_name}' is not eligible for locking (e.g., not ACTIVE or missing).")
|
self._log_lock_warning(f"Profile '{specific_profile_name}' is not eligible for locking (e.g., not ACTIVE or missing).")
|
||||||
else:
|
else:
|
||||||
logger.warning("No active profiles available to lock after filtering.")
|
self._log_lock_warning("No active profiles available to lock after filtering.")
|
||||||
self.redis.incr(self._failed_lock_attempts_key())
|
self.redis.incr(self._failed_lock_attempts_key())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -898,7 +951,7 @@ class ProfileManager:
|
|||||||
]
|
]
|
||||||
|
|
||||||
if not eligible_profiles:
|
if not eligible_profiles:
|
||||||
logger.warning("No active profiles with an active proxy available to lock.")
|
self._log_lock_warning("No active profiles with an active proxy available to lock.")
|
||||||
self.redis.incr(self._failed_lock_attempts_key())
|
self.redis.incr(self._failed_lock_attempts_key())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -922,7 +975,7 @@ class ProfileManager:
|
|||||||
if not current_state or current_state.upper() != ProfileState.ACTIVE.value:
|
if not current_state or current_state.upper() != ProfileState.ACTIVE.value:
|
||||||
# Another process (enforcer) changed the state. Release lock and try next.
|
# Another process (enforcer) changed the state. Release lock and try next.
|
||||||
self.redis.hdel(locks_key, name)
|
self.redis.hdel(locks_key, name)
|
||||||
logger.warning(f"Aborted lock for '{name}'; state changed from ACTIVE to '{current_state}' during lock acquisition.")
|
self._log_lock_warning(f"Aborted lock for '{name}'; state changed from ACTIVE to '{current_state}' during lock acquisition.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# State is still ACTIVE, proceed with locking.
|
# State is still ACTIVE, proceed with locking.
|
||||||
@ -937,6 +990,7 @@ class ProfileManager:
|
|||||||
sm.lock(owner=owner)
|
sm.lock(owner=owner)
|
||||||
# The on_enter_locked action handles all Redis updates for the profile itself.
|
# The on_enter_locked action handles all Redis updates for the profile itself.
|
||||||
# The logger messages are also in the action.
|
# The logger messages are also in the action.
|
||||||
|
self._last_lock_warning = "" # Reset on successful lock
|
||||||
return self.get_profile(name)
|
return self.get_profile(name)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# This could be a TransitionNotAllowed error if the state changed,
|
# This could be a TransitionNotAllowed error if the state changed,
|
||||||
@ -946,7 +1000,7 @@ class ProfileManager:
|
|||||||
self.redis.hdel(locks_key, name)
|
self.redis.hdel(locks_key, name)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logger.warning("Could not lock any active profile (all may have been locked by other workers).")
|
self._log_lock_warning("Could not lock any active profile (all may have been locked by other workers).")
|
||||||
self.redis.incr(self._failed_lock_attempts_key())
|
self.redis.incr(self._failed_lock_attempts_key())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -1016,45 +1070,15 @@ class ProfileManager:
|
|||||||
# the `on_enter` actions for the current state. We can suppress the initial transition
|
# the `on_enter` actions for the current state. We can suppress the initial transition
|
||||||
# and set the state directly.
|
# and set the state directly.
|
||||||
|
|
||||||
# WORKAROUND for older statemachine library:
|
# By passing `initial_value`, we tell the state machine to start in the
|
||||||
# Instantiating the machine triggers an initial transition to ACTIVE, which wrongly updates Redis.
|
# profile's actual current state from Redis, instead of executing the
|
||||||
# We let this happen, and then immediately correct the state if it was supposed to be something else.
|
# default `initial=True` transition to ACTIVE. This prevents incorrect
|
||||||
sm = ProfileStateMachine(manager=self, profile_name=name)
|
# state changes and logs during "hydration" of the state machine.
|
||||||
|
sm = ProfileStateMachine(
|
||||||
# The sm is now in ACTIVE state, and Redis has been updated. If the original state was
|
manager=self,
|
||||||
# LOCKED, we must re-lock it to fix Redis and the state machine object so transitions work.
|
profile_name=name,
|
||||||
if current_state_str == ProfileState.LOCKED.value:
|
initial_value=current_state_str
|
||||||
lock_owner = profile.get('lock_owner', 're-lock-owner')
|
)
|
||||||
try:
|
|
||||||
# This transition ensures the `on_enter_LOCKED` actions are run, making the
|
|
||||||
# state consistent in Redis and in the state machine object.
|
|
||||||
sm.lock(owner=lock_owner)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to re-lock profile '{name}' during state machine hydration: {e}")
|
|
||||||
# The state is now inconsistent, best to not return a broken machine.
|
|
||||||
return None
|
|
||||||
elif current_state_str != sm.current_state.value.upper():
|
|
||||||
# For any other state, we must manually fix both the state machine object and Redis,
|
|
||||||
# as the constructor wrongly transitioned to ACTIVE.
|
|
||||||
|
|
||||||
# 1. Force state on the machine object. This does not trigger actions.
|
|
||||||
target_state_obj = next((s for s in sm.states if s.value.upper() == current_state_str), None)
|
|
||||||
if not target_state_obj:
|
|
||||||
logger.error(f"Could not find state object for '{current_state_str}' during hydration of '{name}'.")
|
|
||||||
return None
|
|
||||||
sm.current_state = target_state_obj
|
|
||||||
|
|
||||||
# 2. Manually revert the state in Redis to what it should be.
|
|
||||||
profile_key = self._profile_key(name)
|
|
||||||
pipe = self.redis.pipeline()
|
|
||||||
pipe.hset(profile_key, 'state', current_state_str)
|
|
||||||
# Atomically move the profile from the incorrect ACTIVE index to the correct one.
|
|
||||||
# The constructor may have added it to ACTIVE without removing it from its original state index.
|
|
||||||
pipe.zrem(self._state_key(ProfileState.ACTIVE.value), name)
|
|
||||||
pipe.zadd(self._state_key(current_state_str), {name: profile.get('last_used', time.time())})
|
|
||||||
pipe.execute()
|
|
||||||
logger.debug(f"Corrected state for '{name}' to '{current_state_str}' in object and Redis during hydration.")
|
|
||||||
|
|
||||||
return sm
|
return sm
|
||||||
|
|
||||||
def cleanup_stale_locks(self, max_lock_time_seconds: int) -> int:
|
def cleanup_stale_locks(self, max_lock_time_seconds: int) -> int:
|
||||||
@ -1475,36 +1499,77 @@ def _render_profile_group_summary_table(manager, all_profiles, profile_groups_co
|
|||||||
next_up_reason = f"least_loaded (load: {load}, {finish_str})"
|
next_up_reason = f"least_loaded (load: {load}, {finish_str})"
|
||||||
|
|
||||||
elif profile_selection_strategy == 'longest_idle':
|
elif profile_selection_strategy == 'longest_idle':
|
||||||
# Find the single longest idle profile across all groups
|
# Find groups that don't have an active profile
|
||||||
ready_profiles = []
|
groups_without_active = []
|
||||||
|
for group in profile_groups_config:
|
||||||
|
profiles_in_group = group.get('profiles_in_group', [])
|
||||||
|
has_active = False
|
||||||
|
for p_name in profiles_in_group:
|
||||||
|
p = all_profiles_by_name.get(p_name)
|
||||||
|
if p and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
|
||||||
|
has_active = True
|
||||||
|
break
|
||||||
|
if not has_active:
|
||||||
|
groups_without_active.append(group)
|
||||||
|
|
||||||
|
# If all groups have active profiles, we need to find which one will be rotated next
|
||||||
|
# For now, look at the active profile with the most accumulated requests (i.e. the one closest to rotation)
|
||||||
|
if not groups_without_active:
|
||||||
|
# Find the active profile with the highest request count (closest to rotation)
|
||||||
|
active_profiles_info = []
|
||||||
for group in profile_groups_config:
|
for group in profile_groups_config:
|
||||||
|
for p_name in group.get('profiles_in_group', []):
|
||||||
|
p = all_profiles_by_name.get(p_name)
|
||||||
|
if p and p['state'] in [ProfileState.ACTIVE.value, ProfileState.LOCKED.value]:
|
||||||
|
# Calculate total requests
|
||||||
|
total_reqs = (
|
||||||
|
p.get('success_count', 0) + p.get('failure_count', 0) +
|
||||||
|
p.get('tolerated_error_count', 0) +
|
||||||
|
p.get('download_count', 0) + p.get('download_error_count', 0)
|
||||||
|
)
|
||||||
|
active_profiles_info.append({
|
||||||
|
'profile': p,
|
||||||
|
'group': group,
|
||||||
|
'total_reqs': total_reqs,
|
||||||
|
'last_used': p.get('last_used', 0)
|
||||||
|
})
|
||||||
|
|
||||||
|
if active_profiles_info:
|
||||||
|
# Sort by total requests descending (highest first - closest to rotation)
|
||||||
|
# Then by last_used ascending (oldest first)
|
||||||
|
active_profiles_info.sort(key=lambda x: (-x['total_reqs'], x['last_used']))
|
||||||
|
# The first one is most likely to be rotated next
|
||||||
|
# Find which group should be activated after it
|
||||||
|
# For now, no next-up group is chosen in this case; groups_without_active is empty, so the search below finds nothing
|
||||||
|
# Predicting which group will free up first would be a better answer and is a possible refinement
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Find the longest idle profile from groups without active profiles
|
||||||
|
ready_profiles = []
|
||||||
|
for group in groups_without_active:
|
||||||
for p_name in group.get('profiles_in_group', []):
|
for p_name in group.get('profiles_in_group', []):
|
||||||
p = all_profiles_by_name.get(p_name)
|
p = all_profiles_by_name.get(p_name)
|
||||||
if p and p['state'] in [ProfileState.RESTING.value, ProfileState.COOLDOWN.value] and p.get('rest_until', 0) <= now and p.get('rest_reason') != 'waiting_downloads':
|
if p and p['state'] in [ProfileState.RESTING.value, ProfileState.COOLDOWN.value] and p.get('rest_until', 0) <= now and p.get('rest_reason') != 'waiting_downloads':
|
||||||
ready_profiles.append(p)
|
ready_profiles.append((p, group))
|
||||||
|
|
||||||
if ready_profiles:
|
if ready_profiles:
|
||||||
# Sort them according to the 'longest_idle' activation logic
|
# Sort them according to the 'longest_idle' activation logic
|
||||||
unused_profiles = [p for p in ready_profiles if (p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)) == 0]
|
unused_profiles = [(p, g) for p, g in ready_profiles if (p.get('success_count', 0) + p.get('failure_count', 0) + p.get('tolerated_error_count', 0) + p.get('download_count', 0) + p.get('download_error_count', 0)) == 0]
|
||||||
used_profiles = [p for p in ready_profiles if p not in unused_profiles]
|
used_profiles = [(p, g) for p, g in ready_profiles if (p, g) not in unused_profiles]
|
||||||
|
|
||||||
unused_profiles.sort(key=lambda p: natural_sort_key(p.get('name', '')))
|
unused_profiles.sort(key=lambda x: natural_sort_key(x[0].get('name', '')))
|
||||||
used_profiles.sort(key=lambda p: (p.get('last_used', 0), natural_sort_key(p.get('name', ''))))
|
used_profiles.sort(key=lambda x: (x[0].get('last_used', 0), natural_sort_key(x[0].get('name', ''))))
|
||||||
|
|
||||||
sorted_ready_profiles = unused_profiles + used_profiles
|
sorted_ready_profiles = unused_profiles + used_profiles
|
||||||
|
|
||||||
if sorted_ready_profiles:
|
if sorted_ready_profiles:
|
||||||
next_profile = sorted_ready_profiles[0]
|
next_profile, next_group = sorted_ready_profiles[0]
|
||||||
# Find which group it belongs to
|
next_up_group_name = next_group['name']
|
||||||
for group in profile_groups_config:
|
|
||||||
if next_profile['name'] in group.get('profiles_in_group', []):
|
|
||||||
next_up_group_name = group['name']
|
|
||||||
next_up_reason = profile_selection_strategy
|
next_up_reason = profile_selection_strategy
|
||||||
if getattr(args, 'show_reasons', False):
|
if getattr(args, 'show_reasons', False):
|
||||||
last_used_ts = next_profile.get('last_used', 0)
|
last_used_ts = next_profile.get('last_used', 0)
|
||||||
idle_time_str = f"idle for {format_duration(time.time() - last_used_ts)}" if last_used_ts > 0 else "never used"
|
idle_time_str = f"idle for {format_duration(time.time() - last_used_ts)}" if last_used_ts > 0 else "never used"
|
||||||
next_up_reason = f"longest_idle (via {next_profile['name']}, {idle_time_str})"
|
next_up_reason = f"longest_idle (via {next_profile['name']}, {idle_time_str})"
|
||||||
break
|
|
||||||
# --- End new logic ---
|
# --- End new logic ---
|
||||||
|
|
||||||
for group in profile_groups_config:
|
for group in profile_groups_config:
|
||||||
@@ -1672,9 +1737,11 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
if is_auth_sim:
row.extend([
p.get('success_count', 0),
+p.get('predicted_download_count', 0),
p.get('failure_count', 0),
p.get('tolerated_error_count', 0),
p.get('global_success_count', 0),
+p.get('global_predicted_download_count', 0),
p.get('global_failure_count', 0),
])
else: # is_download_sim or unknown

@@ -1700,7 +1767,7 @@ def _render_profile_details_table(manager, args, simulation_type, profile_groups
headers = ['Name', 'Proxy', 'State', 'Last Used']

if is_auth_sim:
-headers.extend(['AuthOK', 'AuthFail', 'Skip.Err', 'Tot.AuthOK', 'Tot.AuthFail'])
+headers.extend(['AuthOK', 'DataPred', 'AuthFail', 'Skip.Err', 'Tot.AuthOK', 'Tot.DataPred', 'Tot.AuthFail'])
else: # is_download_sim or unknown
headers.extend(['DataOK', 'DownFail', 'Skip.Err', 'Tot.DataOK', 'Tot.DownFail'])

@@ -1870,7 +1937,7 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
dl_success_rate = (dl_stats['total_downloads'] / total_dls * 100) if total_dls > 0 else 100

global_summary_str = (
-f"Auth: {total_reqs} reqs ({auth_stats['total_success']} OK, {auth_stats['total_failure']} Fail, {auth_stats['total_tolerated_error']} Tol.Err) | "
+f"Auth: {total_reqs} reqs ({auth_stats['total_success']} OK, {auth_stats.get('total_predicted_downloads', 0)} Tasks, {auth_stats['total_failure']} Fail, {auth_stats['total_tolerated_error']} Tol.Err) | "
f"OK Rate: {success_rate:.2f}% | "
f"Failed Locks: {auth_failed_locks} || "
f"Download: {total_dls} attempts ({dl_stats['total_downloads']} OK, {dl_stats['total_download_errors']} Fail) | "

@@ -1894,6 +1961,7 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):
astats.get('profiles', 0),
dstats.get('profiles', 0),
astats.get('success', 0),
+astats.get('predicted_downloads', 0),
astats.get('failure', 0),
astats.get('tolerated_error', 0),
dstats.get('downloads', 0),

@@ -1916,7 +1984,7 @@ def _render_merged_view(auth_manager, download_manager, args, file=sys.stdout):

if all_proxies:
print("\n--- Per-Proxy Stats (Merged) ---", file=file)
-proxy_headers = ['Proxy', 'State (A/D)', 'Profiles (A)', 'Profiles (D)', 'AuthOK', 'AuthFail', 'Skip.Err(A)', 'DataOK', 'DownFail', 'Skip.Err(D)']
+proxy_headers = ['Proxy', 'State (A/D)', 'Profiles (A)', 'Profiles (D)', 'AuthOK', 'DataPred', 'AuthFail', 'Skip.Err(A)', 'DataOK', 'DownFail', 'Skip.Err(D)']
print(tabulate(proxy_table_data, headers=proxy_headers, tablefmt='grid'), file=file)

print(f"\n--- Auth Simulation Profile Details ({args.auth_env}) ---", file=file)
@@ -51,10 +51,31 @@ class ProfileStateMachine(StateMachine):

pause = active.to(paused) | locked.to(paused) | resting.to(paused) | cooldown.to(paused)

-def __init__(self, manager: ProfileManager, profile_name: str, *args, **kwargs):
+def __init__(self, manager: ProfileManager, profile_name: str, initial_value=None):
self.manager = manager
self.profile_name = profile_name
-super().__init__(*args, **kwargs)
+# Call parent constructor with model=self
+super().__init__(model=self)
+# If initial_value is provided, set the current state without triggering transitions
+if initial_value is not None:
+# Convert to uppercase to match state values
+initial_value = initial_value.upper()
+# Check if we're not already in this state
+# Compare case-insensitively to handle any discrepancies
+if self.current_state.value.upper() != initial_value:
+# Find the corresponding state object
+target_state = None
+for state in self.states:
+# Compare both .value and .id case-insensitively
+if state.value.upper() == initial_value or (hasattr(state, 'id') and state.id.upper() == initial_value):
+target_state = state
+break
+if target_state:
+# Set current state without triggering transitions
+self.current_state = target_state
+else:
+# If state not found, log a warning but don't crash
+logger.warning(f"Could not find state '{initial_value}' (case-insensitive) for profile '{profile_name}'. Keeping current state '{self.current_state.value}'.")

# --- Action Methods ---
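The new constructor restores a persisted state string by matching it case-insensitively against the machine's states instead of replaying transitions. A minimal standalone sketch of just that lookup; the State class and the state list below are stand-ins for illustration, not the real python-statemachine objects:

# Hedged sketch of the case-insensitive state lookup added above.
from dataclasses import dataclass
from typing import Optional, Sequence

@dataclass
class State:
    id: str
    value: str

def find_state(states: Sequence[State], stored_value: str) -> Optional[State]:
    """Return the state whose value or id matches stored_value, ignoring case."""
    wanted = stored_value.upper()
    for state in states:
        if state.value.upper() == wanted or state.id.upper() == wanted:
            return state
    return None  # caller logs a warning and keeps the current state

states = [State("active", "ACTIVE"), State("resting", "RESTING"), State("banned", "BANNED")]
print(find_state(states, "resting"))   # State(id='resting', value='RESTING')
print(find_state(states, "unknown"))   # None -> keep current state, warn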
@@ -70,12 +70,14 @@ class QueueManager:
"""Returns the number of items in a queue."""
return self.redis.llen(queue_name)

-def populate(self, queue_name: str, file_path: str) -> int:
+def push_from_file(self, queue_name: str, file_path: str, wrap_key: Optional[str] = None) -> int:
-"""Populates a queue from a file (text with one item per line, or JSON with an array of strings)."""
+"""Populates a queue from a file (text with one item per line, or JSON with an array of items)."""
count = 0

if file_path.lower().endswith('.json'):
-logger.info("Detected JSON file. Attempting to parse as an array of strings.")
+if wrap_key:
+logger.warning("--wrap-file-line-in-json is ignored for JSON files, as they are expected to contain complete items.")
+logger.info("Detected JSON file. Attempting to parse as an array of items.")
try:
with open(file_path, 'r', encoding='utf-8') as f:
data = json.load(f)

@@ -83,13 +85,21 @@ class QueueManager:
logger.error("JSON file must contain a list/array.")
return 0

-items_to_add = [str(item).strip() for item in data if str(item).strip()]
+# Items can be strings or objects. If objects, they should be converted to JSON strings.
+items_to_add = []
+for item in data:
+if isinstance(item, str):
+items_to_add.append(item.strip())
+else:
+items_to_add.append(json.dumps(item))

+items_to_add = [item for item in items_to_add if item]

pipe = self.redis.pipeline()
for item in items_to_add:
pipe.rpush(queue_name, item)
count += 1
-if count % 1000 == 0:
+if count > 0 and count % 1000 == 0:
pipe.execute()
logger.info(f"Pushed {count} items...")
pipe.execute()

@@ -105,9 +115,13 @@ class QueueManager:
for line in f:
item = line.strip()
if item:
-pipe.rpush(queue_name, item)
+if wrap_key:
+payload = json.dumps({wrap_key: item})
+else:
+payload = item
+pipe.rpush(queue_name, payload)
count += 1
-if count % 1000 == 0:
+if count > 0 and count % 1000 == 0:
pipe.execute()
logger.info(f"Pushed {count} items...")
pipe.execute()

@@ -118,6 +132,45 @@ class QueueManager:
logger.info(f"Finished. Pushed a total of {count} items to '{queue_name}'.")
return count

+def push_generated(self, queue_name: str, prefix: str, count: int) -> int:
+"""Pushes generated payloads to a queue."""
+from datetime import datetime
+timestamp = datetime.now().strftime('%Y%m%dt%H%M')

+pipe = self.redis.pipeline()
+pushed_count = 0
+for i in range(count):
+generated_value = f"{prefix}_{timestamp}_{i:04d}"
+payload = json.dumps({"url": generated_value})
+pipe.rpush(queue_name, payload)
+pushed_count += 1
+if pushed_count > 0 and pushed_count % 1000 == 0:
+pipe.execute()
+logger.info(f"Pushed {pushed_count} of {count} items...")
+pipe.execute()
+logger.info(f"Finished. Pushed a total of {pushed_count} items to '{queue_name}'.")
+return pushed_count

+def push_static(self, queue_name: str, payload: str, count: int) -> int:
+"""Pushes a static payload multiple times to a queue."""
+try:
+json.loads(payload)
+except json.JSONDecodeError:
+logger.error(f"Invalid JSON in --payload-json: {payload}")
+return 0

+pipe = self.redis.pipeline()
+pushed_count = 0
+for _ in range(count):
+pipe.rpush(queue_name, payload)
+pushed_count += 1
+if pushed_count > 0 and pushed_count % 1000 == 0:
+pipe.execute()
+logger.info(f"Pushed {pushed_count} of {count} items...")
+pipe.execute()
+logger.info(f"Finished. Pushed a total of {pushed_count} items to '{queue_name}'.")
+return pushed_count

def clear(self, queue_name: str, dump_path: Optional[str] = None) -> int:
"""Clears a queue, optionally dumping its contents to a file."""
size = self.redis.llen(queue_name)
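All three push paths end up calling RPUSH with a string payload; what differs is how that string is built. A small standalone sketch of the payload shapes (no Redis involved; the URL and prefix values are made up):

import json
from datetime import datetime

# Text-file line with --wrap-file-line-in-json url
line = "https://example.com/watch?v=abc123"
print(json.dumps({"url": line}))            # {"url": "https://example.com/watch?v=abc123"}

# JSON-array entry that is an object -> re-serialized to a JSON string before RPUSH
item = {"url": "https://example.com/watch?v=abc123", "format": "140"}
print(json.dumps(item))

# Generated payload, e.g. prefix "loadtest" -> {"url": "loadtest_20250101t0930_0007"}
timestamp = datetime.now().strftime('%Y%m%dt%H%M')
print(json.dumps({"url": f"loadtest_{timestamp}_{7:04d}"}))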
@@ -174,10 +227,17 @@ def add_queue_manager_parser(subparsers):
peek_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.")
peek_parser.add_argument('--count', type=int, default=10, help='Number of items to show (default: 10)')

-# Populate command
+# Push command
-populate_parser = subparsers.add_parser('populate', help='Populate a queue from a file (one item per line).', parents=[common_parser])
+push_parser = subparsers.add_parser('push', help='Push items to a queue from a file, a generator, or a static payload.', parents=[common_parser])
-populate_parser.add_argument('file_path', help='Path to the file containing items to add.')
+push_parser.add_argument('queue_name', nargs='?', help="Name of the queue. Defaults to '<env>_stress_inbox'.")
-populate_parser.add_argument('--queue-name', help="Name of the queue to populate. Defaults to '<env>_stress_inbox'.")
+push_parser.add_argument('--count', type=int, default=1, help='Number of items to push (for --payload-json or --generate-payload-prefix).')

+source_group = push_parser.add_mutually_exclusive_group(required=True)
+source_group.add_argument('--from-file', dest='file_path', help='Path to a file containing items to add (one per line, or a JSON array).')
+source_group.add_argument('--payload-json', help='A static JSON payload to push. Use with --count to push multiple times.')
+source_group.add_argument('--generate-payload-prefix', help='Generate JSON payloads with a timestamp and counter. Example: {"url": "PREFIX_yyyymmddthhmm_0001"}. Use with --count.')

+push_parser.add_argument('--wrap-file-line-in-json', metavar='KEY', help="For text files (--from-file), wrap each line in a JSON object with the specified key (e.g., 'url' -> {\"url\": \"line_content\"}).")

# Clear command
clear_parser = subparsers.add_parser('clear', help='Clear a queue, optionally dumping its contents.', parents=[common_parser])
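A minimal standalone argparse sketch of the same source selection (the program and queue names here are hypothetical), showing that exactly one of the three sources must be given:

import argparse

parser = argparse.ArgumentParser(prog="queue-manager push")  # hypothetical prog name
parser.add_argument('queue_name', nargs='?')
parser.add_argument('--count', type=int, default=1)
source = parser.add_mutually_exclusive_group(required=True)
source.add_argument('--from-file', dest='file_path')
source.add_argument('--payload-json')
source.add_argument('--generate-payload-prefix')
parser.add_argument('--wrap-file-line-in-json', metavar='KEY')

# One source at a time is accepted:
print(parser.parse_args(['my_env_stress_inbox', '--payload-json', '{"url": "x"}', '--count', '5']))
# Two sources at once would make argparse exit with a "not allowed with argument" error:
# parser.parse_args(['--from-file', 'urls.txt', '--payload-json', '{}'])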
@@ -215,9 +275,9 @@ def main_queue_manager(args):
)

# For commands that operate on a single queue, set a default name based on the environment if not provided.
-is_single_queue_command = args.queue_command in ['peek', 'populate', 'clear']
+is_single_queue_command = args.queue_command in ['peek', 'push', 'clear']
if is_single_queue_command:
-# `populate` uses an option (--queue-name), while `peek` and `clear` use a positional argument.
+# `push`, `peek` and `clear` use a positional argument for queue_name.
# We check for `queue_name` attribute and if it's falsy (None or empty string).
if not getattr(args, 'queue_name', None):
default_queue_name = f"{args.env}_stress_inbox"

@@ -244,11 +304,21 @@ def main_queue_manager(args):
print(f"{i+1: >3}: {item}")
return 0

-elif args.queue_command == 'populate':
+elif args.queue_command == 'push':
+if args.file_path:
if not os.path.exists(args.file_path):
print(f"Error: File not found at '{args.file_path}'", file=sys.stderr)
return 1
-manager.populate(args.queue_name, args.file_path)
+if args.count > 1:
+logger.warning("--count is ignored when using --from-file.")
+manager.push_from_file(args.queue_name, args.file_path, args.wrap_file_line_in_json)
+elif args.payload_json:
+manager.push_static(args.queue_name, args.payload_json, args.count)
+elif args.generate_payload_prefix:
+if args.count <= 0:
+print("Error: --count must be 1 or greater for --generate-payload-prefix.", file=sys.stderr)
+return 1
+manager.push_generated(args.queue_name, args.generate_payload_prefix, args.count)
return 0

elif args.queue_command == 'clear':

@@ -11,6 +11,9 @@ aria2p
# For reading .env files for configuration
python-dotenv==1.0.1

+# For 'direct_docker_cli' orchestration mode in stress-policy
+docker

# For SOCKS proxy support in client tools
PySocks

@@ -167,7 +167,7 @@ Overridable Policy Parameters via --set:
parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.')
parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
-parser.add_argument('--profile-prefix', help="Shortcut to override the profile prefix for profile locking mode. Affects both auth and download stages.")
+parser.add_argument('--profile-prefix', '--user-prefix', dest='profile_prefix', help="Shortcut to override the profile prefix for profile locking mode. Affects both auth and download stages. Can be a comma-separated list.")
parser.add_argument('--start-from-url-index', type=int, help='Start processing from this line number (1-based) in the urls_file. Overrides saved state.')
parser.add_argument('--expire-time-shift-minutes', type=int, help="Consider URLs expiring in N minutes as expired. Overrides policy.")
@@ -53,6 +53,7 @@ def run_direct_batch_worker(worker_id, policy, state_manager, args, profile_mana
os.makedirs(save_dir, exist_ok=True)

last_used_profile_name = None
+last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set():
locked_profile = None
temp_batch_file = None

@@ -93,13 +94,24 @@ def run_direct_batch_worker(worker_id, policy, state_manager, args, profile_mana
if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
-logger.info(f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix}*): {states_summary}. Pausing for {polling_interval}s.")
+base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix}*): {states_summary}."
else:
-logger.info(f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix}'. Pausing for {polling_interval}s.")
+base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix}'."

+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stderr, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stderr)
+logger.info(f"{base_log_msg} Pausing for {polling_interval}s.")
+last_no_task_log_msg = base_log_msg
# --- End diagnostic logging ---
time.sleep(polling_interval)
continue

+if last_no_task_log_msg:
+print(file=sys.stderr)
+last_no_task_log_msg = ""
profile_name = locked_profile['name']
proxy_url = locked_profile['proxy']
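The pattern added here (and mirrored in the docker worker variants below) collapses a repeated "nothing to do" message into a stream of dots until the message changes. A small standalone sketch of the same idea, with a fake message sequence standing in for the worker loop:

import sys

def report(msg: str, last_msg: str) -> str:
    """Log msg only when it changes; print a dot while it repeats. Returns the new last_msg."""
    if msg == last_msg:
        print(".", end="", file=sys.stderr, flush=True)
    else:
        if last_msg:
            print(file=sys.stderr)   # terminate the run of dots
        print(msg, file=sys.stderr)  # stands in for logger.info(...)
    return msg

last = ""
for msg in ["no profiles free", "no profiles free", "no profiles free", "1 profile RESTING"]:
    last = report(msg, last)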
@@ -113,6 +113,7 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
return []

last_used_profile_name = None
+last_no_task_log_msg = ""
while not state_manager.shutdown_event.is_set():
locked_profile = None
temp_task_dir_host = None

@@ -120,11 +121,25 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
# --- Variables for robust finalization ---
url_batch_len = 0
batch_started = False
-downloads_per_url = 0 # Default to 0, meaning no increment unless configured
+num_formats_per_url = 0
+downloads_to_increment = 0
# ---
try:
-# 1. Lock a profile
+# 1. Lock a profile, trying any of the specified prefixes
-locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
+locked_profile = None
+prefixes_to_try = []
+if profile_prefix:
+prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]

+if prefixes_to_try:
+random.shuffle(prefixes_to_try)
+for prefix in prefixes_to_try:
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
+if locked_profile:
+break
+else:
+# Fallback for empty/no prefix, which means lock any available profile
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)

# --- New logic to avoid immediate reuse ---
avoid_reuse = direct_policy.get('avoid_immediate_profile_reuse', False)
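With --profile-prefix now accepting a comma-separated list, each lock attempt shuffles the prefixes and takes the first pool that yields a profile. A standalone sketch of that selection, with a stubbed lock function and made-up pool/profile names instead of the real ProfileManager:

import random
from typing import Optional

def try_lock(prefixes_arg: Optional[str], lock_profile) -> Optional[dict]:
    """lock_profile(prefix) returns a profile dict or None; prefixes_arg may be None or 'a,b,c'."""
    prefixes = [p.strip() for p in prefixes_arg.split(',') if p.strip()] if prefixes_arg else []
    if not prefixes:
        return lock_profile(None)  # no prefix: lock any available profile
    random.shuffle(prefixes)       # spread load across the listed pools
    for prefix in prefixes:
        profile = lock_profile(prefix)
        if profile:
            return profile
    return None

# Stub: only the 'dl_' pool currently has a free profile.
fake_pools = {"auth_": None, "dl_": {"name": "dl_03", "proxy": "socks5://127.0.0.1:1080"}}
print(try_lock("auth_,dl_", lambda prefix: fake_pools.get(prefix)))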
@@ -135,9 +150,21 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
wait_seconds = direct_policy.get('avoid_reuse_max_wait_seconds', 5)
time.sleep(wait_seconds)

-# After waiting, try to lock again.
+# After waiting, try to lock again, from any of the available prefixes
logger.info(f"[Worker {worker_id}] Attempting to lock a new profile after waiting.")
-locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
+locked_profile = None
+prefixes_to_try = []
+if profile_prefix:
+prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]

+if prefixes_to_try:
+random.shuffle(prefixes_to_try)
+for prefix in prefixes_to_try:
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
+if locked_profile:
+break
+else:
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)

if locked_profile and locked_profile['name'] == last_used_profile_name:
logger.warning(f"[Worker {worker_id}] Still locking the same profile '{locked_profile['name']}' after waiting. Proceeding to use it to avoid getting stuck.")

@@ -153,13 +180,24 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
-logger.info(f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s.")
+base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. Pool status ({profile_prefix or '*'}*): {states_summary}."
else:
-logger.info(f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s.")
+base_log_msg = f"[Worker {worker_id}] No auth profiles available to lock. No profiles found with prefix '{profile_prefix or '*'}'."

+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stdout, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stdout)
+logger.info(f"{base_log_msg} Pausing for {polling_interval}s.")
+last_no_task_log_msg = base_log_msg
# --- End diagnostic logging ---
time.sleep(polling_interval)
continue

+if last_no_task_log_msg:
+print(file=sys.stderr)
+last_no_task_log_msg = ""
profile_name = locked_profile['name']
proxy_url = locked_profile['proxy']
@@ -237,24 +275,33 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
url_batch_len = len(url_batch)
batch_started = True

-# --- Calculate how many download tasks will be generated ---
+# --- Calculate how many download tasks will be generated to set the counter ---
-# The "pending downloads" counter for an auth profile tracks the number of
-# info.json files it has generated that are waiting to be processed.
-# Each successful URL fetch creates one info.json file (one task).
-# The number of actual media files downloaded from that info.json is
-# irrelevant to the auth profile's counter.
-downloads_per_url = 0 # Default to 0, meaning no increment unless configured
downloads_per_url_config = gen_policy.get('downloads_per_url')
-if downloads_per_url_config:
+num_formats_per_url = 0 # Default to no increment
-downloads_per_url = 1 # We just need a flag to enable the logic, the count is per-URL.

-if downloads_per_url > 0:
+if downloads_per_url_config:
-# Increment by the number of URLs in the batch.
+if isinstance(downloads_per_url_config, int):
-downloads_to_increment = url_batch_len
+num_formats_per_url = downloads_per_url_config
-profile_manager_instance.increment_pending_downloads(profile_name, downloads_to_increment)
+elif downloads_per_url_config == 'from_download_policy':
-logger.info(f"[Worker {worker_id}] [{profile_name}] Preemptively incremented pending downloads by {downloads_to_increment} for the upcoming batch of {url_batch_len} URLs.")
+# Heuristic: count comma-separated groups in the format selector.
+# This mirrors how yt-dlp processes multiple format downloads.
+d_policy = policy.get('download_policy', {})
+formats_str = d_policy.get('formats', '')
+if formats_str:
+# Each comma separates a group from which one format is downloaded.
+num_formats_per_url = formats_str.count(',') + 1
else:
-logger.warning(f"[Worker {worker_id}] [{profile_name}] 'downloads_per_url' is not configured. Pending downloads counter will not be incremented for this batch.")
+num_formats_per_url = 1 # fallback to 1 if formats is empty
+elif downloads_per_url_config:
+# Fallback for non-int, non-'from_download_policy' truthy values
+num_formats_per_url = 1

+if num_formats_per_url > 0:
+downloads_to_increment = url_batch_len * num_formats_per_url
+profile_manager_instance.increment_pending_downloads(profile_name, downloads_to_increment)
+logger.info(f"[Worker {worker_id}] [{profile_name}] Preemptively incremented pending downloads by {downloads_to_increment} for a batch of {url_batch_len} URLs ({num_formats_per_url} format(s)/URL).")
+else:
+logger.warning(f"[Worker {worker_id}] [{profile_name}] 'downloads_per_url' is not configured or is zero. Pending downloads counter will not be incremented for this batch.")

end_idx = start_idx + len(url_batch)
logger.info(f"[Worker {worker_id}] [{profile_name}] Processing batch of {len(url_batch)} URLs (lines {start_idx + 1}-{end_idx} from source).")
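The heuristic above derives the per-URL task count from the yt-dlp format selector: each comma-separated group yields one download. A tiny standalone check of the arithmetic (the format strings are illustrative):

def formats_per_url(formats_str: str) -> int:
    # One download per comma-separated format group; default to 1 when unset.
    return formats_str.count(',') + 1 if formats_str else 1

assert formats_per_url("bestvideo*+bestaudio/best") == 1
assert formats_per_url("bv*+ba/b,140") == 2          # merged video+audio group, plus format 140

# Preemptive increment for a batch: 10 URLs x 2 formats -> counter += 20
url_batch_len, num_formats_per_url = 10, formats_per_url("bv*+ba/b,140")
print(url_batch_len * num_formats_per_url)  # 20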
@@ -480,14 +527,15 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
logger.error(f"Error during immediate post-processing from log line: {e}")

with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
if post_processed_successfully:
live_success_count += 1
logger.info(f"[Worker {worker_id}] [{profile_name}] Live success #{live_success_count} detected and post-processed.")
-profile_manager_instance.record_activity(profile_name, 'success')
+profile_manager_instance.record_activity(profile_name, 'success', is_dummy=is_dummy)
else:
live_failure_count += 1
logger.error(f"[Worker {worker_id}] [{profile_name}] Post-processing failed for a successful fetch. Recording as failure.")
-profile_manager_instance.record_activity(profile_name, 'failure')
+profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=is_dummy)
# --- End immediate post-processing ---
return False

@@ -495,9 +543,10 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
for pattern in fatal_error_patterns:
if re.search(pattern, line, re.IGNORECASE):
with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1
logger.error(f"[Worker {worker_id}] [{profile_name}] Live FATAL error #{live_failure_count} detected from log: {line}")
-profile_manager_instance.record_activity(profile_name, 'failure')
+profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=is_dummy)
if direct_policy.get('ban_on_fatal_error_in_batch'):
logger.warning(f"Banning profile '{profile_name}' immediately due to fatal error to stop container.")
profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during batch')

@@ -512,16 +561,18 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
for pattern in tolerated_error_patterns:
if re.search(pattern, line, re.IGNORECASE):
with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
live_tolerated_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live TOLERATED error #{live_tolerated_count} detected from log: {line}")
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
return False

# If it's an ERROR: line and not tolerated, it's a failure
with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live failure #{live_failure_count} detected from log: {line}")
-profile_manager_instance.record_activity(profile_name, 'failure')
+profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=is_dummy)

return False
@@ -544,14 +595,14 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
rand_val = random.random()
if rand_val < auth_skipped_rate:
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating tolerated failure for {video_id}.")
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=True)
elif rand_val < (auth_skipped_rate + auth_failure_rate):
logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal failure for {video_id}.")
-profile_manager_instance.record_activity(profile_name, 'failure')
+profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=True)
else:
# Success
files_created += 1
-profile_manager_instance.record_activity(profile_name, 'success')
+profile_manager_instance.record_activity(profile_name, 'success', is_dummy=True)

# Create a dummy file in the temp output dir to simulate success
dummy_output_path = Path(temp_task_dir_host) / f"{video_id}.info.json"

@@ -644,13 +695,14 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
finally:
if locked_profile and batch_started:
# --- Reconcile pending downloads counter ---
-if downloads_per_url > 0:
+if downloads_to_increment > 0:
-# We incremented by url_batch_len at the start.
+# We incremented at the start. Now we adjust for any URLs that failed to produce an info.json.
-# Now we adjust for any URLs that failed to produce an info.json.
+initial_increment = downloads_to_increment
-# The adjustment is the number of successes minus the number of attempts.
+actual_successes = live_success_count # This is count of successful info.jsons
-initial_increment = url_batch_len
-actual_successes = live_success_count
+expected_downloads_from_successes = actual_successes * num_formats_per_url
-adjustment = actual_successes - initial_increment
+adjustment = expected_downloads_from_successes - initial_increment

if adjustment != 0:
# The adjustment will be negative, effectively decrementing the counter for each failure.
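Putting the preemptive increment and the finalization adjustment together, a worked example with illustrative numbers: a batch of 10 URLs at 2 formats per URL bumps the counter by 20 up front; if only 7 URLs produced an info.json, the finally block applies -6 so the counter ends up reflecting the 14 downloads that can actually happen.

url_batch_len, num_formats_per_url = 10, 2
initial_increment = url_batch_len * num_formats_per_url       # +20 at batch start
actual_successes = 7                                          # info.json files actually produced
expected_downloads_from_successes = actual_successes * num_formats_per_url
adjustment = expected_downloads_from_successes - initial_increment
print(initial_increment, adjustment, initial_increment + adjustment)  # 20 -6 14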
@@ -680,10 +732,21 @@ def run_direct_docker_worker(worker_id, policy, state_manager, args, profile_man
if cooldown:
logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.")

+# For auth simulation, check if profile has pending downloads
+# If so, don't apply cooldown to avoid conflicting with enforcer
+profile_data = profile_manager_instance.get_profile(locked_profile['name'])
+should_apply_cooldown = cooldown
+if profile_data:
+pending_downloads = profile_manager_instance.get_pending_downloads(locked_profile['name'])
+rest_reason = profile_data.get('rest_reason')
+if pending_downloads > 0 or rest_reason == 'waiting_downloads':
+should_apply_cooldown = None
+logger.info(f"[Worker {worker_id}] Auth profile '{locked_profile['name']}' has pending downloads or is waiting for downloads. Not applying cooldown.")

unlocked_successfully = profile_manager_instance.unlock_profile(
locked_profile['name'],
owner=owner_id,
-rest_for_seconds=cooldown
+rest_for_seconds=should_apply_cooldown
)
if not unlocked_successfully:
logger.error(f"[Worker {worker_id}] FAILED to unlock profile '{locked_profile['name']}'. The profile may be stuck.")
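The intent of the block above, in line with the commit message (auth and downloads run on the dl machine while the enforcer runs on master), appears to be that a profile still owing downloads is left to the enforcer rather than being rested by the worker. A condensed sketch of just that decision, with plain values standing in for the ProfileManager calls:

from typing import Optional

def choose_cooldown(cooldown: Optional[int], pending_downloads: int, rest_reason: Optional[str]) -> Optional[int]:
    # Skip the worker-side cooldown when the enforcer is expected to manage this profile.
    if pending_downloads > 0 or rest_reason == 'waiting_downloads':
        return None
    return cooldown

print(choose_cooldown(300, pending_downloads=4, rest_reason=None))                 # None: enforcer decides
print(choose_cooldown(300, pending_downloads=0, rest_reason='waiting_downloads'))  # None
print(choose_cooldown(300, pending_downloads=0, rest_reason=None))                 # 300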
@@ -751,7 +814,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
return []

no_task_streak = 0
-last_used_profile_name = None
+last_no_task_log_msg = ""
task_counter = 0
while not state_manager.shutdown_event.is_set():
locked_profile = None

@@ -760,6 +823,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
was_banned_by_parser = False
task = None
task_id = None
+downloads_processed_in_task = 0
try:
if no_task_streak > 0 and not queue_policy: # Polling only makes sense for file mode
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)

@@ -769,9 +833,18 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
if profiles_in_prefix:
state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
-logger.info(f"[Worker {worker_id}] No tasks found or profiles available. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s. (Streak: {no_task_streak})")
+base_log_msg = f"[Worker {worker_id}] No tasks found or profiles available. Pool status ({profile_prefix or '*'}*): {states_summary}."
else:
-logger.info(f"[Worker {worker_id}] No tasks found or profiles available. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s. (Streak: {no_task_streak})")
+base_log_msg = f"[Worker {worker_id}] No tasks found or profiles available. No profiles found with prefix '{profile_prefix or '*'}'."

+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stdout, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stdout)
+full_log_msg = f"{base_log_msg} Pausing for {polling_interval}s. (Streak: {no_task_streak})"
+logger.info(full_log_msg)
+last_no_task_log_msg = base_log_msg
# --- End diagnostic logging ---
time.sleep(polling_interval)
if state_manager.shutdown_event.is_set(): continue
@@ -844,17 +917,27 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr

if claimed_task_path_host:
no_task_streak = 0
+if last_no_task_log_msg:
+print(file=sys.stderr)
+last_no_task_log_msg = ""
auth_profile_name, auth_env = None, None
info_data = None

-# --- Read info.json content and metadata first ---
+# In queue mode, the task object is the primary source of truth for auth metadata.
+# This check is conditional because in file-based mode, `task` will be None.
+if task:
+auth_profile_name = task.get('auth_profile_name')
+auth_env = task.get('auth_env')

+# --- Read info.json content and use its metadata as a fallback ---
try:
with open(claimed_task_path_host, 'r', encoding='utf-8') as f:
info_data = json.load(f)
-# This is critical for decrementing the counter in the finally block
+# Fallback to info.json metadata if not present in the task object.
+if not auth_profile_name or not auth_env:
metadata = info_data.get('_ytops_metadata', {})
-auth_profile_name = metadata.get('profile_name')
+auth_profile_name = auth_profile_name or metadata.get('profile_name')
-auth_env = metadata.get('auth_env')
+auth_env = auth_env or metadata.get('auth_env')
except (IOError, json.JSONDecodeError) as e:
logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path_host.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.")
continue # Skip to finally block to unlock profile
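The auth metadata now has two sources with a clear precedence: the queue task object first, then the _ytops_metadata block inside the info.json. A small sketch of that fallback (the profile and env values below are made up):

from typing import Optional, Tuple

def resolve_auth_metadata(task: Optional[dict], info_data: dict) -> Tuple[Optional[str], Optional[str]]:
    # Queue task wins; the info.json _ytops_metadata block fills whatever is missing.
    auth_profile_name = (task or {}).get('auth_profile_name')
    auth_env = (task or {}).get('auth_env')
    metadata = info_data.get('_ytops_metadata', {})
    return (auth_profile_name or metadata.get('profile_name'),
            auth_env or metadata.get('auth_env'))

info = {"_ytops_metadata": {"profile_name": "auth_01", "auth_env": "prod"}}
print(resolve_auth_metadata(None, info))                              # file mode: ('auth_01', 'prod')
print(resolve_auth_metadata({"auth_profile_name": "auth_07"}, info))  # queue mode: ('auth_07', 'prod')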
@@ -874,7 +957,12 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
source_of_format = "task file"
if not format_selection:
format_selection = d_policy.get('formats', '')
-source_of_format = "policy"
+source_of_format = "policy (download_policy.formats)"

+if not format_selection:
+ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
+format_selection = ytdlp_config_overrides.get('format', '')
+source_of_format = "policy (ytdlp_config_overrides.format)"

if not format_selection:
logger.warning(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download.")
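Format selection therefore falls through three levels: the task file, then download_policy.formats, then ytdlp_config_overrides.format. A compact sketch of the chain under those assumptions (the policy contents are illustrative):

def pick_format(task_format, d_policy, direct_policy):
    # Precedence: task file -> download_policy.formats -> ytdlp_config_overrides.format
    if task_format:
        return task_format, "task file"
    fmt = d_policy.get('formats', '')
    if fmt:
        return fmt, "policy (download_policy.formats)"
    fmt = direct_policy.get('ytdlp_config_overrides', {}).get('format', '')
    return fmt, ("policy (ytdlp_config_overrides.format)" if fmt else "none")

print(pick_format("", {"formats": ""}, {"ytdlp_config_overrides": {"format": "140"}}))
# ('140', 'policy (ytdlp_config_overrides.format)')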
@@ -907,17 +995,18 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
details = f"Dummy skipped failure for format {format_id}"
error_type = "DummySkippedFailure"
is_tolerated_error = True
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=True)
elif should_fail_fatal:
logger.warning(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure for format '{format_id}'.")
details = f"Dummy fatal failure for format {format_id}"
error_type = "DummyFailure"
-profile_manager_instance.record_activity(profile_name, 'download_error')
+profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=True)
else:
logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success for format '{format_id}'.")
success = True
details = f"Dummy success for format {format_id}"
-profile_manager_instance.record_activity(profile_name, 'download')
+profile_manager_instance.record_activity(profile_name, 'download', is_dummy=True)
+downloads_processed_in_task += 1

event = {
'type': 'direct_docker_download',

@@ -963,7 +1052,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
else:
logger.warning(f"[Worker {worker_id}] [{profile_name}] Skipping task '{claimed_task_path_host.name}' because its URL is expired.")

-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))

event = {
'type': 'direct_docker_download', 'profile': profile_name,
@@ -1066,23 +1155,23 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
def log_parser_callback(line):
nonlocal live_success_count, live_failure_count, live_tolerated_count, was_banned_by_parser

-# Success is a high-priority check. Only record one success per task.
+# Success is a high-priority check.
if '[download] 100% of' in line or 'has already been downloaded' in line:
with activity_lock:
-# Only count one success per task
+is_dummy = args.dummy or args.dummy_batch
-if live_success_count == 0:
live_success_count += 1
-logger.info(f"[Worker {worker_id}] [{profile_name}] Live download success detected from log.")
+logger.info(f"[Worker {worker_id}] [{profile_name}] Live download success #{live_success_count} detected from log.")
-profile_manager_instance.record_activity(profile_name, 'download')
+profile_manager_instance.record_activity(profile_name, 'download', is_dummy=is_dummy)
return False

# Check for fatal patterns
for pattern in fatal_error_patterns:
if re.search(pattern, line, re.IGNORECASE):
with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1
logger.error(f"[Worker {worker_id}] [{profile_name}] Live FATAL download error #{live_failure_count} detected from log: {line}")
-profile_manager_instance.record_activity(profile_name, 'download_error')
+profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=is_dummy)
if direct_policy.get('ban_on_fatal_error_in_batch'):
logger.warning(f"Banning profile '{profile_name}' immediately due to fatal download error to stop container.")
profile_manager_instance.update_profile_state(profile_name, 'BANNED', 'Fatal error during download')

@@ -1098,16 +1187,18 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
for pattern in tolerated_error_patterns:
if re.search(pattern, line, re.IGNORECASE):
with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
live_tolerated_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live TOLERATED download error #{live_tolerated_count} detected from log: {line}")
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
return False

# If it's an ERROR: line and not tolerated, it's a failure
with activity_lock:
+is_dummy = args.dummy or args.dummy_batch
live_failure_count += 1
logger.warning(f"[Worker {worker_id}] [{profile_name}] Live download failure #{live_failure_count} detected from log: {line}")
-profile_manager_instance.record_activity(profile_name, 'download_error')
+profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=is_dummy)

return False

@@ -1149,11 +1240,11 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
final_outcome = "download"
logger.warning(f"[Worker {worker_id}] [{profile_name}] No specific success/error log line matched, but exit code is 0. Assuming success, but this may indicate a parsing issue.")
# We record a success here as a fallback, in case the log parser missed it.
-profile_manager_instance.record_activity(profile_name, 'download')
+profile_manager_instance.record_activity(profile_name, 'download', is_dummy=(args.dummy or args.dummy_batch))
else:
final_outcome = "download_error"
logger.warning(f"[Worker {worker_id}] [{profile_name}] No specific error log line matched, but exit code was {retcode}. Recording a generic download_error.")
-profile_manager_instance.record_activity(profile_name, 'download_error')
+profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=(args.dummy or args.dummy_batch))

# --- Airflow Directory Logic ---
if success and d_policy.get('output_to_airflow_ready_dir'):

@@ -1231,6 +1322,9 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
event = { 'type': 'direct_docker_download', 'profile': profile_name, 'proxy_url': locked_profile['proxy'], 'success': success, 'details': event_details }
state_manager.log_event(event)

+# Store the number of detected successful downloads to use for decrementing the counter.
+downloads_processed_in_task = live_success_count

logger.info(f"[Worker {worker_id}] [{profile_name}] Task processing complete. Worker will now unlock profile and attempt next task.")

# 6. Clean up task file
@ -1274,25 +1368,34 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
|
|||||||
auth_manager = get_auth_manager(profile_manager_instance, auth_env)
|
auth_manager = get_auth_manager(profile_manager_instance, auth_env)
|
||||||
if auth_manager:
|
if auth_manager:
|
||||||
try:
|
try:
|
||||||
auth_manager.decrement_pending_downloads(auth_profile_name)
|
# Fallback: if no downloads were counted (e.g., due to an early exit or parsing issue),
|
||||||
logger.info(f"[Worker {worker_id}] Decremented pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}'.")
|
# but the task is being finalized, we must assume all potential downloads for this task
|
||||||
|
# are "processed" to prevent the auth profile from getting stuck.
|
||||||
|
if downloads_processed_in_task == 0:
|
||||||
|
logger.warning(f"[Worker {worker_id}] No downloads were counted for this task. Using policy to determine decrement count to avoid stuck profile.")
|
||||||
|
ytdlp_config_overrides = direct_policy.get('ytdlp_config_overrides', {})
|
||||||
|
formats_str = ytdlp_config_overrides.get('format', d_policy.get('formats', ''))
|
||||||
|
num_formats = formats_str.count(',') + 1 if formats_str else 1
|
||||||
|
downloads_processed_in_task = num_formats
|
||||||
|
logger.warning(f"[Worker {worker_id}] Decrementing by fallback count: {downloads_processed_in_task}")
|
||||||
|
|
||||||
|
if downloads_processed_in_task > 0:
|
||||||
|
new_count = auth_manager.increment_pending_downloads(auth_profile_name, -downloads_processed_in_task)
|
||||||
|
logger.info(f"[Worker {worker_id}] Decremented pending downloads for auth profile '{auth_profile_name}' by {downloads_processed_in_task} in env '{auth_env}'. New count: {new_count}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[Worker {worker_id}] Failed to decrement pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}': {e}", exc_info=True)
|
logger.error(f"[Worker {worker_id}] Failed to decrement pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}': {e}", exc_info=True)
|
||||||
else:
|
else:
|
||||||
logger.error(f"[Worker {worker_id}] Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
|
logger.error(f"[Worker {worker_id}] Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
|
||||||
else:
|
else:
|
||||||
logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env in info.json metadata. Pending downloads counter will not be decremented. (Profile: {auth_profile_name}, Env: {auth_env})")
|
logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env. Pending downloads counter will not be decremented. (Profile: {auth_profile_name}, Env: {auth_env}) Task had auth_profile_name: {task.get('auth_profile_name')}, auth_env: {task.get('auth_env')}")
|
||||||
|
|
||||||
if was_banned_by_parser:
|
if was_banned_by_parser:
|
||||||
logger.info(f"[Worker {worker_id}] Profile '{locked_profile['name']}' was already banned by the log parser. Skipping unlock/cooldown.")
|
logger.info(f"[Worker {worker_id}] Profile '{locked_profile['name']}' was already banned by the log parser. Skipping unlock/cooldown.")
|
||||||
else:
|
else:
|
||||||
last_used_profile_name = locked_profile['name']
|
|
||||||
cooldown = None
|
cooldown = None
|
||||||
|
|
||||||
# Only apply cooldown if a task was actually claimed and processed.
|
# Only apply cooldown if a task was actually claimed and processed.
|
||||||
if claimed_task_path_host:
|
if claimed_task_path_host:
|
||||||
# Enforcer is the only point where we configure to apply different policies,
|
|
||||||
# since we might restart enforcer, but won't restart stress-policy working on auth and downloads simultaneously.
|
|
||||||
# This is like applying a policy across multiple workers/machines without needing to restart each of them.
|
|
||||||
# DESIGN: The cooldown duration is not configured in the worker's policy.
|
# DESIGN: The cooldown duration is not configured in the worker's policy.
|
||||||
# Instead, it is read from a central Redis key. This key is set by the
|
# Instead, it is read from a central Redis key. This key is set by the
|
||||||
# policy-enforcer, making the enforcer the single source of truth for
|
# policy-enforcer, making the enforcer the single source of truth for
|
||||||
|
|||||||
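Note (reviewer sketch, not part of the diff): the fallback above derives the expected number of downloads from the yt-dlp format selector string by counting comma-separated selectors. A minimal standalone version of that rule, with an illustrative name:

    def fallback_decrement_count(formats_str: str) -> int:
        """One expected download per comma-separated selector; 1 if unset."""
        return formats_str.count(',') + 1 if formats_str else 1

    assert fallback_decrement_count("bestvideo+bestaudio,18,140") == 3
    assert fallback_decrement_count("") == 1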
@@ -46,11 +46,14 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m

 os.makedirs(output_dir, exist_ok=True)
 no_task_streak = 0
+last_no_task_log_msg = ""

 while not state_manager.shutdown_event.is_set():
 locked_profile = None
 claimed_task_path = None
 auth_profile_name, auth_env = None, None # For finally block
+downloads_to_decrement = 0
+formats_from_metadata, granularity_from_metadata = [], 'per_format'
 try:
 # 0. If no tasks were found, pause briefly.
 if no_task_streak > 0:
@@ -61,9 +64,19 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
 if profiles_in_prefix:
 state_counts = collections.Counter(p['state'] for p in profiles_in_prefix)
 states_summary = ', '.join(f"{count} {state}" for state, count in sorted(state_counts.items()))
-logger.info(f"[Worker {worker_id}] No tasks found for available profiles. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s. (Streak: {no_task_streak})")
+base_log_msg = f"[Worker {worker_id}] No tasks found for available profiles. Pool status ({profile_prefix or '*'}*): {states_summary}. Pausing for {polling_interval}s."
 else:
-logger.info(f"[Worker {worker_id}] No tasks found for available profiles. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s. (Streak: {no_task_streak})")
+base_log_msg = f"[Worker {worker_id}] No tasks found for available profiles. No profiles found with prefix '{profile_prefix or '*'}'. Pausing for {polling_interval}s."
+
+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stderr, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stderr) # Newline to clean up after dots
+
+full_log_msg = f"{base_log_msg} (Streak: {no_task_streak})"
+logger.info(full_log_msg)
+last_no_task_log_msg = base_log_msg
 # --- End diagnostic logging ---
 time.sleep(polling_interval)
 if state_manager.shutdown_event.is_set(): continue
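Note (reviewer sketch, not part of the diff): the quiet-polling pattern above logs a "no tasks" message once, then prints dots to stderr while the same message keeps repeating, and prints a newline when the message changes. A self-contained version of that helper, with hypothetical names:

    import sys
    import logging

    logger = logging.getLogger(__name__)

    def log_or_dot(base_log_msg: str, last_msg: str) -> str:
        """Log a message once, then print dots while it keeps repeating.

        Returns the new value for last_msg; the caller stores it between
        polling iterations (illustrative helper, not defined in the diff).
        """
        if base_log_msg == last_msg:
            print(".", end="", file=sys.stderr, flush=True)
        else:
            if last_msg:
                print(file=sys.stderr)  # newline to clean up after dots
            logger.info(base_log_msg)
        return base_log_msg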
@@ -83,6 +96,9 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m

 if claimed_task_path:
 no_task_streak = 0 # Reset streak
+if last_no_task_log_msg:
+print(file=sys.stderr) # Newline to clean up after dots
+last_no_task_log_msg = ""

 # --- Read metadata before processing/deleting file ---
 try:
@@ -91,6 +107,18 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
 metadata = info_data.get('_ytops_metadata', {})
 auth_profile_name = metadata.get('profile_name')
 auth_env = metadata.get('auth_env')
+# Use .get() without a default to distinguish "not present" (None) from "present and empty" ([]).
+raw_formats_from_metadata = metadata.get('formats_requested')
+granularity_from_metadata = metadata.get('download_task_granularity', 'per_format')
+
+# Normalize to a list for simulation logic, while preserving the original raw state for decrement logic.
+formats_from_metadata = raw_formats_from_metadata
+if formats_from_metadata is None:
+formats_from_metadata = []
+elif isinstance(formats_from_metadata, str):
+formats_from_metadata = [f.strip() for f in formats_from_metadata.split(',')]
+elif not isinstance(formats_from_metadata, list):
+formats_from_metadata = []
 except (IOError, json.JSONDecodeError) as e:
 logger.error(f"CRITICAL: Could not read or parse task file '{claimed_task_path.name}': {e}. This task will be skipped, but the pending downloads counter CANNOT be decremented.")
 continue # Skip to finally block to unlock profile
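Note (reviewer sketch, not part of the diff): the normalization above deliberately keeps the raw `formats_requested` value so "key absent" (None) can be told apart from "present but empty" ([]). A minimal standalone equivalent, with illustrative names:

    from typing import Any, List, Optional, Tuple

    def normalize_formats(raw: Optional[Any]) -> Tuple[Optional[Any], List[str]]:
        """Return the raw value (to tell 'absent' from 'empty') plus a list form."""
        if raw is None:
            return raw, []
        if isinstance(raw, str):
            return raw, [f.strip() for f in raw.split(',')]
        if isinstance(raw, list):
            return raw, raw
        return raw, []

    assert normalize_formats(None) == (None, [])
    assert normalize_formats("18, 140") == ("18, 140", ["18", "140"])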
@@ -152,10 +180,43 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m

 logger.info(f"[Worker {worker_id}] [{profile_name}] Processing task '{claimed_task_path.name}'...")
 if args.dummy or args.dummy_batch:
-logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DIRECT DOWNLOAD ==========")
+logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DOCKER DOWNLOAD PER-FORMAT SIMULATION ==========")
 logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path.name}")
-logger.info(f"[Worker {worker_id}] Would run command: {' '.join(shlex.quote(s) for s in cmd)}")
-logger.info(f"[Worker {worker_id}] With environment: {custom_env}")
+formats_to_simulate = []
+# Prioritize formats from the info.json metadata if the key was present
+if raw_formats_from_metadata is not None:
+formats_to_simulate = formats_from_metadata
+logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for {len(formats_to_simulate)} formats from info.json: {formats_to_simulate}")
+else:
+# Fallback to policy file if metadata key is absent
+formats_config = d_policy.get('formats')
+if isinstance(formats_config, str):
+formats_to_simulate = [f.strip() for f in formats_config.split(',')]
+elif isinstance(formats_config, list):
+formats_to_simulate = formats_config
+
+if not formats_to_simulate and raw_formats_from_metadata is None:
+logger.info(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download for backward compatibility.")
+formats_to_simulate = ['dummy_format']
+# For counting purposes, we should still decrement appropriately
+if granularity_from_metadata == 'per_url':
+downloads_to_decrement = 1
+else:
+downloads_to_decrement = 1
+
+# If granularity was 'per_url', we only decrement by 1, even if multiple
+# format selectors were passed (e.g., as one string).
+if granularity_from_metadata == 'per_url':
+downloads_to_decrement = 1
+else: # per_format or not specified
+if raw_formats_from_metadata is not None:
+# If key was present, trust the count from metadata. Can be 0.
+downloads_to_decrement = len(formats_from_metadata)
+else:
+# Key was absent. Decrement by the number of formats we are simulating
+# from policy, or 1 as a final fallback for old files.
+downloads_to_decrement = len(formats_to_simulate) if formats_to_simulate else 1
+
 dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
 min_seconds = dummy_settings.get('download_min_seconds', 0.5)
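Note (reviewer sketch, not part of the diff): the decrement rule above reduces to three cases. A standalone restatement under those assumptions, with illustrative names:

    from typing import List, Optional

    def count_downloads(granularity: str, raw_formats: Optional[object],
                        formats: List[str], simulated: List[str]) -> int:
        """per_url always counts 1; per_format trusts the metadata count when
        the key was present, else the number of simulated formats (min 1)."""
        if granularity == 'per_url':
            return 1
        if raw_formats is not None:
            return len(formats)
        return len(simulated) if simulated else 1

    assert count_downloads('per_url', "18,140", ["18", "140"], ["18", "140"]) == 1
    assert count_downloads('per_format', ["18", "140"], ["18", "140"], ["18", "140"]) == 2
    assert count_downloads('per_format', None, [], ["dummy_format"]) == 1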
@@ -163,46 +224,39 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
 failure_rate = dummy_settings.get('download_failure_rate', 0.0)
 skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)

-time.sleep(random.uniform(min_seconds, max_seconds))
+for i, format_id in enumerate(formats_to_simulate):
+logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for format '{format_id}'...")
+sim_duration = random.uniform(min_seconds, max_seconds)
+logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for {sim_duration:.2f}s (from policy range {min_seconds}-{max_seconds}s).")
+time.sleep(sim_duration)
+
 rand_val = random.random()
-should_fail_skipped = rand_val < skipped_rate
-should_fail_fatal = not should_fail_skipped and rand_val < (skipped_rate + failure_rate)
+is_skipped = rand_val < skipped_rate
+is_fatal = not is_skipped and rand_val < (skipped_rate + failure_rate)

-success = False
-details = ""
-error_type = None
-is_tolerated_error = False
-if should_fail_skipped:
-logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating skipped download failure for task '{claimed_task_path.name}'.")
-details = "Dummy skipped failure"
-error_type = "DummySkippedFailure"
-is_tolerated_error = True
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
-elif should_fail_fatal:
-logger.warning(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating fatal download failure for task '{claimed_task_path.name}'.")
-details = "Dummy fatal failure"
-error_type = "DummyFailure"
-profile_manager_instance.record_activity(profile_name, 'download_error')
+if is_skipped:
+logger.warning(f"[Worker {worker_id}] DUMMY: Simulating skipped download for format '{format_id}'.")
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=True)
+elif is_fatal:
+logger.error(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure for format '{format_id}'.")
+profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=True)
+else: # success
+logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success for format '{format_id}'.")
+profile_manager_instance.record_activity(profile_name, 'download', is_dummy=True)
+
+logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD SIMULATION ==========")
 else:
-logger.info(f"[Worker {worker_id}] [{profile_name}] DUMMY: Simulating download success for task '{claimed_task_path.name}'.")
-success = True
-details = "Dummy success"
-profile_manager_instance.record_activity(profile_name, 'download')
-event = {
-'type': 'direct_download',
-'profile': profile_name,
-'proxy_url': locked_profile['proxy'],
-'success': success,
-'details': details,
-'error_type': error_type,
-'is_tolerated_error': is_tolerated_error
-}
-state_manager.log_event(event)
-logger.info(f"========== [Worker {worker_id}] END DUMMY DIRECT DOWNLOAD ==========")
+# Determine downloads_to_decrement based on metadata
+if granularity_from_metadata == 'per_url':
+downloads_to_decrement = 1
+else: # per_format or not specified
+if raw_formats_from_metadata is not None:
+# If key was present, trust the count. It can be 0.
+downloads_to_decrement = len(formats_from_metadata)
 else:
+# Key was absent, fall back to 1 for old info.jsons
+downloads_to_decrement = 1

 # --- Real execution ---
 logger.info(f"[Worker {worker_id}] [{profile_name}] Running command: {' '.join(shlex.quote(s) for s in cmd)}")
 retcode, stdout, stderr = run_command(
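Note (reviewer sketch, not part of the diff): the simulation above splits a single random draw into three outcome bands. A self-contained restatement of that split, with illustrative names:

    import random

    def simulate_outcome(skipped_rate: float, failure_rate: float) -> str:
        """One draw decides the outcome, as in the per-format loop above.

        [0, skipped_rate)                          -> 'tolerated_error'
        [skipped_rate, skipped_rate+failure_rate)  -> 'download_error'
        otherwise                                  -> 'download'
        (Rates come from dummy_simulation_settings in the policy.)
        """
        rand_val = random.random()
        if rand_val < skipped_rate:
            return 'tolerated_error'
        if rand_val < skipped_rate + failure_rate:
            return 'download_error'
        return 'download'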
@@ -268,7 +322,13 @@ def run_direct_download_worker(worker_id, policy, state_manager, args, profile_m
 if claimed_task_path and auth_profile_name and auth_env:
 auth_manager = get_auth_manager(profile_manager_instance, auth_env)
 if auth_manager:
-auth_manager.decrement_pending_downloads(auth_profile_name)
+if downloads_to_decrement > 0:
+auth_manager.increment_pending_downloads(auth_profile_name, count=-downloads_to_decrement)
+elif downloads_to_decrement == 0:
+logger.warning(f"[Worker {worker_id}] `downloads_to_decrement` was zero. Pending downloads counter for '{auth_profile_name}' will not be changed.")
+else:
+# This shouldn't happen, but log it
+logger.error(f"[Worker {worker_id}] `downloads_to_decrement` was negative: {downloads_to_decrement}")
 else:
 logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
 elif claimed_task_path:
@@ -20,6 +20,7 @@ from copy import deepcopy
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Optional, Any, Tuple, Union
+from urllib.parse import urlparse, parse_qs

 from . import utils as sp_utils
 from .process_runners import run_command, run_docker_container, get_worker_id
@@ -36,6 +37,7 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
 exec_control = policy.get('execution_control', {})
 gen_policy = policy.get('info_json_generation_policy', {})
 queue_policy = policy.get('queue_policy', {})
+task_granularity = gen_policy.get('download_task_granularity', 'per_format')

 profile_prefix = gen_policy.get('profile_prefix')
 if not profile_prefix:
@@ -54,35 +56,58 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
 batch_size = queue_policy.get('batch_size', 1)
 logger.info(f"[Worker {worker_id}] Auth worker configured to process tasks in batches of {batch_size}.")
 task_counter = 0
+last_no_task_log_msg = ""

 while not state_manager.shutdown_event.is_set():
 locked_profile = None
 tasks = []

 try:
-# 1. Lock a profile FIRST
-locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
+# 1. Get a batch of tasks from the queue FIRST
+tasks = state_manager.get_auth_tasks_batch(batch_size)
+if not tasks:
+polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
+base_log_msg = f"[Worker {worker_id}] No tasks available in queue, polling."
+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stdout, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stdout)
+logger.info(base_log_msg)
+last_no_task_log_msg = base_log_msg
+time.sleep(polling_interval)
+continue
+
+if last_no_task_log_msg:
+print(file=sys.stderr)
+last_no_task_log_msg = ""
+
+# 2. Lock a profile, trying any of the specified prefixes
+locked_profile = None
+prefixes_to_try = []
+if profile_prefix:
+prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
+
+if prefixes_to_try:
+random.shuffle(prefixes_to_try)
+for prefix in prefixes_to_try:
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
+if locked_profile:
+break
+else:
+# Fallback for empty/no prefix, which means lock any available profile
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
+
 if not locked_profile:
 polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
-logger.debug(f"[Worker {worker_id}] No profiles available to lock. Sleeping for {polling_interval}s.")
+logger.warning(f"[Worker {worker_id}] No profiles available for {len(tasks)} task(s). Re-queueing and sleeping for {polling_interval}s.")
+state_manager.add_auth_tasks_batch(tasks)
 time.sleep(polling_interval)
 continue

 profile_name = locked_profile['name']
 proxy_url = locked_profile['proxy']

-# 2. Get a batch of tasks from the queue
-tasks = state_manager.get_auth_tasks_batch(batch_size)
-if not tasks:
-polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
-logger.debug(f"[Worker {worker_id}] No tasks available for profile '{profile_name}'. Unlocking and sleeping for {polling_interval}s.")
-# Unlock immediately since we have no work to do.
-# No cooldown is applied here to make the profile available again quickly.
-profile_manager_instance.unlock_profile(profile_name, owner=owner_id)
-locked_profile = None # To prevent double-unlock in finally
-time.sleep(polling_interval)
-continue

 logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' to process a batch of {len(tasks)} tasks.")

 # 3. Process each task in the batch
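Note (reviewer sketch, not part of the diff): the locking order above tries each comma-separated prefix in random order and falls back to "any profile" when no prefix is configured. A standalone version under those assumptions; `profile_manager` is assumed to expose `lock_profile(owner=..., profile_prefix=...)` as in the diff:

    import random
    from typing import Optional

    def lock_from_prefixes(profile_manager, owner_id: str,
                           profile_prefix: Optional[str]):
        """Try each comma-separated prefix in random order; None locks any."""
        prefixes = [p.strip() for p in (profile_prefix or '').split(',') if p.strip()]
        if not prefixes:
            return profile_manager.lock_profile(owner=owner_id, profile_prefix=None)
        random.shuffle(prefixes)
        for prefix in prefixes:
            locked = profile_manager.lock_profile(owner=owner_id, profile_prefix=prefix)
            if locked:
                return locked
        return None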
@@ -129,13 +154,34 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
 stderr = "Dummy fatal failure"
 else:
 success = True
-video_id = sp_utils.get_video_id(url) or f"dummy_{random.randint(1000, 9999)}"
+# In dummy mode, robustly extract ID from URL to avoid 'unknown_video_id'
+try:
+# First, try the robust library function
+video_id = sp_utils.get_video_id(url)
+if not video_id and url:
+# Fallback parsing for dummy URLs like "...?v=dummy_0099"
+parsed_url = urlparse(url)
+video_id = parse_qs(parsed_url.query).get('v', [None])[0]
+
+# Additional fallback for URLs like "youtube.com/watch?v=dummy_0028"
+if not video_id and 'dummy_' in url:
+dummy_match = re.search(r'dummy_\d+', url)
+if dummy_match:
+video_id = dummy_match.group(0)
+except Exception:
+video_id = None # Ensure video_id is None on parsing failure
+
+# If all extraction methods fail, use the task_id as a reliable fallback.
+if not video_id:
+logger.debug(f"Could not extract video ID from URL '{url}', using task ID '{task_id}' for dummy data.")
+video_id = task_id
+
 info_data = {'id': video_id, 'title': f'Dummy Video {video_id}', '_dummy': True, 'formats': [{'format_id': '18'}, {'format_id': '140'}]}
 else:
 client, req_params = state_manager.get_client_for_request(profile_name, gen_policy)
 cmd = [
 sys.executable, '-m', 'ytops_client.cli', 'get-info',
-'--client', client, '--profile', profile_name
+'--client', client or '', '--profile', profile_name
 ]
 if proxy_url:
 cmd.extend(['--proxy', proxy_url])
|
|||||||
if success and info_data:
|
if success and info_data:
|
||||||
try:
|
try:
|
||||||
auth_env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
|
auth_env_name = profile_manager_instance.key_prefix.replace('_profile_mgmt_', '')
|
||||||
info_data['_ytops_metadata'] = {
|
is_dummy_run = args.dummy or args.dummy_batch
|
||||||
|
|
||||||
|
# --- 1. Determine which formats will be requested ---
|
||||||
|
formats_to_download = queue_policy.get('formats_to_download')
|
||||||
|
requested_formats_list = []
|
||||||
|
if formats_to_download:
|
||||||
|
if task_granularity == 'per_url':
|
||||||
|
format_selector = formats_to_download
|
||||||
|
if isinstance(formats_to_download, list):
|
||||||
|
format_selector = ",".join(formats_to_download)
|
||||||
|
requested_formats_list = [format_selector]
|
||||||
|
else: # per_format
|
||||||
|
formats_source = formats_to_download
|
||||||
|
if formats_source == 'from_download_policy':
|
||||||
|
formats_source = policy.get('download_policy', {}).get('formats')
|
||||||
|
|
||||||
|
if formats_source == 'all':
|
||||||
|
requested_formats_list = [f['format_id'] for f in info_data.get('formats', [])]
|
||||||
|
elif isinstance(formats_source, list):
|
||||||
|
requested_formats_list = formats_source
|
||||||
|
elif isinstance(formats_source, str):
|
||||||
|
# A comma-separated string of format groups
|
||||||
|
requested_formats_list = [f.strip() for f in formats_source.split(',')]
|
||||||
|
elif formats_source:
|
||||||
|
# Handles integer format IDs or other non-string/list values
|
||||||
|
requested_formats_list = [str(formats_source)]
|
||||||
|
else:
|
||||||
|
# formats_source is None or empty
|
||||||
|
requested_formats_list = []
|
||||||
|
|
||||||
|
# --- 2. Create download tasks and save info.json(s) based on granularity ---
|
||||||
|
download_tasks_to_create = []
|
||||||
|
created_file_paths = []
|
||||||
|
|
||||||
|
if task_granularity == 'per_format' and requested_formats_list:
|
||||||
|
for i, format_id in enumerate(requested_formats_list):
|
||||||
|
# Deepcopy to avoid modifying the original info_data in the loop
|
||||||
|
task_info_data = deepcopy(info_data)
|
||||||
|
|
||||||
|
metadata = {
|
||||||
'profile_name': profile_name, 'proxy_url': proxy_url,
|
'profile_name': profile_name, 'proxy_url': proxy_url,
|
||||||
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
|
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
|
||||||
'task_id': task_id, 'url': url, 'auth_env': auth_env_name
|
'task_id': task_id, 'url': url, 'auth_env': auth_env_name,
|
||||||
|
'formats_requested': [format_id], # This task is for ONE format
|
||||||
|
'download_task_granularity': task_granularity
|
||||||
}
|
}
|
||||||
|
if is_dummy_run: metadata['_dummy'] = True
|
||||||
|
task_info_data['_ytops_metadata'] = metadata
|
||||||
|
|
||||||
final_path = None
|
final_path = None
|
||||||
if save_dir:
|
if save_dir:
|
||||||
video_id = info_data.get('id', 'unknown')
|
video_id = task_info_data.get('id') or task_id or 'unknown_video_id'
|
||||||
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy'
|
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy'
|
||||||
new_name = f"{video_id}-{profile_name}-{sanitized_proxy}.json"
|
# Sanitize format_id for use in filename
|
||||||
|
sanitized_format = re.sub(r'[^a-zA-Z0-9_-]', '_', format_id)
|
||||||
|
# Add index `i` to guarantee uniqueness even if sanitized format IDs clash
|
||||||
|
new_name = f"{video_id}-{profile_name}-{sanitized_proxy}-fmt{i}_{sanitized_format}.info.json"
|
||||||
|
final_path = os.path.join(save_dir, new_name)
|
||||||
|
with open(final_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(task_info_data, f, indent=2)
|
||||||
|
logger.info(f"[Worker {worker_id}] [{profile_name}] Saved format-specific info.json to '{final_path}'")
|
||||||
|
created_file_paths.append(final_path)
|
||||||
|
|
||||||
|
download_tasks_to_create.append({
|
||||||
|
'info_json_path': final_path, 'format_id': format_id,
|
||||||
|
'video_id': task_info_data.get('id'), 'url': url,
|
||||||
|
'auth_profile_name': profile_name, 'proxy_url': proxy_url,
|
||||||
|
'auth_env': auth_env_name,
|
||||||
|
'original_task': task
|
||||||
|
})
|
||||||
|
else: # per_url or default (including per_format with no formats)
|
||||||
|
metadata = {
|
||||||
|
'profile_name': profile_name, 'proxy_url': proxy_url,
|
||||||
|
'generation_timestamp_utc': datetime.now(timezone.utc).isoformat(),
|
||||||
|
'task_id': task_id, 'url': url, 'auth_env': auth_env_name,
|
||||||
|
'formats_requested': requested_formats_list,
|
||||||
|
'download_task_granularity': task_granularity
|
||||||
|
}
|
||||||
|
if is_dummy_run: metadata['_dummy'] = True
|
||||||
|
info_data['_ytops_metadata'] = metadata
|
||||||
|
|
||||||
|
final_path = None
|
||||||
|
if save_dir:
|
||||||
|
video_id = info_data.get('id') or task_id or 'unknown_video_id'
|
||||||
|
sanitized_proxy = re.sub(r'[:/]', '_', proxy_url) if proxy_url else 'noproxy'
|
||||||
|
new_name = f"{video_id}-{profile_name}-{sanitized_proxy}.info.json"
|
||||||
final_path = os.path.join(save_dir, new_name)
|
final_path = os.path.join(save_dir, new_name)
|
||||||
with open(final_path, 'w', encoding='utf-8') as f:
|
with open(final_path, 'w', encoding='utf-8') as f:
|
||||||
json.dump(info_data, f, indent=2)
|
json.dump(info_data, f, indent=2)
|
||||||
logger.info(f"[Worker {worker_id}] [{profile_name}] Saved info.json to '{final_path}'")
|
logger.info(f"[Worker {worker_id}] [{profile_name}] Saved info.json to '{final_path}'")
|
||||||
else:
|
created_file_paths.append(final_path)
|
||||||
# This case means auth-only (no downloads) and no save_dir specified.
|
|
||||||
# The info.json is not persisted.
|
|
||||||
logger.debug(f"[Worker {worker_id}] [{profile_name}] Auth-only task succeeded. No save_dir, so info.json is not saved.")
|
|
||||||
|
|
||||||
profile_manager_instance.record_activity(profile_name, 'success')
|
# For per_url, requested_formats_list should contain a single format selector string.
|
||||||
|
# The loop will run once, creating one task.
|
||||||
formats_to_download = queue_policy.get('formats_to_download')
|
if requested_formats_list:
|
||||||
download_tasks = []
|
for format_selector in requested_formats_list:
|
||||||
if formats_to_download:
|
download_tasks_to_create.append({
|
||||||
task_formats = []
|
'info_json_path': final_path, 'format_id': format_selector,
|
||||||
if formats_to_download == 'all':
|
|
||||||
task_formats = [f['format_id'] for f in info_data.get('formats', [])]
|
|
||||||
elif isinstance(formats_to_download, list):
|
|
||||||
task_formats = formats_to_download
|
|
||||||
else:
|
|
||||||
task_formats = [str(formats_to_download)]
|
|
||||||
|
|
||||||
for format_id in task_formats:
|
|
||||||
download_tasks.append({
|
|
||||||
'info_json_path': final_path, 'format_id': format_id,
|
|
||||||
'video_id': info_data.get('id'), 'url': url,
|
'video_id': info_data.get('id'), 'url': url,
|
||||||
'auth_profile_name': profile_name, 'proxy_url': proxy_url,
|
'auth_profile_name': profile_name, 'proxy_url': proxy_url,
|
||||||
'auth_env': auth_env_name,
|
'auth_env': auth_env_name,
|
||||||
'original_task': task
|
'original_task': task
|
||||||
})
|
})
|
||||||
|
else: # Handle the case where requested_formats_list is empty
|
||||||
|
logger.debug(f"[Worker {worker_id}] [{profile_name}] No formats requested, no download tasks created.")
|
||||||
|
|
||||||
if download_tasks:
|
|
||||||
added_count = state_manager.add_download_tasks_batch(download_tasks)
|
profile_manager_instance.record_activity(profile_name, 'success', is_dummy=(args.dummy or args.dummy_batch))
|
||||||
|
|
||||||
|
# --- 3. Dispatch download tasks ---
|
||||||
|
create_download_tasks = queue_policy.get('create_download_tasks', True)
|
||||||
|
if download_tasks_to_create:
|
||||||
|
num_tasks_to_process = 0
|
||||||
|
if create_download_tasks:
|
||||||
|
added_count = state_manager.add_download_tasks_batch(download_tasks_to_create)
|
||||||
logger.info(f"[Worker {worker_id}] [{profile_name}] Added {added_count} download tasks to queue")
|
logger.info(f"[Worker {worker_id}] [{profile_name}] Added {added_count} download tasks to queue")
|
||||||
profile_manager_instance.increment_pending_downloads(profile_name, count=added_count)
|
num_tasks_to_process = added_count
|
||||||
|
else:
|
||||||
|
num_tasks = len(download_tasks_to_create)
|
||||||
|
logger.info(f"[Worker {worker_id}] [{profile_name}] File-based workflow: created {num_tasks} tasks (no queue tasks created)")
|
||||||
|
num_tasks_to_process = num_tasks
|
||||||
|
|
||||||
state_manager.report_auth_success(task_id, {
|
if num_tasks_to_process > 0:
|
||||||
|
profile_manager_instance.increment_pending_downloads(profile_name, count=num_tasks_to_process)
|
||||||
|
profile_manager_instance.record_predicted_downloads(profile_name, count=num_tasks_to_process, is_dummy=(args.dummy or args.dummy_batch))
|
||||||
|
|
||||||
|
report_payload = {
|
||||||
"url": url, "video_id": info_data.get('id'), "profile_name": profile_name,
|
"url": url, "video_id": info_data.get('id'), "profile_name": profile_name,
|
||||||
"proxy_url": proxy_url, "info_json_path": final_path,
|
"proxy_url": proxy_url, "info_json_path": created_file_paths[0] if len(created_file_paths) == 1 else created_file_paths,
|
||||||
"download_tasks_created": len(download_tasks)
|
"download_tasks_created": len(download_tasks_to_create)
|
||||||
})
|
}
|
||||||
|
if is_dummy_run:
|
||||||
|
report_payload['_dummy'] = True
|
||||||
|
state_manager.report_auth_success(task_id, report_payload)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[Worker {worker_id}] [{profile_name}] Error processing successful auth result: {e}", exc_info=True)
|
logger.error(f"[Worker {worker_id}] [{profile_name}] Error processing successful auth result: {e}", exc_info=True)
|
||||||
profile_manager_instance.record_activity(profile_name, 'failure')
|
profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
|
||||||
state_manager.report_auth_failure(task_id, {"error": f"Error processing info.json: {str(e)}", "url": url})
|
state_manager.report_auth_failure(task_id, {"error": f"Error processing info.json: {str(e)}", "url": url})
|
||||||
else:
|
else:
|
||||||
is_bot_error = "Sign in to confirm you're not a bot" in stderr
|
is_bot_error = "Sign in to confirm you're not a bot" in stderr
|
||||||
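Note (reviewer sketch, not part of the diff): the granularity switch above decides how many download tasks a single successful auth produces. A minimal illustration of that fan-out; the real tasks also carry info_json_path, proxy and auth metadata:

    from typing import Dict, List

    def plan_download_tasks(granularity: str, formats: List[str]) -> List[Dict]:
        """per_format fans out one task per format id; per_url emits a single
        task whose format_id is the combined selector string."""
        if granularity == 'per_format':
            return [{'format_id': f} for f in formats]
        selector = ",".join(formats)
        return [{'format_id': selector}] if selector else []

    assert len(plan_download_tasks('per_format', ['18', '140'])) == 2
    assert plan_download_tasks('per_url', ['18', '140']) == [{'format_id': '18,140'}]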
@@ -240,19 +368,19 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
 if is_unavailable or is_private or is_deleted or is_dummy_skipped:
 reason = "Video unavailable" if is_unavailable else "Private video" if is_private else "Video removed" if is_deleted else "Dummy skipped"
 logger.warning(f"[Worker {worker_id}] [{profile_name}] Auth skipped for {url}: {reason}")
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
 state_manager.report_auth_skipped(task_id, {"url": url, "reason": reason, "stderr": stderr})
 else:
 error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else "Dummy fatal failure" if "Dummy fatal failure" in stderr else f"Exit code {retcode}"
 logger.error(f"[Worker {worker_id}] [{profile_name}] Authentication failed ({error_type}): {url}")
-profile_manager_instance.record_activity(profile_name, 'failure')
+profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
 state_manager.report_auth_failure(task_id, {"url": url, "error_type": error_type, "stderr": stderr, "exit_code": retcode})

 except Exception as e:
 logger.error(f"[Worker {worker_id}] [{profile_name}] Unexpected error processing task {task_id}: {e}", exc_info=True)
 if task_id:
 state_manager.report_auth_failure(task_id, {"error": f"Unexpected error: {str(e)}", "url": url or "unknown"})
-profile_manager_instance.record_activity(profile_name, 'failure')
+profile_manager_instance.record_activity(profile_name, 'failure', is_dummy=(args.dummy or args.dummy_batch))
 finally:
 if temp_task_dir and os.path.exists(temp_task_dir):
 shutil.rmtree(temp_task_dir)
@@ -262,7 +390,7 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
 except Exception as e:
 logger.error(f"[Worker {worker_id}] Unexpected error in outer worker loop: {e}", exc_info=True)
 if locked_profile:
-profile_manager_instance.record_activity(locked_profile['name'], 'failure')
+profile_manager_instance.record_activity(locked_profile['name'], 'failure', is_dummy=(args.dummy or args.dummy_batch))

 finally:
 if locked_profile:
@@ -276,16 +404,27 @@ def run_queue_auth_worker(worker_id, policy, state_manager, args, profile_manage
 elif isinstance(val, int):
 cooldown = val
 except (json.JSONDecodeError, TypeError):
-if cooldown_config.isdigit():
+if isinstance(cooldown_config, str) and cooldown_config.isdigit():
 cooldown = int(cooldown_config)

-if cooldown:
-logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {cooldown}s.")
+# For auth simulation, check if profile has pending downloads
+# If so, don't apply cooldown to avoid conflicting with enforcer
+profile_data = profile_manager_instance.get_profile(locked_profile['name'])
+should_apply_cooldown = cooldown
+if profile_data:
+pending_downloads = profile_manager_instance.get_pending_downloads(locked_profile['name'])
+rest_reason = profile_data.get('rest_reason')
+if pending_downloads > 0 or rest_reason == 'waiting_downloads':
+should_apply_cooldown = None
+logger.info(f"[Worker {worker_id}] Auth profile '{locked_profile['name']}' has pending downloads or is waiting for downloads. Not applying cooldown.")
+
+if should_apply_cooldown:
+logger.info(f"[Worker {worker_id}] Putting profile '{locked_profile['name']}' into COOLDOWN for {should_apply_cooldown}s.")
+
 profile_manager_instance.unlock_profile(
 locked_profile['name'],
 owner=owner_id,
-rest_for_seconds=cooldown
+rest_for_seconds=should_apply_cooldown
 )

 logger.info(f"[Worker {worker_id}] Queue auth worker exiting.")
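Note (reviewer sketch, not part of the diff): the unlock path above drops the cooldown when the profile still has pending downloads (or is already resting with reason 'waiting_downloads'), leaving that profile for the policy-enforcer to manage. A standalone restatement of that decision, with illustrative names:

    from typing import Optional

    def effective_cooldown(cooldown: Optional[int],
                           pending_downloads: int,
                           rest_reason: Optional[str]) -> Optional[int]:
        """Return None (no cooldown) while the enforcer still expects downloads."""
        if pending_downloads > 0 or rest_reason == 'waiting_downloads':
            return None
        return cooldown

    assert effective_cooldown(30, 2, None) is None
    assert effective_cooldown(30, 0, 'waiting_downloads') is None
    assert effective_cooldown(30, 0, None) == 30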
@@ -311,6 +450,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
 logger.info(f"[Worker {worker_id}] Will save downloads to '{output_dir}'")

 task_counter = 0
+last_no_task_log_msg = ""

 while not state_manager.shutdown_event.is_set():
 locked_profile = None
@@ -322,7 +462,14 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
 task = state_manager.get_download_task()
 if not task:
 polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
-logger.debug(f"[Worker {worker_id}] No download tasks available in queue. Sleeping for {polling_interval}s.")
+base_log_msg = f"[Worker {worker_id}] No download tasks available in queue."
+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stderr, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stderr)
+logger.debug(f"{base_log_msg} Sleeping for {polling_interval}s.")
+last_no_task_log_msg = base_log_msg
 time.sleep(polling_interval)
 continue

@@ -377,20 +524,45 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
 if specific_profile:
 locked_profile = profile_manager_instance.lock_profile(owner=owner_id, specific_profile_name=specific_profile)
 if not locked_profile:
-logger.warning(f"[Worker {worker_id}] Could not lock specific profile '{specific_profile}'. Trying any profile with prefix.")
-locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
-else:
-locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=profile_prefix)
+logger.warning(f"[Worker {worker_id}] Could not lock specific profile '{specific_profile}'. Trying any profile from configured prefixes.")
+if not locked_profile:
+# Lock from any of the specified prefixes
+prefixes_to_try = []
+if profile_prefix:
+prefixes_to_try = [p.strip() for p in profile_prefix.split(',') if p.strip()]
+
+if prefixes_to_try:
+random.shuffle(prefixes_to_try)
+for prefix in prefixes_to_try:
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=prefix)
+if locked_profile:
+break
+
+if not locked_profile:
+# Fallback for empty/no prefix, or if no profile from list was lockable
+locked_profile = profile_manager_instance.lock_profile(owner=owner_id, profile_prefix=None)
+
 if not locked_profile:
 polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
-logger.warning(f"[Worker {worker_id}] No profiles available for task {task_id}. Re-queueing and sleeping for {polling_interval}s.")
+base_log_msg = f"[Worker {worker_id}] No profiles available, polling."
+if base_log_msg == last_no_task_log_msg:
+print(".", end="", file=sys.stderr, flush=True)
+else:
+if last_no_task_log_msg:
+print(file=sys.stderr)
+logger.info(f"{base_log_msg} Re-queueing task {task_id} and sleeping for {polling_interval}s.")
+last_no_task_log_msg = base_log_msg
+
 # Re-queue the task by adding it back to the inbox.
 state_manager.add_download_tasks_batch([task])
 # The 'in_progress' marker for this attempt will be cleaned up by the finally block.
 time.sleep(polling_interval)
 continue

+if last_no_task_log_msg:
+print(file=sys.stderr)
+last_no_task_log_msg = ""
 profile_name = locked_profile['name']
 proxy_url = locked_profile['proxy']
 logger.info(f"[Worker {worker_id}] Locked profile '{profile_name}' with proxy '{proxy_url}'")
@@ -480,7 +652,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
 except Exception as e:
 logger.error(f"[Worker {worker_id}] Failed to move files to Airflow-ready directory: {e}", exc_info=True)

-profile_manager_instance.record_activity(profile_name, 'download_success')
+profile_manager_instance.record_activity(profile_name, 'download', is_dummy=(args.dummy or args.dummy_batch))
 state_manager.report_download_success(task_id, {
 "video_id": video_id, "url": url, "format_id": format_id,
 "profile_name": profile_name, "proxy_url": proxy_url,
@@ -508,7 +680,7 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma

 if is_unavailable or is_format_error:
 logger.warning(f"[Worker {worker_id}] Download skipped: {video_id or url} format {format_id}")
-profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=(args.dummy or args.dummy_batch))
 state_manager.report_download_skipped(task_id, {
 "video_id": video_id, "url": url, "format_id": format_id,
 "reason": "Video unavailable" if is_unavailable else "Format not available", "stderr": stderr
@@ -516,23 +688,13 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
 else:
 error_type = "Bot detection" if is_bot_error else "Timeout" if is_timeout_error else f"Exit code {retcode}"
 logger.error(f"[Worker {worker_id}] Download failed ({error_type}): {video_id or url} format {format_id}")
-profile_manager_instance.record_activity(profile_name, 'download_error')
+profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=(args.dummy or args.dummy_batch))
 state_manager.report_download_failure(task_id, {
 "video_id": video_id, "url": url, "format_id": format_id,
 "error_type": error_type, "stderr": stderr, "exit_code": retcode,
 "original_task": task
 })

-# Decrement pending downloads counter on the original auth profile, regardless of outcome
-if auth_profile_name and auth_env:
-auth_manager = get_auth_manager(profile_manager_instance, auth_env)
-if auth_manager:
-auth_manager.decrement_pending_downloads(auth_profile_name)
-else:
-logger.error(f"Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
-elif task: # Only warn if we had a task but couldn't get metadata
-logger.warning(f"Could not find auth profile name and/or auth_env in info.json metadata. Pending downloads counter will not be decremented.")

 except Exception as e:
 logger.error(f"[Worker {worker_id}] Unexpected error: {e}", exc_info=True)
 if task and task_id:
@@ -543,9 +705,22 @@ def run_queue_download_worker(worker_id, policy, state_manager, args, profile_ma
 "error": f"Unexpected error: {str(e)}", "original_task": task
 })
 if locked_profile:
-profile_manager_instance.record_activity(locked_profile['name'], 'download_error')
+profile_manager_instance.record_activity(locked_profile['name'], 'download_error', is_dummy=(args.dummy or args.dummy_batch))

 finally:
+# Decrement pending downloads counter on the original auth profile, regardless of outcome.
+# This is critical for releasing the auth profile from the 'waiting_downloads' state.
+if task: # Only attempt to decrement if a task was actually pulled.
+if auth_profile_name and auth_env:
+auth_manager = get_auth_manager(profile_manager_instance, auth_env)
+if auth_manager:
+new_count = auth_manager.decrement_pending_downloads(auth_profile_name)
+logger.info(f"[Worker {worker_id}] Decremented pending downloads for auth profile '{auth_profile_name}' in env '{auth_env}'. New count: {new_count}")
+else:
+logger.error(f"[Worker {worker_id}] Could not get auth profile manager for env '{auth_env}'. Pending downloads counter will not be decremented.")
+else:
+logger.warning(f"[Worker {worker_id}] Could not find auth profile name and/or auth_env in task metadata. Pending downloads counter will not be decremented.")
+
 if locked_profile:
 cooldown = None
 cooldown_config = profile_manager_instance.get_config('unlock_cooldown_seconds')
@@ -904,6 +904,22 @@ class StateManager:

 return self.queue_provider.remove_in_progress(self.queue_provider.AUTH_PROGRESS, task_id)

+def add_auth_task(self, task: Dict) -> bool:
+"""Add an authentication task to the queue."""
+if not self.queue_provider:
+logger.error("Queue provider not initialized")
+return False
+
+return self.queue_provider.add_task(self.queue_provider.AUTH_INBOX, task)
+
+def add_auth_tasks_batch(self, tasks: List[Dict]) -> int:
+"""Add a batch of authentication tasks to the queue."""
+if not self.queue_provider:
+logger.error("Queue provider not initialized")
+return 0
+
+return self.queue_provider.add_tasks_batch(self.queue_provider.AUTH_INBOX, tasks)
+
 def add_download_task(self, task: Dict) -> bool:
 """Add a download task to the queue."""
 if not self.queue_provider:
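Note (reviewer sketch, not part of the diff): the new `add_auth_tasks_batch` helper is what lets the auth worker push an already-claimed batch back to the inbox when no profile can be locked. A hedged usage sketch; the `state_manager` argument is assumed to be the StateManager from this diff, and the return value is assumed to be the count reported by the queue provider:

    def requeue_if_unlockable(state_manager, tasks, locked_profile):
        """Push a claimed batch back to the auth inbox when no profile could be locked."""
        if tasks and not locked_profile:
            return state_manager.add_auth_tasks_batch(tasks)
        return 0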
@ -40,6 +40,7 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
no_task_streak = 0
|
no_task_streak = 0
|
||||||
|
last_no_task_log_msg = ""
|
||||||
|
|
||||||
while not state_manager.shutdown_event.is_set():
|
while not state_manager.shutdown_event.is_set():
|
||||||
locked_profile = None
|
locked_profile = None
|
||||||
@ -48,7 +49,15 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
|
|||||||
# 0. If no tasks were found previously, pause briefly.
|
# 0. If no tasks were found previously, pause briefly.
|
||||||
if no_task_streak > 0:
|
if no_task_streak > 0:
|
||||||
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
|
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
|
||||||
logger.info(f"[Worker {worker_id}] No tasks found in previous attempt(s). Pausing for {polling_interval}s. (Streak: {no_task_streak})")
|
base_log_msg = f"[Worker {worker_id}] No available tasks found for any active profiles."
|
||||||
|
if base_log_msg == last_no_task_log_msg:
|
||||||
|
print(".", end="", file=sys.stderr, flush=True)
|
||||||
|
else:
|
||||||
|
if last_no_task_log_msg:
|
||||||
|
print(file=sys.stderr)
|
||||||
|
full_log_msg = f"{base_log_msg} Pausing for {polling_interval}s. (Streak: {no_task_streak})"
|
||||||
|
logger.info(full_log_msg)
|
||||||
|
last_no_task_log_msg = base_log_msg
|
||||||
time.sleep(polling_interval)
|
time.sleep(polling_interval)
|
||||||
if state_manager.shutdown_event.is_set(): continue
|
if state_manager.shutdown_event.is_set(): continue
|
||||||
|
|
||||||
@ -60,9 +69,6 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
|
|||||||
if not locked_profile:
|
if not locked_profile:
|
||||||
# No task/profile combo was available.
|
# No task/profile combo was available.
|
||||||
no_task_streak += 1
|
no_task_streak += 1
|
||||||
polling_interval = exec_control.get('worker_polling_interval_seconds', 1)
|
|
||||||
logger.info(f"[Worker {worker_id}] No available tasks found for any active profiles. Pausing for {polling_interval}s.")
|
|
||||||
time.sleep(polling_interval)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
profile_name = locked_profile['name']
|
profile_name = locked_profile['name']
|
||||||
@ -70,6 +76,9 @@ def run_throughput_worker(worker_id, policy, state_manager, args, profile_manage
        # We have a task and a lock.
        if claimed_task_path:
            no_task_streak = 0 # Reset streak
+            if last_no_task_log_msg:
+                print(file=sys.stderr)
+                last_no_task_log_msg = ""
            # 3. Process the task
            try:
                with open(claimed_task_path, 'r', encoding='utf-8') as f:
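The streak handling above collapses repeated "no tasks" messages into one log line followed by progress dots on stderr, and the claimed-task branch terminates the dot run and resets the state. A minimal, self-contained sketch of that idiom, with illustrative names (nothing below is taken verbatim from the module):

import sys
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("worker")

_last_msg = ""

def log_once_then_dots(msg: str) -> None:
    """Log a message on first occurrence; print a dot to stderr for each repeat."""
    global _last_msg
    if msg == _last_msg:
        # Same condition as last time: emit a compact progress dot, no new log line.
        print(".", end="", file=sys.stderr, flush=True)
    else:
        if _last_msg:
            # Finish the previous dot run before starting a fresh log line.
            print(file=sys.stderr)
        logger.info(msg)
        _last_msg = msg

if __name__ == "__main__":
    for _ in range(3):
        log_once_then_dots("No available tasks found for any active profiles.")
    log_once_then_dots("Claimed a task; resuming work.")

Printing the dots to stderr keeps them off stdout and outside the logging handlers while still signalling that an idle worker is alive.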
@ -319,7 +319,7 @@ def apply_overrides(policy, overrides):
    return policy


-def display_effective_policy(policy, name, sources=None, profile_names=None, original_workers_setting=None):
+def display_effective_policy(policy, name, args, sources=None, profile_names=None, original_workers_setting=None):
    """Prints a human-readable summary of the effective policy."""
    logger = logging.getLogger(__name__)
    logger.info(f"--- Effective Policy: {name} ---")
@ -328,6 +328,8 @@ def display_effective_policy(policy, name, sources=None, profile_names=None, ori
    orchestration_mode = settings.get('orchestration_mode')

    logger.info(f"Mode: {settings.get('mode', 'full_stack')}")
+    if args and args.profile_prefix:
+        logger.info(f"Profile Prefix (from CLI): {args.profile_prefix}")
    if profile_names:
        num_profiles = len(profile_names)
        logger.info(f"Profiles found: {num_profiles}")
@ -188,7 +188,7 @@ def find_task_and_lock_profile(profile_manager, owner_id, profile_prefix, policy
    if not info_json_dir:
        return None, None

-    logger.info(f"[Worker {worker_id}] Scanning for tasks in '{info_json_dir}'...")
+    logger.debug(f"[Worker {worker_id}] Scanning for tasks in '{info_json_dir}'...")

    # 1. Get all available task files and group by profile
    try:
@ -223,7 +223,21 @@ def find_task_and_lock_profile(profile_manager, owner_id, profile_prefix, policy

    # 2. Get ACTIVE profiles from Redis.
    active_profiles = profile_manager.list_profiles(state_filter='ACTIVE')
-    active_profile_names = {p['name'] for p in active_profiles if p['name'].startswith(profile_prefix or '')}
+    prefixes_to_check = []
+    if profile_prefix:
+        prefixes_to_check = [p.strip() for p in profile_prefix.split(',') if p.strip()]
+
+    active_profile_names = set()
+    if prefixes_to_check:
+        for p in active_profiles:
+            for prefix in prefixes_to_check:
+                if p['name'].startswith(prefix):
+                    active_profile_names.add(p['name'])
+                    break
+    else:
+        # If no prefixes are specified, consider all active profiles.
+        active_profile_names = {p['name'] for p in active_profiles}

    # 3. Find profiles that are both ACTIVE and have tasks.
    candidate_profiles = list(active_profile_names.intersection(tasks_by_profile.keys()))
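The block above replaces the single `startswith` filter with support for a comma-separated list of prefixes, falling back to all active profiles when no prefix is given. A self-contained sketch of the same matching rule over hypothetical data (the helper name and the sample profiles are made up; only the logic mirrors the hunk):

def filter_active_by_prefixes(active_profiles, profile_prefix):
    """Return names of active profiles matching any comma-separated prefix; empty means all."""
    prefixes = [p.strip() for p in (profile_prefix or '').split(',') if p.strip()]
    if not prefixes:
        return {p['name'] for p in active_profiles}
    return {
        p['name']
        for p in active_profiles
        if any(p['name'].startswith(prefix) for prefix in prefixes)
    }

profiles = [{'name': 'user_eu_01'}, {'name': 'user_us_02'}, {'name': 'bot_03'}]
print(filter_active_by_prefixes(profiles, 'user_eu, bot'))  # {'user_eu_01', 'bot_03'}
print(filter_active_by_prefixes(profiles, None))            # all three names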
@ -918,7 +932,8 @@ def process_info_json_cycle(path, content, policy, state_manager, args, running_
        result['proxy_url'] = proxy_url

        if profile_manager_instance and profile_name:
-            profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+            is_dummy = args.dummy or args.dummy_batch
+            profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)

        state_manager.log_event(result)
        results.append(result)
@ -960,14 +975,15 @@ def process_info_json_cycle(path, content, policy, state_manager, args, running_

        # --- Record activity on the DOWNLOAD profile that performed the work ---
        if profile_manager_instance and profile_name:
+            is_dummy = args.dummy or args.dummy_batch
            if result.get('success'):
-                profile_manager_instance.record_activity(profile_name, 'download')
+                profile_manager_instance.record_activity(profile_name, 'download', is_dummy=is_dummy)
            elif result.get('error_type') == 'Cancelled':
                pass # Do not record cancellations
            elif result.get('is_tolerated_error'):
-                profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+                profile_manager_instance.record_activity(profile_name, 'tolerated_error', is_dummy=is_dummy)
            else:
-                profile_manager_instance.record_activity(profile_name, 'download_error')
+                profile_manager_instance.record_activity(profile_name, 'download_error', is_dummy=is_dummy)

        state_manager.log_event(result)
        results.append(result)
|||||||
@ -495,7 +495,7 @@ def main_stress_policy(args):
|
|||||||
exec_control['workers'] = calculated_workers
|
exec_control['workers'] = calculated_workers
|
||||||
logger.info(f"Calculated 'auto' workers for throughput mode: {calculated_workers} (based on {len(matching_profiles)} profiles with prefix '{profile_prefix}').")
|
logger.info(f"Calculated 'auto' workers for throughput mode: {calculated_workers} (based on {len(matching_profiles)} profiles with prefix '{profile_prefix}').")
|
||||||
|
|
||||||
sp_utils.display_effective_policy(policy, policy_name, sources=[], original_workers_setting=original_workers_setting)
|
sp_utils.display_effective_policy(policy, policy_name, args, sources=[], original_workers_setting=original_workers_setting)
|
||||||
if args.dry_run: return 0
|
if args.dry_run: return 0
|
||||||
|
|
||||||
workers = exec_control.get('workers', 1)
|
workers = exec_control.get('workers', 1)
|
||||||
@ -578,15 +578,45 @@ def main_stress_policy(args):
        logger.info(f"Starting/resuming from URL index {start_index + 1}.")
        # The worker's get_next_url_batch will respect this starting index.

-    sp_utils.display_effective_policy(policy, policy_name, sources=urls_list)
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
    if args.dry_run: return 0

-    workers = exec_control.get('workers', 1)
-    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
-            executor.submit(run_direct_batch_worker, i, policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
-            for i in range(workers)
-        ]
+    worker_pools = exec_control.get('worker_pools', [])
+    if not worker_pools and exec_control.get('workers'):
+        prefix = policy.get('info_json_generation_policy', {}).get('profile_prefix')
+        worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
+
+    if not worker_pools:
+        logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
+        return 1
+
+    if args.profile_prefix:
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        for pool in worker_pools:
+            pool['profile_prefix'] = args.profile_prefix
+
+    worker_specs = []
+    worker_id_counter = 0
+    for pool in worker_pools:
+        pool_workers = pool.get('workers', 1)
+        prefix_str = pool.get('profile_prefix', '')
+        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
+        if not prefixes:
+            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+            continue
+
+        for i in range(pool_workers):
+            assigned_prefix = prefixes[i % len(prefixes)]
+            worker_policy = deepcopy(policy)
+            worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = assigned_prefix
+            worker_specs.append({
+                'func': run_direct_batch_worker,
+                'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
+            })
+            worker_id_counter += 1
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
+        futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
        # Wait for all workers to complete. They will exit their loops when no URLs are left.
        concurrent.futures.wait(futures)
        if shutdown_event.is_set():
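The refactor above lets `execution_control` declare several worker pools, each with its own worker count and comma-separated profile prefixes, while a plain `workers` value still behaves as one implicit pool; the CLI `--profile-prefix` flag overrides whatever the pools declare. A hypothetical policy fragment and the per-worker prefix assignment it would produce in this direct-batch path (the key names come from the hunk, the values are invented):

from copy import deepcopy

policy = {
    'execution_control': {
        'worker_pools': [
            {'workers': 3, 'profile_prefix': 'user_eu, user_us'},  # round-robin over two prefixes
            {'workers': 1, 'profile_prefix': 'user_asia'},
        ]
    },
    'info_json_generation_policy': {},
}

assignments = []
worker_id = 0
for pool in policy['execution_control']['worker_pools']:
    prefixes = [p.strip() for p in pool.get('profile_prefix', '').split(',') if p.strip()]
    for i in range(pool.get('workers', 1)):
        worker_policy = deepcopy(policy)
        # Same round-robin assignment as the direct-batch pool expansion above.
        worker_policy['info_json_generation_policy']['profile_prefix'] = prefixes[i % len(prefixes)]
        assignments.append((worker_id, worker_policy['info_json_generation_policy']['profile_prefix']))
        worker_id += 1

print(assignments)
# [(0, 'user_eu'), (1, 'user_us'), (2, 'user_eu'), (3, 'user_asia')]

The docker and queue worker paths below differ only in that each worker keeps the pool's full comma-separated prefix string instead of a single round-robin slot.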
@ -662,14 +692,46 @@ def main_stress_policy(args):
        if start_index > 0:
            logger.info(f"Starting/resuming from URL index {start_index + 1}.")

-    sp_utils.display_effective_policy(policy, policy_name, sources=urls_list)
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=urls_list)
    if args.dry_run: return 0

-    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
-            executor.submit(run_direct_docker_worker, i, policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
-            for i in range(workers)
-        ]
+    worker_pools = exec_control.get('worker_pools', [])
+    if not worker_pools and exec_control.get('workers'):
+        prefix = policy.get('info_json_generation_policy', {}).get('profile_prefix')
+        worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
+
+    if not worker_pools:
+        logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
+        return 1
+
+    if args.profile_prefix:
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        for pool in worker_pools:
+            pool['profile_prefix'] = args.profile_prefix
+
+    worker_specs = []
+    worker_id_counter = 0
+    for pool in worker_pools:
+        pool_workers = pool.get('workers', 1)
+        prefix_str = pool.get('profile_prefix', '')
+        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
+        if not prefixes:
+            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+            continue
+
+        # Each worker in the pool gets the full list of prefixes from the pool configuration.
+        for i in range(pool_workers):
+            worker_policy = deepcopy(policy)
+            # The worker functions will now handle a comma-separated list of prefixes.
+            worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
+            worker_specs.append({
+                'func': run_direct_docker_worker,
+                'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, urls_list, running_processes, process_lock)
+            })
+            worker_id_counter += 1
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
+        futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
        # This worker runs until shutdown, like the download worker
        shutdown_event.wait()
        logger.info("Shutdown signal received, waiting for direct docker workers to finish...")
@ -686,14 +748,46 @@ def main_stress_policy(args):
        logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
        return 1

-    sp_utils.display_effective_policy(policy, policy_name, sources=[])
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
    if args.dry_run: return 0

-    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
-            executor.submit(run_direct_docker_download_worker, i, policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
-            for i in range(workers)
-        ]
+    worker_pools = exec_control.get('worker_pools', [])
+    if not worker_pools and exec_control.get('workers'):
+        prefix = policy.get('download_policy', {}).get('profile_prefix')
+        worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
+
+    if not worker_pools:
+        logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
+        return 1
+
+    if args.profile_prefix:
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        for pool in worker_pools:
+            pool['profile_prefix'] = args.profile_prefix
+
+    worker_specs = []
+    worker_id_counter = 0
+    for pool in worker_pools:
+        pool_workers = pool.get('workers', 1)
+        prefix_str = pool.get('profile_prefix', '')
+        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
+        if not prefixes:
+            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+            continue
+
+        # Each worker in the pool gets the full list of prefixes from the pool configuration.
+        for i in range(pool_workers):
+            worker_policy = deepcopy(policy)
+            # The worker functions will now handle a comma-separated list of prefixes.
+            worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
+            worker_specs.append({
+                'func': run_direct_docker_download_worker,
+                'args': (worker_id_counter, worker_policy, state_manager, args, profile_manager_instance, running_processes, process_lock)
+            })
+            worker_id_counter += 1
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
+        futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
        # This worker runs until shutdown
        shutdown_event.wait()
        logger.info("Shutdown signal received, waiting for direct docker download workers to finish...")
@ -726,15 +820,46 @@ def main_stress_policy(args):
        logger.error(f"Failed to create info.json directory '{info_json_dir}': {e}")
        return 1

-    sp_utils.display_effective_policy(policy, policy_name, sources=[])
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
    if args.dry_run: return 0

-    workers = exec_control.get('workers', 1)
-    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
-            executor.submit(run_direct_download_worker, i, policy, state_manager, args, download_manager, running_processes, process_lock)
-            for i in range(workers)
-        ]
+    worker_pools = exec_control.get('worker_pools', [])
+    if not worker_pools and exec_control.get('workers'):
+        prefix = policy.get('download_policy', {}).get('profile_prefix')
+        worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
+
+    if not worker_pools:
+        logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
+        return 1
+
+    if args.profile_prefix:
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        for pool in worker_pools:
+            pool['profile_prefix'] = args.profile_prefix
+
+    worker_specs = []
+    worker_id_counter = 0
+    for pool in worker_pools:
+        pool_workers = pool.get('workers', 1)
+        prefix_str = pool.get('profile_prefix', '')
+        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
+        if not prefixes:
+            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+            continue
+
+        # Each worker in the pool gets the full list of prefixes from the pool configuration.
+        for i in range(pool_workers):
+            worker_policy = deepcopy(policy)
+            # The worker functions will now handle a comma-separated list of prefixes.
+            worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
+            worker_specs.append({
+                'func': run_direct_download_worker,
+                'args': (worker_id_counter, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
+            })
+            worker_id_counter += 1
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
+        futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
        shutdown_event.wait()
        logger.info("Shutdown signal received, waiting for direct download workers to finish...")
        concurrent.futures.wait(futures)
@ -786,6 +911,15 @@ def main_stress_policy(args):
        logger.error(f"Failed to create save directory '{save_dir}': {e}")
        return 1

+    # In dummy mode, do not populate the queue. Warn the user instead.
+    if args.dummy or args.dummy_batch:
+        input_queue = queue_policy.get('input_queue', 'queue2_auth_inbox')
+        logger.warning("--- Dummy Mode Notice ---")
+        logger.warning(f"Dummy mode is enabled. The tool will NOT automatically populate the queue.")
+        logger.warning(f"Please ensure the input queue '{input_queue}' contains tasks for the workers to process.")
+        logger.warning(f"You can populate it using another tool, for example: ./bin/ytops-client queue push {input_queue} --payload-json '{{\"url\":\"https://youtu.be/dQw4w9WgXcQ\"}}' --count 100")
+        logger.warning("-------------------------")
+
    # Requeue failed tasks if requested
    if args.requeue_failed:
        requeued = state_manager.requeue_failed_auth_tasks(
@ -793,15 +927,46 @@ def main_stress_policy(args):
        )
        logger.info(f"Requeued {requeued} failed authentication tasks.")

-    sp_utils.display_effective_policy(policy, policy_name, sources=[])
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
    if args.dry_run: return 0

-    workers = exec_control.get('workers', 1)
-    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
-            executor.submit(run_queue_auth_worker, i, policy, state_manager, args, auth_manager, running_processes, process_lock)
-            for i in range(workers)
-        ]
+    worker_pools = exec_control.get('worker_pools', [])
+    if not worker_pools and exec_control.get('workers'):
+        prefix = policy.get('info_json_generation_policy', {}).get('profile_prefix')
+        worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
+
+    if not worker_pools:
+        logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
+        return 1
+
+    if args.profile_prefix:
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        for pool in worker_pools:
+            pool['profile_prefix'] = args.profile_prefix
+
+    worker_specs = []
+    worker_id_counter = 0
+    for pool in worker_pools:
+        pool_workers = pool.get('workers', 1)
+        prefix_str = pool.get('profile_prefix', '')
+        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
+        if not prefixes:
+            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+            continue
+
+        # Each worker in the pool gets the full list of prefixes from the pool configuration.
+        for i in range(pool_workers):
+            worker_policy = deepcopy(policy)
+            # The worker functions will now handle a comma-separated list of prefixes.
+            worker_policy.setdefault('info_json_generation_policy', {})['profile_prefix'] = prefix_str
+            worker_specs.append({
+                'func': run_queue_auth_worker,
+                'args': (worker_id_counter, worker_policy, state_manager, args, auth_manager, running_processes, process_lock)
+            })
+            worker_id_counter += 1
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
+        futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
        shutdown_event.wait()
        logger.info("Shutdown signal received, waiting for queue auth workers to finish...")
        concurrent.futures.wait(futures)
@ -843,6 +1008,17 @@ def main_stress_policy(args):
        env_prefix=env_prefix
    )

+    # In dummy mode, do not populate the queue. Warn the user instead.
+    if args.dummy or args.dummy_batch:
+        input_queue = queue_policy.get('input_queue', 'queue2_dl_inbox')
+        logger.warning("--- Dummy Mode Notice ---")
+        logger.warning(f"Dummy mode is enabled. The tool will NOT automatically populate the queue.")
+        logger.warning(f"Please ensure the input queue '{input_queue}' contains tasks for the workers to process.")
+        logger.warning("-------------------------")
+
+    # Get download policy
+    d_policy = policy.get('download_policy', {})
+
    # Create output directory if specified
    output_dir = d_policy.get('output_dir')
    if output_dir:
@ -860,15 +1036,46 @@ def main_stress_policy(args):
        )
        logger.info(f"Requeued {requeued} failed download tasks.")

-    sp_utils.display_effective_policy(policy, policy_name, sources=[])
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
    if args.dry_run: return 0

-    workers = exec_control.get('workers', 1)
-    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = [
-            executor.submit(run_queue_download_worker, i, policy, state_manager, args, download_manager, running_processes, process_lock)
-            for i in range(workers)
-        ]
+    worker_pools = exec_control.get('worker_pools', [])
+    if not worker_pools and exec_control.get('workers'):
+        prefix = policy.get('download_policy', {}).get('profile_prefix')
+        worker_pools.append({'workers': exec_control.get('workers'), 'profile_prefix': prefix or 'user'})
+
+    if not worker_pools:
+        logger.error("No workers configured in policy (execution_control.workers or execution_control.worker_pools).")
+        return 1
+
+    if args.profile_prefix:
+        logger.info(f"CLI --profile-prefix '{args.profile_prefix}' is set, it will override any prefixes defined in worker_pools.")
+        for pool in worker_pools:
+            pool['profile_prefix'] = args.profile_prefix
+
+    worker_specs = []
+    worker_id_counter = 0
+    for pool in worker_pools:
+        pool_workers = pool.get('workers', 1)
+        prefix_str = pool.get('profile_prefix', '')
+        prefixes = [p.strip() for p in prefix_str.split(',') if p.strip()]
+        if not prefixes:
+            logger.warning(f"Worker pool has no profile_prefix. Skipping: {pool}")
+            continue
+
+        # Each worker in the pool gets the full list of prefixes from the pool configuration.
+        for i in range(pool_workers):
+            worker_policy = deepcopy(policy)
+            # The worker functions will now handle a comma-separated list of prefixes.
+            worker_policy.setdefault('download_policy', {})['profile_prefix'] = prefix_str
+            worker_specs.append({
+                'func': run_queue_download_worker,
+                'args': (worker_id_counter, worker_policy, state_manager, args, download_manager, running_processes, process_lock)
+            })
+            worker_id_counter += 1
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(worker_specs)) as executor:
+        futures = [executor.submit(spec['func'], *spec['args']) for spec in worker_specs]
        shutdown_event.wait()
        logger.info("Shutdown signal received, waiting for queue download workers to finish...")
        concurrent.futures.wait(futures)
@ -949,7 +1156,7 @@ def main_stress_policy(args):
        )
        logger.info(f"Requeued {requeued_auth} failed authentication tasks and {requeued_dl} failed download tasks.")

-    sp_utils.display_effective_policy(policy, policy_name, sources=[])
+    sp_utils.display_effective_policy(policy, policy_name, args, sources=[])
    if args.dry_run: return 0

    # Start both auth and download workers
@ -1127,6 +1334,7 @@ def main_stress_policy(args):
    sp_utils.display_effective_policy(
        policy,
        policy_name,
+        args,
        sources=sources,
        profile_names=None, # Profile grouping is removed
        original_workers_setting=original_workers_setting
@ -1254,12 +1462,22 @@ def main_stress_policy(args):
    # If marking by rename is on, do that.
    if settings.get('mark_processed_files'):
        try:
+            processed_dir = settings.get('processed_files_dir')
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-            new_path = source.parent / f"{source.name}.{timestamp}.processed"
+            new_filename = f"{source.name}.{timestamp}.processed"
+
+            if processed_dir:
+                dest_path = Path(processed_dir) / new_filename
+                dest_path.parent.mkdir(parents=True, exist_ok=True)
+                shutil.move(str(source), str(dest_path))
+                logger.info(f"Marked '{source.name}' as processed by moving to '{dest_path}'")
+            else:
+                # Fallback to old behavior: rename in place
+                new_path = source.parent / new_filename
                source.rename(new_path)
                logger.info(f"Marked '{source.name}' as processed by renaming to '{new_path.name}'")
        except (IOError, OSError) as e:
-            logger.error(f"Failed to rename processed file '{source.name}': {e}")
+            logger.error(f"Failed to mark processed file '{source.name}': {e}")

    # When using profile-aware mode, the file processing (including marking as
    # processed) is handled inside process_profile_task.
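The marking change above adds an optional `processed_files_dir`: when it is set, processed source files are moved there under a timestamped name, otherwise they are renamed in place as before. A minimal standalone sketch of that behavior, assuming only the settings key shown in the hunk (the function name is illustrative):

import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional

def mark_processed(source: Path, processed_files_dir: Optional[str]) -> Path:
    """Timestamp a processed file and either move it aside or rename it in place."""
    new_filename = f"{source.name}.{datetime.now().strftime('%Y%m%d_%H%M%S')}.processed"
    if processed_files_dir:
        dest = Path(processed_files_dir) / new_filename
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(source), str(dest))
        return dest
    # Fallback: rename next to the original, matching the previous behavior.
    dest = source.parent / new_filename
    source.rename(dest)
    return dest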