# Source: yt-dlp-dags/ansible/playbook-docker-services-setup.yml
# (file-viewer metadata: 370 lines, 14 KiB, YAML)

---
# This playbook orchestrates the entire setup for the stress test environment.
# It is composed of multiple plays and imported playbooks to ensure modularity and correct execution order.
# -------------------------------------------------------------------------------------------------
# PHASE 1: Base System Configuration
# Ensures all nodes have the necessary base packages, user configurations, and Docker installed.
# -------------------------------------------------------------------------------------------------
# Phase 1 is delegated entirely to two imported playbooks, run in this order.
- import_playbook: playbook-base-system.yml
  name: "PHASE 1.1: Import base system setup playbook"

- import_playbook: playbook-proxies.yml
  name: "PHASE 1.2: Import proxy deployment playbook"
# -------------------------------------------------------------------------------------------------
# PHASE 2: Application Directory and Code Setup
# Creates the necessary directory structure and syncs the application code.
# -------------------------------------------------------------------------------------------------
# Creates the per-node base directory plus the sub-directories that are used
# as mount points later in this playbook (inputfiles, fetched_info_jsons).
# The base directory is airflow_master_dir on hosts in the `master` group and
# airflow_worker_dir everywhere else.
- name: "PHASE 2.1: Create Base Directories"
  hosts: all
  gather_facts: false
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # Derived from the inventory file name: the extension is dropped and the
    # literal prefix "inventory." removed (inventory.prod.ini -> prod).
    # set_fact keeps the value on the host for the rest of the run.
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"

    # with_fileglob yields nothing when the file is absent, so a missing
    # generated_vars file is skipped instead of failing the play.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    # No `recurse` here on purpose: with state=directory it would re-apply
    # owner/group/mode 0755 to every file below base_dir on each run, which
    # marks all files executable and world-readable and walks into any
    # filesystem already mounted underneath. Missing parent directories are
    # still created by state=directory.
    - name: Ensure base directories and subdirectories exist
      ansible.builtin.file:
        path: "{{ base_dir }}/{{ item }}"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      loop:
        - ""  # for the base directory itself
        - "inputfiles"
        - "run/docker_mount/fetched_info_jsons"
      become: true
# Code sync runs before dependency installation; both live in their own playbooks.
- import_playbook: playbook-stress-sync-code.yml
  name: "PHASE 2.2: Import playbook to sync local code"

- import_playbook: playbook-stress-install-deps.yml
  name: "PHASE 2.3: Import playbook to install Python dependencies"
# -------------------------------------------------------------------------------------------------
# PHASE 3: Environment and Service Configuration
# Generates the .env file and starts the role-specific services on master and workers.
# -------------------------------------------------------------------------------------------------
# .env generation is handled by a dedicated playbook.
- import_playbook: playbook-stress-generate-env.yml
  name: "PHASE 3.1: Import playbook to generate .env file"
# Master-only play: installs sysctl limits, renders the stress compose file,
# (re)starts the compose project, waits for port 9000, installs the MinIO
# client and makes sure the two stress buckets exist.
- name: "PHASE 3.2: Master Node Services Setup"
  hosts: master
  gather_facts: false
  vars:
    # Inventory file name without extension and without the "inventory." prefix.
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Configure system performance and kernel settings
      ansible.builtin.copy:
        src: "configs/etc/sysctl.d/99-system-limits.conf"
        dest: "/etc/sysctl.d/99-system-limits.conf"
        owner: root
        group: root
        mode: '0644'
      become: true
      register: sysctl_config_copy

    # Only reload when the file actually changed.
    - name: Apply sysctl settings
      ansible.builtin.command: sysctl --system
      become: true
      when: sysctl_config_copy.changed

    - name: Ensure MinIO data directory exists
      ansible.builtin.file:
        path: "{{ airflow_master_dir }}/minio-data"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    - name: Template Docker Compose file for master services
      ansible.builtin.template:
        src: templates/docker-compose.stress-master.j2
        dest: "{{ airflow_master_dir }}/docker-compose.stress.yml"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
      become: true

    # `docker ps -q` prints bare container IDs. The previous --format Go
    # template used doubled braces that Ansible's Jinja2 pass tried to
    # evaluate, which is a template syntax error. Cleanup stays best-effort.
    - name: Stop and remove existing containers before starting services
      ansible.builtin.shell:
        cmd: |
          docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
          docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
          docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
      become: true
      changed_when: false
      ignore_errors: true

    - name: Start master services (Redis, MinIO)
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_master_dir }}"
        files:
          - docker-compose.stress.yml
        state: present
        remove_orphans: true
      become: true

    # Probed from the controller, so this also checks that port 9000 is
    # reachable from outside the master host.
    - name: Wait for MinIO service to be ready
      ansible.builtin.wait_for:
        host: "{{ hostvars[inventory_hostname].ansible_host }}"
        port: 9000
        delay: 5
        timeout: 60
      delegate_to: localhost

    # get_url (force defaults to false) skips the download when the file is
    # already there, and writes via a temporary file, so a failed transfer
    # cannot leave a truncated binary that later runs would treat as present.
    - name: Download MinIO Client (mc) if not present
      ansible.builtin.get_url:
        url: https://dl.min.io/client/mc/release/linux-amd64/mc
        dest: /usr/local/bin/mc
        mode: '0755'
      become: true

    - name: Ensure MinIO Client (mc) is executable
      ansible.builtin.file:
        path: /usr/local/bin/mc
        mode: '0755'
      become: true

    # argv passes each credential as a single argument regardless of the
    # characters it contains; no_log keeps both secrets out of task output.
    - name: Configure mc alias for local MinIO
      ansible.builtin.command:
        argv:
          - mc
          - alias
          - set
          - local
          - http://localhost:9000
          - "{{ vault_s3_access_key_id }}"
          - "{{ vault_s3_secret_access_key }}"
      become: true
      become_user: "{{ ansible_user }}"
      changed_when: false
      no_log: true
      environment:
        HOME: "/home/{{ ansible_user }}"

    # A non-zero exit is tolerated only when mc reports that the bucket is
    # already there. Both phrasings are accepted ("already exists" and the
    # S3-style "... you already own it"), on either output stream.
    - name: Ensure S3 buckets exist in MinIO using mc
      ansible.builtin.command:
        argv:
          - mc
          - mb
          - "local/{{ item }}"
      loop:
        - "stress-inputs"
        - "stress-jsons"
      become: true
      become_user: "{{ ansible_user }}"
      register: mc_mb_result
      failed_when: >-
        mc_mb_result.rc != 0 and
        (mc_mb_result.stderr ~ mc_mb_result.stdout)
        is not search('already exists|already own it')
      changed_when: mc_mb_result.rc == 0
      environment:
        HOME: "/home/{{ ansible_user }}"
# Worker-only play: renders the stress compose file into airflow_worker_dir,
# removes leftover containers by name and (re)starts the compose project.
- name: "PHASE 3.2a: Worker Node Services Setup"
  hosts: workers
  gather_facts: false
  vars:
    # Inventory file name without extension and without the "inventory." prefix.
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    # NOTE(review): this renders the *master* template on worker hosts, and the
    # cleanup below targets redis/minio container names. Confirm this is
    # intended and not a copy-paste from the master play.
    - name: Template Docker Compose file for worker services
      ansible.builtin.template:
        src: templates/docker-compose.stress-master.j2
        dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
      become: true

    # `docker ps -q` prints bare container IDs. The previous --format Go
    # template used doubled braces that Ansible's Jinja2 pass tried to
    # evaluate, which is a template syntax error. Cleanup stays best-effort.
    - name: Stop and remove existing containers before starting services
      ansible.builtin.shell:
        cmd: |
          docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
          docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
          docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
      become: true
      changed_when: false
      ignore_errors: true

    - name: Start worker services
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_worker_dir }}"
        files:
          - docker-compose.stress.yml
        state: present
        remove_orphans: true
      become: true
# Mounts the two stress buckets on every master and worker node through s3fs,
# pointing at port 9000 of the first host in the `master` group.
- name: "PHASE 3.3: Shared Storage Setup (s3fs)"
  hosts: master:workers
  gather_facts: false
  vars:
    # Inventory file name without extension and without the "inventory." prefix.
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Mount S3 buckets via s3fs
      vars:
        # Single definition of bucket -> mount point, shared by the check and
        # mount tasks below so the two lists cannot drift apart.
        s3fs_mounts:
          - bucket: stress-inputs
            path: "{{ base_dir }}/inputfiles"
          - bucket: stress-jsons
            path: "{{ base_dir }}/run/docker_mount/fetched_info_jsons"
      block:
        - name: Install s3fs for mounting S3 buckets
          ansible.builtin.apt:
            name: s3fs
            state: present
          become: true

        # no_log keeps the key pair out of --diff output and task results.
        - name: Configure s3fs credentials
          ansible.builtin.copy:
            content: "{{ vault_s3_access_key_id }}:{{ vault_s3_secret_access_key }}"
            dest: "/home/{{ ansible_user }}/.passwd-s3fs"
            owner: "{{ ansible_user }}"
            group: "{{ deploy_group }}"
            mode: '0600'
          become: true
          no_log: true

        # Fixed-string match on the " on <path> type " part of `mount` output,
        # so a path that merely contains or extends this one is not mistaken
        # for it. rc is inspected by the next task; this one never fails.
        - name: Check if mount points are already mounted
          ansible.builtin.shell:
            cmd: "mount | grep -qF ' on {{ item.path }} type '"
          loop: "{{ s3fs_mounts }}"
          register: mount_check
          changed_when: false
          failed_when: false

        # Skipped for paths that are already mounted, so owner/mode are never
        # applied on top of a live s3fs mount.
        - name: Ensure mount point directories exist (only if not mounted)
          ansible.builtin.file:
            path: "{{ item.item.path }}"
            state: directory
            owner: "{{ ansible_user }}"
            group: "{{ deploy_group }}"
            mode: '0755'
          loop: "{{ mount_check.results }}"
          loop_control:
            label: "{{ item.item.path }}"
          when: item.rc != 0
          become: true

        - name: Mount S3 buckets for stress testing
          ansible.posix.mount:
            src: "s3fs#{{ item.bucket }}"
            path: "{{ item.path }}"
            fstype: fuse
            opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
            state: mounted
          loop: "{{ s3fs_mounts }}"
          become: true
# Redis profile initialisation is handled by a dedicated playbook.
- import_playbook: playbook-stress-init-redis.yml
  name: "PHASE 3.4: Import playbook to initialize Redis profiles"
# -------------------------------------------------------------------------------------------------
# PHASE 4: Monitoring and Management Services Setup
# Starts monitoring, enforcer, and simulation processes.
# -------------------------------------------------------------------------------------------------
# Process management for the monitor and enforcer is imported before they are started.
- import_playbook: playbook-stress-manage-processes.yml
  name: "PHASE 4.1: Import playbook to manage monitoring and enforcer processes"
# Starts the profile monitor and the policy enforcer on the master, each in
# its own detached tmux session, then prints the sessions that exist.
# start_monitor / start_enforcer default to true here and can be overridden
# (for example with -e) to skip either service.
- name: "PHASE 4.2: Start monitoring and enforcer services"
  hosts: master
  gather_facts: false
  vars:
    # Inventory file name without extension and without the "inventory." prefix.
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    start_monitor: true
    start_enforcer: true
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Ensure tmux is installed
      ansible.builtin.apt:
        name: tmux
        state: present
      become: true

    # chdir fails the task if the directory is missing (a bare `cd` would be
    # ignored and tmux started from the wrong place). The has-session guard
    # makes re-runs a no-op instead of a "duplicate session" error; the "="
    # prefix asks tmux for an exact session-name match.
    - name: Start profile monitoring in tmux session
      ansible.builtin.shell:
        chdir: "{{ airflow_master_dir }}"
        cmd: |
          if tmux has-session -t '=stress-monitor' 2>/dev/null; then
            echo "session-exists"
            exit 0
          fi
          tmux new-session -d -s stress-monitor \
          "set -a && . ./.env && set +a && \
          ./bin/ytops-client profile list \
          --auth-env sim_auth \
          --download-env sim_download \
          --live \
          --no-blink \
          --show-reasons"
      register: monitor_session
      changed_when: "'session-exists' not in monitor_session.stdout"
      when: start_monitor | default(false) | bool

    # Same guard and working-directory handling as the monitor task.
    - name: Start policy enforcer in tmux session
      ansible.builtin.shell:
        chdir: "{{ airflow_master_dir }}"
        cmd: |
          if tmux has-session -t '=stress-enforcer' 2>/dev/null; then
            echo "session-exists"
            exit 0
          fi
          tmux new-session -d -s stress-enforcer \
          "set -a && . ./.env && set +a && \
          ./bin/ytops-client policy-enforcer \
          --policy policies/8_unified_simulation_enforcer.yaml \
          --live"
      register: enforcer_session
      changed_when: "'session-exists' not in enforcer_session.stdout"
      when: start_enforcer | default(false) | bool

    # tmux exits non-zero with "no server running" when there are no sessions
    # at all (e.g. both start_* flags disabled); that is reported, not fatal.
    - name: List active tmux sessions
      ansible.builtin.command: tmux list-sessions
      register: tmux_sessions
      changed_when: false
      failed_when: >-
        tmux_sessions.rc != 0 and
        'no server running' not in tmux_sessions.stderr

    - name: Display active sessions
      ansible.builtin.debug:
        msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"
# -------------------------------------------------------------------------------------------------
# PHASE 5: Simulation Workload Generation (Optional - can be run manually)
# These playbooks are available but not automatically started by default.
# -------------------------------------------------------------------------------------------------
# Informational only: prints how to launch the simulation playbooks by hand.
# Nothing is started by this play.
# NOTE(review): the message limits to `airflow_workers[0]`, while the plays in
# this file target the `workers` group. Confirm the group name.
- hosts: localhost
  name: "PHASE 5.1: Note about simulation workload generation"
  gather_facts: false
  tasks:
    - name: Display note about simulation playbooks
      ansible.builtin.debug:
        msg: |
          Simulation workload generation playbooks are available:
          - ansible/playbook-stress-auth-generator.yml
          - ansible/playbook-stress-download-simulation.yml
          To start simulations manually, run:
          ansible-playbook ansible/playbook-stress-auth-generator.yml \
          -e "start_generator=true dummy_batch=true auth_min_seconds=2 auth_max_seconds=3"
          ansible-playbook ansible/playbook-stress-download-simulation.yml \
          -e "start_download=true profile_prefix=user1 download_min_seconds=2 download_max_seconds=5" \
          --limit airflow_workers[0]