# yt-dlp-dags/ansible/playbook-docker-services-setup.yml

---
# This playbook orchestrates the entire setup for the stress test environment.
# It is composed of multiple plays and imported playbooks to ensure modularity and correct execution order.

# -------------------------------------------------------------------------------------------------
# PHASE 1: Base System Configuration
# Ensures all nodes have the necessary base packages, user configurations, and Docker installed.
# -------------------------------------------------------------------------------------------------
# Static import: the plays from playbook-base-system.yml are inlined here at parse time.
- name: "PHASE 1.1: Import base system setup playbook"
  ansible.builtin.import_playbook: playbook-base-system.yml

# Static import: the plays from playbook-proxies.yml run right after the base system plays.
- name: "PHASE 1.2: Import proxy deployment playbook"
  ansible.builtin.import_playbook: playbook-proxies.yml

# -------------------------------------------------------------------------------------------------
# PHASE 2: Application Directory and Code Setup
# Creates the necessary directory structure and syncs the application code.
# -------------------------------------------------------------------------------------------------
# Creates the per-node directory tree on every host. Hosts in the `master`
# group use airflow_master_dir; every other host uses airflow_worker_dir.
- name: "PHASE 2.1: Create Base Directories"
  hosts: all
  gather_facts: false
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # Derives the environment name from the inventory file name, e.g.
    # "inventory.prod.ini" -> "prod". Stored with set_fact, so the value stays
    # attached to the host for the remainder of the run.
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"

    # with_fileglob yields no items when the file is missing, so the include
    # is skipped instead of failing.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    # No `recurse` here on purpose: recursing from the base directory would
    # chown/chmod every existing file below it to 0755 on each run (synced
    # code, .env, service data, and the contents of any bucket already mounted
    # under it). Intermediate directories are listed explicitly instead so
    # each one still gets the expected owner, group and mode.
    - name: Ensure base directories and subdirectories exist
      ansible.builtin.file:
        path: "{{ base_dir }}/{{ item }}"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      loop:
        - "" # for the base directory itself
        - "inputfiles"
        - "run"
        - "run/docker_mount"
        - "run/docker_mount/fetched_info_jsons"
      become: true

# Static import: runs the plays from playbook-stress-sync-code.yml once the directories exist.
- name: "PHASE 2.2: Import playbook to sync local code"
  ansible.builtin.import_playbook: playbook-stress-sync-code.yml

# Static import: runs the plays from playbook-stress-install-deps.yml after the code sync.
- name: "PHASE 2.3: Import playbook to install Python dependencies"
  ansible.builtin.import_playbook: playbook-stress-install-deps.yml

# -------------------------------------------------------------------------------------------------
# PHASE 3: Environment and Service Configuration
# Generates the .env file and starts the role-specific services on master and workers.
# -------------------------------------------------------------------------------------------------
# Static import: runs the plays from playbook-stress-generate-env.yml before any service is started.
- name: "PHASE 3.1: Import playbook to generate .env file"
  ansible.builtin.import_playbook: playbook-stress-generate-env.yml

# Configures the master node: installs sysctl limits, renders and starts the
# Docker Compose stack, waits for MinIO on port 9000, installs the MinIO
# client (mc) and makes sure the stress-test buckets exist.
- name: "PHASE 3.2: Master Node Services Setup"
  hosts: master
  gather_facts: false
  vars:
    # Environment name derived from the inventory file name,
    # e.g. "inventory.prod.ini" -> "prod".
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # with_fileglob yields no items when the file is missing, so the include
    # is skipped instead of failing.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Configure system performance and kernel settings
      ansible.builtin.copy:
        src: "configs/etc/sysctl.d/99-system-limits.conf"
        dest: "/etc/sysctl.d/99-system-limits.conf"
        owner: root
        group: root
        mode: '0644'
      become: true
      register: sysctl_config_copy

    # Only reload kernel parameters when the config file actually changed.
    - name: Apply sysctl settings
      ansible.builtin.command: sysctl --system
      become: true
      when: sysctl_config_copy.changed

    - name: Ensure MinIO data directory exists
      ansible.builtin.file:
        path: "{{ airflow_master_dir }}/minio-data"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    - name: Template Docker Compose file for master services
      ansible.builtin.template:
        src: templates/docker-compose.stress-master.j2
        dest: "{{ airflow_master_dir }}/docker-compose.stress.yml"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
      become: true

    # `docker ps -q` prints container IDs only. This replaces a Go-template
    # --format string whose braces Jinja2 tried (and failed) to parse, which
    # aborted the task before the shell ever ran. `xargs -r` skips `docker rm`
    # when nothing matches. Removal is best-effort, hence ignore_errors.
    - name: Stop and remove existing containers before starting services
      ansible.builtin.shell:
        cmd: |
          docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
          docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
          docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
      become: true
      changed_when: false
      ignore_errors: true

    - name: Start master services (Redis, MinIO)
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_master_dir }}"
        files:
          - docker-compose.stress.yml
        state: present
        remove_orphans: true
      become: true

    # Probed from the control node (delegate_to: localhost), so the target's
    # address is read explicitly from hostvars.
    - name: Wait for MinIO service to be ready
      ansible.builtin.wait_for:
        host: "{{ hostvars[inventory_hostname].ansible_host }}"
        port: 9000
        delay: 5
        timeout: 60
      delegate_to: localhost

    # get_url writes to a temporary file and moves it into place, so a failed
    # download cannot leave a truncated /usr/local/bin/mc behind. With the
    # default force=false an existing file is not downloaded again.
    - name: Download MinIO Client (mc) if not present
      ansible.builtin.get_url:
        url: "https://dl.min.io/client/mc/release/linux-amd64/mc"
        dest: /usr/local/bin/mc
        mode: '0755'
      become: true

    # Kept as an explicit guarantee for binaries left behind by earlier runs.
    - name: Ensure MinIO Client (mc) is executable
      ansible.builtin.file:
        path: /usr/local/bin/mc
        mode: '0755'
      become: true

    # argv passes each credential as exactly one argument, whatever characters
    # it contains; no_log keeps the secrets out of Ansible output and logs.
    - name: Configure mc alias for local MinIO
      ansible.builtin.command:
        argv:
          - mc
          - alias
          - set
          - local
          - "http://localhost:9000"
          - "{{ vault_s3_access_key_id }}"
          - "{{ vault_s3_secret_access_key }}"
      become: true
      become_user: "{{ ansible_user }}"
      changed_when: false
      no_log: true
      environment:
        HOME: "/home/{{ ansible_user }}"

    # `mc mb` exits non-zero when the bucket is already there. Both wordings
    # are accepted: "already exists" and the S3 "already own it" message that
    # is returned for a bucket owned by the same account. The conditions in
    # the failed_when list are ANDed.
    - name: Ensure S3 buckets exist in MinIO using mc
      ansible.builtin.command:
        argv:
          - mc
          - mb
          - "local/{{ item }}"
      loop:
        - "stress-inputs"
        - "stress-jsons"
      become: true
      become_user: "{{ ansible_user }}"
      register: mc_mb_result
      failed_when:
        - mc_mb_result.rc != 0
        - "'already exists' not in mc_mb_result.stderr"
        - "'already own it' not in mc_mb_result.stderr"
      changed_when: mc_mb_result.rc == 0
      environment:
        HOME: "/home/{{ ansible_user }}"

# Renders the Docker Compose file on each worker, clears leftover containers
# and starts the worker stack.
- name: "PHASE 3.2a: Worker Node Services Setup"
  hosts: workers
  gather_facts: false
  vars:
    # Environment name derived from the inventory file name,
    # e.g. "inventory.prod.ini" -> "prod".
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # with_fileglob yields no items when the file is missing, so the include
    # is skipped instead of failing.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    # NOTE(review): workers render the *master* compose template
    # (docker-compose.stress-master.j2). Confirm this is intentional and not
    # a copy-paste from the master play.
    - name: Template Docker Compose file for worker services
      ansible.builtin.template:
        src: templates/docker-compose.stress-master.j2
        dest: "{{ airflow_worker_dir }}/docker-compose.stress.yml"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
      become: true

    # `docker ps -q` prints container IDs only. This replaces a Go-template
    # --format string whose braces Jinja2 tried (and failed) to parse, which
    # aborted the task before the shell ever ran. `xargs -r` skips `docker rm`
    # when nothing matches. Removal is best-effort, hence ignore_errors.
    - name: Stop and remove existing containers before starting services
      ansible.builtin.shell:
        cmd: |
          docker ps -aq --filter "name=bgutil-provider" | xargs -r docker rm -f
          docker ps -aq --filter "name=redis-stress" | xargs -r docker rm -f
          docker ps -aq --filter "name=minio-stress" | xargs -r docker rm -f
      become: true
      changed_when: false
      ignore_errors: true

    - name: Start worker services
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_worker_dir }}"
        files:
          - docker-compose.stress.yml
        state: present
        remove_orphans: true
      become: true

# Mounts the two stress-test buckets from the master's MinIO (port 9000) onto
# every master and worker node through s3fs.
- name: "PHASE 3.3: Shared Storage Setup (s3fs)"
  hosts: master:workers
  gather_facts: false
  vars:
    # Environment name derived from the inventory file name,
    # e.g. "inventory.prod.ini" -> "prod".
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    # Single source of truth for the bucket -> mount point mapping. It is
    # evaluated lazily, i.e. after base_dir has been set by the first task.
    stress_s3fs_mounts:
      - bucket: 'stress-inputs'
        path: '{{ base_dir }}/inputfiles'
      - bucket: 'stress-jsons'
        path: '{{ base_dir }}/run/docker_mount/fetched_info_jsons'
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # with_fileglob yields no items when the file is missing, so the include
    # is skipped instead of failing.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Mount S3 buckets via s3fs
      block:
        - name: Install s3fs for mounting S3 buckets
          ansible.builtin.apt:
            name: s3fs
            state: present
          become: true

        # no_log keeps the access key pair out of task output and --diff.
        - name: Configure s3fs credentials
          ansible.builtin.copy:
            content: "{{ vault_s3_access_key_id }}:{{ vault_s3_secret_access_key }}"
            dest: "/home/{{ ansible_user }}/.passwd-s3fs"
            owner: "{{ ansible_user }}"
            group: "{{ deploy_group }}"
            mode: '0600'
          become: true
          no_log: true

        # Fixed-string match on " on <path> type " so that only this exact
        # mount point counts; a bare regex on the path would also match
        # longer paths sharing the same prefix. rc is 0 when mounted.
        - name: Check if mount points are already mounted
          ansible.builtin.shell:
            cmd: "mount | grep -qF -- {{ (' on ' ~ item.path ~ ' type ') | quote }}"
          loop: "{{ stress_s3fs_mounts }}"
          register: mount_check
          changed_when: false
          failed_when: false

        # Mounted paths are skipped so the bucket root is left untouched.
        - name: Ensure mount point directories exist (only if not mounted)
          ansible.builtin.file:
            path: "{{ item.item.path }}"
            state: directory
            owner: "{{ ansible_user }}"
            group: "{{ deploy_group }}"
            mode: '0755'
          loop: "{{ mount_check.results }}"
          when: item.rc != 0
          become: true

        - name: Mount S3 buckets for stress testing
          ansible.posix.mount:
            src: "s3fs#{{ item.bucket }}"
            path: "{{ item.path }}"
            fstype: fuse
            opts: "_netdev,allow_other,use_path_request_style,nonempty,url=http://{{ hostvars[groups['master'][0]].ansible_host }}:9000,passwd_file=/home/{{ ansible_user }}/.passwd-s3fs"
            state: mounted
          loop: "{{ stress_s3fs_mounts }}"
          become: true

# Static import: runs the plays from playbook-stress-init-redis.yml once services and storage are up.
- name: "PHASE 3.4: Import playbook to initialize Redis profiles"
  ansible.builtin.import_playbook: playbook-stress-init-redis.yml

# -------------------------------------------------------------------------------------------------
# PHASE 4: Monitoring and Management Services Setup
# Starts monitoring, enforcer, and simulation processes.
# -------------------------------------------------------------------------------------------------
# Static import: runs the plays from playbook-stress-manage-processes.yml.
- name: "PHASE 4.1: Import playbook to manage monitoring and enforcer processes"
  ansible.builtin.import_playbook: playbook-stress-manage-processes.yml

# Starts the profile monitor and the policy enforcer on the master, each in
# its own detached tmux session, then reports which sessions are active.
# Set start_monitor / start_enforcer to false to skip either one.
- name: "PHASE 4.2: Start monitoring and enforcer services"
  hosts: master
  gather_facts: false
  vars:
    # Environment name derived from the inventory file name,
    # e.g. "inventory.prod.ini" -> "prod".
    inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"
    start_monitor: true
    start_enforcer: true
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # with_fileglob yields no items when the file is missing, so the include
    # is skipped instead of failing.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    - name: Ensure tmux is installed
      ansible.builtin.apt:
        name: tmux
        state: present
      become: true

    # `tmux new-session -s NAME` fails with "duplicate session" when NAME
    # already exists, which used to break every re-run. The has-session guard
    # ('=' requests an exact name match) makes the task idempotent: an
    # existing session is left alone and reported as unchanged.
    - name: Start profile monitoring in tmux session
      ansible.builtin.shell:
        chdir: "{{ airflow_master_dir }}"
        cmd: |
          if tmux has-session -t '=stress-monitor' 2>/dev/null; then
            echo "stress-monitor session already exists"
            exit 0
          fi
          tmux new-session -d -s stress-monitor \
            "set -a && . ./.env && set +a && \
            ./bin/ytops-client profile list \
              --auth-env sim_auth \
              --download-env sim_download \
              --live \
              --no-blink \
              --show-reasons"
      register: monitor_session
      changed_when: "'already exists' not in monitor_session.stdout"
      when: start_monitor | default(false) | bool

    # Same idempotency guard as for the monitor session.
    - name: Start policy enforcer in tmux session
      ansible.builtin.shell:
        chdir: "{{ airflow_master_dir }}"
        cmd: |
          if tmux has-session -t '=stress-enforcer' 2>/dev/null; then
            echo "stress-enforcer session already exists"
            exit 0
          fi
          tmux new-session -d -s stress-enforcer \
            "set -a && . ./.env && set +a && \
            ./bin/ytops-client policy-enforcer \
              --policy policies/8_unified_simulation_enforcer.yaml \
              --live"
      register: enforcer_session
      changed_when: "'already exists' not in enforcer_session.stdout"
      when: start_enforcer | default(false) | bool

    # tmux exits with 1 when no server is running (e.g. both start_* flags
    # are false); that is reported as an empty list instead of a failure.
    - name: List active tmux sessions
      ansible.builtin.command: tmux list-sessions
      register: tmux_sessions
      changed_when: false
      failed_when: tmux_sessions.rc not in [0, 1]

    - name: Display active sessions
      ansible.builtin.debug:
        msg: "Active tmux sessions: {{ tmux_sessions.stdout_lines }}"

# -------------------------------------------------------------------------------------------------
# PHASE 5: Simulation Workload Generation (Optional - can be run manually)
# These playbooks are available but not automatically started by default.
# -------------------------------------------------------------------------------------------------
# Informational only: prints how to launch the optional simulation playbooks.
# Nothing is started by this play.
- name: "PHASE 5.1: Note about simulation workload generation"
  hosts: localhost
  gather_facts: false
  tasks:
    # The --limit example targets the `workers` group, which is the group name
    # this playbook itself uses, and is quoted so the shell does not treat
    # "[0]" as a glob pattern.
    - name: Display note about simulation playbooks
      ansible.builtin.debug:
        msg: |
          Simulation workload generation playbooks are available:
          - ansible/playbook-stress-auth-generator.yml
          - ansible/playbook-stress-download-simulation.yml

          To start simulations manually, run:
          ansible-playbook ansible/playbook-stress-auth-generator.yml \
            -e "start_generator=true dummy_batch=true auth_min_seconds=2 auth_max_seconds=3"

          ansible-playbook ansible/playbook-stress-download-simulation.yml \
            -e "start_download=true profile_prefix=user1 download_min_seconds=2 download_max_seconds=5" \
            --limit 'workers[0]'