# Source listing metadata: 166 lines, 6.2 KiB, YAML
---
# Playbook: installs the cleanup_media.py script and a wrapper around it,
# makes sure the s5cmd binary is available for the S3 upload mode, and manages
# an hourly cron entry that runs the wrapper.
#
# Variables read by this play but defined outside this file:
#   airflow_master_dir, airflow_worker_dir, deploy_group, ansible_user, and
#   the optional cleanup_settings mapping (enabled, mode, max_age_seconds).
- name: "UTIL-SETUP: Install media cleanup script and configure cron"
  hosts: all
  gather_facts: true
  vars_files:
    - "group_vars/all/vault.yml"
  pre_tasks:
    # Derive the environment suffix from the inventory file name:
    #   inventory.green.yml -> "green"
    #   inventory.yml       -> ""   (the dot in the pattern is optional)
    # The empty result is what lets the next task fall back to
    # generated_vars.yml.
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | regex_replace('^inventory[.]?', '') }}"

    # with_fileglob yields no items when nothing matches, so a missing vars
    # file results in zero iterations rather than a failure at this point.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"
  tasks:
    # Hosts in the 'master' group use airflow_master_dir; every other host
    # uses airflow_worker_dir.
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Ensure cleanup script directory exists
      ansible.builtin.file:
        path: "{{ base_dir }}/bin"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    # The script is taken from a sibling directory of the playbook directory
    # on the control node.
    - name: Copy cleanup_media.py script
      ansible.builtin.copy:
        src: "{{ playbook_dir }}/../yt-ops-services-debug/cleanup_media.py"
        dest: "{{ base_dir }}/bin/cleanup_media.py"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    - name: Install s5cmd for S3 uploads
      become: true
      block:
        - name: Check if s5cmd is already installed
          ansible.builtin.stat:
            path: /usr/local/bin/s5cmd
          register: s5cmd_binary

        # The whole download/extract/install sequence is skipped when the
        # binary is already present at /usr/local/bin/s5cmd.
        - name: Download and install s5cmd
          when: not s5cmd_binary.stat.exists
          block:
            - name: Create temporary directory for s5cmd download
              ansible.builtin.tempfile:
                state: directory
                suffix: s5cmd
              register: s5cmd_temp_dir

            - name: Download s5cmd
              ansible.builtin.get_url:
                url: "https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz"
                dest: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
                mode: '0644'

            - name: Extract s5cmd
              ansible.builtin.unarchive:
                src: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
                dest: "{{ s5cmd_temp_dir.path }}"
                remote_src: true

            - name: Install s5cmd to /usr/local/bin
              ansible.builtin.copy:
                src: "{{ s5cmd_temp_dir.path }}/s5cmd"
                dest: /usr/local/bin/s5cmd
                mode: '0755'
                remote_src: true
          always:
            # Runs whether or not the steps above succeeded, so a failed
            # download or extract does not leave the temporary directory
            # behind. Guarded in case the tempfile task itself failed.
            - name: Clean up temporary directory
              ansible.builtin.file:
                path: "{{ s5cmd_temp_dir.path }}"
                state: absent
              when: s5cmd_temp_dir.path is defined

    # Only asserts that the directory exists. Owner, group and mode are
    # deliberately not managed so the distribution's settings for /var/log
    # are left untouched.
    - name: Ensure log directory exists
      ansible.builtin.file:
        path: "/var/log"
        state: directory
      become: true

    # The cron job below runs as ansible_user and passes this path via
    # --log-file, so the file is pre-created with that owner. Preserving the
    # timestamps keeps repeated runs from reporting a change.
    - name: Ensure cleanup log file exists and is writable by the cron user
      ansible.builtin.file:
        path: /var/log/cleanup_media.log
        state: touch
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
        modification_time: preserve
        access_time: preserve
      become: true

    # The script text is templated when the task runs: base_dir, the default
    # cleanup mode and the max age are written into the generated file.
    - name: Create wrapper script to source .env before running cleanup
      ansible.builtin.copy:
        content: |
          #!/bin/bash
          # Wrapper script to run cleanup_media.py with environment variables from .env

          set -e

          BASE_DIR="{{ base_dir }}"

          # Source .env file if it exists
          if [ -f "${BASE_DIR}/.env" ]; then
              set -a
              source "${BASE_DIR}/.env"
              set +a
          fi

          # Determine cleanup mode based on environment variable or default
          CLEANUP_MODE="${CLEANUP_MODE:-{{ cleanup_settings.mode | default('s3-upload') }}}"

          # Run cleanup script
          cd "${BASE_DIR}"

          if [ "$CLEANUP_MODE" = "s3-upload" ]; then
              # S3 upload mode - uploads to S3 then deletes
              exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
                  --target-dir "${BASE_DIR}/run" \
                  --target-dir "${BASE_DIR}/downloadfiles" \
                  --max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
                  --log-file /var/log/cleanup_media.log \
                  --s3-upload \
                  --s3-bucket "${S3_BUCKET:-stress-media-archive}" \
                  --s3-prefix "archived-media/$(hostname)" \
                  --s5cmd-path /usr/local/bin/s5cmd
          else
              # Simple cleanup mode - just truncate and rename
              exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
                  --target-dir "${BASE_DIR}/run" \
                  --target-dir "${BASE_DIR}/downloadfiles" \
                  --max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
                  --log-file /var/log/cleanup_media.log
          fi
        dest: "{{ base_dir }}/bin/cleanup_media_wrapper.sh"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    # Scheduled at minute 0 of every hour in ansible_user's crontab; wrapper
    # output is piped to logger with the tag cleanup_media. The entry is
    # removed when cleanup_settings.enabled evaluates to false.
    - name: Configure cron job for media cleanup
      ansible.builtin.cron:
        name: "Media cleanup - {{ base_dir }}"
        minute: "0"
        hour: "*"
        job: "{{ base_dir }}/bin/cleanup_media_wrapper.sh 2>&1 | logger -t cleanup_media"
        user: "{{ ansible_user }}"
        state: "{{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}"
      become: true

    # The defaults shown here mirror the ones used in the wrapper script and
    # the cron task above.
    - name: Display installation summary
      ansible.builtin.debug:
        msg: |
          Media cleanup script installed successfully on {{ inventory_hostname }}

          Configuration from cluster.green.yml:
            - Base directory: {{ base_dir }}
            - Enabled: {{ cleanup_settings.enabled | default(true) }}
            - Cleanup mode: {{ cleanup_settings.mode | default('s3-upload') }}
            - Max age: {{ cleanup_settings.max_age_seconds | default(3600) }} seconds
            - Cron schedule: Every hour (0 * * * *)
            - Cron job state: {{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}
            - Log file: /var/log/cleanup_media.log

          Note: You can override the cleanup mode for a single run by setting the
          CLEANUP_MODE environment variable before executing the wrapper script.
          e.g., CLEANUP_MODE=cleanup {{ base_dir }}/bin/cleanup_media_wrapper.sh

          To view logs:
            tail -f /var/log/cleanup_media.log