yt-dlp-dags/ansible/playbook-install-cleanup-media.yml

166 lines
6.2 KiB
YAML

---
# Play header: targets every host in the inventory, gathers facts, and loads
# variables from group_vars/all/vault.yml before any task runs.
- name: 'UTIL-SETUP: Install media cleanup script and configure cron'
  hosts: all
  gather_facts: true
  vars_files:
    - group_vars/all/vault.yml
pre_tasks:
# Derive the environment suffix from the inventory file name:
#   inventory.green.yml -> "green"
#   inventory.yml       -> ""   (no environment suffix)
# The leading "inventory" component is stripped only when it is the whole stem
# or is followed by a dot, so a plain "inventory.yml" yields an empty string
# instead of the literal "inventory". The follow-up include_vars task relies
# on that empty value to fall back to generated_vars.yml.
- name: Set inventory_env fact
  ansible.builtin.set_fact:
    inventory_env: >-
      {{ inventory_file | basename | splitext | first
      | regex_replace('^inventory([.]|$)', '') }}
# Load the generated variables file that matches the inventory environment:
# generated_vars.<env>.yml, or generated_vars.yml when inventory_env is empty.
# with_fileglob produces no items when nothing matches, so a missing file is
# skipped instead of failing the play.
- name: Load environment-specific variables
  ansible.builtin.include_vars:
    file: "{{ item }}"
  with_fileglob:
    - "group_vars/all/generated_vars{{ ('.' ~ inventory_env) if inventory_env else '' }}.yml"
tasks:
# Pick the installation root for this host: members of the "master" group use
# airflow_master_dir, every other host uses airflow_worker_dir.
# group_names lists the groups the current host belongs to, so the test also
# works (and selects the worker directory) when the inventory defines no
# "master" group at all, instead of failing on a missing groups['master'] key.
- name: Define base directory for node
  ansible.builtin.set_fact:
    base_dir: "{{ airflow_master_dir if 'master' in group_names else airflow_worker_dir }}"
# Create <base_dir>/bin, the directory the cleanup script and its wrapper are
# installed into by the following tasks.
- name: Ensure cleanup script directory exists
  become: true
  ansible.builtin.file:
    state: directory
    path: "{{ base_dir }}/bin"
    mode: "0755"
    owner: "{{ ansible_user }}"
    group: "{{ deploy_group }}"
# Push cleanup_media.py from the controller (located relative to this
# playbook's directory) to <base_dir>/bin on the target and make it executable.
- name: Copy cleanup_media.py script
  become: true
  ansible.builtin.copy:
    dest: "{{ base_dir }}/bin/cleanup_media.py"
    src: "{{ playbook_dir }}/../yt-ops-services-debug/cleanup_media.py"
    mode: "0755"
    owner: "{{ ansible_user }}"
    group: "{{ deploy_group }}"
# Install the s5cmd binary to /usr/local/bin unless a file already exists there.
# The release version defaults to 2.2.2 and can be overridden by defining
# s5cmd_version in the inventory or with -e.
# NOTE: only the presence of the binary is checked, so changing the version
# does not upgrade an existing installation.
# NOTE(review): the download is not checksum-verified; consider adding a
# `checksum:` to the get_url task once the expected digest is pinned.
- name: Install s5cmd for S3 uploads
  become: true
  vars:
    s5cmd_release: "{{ s5cmd_version | default('2.2.2') }}"
  block:
    - name: Check if s5cmd is already installed
      ansible.builtin.stat:
        path: /usr/local/bin/s5cmd
      register: s5cmd_binary

    - name: Download and install s5cmd
      when: not s5cmd_binary.stat.exists
      block:
        - name: Create temporary directory for s5cmd download
          ansible.builtin.tempfile:
            state: directory
            suffix: s5cmd
          register: s5cmd_temp_dir

        - name: Download s5cmd
          ansible.builtin.get_url:
            url: "https://github.com/peak/s5cmd/releases/download/v{{ s5cmd_release }}/s5cmd_{{ s5cmd_release }}_Linux-64bit.tar.gz"
            dest: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
            mode: '0644'

        - name: Extract s5cmd
          ansible.builtin.unarchive:
            src: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
            dest: "{{ s5cmd_temp_dir.path }}"
            remote_src: true

        - name: Install s5cmd to /usr/local/bin
          ansible.builtin.copy:
            src: "{{ s5cmd_temp_dir.path }}/s5cmd"
            dest: /usr/local/bin/s5cmd
            mode: '0755'
            remote_src: true
      always:
        # Runs whether or not the steps above succeeded, so a failed download
        # or extraction does not leave the temporary directory behind. The
        # guard covers the case where creating the directory itself failed.
        - name: Clean up temporary directory
          ansible.builtin.file:
            path: "{{ s5cmd_temp_dir.path }}"
            state: absent
          when: s5cmd_temp_dir.path is defined
# Make sure /var/log exists with the standard root ownership.
- name: Ensure log directory exists
  ansible.builtin.file:
    path: /var/log
    state: directory
    owner: root
    group: root
    mode: '0755'
  become: true

# The cron job runs as ansible_user and passes
# --log-file /var/log/cleanup_media.log to the cleanup script. /var/log is
# root-owned with mode 0755, so a non-root user cannot create that file itself;
# create it up front with the right owner. Timestamps are preserved so the task
# reports "changed" only when the file is created or its attributes change.
- name: Ensure cleanup log file exists and is writable by the cron user
  ansible.builtin.file:
    path: /var/log/cleanup_media.log
    state: touch
    owner: "{{ ansible_user }}"
    group: "{{ deploy_group }}"
    mode: '0644'
    modification_time: preserve
    access_time: preserve
  become: true
# Render <base_dir>/bin/cleanup_media_wrapper.sh. The wrapper exports the
# variables from <base_dir>/.env (when present), selects the cleanup mode, and
# execs cleanup_media.py against the run/ and downloadfiles/ directories.
#
# Template-time values (fixed when the playbook runs):
#   base_dir, cleanup_settings.mode (default "s3-upload"),
#   cleanup_settings.max_age_seconds (default 3600)
# Run-time overrides (read from the environment or .env):
#   CLEANUP_MODE, S3_BUCKET (default "stress-media-archive")
#
# base_dir is passed through the `quote` filter so a path containing spaces or
# shell metacharacters cannot break the generated script. The common
# arguments are built once and the S3 flags appended only in s3-upload mode,
# so the two modes cannot drift apart.
- name: Create wrapper script to source .env before running cleanup
  ansible.builtin.copy:
    content: |
      #!/bin/bash
      # Wrapper script to run cleanup_media.py with environment variables from .env
      set -e

      BASE_DIR={{ base_dir | quote }}

      # Source .env file if it exists (set -a exports everything it defines)
      if [ -f "${BASE_DIR}/.env" ]; then
          set -a
          source "${BASE_DIR}/.env"
          set +a
      fi

      # Determine cleanup mode based on environment variable or default
      CLEANUP_MODE="${CLEANUP_MODE:-{{ cleanup_settings.mode | default('s3-upload') }}}"

      cd "${BASE_DIR}"

      # Arguments shared by every cleanup mode
      cleanup_args=(
          --target-dir "${BASE_DIR}/run"
          --target-dir "${BASE_DIR}/downloadfiles"
          --max-age {{ cleanup_settings.max_age_seconds | default(3600) }}
          --log-file /var/log/cleanup_media.log
      )

      if [ "$CLEANUP_MODE" = "s3-upload" ]; then
          # S3 upload mode - uploads to S3 then deletes
          cleanup_args+=(
              --s3-upload
              --s3-bucket "${S3_BUCKET:-stress-media-archive}"
              --s3-prefix "archived-media/$(hostname)"
              --s5cmd-path /usr/local/bin/s5cmd
          )
      fi
      # Any other mode: simple cleanup - just truncate and rename

      exec python3 "${BASE_DIR}/bin/cleanup_media.py" "${cleanup_args[@]}"
    dest: "{{ base_dir }}/bin/cleanup_media_wrapper.sh"
    owner: "{{ ansible_user }}"
    group: "{{ deploy_group }}"
    mode: '0755'
  become: true
# Manage the crontab entry of ansible_user that runs the wrapper at minute 0
# of every hour and pipes its combined stdout/stderr to syslog under the
# "cleanup_media" tag. The entry is removed when cleanup_settings.enabled is
# false; it defaults to enabled when the setting is absent.
- name: Configure cron job for media cleanup
  become: true
  ansible.builtin.cron:
    name: "Media cleanup - {{ base_dir }}"
    state: "{{ cleanup_settings.enabled | default(true) | bool | ternary('present', 'absent') }}"
    user: "{{ ansible_user }}"
    hour: "*"
    minute: "0"
    job: "{{ base_dir }}/bin/cleanup_media_wrapper.sh 2>&1 | logger -t cleanup_media"
# Print a per-host summary of what was installed and the effective settings.
# The defaults shown here must mirror the ones used by the wrapper-script and
# cron tasks: mode "s3-upload", max age 3600 seconds, enabled true.
- name: Display installation summary
  ansible.builtin.debug:
    msg: |
      Media cleanup script installed successfully on {{ inventory_hostname }}
      Configuration from cluster.green.yml:
      - Base directory: {{ base_dir }}
      - Enabled: {{ cleanup_settings.enabled | default(true) }}
      - Cleanup mode: {{ cleanup_settings.mode | default('s3-upload') }}
      - Max age: {{ cleanup_settings.max_age_seconds | default(3600) }} seconds
      - Cron schedule: Every hour (0 * * * *)
      - Cron job state: {{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}
      - Log file: /var/log/cleanup_media.log
      Note: You can override the cleanup mode for a single run by setting the
      CLEANUP_MODE environment variable before executing the wrapper script.
      e.g., CLEANUP_MODE=cleanup {{ base_dir }}/bin/cleanup_media_wrapper.sh
      To view logs:
      tail -f /var/log/cleanup_media.log