---
# UTIL-SETUP: installs cleanup_media.py plus a wrapper script on every host,
# installs the s5cmd binary used for S3 uploads, and manages an hourly cron
# entry that runs the wrapper.
#
# Variables read by this play (defined outside this file):
#   airflow_master_dir / airflow_worker_dir - base directory per node role
#   deploy_group                            - group owning installed files
#   cleanup_settings.enabled                - cron entry present/absent (default true)
#   cleanup_settings.mode                   - wrapper default mode (default 's3-upload')
#   cleanup_settings.max_age_seconds        - value passed as --max-age (default 3600)
- name: "UTIL-SETUP: Install media cleanup script and configure cron"
  hosts: all
  gather_facts: true
  vars_files:
    - "group_vars/all/vault.yml"

  pre_tasks:
    # Takes the inventory file's base name, drops the extension and removes the
    # 'inventory.' prefix (e.g. 'inventory.green.ini' -> 'green').
    - name: Set inventory_env fact
      ansible.builtin.set_fact:
        inventory_env: "{{ inventory_file | basename | splitext | first | replace('inventory.', '') }}"

    # with_fileglob only yields files that exist, so a missing
    # environment-specific vars file results in no include rather than an error.
    - name: Load environment-specific variables
      ansible.builtin.include_vars: "{{ item }}"
      with_fileglob:
        - "group_vars/all/generated_vars{{ '.' + inventory_env if inventory_env else '' }}.yml"

  tasks:
    # Hosts in the 'master' group use airflow_master_dir; all others use
    # airflow_worker_dir.
    - name: Define base directory for node
      ansible.builtin.set_fact:
        base_dir: "{{ airflow_master_dir if inventory_hostname in groups['master'] else airflow_worker_dir }}"

    - name: Ensure cleanup script directory exists
      ansible.builtin.file:
        path: "{{ base_dir }}/bin"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    - name: Copy cleanup_media.py script
      ansible.builtin.copy:
        src: "{{ playbook_dir }}/../yt-ops-services-debug/cleanup_media.py"
        dest: "{{ base_dir }}/bin/cleanup_media.py"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    - name: Install s5cmd for S3 uploads
      become: true
      block:
        - name: Check if s5cmd is already installed
          ansible.builtin.stat:
            path: /usr/local/bin/s5cmd
          register: s5cmd_binary

        # Skipped entirely when the binary is already present.
        - name: Download and install s5cmd
          when: not s5cmd_binary.stat.exists
          block:
            - name: Create temporary directory for s5cmd download
              ansible.builtin.tempfile:
                state: directory
                suffix: s5cmd
              register: s5cmd_temp_dir

            - name: Download s5cmd
              ansible.builtin.get_url:
                url: "https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz"
                dest: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
                mode: '0644'

            - name: Extract s5cmd
              ansible.builtin.unarchive:
                src: "{{ s5cmd_temp_dir.path }}/s5cmd.tar.gz"
                dest: "{{ s5cmd_temp_dir.path }}"
                remote_src: true

            - name: Install s5cmd to /usr/local/bin
              ansible.builtin.copy:
                src: "{{ s5cmd_temp_dir.path }}/s5cmd"
                dest: /usr/local/bin/s5cmd
                mode: '0755'
                remote_src: true
          always:
            # Runs even if download/extract/install failed, so the temporary
            # directory is not left behind. The guard covers the case where the
            # tempfile task itself failed and no path was registered.
            - name: Clean up temporary directory
              ansible.builtin.file:
                path: "{{ s5cmd_temp_dir.path }}"
                state: absent
              when: s5cmd_temp_dir.path is defined

    # Only asserts existence. Owner/group/mode are deliberately not managed so
    # the distribution's settings for /var/log are left untouched.
    - name: Ensure log directory exists
      ansible.builtin.file:
        path: "/var/log"
        state: directory
      become: true

    # The cron job below runs as ansible_user and the wrapper passes
    # --log-file /var/log/cleanup_media.log, so the file is pre-created with
    # that user as owner. Timestamps are preserved to keep the task idempotent.
    - name: Ensure cleanup log file exists and is writable by the cron user
      ansible.builtin.file:
        path: /var/log/cleanup_media.log
        state: touch
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0644'
        modification_time: preserve
        access_time: preserve
      become: true

    # The Jinja expressions are rendered at deploy time; the resulting script
    # contains literal values for BASE_DIR, the default mode and --max-age.
    - name: Create wrapper script to source .env before running cleanup
      ansible.builtin.copy:
        content: |
          #!/bin/bash
          # Wrapper script to run cleanup_media.py with environment variables from .env
          set -e

          BASE_DIR="{{ base_dir }}"

          # Source .env file if it exists
          if [ -f "${BASE_DIR}/.env" ]; then
              set -a
              source "${BASE_DIR}/.env"
              set +a
          fi

          # Determine cleanup mode based on environment variable or default
          CLEANUP_MODE="${CLEANUP_MODE:-{{ cleanup_settings.mode | default('s3-upload') }}}"

          # Run cleanup script
          cd "${BASE_DIR}"

          if [ "$CLEANUP_MODE" = "s3-upload" ]; then
              # S3 upload mode - uploads to S3 then deletes
              exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
                  --target-dir "${BASE_DIR}/run" \
                  --target-dir "${BASE_DIR}/downloadfiles" \
                  --max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
                  --log-file /var/log/cleanup_media.log \
                  --s3-upload \
                  --s3-bucket "${S3_BUCKET:-stress-media-archive}" \
                  --s3-prefix "archived-media/$(hostname)" \
                  --s5cmd-path /usr/local/bin/s5cmd
          else
              # Simple cleanup mode - just truncate and rename
              exec python3 "${BASE_DIR}/bin/cleanup_media.py" \
                  --target-dir "${BASE_DIR}/run" \
                  --target-dir "${BASE_DIR}/downloadfiles" \
                  --max-age {{ cleanup_settings.max_age_seconds | default(3600) }} \
                  --log-file /var/log/cleanup_media.log
          fi
        dest: "{{ base_dir }}/bin/cleanup_media_wrapper.sh"
        owner: "{{ ansible_user }}"
        group: "{{ deploy_group }}"
        mode: '0755'
      become: true

    # Hourly at minute 0. The entry is removed when cleanup_settings.enabled is
    # false. Wrapper stdout/stderr is forwarded to logger under the
    # 'cleanup_media' tag.
    - name: Configure cron job for media cleanup
      ansible.builtin.cron:
        name: "Media cleanup - {{ base_dir }}"
        minute: "0"
        hour: "*"
        job: "{{ base_dir }}/bin/cleanup_media_wrapper.sh 2>&1 | logger -t cleanup_media"
        user: "{{ ansible_user }}"
        state: "{{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}"
      become: true

    # The defaults shown here must match the defaults used in the wrapper
    # script and cron task above.
    - name: Display installation summary
      ansible.builtin.debug:
        msg: |
          Media cleanup script installed successfully on {{ inventory_hostname }}

          Configuration from cluster.green.yml:
          - Base directory: {{ base_dir }}
          - Enabled: {{ cleanup_settings.enabled | default(true) }}
          - Cleanup mode: {{ cleanup_settings.mode | default('s3-upload') }}
          - Max age: {{ cleanup_settings.max_age_seconds | default(3600) }} seconds
          - Cron schedule: Every hour (0 * * * *)
          - Cron job state: {{ 'present' if cleanup_settings.enabled | default(true) | bool else 'absent' }}
          - Log file: /var/log/cleanup_media.log

          Note: You can override the cleanup mode for a single run by setting the
          CLEANUP_MODE environment variable before executing the wrapper script.
          e.g., CLEANUP_MODE=cleanup {{ base_dir }}/bin/cleanup_media_wrapper.sh

          To view logs: tail -f /var/log/cleanup_media.log