yt-dlp-dags/airflow/configs/docker-compose-dl.yaml.j2

# Airflow remote DL worker configuration.
# Use this file on a remote machine to run download workers against a master
# Airflow instance whose services are exposed to this host.
#
# Before running, create a .env file in this directory with:
# MASTER_HOST_IP=<a.b.c.d>  # IP address of the machine running docker-compose-master.yaml
# POSTGRES_PASSWORD=...     # Password for the PostgreSQL database from the master compose file
# REDIS_PASSWORD=...        # Password for Redis from the master compose file
# AIRFLOW_UID=...           # User ID for file permissions; should match the master
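#
# Example .env (all values below are illustrative placeholders, not real credentials):
#   MASTER_HOST_IP=192.0.2.10
#   POSTGRES_PASSWORD=change-me
#   REDIS_PASSWORD=change-me
#   AIRFLOW_UID=50000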
---
x-airflow-common:
  &airflow-common
  # This should point to the same image used by the master.
  # If you built a custom image for master, you need to push it to a registry
  # and reference it here.
  image: ${AIRFLOW_IMAGE_NAME:-pangramia/ytdlp-ops-airflow:latest}
  # Add extra hosts here to allow workers to resolve other hosts by name.
  # This section is auto-generated by Ansible from the inventory.
  extra_hosts:
{% for host in groups['all'] %}
    - "{{ hostvars[host]['inventory_hostname'] }}:{{ hostvars[host]['ansible_host'] | default(hostvars[host]['inventory_hostname']) }}"
{% endfor %}
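  # When rendered by Ansible, the loop above produces one "<hostname>:<address>" entry
  # per inventory host, for example (names and addresses are illustrative only):
  #   extra_hosts:
  #     - "master01:192.0.2.10"
  #     - "dl001:192.0.2.11"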
  env_file:
    # The .env file is located in the project root (e.g., /srv/airflow_dl_worker),
    # so we provide an absolute path to it.
    - "{{ airflow_worker_dir }}/.env"
  environment:
    &airflow-common-env
    AIRFLOW__CORE__PARALLELISM: 128
    AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 64
    AIRFLOW__SCHEDULER__PARSING_PROCESSES: 8
    AIRFLOW__WEBSERVER__WORKERS: 5
    AIRFLOW__WEBSERVER__WORKER_CLASS: "gevent"
    AIRFLOW__LOGGING__SECRET_MASK_EXCEPTION_ARGS: False
    # Recycle gunicorn workers regularly and keep the heartbeat tmp dir in shared
    # memory to help prevent a slow webserver on low-memory hosts.
    GUNICORN_CMD_ARGS: --max-requests 20 --max-requests-jitter 3 --worker-tmp-dir /dev/shm
    # Airflow Core
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
    # Leave empty here; set it to the master's key only if this worker must decrypt
    # Connections/Variables stored in the metadata DB.
    AIRFLOW__CORE__FERNET_KEY: ''
    # Backend connections - these must point to the master node.
    # Set MASTER_HOST_IP, POSTGRES_PASSWORD, and REDIS_PASSWORD in your .env file.
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql+psycopg2://airflow:${{ '{' }}POSTGRES_PASSWORD{{ '}' }}@${{ '{' }}MASTER_HOST_IP{{ '}' }}:{{ postgres_port }}/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:${REDIS_PASSWORD}@${MASTER_HOST_IP}:{{ redis_port }}/0
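    # With the example .env above and the standard PostgreSQL (5432) and Redis (6379)
    # ports, these render to something like (values are illustrative):
    #   AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:change-me@192.0.2.10:5432/airflow
    #   AIRFLOW__CELERY__BROKER_URL=redis://:change-me@192.0.2.10:6379/0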
    # Remote Logging - connection is configured directly via environment variables
    #_PIP_ADDITIONAL_REQUIREMENTS: ${{ '{' }}_PIP_ADDITIONAL_REQUIREMENTS:- apache-airflow-providers-docker apache-airflow-providers-http thrift>=0.16.0,<=0.20.0 backoff>=2.2.1 python-dotenv==1.0.1 psutil>=5.9.0 apache-airflow-providers-amazon{{ '}' }}
    AIRFLOW__LOGGING__REMOTE_LOGGING: "True"
    AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://videos/airflow-logs"
    AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: s3_delivery_connection
    AIRFLOW__LOGGING__ENCRYPT_S3_LOGS: "False"
    #AIRFLOW__LOGGING__LOG_ID_TEMPLATE: "{dag_id}-{task_id}-{run_id}-{try_number}"
    # These must match the keys on the master so the webserver can fetch task logs from workers.
    AIRFLOW__WEBSERVER__SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ=='
    AIRFLOW__CORE__INTERNAL_API_SECRET_KEY: 'qmALu5JCAW0518WGAqkVZQ=='
    AIRFLOW__CORE__LOCAL_SETTINGS_PATH: "/opt/airflow/config/custom_task_hooks.py"
  volumes:
    # Mount dags to get any utility scripts, but the worker will pull the DAG from the DB
    - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags
    # Mount logs locally in case remote logging fails
    - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs
    # Mount config for local settings and other configurations
    - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config
    - ${AIRFLOW_PROJ_DIR:-.}/config/airflow.cfg:/opt/airflow/airflow.cfg
    # Mount download directories
    - ${AIRFLOW_PROJ_DIR:-.}/downloadfiles:/opt/airflow/downloadfiles
    - ${AIRFLOW_PROJ_DIR:-.}/addfiles:/opt/airflow/addfiles
    - ${AIRFLOW_PROJ_DIR:-.}/inputfiles:/opt/airflow/inputfiles
    # Mount the generated pangramia package to ensure workers have the latest version
    - ${AIRFLOW_PROJ_DIR:-.}/pangramia:/app/pangramia
  # Use AIRFLOW_UID from the .env file to fix permission issues. GID is set to 0 for compatibility with the Airflow image.
  user: "${{ '{' }}AIRFLOW_UID:-50000{{ '}' }}:0"

services:
  airflow-worker-dl:
    <<: *airflow-common
    container_name: airflow-worker-dl-1
    hostname: ${HOSTNAME:-dl001}
    # The DL worker listens on the generic dl queue AND its own dedicated queue.
    command: airflow celery worker -q queue-dl,queue-dl-${HOSTNAME:-dl001}
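    # A DAG can therefore pin a task to this specific machine via its dedicated queue,
    # e.g. (illustrative snippet; the task id and command are hypothetical):
    #   BashOperator(task_id="download", bash_command="...", queue="queue-dl-dl001")
    # while queue="queue-dl" lets any DL worker pick the task up.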
    deploy:
      resources:
        limits:
          memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_LIMIT:-8G}
        reservations:
          memory: ${AIRFLOW_WORKER_DOWNLOAD_MEM_RESERV:-2G}
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-dl@$$(hostname)"'
      interval: 30s
      timeout: 30s
      retries: 5
      start_period: 30s
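    # The same ping can be run by hand to debug a failing healthcheck (service name as above):
    #   docker compose exec airflow-worker-dl sh -c \
    #     'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-dl@$(hostname)"'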
    environment:
      <<: *airflow-common-env
      HOSTNAME: ${HOSTNAME:-dl001}
      DUMB_INIT_SETSID: "0"
      AIRFLOW__CELERY__WORKER_QUEUES: "queue-dl,queue-dl-${HOSTNAME:-dl001}"
      AIRFLOW__CELERY__WORKER_TAGS: "dl"
      AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
      AIRFLOW__CELERY__WORKER_AUTOSCALE: "16,8" # max,min concurrent worker processes
      AIRFLOW__CELERY__POOL: "prefork"
      AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
      AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
      AIRFLOW__CELERY__WORKER_NAME: "worker-dl@%h"
      AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
      AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "524288" # 512 MB (value is in KiB)
    ports:
      # Worker log server; the master webserver fetches task logs from this port.
      - "8793:8793"
    networks:
      - default
      - proxynet
    restart: always

  airflow-worker-s3:
    <<: *airflow-common
    container_name: airflow-worker-s3-1
    hostname: ${HOSTNAME:-s3-001}
    # The S3 worker listens on the generic s3 queue AND its own dedicated queue.
    command: airflow celery worker -q queue-s3,queue-s3-${HOSTNAME:-s3-001}
    deploy:
      resources:
        limits:
          memory: ${AIRFLOW_WORKER_S3_MEM_LIMIT:-1G}
        reservations:
          memory: ${AIRFLOW_WORKER_S3_MEM_RESERV:-256M}
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-s3@$$(hostname)"'
      interval: 30s
      timeout: 30s
      retries: 5
      start_period: 30s
    environment:
      <<: *airflow-common-env
      S3_DELIVERY_AWS_ACCESS_KEY_ID: "{{ vault_s3_delivery_access_key_id }}"
      S3_DELIVERY_AWS_SECRET_ACCESS_KEY: "{{ vault_s3_delivery_secret_access_key }}"
      S3_DELIVERY_AWS_REGION: "{{ vault_s3_delivery_aws_region }}"
      S3_DELIVERY_ENDPOINT: "{{ vault_s3_delivery_endpoint }}"
      S3_DELIVERY_BUCKET: "{{ vault_s3_delivery_bucket }}"
      HOSTNAME: ${HOSTNAME:-s3-001}
      DUMB_INIT_SETSID: "0"
      AIRFLOW__CELERY__WORKER_QUEUES: "queue-s3,queue-s3-${HOSTNAME:-s3-001}"
      AIRFLOW__CELERY__WORKER_TAGS: "s3"
      AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
      # S3 tasks are lightweight.
      AIRFLOW__CELERY__WORKER_AUTOSCALE: "2,1"
      AIRFLOW__CELERY__POOL: "prefork"
      AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
      AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
      AIRFLOW__CELERY__WORKER_NAME: "worker-s3@%h"
      AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
      AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB
    networks:
      - default
      - proxynet
    restart: always

  airflow-worker-auth:
    <<: *airflow-common
    container_name: airflow-worker-auth-1
    hostname: ${HOSTNAME:-auth001}
    # The Auth worker listens on the generic auth queue AND its own dedicated queue.
    command: airflow celery worker -q queue-auth,queue-auth-${HOSTNAME:-auth001}
    deploy:
      resources:
        limits:
          memory: ${AIRFLOW_WORKER_AUTH_MEM_LIMIT:-4G}
        reservations:
          memory: ${AIRFLOW_WORKER_AUTH_MEM_RESERV:-1G}
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-auth@$$(hostname)"'
      interval: 30s
      timeout: 30s
      retries: 5
      start_period: 30s
    environment:
      <<: *airflow-common-env
      HOSTNAME: ${HOSTNAME:-auth001}
      DUMB_INIT_SETSID: "0"
      AIRFLOW__CELERY__WORKER_QUEUES: "queue-auth,queue-auth-${HOSTNAME:-auth001}"
      AIRFLOW__CELERY__WORKER_TAGS: "auth"
      AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
      # Auth tasks are not resource-intensive, but concurrency is kept low to avoid overloading the auth service.
      AIRFLOW__CELERY__WORKER_AUTOSCALE: "2,1"
      AIRFLOW__CELERY__POOL: "prefork"
      AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
      AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
      AIRFLOW__CELERY__WORKER_NAME: "worker-auth@%h"
      AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
      AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB
    networks:
      - default
      - proxynet
    restart: always

  # Not started by default: it only runs if the "disabled" profile is explicitly enabled.
  docker-socket-proxy:
    profiles:
      - disabled
    image: tecnativa/docker-socket-proxy:0.1.1
    environment:
      CONTAINERS: 1
      IMAGES: 1
      AUTH: 1
      POST: 1
    privileged: true
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
    restart: always

  airflow-worker-mgmt:
    <<: *airflow-common
    container_name: airflow-worker-mgmt-1
    hostname: ${HOSTNAME:-mgmt001}
    # The Mgmt worker listens on the generic mgmt queue AND its own dedicated queue.
    command: airflow celery worker -q queue-mgmt,queue-mgmt-${HOSTNAME:-mgmt001}
    deploy:
      resources:
        limits:
          memory: ${AIRFLOW_WORKER_MGMT_MEM_LIMIT:-2G}
        reservations:
          memory: ${AIRFLOW_WORKER_MGMT_MEM_RESERV:-512M}
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "worker-mgmt@$$(hostname)"'
      interval: 30s
      timeout: 30s
      retries: 5
      start_period: 30s
    environment:
      <<: *airflow-common-env
      HOSTNAME: ${HOSTNAME:-mgmt001}
      DUMB_INIT_SETSID: "0"
      AIRFLOW__CELERY__WORKER_QUEUES: "queue-mgmt,queue-mgmt-${HOSTNAME:-mgmt001}"
      AIRFLOW__CELERY__WORKER_TAGS: "mgmt"
      AIRFLOW__CELERY__WORKER_PREFETCH_MULTIPLIER: "1"
      # Mgmt tasks are lightweight.
      AIRFLOW__CELERY__WORKER_AUTOSCALE: "4,2"
      AIRFLOW__CELERY__POOL: "prefork"
      AIRFLOW__CELERY__TASK_ACKS_LATE: "False"
      AIRFLOW__CELERY__OPERATION_TIMEOUT: "2.0"
      AIRFLOW__CELERY__WORKER_NAME: "worker-mgmt@%h"
      AIRFLOW__CELERY__WORKER_MAX_TASKS_PER_CHILD: "100"
      AIRFLOW__CELERY__WORKER_MAX_MEMORY_PER_CHILD: "262144" # 256MB
    networks:
      - default
      - proxynet
    restart: always

networks:
  proxynet:
    name: airflow_proxynet
    external: true
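
# The proxynet network is external, so it must already exist on this host before
# `docker compose up`; if it does not, it can be created with:
#   docker network create airflow_proxynet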