# syntax=docker/dockerfile:1

# Single source of truth for the Airflow version: used for both the base-image
# tag and the AIRFLOW_VERSION runtime env var (previously duplicated).
ARG AIRFLOW_VERSION=2.10.3
FROM apache/airflow:${AIRFLOW_VERSION}

# An ARG declared before FROM is only visible in FROM lines; redeclare it
# inside the stage so it can be exported into the runtime environment.
ARG AIRFLOW_VERSION
ENV AIRFLOW_VERSION=${AIRFLOW_VERSION}

WORKDIR /app

# Install system dependencies — root is required for apt and /usr/local writes.
# TODO(review): pin package versions (hadolint DL3008) for reproducible builds.
USER root
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        jq \
        mc \
        python3-dev \
        tar \
        vim \
        wget \
        xz-utils && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base

# Download and install mc (MinIO client).
# NOTE(review): this shadows the apt-installed Midnight Commander binary, which
# is also named "mc", because /usr/local/bin precedes /usr/bin on PATH — confirm
# both tools are really needed, or drop the apt "mc" package.
# TODO(review): fetch a pinned release and verify its checksum instead of the
# moving "release" URL.
RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
    chmod +x /usr/local/bin/mc

# Download and install the custom FFmpeg build from yt-dlp's recommended source.
# NOTE(review): the "latest" tag is a moving target; pin a dated release tag for
# reproducible builds. The version print at the end fails the build early if the
# extracted binary is broken.
RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \
    echo "Downloading FFmpeg from $FFMPEG_URL" && \
    wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \
    mkdir -p /opt/ffmpeg && \
    tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \
    ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \
    ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \
    rm -f /tmp/ffmpeg.tar.xz && \
    ffmpeg -version

# Ensure an "airflow" group and user exist and that they own /app.
# The upstream apache/airflow image normally ships an airflow user already, so
# the useradd branch only fires on unusual bases; the fixed UID/GID (1003/1001)
# keep host-volume ownership stable across rebuilds.
RUN if ! getent group airflow > /dev/null 2>&1; then \
        groupadd -g 1001 airflow; \
    fi && \
    # Add the existing airflow user to the group, or create the user outright.
    if id -u airflow > /dev/null 2>&1; then \
        usermod -a -G airflow airflow; \
    else \
        useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \
    fi && \
    chown -R airflow:airflow /app && \
    chmod g+w /app

# Switch to the airflow user for package installation — never pip-install as root.
USER airflow

# Install base Airflow dependencies
# [FIX] Explicitly install a version of botocore compatible with Python 3.12
# to fix a RecursionError when handling S3 remote logs.
# Install Airflow itself (pinned to the base-image version via the constraint
# on AIRFLOW_VERSION) plus the provider packages this deployment uses.
# botocore floor works around a Python 3.12 RecursionError with S3 remote logs.
# TODO(review): pin provider and psycopg2-binary versions for reproducibility.
RUN pip install --no-cache-dir \
        "apache-airflow==${AIRFLOW_VERSION}" \
        apache-airflow-providers-docker \
        apache-airflow-providers-http \
        apache-airflow-providers-amazon \
        "botocore>=1.34.118" \
        psycopg2-binary \
        "gunicorn==20.1.0"

# --- Install the custom yt_ops_services package ---
# Copy all the necessary source code for the package.
# The deploy script ensures these files are in the build context.
# setup.py and VERSION are copied in one layer; the package directories stay
# separate so a change to one does not invalidate the others' cache entries.
COPY --chown=airflow:airflow setup.py VERSION ./
COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
COPY --chown=airflow:airflow thrift_model ./thrift_model/
COPY --chown=airflow:airflow pangramia ./pangramia/

# Install the package in editable mode. This runs setup.py and installs all
# dependencies listed in `install_requires`, making the `yt_ops_services`
# module available everywhere.
RUN pip install --no-cache-dir -e .

# Copy token generator scripts and utils with correct permissions
# (currently disabled — retained for reference)
# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./
# COPY --chown=airflow:airflow utils ./utils/
# COPY --chown=airflow:airflow token_generator ./token_generator/

# --- Always update yt-dlp to latest nightly on container start ---
# This is done in the entrypoint so every worker run uses the freshest build.
COPY --chown=airflow:airflow update-yt-dlp.sh /usr/local/bin/update-yt-dlp.sh
RUN chmod +x /usr/local/bin/update-yt-dlp.sh

# Expose the bgutil plugin on the worker's Python path.
# ${PYTHONPATH:+:${PYTHONPATH}} only appends the separator when PYTHONPATH is
# already set — a bare ":$PYTHONPATH" with an unset variable would leave a
# trailing colon, and an empty PYTHONPATH entry puts the CWD on sys.path.
ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin${PYTHONPATH:+:${PYTHONPATH}}