# syntax=docker/dockerfile:1
FROM apache/airflow:2.10.3

# Keep in sync with the FROM tag above; the Airflow pip install later pins to this.
ENV AIRFLOW_VERSION=2.10.3

WORKDIR /app

# Install system dependencies (root is required for apt and the tool installs below).
USER root

# DEBIAN_FRONTEND is set inline (not via ENV) so it does not leak into the runtime env.
# Packages sorted alphabetically for diffability; caches/docs removed in the same layer.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        iputils-ping \
        jq \
        mc \
        python3-dev \
        tar \
        tcpdump \
        traceroute \
        unzip \
        vim \
        wget \
        xz-utils && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base

# Ensure the airflow user and group exist with the correct UID/GID and permissions.
# This is done early to allow `COPY --chown` to work correctly.
RUN if ! getent group airflow > /dev/null 2>&1; then \
        groupadd -g 50000 airflow; \
    fi && \
    if ! id -u airflow > /dev/null 2>&1; then \
        useradd -u 50000 -g 50000 -m -s /bin/bash airflow; \
    else \
        usermod -g 50000 airflow; \
    fi && \
    chown -R airflow:airflow /app && \
    chmod -R g+w /app

# Download and install mc (MinIO client).
# NOTE(review): this shadows Midnight Commander's /usr/bin/mc (installed via apt above)
# because /usr/local/bin precedes /usr/bin in PATH — confirm that is intended.
RUN wget -q https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
    chmod +x /usr/local/bin/mc

# Install FFmpeg (static yt-dlp build); download, extract and clean up in one layer.
RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \
    wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \
    mkdir -p /opt/ffmpeg && \
    tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \
    ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \
    ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \
    rm -rf /tmp/ffmpeg.tar.xz

# Install yt-dlp from master
# Temporarily rename pip to bypass the root check in the base image's pip wrapper,
# ensuring a system-wide installation.
RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
    python3 -m pip install --no-cache-dir -U pip hatchling wheel && \
    python3 -m pip install --no-cache-dir --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" && \
    chmod a+x "$(which yt-dlp)" && \
    mv /usr/local/bin/pip.orig /usr/local/bin/pip

# Install Deno
RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \
    unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip

# Install aria2c.
# SECURITY(review): piping an unpinned remote script into bash executes whatever is at the
# HEAD of that repo at build time — consider pinning to a commit SHA and verifying a checksum.
RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash

# Install gost (direct download of binary).
# Extract only the `gost` member so the tarball's README/LICENSE files
# do not get dumped into /usr/local/bin alongside the binary.
RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \
    tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ gost && \
    rm gost_2.12.0_linux_amd64.tar.gz

# Verify installations — fails the build early if any tool is missing or broken.
RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V

# Create version information files
RUN ( \
        echo "--- yt-dlp ---" && \
        yt-dlp --version && \
        echo "" && \
        echo "--- deno ---" && \
        deno --version && \
        echo "" && \
        echo "--- ffmpeg ---" && \
        ffmpeg -version | head -n 1 \
    ) > VERSION-airflow-latest.txt && \
    cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt

# Install base Airflow dependencies as root (system-wide)
# [FIX] Explicitly install a version of botocore compatible with Python 3.12
# to fix a RecursionError when handling S3 remote logs.
# Temporarily rename pip to bypass the root check in the base image's pip wrapper.
RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
    python3 -m pip install --no-cache-dir \
        "apache-airflow==${AIRFLOW_VERSION}" \
        apache-airflow-providers-docker \
        apache-airflow-providers-http \
        apache-airflow-providers-amazon \
        "apache-airflow-providers-celery>=3.3.0" \
        apache-airflow-providers-redis \
        "botocore>=1.34.118" \
        psycopg2-binary \
        "gunicorn==20.1.0" \
        "python-ffmpeg==2.0.12" \
        "ffprobe3" \
        "python-dotenv" \
        "PyYAML" \
        "aria2p" && \
    mv /usr/local/bin/pip.orig /usr/local/bin/pip

# --- Install the custom yt_ops_services package ---
# Copy all the necessary source code for the package.
# The deploy script ensures these files are in the build context.
COPY --chown=airflow:airflow setup.py ./
COPY --chown=airflow:airflow VERSION ./
COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
COPY --chown=airflow:airflow thrift_model ./thrift_model/
COPY --chown=airflow:airflow pangramia ./pangramia/

# Copy the ytops-client tool and its executable.
# --chmod makes the script executable at copy time, avoiding a separate chmod layer.
COPY --chown=airflow:airflow ytops_client ./ytops_client/
COPY --chown=airflow:airflow --chmod=755 bin/ytops-client /app/bin/ytops-client
ENV PATH="/app/bin:${PATH}"

# Install the package in editable mode. This runs setup.py and installs all dependencies
# listed in `install_requires`, making the `yt_ops_services` module available everywhere.
# Bypass the pip root check again.
RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
    python3 -m pip install --no-cache-dir -e . && \
    mv /usr/local/bin/pip.orig /usr/local/bin/pip

# Copy token generator scripts and utils with correct permissions
# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./
# COPY --chown=airflow:airflow utils ./utils/
# COPY --chown=airflow:airflow token_generator ./token_generator/

# Ensure the home directory and all its contents are owned by the airflow user before switching to it.
# This fixes permission issues that can occur if previous RUN commands created files in /home/airflow as root.
# We also make it world-writable to accommodate running the container with a different user ID, which can
# happen in some environments (e.g., OpenShift or with docker-compose user overrides).
# NOTE(review): 777 is deliberately broad for arbitrary-UID runtimes; if that requirement
# ever goes away, prefer group-writable (g+rwX) with the root group instead.
RUN chown -R airflow:airflow /home/airflow && chmod -R 777 /home/airflow

# Switch to airflow user for all subsequent operations
USER airflow

# Expose bgutil plugin to worker path.
# Use ${PYTHONPATH:+:$PYTHONPATH} so an unset PYTHONPATH does not leave a trailing
# colon — an empty PYTHONPATH entry makes Python add the current directory to sys.path.
ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin${PYTHONPATH:+:$PYTHONPATH}