# yt-dlp-dags/airflow/Dockerfile
# (file-viewer metadata from the original paste: 91 lines, 3.4 KiB, Docker)

# Base image pins Airflow 2.10.3. AIRFLOW_VERSION below MUST match this tag:
# it is interpolated into `pip install "apache-airflow==${AIRFLOW_VERSION}"`
# later so provider installs cannot drift the core Airflow version.
FROM apache/airflow:2.10.3
ENV AIRFLOW_VERSION=2.10.3
# /app is the install root for the yt_ops_services package; ownership is fixed
# up for the airflow user further down in this file.
WORKDIR /app
# Install system dependencies: build toolchain for pip wheels
# (build-essential, python3-dev) plus shell utilities used by DAG tasks.
# update + install + cleanup stay in ONE layer so the apt cache never
# persists into the image; packages sorted alphabetically for diffability.
USER root
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        jq \
        mc \
        python3-dev \
        tar \
        vim \
        wget \
        xz-utils && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base
# NOTE(review): the apt `mc` here is Midnight Commander (/usr/bin/mc); the
# MinIO client installed below at /usr/local/bin/mc shadows it on PATH.
# Download and install mc (the MinIO client).
# NOTE(review): this /usr/local/bin/mc shadows Midnight Commander's
# /usr/bin/mc on PATH, and the download is not checksum-verified —
# TODO: pin a dated mc release and verify its sha256.
# `mc --version` at the end fails the build early if the binary is broken,
# mirroring the `ffmpeg -version` smoke test used for FFmpeg below.
RUN wget -q https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
    chmod +x /usr/local/bin/mc && \
    /usr/local/bin/mc --version
# Download and install the custom FFmpeg build from yt-dlp's recommended
# source, symlinking ffmpeg/ffprobe onto PATH and smoke-testing the binary.
# NOTE(review): the "latest" release tag is a moving target, so this layer is
# not reproducible — pin a dated FFmpeg-Builds release tag for deterministic
# builds.
RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \
    echo "Downloading FFmpeg from $FFMPEG_URL" && \
    wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \
    mkdir -p /opt/ffmpeg && \
    tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \
    ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \
    ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \
    rm -f /tmp/ffmpeg.tar.xz && \
    ffmpeg -version
# Ensure an `airflow` group (GID 1001) and user (UID 1003) exist so container
# file ownership can line up with host bind mounts using those IDs.
# NOTE(review): the apache/airflow base image presumably already ships an
# `airflow` user (commonly UID 50000) — in that case only the usermod branch
# runs, which ADDS the `airflow` group as a supplementary group and does NOT
# change the existing UID/primary GID. Confirm this matches the host mounts.
RUN if ! getent group airflow > /dev/null 2>&1; then \
groupadd -g 1001 airflow; \
fi && \
# If the user already exists, just attach it to the airflow group;
# otherwise create it fresh with the pinned UID/GID and a login shell.
if id -u airflow > /dev/null 2>&1; then \
usermod -a -G airflow airflow; \
else \
useradd -u 1003 -g 1001 -m -s /bin/bash airflow; \
fi && \
# Hand /app to the airflow user and make it group-writable so group members
# (e.g. processes sharing GID 1001) can write into the package root.
chown -R airflow:airflow /app && \
chmod g+w /app
# Switch to the airflow user for package installation (pip installs land in
# the airflow user's site-packages, matching the base image layout).
USER airflow
# Install base Airflow dependencies.
# apache-airflow is re-pinned to the base image tag so the provider installs
# below cannot pull in a different core version.
# [FIX] Explicitly install a version of botocore compatible with Python 3.12
# to fix a RecursionError when handling S3 remote logs.
# NOTE(review): the provider packages are unpinned and no Airflow constraints
# file is used, so this resolve is not reproducible — presumably the explicit
# botocore/gunicorn pins were chosen to avoid known conflicts; consider adding
# `--constraint` with the official Airflow constraints URL. Verify that
# gunicorn==20.1.0 does not downgrade the base image's gunicorn.
RUN pip install --no-cache-dir \
"apache-airflow==${AIRFLOW_VERSION}" \
apache-airflow-providers-docker \
apache-airflow-providers-http \
apache-airflow-providers-amazon \
"botocore>=1.34.118" \
psycopg2-binary \
"gunicorn==20.1.0" \
"python-ffmpeg==2.0.12" \
"ffprobe3"
# --- Install the custom yt_ops_services package ---
# Bring in everything setup.py needs; the deploy script guarantees these
# paths exist in the build context. The two single-file copies share one
# COPY since they land in the same destination.
COPY --chown=airflow:airflow setup.py VERSION ./
COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
COPY --chown=airflow:airflow thrift_model ./thrift_model/
COPY --chown=airflow:airflow pangramia ./pangramia/
# Editable install: runs setup.py, pulls every dependency listed in
# `install_requires`, and leaves `yt_ops_services` importable from /app
# everywhere in the image.
RUN pip install --no-cache-dir -e .
# Copy token generator scripts and utils with correct permissions
# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./
# COPY --chown=airflow:airflow utils ./utils/
# COPY --chown=airflow:airflow token_generator ./token_generator/
# --- Always update yt-dlp to latest nightly on container start ---
# This is done in the entrypoint so every worker run uses the freshest build.
# --chmod (BuildKit) sets the execute bit at COPY time, avoiding the extra
# layer (and duplicated file content) a follow-up `RUN chmod +x` would create.
COPY --chown=airflow:airflow --chmod=0755 update-yt-dlp.sh /usr/local/bin/update-yt-dlp.sh
# Expose the bgutil PO-token provider plugin on the worker's module path.
# ${PYTHONPATH:+:$PYTHONPATH} appends the inherited value only when it is
# non-empty; the original unconditional `:$PYTHONPATH` left a trailing ":"
# when PYTHONPATH was unset, and an empty PYTHONPATH entry puts the current
# working directory on sys.path.
# NOTE(review): /opt/bgutil-ytdlp-pot-provider is never created in this
# Dockerfile — presumably mounted or baked in elsewhere; confirm.
ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin${PYTHONPATH:+:$PYTHONPATH}