FROM apache/airflow:2.10.3

ENV AIRFLOW_VERSION=2.10.3

WORKDIR /app

# Install system dependencies
USER root
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        vim \
        mc \
        jq \
        build-essential \
        python3-dev \
        wget \
        tar \
        xz-utils \
        iputils-ping \
        curl \
        traceroute \
        tcpdump \
        unzip \
        git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /usr/share/man /usr/share/doc /usr/share/doc-base

# Ensure the airflow user and group exist with the correct UID/GID and permissions.
# This is done early to allow `COPY --chown` to work correctly.
RUN if ! getent group airflow > /dev/null 2>&1; then \
        groupadd -g 50000 airflow; \
    fi && \
    if ! id -u airflow > /dev/null 2>&1; then \
        useradd -u 50000 -g 50000 -m -s /bin/bash airflow; \
    else \
        usermod -g 50000 airflow; \
    fi && \
    chown -R airflow:airflow /app && \
    chmod -R g+w /app

# Download and install mc (MinIO client)
RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
    chmod +x /usr/local/bin/mc
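
# Note: the MinIO client lands in /usr/local/bin/mc, which on the usual Debian PATH takes
# precedence over the Midnight Commander `mc` installed via apt above; call /usr/bin/mc if the
# file manager is needed. A typical runtime configuration step (endpoint and credentials below
# are placeholders, not configured by this image) would be:
#   mc alias set minio http://minio:9000 <ACCESS_KEY> <SECRET_KEY>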

# Install FFmpeg
RUN FFMPEG_URL="https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" && \
    wget -qO /tmp/ffmpeg.tar.xz "$FFMPEG_URL" && \
    mkdir -p /opt/ffmpeg && \
    tar -xf /tmp/ffmpeg.tar.xz -C /opt/ffmpeg --strip-components=1 && \
    ln -sf /opt/ffmpeg/bin/ffmpeg /usr/local/bin/ffmpeg && \
    ln -sf /opt/ffmpeg/bin/ffprobe /usr/local/bin/ffprobe && \
    rm -rf /tmp/ffmpeg.tar.xz

# Install yt-dlp from master.
# Temporarily rename pip to bypass the root check in the base image's pip wrapper,
# ensuring a system-wide installation.
RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
    python3 -m pip install --no-cache-dir -U pip hatchling wheel && \
    python3 -m pip install --no-cache-dir --force-reinstall "yt-dlp[default] @ https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz" && \
    chmod a+x "$(which yt-dlp)" && \
    mv /usr/local/bin/pip.orig /usr/local/bin/pip
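
# The `[default]` extra pulls in yt-dlp's recommended optional dependencies (e.g. for networking
# and metadata handling); the exact set is defined by yt-dlp's own packaging metadata and may
# change between master snapshots.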

# Install Deno
RUN curl -fsSL https://github.com/denoland/deno/releases/latest/download/deno-x86_64-unknown-linux-gnu.zip -o deno.zip && \
    unzip deno.zip && mv deno /usr/local/bin/ && rm deno.zip

# Install aria2c
RUN curl -fsSL https://raw.githubusercontent.com/P3TERX/aria2-builder/master/aria2-install.sh | bash

# Install gost (direct download of the release binary)
RUN wget -q https://github.com/ginuerzh/gost/releases/download/v2.12.0/gost_2.12.0_linux_amd64.tar.gz && \
    tar -xzf gost_2.12.0_linux_amd64.tar.gz -C /usr/local/bin/ && \
    rm gost_2.12.0_linux_amd64.tar.gz
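
# gost is only installed here; it is launched at runtime. A minimal sketch (listen address and
# upstream target are illustrative, nothing in this image configures them):
#   gost -L socks5://:1080                          # expose a local SOCKS5 proxy
#   gost -L :8080 -F socks5://upstream-proxy:1080   # relay through an upstream SOCKS5 proxy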

# Verify installations
RUN ffmpeg -version && deno --version && yt-dlp --version && aria2c --version && gost -V

# Create version information files
RUN ( \
        echo "--- yt-dlp ---" && \
        yt-dlp --version && \
        echo "" && \
        echo "--- deno ---" && \
        deno --version && \
        echo "" && \
        echo "--- ffmpeg ---" && \
        ffmpeg -version | head -n 1 \
    ) > VERSION-airflow-latest.txt && \
    cp VERSION-airflow-latest.txt VERSION-airflow-$(date +%Y%m%d-%H%M%S).txt

# Install base Airflow dependencies as root (system-wide).
# [FIX] Explicitly install a version of botocore compatible with Python 3.12
# to fix a RecursionError when handling S3 remote logs.
# Temporarily rename pip to bypass the root check in the base image's pip wrapper.
RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
    python3 -m pip install --no-cache-dir \
        "apache-airflow==${AIRFLOW_VERSION}" \
        apache-airflow-providers-docker \
        apache-airflow-providers-http \
        apache-airflow-providers-amazon \
        "apache-airflow-providers-celery>=3.3.0" \
        apache-airflow-providers-redis \
        "botocore>=1.34.118" \
        psycopg2-binary \
        "gunicorn==20.1.0" \
        "python-ffmpeg==2.0.12" \
        "ffprobe3" \
        "python-dotenv" && \
    mv /usr/local/bin/pip.orig /usr/local/bin/pip
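
# Note: these packages are installed without Airflow's constraint files. For fully reproducible
# resolver behavior, the official constraints could be passed to pip, e.g. (the Python version in
# the URL is an assumption and must match the base image):
#   --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.12.txt"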

# --- Install the custom yt_ops_services package ---
# Copy all the necessary source code for the package.
# The deploy script ensures these files are in the build context.
COPY --chown=airflow:airflow setup.py ./
COPY --chown=airflow:airflow VERSION ./
COPY --chown=airflow:airflow yt_ops_services ./yt_ops_services/
COPY --chown=airflow:airflow thrift_model ./thrift_model/
COPY --chown=airflow:airflow pangramia ./pangramia/

# Install the package in editable mode. This runs setup.py and installs all dependencies
# listed in `install_requires`, making the `yt_ops_services` module available everywhere.
# Bypass the pip root check again.
RUN mv /usr/local/bin/pip /usr/local/bin/pip.orig && \
    python3 -m pip install --no-cache-dir -e . && \
    mv /usr/local/bin/pip.orig /usr/local/bin/pip
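
# Optional sanity check (not part of the original build; uncomment to fail the build early if the
# editable install is not importable):
# RUN python3 -c "import yt_ops_services"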

# Copy token generator scripts and utils with correct permissions
# COPY --chown=airflow:airflow generate_tokens_direct.mjs ./
# COPY --chown=airflow:airflow utils ./utils/
# COPY --chown=airflow:airflow token_generator ./token_generator/

# Ensure the home directory and all its contents are owned by the airflow user before switching to it.
# This fixes permission issues that can occur if previous RUN commands created files in /home/airflow as root.
# We also make it world-writable to accommodate running the container with a different user ID, which can
# happen in some environments (e.g., OpenShift or with docker-compose user overrides).
RUN chown -R airflow:airflow /home/airflow && chmod -R 777 /home/airflow

# Switch to the airflow user for all subsequent operations
USER airflow

# Expose the bgutil-ytdlp-pot-provider plugin on the worker's PYTHONPATH
ENV PYTHONPATH=/opt/bgutil-ytdlp-pot-provider/plugin:$PYTHONPATH
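
# Nothing in this image populates /opt/bgutil-ytdlp-pot-provider; the plugin directory is expected
# to be provided at runtime (for example via a bind mount or shared volume). If it is absent, the
# entry simply remains an unused PYTHONPATH component.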