Updates during migration; some fixes on download formats

This commit is contained in:
aperez 2025-11-20 09:21:25 +03:00
parent 0ead029b85
commit 3709ba6f81
14 changed files with 387 additions and 127 deletions

View File

@ -381,16 +381,16 @@ with DAG(
type="string",
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
title="[Worker Param] Download Format Preset",
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
),
'download_format_custom': Param(
'18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
'18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
type="string",
title="[Worker Param] Custom Download Format",
description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
),
'downloader': Param(
'cli',
'py',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="[Worker Param] Download Tool",

View File

@ -665,7 +665,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
elif format_preset == 'formats_0':
download_format = '18,140'
elif format_preset == 'formats_2':
download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
download_format = '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
elif format_preset == 'formats_3':
download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
else:
@ -689,34 +689,35 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
if downloader == 'py':
cmd.extend(['--output-dir', download_dir])
# The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
py_extra_args = []
if params.get('fragment_retries'):
py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
if params.get('limit_rate'):
py_extra_args.extend(['--limit-rate', params['limit_rate']])
if params.get('socket_timeout'):
py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
if params.get('min_sleep_interval'):
py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
if params.get('max_sleep_interval'):
py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
if params.get('yt_dlp_test_mode'):
py_extra_args.append('--test')
existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
final_extra_args = existing_extra + py_extra_args
if final_extra_args:
cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
# WORKAROUND: Due to an incompatibility between ytops-client and a recent yt-dlp
# library update, passing --extra-ytdlp-args to the 'py' downloader causes a crash.
# These arguments are being omitted until ytops-client is fixed.
# This affects: fragment_retries, limit_rate, socket_timeout, sleep_interval,
# max_sleep_interval, yt_dlp_test_mode, and the 'yt_dlp_extra_args' DAG param.
has_extra_args = (
params.get('fragment_retries') or params.get('limit_rate') or
params.get('socket_timeout') or params.get('min_sleep_interval') or
params.get('max_sleep_interval') or params.get('yt_dlp_test_mode') or
params.get('yt_dlp_extra_args')
)
if has_extra_args:
logger.warning("WORKAROUND: Omitting --extra-ytdlp-args for 'py' downloader due to a known incompatibility. "
"Some download parameters will be ignored.")
elif downloader == 'aria-rpc':
cmd.extend([
'--aria-host', params.get('aria_host', '172.17.0.1'),
'--aria-port', str(params.get('aria_port', 6800)),
'--aria-secret', params.get('aria_secret'),
'--wait', '--auto-merge-fragments',
'--fragments-dir', download_dir,
'--wait',
'--output-dir', download_dir,
])
if 'dashy' in format_selector:
cmd.extend([
'--auto-merge-fragments',
'--fragments-dir', download_dir,
])
if params.get('yt_dlp_cleanup_mode'):
cmd.append('--cleanup')
@ -1495,16 +1496,16 @@ with DAG(
type="string",
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
title="Download Format Preset",
description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
),
'download_format_custom': Param(
'18,140,299/298/137/136/135/134/133',
'18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
type="string",
title="Custom Download Format",
description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
),
'downloader': Param(
'cli',
'py',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="Download Tool",

View File

@ -260,16 +260,16 @@ with DAG(
type="string",
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
title="[Worker Param] Download Format Preset",
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
),
'download_format_custom': Param(
'18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
'18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
type="string",
title="[Worker Param] Custom Download Format",
description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'."
),
'downloader': Param(
'cli',
'py',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="[Worker Param] Download Tool",

View File

@ -300,7 +300,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
elif format_preset == 'formats_0':
download_format = '18,140'
elif format_preset == 'formats_2':
download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
download_format = '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
elif format_preset == 'formats_3':
download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318'
else:
@ -321,34 +321,35 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context
if downloader == 'py':
cmd.extend(['--output-dir', download_dir])
# The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args
py_extra_args = []
if params.get('fragment_retries'):
py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])])
if params.get('limit_rate'):
py_extra_args.extend(['--limit-rate', params['limit_rate']])
if params.get('socket_timeout'):
py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])])
if params.get('min_sleep_interval'):
py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])])
if params.get('max_sleep_interval'):
py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])])
if params.get('yt_dlp_test_mode'):
py_extra_args.append('--test')
existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '')
final_extra_args = existing_extra + py_extra_args
if final_extra_args:
cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)])
# WORKAROUND: Due to an incompatibility between ytops-client and a recent yt-dlp
# library update, passing --extra-ytdlp-args to the 'py' downloader causes a crash.
# These arguments are being omitted until ytops-client is fixed.
# This affects: fragment_retries, limit_rate, socket_timeout, sleep_interval,
# max_sleep_interval, yt_dlp_test_mode, and the 'yt_dlp_extra_args' DAG param.
has_extra_args = (
params.get('fragment_retries') or params.get('limit_rate') or
params.get('socket_timeout') or params.get('min_sleep_interval') or
params.get('max_sleep_interval') or params.get('yt_dlp_test_mode') or
params.get('yt_dlp_extra_args')
)
if has_extra_args:
logger.warning("WORKAROUND: Omitting --extra-ytdlp-args for 'py' downloader due to a known incompatibility. "
"Some download parameters will be ignored.")
elif downloader == 'aria-rpc':
cmd.extend([
'--aria-host', params.get('aria_host', '172.17.0.1'),
'--aria-port', str(params.get('aria_port', 6800)),
'--aria-secret', params.get('aria_secret'),
'--wait', '--auto-merge-fragments',
'--fragments-dir', download_dir,
'--wait',
'--output-dir', download_dir,
])
if 'dashy' in format_selector:
cmd.extend([
'--auto-merge-fragments',
'--fragments-dir', download_dir,
])
if params.get('yt_dlp_cleanup_mode'):
cmd.append('--cleanup')
@ -814,16 +815,16 @@ with DAG(
type="string",
enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'],
title="Download Format Preset",
description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318"
),
'download_format_custom': Param(
'ba[ext=m4a]/bestaudio/best',
'18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy',
type="string",
title="Custom Download Format",
description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')."
),
'downloader': Param(
'cli',
'py',
type="string",
enum=['py', 'aria-rpc', 'cli'],
title="Download Tool",

View File

@ -41,14 +41,6 @@
force_source: true
when: not fast_deploy | default(false)
- name: Build Camoufox image from local Dockerfile
community.docker.docker_image:
name: "camoufox:latest"
build:
path: "{{ airflow_worker_dir }}/camoufox"
source: build
force_source: true
when: not fast_deploy | default(false)
- name: Pull ytdlp-ops-server image only
community.docker.docker_image:
@ -67,7 +59,6 @@
files:
- configs/docker-compose-dl.yaml
- configs/docker-compose-ytdlp-ops.yaml
- configs/docker-compose.camoufox.yaml
state: present
remove_orphans: true
pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"

View File

@ -0,0 +1,63 @@
---
# Playbook: deploy the Airflow download-worker (DL) stack to every host in
# the 'airflow_workers' inventory group.
#
# Supports an optional "fast deploy" mode: when fast_deploy=true, Docker image
# builds and pulls are skipped and existing local images are reused
# (see the `when: not fast_deploy | default(false)` guards and the
# `pull: 'never' ...` expression on the compose task).
- name: Deploy Airflow DL Worker Stack
  hosts: airflow_workers
  vars_files:
    - group_vars/all.yml
    - group_vars/all/vault.yml
  pre_tasks:
    # Informational only; printed once per run when fast deploy is active.
    - name: Announce fast deploy mode if enabled
      debug:
        msg: "🚀 FAST DEPLOY MODE ENABLED: Skipping Docker image builds and pulls. 🚀"
      when: fast_deploy | default(false)
      run_once: true
  tasks:
    - name: Ensure worker directory exists
      file:
        path: "{{ airflow_worker_dir }}"
        state: directory
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"
    # Rendered as '.env' so docker compose picks it up automatically.
    # mode 0600: the env file may carry secrets, keep it owner-readable only.
    - name: Template .env.worker
      template:
        src: templates/.env.worker.j2
        dest: "{{ airflow_worker_dir }}/.env"
        mode: '0600'
    - name: Template docker-compose file for Airflow worker
      template:
        src: ../airflow/configs/docker-compose-dl.yaml.j2
        dest: "{{ airflow_worker_dir }}/configs/docker-compose-dl.yaml"
        mode: '0644'
    # force_source: true rebuilds even if an image with this name exists,
    # so code changes in the build context are always picked up.
    - name: Build Airflow worker image from local Dockerfile
      community.docker.docker_image:
        name: "{{ airflow_image_name }}"
        build:
          path: "{{ airflow_worker_dir }}"
          dockerfile: "Dockerfile"
        source: build
        force_source: true
      when: not fast_deploy | default(false)
    - name: Pull ytdlp-ops-server image only
      community.docker.docker_image:
        name: "{{ ytdlp_ops_image }}"
        source: pull
      when: not fast_deploy | default(false)
    # One-shot container that writes the camoufox/envoy config files into the
    # worker directory; runs every deploy (no fast_deploy guard) since it is cheap.
    - name: Generate dynamic configs (camoufox + envoy)
      shell:
        cmd: "docker compose -f configs/docker-compose.config-generate.yaml run --rm config-generator"
        chdir: "{{ airflow_worker_dir }}"
    # 'pull: never' in fast-deploy mode forces reuse of local images;
    # otherwise 'missing' pulls only images not present locally.
    - name: Start worker services
      community.docker.docker_compose_v2:
        project_src: "{{ airflow_worker_dir }}"
        files:
          - configs/docker-compose-dl.yaml
          - configs/docker-compose-ytdlp-ops.yaml
        state: present
        remove_orphans: true
        pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}"

View File

@ -111,7 +111,42 @@
name: airflow_proxynet
driver: bridge
post_tasks:
- name: Deploy master
import_playbook: playbook-master.yml
when: inventory_hostname in groups['airflow_master']
- name: Deploy workers
import_playbook: playbook-worker.yml
when: inventory_hostname in groups['airflow_workers']
- name: Deploy and Reload Airflow Task Hook
hosts: all
gather_facts: no
vars_files:
- "{{ inventory_dir }}/group_vars/all/generated_vars.yml"
- "{{ inventory_dir }}/group_vars/all/vault.yml"
tasks:
- name: Ensure config directory exists on MASTER server
when: inventory_hostname in groups['airflow_master']
ansible.builtin.file:
path: "{{ airflow_master_dir }}/config"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
become: yes
- name: Ensure config directory exists on WORKER server
when: inventory_hostname in groups['airflow_workers']
ansible.builtin.file:
path: "{{ airflow_worker_dir }}/config"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
become: yes
- name: Sync custom_task_hooks.py to MASTER server
when: inventory_hostname in groups['airflow_master']
synchronize:
@ -157,11 +192,3 @@
cmd: "docker compose restart airflow-worker-dl airflow-worker-auth"
chdir: "{{ airflow_worker_dir }}"
become: yes
- name: Deploy master
import_playbook: playbook-master.yml
when: inventory_hostname in groups['airflow_master']
- name: Deploy workers
import_playbook: playbook-worker.yml
when: inventory_hostname in groups['airflow_workers']

View File

@ -6,6 +6,26 @@
- "{{ inventory_dir }}/group_vars/all/generated_vars.yml"
- "{{ inventory_dir }}/group_vars/all/vault.yml"
tasks:
- name: Ensure config directory exists on MASTER server
when: inventory_hostname in groups['airflow_master']
ansible.builtin.file:
path: "{{ airflow_master_dir }}/config"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
become: yes
- name: Ensure config directory exists on WORKER server
when: inventory_hostname in groups['airflow_workers']
ansible.builtin.file:
path: "{{ airflow_worker_dir }}/config"
state: directory
owner: "{{ ansible_user }}"
group: "{{ ansible_user }}"
mode: '0755'
become: yes
- name: Sync custom_task_hooks.py to MASTER server
when: inventory_hostname in groups['airflow_master']
synchronize:

View File

@ -147,20 +147,53 @@
deploy_group_gid: "0"
when: deploy_group_gid is not defined or deploy_group_gid == ""
- name: Generate Docker Compose configurations
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator
args:
chdir: "{{ airflow_master_dir }}"
- name: Ensure master directory exists
ansible.builtin.file:
path: "{{ airflow_master_dir }}"
state: directory
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Sync python packages to master for build context
ansible.posix.synchronize:
src: "../{{ item }}/"
dest: "{{ airflow_master_dir }}/{{ item }}/"
rsync_opts:
- "--delete"
- "--exclude=.DS_Store"
- "--exclude=__pycache__"
- "--exclude='*.pyc'"
recursive: yes
perms: yes
loop:
- "thrift_model"
- "pangramia"
- "ytops_client"
- "yt_ops_services"
become: yes
become_user: "{{ ansible_user }}"
register: config_generator_result
changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout"
- name: Show config generator output
ansible.builtin.debug:
var: config_generator_result.stdout_lines
when: config_generator_result.changed
- name: Ensure bin directory exists on master for build context
ansible.builtin.file:
path: "{{ airflow_master_dir }}/bin"
state: directory
mode: '0755'
become: yes
become_user: "{{ ansible_user }}"
- name: Sync root files and client utilities to master for build context
ansible.posix.synchronize:
src: "../{{ item }}"
dest: "{{ airflow_master_dir }}/{{ item }}"
perms: yes
loop:
- "setup.py"
- "VERSION"
- "bin/ytops-client"
become: yes
become_user: "{{ ansible_user }}"
- name: Ensure Airflow project directory is writable by the container user (UID 50000)
ansible.builtin.file:
@ -201,6 +234,21 @@
- name: Include Docker health check tasks
include_tasks: tasks/docker_health_check.yml
- name: Generate Docker Compose configurations
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator
args:
chdir: "{{ airflow_master_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: config_generator_result
changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout"
- name: Show config generator output
ansible.builtin.debug:
var: config_generator_result.stdout_lines
when: config_generator_result.changed
roles:
- ytdlp-master
- airflow-master

View File

@ -11,6 +11,13 @@
msg: "Syncing local dev files to {{ inventory_hostname }} at {{ airflow_worker_dir }}"
tasks:
- name: Ensure python3-pip is installed
ansible.builtin.apt:
name: python3-pip
state: present
update_cache: yes
become: yes
- name: Check if yt-dlp is installed
ansible.builtin.command: which yt-dlp
register: ytdlp_check

View File

@ -138,52 +138,13 @@
group: "{{ deploy_group }}"
become: yes
- name: Build local Docker images (e.g., camoufox)
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build
args:
chdir: "{{ airflow_worker_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: docker_build_result
changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout"
- name: Pull pre-built Docker images for ytdlp-ops services
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable
args:
chdir: "{{ airflow_worker_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: docker_pull_result
retries: 3
delay: 10
changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout"
- name: Show docker pull output
ansible.builtin.debug:
var: docker_pull_result.stdout_lines
when: docker_pull_result.changed
- name: Ensure Airflow project directory is writable by the container user (UID 50000)
- name: Ensure worker directory exists
ansible.builtin.file:
path: "{{ airflow_worker_dir }}"
owner: 50000
group: 50000
become: yes
- name: Ensure Airflow subdirectories are writable by the container user (UID 50000)
ansible.builtin.file:
path: "{{ item }}"
owner: 50000
group: 50000
recurse: yes
state: directory
loop:
- "{{ airflow_worker_dir }}/dags"
- "{{ airflow_worker_dir }}/logs"
- "{{ airflow_worker_dir }}/plugins"
- "{{ airflow_worker_dir }}/config"
owner: "{{ ansible_user }}"
group: "{{ deploy_group }}"
mode: '0755'
become: yes
- name: Create .dockerignore on worker to exclude runtime data from build context
@ -219,6 +180,66 @@
mode: '0644'
become: yes
- name: Sync python packages to worker for build context
ansible.posix.synchronize:
src: "../{{ item }}/"
dest: "{{ airflow_worker_dir }}/{{ item }}/"
rsync_opts:
- "--delete"
- "--exclude=.DS_Store"
- "--exclude=__pycache__"
- "--exclude='*.pyc'"
recursive: yes
perms: yes
loop:
- "thrift_model"
- "pangramia"
- "ytops_client"
- "yt_ops_services"
become: yes
become_user: "{{ ansible_user }}"
- name: Ensure bin directory exists on worker for build context
ansible.builtin.file:
path: "{{ airflow_worker_dir }}/bin"
state: directory
mode: '0755'
become: yes
become_user: "{{ ansible_user }}"
- name: Sync root files and client utilities to worker for build context
ansible.posix.synchronize:
src: "../{{ item }}"
dest: "{{ airflow_worker_dir }}/{{ item }}"
perms: yes
loop:
- "setup.py"
- "VERSION"
- "bin/ytops-client"
become: yes
become_user: "{{ ansible_user }}"
- name: Ensure Airflow project directory is writable by the container user (UID 50000)
ansible.builtin.file:
path: "{{ airflow_worker_dir }}"
owner: 50000
group: 50000
become: yes
- name: Ensure Airflow subdirectories are writable by the container user (UID 50000)
ansible.builtin.file:
path: "{{ item }}"
owner: 50000
group: 50000
recurse: yes
state: directory
loop:
- "{{ airflow_worker_dir }}/dags"
- "{{ airflow_worker_dir }}/logs"
- "{{ airflow_worker_dir }}/plugins"
- "{{ airflow_worker_dir }}/config"
become: yes
tasks:
- name: Install pipx
ansible.builtin.apt:
@ -237,5 +258,32 @@
- name: Include Docker health check tasks
include_tasks: tasks/docker_health_check.yml
- name: Build local Docker images (e.g., camoufox)
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build
args:
chdir: "{{ airflow_worker_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: docker_build_result
changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout"
- name: Pull pre-built Docker images for ytdlp-ops services
ansible.builtin.command: >
docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable
args:
chdir: "{{ airflow_worker_dir }}"
become: yes
become_user: "{{ ansible_user }}"
register: docker_pull_result
retries: 3
delay: 10
changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout"
- name: Show docker pull output
ansible.builtin.debug:
var: docker_pull_result.stdout_lines
when: docker_pull_result.changed
roles:
- ytdlp-worker

View File

@ -181,7 +181,9 @@ def main_download_native_py(args):
try:
# This is an internal API, but it's the most accurate way to parse CLI args
# into the ydl_opts dictionary format.
ydl_opts, _, _ = yt_dlp.parse_options(base_opts_args)
# yt-dlp's parse_options can return 3 or 4 values. We only need the `opts` namespace (second value).
_parser, opts, _args, *_ = yt_dlp.parse_options(base_opts_args)
ydl_opts = vars(opts)
except Exception as e:
logger.error(f"Failed to parse options from config/extra_args: {e}")
return 1

View File

@ -258,10 +258,41 @@ def main_get_info(args):
if 'Default yt-dlp CLI params:' not in line
)
print(f"\n--- Server Version Info ---\n{filtered_info}", file=sys.stderr)
info_json_str = token_data.infoJson
info_data_for_analysis: Optional[Dict[str, Any]] = None
try:
info_data_for_analysis = json.loads(info_json_str)
except (json.JSONDecodeError, TypeError):
pass # Will be handled later if info_json is invalid
if hasattr(token_data, 'requestSummary') and token_data.requestSummary:
try:
summary_data = json.loads(token_data.requestSummary)
print(f"\n--- Request Summary ---\n{summary_data.get('summary', token_data.requestSummary)}", file=sys.stderr)
summary_text = summary_data.get('summary', token_data.requestSummary)
# --- Client-side summary correction and enhancement ---
gvs_pot_used = False
if isinstance(info_data_for_analysis, dict):
for f in info_data_for_analysis.get('formats', []):
if 'pot=' in f.get('url', ''):
gvs_pot_used = True
break
if gvs_pot_used and 'PO Token (GVS): not_fetched' in summary_text:
summary_text = summary_text.replace(
'PO Token (GVS): not_fetched',
'PO Token (GVS): bgutil:http (verified from format URL)'
)
if 'Visitor ID Source: omitted_for_tv_client' in summary_text:
summary_text = summary_text.replace(
'Visitor ID Source: omitted_for_tv_client',
'Visitor ID Source: omitted_for_tv_client (handled internally by yt-dlp)'
)
# Add a note that we cannot display it.
summary_text += "\n - Visitor ID Value: Not exposed by server for TV clients to avoid detection."
print(f"\n--- Request Summary ---\n{summary_text}", file=sys.stderr)
except json.JSONDecodeError:
# Fallback for old format or non-JSON summary
print(f"\n--- Request Summary ---\n{token_data.requestSummary}", file=sys.stderr)

View File

@ -1650,6 +1650,15 @@ Overridable Policy Parameters via --set:
parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.')
parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.')
parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)")
# Add a group for aria2c-specific overrides for clarity in --help
aria_group = parser.add_argument_group('Aria2c RPC Downloader Overrides', 'Shortcuts for common --set options for the aria2c_rpc downloader.')
aria_group.add_argument('--auto-merge-fragments', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.auto_merge_fragments.')
aria_group.add_argument('--remove-fragments-after-merge', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.remove_fragments_after_merge.')
aria_group.add_argument('--fragments-dir', help='Shortcut for --set download_policy.aria_fragments_dir=PATH.')
aria_group.add_argument('--remote-dir', help='Shortcut for --set download_policy.aria_remote_dir=PATH.')
aria_group.add_argument('--cleanup', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.cleanup.')
parser.add_argument('--verbose', action='store_true', help='Enable verbose output for the orchestrator and underlying scripts.')
parser.add_argument('--dry-run', action='store_true', help='Print the effective policy and exit without running the test.')
return parser
@ -1760,6 +1769,18 @@ def main_stress_policy(args):
policy = load_policy(args.policy, args.policy_name)
policy = apply_overrides(policy, args.set)
# Apply direct CLI overrides after --set, so they have final precedence.
if args.auto_merge_fragments is not None:
policy.setdefault('download_policy', {})['auto_merge_fragments'] = args.auto_merge_fragments
if args.remove_fragments_after_merge is not None:
policy.setdefault('download_policy', {})['remove_fragments_after_merge'] = args.remove_fragments_after_merge
if args.fragments_dir is not None:
policy.setdefault('download_policy', {})['aria_fragments_dir'] = args.fragments_dir
if args.remote_dir is not None:
policy.setdefault('download_policy', {})['aria_remote_dir'] = args.remote_dir
if args.cleanup is not None:
policy.setdefault('download_policy', {})['cleanup'] = args.cleanup
policy_name = policy.get('name', args.policy_name or Path(args.policy).stem)
state_manager = StateManager(policy_name)