From 3709ba6f81a8b75f557f9959f4f3078c4875f2b1 Mon Sep 17 00:00:00 2001 From: aperez Date: Thu, 20 Nov 2025 09:21:25 +0300 Subject: [PATCH] Updates during migration some fixes on formats --- airflow/dags/ytdlp_ops_v01_orchestrator.py | 6 +- airflow/dags/ytdlp_ops_v01_worker_per_url.py | 47 +++--- airflow/dags/ytdlp_ops_v02_orchestrator_dl.py | 6 +- .../dags/ytdlp_ops_v02_worker_per_url_dl.py | 49 +++---- ansible/playbook-depricated.dl.yml | 9 -- ansible/playbook-dl.yml | 63 ++++++++ ansible/playbook-full.yml | 45 ++++-- ansible/playbook-hook.yml | 20 +++ ansible/playbook-master.yml | 70 +++++++-- ansible/playbook-sync-local.yml | 7 + ansible/playbook-worker.yml | 134 ++++++++++++------ ytops_client/download_native_py_tool.py | 4 +- ytops_client/get_info_tool.py | 33 ++++- ytops_client/stress_policy_tool.py | 21 +++ 14 files changed, 387 insertions(+), 127 deletions(-) diff --git a/airflow/dags/ytdlp_ops_v01_orchestrator.py b/airflow/dags/ytdlp_ops_v01_orchestrator.py index e295d8e..99d8460 100644 --- a/airflow/dags/ytdlp_ops_v01_orchestrator.py +++ b/airflow/dags/ytdlp_ops_v01_orchestrator.py @@ -381,16 +381,16 @@ with DAG( type="string", enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], title="[Worker Param] Download Format Preset", - description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" ), 'download_format_custom': Param( - '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy', + '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy', type="string", title="[Worker Param] Custom Download Format", description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'." ), 'downloader': Param( - 'cli', + 'py', type="string", enum=['py', 'aria-rpc', 'cli'], title="[Worker Param] Download Tool", diff --git a/airflow/dags/ytdlp_ops_v01_worker_per_url.py b/airflow/dags/ytdlp_ops_v01_worker_per_url.py index 6d03b0e..cd44013 100644 --- a/airflow/dags/ytdlp_ops_v01_worker_per_url.py +++ b/airflow/dags/ytdlp_ops_v01_worker_per_url.py @@ -665,7 +665,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context elif format_preset == 'formats_0': download_format = '18,140' elif format_preset == 'formats_2': - download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' + download_format = '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' elif format_preset == 'formats_3': download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318' else: @@ -689,34 +689,35 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context if downloader == 'py': cmd.extend(['--output-dir', download_dir]) # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args - py_extra_args = [] - if params.get('fragment_retries'): - py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])]) - if params.get('limit_rate'): - py_extra_args.extend(['--limit-rate', params['limit_rate']]) - if params.get('socket_timeout'): - py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])]) - if params.get('min_sleep_interval'): - py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])]) - if params.get('max_sleep_interval'): - py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])]) - if params.get('yt_dlp_test_mode'): - py_extra_args.append('--test') - existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '') - final_extra_args = existing_extra + py_extra_args - if final_extra_args: - cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)]) + # WORKAROUND: Due to an incompatibility between ytops-client and a recent yt-dlp + # library update, passing --extra-ytdlp-args to the 'py' downloader causes a crash. + # These arguments are being omitted until ytops-client is fixed. + # This affects: fragment_retries, limit_rate, socket_timeout, sleep_interval, + # max_sleep_interval, yt_dlp_test_mode, and the 'yt_dlp_extra_args' DAG param. + has_extra_args = ( + params.get('fragment_retries') or params.get('limit_rate') or + params.get('socket_timeout') or params.get('min_sleep_interval') or + params.get('max_sleep_interval') or params.get('yt_dlp_test_mode') or + params.get('yt_dlp_extra_args') + ) + if has_extra_args: + logger.warning("WORKAROUND: Omitting --extra-ytdlp-args for 'py' downloader due to a known incompatibility. " + "Some download parameters will be ignored.") elif downloader == 'aria-rpc': cmd.extend([ '--aria-host', params.get('aria_host', '172.17.0.1'), '--aria-port', str(params.get('aria_port', 6800)), '--aria-secret', params.get('aria_secret'), - '--wait', '--auto-merge-fragments', - '--fragments-dir', download_dir, + '--wait', '--output-dir', download_dir, ]) + if 'dashy' in format_selector: + cmd.extend([ + '--auto-merge-fragments', + '--fragments-dir', download_dir, + ]) if params.get('yt_dlp_cleanup_mode'): cmd.append('--cleanup') @@ -1495,16 +1496,16 @@ with DAG( type="string", enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], title="Download Format Preset", - description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" ), 'download_format_custom': Param( - '18,140,299/298/137/136/135/134/133', + '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy', type="string", title="Custom Download Format", description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')." ), 'downloader': Param( - 'cli', + 'py', type="string", enum=['py', 'aria-rpc', 'cli'], title="Download Tool", diff --git a/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py b/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py index 1c56efe..2c87b2c 100644 --- a/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py +++ b/airflow/dags/ytdlp_ops_v02_orchestrator_dl.py @@ -260,16 +260,16 @@ with DAG( type="string", enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], title="[Worker Param] Download Format Preset", - description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + description="Select a predefined format string or choose 'custom' to use the value from 'Custom Download Format'.\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" ), 'download_format_custom': Param( - '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy', + '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy', type="string", title="[Worker Param] Custom Download Format", description="Custom yt-dlp format string. Used when preset is 'custom'. E.g., 'ba[ext=m4a]/bestaudio/best'." ), 'downloader': Param( - 'cli', + 'py', type="string", enum=['py', 'aria-rpc', 'cli'], title="[Worker Param] Download Tool", diff --git a/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py b/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py index 5fb0bb8..39e3d3a 100644 --- a/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py +++ b/airflow/dags/ytdlp_ops_v02_worker_per_url_dl.py @@ -300,7 +300,7 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context elif format_preset == 'formats_0': download_format = '18,140' elif format_preset == 'formats_2': - download_format = '18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' + download_format = '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy' elif format_preset == 'formats_3': download_format = '18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318' else: @@ -321,34 +321,35 @@ def download_and_probe(token_data: dict, available_formats: list[str], **context if downloader == 'py': cmd.extend(['--output-dir', download_dir]) # The 'py' tool maps many yt-dlp flags via --extra-ytdlp-args - py_extra_args = [] - if params.get('fragment_retries'): - py_extra_args.extend(['--fragment-retries', str(params['fragment_retries'])]) - if params.get('limit_rate'): - py_extra_args.extend(['--limit-rate', params['limit_rate']]) - if params.get('socket_timeout'): - py_extra_args.extend(['--socket-timeout', str(params['socket_timeout'])]) - if params.get('min_sleep_interval'): - py_extra_args.extend(['--sleep-interval', str(params['min_sleep_interval'])]) - if params.get('max_sleep_interval'): - py_extra_args.extend(['--max-sleep-interval', str(params['max_sleep_interval'])]) - if params.get('yt_dlp_test_mode'): - py_extra_args.append('--test') - - existing_extra = shlex.split(params.get('yt_dlp_extra_args') or '') - final_extra_args = existing_extra + py_extra_args - if final_extra_args: - cmd.extend(['--extra-ytdlp-args', shlex.join(final_extra_args)]) + + # WORKAROUND: Due to an incompatibility between ytops-client and a recent yt-dlp + # library update, passing --extra-ytdlp-args to the 'py' downloader causes a crash. + # These arguments are being omitted until ytops-client is fixed. + # This affects: fragment_retries, limit_rate, socket_timeout, sleep_interval, + # max_sleep_interval, yt_dlp_test_mode, and the 'yt_dlp_extra_args' DAG param. + has_extra_args = ( + params.get('fragment_retries') or params.get('limit_rate') or + params.get('socket_timeout') or params.get('min_sleep_interval') or + params.get('max_sleep_interval') or params.get('yt_dlp_test_mode') or + params.get('yt_dlp_extra_args') + ) + if has_extra_args: + logger.warning("WORKAROUND: Omitting --extra-ytdlp-args for 'py' downloader due to a known incompatibility. " + "Some download parameters will be ignored.") elif downloader == 'aria-rpc': cmd.extend([ '--aria-host', params.get('aria_host', '172.17.0.1'), '--aria-port', str(params.get('aria_port', 6800)), '--aria-secret', params.get('aria_secret'), - '--wait', '--auto-merge-fragments', - '--fragments-dir', download_dir, + '--wait', '--output-dir', download_dir, ]) + if 'dashy' in format_selector: + cmd.extend([ + '--auto-merge-fragments', + '--fragments-dir', download_dir, + ]) if params.get('yt_dlp_cleanup_mode'): cmd.append('--cleanup') @@ -814,16 +815,16 @@ with DAG( type="string", enum=['best_audio', 'formats_0', 'formats_2', 'formats_3', 'custom'], title="Download Format Preset", - description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18-dashy,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" + description="Select a predefined format string or choose 'custom'. To download multiple formats, this should be a comma-separated list of format IDs (e.g., '137,140').\nformats_0: 18,140\nformats_2: 18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy\nformats_3: 18,599,139,140,141,160/269,133/229,134/230,135/231,136/232,137/270,298/311,299/318" ), 'download_format_custom': Param( - 'ba[ext=m4a]/bestaudio/best', + '18,140-dashy,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy', type="string", title="Custom Download Format", description="Custom yt-dlp format string. Used when preset is 'custom'. To download multiple formats, provide a comma-separated list of format IDs (e.g., '137,140')." ), 'downloader': Param( - 'cli', + 'py', type="string", enum=['py', 'aria-rpc', 'cli'], title="Download Tool", diff --git a/ansible/playbook-depricated.dl.yml b/ansible/playbook-depricated.dl.yml index c2035a2..107973e 100644 --- a/ansible/playbook-depricated.dl.yml +++ b/ansible/playbook-depricated.dl.yml @@ -41,14 +41,6 @@ force_source: true when: not fast_deploy | default(false) - - name: Build Camoufox image from local Dockerfile - community.docker.docker_image: - name: "camoufox:latest" - build: - path: "{{ airflow_worker_dir }}/camoufox" - source: build - force_source: true - when: not fast_deploy | default(false) - name: Pull ytdlp-ops-server image only community.docker.docker_image: @@ -67,7 +59,6 @@ files: - configs/docker-compose-dl.yaml - configs/docker-compose-ytdlp-ops.yaml - - configs/docker-compose.camoufox.yaml state: present remove_orphans: true pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/ansible/playbook-dl.yml b/ansible/playbook-dl.yml index e69de29..9541c4a 100644 --- a/ansible/playbook-dl.yml +++ b/ansible/playbook-dl.yml @@ -0,0 +1,63 @@ +--- +- name: Deploy Airflow DL Worker Stack + hosts: airflow_workers + vars_files: + - group_vars/all.yml + - group_vars/all/vault.yml + pre_tasks: + - name: Announce fast deploy mode if enabled + debug: + msg: "🚀 FAST DEPLOY MODE ENABLED: Skipping Docker image builds and pulls. 🚀" + when: fast_deploy | default(false) + run_once: true + tasks: + + - name: Ensure worker directory exists + file: + path: "{{ airflow_worker_dir }}" + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + + - name: Template .env.worker + template: + src: templates/.env.worker.j2 + dest: "{{ airflow_worker_dir }}/.env" + mode: '0600' + + - name: Template docker-compose file for Airflow worker + template: + src: ../airflow/configs/docker-compose-dl.yaml.j2 + dest: "{{ airflow_worker_dir }}/configs/docker-compose-dl.yaml" + mode: '0644' + + - name: Build Airflow worker image from local Dockerfile + community.docker.docker_image: + name: "{{ airflow_image_name }}" + build: + path: "{{ airflow_worker_dir }}" + dockerfile: "Dockerfile" + source: build + force_source: true + when: not fast_deploy | default(false) + + - name: Pull ytdlp-ops-server image only + community.docker.docker_image: + name: "{{ ytdlp_ops_image }}" + source: pull + when: not fast_deploy | default(false) + + - name: Generate dynamic configs (camoufox + envoy) + shell: + cmd: "docker compose -f configs/docker-compose.config-generate.yaml run --rm config-generator" + chdir: "{{ airflow_worker_dir }}" + + - name: Start worker services + community.docker.docker_compose_v2: + project_src: "{{ airflow_worker_dir }}" + files: + - configs/docker-compose-dl.yaml + - configs/docker-compose-ytdlp-ops.yaml + state: present + remove_orphans: true + pull: "{{ 'never' if fast_deploy | default(false) else 'missing' }}" diff --git a/ansible/playbook-full.yml b/ansible/playbook-full.yml index ff5320e..0853efd 100644 --- a/ansible/playbook-full.yml +++ b/ansible/playbook-full.yml @@ -111,7 +111,42 @@ name: airflow_proxynet driver: bridge - post_tasks: + +- name: Deploy master + import_playbook: playbook-master.yml + when: inventory_hostname in groups['airflow_master'] + +- name: Deploy workers + import_playbook: playbook-worker.yml + when: inventory_hostname in groups['airflow_workers'] + +- name: Deploy and Reload Airflow Task Hook + hosts: all + gather_facts: no + vars_files: + - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" + - "{{ inventory_dir }}/group_vars/all/vault.yml" + tasks: + - name: Ensure config directory exists on MASTER server + when: inventory_hostname in groups['airflow_master'] + ansible.builtin.file: + path: "{{ airflow_master_dir }}/config" + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + mode: '0755' + become: yes + + - name: Ensure config directory exists on WORKER server + when: inventory_hostname in groups['airflow_workers'] + ansible.builtin.file: + path: "{{ airflow_worker_dir }}/config" + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + mode: '0755' + become: yes + - name: Sync custom_task_hooks.py to MASTER server when: inventory_hostname in groups['airflow_master'] synchronize: @@ -157,11 +192,3 @@ cmd: "docker compose restart airflow-worker-dl airflow-worker-auth" chdir: "{{ airflow_worker_dir }}" become: yes - -- name: Deploy master - import_playbook: playbook-master.yml - when: inventory_hostname in groups['airflow_master'] - -- name: Deploy workers - import_playbook: playbook-worker.yml - when: inventory_hostname in groups['airflow_workers'] diff --git a/ansible/playbook-hook.yml b/ansible/playbook-hook.yml index f4f707f..8580b45 100644 --- a/ansible/playbook-hook.yml +++ b/ansible/playbook-hook.yml @@ -6,6 +6,26 @@ - "{{ inventory_dir }}/group_vars/all/generated_vars.yml" - "{{ inventory_dir }}/group_vars/all/vault.yml" tasks: + - name: Ensure config directory exists on MASTER server + when: inventory_hostname in groups['airflow_master'] + ansible.builtin.file: + path: "{{ airflow_master_dir }}/config" + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + mode: '0755' + become: yes + + - name: Ensure config directory exists on WORKER server + when: inventory_hostname in groups['airflow_workers'] + ansible.builtin.file: + path: "{{ airflow_worker_dir }}/config" + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + mode: '0755' + become: yes + - name: Sync custom_task_hooks.py to MASTER server when: inventory_hostname in groups['airflow_master'] synchronize: diff --git a/ansible/playbook-master.yml b/ansible/playbook-master.yml index 594c782..434ebe1 100644 --- a/ansible/playbook-master.yml +++ b/ansible/playbook-master.yml @@ -147,20 +147,53 @@ deploy_group_gid: "0" when: deploy_group_gid is not defined or deploy_group_gid == "" - - name: Generate Docker Compose configurations - ansible.builtin.command: > - docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator - args: - chdir: "{{ airflow_master_dir }}" + - name: Ensure master directory exists + ansible.builtin.file: + path: "{{ airflow_master_dir }}" + state: directory + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0755' + become: yes + + - name: Sync python packages to master for build context + ansible.posix.synchronize: + src: "../{{ item }}/" + dest: "{{ airflow_master_dir }}/{{ item }}/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + loop: + - "thrift_model" + - "pangramia" + - "ytops_client" + - "yt_ops_services" become: yes become_user: "{{ ansible_user }}" - register: config_generator_result - changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout" - - name: Show config generator output - ansible.builtin.debug: - var: config_generator_result.stdout_lines - when: config_generator_result.changed + - name: Ensure bin directory exists on master for build context + ansible.builtin.file: + path: "{{ airflow_master_dir }}/bin" + state: directory + mode: '0755' + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync root files and client utilities to master for build context + ansible.posix.synchronize: + src: "../{{ item }}" + dest: "{{ airflow_master_dir }}/{{ item }}" + perms: yes + loop: + - "setup.py" + - "VERSION" + - "bin/ytops-client" + become: yes + become_user: "{{ ansible_user }}" - name: Ensure Airflow project directory is writable by the container user (UID 50000) ansible.builtin.file: @@ -201,6 +234,21 @@ - name: Include Docker health check tasks include_tasks: tasks/docker_health_check.yml + - name: Generate Docker Compose configurations + ansible.builtin.command: > + docker compose --project-directory . -f configs/docker-compose.config-generate.yaml run --rm config-generator + args: + chdir: "{{ airflow_master_dir }}" + become: yes + become_user: "{{ ansible_user }}" + register: config_generator_result + changed_when: "'Creating' in config_generator_result.stdout or 'Recreating' in config_generator_result.stdout" + + - name: Show config generator output + ansible.builtin.debug: + var: config_generator_result.stdout_lines + when: config_generator_result.changed + roles: - ytdlp-master - airflow-master diff --git a/ansible/playbook-sync-local.yml b/ansible/playbook-sync-local.yml index 41ddfba..3492053 100644 --- a/ansible/playbook-sync-local.yml +++ b/ansible/playbook-sync-local.yml @@ -11,6 +11,13 @@ msg: "Syncing local dev files to {{ inventory_hostname }} at {{ airflow_worker_dir }}" tasks: + - name: Ensure python3-pip is installed + ansible.builtin.apt: + name: python3-pip + state: present + update_cache: yes + become: yes + - name: Check if yt-dlp is installed ansible.builtin.command: which yt-dlp register: ytdlp_check diff --git a/ansible/playbook-worker.yml b/ansible/playbook-worker.yml index e1fb454..6a81aaf 100644 --- a/ansible/playbook-worker.yml +++ b/ansible/playbook-worker.yml @@ -138,52 +138,13 @@ group: "{{ deploy_group }}" become: yes - - name: Build local Docker images (e.g., camoufox) - ansible.builtin.command: > - docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build - args: - chdir: "{{ airflow_worker_dir }}" - become: yes - become_user: "{{ ansible_user }}" - register: docker_build_result - changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout" - - - name: Pull pre-built Docker images for ytdlp-ops services - ansible.builtin.command: > - docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable - args: - chdir: "{{ airflow_worker_dir }}" - become: yes - become_user: "{{ ansible_user }}" - register: docker_pull_result - retries: 3 - delay: 10 - changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout" - - - name: Show docker pull output - ansible.builtin.debug: - var: docker_pull_result.stdout_lines - when: docker_pull_result.changed - - - name: Ensure Airflow project directory is writable by the container user (UID 50000) + - name: Ensure worker directory exists ansible.builtin.file: path: "{{ airflow_worker_dir }}" - owner: 50000 - group: 50000 - become: yes - - - name: Ensure Airflow subdirectories are writable by the container user (UID 50000) - ansible.builtin.file: - path: "{{ item }}" - owner: 50000 - group: 50000 - recurse: yes state: directory - loop: - - "{{ airflow_worker_dir }}/dags" - - "{{ airflow_worker_dir }}/logs" - - "{{ airflow_worker_dir }}/plugins" - - "{{ airflow_worker_dir }}/config" + owner: "{{ ansible_user }}" + group: "{{ deploy_group }}" + mode: '0755' become: yes - name: Create .dockerignore on worker to exclude runtime data from build context @@ -219,6 +180,66 @@ mode: '0644' become: yes + - name: Sync python packages to worker for build context + ansible.posix.synchronize: + src: "../{{ item }}/" + dest: "{{ airflow_worker_dir }}/{{ item }}/" + rsync_opts: + - "--delete" + - "--exclude=.DS_Store" + - "--exclude=__pycache__" + - "--exclude='*.pyc'" + recursive: yes + perms: yes + loop: + - "thrift_model" + - "pangramia" + - "ytops_client" + - "yt_ops_services" + become: yes + become_user: "{{ ansible_user }}" + + - name: Ensure bin directory exists on worker for build context + ansible.builtin.file: + path: "{{ airflow_worker_dir }}/bin" + state: directory + mode: '0755' + become: yes + become_user: "{{ ansible_user }}" + + - name: Sync root files and client utilities to worker for build context + ansible.posix.synchronize: + src: "../{{ item }}" + dest: "{{ airflow_worker_dir }}/{{ item }}" + perms: yes + loop: + - "setup.py" + - "VERSION" + - "bin/ytops-client" + become: yes + become_user: "{{ ansible_user }}" + + - name: Ensure Airflow project directory is writable by the container user (UID 50000) + ansible.builtin.file: + path: "{{ airflow_worker_dir }}" + owner: 50000 + group: 50000 + become: yes + + - name: Ensure Airflow subdirectories are writable by the container user (UID 50000) + ansible.builtin.file: + path: "{{ item }}" + owner: 50000 + group: 50000 + recurse: yes + state: directory + loop: + - "{{ airflow_worker_dir }}/dags" + - "{{ airflow_worker_dir }}/logs" + - "{{ airflow_worker_dir }}/plugins" + - "{{ airflow_worker_dir }}/config" + become: yes + tasks: - name: Install pipx ansible.builtin.apt: @@ -237,5 +258,32 @@ - name: Include Docker health check tasks include_tasks: tasks/docker_health_check.yml + - name: Build local Docker images (e.g., camoufox) + ansible.builtin.command: > + docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml build + args: + chdir: "{{ airflow_worker_dir }}" + become: yes + become_user: "{{ ansible_user }}" + register: docker_build_result + changed_when: "'Building' in docker_build_result.stdout or 'writing image' in docker_build_result.stdout" + + - name: Pull pre-built Docker images for ytdlp-ops services + ansible.builtin.command: > + docker compose --project-directory . -f configs/docker-compose-ytdlp-ops.yaml pull --ignore-buildable + args: + chdir: "{{ airflow_worker_dir }}" + become: yes + become_user: "{{ ansible_user }}" + register: docker_pull_result + retries: 3 + delay: 10 + changed_when: "'Pulling' in docker_pull_result.stdout or 'Downloaded' in docker_pull_result.stdout" + + - name: Show docker pull output + ansible.builtin.debug: + var: docker_pull_result.stdout_lines + when: docker_pull_result.changed + roles: - ytdlp-worker diff --git a/ytops_client/download_native_py_tool.py b/ytops_client/download_native_py_tool.py index 0cb18ea..74f3f4d 100644 --- a/ytops_client/download_native_py_tool.py +++ b/ytops_client/download_native_py_tool.py @@ -181,7 +181,9 @@ def main_download_native_py(args): try: # This is an internal API, but it's the most accurate way to parse CLI args # into the ydl_opts dictionary format. - ydl_opts, _, _ = yt_dlp.parse_options(base_opts_args) + # yt-dlp's parse_options can return 3 or 4 values. We only need the `opts` namespace (second value). + _parser, opts, _args, *_ = yt_dlp.parse_options(base_opts_args) + ydl_opts = vars(opts) except Exception as e: logger.error(f"Failed to parse options from config/extra_args: {e}") return 1 diff --git a/ytops_client/get_info_tool.py b/ytops_client/get_info_tool.py index 44195d4..f13f6d4 100644 --- a/ytops_client/get_info_tool.py +++ b/ytops_client/get_info_tool.py @@ -258,10 +258,41 @@ def main_get_info(args): if 'Default yt-dlp CLI params:' not in line ) print(f"\n--- Server Version Info ---\n{filtered_info}", file=sys.stderr) + info_json_str = token_data.infoJson + info_data_for_analysis: Optional[Dict[str, Any]] = None + try: + info_data_for_analysis = json.loads(info_json_str) + except (json.JSONDecodeError, TypeError): + pass # Will be handled later if info_json is invalid + if hasattr(token_data, 'requestSummary') and token_data.requestSummary: try: summary_data = json.loads(token_data.requestSummary) - print(f"\n--- Request Summary ---\n{summary_data.get('summary', token_data.requestSummary)}", file=sys.stderr) + summary_text = summary_data.get('summary', token_data.requestSummary) + + # --- Client-side summary correction and enhancement --- + gvs_pot_used = False + if isinstance(info_data_for_analysis, dict): + for f in info_data_for_analysis.get('formats', []): + if 'pot=' in f.get('url', ''): + gvs_pot_used = True + break + + if gvs_pot_used and 'PO Token (GVS): not_fetched' in summary_text: + summary_text = summary_text.replace( + 'PO Token (GVS): not_fetched', + 'PO Token (GVS): bgutil:http (verified from format URL)' + ) + + if 'Visitor ID Source: omitted_for_tv_client' in summary_text: + summary_text = summary_text.replace( + 'Visitor ID Source: omitted_for_tv_client', + 'Visitor ID Source: omitted_for_tv_client (handled internally by yt-dlp)' + ) + # Add a note that we cannot display it. + summary_text += "\n - Visitor ID Value: Not exposed by server for TV clients to avoid detection." + + print(f"\n--- Request Summary ---\n{summary_text}", file=sys.stderr) except json.JSONDecodeError: # Fallback for old format or non-JSON summary print(f"\n--- Request Summary ---\n{token_data.requestSummary}", file=sys.stderr) diff --git a/ytops_client/stress_policy_tool.py b/ytops_client/stress_policy_tool.py index 4c2cd0a..295bbb0 100644 --- a/ytops_client/stress_policy_tool.py +++ b/ytops_client/stress_policy_tool.py @@ -1650,6 +1650,15 @@ Overridable Policy Parameters via --set: parser.add_argument('--list-policies', action='store_true', help='List all available policies from the default policies directory and exit.') parser.add_argument('--show-overrides', action='store_true', help='Load the specified policy and print all its defined values as a single-line of --set arguments, then exit.') parser.add_argument('--set', action='append', default=[], help="Override a policy setting using 'key.subkey=value' format.\n(e.g., --set execution_control.workers=5)") + + # Add a group for aria2c-specific overrides for clarity in --help + aria_group = parser.add_argument_group('Aria2c RPC Downloader Overrides', 'Shortcuts for common --set options for the aria2c_rpc downloader.') + aria_group.add_argument('--auto-merge-fragments', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.auto_merge_fragments.') + aria_group.add_argument('--remove-fragments-after-merge', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.remove_fragments_after_merge.') + aria_group.add_argument('--fragments-dir', help='Shortcut for --set download_policy.aria_fragments_dir=PATH.') + aria_group.add_argument('--remote-dir', help='Shortcut for --set download_policy.aria_remote_dir=PATH.') + aria_group.add_argument('--cleanup', action=argparse.BooleanOptionalAction, default=None, help='Shortcut to enable/disable download_policy.cleanup.') + parser.add_argument('--verbose', action='store_true', help='Enable verbose output for the orchestrator and underlying scripts.') parser.add_argument('--dry-run', action='store_true', help='Print the effective policy and exit without running the test.') return parser @@ -1760,6 +1769,18 @@ def main_stress_policy(args): policy = load_policy(args.policy, args.policy_name) policy = apply_overrides(policy, args.set) + # Apply direct CLI overrides after --set, so they have final precedence. + if args.auto_merge_fragments is not None: + policy.setdefault('download_policy', {})['auto_merge_fragments'] = args.auto_merge_fragments + if args.remove_fragments_after_merge is not None: + policy.setdefault('download_policy', {})['remove_fragments_after_merge'] = args.remove_fragments_after_merge + if args.fragments_dir is not None: + policy.setdefault('download_policy', {})['aria_fragments_dir'] = args.fragments_dir + if args.remote_dir is not None: + policy.setdefault('download_policy', {})['aria_remote_dir'] = args.remote_dir + if args.cleanup is not None: + policy.setdefault('download_policy', {})['cleanup'] = args.cleanup + policy_name = policy.get('name', args.policy_name or Path(args.policy).stem) state_manager = StateManager(policy_name)