# yt-dlp-dags/policies/2_download_only_policies.yaml

# This file contains policies for testing only the download step from
# existing info.json files. No new info.json files are generated.
---
# Policy: Basic profile-aware download test.
# This policy reads info.json files from a directory, groups them by a profile
# name extracted from the filename, and downloads them using multiple workers.
# Each worker handles one or more profiles sequentially.
name: basic_profile_aware_download
settings:
  mode: download_only
  info_json_dir: "prefetched_info_jsons"
  # Regex to extract profile names from filenames like '...-VIDEOID-my_profile_name.json'.
  profile_extraction_regex: ".*-[a-zA-Z0-9_-]{11}-(.+)\\.json"
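  # Illustrative example (hypothetical filename): '20240101-dQw4w9WgXcQ-livingroom_tv.json'
  # matches the pattern above and is grouped under the profile 'livingroom_tv'.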
execution_control:
  run_until: { cycles: 1 }
  # 'auto' sets workers to the number of profiles, capped by auto_workers_max.
  workers: auto
  auto_workers_max: 8
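  # Example: if 12 distinct profiles are found, 8 workers are started (the cap above);
  # if only 3 profiles are found, 3 workers are started.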
  # This sleep is applied between consecutive files downloaded by the same profile.
  sleep_between_tasks: { min_seconds: 1, max_seconds: 2 }
download_policy:
formats: "18,140,299/298/137/136/135/134/133"
downloader: "aria2c"
downloader_args: "aria2c:-x 4 -k 1M"
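  # Presumably forwarded to yt-dlp's --downloader-args, where the 'aria2c:' prefix scopes
  # the flags to aria2c: -x 4 allows up to 4 connections per server, -k 1M sets a 1 MiB
  # minimum split size.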
  extra_args: "--cleanup --output-dir /tmp/downloads"
  # This sleep applies between formats of a single video.
  sleep_between_formats: { min_seconds: 0, max_seconds: 0 }
---
# Policy: Continuous download from a folder (Pipeline Stage 2).
# This policy watches a directory for new info.json files and processes them
# as they appear. It is designed to work as the second stage of a pipeline,
# consuming files generated by a 'fetch_only' policy like 'tv_fetch_with_cooldown'.
name: continuous_watch_download
settings:
  mode: download_only
  info_json_dir: "live_info_jsons"
  directory_scan_mode: continuous
  mark_processed_files: true # Rename files to *.processed to avoid re-downloading.
  max_files_per_cycle: 50 # Process up to 50 new files each time it checks.
  sleep_if_no_new_files_seconds: 15
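  # Taken together: each scan picks up at most 50 unprocessed info.json files from
  # 'live_info_jsons'; if nothing new is found, the watcher waits 15 seconds before
  # scanning again.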
execution_control:
  # Note: For 'continuous' mode, a time-based run_until (e.g., {minutes: 120})
  # is more typical. {cycles: 1} will cause it to scan the directory once
  # for new files, process them, and then exit.
  run_until: { cycles: 1 }
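  # run_until: { minutes: 120 }  # illustrative time-based alternative mentioned in the note above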
  workers: 4 # Use a few workers to process files in parallel.
  sleep_between_tasks: { min_seconds: 0, max_seconds: 0 }
download_policy:
formats: "18,140"
extra_args: "--cleanup --output-dir /tmp/downloads"