# yt-dlp-dags/policies/4_custom_scenarios.yaml
# This file contains custom policies for specific testing scenarios.
---
# Policy: Fetch info.json with visitor ID rotation.
# This policy uses a single worker to fetch info.json files for a list of URLs.
# It simulates user churn by creating a new profile (and thus a new visitor_id and POT)
# every 250 requests. A short sleep is used between requests.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: fetch_with_visitor_id_rotation

settings:
  mode: fetch_only
  urls_file: "urls.txt"  # Placeholder, should be overridden with --set
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/visitor_id_rotation"
  # Use the modern profile management system to rotate visitor_id.
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "visitor_rotator"
    # Rotate to a new profile generation after 250 requests.
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }  # Run through the URL list once.
  workers: 1  # Run with a single worker thread.
  # A short, fixed sleep between each info.json request.
  sleep_between_tasks: { min_seconds: 0.75, max_seconds: 0.75 }

info_json_generation_policy:
  # Use a standard client. The server will handle token generation.
  client: tv_simply
---
# Policy: Full-stack test with visitor ID rotation and test download.
# This policy uses a single worker to fetch info.json files for a list of URLs,
# and then immediately performs a test download (first 10KB) of specified formats.
# It simulates user churn by creating a new profile (and thus a new visitor_id and POT)
# every 250 requests. A short sleep is used between requests.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: full_stack_with_visitor_id_rotation

settings:
  mode: full_stack
  urls_file: "urls.txt"  # Placeholder, should be overridden with --set
  info_json_script: "bin/ytops-client get-info"
  # Use the modern profile management system to rotate visitor_id.
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "visitor_rotator"
    # Rotate to a new profile generation after 250 requests.
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }  # Run through the URL list once.
  workers: 1  # Run with a single worker thread.
  # A short, fixed sleep between each info.json request.
  sleep_between_tasks: { min_seconds: 0.75, max_seconds: 0.75 }

info_json_generation_policy:
  # Use a standard client. The server will handle token generation.
  client: tv_simply

download_policy:
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
  downloader: "native-py"
  extra_args: '--test --cleanup'
  output_dir: "downloads/fetch_and_test"
  sleep_between_formats: { min_seconds: 6, max_seconds: 6 }
---
# Policy: Download-only test from a fetch folder (Batch Mode).
# This policy scans a directory of existing info.json files once, and performs
# a test download (first 10KB) for specific formats. It is designed to run as
# a batch job after a 'fetch_only' policy has completed.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: download_only_test_from_fetch_folder

settings:
  mode: download_only
  # Directory of info.json files to process.
  info_json_dir: "fetched_info_jsons/visitor_id_rotation"  # Assumes output from 'fetch_with_visitor_id_rotation'

execution_control:
  run_until: { cycles: 1 }  # Run through the info.json directory once.
  workers: 1  # Run with a single worker thread.
  # A longer, randomized sleep between processing each info.json file.
  sleep_between_tasks: { min_seconds: 5, max_seconds: 10 }

download_policy:
  # A specific list of video-only DASH formats to test.
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
  downloader: "native-py"
  # Pass extra arguments to perform a "test" download.
  extra_args: '--test --cleanup'
  output_dir: "downloads/dash_test"
---
# Policy: Live download from a watch folder (Continuous Mode).
# This policy continuously watches a directory for new info.json files and
# processes them as they appear. It is designed to work as the second stage
# of a pipeline, consuming files generated by a 'fetch_only' policy.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: live_download_from_watch_folder

settings:
  mode: download_only
  info_json_dir: "live_info_json"  # A different directory for the live pipeline
  directory_scan_mode: continuous
  mark_processed_files: true  # Rename files to *.processed to avoid re-downloading.
  max_files_per_cycle: 50  # Process up to 50 new files each time it checks.
  sleep_if_no_new_files_seconds: 15

execution_control:
  # For 'continuous' mode, a time-based run_until is typical.
  # {cycles: 1} will scan once, process new files, and exit.
  # To run for 2 hours, for example, use: run_until: { minutes: 120 }
  run_until: { cycles: 1 }
  workers: 4  # Use a few workers to process files in parallel.
  # sleep_between_tasks controls the pause between processing different info.json files.
  # To pause before each download attempt starts, use 'pause_before_download_seconds'
  # in the download_policy section below.
  sleep_between_tasks: { min_seconds: 0, max_seconds: 0 }

download_policy:
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
  downloader: "native-py"
  # Example: Pause for a few seconds before starting each download attempt.
  # pause_before_download_seconds: 2
  extra_args: '--test --cleanup'
  output_dir: "downloads/live_dash_test"