# Source: yt-dlp-dags/policies/1_fetch_only_policies.yaml

# This file contains policies for testing only the info.json generation step.
# No downloads are performed.
---
# Policy: Basic fetch-only test for a TV client.
# This policy uses a single, static profile and has a rate limit to avoid being
# too aggressive. It saves the generated info.json files to a directory.
name: tv_downgraded_single_profile

settings:
  mode: fetch_only
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/tv_downgraded"
  # Use a single, static profile for all requests.
  profile_prefix: "tv_downgraded_user"
  profile_mode: per_worker  # With 1 worker, this is effectively a single profile.

execution_control:
  run_until: { cycles: 1 }
  workers: 1
  sleep_between_tasks: { min_seconds: 5, max_seconds: 10 }

info_json_generation_policy:
  client: tv_downgraded
  # Safety rate limit: 450 requests per hour (7.5 req/min)
  # NOTE(review): rate_limits is assumed to nest under
  # info_json_generation_policy — confirm against the policy loader.
  rate_limits:
    per_ip: { max_requests: 450, per_minutes: 60 }
---
# Policy: Fetch-only test for an Android client using a cookie file.
# This demonstrates how to pass a cookie file for authenticated requests.
# It uses a single profile and stops if it encounters too many errors.
name: android_sdkless_with_cookies

settings:
  mode: fetch_only
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/android_sdkless"
  profile_prefix: "android_user_with_cookies"
  profile_mode: per_worker

execution_control:
  run_until: { cycles: 1 }  # Run through the URL list once.
  workers: 1
  sleep_between_tasks: { min_seconds: 2, max_seconds: 4 }

info_json_generation_policy:
  client: android_sdkless
  # Pass per-request parameters. This is how you specify a cookie file.
  request_params:
    # Placeholder path: replace with the real cookie file before running.
    cookies_file_path: "/path/to/your/android_cookies.txt"

# NOTE(review): stop_conditions is assumed to be a top-level policy key —
# confirm against the policy loader.
stop_conditions:
  # Stop if we get more than 5 errors in any 10-minute window.
  on_error_rate: { max_errors: 5, per_minutes: 10 }
---
# Policy: TV Fetch with Profile Cooldown (Pipeline Stage 1)
# Fetches info.json files using the 'tv' client. Each profile is limited
# to a certain number of requests before it is put into a cooldown period.
# The output of this policy is intended to be used by a 'download_only' policy.
name: tv_fetch_with_cooldown

settings:
  mode: fetch_only
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  # Save the generated files to this directory for the download task to find.
  save_info_json_dir: "live_jsons_tv"
  # NOTE(review): profile_management is assumed to nest under settings —
  # confirm against the policy loader.
  profile_management:
    prefix: "tv_user"
    initial_pool_size: 10
    auto_expand_pool: true
    # Per-profile request budget and the cooldown applied once it is used up.
    max_requests_per_profile: 60
    sleep_minutes_on_exhaustion: 60

execution_control:
  run_until: { cycles: 1 }
  workers: 1
  sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }

info_json_generation_policy:
  client: "tv"
  request_params:
    context_reuse_policy: { enabled: true, max_age_seconds: 86400 }
---
# Policy: MWeb with client rotation and rate limits.
# This demonstrates a more complex scenario with multiple clients and strict
# rate limiting, useful for simulating sophisticated user behavior.
name: mweb_client_rotation_and_rate_limits

settings:
  mode: fetch_only
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  # Use the dynamic profile pool management system.
  # NOTE(review): profile_management is assumed to nest under settings —
  # confirm against the policy loader.
  profile_management:
    prefix: "mweb_user"
    initial_pool_size: 10
    max_requests_per_profile: 100
    sleep_minutes_on_exhaustion: 15

execution_control:
  run_until: { cycles: 1 }
  workers: 10
  sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }

info_json_generation_policy:
  # Enforce strict rate limits for both the entire IP and each individual profile.
  rate_limits:
    per_ip: { max_requests: 120, per_minutes: 10 }
    per_profile: { max_requests: 10, per_minutes: 10 }
  # Rotate between a primary client (mweb) and a refresh client (web_camoufox)
  # to keep sessions fresh.
  client_rotation_policy:
    major_client: "mweb"
    major_client_params:
      context_reuse_policy: { enabled: true, max_age_seconds: 1800 }
    refresh_client: "web_camoufox"
    refresh_every: { requests: 20, minutes: 10 }
---
# Policy: TV Simply, fetch-only test with per-worker profile rotation.
# Fetches info.json using tv_simply with multiple workers. Each worker gets a
# unique profile that is retired and replaced with a new generation after a
# set number of requests.
name: tv_simply_fetch_rotation

settings:
  mode: fetch_only
  urls_file: "urls.txt"
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/tv_simply_rotation"
  # Use the modern profile management system.
  profile_mode: per_worker_with_rotation
  # NOTE(review): profile_management is assumed to nest under settings —
  # confirm against the policy loader.
  profile_management:
    prefix: "tv_simply_user"
    # Rotate to a new profile generation after 250 requests.
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }  # Run through the URL list once.
  workers: 8  # Run with 8 parallel workers.
  sleep_between_tasks: { min_seconds: 2, max_seconds: 5 }
  # Optional: Override the assumed time for a fetch task to improve rate estimation.
  # The default is 3 seconds for fetch_only mode.
  # NOTE(review): assumptions is presumed to belong under execution_control —
  # confirm before uncommenting.
  # assumptions:
  #   fetch_task_duration: 2.5

info_json_generation_policy:
  client: tv_simply