# yt-dlp-dags/policies/4_custom_scenarios.yaml
# This file contains custom policies for specific testing scenarios.
---
# Policy: Fetch info.json with visitor ID rotation.
# This policy uses a single worker to fetch info.json files for a list of URLs.
# It simulates user churn by creating a new profile (and thus a new visitor_id and POT)
# every 250 requests. A short sleep is used between requests.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: fetch_with_visitor_id_rotation

settings:
  mode: fetch_only
  urls_file: "urls.txt"  # Placeholder, should be overridden with --set
  info_json_script: "bin/ytops-client get-info"
  save_info_json_dir: "fetched_info_jsons/visitor_id_rotation"
  # Use the modern profile management system to rotate visitor_id.
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "visitor_rotator"
    # Rotate to a new profile generation after 250 requests.
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }  # Run through the URL list once.
  workers: 1  # Run with a single worker thread.
  # A short, fixed sleep between each info.json request.
  sleep_between_tasks: { min_seconds: 0.75, max_seconds: 0.75 }

info_json_generation_policy:
  # Use a standard client. The server will handle token generation.
  client: tv_simply
---
# Policy: Full-stack test with visitor ID rotation and test download.
# This policy uses a single worker to fetch info.json files for a list of URLs,
# and then immediately performs a test download (first 10KB) of specified formats.
# It simulates user churn by creating a new profile (and thus a new visitor_id and POT)
# every 250 requests. A short sleep is used between requests.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: full_stack_with_visitor_id_rotation

settings:
  mode: full_stack
  urls_file: "urls.txt"  # Placeholder, should be overridden with --set
  info_json_script: "bin/ytops-client get-info"
  # Use the modern profile management system to rotate visitor_id.
  profile_mode: per_worker_with_rotation
  profile_management:
    prefix: "visitor_rotator"
    # Rotate to a new profile generation after 250 requests.
    max_requests_per_profile: 250

execution_control:
  run_until: { cycles: 1 }  # Run through the URL list once.
  workers: 1  # Run with a single worker thread.
  # A short, fixed sleep between each info.json request.
  sleep_between_tasks: { min_seconds: 0.75, max_seconds: 0.75 }

info_json_generation_policy:
  # Use a standard client. The server will handle token generation.
  client: tv_simply

download_policy:
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
  downloader: "native-py"
  extra_args: '--test --cleanup'
  output_dir: "downloads/fetch_and_test"
  sleep_between_formats: { min_seconds: 6, max_seconds: 6 }
---
# Policy: Download-only test from a fetch folder (Batch Mode).
# This policy scans a directory of existing info.json files once, and performs
# a test download (first 10KB) for specific formats. It is designed to run as
# a batch job after a 'fetch_only' policy has completed.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: download_only_test_from_fetch_folder

settings:
  mode: download_only
  # Directory of info.json files to process.
  info_json_dir: "fetched_info_jsons/visitor_id_rotation"  # Assumes output from 'fetch_with_visitor_id_rotation'

execution_control:
  run_until: { cycles: 1 }  # Run through the info.json directory once.
  workers: 1  # Run with a single worker thread.
  # A longer, randomized sleep between processing each info.json file.
  sleep_between_tasks: { min_seconds: 5, max_seconds: 10 }

download_policy:
  # A specific list of video-only DASH formats to test.
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
  downloader: "native-py"
  # Pass extra arguments to perform a "test" download.
  extra_args: '--test --cleanup'
  output_dir: "downloads/dash_test"
---
# Policy: Live download from a watch folder (Continuous Mode).
# This policy continuously watches a directory for new info.json files and
# processes them as they appear. It is designed to work as the second stage
# of a pipeline, consuming files generated by a 'fetch_only' policy.
# NOTE(review): indentation was flattened in the source; nesting below was
# reconstructed from the comments and key names — confirm against the consumer's schema.
name: live_download_from_watch_folder

settings:
  mode: download_only
  info_json_dir: "live_info_json"  # A different directory for the live pipeline
  directory_scan_mode: continuous
  mark_processed_files: true  # Rename files to *.processed to avoid re-downloading.
  max_files_per_cycle: 50  # Process up to 50 new files each time it checks.
  sleep_if_no_new_files_seconds: 15

execution_control:
  # For 'continuous' mode, a time-based run_until is typical.
  # {cycles: 1} will scan once, process new files, and exit.
  # To run for 2 hours, for example, use: run_until: { minutes: 120 }
  run_until: { cycles: 1 }
  workers: 4  # Use a few workers to process files in parallel.
  # sleep_between_tasks controls the pause between processing different info.json files.
  # To pause before each download attempt starts, use 'pause_before_download_seconds'
  # in the download_policy section below.
  sleep_between_tasks: { min_seconds: 0, max_seconds: 0 }

download_policy:
  formats: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy"
  downloader: "native-py"
  # Example: Pause for a few seconds before starting each download attempt.
  # pause_before_download_seconds: 2
  extra_args: '--test --cleanup'
  output_dir: "downloads/live_dash_test"