yt-dlp-dags/ytops_client-source/policies/12_queue_auth_simulation.yaml

128 lines
5.4 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Policy: queue-based authentication simulation (direct Docker exec).
#
# Simulates a continuous stream of info.json fetch requests in
# 'direct_docker_cli' mode: URLs are pulled from a Redis queue, written to a
# temporary batch file, and handed to a yt-dlp command that runs inside a
# running Docker container.
#
name: '12_queue_auth_simulation'
settings:
  mode: fetch_only
  orchestration_mode: direct_docker_cli
  profile_mode: from_pool_with_lock
  # This directory must be located inside docker_host_mount_path.
  save_info_json_dir: 'run/docker_mount/fetched_info_jsons/queue_simulation'
execution_control:
  workers: 1
  # Pause (in seconds) for a worker that cannot find an available profile to
  # lock.
  worker_polling_interval_seconds: 1
  # No sleep is configured between tasks; throughput is controlled by yt-dlp
  # performance and profile availability.
info_json_generation_policy:
  # Prefix of the profiles this policy works with (presumably matched against
  # profile names in the pool; confirm against the orchestrator).
  profile_prefix: 'user1'
direct_docker_cli_policy:
  # Which simulation environment's profiles to use for locking.
  use_profile_env: "auth"
  # If true, a worker will try to lock a different profile than the one it
  # just used.
  avoid_immediate_profile_reuse: true
  # How long (in seconds) the worker should wait for a different profile
  # before re-using the same one.
  avoid_reuse_max_wait_seconds: 5

  # NOTE on rate limits: with the default yt-dlp settings, the rate limit for
  # guest sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per
  # hour). The enforcer policy (e.g., 8_unified_simulation_enforcer.yaml)
  # should be configured to respect these limits via rotation and rest periods.

  # If true, extract the visitor_id from yt-dlp logs, save it per-profile,
  # and inject it into subsequent requests for that profile.
  # (Disabled: uncomment the next line to enable.)
  # track_visitor_id: true

  # --- Docker Execution Settings ---
  docker_image_name: "ytops/yt-dlp"  # Image to use for `docker run`
  docker_network_name: "airflow_proxynet"
  # IMPORTANT: This path on the HOST will be mounted into the container at
  # `docker_container_mount_path`.
  docker_host_mount_path: "run/docker_mount"
  docker_container_mount_path: "/config"  # The mount point inside the container
  # Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
  docker_host_cache_path: ".cache/queue_auth_simulation"
  # Path inside the container where the cache is mounted. Should match
  # $HOME/.cache inside the container.
  docker_container_cache_path: "/config/.cache"
  # If true, create and use a persistent cookie jar per profile inside the
  # cache dir. (Disabled: uncomment the next line to enable.)
  # use_cookies: true

  # --- User-Agent Generation ---
  # Template for generating User-Agent strings for new profiles.
  # The '{major_version}' will be replaced by a version string.
  user_agent_template: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{major_version}.0.0.0 Safari/537.36"
  # Range of Chrome major versions to use for the template.
  # See CHROME_MAJOR_VERSION_RANGE in yt-dlp's random_user_agent():
  # https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/utils/networking.py
  user_agent_version_range: [137, 143]

  # A base config file can be used, with overrides applied from the policy.
  # The orchestrator will inject 'proxy', 'batch-file', and 'output' keys into
  # the overrides.
  ytdlp_config_file: "cli.auth.config"
  ytdlp_config_overrides:
    skip-download: true
    write-info-json: true
    no-write-subs: true
    no-color: true
    ignore-errors: true
    use-extractors: ["youtube"]
  # Extra arguments for yt-dlp, one option (with its value) per list item.
  ytdlp_raw_args:
    - '--extractor-args "youtube:formats=duplicate;jsc_trace=true;player_client=tv_simply;pot_trace=true;skip=translated_subs,hls"'
    - '--extractor-args "youtubepot-bgutilhttp:base_url=http://172.17.0.1:4416"'
    - '--sleep-requests 0.75'
    # Disabled; uncomment to enable (kept as a well-formed list item):
    # - '--retry-sleep linear=1::2'

  # --- Live Error Parsing Rules ---
  # These regex patterns are checked against yt-dlp's stderr in real-time.
  # If a fatal error is detected, immediately ban the profile to stop the
  # container and prevent further errors in the same batch.
  ban_on_fatal_error_in_batch: true
  fatal_error_patterns:
    # NOTE(review): the bot-check message is normally spelled with a
    # typographic apostrophe (U+2019) in "you're". '.?' matches that, a plain
    # ASCII apostrophe, or no apostrophe at all, so the rule does not depend
    # on which character survives in the log or in this file.
    - "Sign in to confirm you.?re not a bot"
    - "rate-limited by YouTube"
    - "This content isn't available, try again later"
    - "HTTP Error 502"
  # Errors matching these patterns are tolerated (not treated as fatal).
  tolerated_error_patterns:
    - "HTTP Error 429"
    - "The uploader has not made this video available in your country"
    - "This video has been removed by the uploader"
    - "Private video"
    - "This is a private video"
    - "Video is private"
    - "Video unavailable"
    - "account associated with this video has been terminated"
    - "members-only content"
    - "Sign in to confirm your age"

  # Template for renaming the final info.json.
  rename_file_template: "{video_id}-{profile_name}-{proxy}.info.json"
queue_policy:
  # false: use legacy, unprefixed queue names (e.g., 'queue2_auth_inbox').
  # true (or omitted): use environment-prefixed names
  # (e.g., 'sim_auth_queue2_auth_inbox').
  use_env_prefix: false
  # When specified, download tasks are created for these formats.
  # Accepts "all", a specific format ID, or a list of format IDs.
  formats_to_download: '140-dashy/140-dashy-0/140,299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy'
  # Number of tasks a worker pulls from the queue at once; this becomes the
  # batch size for the docker run.
  batch_size: 25
simulation_parameters:
  # Environment names for the auth and download simulations.
  auth_env: 'sim_auth'
  download_env: 'sim_download'