yt-dlp-dags/ytops_client-source/policies/10_direct_docker_auth_simulation.yaml

157 lines
7.0 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Policy: Continuous Authentication Simulation via Direct Docker Exec
#
# This policy simulates a continuous stream of info.json fetch requests using
# the 'direct_docker_cli' mode. It calls a yt-dlp command inside a running
# Docker container, passing in a batch file and configuration.
#
# It uses a pool of managed profiles, locking one for each BATCH of requests.
# The host orchestrator prepares files, and docker exec runs yt-dlp. The container
# itself does not need to be Redis-aware.
#
# Identifier of this policy.
name: "direct_docker_auth_simulation"
settings:
# fetch_only: this policy issues info.json fetch requests (see file header).
mode: "fetch_only"
# direct_docker_cli: yt-dlp is called inside a running Docker container.
orchestration_mode: "direct_docker_cli"
# A managed profile is taken from the pool and locked for each batch.
profile_mode: "from_pool_with_lock"
urls_file: "inputfiles/urls.rt300.txt"
# This directory MUST be located inside docker_host_mount_path; otherwise the
# download simulation cannot find the generated files.
# NOTE: the path is expected to sit on an s3fs mount for cross-host communication.
save_info_json_dir: "run/docker_mount/info_json_tasks/direct_docker_simulation"
# Knobs that control the behavior of the dummy/simulation modes.
# Any of these values can be overridden at runtime with the --set flag.
dummy_simulation_settings:
# Dummy auth simulation: bounds (seconds) of the per-URL delay within a batch.
auth_min_seconds: 0.1
auth_max_seconds: 0.5
# NOTE(review): the *_failure_rate values are presumably probabilities in
# [0, 1], with 0.0 disabling simulated failures — confirm against the consumer.
auth_failure_rate: 0.0
auth_skipped_failure_rate: 0.0
# Dummy download simulation: bounds (seconds) of the per-format download time.
download_min_seconds: 1.0
download_max_seconds: 3.0
download_failure_rate: 0.0
download_skipped_failure_rate: 0.0
execution_control:
# Discover profile groups from Redis automatically and create workers for them,
# so that each profile group (e.g., user31, user32) need not be listed manually.
worker_pool_discovery:
# Glob-style pattern used to find profile prefixes in Redis.
# 'user*' matches profiles such as 'user31_001' and 'user61_002'; the tool then
# creates worker pools grouped by prefix ('user31', 'user61', etc.).
profile_prefix_pattern: "user*"
# Number of workers assigned to each discovered profile prefix group.
workers_per_profile_group: 1
# Pause (seconds) for a worker that cannot find an available profile to lock.
worker_polling_interval_seconds: 1
# No sleep is configured between tasks; throughput is governed by yt-dlp
# performance and profile availability.
info_json_generation_policy:
# Tells the auth worker how many download tasks are generated per successful
# info.json, so that the 'pending_downloads' counter on the auth profile is
# incremented correctly.
# Accepts an integer, or 'from_download_policy' to count the formats listed in
# the 'download_policy.formats' setting of this same policy file.
downloads_per_url: "from_download_policy"
# profile_prefix is now defined per-pool in execution_control.
# NOTE(review): the original note names 'execution_control.worker_pools', but
# this file only defines 'worker_pool_discovery' (profile_prefix_pattern) —
# confirm which key is meant.
# This section is needed for the 'downloads_per_url: from_download_policy'
# setting. It should mirror the formats used by the download simulation.
download_policy:
# Inactive remainder of a longer selector, kept for reference:
#   /298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140
formats: "299-dashy"
direct_docker_cli_policy:
# Simulation environment whose profiles are used for locking.
use_profile_env: "auth"
# When true, a worker tries to lock a profile other than the one it just used.
avoid_immediate_profile_reuse: true
# How long (seconds) the worker waits for a different profile before re-using
# the same one.
avoid_reuse_max_wait_seconds: 5
# NOTE on rate limits: with the default yt-dlp settings, guest sessions are
# limited to ~300 videos/hour (~1000 webpage/player requests per hour);
# accounts to ~2000 videos/hour (~4000 webpage/player requests per hour).
# The enforcer policy (e.g., 8_unified_simulation_enforcer.yaml) should be
# configured to respect these limits via rotation and rest periods.
#
# Disabled option: if true, extract the visitor_id from yt-dlp logs, save it
# per-profile, and inject it into subsequent requests for that profile.
#track_visitor_id: true
# --- Docker Execution Settings ---
# Docker image name.
# NOTE(review): the original inline note says this image is used for
# `docker run`, while the file header describes `docker exec` into a running
# container — confirm which applies.
docker_image_name: "ytops/yt-dlp"
docker_network_name: "airflow_proxynet"
# IMPORTANT: this path on the HOST is mounted into the container at
# `docker_container_mount_path`.
docker_host_mount_path: "run/docker_mount"
# The mount point inside the container.
docker_container_mount_path: "/config"
# Host path for persisting cache data (e.g., cookies, sigfuncs) between runs.
# NOTE: this path should be on a fast, local disk, NOT on s3fs.
docker_host_cache_path: ".cache/direct_docker_simulation"
# Path inside the container where the cache is mounted; should match HOME/.cache.
docker_container_cache_path: "/config/.cache"
# Disabled option: if true, create and use a persistent cookie jar per profile
# inside the cache dir.
# use_cookies: true
# --- User-Agent Generation ---
# Template from which User-Agent strings for new profiles are generated.
# '{major_version}' is replaced by a version string.
user_agent_template: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{major_version}.0.0.0 Safari/537.36"
# Range of Chrome major versions to substitute into the template.
# NOTE(review): presumably [min, max]; whether the upper bound is inclusive is
# not visible in this file — confirm against the consumer.
# See CHROME_MAJOR_VERSION_RANGE in yt-dlp's random_user_agent():
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/utils/networking.py
user_agent_version_range: [137, 143]
# Size of each request batch (one profile is locked per batch; see file header).
batch_size: 5
# Base yt-dlp config file; the overrides from this policy are applied on top.
# The orchestrator injects the 'proxy', 'batch-file', and 'output' keys into
# the overrides itself.
ytdlp_config_file: "cli.auth.config"
ytdlp_config_overrides:
skip-download: true
write-info-json: true
no-write-subs: true
no-color: true
ignore-errors: true
use-extractors:
- "youtube"
# Additional yt-dlp arguments, each entry given as one raw string.
ytdlp_raw_args:
- '--extractor-args "youtube:formats=duplicate;jsc_trace=true;player_client=tv_simply;pot_trace=true;skip=translated_subs,hls"'
- '--extractor-args "youtubepot-bgutilhttp:base_url=http://172.17.0.1:4416"'
- '--sleep-requests 0.75'
# Disabled argument:
# - '--retry-sleep linear=1::2'
# --- Live Error Parsing Rules ---
# These regex patterns are checked against yt-dlp's stderr in real time.
# If a fatal error is detected, the profile is banned immediately to stop the
# container and prevent further errors in the same batch.
ban_on_fatal_error_in_batch: true
fatal_error_patterns:
# The apostrophe position is written as a regex wildcard so the pattern matches
# whether the message carries a typographic apostrophe (U+2019) or a plain one;
# 'you.?re' additionally keeps matching the apostrophe-less spelling.
- "Sign in to confirm you.?re not a bot"
- "rate-limited by YouTube"
- "This content isn.t available, try again later"
- "HTTP Error 502"
# NOTE(review): presumably errors matching these patterns are tolerated and do
# not trigger a ban — confirm against the consumer.
tolerated_error_patterns:
- "HTTP Error 429"
- "The uploader has not made this video available in your country"
- "This video has been removed by the uploader"
- "Private video"
- "This is a private video"
- "Video is private"
- "Video unavailable"
- "account associated with this video has been terminated"
- "members-only content"
- "Sign in to confirm your age"
# Template used to rename the final info.json; it uses the placeholders
# {video_id}, {profile_name} and {proxy}.
rename_file_template: "{video_id}-{profile_name}-{proxy}.info.json"
# NOTE(review): presumably the environment names used for the auth side and the
# download side of the simulation — inferred from the key names, confirm.
simulation_parameters:
auth_env: "sim_auth"
download_env: "sim_download"