# Policy: Queue-based Download Simulation
#
# This policy simulates a continuous stream of downloads. It pulls download tasks
# from a Redis queue, where each task typically contains a path to an info.json
# file and a format to download.
#
# This policy is designed to be the *second stage* of a two-stage simulation,
# consuming tasks produced by an authentication simulation such as:
#   - `12_queue_auth_simulation.yaml` (when configured for the Queue-Download workflow)
#
# It does not matter to this policy whether the auth stage created tasks per-URL
# or per-format; this worker simply processes whatever task it receives from
# the `input_queue`.
#
name: 13_queue_download_simulation

settings:
  mode: download_only
  orchestration_mode: queue_download
  profile_mode: from_pool_with_lock
  # In queue mode, info_json_dir is not used to find tasks.
  # However, the paths inside the download tasks must be accessible
  # within the docker_host_mount_path.
  # The profile_extraction_regex is also not needed, as the profile
  # can be specified in the download task.

execution_control:
  # Define worker pools for multiple user groups.
  worker_pools:
    - profile_prefix: "user1"
      workers: 1
    - profile_prefix: "user2"
      workers: 1
    - profile_prefix: "user3"
      workers: 1
  # How long a worker should pause if it cannot find an available profile or task.
  worker_polling_interval_seconds: 1

download_policy:
  # profile_prefix is now defined per-pool in execution_control.worker_pools.

  # Default cooldown in seconds if not specified by the enforcer in Redis.
  # The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
  # always takes precedence; this is only a fallback.
  # Can be an integer (e.g., 1) or a range (e.g., [1, 3]).
  default_unlock_cooldown_seconds: 1

  # If true, check whether the download URL in the info.json has expired before
  # attempting the download. This is enabled by default.
  check_url_expiration: true

  # --- Airflow Integration ---
  # If true, move downloaded media and info.json to a timestamped, video-id-based
  # directory structure that the Airflow DAGs can process.
  output_to_airflow_ready_dir: true
  airflow_ready_dir_base_path: "downloadfiles/videos/ready"

direct_docker_cli_policy:
  # Which simulation environment's profiles to use for locking.
  use_profile_env: "download"

  # If true, a worker will try to lock a different profile than the one it just used.
  # This is disabled for downloads, as the cooldown mechanism is sufficient.
  avoid_immediate_profile_reuse: false
  # How long the worker should wait for a different profile before re-using the same one.
  avoid_reuse_max_wait_seconds: 5

  # NOTE on rate limits: with the default yt-dlp settings, the rate limit for guest
  # sessions is ~300 videos/hour (~1000 webpage/player requests per hour).
  # For accounts, it is ~2000 videos/hour (~4000 webpage/player requests per hour).
  # The enforcer policy should be configured to respect these limits via
  # rotation and rest periods.

  # --- Docker Execution Settings ---
  docker_image_name: "ytops/yt-dlp"
  docker_network_name: "airflow_proxynet"

  # Host path mounted into the container for task files (info.json, config).
  # IMPORTANT: the info.json paths referenced by the download tasks must live
  # under this host path (or a subdirectory of it), so the container can see
  # the task files.
  docker_host_mount_path: "run/docker_mount"
  docker_container_mount_path: "/config"
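  # For orientation only: a rough sketch of how these Docker settings map onto
  # a container invocation. This is an assumption about the orchestrator's
  # behaviour, not the verbatim command it builds (the download mount is
  # configured just below):
  #
  #   docker run --rm --network airflow_proxynet \
  #     -v "$PWD/run/docker_mount:/config" \
  #     -v "$PWD/downloaded_media/queue_downloads:/downloads" \
  #     ytops/yt-dlp ...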
docker_host_download_path: "downloaded_media/queue_downloads" # Path inside the CONTAINER where `docker_host_download_path` is mounted. docker_container_download_path: "/downloads" # A base config file can be used, with overrides applied from the policy. # The orchestrator will inject 'proxy', 'load-info-json', and 'output' keys into the overrides. ytdlp_config_file: "cli.download.config" ytdlp_config_overrides: format: "299-dashy/298-dashy/137-dashy/136-dashy/135-dashy/134-dashy/133-dashy,140-dashy/140-dashy-0/140" #format: "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" no-resize-buffer: true buffer-size: "4M" concurrent-fragments: 8 ytdlp_raw_args: [] # --- Live Error Parsing Rules --- # If a fatal error is detected, immediately ban the profile to stop the container. ban_on_fatal_error_in_batch: true fatal_error_patterns: - "HTTP Error 403" - "HTTP Error 502" tolerated_error_patterns: - "timed out" - "Timeout" - "connection reset by peer" - "Invalid data found when processing input" - "Error opening input files" # Settings for controlling the behavior of dummy/simulation modes. # These values can be overridden at runtime with the --set flag. dummy_simulation_settings: # Timings for dummy download simulation (per-format download time) download_min_seconds: 1.0 download_max_seconds: 3.0 download_failure_rate: 0.0 download_skipped_failure_rate: 0.0 queue_policy: # Set to false to use legacy, unprefixed queue names (e.g., 'queue2_dl_inbox'). # Set to true (or omit) to use environment-prefixed names (e.g., 'sim_download_queue2_dl_inbox'). use_env_prefix: false # Queue to pull download tasks from input_queue: "queue2_dl_inbox" # Whether to report completion back to a queue # Can be true (report all), false (report none), or "success_only"/"failure_only" report_completion: true # Queue to report completion to completion_queue: "queue2_dl_completed" # Queue to report failures to (always reported regardless of report_completion) failure_queue: "queue2_dl_fail" # Queue to report skipped tasks to skipped_queue: "queue2_dl_skipped" # How many tasks to process in a batch. For downloads, this should be 1, # as each worker locks a profile for a single download task. batch_size: 1 simulation_parameters: download_env: "sim_download"