Updates to task generator usage

parent fa7c46f671
commit 5479e8c8f8
@@ -27,6 +27,10 @@ execution_control:
 
 download_policy:
   profile_prefix: "user1"
+  # A comma-separated list of format IDs to download for each info.json.
+  # This is used by the dummy mode simulation to test per-format downloads.
+  # In non-dummy mode, the format selector in ytdlp_config_overrides is used.
+  formats: "140-dashy,299-dashy"
   # Default cooldown in seconds if not specified by the enforcer in Redis.
   # The value from Redis (set via `unlock_cooldown_seconds` in the enforcer policy)
   # will always take precedence. This is a fallback.
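
Note: the new `formats` key is consumed by the dummy-mode worker as a plain comma-separated string, and a `_ytops_download_format` field inside an individual task file takes precedence over it (see the `@@ -2133` hunk below). A minimal sketch of the parsing, assuming `d_policy` is the parsed `download_policy` mapping:

    d_policy = {'formats': '140-dashy,299-dashy'}  # as loaded from the YAML above
    format_selection = d_policy.get('formats', '')
    formats_to_test = [f.strip() for f in format_selection.split(',') if f.strip()]
    # -> ['140-dashy', '299-dashy']
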
@@ -977,7 +977,7 @@ def find_task_and_lock_profile(profile_manager, owner_id, profile_prefix, policy
 
     # 2. Get all available task files.
     try:
-        task_files = list(Path(info_json_dir).glob('*.json'))
+        task_files = list(Path(info_json_dir).rglob('*.json'))
     except FileNotFoundError:
         logger.warning(f"Info JSON directory not found during scan: {info_json_dir}")
         return None, None
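
Note: switching `glob` to `rglob` makes the task scan recursive, which matters once the generator starts writing tasks into per-profile subdirectories (see the `final_output_dir` logic further down). A quick illustration of the difference (directory names are hypothetical):

    from pathlib import Path

    # glob('*.json') matches only the top level of the directory;
    # rglob('*.json') also descends into subdirectories such as tasks/user1/.
    flat = list(Path('tasks').glob('*.json'))   # tasks/a.json
    deep = list(Path('tasks').rglob('*.json'))  # tasks/a.json, tasks/user1/b.json
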
@@ -2124,7 +2124,7 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
                 continue # Skip to finally block to unlock profile
 
             if args.dummy or args.dummy_batch:
-                logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DOCKER DOWNLOAD ==========")
+                logger.info(f"========== [Worker {worker_id}] BEGIN DUMMY DOCKER DOWNLOAD PER-FORMAT SIMULATION ==========")
                 logger.info(f"[Worker {worker_id}] Profile: {profile_name} | Task: {claimed_task_path_host.name}")
 
                 dummy_settings = policy.get('settings', {}).get('dummy_simulation_settings', {})
@@ -2133,23 +2133,67 @@ def run_direct_docker_download_worker(worker_id, policy, state_manager, args, pr
                 failure_rate = dummy_settings.get('download_failure_rate', 0.0)
                 skipped_rate = dummy_settings.get('download_skipped_failure_rate', 0.0)
 
+                # In dummy mode, prioritize the format from the task file, then from the policy.
+                format_selection = info_data.get('_ytops_download_format')
+                source_of_format = "task file"
+                if not format_selection:
+                    format_selection = d_policy.get('formats', '')
+                    source_of_format = "policy"
+
+                if not format_selection:
+                    logger.warning(f"[Worker {worker_id}] DUMMY: No format specified in task file or policy. Simulating a single download.")
+                    formats_to_test = ['dummy_format']
+                else:
+                    formats_to_test = [f.strip() for f in format_selection.split(',') if f.strip()]
+                    logger.info(f"[Worker {worker_id}] DUMMY: Simulating downloads for formats (from {source_of_format}): {', '.join(formats_to_test)}")
+
+                for format_id in formats_to_test:
+                    if state_manager.shutdown_event.is_set():
+                        logger.info(f"[Worker {worker_id}] DUMMY: Shutdown requested, stopping format simulation.")
+                        break
+
+                    logger.info(f"[Worker {worker_id}] DUMMY: Simulating download for format '{format_id}'...")
                     time.sleep(random.uniform(min_seconds, max_seconds))
 
                     rand_val = random.random()
                     should_fail_skipped = rand_val < skipped_rate
                     should_fail_fatal = not should_fail_skipped and rand_val < (skipped_rate + failure_rate)
 
-                if should_fail_skipped:
-                    logger.warning(f"[Worker {worker_id}] DUMMY: Simulating skipped download failure.")
-                    profile_manager_instance.record_activity(profile_name, 'tolerated_error')
-                elif should_fail_fatal:
-                    logger.warning(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure.")
-                    profile_manager_instance.record_activity(profile_name, 'download_error')
-                else:
-                    logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success.")
-                    profile_manager_instance.record_activity(profile_name, 'download')
-
-                logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD ==========")
+                    success = False
+                    details = ""
+                    error_type = None
+                    is_tolerated_error = False
+                    if should_fail_skipped:
+                        logger.warning(f"[Worker {worker_id}] DUMMY: Simulating skipped download failure for format '{format_id}'.")
+                        profile_manager_instance.record_activity(profile_name, 'tolerated_error')
+                        details = f"Dummy skipped failure for format {format_id}"
+                        error_type = "DummySkippedFailure"
+                        is_tolerated_error = True
+                    elif should_fail_fatal:
+                        logger.warning(f"[Worker {worker_id}] DUMMY: Simulating fatal download failure for format '{format_id}'.")
+                        profile_manager_instance.record_activity(profile_name, 'download_error')
+                        details = f"Dummy fatal failure for format {format_id}"
+                        error_type = "DummyFailure"
+                    else:
+                        logger.info(f"[Worker {worker_id}] DUMMY: Simulating download success for format '{format_id}'.")
+                        profile_manager_instance.record_activity(profile_name, 'download')
+                        success = True
+                        details = f"Dummy success for format {format_id}"
+
+                    event = {
+                        'type': 'direct_docker_download',
+                        'profile': profile_name,
+                        'proxy_url': locked_profile['proxy'],
+                        'success': success,
+                        'details': details,
+                        'error_type': error_type,
+                        'is_tolerated_error': is_tolerated_error,
+                        'format': format_id
+                    }
+                    state_manager.log_event(event)
+
+                logger.info(f"========== [Worker {worker_id}] END DUMMY DOCKER DOWNLOAD SIMULATION ==========")
 
                 # In dummy mode, we just rename the file to processed and continue to the finally block.
                 try:
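
Note: the failure simulation partitions a single random draw over [0, 1): values below `skipped_rate` become tolerated "skipped" failures, the next `failure_rate`-wide slice becomes fatal failures, and the remainder are successes, so the two failure modes never overlap. A standalone sketch with made-up rates:

    import random

    skipped_rate, failure_rate = 0.1, 0.2  # hypothetical dummy_simulation_settings values
    rand_val = random.random()             # one draw decides the outcome
    should_fail_skipped = rand_val < skipped_rate                                             # P = 0.1
    should_fail_fatal = not should_fail_skipped and rand_val < (skipped_rate + failure_rate)  # P = 0.2
    # otherwise: simulated success                                                            # P = 0.7
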
@@ -7,7 +7,9 @@ import json
 import logging
 import os
 import re
+import signal
 import sys
+import time
 from pathlib import Path
 
 # Configure logging
@@ -17,6 +19,13 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+# Graceful shutdown handler
+shutdown_event = False
+def handle_shutdown(sig, frame):
+    global shutdown_event
+    logger.info("Shutdown signal received. Stopping task generator...")
+    shutdown_event = True
+
 def sanitize_format_for_filename(format_str: str) -> str:
     """Sanitizes a format selector string to be filesystem-friendly."""
     # Replace common problematic characters with underscores
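
Note: a module-level boolean is sufficient here because CPython executes signal handlers in the main thread and the live loop only reads the flag between iterations. If the generator ever grows worker threads, a `threading.Event` would be a safer drop-in; a sketch of that variant (not part of this commit):

    import signal
    import threading

    shutdown_event = threading.Event()

    def handle_shutdown(sig, frame):
        shutdown_event.set()  # thread-safe; worker threads can also wait() on it

    signal.signal(signal.SIGINT, handle_shutdown)
    # the loop condition then becomes: while not shutdown_event.is_set(): ...
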
@@ -45,6 +54,9 @@ def add_task_generator_parser(subparsers):
     gen_parser.add_argument('--source-dir', required=True, help='Directory containing the source info.json files.')
     gen_parser.add_argument('--output-dir', required=True, help='Directory where the generated task files will be saved.')
     gen_parser.add_argument('--formats', required=True, help='A comma-separated list of format IDs or selectors to generate tasks for (e.g., "18,140,bestvideo").')
+    gen_parser.add_argument('--live', action='store_true', help='Run continuously, watching the source directory for new files.')
+    gen_parser.add_argument('--interval-seconds', type=int, default=10, help='When in --live mode, how often to scan for new files.')
+    gen_parser.add_argument('--dummy', action='store_true', help='Generate dummy task files without reading info.json content. Useful for testing download workers.')
     gen_parser.add_argument('--verbose', action='store_true', help='Enable verbose logging.')
 
     reset_parser = generate_subparsers.add_parser(
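
Note: since `gen_parser` is a regular `ArgumentParser` returned by `add_parser`, the new flags can be exercised in isolation; a hypothetical parse (directory values invented for illustration):

    args = gen_parser.parse_args([
        '--source-dir', './info_json', '--output-dir', './tasks',
        '--formats', '140-dashy,299-dashy',
        '--live', '--interval-seconds', '30', '--dummy',
    ])
    # args.live -> True, args.interval_seconds -> 30, args.dummy -> True
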
@@ -112,45 +124,38 @@ def main_task_generator(args):
         return 1
 
 
-def _main_task_generator_generate(args):
-    if args.verbose:
-        logging.getLogger().setLevel(logging.DEBUG)
-
-    source_dir = Path(args.source_dir)
-    output_dir = Path(args.output_dir)
-    formats_to_generate = [f.strip() for f in args.formats.split(',') if f.strip()]
-
-    if not source_dir.is_dir():
-        logger.error(f"Source directory does not exist or is not a directory: {source_dir}")
-        return 1
-
-    try:
-        output_dir.mkdir(parents=True, exist_ok=True)
-    except OSError as e:
-        logger.error(f"Could not create output directory '{output_dir}': {e}")
-        return 1
-
-    logger.info(f"Scanning for info.json files in '{source_dir}' (recursively)...")
-    source_files = list(source_dir.rglob('*.json'))
-
-    if not source_files:
-        logger.info(f"No .json files found in '{source_dir}'. Nothing to do.")
-        return 0
-
-    logger.info(f"Found {len(source_files)} source file(s). Generating tasks for formats: {', '.join(formats_to_generate)}...")
-
-    total_tasks_generated = 0
-    for source_file in source_files:
-        try:
+def _generate_tasks_for_file(source_file, output_dir, formats_to_generate, is_dummy_mode):
+    """Helper function to generate task files for a single source info.json."""
+    try:
+        info_json_content = {}
+        if is_dummy_mode:
+            # In dummy mode, we don't read the file content. We create a minimal structure.
+            # We try to parse the filename to get video_id and profile_name for organization.
+            # Example filename: {video_id}-{profile_name}-{proxy}.info.json
+            parts = source_file.stem.split('-')
+            video_id = parts[0] if parts else 'dummy_video'
+            profile_name = next((p for p in parts if p.startswith('user')), None)
+
+            info_json_content = {
+                'id': video_id,
+                '_dummy': True,
+                '_ytops_metadata': {
+                    'profile_name': profile_name
+                }
+            }
+            logger.debug(f"DUMMY MODE: Generating tasks for source file: {source_file.name}")
+        else:
             with open(source_file, 'r', encoding='utf-8') as f:
                 info_json_content = json.load(f)
-        except (IOError, json.JSONDecodeError) as e:
-            logger.warning(f"Skipping file '{source_file.name}' due to read/parse error: {e}")
-            continue
+    except (IOError, json.JSONDecodeError) as e:
+        logger.warning(f"Skipping file '{source_file.name}' due to read/parse error: {e}")
+        return 0
+    except Exception as e:
+        logger.error(f"An unexpected error occurred while processing '{source_file.name}': {e}")
+        return 0
 
+    tasks_generated_this_file = 0
     try:
-        tasks_generated_this_run = 0
-
         # Use metadata to create a profile-specific subdirectory for better organization.
         profile_name_from_meta = info_json_content.get('_ytops_metadata', {}).get('profile_name')
         final_output_dir = output_dir
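
Note: the dummy-mode metadata comes entirely from splitting the filename stem on '-', so a stem like `abc123-user1-proxy7.info` yields `video_id == 'abc123'` and `profile_name == 'user1'`; video IDs that themselves contain '-' would be truncated by this scheme. A quick check of the parse (filename hypothetical):

    parts = 'abc123-user1-proxy7.info'.split('-')  # stem of abc123-user1-proxy7.info.json
    video_id = parts[0] if parts else 'dummy_video'                        # -> 'abc123'
    profile_name = next((p for p in parts if p.startswith('user')), None)  # -> 'user1'
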
@@ -161,7 +166,7 @@ def _main_task_generator_generate(args):
             final_output_dir.mkdir(parents=True, exist_ok=True)
         except OSError as e:
             logger.error(f"Could not create profile subdirectory '{final_output_dir}': {e}. Skipping tasks for this source file.")
-            continue
+            return 0
 
         for format_str in formats_to_generate:
             task_data = info_json_content.copy()
@@ -182,12 +187,9 @@ def _main_task_generator_generate(args):
             with open(output_path, 'w', encoding='utf-8') as f:
                 json.dump(task_data, f, indent=2)
             logger.debug(f"Generated task file: {output_path}")
-            tasks_generated_this_run += 1
+            tasks_generated_this_file += 1
 
-        if tasks_generated_this_run > 0:
-            total_tasks_generated += tasks_generated_this_run
-
-            # Mark source file as processed by renaming
+        # Mark source file as processed now that we've iterated through all formats for it.
         try:
             processed_path = source_file.with_suffix(f"{source_file.suffix}.processed")
             source_file.rename(processed_path)
@@ -199,5 +201,74 @@ def _main_task_generator_generate(args):
         logger.error(f"An I/O error occurred while generating tasks for '{source_file.name}': {e}. It will be retried on the next run.")
         # The file is not renamed, so it will be picked up again
 
+    return tasks_generated_this_file
+
+
+def _main_task_generator_generate(args):
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    signal.signal(signal.SIGINT, handle_shutdown)
+    signal.signal(signal.SIGTERM, handle_shutdown)
+
+    source_dir = Path(args.source_dir)
+    output_dir = Path(args.output_dir)
+    formats_to_generate = [f.strip() for f in args.formats.split(',') if f.strip()]
+
+    if not source_dir.is_dir():
+        logger.error(f"Source directory does not exist or is not a directory: {source_dir}")
+        return 1
+
+    try:
+        output_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as e:
+        logger.error(f"Could not create output directory '{output_dir}': {e}")
+        return 1
+
+    if not args.live:
+        logger.info(f"Scanning for info.json files in '{source_dir}' (recursively)...")
+        source_files = list(source_dir.rglob('*.json'))
+
+        if not source_files:
+            logger.info(f"No .json files found in '{source_dir}'. Nothing to do.")
+            return 0
+
+        logger.info(f"Found {len(source_files)} source file(s). Generating tasks for formats: {', '.join(formats_to_generate)}...")
+
+        total_tasks_generated = 0
+        for source_file in source_files:
+            tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
+            total_tasks_generated += tasks_for_file
+
         logger.info(f"Successfully generated {total_tasks_generated} new task file(s) in '{output_dir}'.")
         return 0
+
+    # --- Live Mode ---
+    logger.info(f"Running in LIVE mode. Watching '{source_dir}' for new files every {args.interval_seconds}s. Press Ctrl+C to stop.")
+    total_tasks_generated = 0
+
+    while not shutdown_event:
+        try:
+            logger.debug("Live mode: Scanning for new source files...")
+            source_files = list(source_dir.rglob('*.json'))
+
+            if not source_files:
+                logger.debug("Live mode: No source files found.")
+            else:
+                logger.info(f"Live mode: Found {len(source_files)} source file(s) to process.")
+                for source_file in source_files:
+                    if shutdown_event: break
+                    tasks_for_file = _generate_tasks_for_file(source_file, output_dir, formats_to_generate, args.dummy)
+                    total_tasks_generated += tasks_for_file
+
+            if shutdown_event: break
+
+            logger.debug(f"Live mode: Scan complete. Sleeping for {args.interval_seconds}s...")
+            time.sleep(args.interval_seconds)
+
+        except Exception as e:
+            logger.error(f"An unexpected error occurred in the live loop: {e}", exc_info=True)
+            time.sleep(5) # Pause before retrying to avoid spamming errors
+
+    logger.info(f"Task generator stopped. Total tasks generated in this run: {total_tasks_generated}.")
+    return 0
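
Note: the `.processed` rename is the generator's only dedup mechanism: a source file that fails mid-generation keeps its original name and is retried on the next scan, while a renamed file no longer matches `rglob('*.json')`. A minimal sketch of that contract (path hypothetical):

    from pathlib import Path

    source_file = Path('info_json/abc123-user1.info.json')
    processed_path = source_file.with_suffix(f"{source_file.suffix}.processed")
    source_file.rename(processed_path)  # -> abc123-user1.info.json.processed
    # the live scan's rglob('*.json') no longer matches it, so it is skipped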