"""Download a list of videos in parallel with yt-dlp and track progress.

URLs are read from ``url_file``.  Each URL that is not yet recorded as
downloaded or failed is handed to a worker thread that runs ``yt-dlp``.
Progress is persisted in small JSON files so an interrupted run can be
resumed.
"""

import concurrent.futures
import glob
import json
import os
import shutil
import subprocess
import sys
import tempfile
import threading
from datetime import datetime
from urllib.parse import parse_qs, urlparse

# Configuration
formats = "18,599,140,133,134,135,136,137,298,299"
cookies_file = "cookies.txt"
output_template = "video/%(id)s.f%(format_id)s.%(ext)s"
num_threads = 4
url_file = "video_urls.txt"
log_file = "download_log.txt"
metadata_file = "metadata.json"
downloaded_videos_file = "downloaded_videos.json"
failed_urls_file = "failed_urls.json"

# Substrings of yt-dlp's error output that mark a URL as not worth retrying.
_PERMANENT_FAILURE_MARKERS = (
    "Sign in to confirm you’re not a bot",
    "Video unavailable",
)

# Serialises every read-modify-write of the JSON state files and the log:
# download_video() runs on several worker threads at once.
_state_lock = threading.Lock()


def load_json_file(file_path):
    """Return the JSON content of *file_path*, or ``{}`` if it does not exist."""
    if os.path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    return {}


def save_json_file(file_path, data):
    """Write *data* to *file_path* as indented JSON.

    The file is opened as UTF-8 explicitly because ``ensure_ascii=False``
    emits raw non-ASCII characters, which a non-UTF-8 locale default
    encoding may be unable to represent.
    """
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


def _extract_video_id(url):
    """Return the video id embedded in *url*.

    Prefers the ``v`` query parameter (``watch?v=ID&t=10``), then the last
    path segment (``youtu.be/ID``), and finally falls back to the text after
    the last ``=``.
    """
    parsed = urlparse(url)
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]
    last_segment = parsed.path.rstrip("/").rsplit("/", 1)[-1]
    if last_segment:
        return last_segment
    return url.split("=")[-1]


def _find_info_json(video_id):
    """Return the path of the info JSON written for *video_id*, or ``None``.

    The exact name ``video/<id>.info.json`` is tried first.
    NOTE(review): because ``output_template`` contains ``%(format_id)s``,
    yt-dlp presumably names the file ``<id>.f<format>.info.json``; that
    pattern is used as a fallback -- confirm against real output.
    """
    exact_path = f"video/{video_id}.info.json"
    if os.path.exists(exact_path):
        return exact_path
    pattern = os.path.join("video", f"{glob.escape(video_id)}.f*.info.json")
    matches = sorted(glob.glob(pattern))
    return matches[0] if matches else None


def download_video(url):
    """Download *url* with yt-dlp and record the outcome.

    On success the download is logged, its metadata is merged into
    ``metadata_file`` and the URL is marked as downloaded.  When yt-dlp
    fails and its error output contains one of the permanent-failure
    markers, the URL is marked as failed; other yt-dlp failures are only
    reported, so the URL is retried on the next run.

    Exceptions other than ``subprocess.CalledProcessError`` (for example a
    missing cookies file) propagate to the caller.
    """
    start_time = datetime.now()
    # Bound before the try block so the cleanup in `finally` never hits an
    # unbound name when creating the temporary file itself fails.
    temp_cookies_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_cookies:
            temp_cookies_path = temp_cookies.name
        # Each download works on its own copy of the cookies file.
        # NOTE(review): presumably so parallel yt-dlp processes do not
        # rewrite the shared cookie jar -- confirm.
        shutil.copyfile(cookies_file, temp_cookies_path)

        command = [
            "yt-dlp",
            "-f", formats,
            "--cookies", temp_cookies_path,
            "--output", output_template,
            "--write-info-json",
            url,
        ]
        print(f"START COMMAND: {' '.join(command)}")

        # stderr is captured so error messages can be inspected below;
        # stdout is inherited, so yt-dlp's own output is still shown.
        result = subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
        if result.stderr:
            print(result.stderr, file=sys.stderr, end="")

        end_time = datetime.now()
        log_download(url, start_time, end_time)
        update_metadata(url)
        mark_as_downloaded(url)
    except subprocess.CalledProcessError as e:
        error_output = e.stderr or ""
        if error_output:
            print(error_output, file=sys.stderr, end="")
        print(f"ERROR DOWNLOAD VIDEO {url}: {e}")
        # str(e) only holds the command and exit status; the actual yt-dlp
        # message is in the captured stderr.
        if any(marker in error_output for marker in _PERMANENT_FAILURE_MARKERS):
            mark_as_failed(url)
    finally:
        if temp_cookies_path and os.path.exists(temp_cookies_path):
            os.remove(temp_cookies_path)


def log_download(url, start_time, end_time):
    """Append a record of a finished download to ``log_file``."""
    with _state_lock:
        with open(log_file, "a", encoding="utf-8") as log:
            log.write(f"{url} Downloaded.\nStart: {start_time}\nEnd: {end_time}\n\n")


def update_metadata(url):
    """Merge the info JSON written for *url* into ``metadata_file``.

    Does nothing when no info JSON can be found for the video.
    """
    metadata_path = _find_info_json(_extract_video_id(url))
    if metadata_path is None:
        return
    with open(metadata_path, "r", encoding="utf-8") as meta_file:
        metadata = json.load(meta_file)
    with _state_lock:
        all_metadata = load_json_file(metadata_file)
        all_metadata[url] = metadata
        save_json_file(metadata_file, all_metadata)


def mark_as_downloaded(url):
    """Record *url* with the current timestamp in ``downloaded_videos_file``."""
    with _state_lock:
        downloaded_videos = load_json_file(downloaded_videos_file)
        downloaded_videos[url] = datetime.now().isoformat()
        save_json_file(downloaded_videos_file, downloaded_videos)


def mark_as_failed(url):
    """Record *url* with the current timestamp in ``failed_urls_file``."""
    with _state_lock:
        failed_urls = load_json_file(failed_urls_file)
        failed_urls[url] = datetime.now().isoformat()
        save_json_file(failed_urls_file, failed_urls)


def get_remaining_urls(video_urls):
    """Return the URLs from *video_urls* not yet marked downloaded or failed."""
    downloaded_videos = load_json_file(downloaded_videos_file)
    failed_urls = load_json_file(failed_urls_file)
    return [
        url
        for url in video_urls
        if url not in downloaded_videos and url not in failed_urls
    ]


def main():
    """Read the URL list and download every remaining video in parallel."""
    if not os.path.exists(url_file):
        print(f"File {url_file} Missing.")
        sys.exit(1)

    with open(url_file, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        # dict.fromkeys drops duplicate URLs while keeping their order, so
        # the same video is never downloaded by two workers at once.
        video_urls = list(dict.fromkeys(line for line in stripped_lines if line))

    os.makedirs("video", exist_ok=True)
    remaining_urls = get_remaining_urls(video_urls)

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(download_video, url) for url in remaining_urls]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                # Top-level boundary: report and keep processing other URLs.
                print(f"ERROR: {e}")


if __name__ == "__main__":
    main()