"""Multithreaded yt-dlp downloader with resumable progress tracking.

Reads video URLs from ``video_urls.txt``, downloads each one with yt-dlp in
a thread pool, and records per-URL state in JSON files so that a re-run
skips URLs that already succeeded or permanently failed.
"""
import subprocess
import os
import json
import shutil
import tempfile
import threading
import concurrent.futures
from datetime import datetime

# --- Configuration ---------------------------------------------------------
formats = "18,599,140,133,134,135,136,137,298,299"  # yt-dlp format selector
cookies_file = "cookies.txt"
output_template = "video/%(id)s.f%(format_id)s.%(ext)s"
num_threads = 4
url_file = "video_urls.txt"
log_file = "download_log.txt"
metadata_file = "metadata.json"
downloaded_videos_file = "downloaded_videos.json"
failed_urls_file = "failed_urls.json"

# Worker threads share the JSON state files and the log file. Their
# read-modify-write cycles must be serialized, otherwise two concurrent
# downloads can silently lose each other's updates or corrupt the JSON.
_state_lock = threading.Lock()


def load_json_file(file_path):
    """Return the parsed JSON object stored at *file_path*, or {} if absent."""
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            return json.load(file)
    return {}


def save_json_file(file_path, data):
    """Write *data* to *file_path* as pretty-printed, non-ASCII-safe JSON."""
    with open(file_path, "w") as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


def download_video(url):
    """Download *url* with yt-dlp and record the outcome.

    Each worker uses a private copy of the cookie file because yt-dlp
    rewrites its cookie jar, so concurrent runs sharing one file would
    clobber each other.
    """
    start_time = datetime.now()
    # Bind before `try` so the `finally` cleanup never sees an unbound name
    # (the original code raised UnboundLocalError if tempfile creation failed).
    temp_cookies_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_cookies:
            temp_cookies_path = temp_cookies.name
        shutil.copyfile(cookies_file, temp_cookies_path)

        command = [
            "yt-dlp",
            "-f", formats,
            "--cookies", temp_cookies_path,
            "--output", output_template,
            "--write-info-json",
            url,
        ]
        print(f"START COMMAND: {' '.join(command)}")
        # Capture stderr so the permanent-failure check below can actually
        # see yt-dlp's error text: CalledProcessError's str() contains no
        # process output unless it was captured.
        subprocess.run(command, check=True, stderr=subprocess.PIPE, text=True)
        end_time = datetime.now()
        log_download(url, start_time, end_time)
        update_metadata(url)
        mark_as_downloaded(url)
    except subprocess.CalledProcessError as e:
        stderr = e.stderr or ""
        print(f"ERROR DOWNLOAD VIDEO {url}: {e}\n{stderr}")
        # Only mark as permanently failed for errors a retry cannot fix.
        if "Sign in to confirm you’re not a bot" in stderr or "Video unavailable" in stderr:
            mark_as_failed(url)
    finally:
        if temp_cookies_path and os.path.exists(temp_cookies_path):
            os.remove(temp_cookies_path)


def log_download(url, start_time, end_time):
    """Append a success entry for *url* to the shared download log."""
    with _state_lock:
        with open(log_file, "a") as log:
            log.write(f"{url} Downloaded.\nStart: {start_time}\nEnd: {end_time}\n\n")


def update_metadata(url):
    """Merge the yt-dlp ``.info.json`` for *url* into the aggregate metadata file."""
    # NOTE(review): assumes the video id is the last '='-separated token of
    # the URL (i.e. watch?v=<id> with no trailing query params) — confirm
    # against the contents of video_urls.txt.
    video_id = url.split("=")[-1]
    metadata_path = f"video/{video_id}.info.json"
    if os.path.exists(metadata_path):
        with open(metadata_path, "r") as meta_file:
            metadata = json.load(meta_file)
        with _state_lock:
            all_metadata = load_json_file(metadata_file)
            all_metadata[url] = metadata
            save_json_file(metadata_file, all_metadata)


def mark_as_downloaded(url):
    """Record *url* as successfully downloaded, with an ISO timestamp."""
    with _state_lock:
        downloaded_videos = load_json_file(downloaded_videos_file)
        downloaded_videos[url] = datetime.now().isoformat()
        save_json_file(downloaded_videos_file, downloaded_videos)


def mark_as_failed(url):
    """Record *url* as permanently failed so future runs skip it."""
    with _state_lock:
        failed_urls = load_json_file(failed_urls_file)
        failed_urls[url] = datetime.now().isoformat()
        save_json_file(failed_urls_file, failed_urls)


def get_remaining_urls(video_urls):
    """Return the subset of *video_urls* not yet downloaded or marked failed."""
    downloaded_videos = load_json_file(downloaded_videos_file)
    failed_urls = load_json_file(failed_urls_file)
    return [url for url in video_urls
            if url not in downloaded_videos and url not in failed_urls]


def main():
    """Read the URL list and fan downloads out over a thread pool."""
    if not os.path.exists(url_file):
        print(f"File {url_file} Missing.")
        raise SystemExit(1)

    with open(url_file, "r") as file:
        video_urls = [line.strip() for line in file if line.strip()]

    os.makedirs("video", exist_ok=True)

    remaining_urls = get_remaining_urls(video_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(download_video, url) for url in remaining_urls]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:  # surface one worker's crash without killing the rest
                print(f"ERROR: {e}")


# Guarded so importing this module (e.g. for testing) has no side effects;
# command-line behavior is unchanged.
if __name__ == "__main__":
    main()