"""multithread.py — parallel batch downloader driving yt-dlp over a URL list.

Reads video_urls.txt, skips URLs already recorded as downloaded or failed,
and fetches the rest concurrently, logging progress and collecting metadata.
"""
import subprocess
import os
import json
from datetime import datetime
import concurrent.futures
import tempfile
import shutil
# Configuration
formats = "18,599,140,133,134,135,136,137,298,299"  # comma-separated yt-dlp format ids passed to -f
cookies_file = "cookies.txt"  # master cookie jar; copied per worker in download_video
output_template = "video/%(id)s.f%(format_id)s.%(ext)s"  # yt-dlp --output template
num_threads = 4  # size of the download thread pool
url_file = "video_urls.txt"  # input: one video URL per line
log_file = "download_log.txt"  # append-only human-readable download log
metadata_file = "metadata.json"  # aggregate of per-video info JSON, keyed by URL
downloaded_videos_file = "downloaded_videos.json"  # url -> ISO timestamp of successful download
failed_urls_file = "failed_urls.json"  # url -> ISO timestamp of permanent failure (skipped on rerun)
def load_json_file(file_path):
    """Return the parsed JSON contents of *file_path*, or {} if the file is absent."""
    if not os.path.exists(file_path):
        return {}
    with open(file_path, "r") as fh:
        return json.load(fh)
def save_json_file(file_path, data):
    """Write *data* to *file_path* as indented JSON, keeping non-ASCII characters verbatim."""
    payload = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_path, "w") as fh:
        fh.write(payload)
def download_video(url):
    """Download *url* with yt-dlp, using a private copy of the cookie file.

    On success the download is logged, its metadata merged, and the URL
    recorded as downloaded.  On a yt-dlp failure that looks permanent
    (bot check / unavailable video) the URL is recorded as failed so it
    is not retried on the next run.

    NOTE(review): the JSON bookkeeping helpers this calls do a
    read-modify-write from multiple worker threads with no lock — entries
    can be lost under contention; confirm whether that is acceptable.
    """
    start_time = datetime.now()
    # Bound before the try block so the finally clause can never hit an
    # UnboundLocalError when NamedTemporaryFile itself raises.
    temp_cookies_path = None
    try:
        # yt-dlp rewrites its cookie jar; give each worker its own copy so
        # concurrent downloads do not corrupt the shared cookies.txt.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_cookies:
            temp_cookies_path = temp_cookies.name
        shutil.copyfile(cookies_file, temp_cookies_path)
        command = [
            "yt-dlp",
            "-f", formats,
            "--cookies", temp_cookies_path,
            "--output", output_template,
            "--write-info-json",
            url,
        ]
        print(f"START COMMAND: {' '.join(command)}")
        # Capture stderr so permanent-failure messages can be inspected below.
        # Previously str(e) never contained yt-dlp's output ("Command ...
        # returned non-zero exit status N."), so both checks were dead code.
        subprocess.run(command, check=True, stderr=subprocess.PIPE, text=True)
        end_time = datetime.now()
        log_download(url, start_time, end_time)
        update_metadata(url)
        mark_as_downloaded(url)
    except subprocess.CalledProcessError as e:
        error_text = e.stderr or ""
        print(f"ERROR DOWNLOAD VIDEO {url}: {e}\n{error_text}")
        # Only permanent failures are recorded; transient errors stay eligible
        # for retry on the next run.
        if "Sign in to confirm you're not a bot" in error_text or "Video unavailable" in error_text:
            mark_as_failed(url)
    finally:
        if temp_cookies_path and os.path.exists(temp_cookies_path):
            os.remove(temp_cookies_path)
def log_download(url, start_time, end_time):
    """Append a completed-download record for *url* to the shared log file."""
    entry = f"{url} Downloaded.\nStart: {start_time}\nEnd: {end_time}\n\n"
    with open(log_file, "a") as log:
        log.write(entry)
def update_metadata(url):
    """Merge the per-video yt-dlp info JSON for *url* into the aggregate metadata file.

    The entry is keyed by the original URL; if the info file does not exist
    (download failed or --write-info-json produced nothing) this is a no-op.
    """
    from urllib.parse import parse_qs, urlparse

    # The old heuristic url.split('=')[-1] returned the wrong id for URLs
    # with extra query params (".../watch?v=ID&t=30" -> "30") and for
    # youtu.be / shorts links.  Prefer the 'v' query parameter, then the
    # last path segment, and keep the old split as the final fallback.
    parsed = urlparse(url)
    v_params = parse_qs(parsed.query).get("v")
    if v_params:
        video_id = v_params[0]
    elif "=" not in url and parsed.path.rstrip("/"):
        video_id = parsed.path.rstrip("/").split("/")[-1]
    else:
        video_id = url.split('=')[-1]
    metadata_path = f"video/{video_id}.info.json"
    if os.path.exists(metadata_path):
        with open(metadata_path, "r") as meta_file:
            metadata = json.load(meta_file)
        all_metadata = load_json_file(metadata_file)
        all_metadata[url] = metadata
        save_json_file(metadata_file, all_metadata)
def mark_as_downloaded(url):
    """Record *url* as successfully downloaded, stamped with the current time."""
    record = load_json_file(downloaded_videos_file)
    record[url] = datetime.now().isoformat()
    save_json_file(downloaded_videos_file, record)
def mark_as_failed(url):
    """Record *url* as permanently failed, stamped with the current time."""
    record = load_json_file(failed_urls_file)
    record[url] = datetime.now().isoformat()
    save_json_file(failed_urls_file, record)
def get_remaining_urls(video_urls):
    """Return the subset of *video_urls* not yet downloaded and not marked failed.

    Input order (and any duplicates among still-pending URLs) is preserved.
    """
    processed = set(load_json_file(downloaded_videos_file))
    processed |= set(load_json_file(failed_urls_file))
    return [url for url in video_urls if url not in processed]
def main():
    """Read the URL list and download every not-yet-processed video in parallel.

    Exits with status 1 when the URL list file is missing.  Worker errors are
    printed but do not abort the remaining downloads.
    """
    if not os.path.exists(url_file):
        print(f"File {url_file} Missing.")
        # SystemExit instead of bare exit(): exit() comes from the site
        # module and is not guaranteed outside interactive use.
        raise SystemExit(1)
    with open(url_file, "r") as file:
        video_urls = [line.strip() for line in file if line.strip()]
    os.makedirs("video", exist_ok=True)
    remaining_urls = get_remaining_urls(video_urls)
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [executor.submit(download_video, url) for url in remaining_urls]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"ERROR: {e}")


# Guarded entry point: importing this module no longer triggers downloads.
if __name__ == "__main__":
    main()