"""Parallel YouTube downloader built on yt-dlp.

Reads video URLs from a text file, downloads them concurrently with a
thread pool, and tracks progress (log, aggregated metadata, downloaded and
permanently-failed URL sets) in JSON files so reruns skip finished work.
"""
import concurrent.futures
import glob
import json
import os
import shutil
import subprocess
import tempfile
from datetime import datetime
# Configuration
formats = "18,599,140,133,134,135,136,137,298,299"  # yt-dlp format ids to request
cookies_file = "cookies.txt"  # master cookie jar (each worker gets its own copy)
output_template = "video/%(id)s.f%(format_id)s.%(ext)s"  # yt-dlp --output template
num_threads = 4  # number of parallel download workers

url_file = "video_urls.txt"  # input: one video URL per line
log_file = "download_log.txt"  # human-readable success log
metadata_file = "metadata.json"  # aggregated yt-dlp info JSON, keyed by URL
downloaded_videos_file = "downloaded_videos.json"  # url -> ISO timestamp of success
failed_urls_file = "failed_urls.json"  # url -> ISO timestamp of permanent failure
def load_json_file(file_path):
    """Load a JSON object from *file_path*, or return {} if the file is absent.

    Reads with an explicit UTF-8 encoding: the companion writer uses
    ensure_ascii=False, so relying on the platform default encoding would
    break round-tripping non-ASCII metadata on e.g. Windows.
    """
    if os.path.exists(file_path):
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    return {}
def save_json_file(file_path, data):
    """Write *data* to *file_path* as pretty-printed, non-ASCII-safe JSON.

    encoding="utf-8" is required: with ensure_ascii=False, non-ASCII
    characters are written verbatim and would raise UnicodeEncodeError on
    platforms whose default file encoding is not UTF-8.
    """
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
def download_video(url):
    """Download one video with yt-dlp and record the outcome.

    On success: appends to the log, folds the video's info JSON into the
    aggregate metadata file, and marks the URL as downloaded. On a
    permanent failure (bot check / unavailable video) marks the URL as
    failed so future runs skip it.
    """
    start_time = datetime.now()
    # Defined before the try so the finally clause can never hit a NameError
    # if NamedTemporaryFile itself raises.
    temp_cookies_path = None
    try:
        # Each worker gets a private copy of the cookie file: yt-dlp rewrites
        # its cookie file, and concurrent writers would corrupt a shared one.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_cookies:
            temp_cookies_path = temp_cookies.name
        shutil.copyfile(cookies_file, temp_cookies_path)

        command = [
            "yt-dlp",
            "-f", formats,
            "--cookies", temp_cookies_path,
            "--output", output_template,
            "--write-info-json",
            url,
        ]
        print(f"START COMMAND: {' '.join(command)}")
        # Capture output so the error text is inspectable below. The previous
        # check against str(e) could never match: without captured output,
        # CalledProcessError's message contains only the command and exit code.
        result = subprocess.run(command, capture_output=True, text=True)
        if result.stdout:
            print(result.stdout)
        if result.returncode != 0:
            raise subprocess.CalledProcessError(
                result.returncode, command,
                output=result.stdout, stderr=result.stderr,
            )
        end_time = datetime.now()
        log_download(url, start_time, end_time)
        update_metadata(url)
        mark_as_downloaded(url)
    except subprocess.CalledProcessError as e:
        print(f"ERROR DOWNLOAD VIDEO {url}: {e}")
        error_text = (e.stderr or "") + (e.output or "")
        # Permanent failures only; transient errors stay retryable. The curly
        # apostrophe is intentional — it matches YouTube's actual message.
        if "Sign in to confirm you’re not a bot" in error_text or "Video unavailable" in error_text:
            mark_as_failed(url)
    finally:
        if temp_cookies_path and os.path.exists(temp_cookies_path):
            os.remove(temp_cookies_path)
def log_download(url, start_time, end_time):
    """Append a human-readable success entry for *url* to the download log.

    Opened with an explicit UTF-8 encoding so log writes cannot raise
    UnicodeEncodeError on platforms whose default encoding is not UTF-8.
    """
    with open(log_file, "a", encoding="utf-8") as log:
        log.write(f"{url} Downloaded.\nStart: {start_time}\nEnd: {end_time}\n\n")
def update_metadata(url):
    """Fold the yt-dlp .info.json for *url* into the aggregate metadata file.

    Assumes the video id is the last '='-separated token of the URL (true for
    watch?v=... links — TODO confirm for other URL forms).
    """
    video_id = url.split('=')[-1]
    # The output template includes the format id (video/<id>.f<fmt>.<ext>), so
    # yt-dlp writes the info JSON as video/<id>.f<fmt>.info.json — the fixed
    # path video/<id>.info.json used previously never existed, and metadata
    # was silently never aggregated. Match any info file for this id instead.
    candidates = sorted(glob.glob(f"video/{video_id}*.info.json"))
    if not candidates:
        return
    with open(candidates[0], "r", encoding="utf-8") as meta_file:
        metadata = json.load(meta_file)

    all_metadata = load_json_file(metadata_file)
    all_metadata[url] = metadata
    save_json_file(metadata_file, all_metadata)
def mark_as_downloaded(url):
    """Record *url* as successfully downloaded, stamped with the current time."""
    record = load_json_file(downloaded_videos_file)
    record[url] = datetime.now().isoformat()
    save_json_file(downloaded_videos_file, record)
def mark_as_failed(url):
    """Record *url* as permanently failed, stamped with the current time."""
    record = load_json_file(failed_urls_file)
    record[url] = datetime.now().isoformat()
    save_json_file(failed_urls_file, record)
def get_remaining_urls(video_urls):
    """Return the URLs from *video_urls* not yet downloaded and not marked failed."""
    # Iterating a dict yields its keys, so the union is the full skip set.
    skip = set(load_json_file(downloaded_videos_file)) | set(load_json_file(failed_urls_file))
    return [candidate for candidate in video_urls if candidate not in skip]
# --- Script entry point -----------------------------------------------------
if not os.path.exists(url_file):
    print(f"File {url_file} Missing.")
    # exit() is a site-module convenience not guaranteed in all contexts;
    # raising SystemExit is the robust equivalent.
    raise SystemExit(1)

# URLs are one per line; blank lines are skipped.
with open(url_file, "r", encoding="utf-8") as file:
    video_urls = [line.strip() for line in file if line.strip()]

os.makedirs("video", exist_ok=True)

# Skip anything already downloaded or permanently failed on a previous run.
remaining_urls = get_remaining_urls(video_urls)

with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
    futures = [executor.submit(download_video, url) for url in remaining_urls]
    for future in concurrent.futures.as_completed(futures):
        try:
            future.result()
        except Exception as e:
            # One bad URL must not kill the whole batch; surface and continue.
            print(f"ERROR: {e}")