Обновить multithread.py
This commit is contained in:
parent
88ac7b11d5
commit
f76d950b33
@ -3,73 +3,105 @@ import os
|
|||||||
import json
|
import json
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
formats = "18"
|
# Конфигурация
|
||||||
|
formats = "18,599,140,133,134,135,136,137,298,299"
|
||||||
cookies_file = "cookies.txt"
|
cookies_file = "cookies.txt"
|
||||||
output_template = "%(id)s/%(id)s.f%(format_id)s.%(ext)s"
|
output_template = "video/%(id)s.f%(format_id)s.%(ext)s"
|
||||||
output_infojson = "infojson:%(id)s/%(id)s.t%(duration_string)s.%(ext)s"
|
num_threads = 4
|
||||||
paths = "~/Downloads/staging"
|
|
||||||
paths_temp = "temp:~/Downloads/temp"
|
|
||||||
cache_dir = "~/Downloads/cache"
|
|
||||||
ffmpeg_location = "~/"
|
|
||||||
num_retries = 10
|
|
||||||
fragment_retries = 10
|
|
||||||
concur_fragments = 1
|
|
||||||
num_threads = 16
|
|
||||||
|
|
||||||
|
|
||||||
url_file = "video_urls.txt"
|
url_file = "video_urls.txt"
|
||||||
log_file = "download_log.txt"
|
log_file = "download_log.txt"
|
||||||
metadata_file = "metadata.json"
|
metadata_file = "metadata.json"
|
||||||
|
downloaded_videos_file = "downloaded_videos.json"
|
||||||
|
failed_urls_file = "failed_urls.json"
|
||||||
|
|
||||||
def load_json_file(file_path):
    """Load and return the JSON content of *file_path*.

    Returns an empty dict when the file does not exist, so callers can
    treat a missing bookkeeping file as "nothing recorded yet".
    """
    if os.path.exists(file_path):
        with open(file_path, "r") as file:
            return json.load(file)
    return {}
def save_json_file(file_path, data):
    """Write *data* to *file_path* as indented JSON.

    ensure_ascii=False keeps non-ASCII text (e.g. Cyrillic titles in the
    video metadata) human-readable instead of \\u-escaped.
    """
    with open(file_path, "w") as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
def download_video(url):
    """Download one video with yt-dlp using a private copy of the cookie jar.

    On success the run is logged, the video's .info.json is merged into the
    aggregate metadata file, and the URL is marked as downloaded.  On a
    yt-dlp failure that looks permanent the URL is marked as failed so it
    is skipped on the next run.
    """
    start_time = datetime.now()
    # Bug fix: initialize before the try-body so the `finally` clause cannot
    # raise NameError (masking the real error) if tempfile/copyfile fails.
    temp_cookies_path = None
    try:
        # yt-dlp rewrites the cookie jar it is given; give every worker
        # thread its own throwaway copy to avoid concurrent clobbering.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_cookies:
            temp_cookies_path = temp_cookies.name
            shutil.copyfile(cookies_file, temp_cookies_path)

        command = [
            "yt-dlp",
            "-f", formats,
            "--cookies", temp_cookies_path,
            "--output", output_template,
            "--write-info-json",
            url
        ]
        print(f"START COMMAND: {' '.join(command)}")
        subprocess.run(command, check=True)
        end_time = datetime.now()
        log_download(url, start_time, end_time)
        update_metadata(url)
        mark_as_downloaded(url)
    except subprocess.CalledProcessError as e:
        print(f"ERROR DOWNLOAD VIDEO {url}: {e}")
        # NOTE(review): str(CalledProcessError) does not include stderr unless
        # output is captured — this substring match may never fire; confirm by
        # running with capture_output=True and matching on e.stderr instead.
        if "Sign in to confirm you’re not a bot" in str(e) or "Video unavailable" in str(e):
            mark_as_failed(url)
    finally:
        # Always remove the per-thread cookie copy, even on failure.
        if temp_cookies_path and os.path.exists(temp_cookies_path):
            os.remove(temp_cookies_path)
def log_download(url, start_time, end_time):
    """Append a human-readable success record for *url* to the shared log file.

    start_time/end_time are datetime objects captured around the yt-dlp run.
    """
    with open(log_file, "a") as log:
        log.write(f"{url} Downloaded.\nStart: {start_time}\nEnd: {end_time}\n\n")
def update_metadata(url):
    """Merge the yt-dlp .info.json for *url* into the aggregate metadata file.

    Silently does nothing when the .info.json is absent (e.g. the download
    produced no metadata), matching the best-effort style of the script.
    """
    # Assumes the video id is the text after the last '=' (watch?v=<id> URLs);
    # TODO confirm this also holds for youtu.be/<id> style links.
    video_id = url.split('=')[-1]
    metadata_path = f"video/{video_id}.info.json"
    if os.path.exists(metadata_path):
        with open(metadata_path, "r") as meta_file:
            metadata = json.load(meta_file)
        all_metadata = load_json_file(metadata_file)
        all_metadata[url] = metadata
        save_json_file(metadata_file, all_metadata)
def mark_as_downloaded(url):
    """Record *url* with a completion timestamp in the downloaded-videos file."""
    downloaded_videos = load_json_file(downloaded_videos_file)
    downloaded_videos[url] = datetime.now().isoformat()
    save_json_file(downloaded_videos_file, downloaded_videos)
def mark_as_failed(url):
    """Record *url* with a failure timestamp so future runs skip it."""
    failed_urls = load_json_file(failed_urls_file)
    failed_urls[url] = datetime.now().isoformat()
    save_json_file(failed_urls_file, failed_urls)
def get_remaining_urls(video_urls):
    """Return the subset of *video_urls* not yet downloaded and not failed.

    Membership is checked against the keys of the two JSON bookkeeping files.
    """
    downloaded_videos = load_json_file(downloaded_videos_file)
    failed_urls = load_json_file(failed_urls_file)
    return [url for url in video_urls
            if url not in downloaded_videos and url not in failed_urls]
# --- Entry point: read the URL list and fan downloads out over a thread pool.
if not os.path.exists(url_file):
    print(f"File {url_file} Missing.")
    exit(1)

with open(url_file, "r") as file:
    # One URL per line; skip blank lines.
    video_urls = [line.strip() for line in file if line.strip()]

# All outputs (media + .info.json) land in ./video — create it up front.
os.makedirs("video", exist_ok=True)

# Skip anything already downloaded or marked as permanently failed.
remaining_urls = get_remaining_urls(video_urls)

with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
    futures = [executor.submit(download_video, url) for url in remaining_urls]
    for future in concurrent.futures.as_completed(futures):
        try:
            # result() re-raises any exception captured inside the worker.
            future.result()
        except Exception as e:
            print(f"ERROR: {e}")
Loading…
x
Reference in New Issue
Block a user