| import os, os.path as osp |
| import json |
| import yt_dlp |
|
|
|
|
def _video_id(url):
    """Return the identifier used to name the files downloaded for *url*."""
    # Imported locally so this helper does not depend on edits to the
    # file-level import block.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    ids = parse_qs(parsed.query).get("v")
    if ids:
        # ``watch?v=<id>`` style URL; any other query parameters
        # (``&t=10s``, ``&list=...``) are ignored.
        return ids[0]
    # No ``v`` parameter (e.g. ``https://youtu.be/<id>``): use the last
    # path component, without any query string.
    return osp.basename(parsed.path)


def ytb_download(url, json_info=None, output_dir="ytb_videos/"):
    """Download the video at *url* to ``<output_dir>/<id>.mp4``.

    The download is skipped when the target ``.mp4`` file already exists.

    Args:
        url: Video URL. The id is read from its ``v`` query parameter, or
            from the last path component when there is no such parameter.
        json_info: Optional JSON-serialisable metadata. When given, it is
            written to ``<output_dir>/<id>.json`` after the download.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        ``0`` if the video was already present or was downloaded, ``-1`` if
        the download or the metadata write raised an exception.
    """
    uid = _video_id(url)
    os.makedirs(output_dir, exist_ok=True)

    video_path = osp.join(output_dir, f"{uid}.mp4")
    meta_path = osp.join(output_dir, f"{uid}.json")
    if osp.exists(video_path):
        print(f"{uid} already downloaded.")
        return 0

    yt_opts = {
        "format": "best",
        "outtmpl": osp.join(output_dir, f"{uid}.%(ext)s"),
        "postprocessors": [
            {
                "key": "FFmpegVideoConvertor",
                # The option name really is spelled "preferedformat".
                "preferedformat": "mp4",
            }
        ],
    }

    try:
        with yt_dlp.YoutubeDL(yt_opts) as ydl:
            ydl.download([url])
        if json_info is not None:
            with open(meta_path, "w", encoding="utf-8") as fp:
                json.dump(json_info, fp, indent=2)
    except Exception as exc:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit
        # must propagate so a batch run can actually be stopped.
        print(f"{uid} failed: {exc}")
        return -1
    return 0
| |
from tqdm import tqdm

# Load the dataset index. Each entry is expected to carry a "url" field.
# The encoding is explicit so decoding does not depend on the platform locale.
with open("Video-MME.json", "r", encoding="utf-8") as fp:
    info = json.load(fp)

urls = []  # NOTE(review): never read in this file; kept in case other code relies on it
errors = []
for d in tqdm(info):
    r = ytb_download(d["url"])
    if r != 0:
        # Remember the failed URL so the run can be retried later.
        errors.append(d["url"])

# One failed URL per line; the file is empty when every download succeeded.
with open("errors.txt", "w", encoding="utf-8") as fp:
    fp.write("\n".join(errors))
|
|