# symphony-rendering / yt-dlp_download.py
# shwj114514
# add 30s ckpt
# 82b281c
import argparse
import concurrent.futures as futures
import os
import re
import sys
from dataclasses import dataclass
from typing import List
try:
from yt_dlp import YoutubeDL
except ImportError:
print("请先安装: pip install yt-dlp", file=sys.stderr)
sys.exit(1)
# A group header is any line whose last non-space character is a colon.
# Both the ASCII colon and the full-width colon (U+FF1A, as typed with CJK
# input methods) are accepted.
HEADER_PATTERN = re.compile(r"[:\uFF1A]\s*$")
# A download entry is any line starting with http:// or https://
# (case-insensitive).
URL_PATTERN = re.compile(r"^https?://", re.I)
@dataclass
class Job:
    """One download task: a URL together with the group it belongs to.

    Attributes:
        url: Address that is handed to the downloader.
        group: Name of the group the URL is filed under.
    """

    url: str
    group: str
def safe_dirname(name: str) -> str:
    """Turn a group header into a string usable as a directory name.

    Surrounding whitespace and leading/trailing colons (ASCII colon or the
    full-width colon U+FF1A) are removed. Each backslash, slash, colon,
    asterisk, question mark, double quote, angle bracket and pipe is then
    replaced by a space, and the result is trimmed once more.

    Args:
        name: Raw header text, for example ``"Bach:"``.

    Returns:
        The sanitised name, or ``"Misc"`` when nothing is left after
        cleaning, because an empty string cannot be used as a directory path.
    """
    trimmed = name.strip().strip(":\uFF1A").strip()
    cleaned = re.sub(r"[\\/:\*\?\"<>\|]", " ", trimmed)
    # A replaced character at either end leaves a stray space behind; trim it
    # so that sanitising an already-sanitised name returns it unchanged.
    cleaned = cleaned.strip()
    return cleaned or "Misc"
def parse_links(file_path: str) -> List[Job]:
    """Read a links file and return one ``Job`` per URL line.

    The file is processed line by line:

    * blank lines are skipped;
    * a line matching ``HEADER_PATTERN`` starts a new group, named by
      ``safe_dirname`` of that line;
    * a line matching ``URL_PATTERN`` becomes a job in the current group;
    * any other line is ignored.

    URLs that appear before the first header are filed under ``"Misc"``.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        The jobs, in the order their URLs appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    jobs: List[Job] = []
    current_group = "Misc"
    # "utf-8-sig" reads plain UTF-8 too, but also drops a leading byte-order
    # mark. With plain "utf-8" the BOM stays attached to the first line
    # (str.strip() does not remove it), so a URL on line 1 would not match
    # URL_PATTERN and would be silently lost.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            if HEADER_PATTERN.search(line):
                current_group = safe_dirname(line)
            elif URL_PATTERN.match(line):
                jobs.append(Job(url=line, group=current_group))
    return jobs
def build_ydl_opts(out_dir: str, audio_format: str):
    """Ensure the output directory exists and return the downloader options.

    Args:
        out_dir: Directory that receives the downloaded files; it is created
            (including parents) when it does not exist yet.
        audio_format: Value passed as ``preferredcodec`` to the
            ``FFmpegExtractAudio`` post-processor.

    Returns:
        A freshly built options dictionary for ``YoutubeDL``.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Output files are named after the video title inside out_dir.
    output_template = os.path.join(out_dir, "%(title)s.%(ext)s")

    extract_audio = {"key": "FFmpegExtractAudio", "preferredcodec": audio_format}
    # NOTE(review): nothing in these options requests that a thumbnail file
    # be written; confirm EmbedThumbnail actually has something to embed.
    pp_chain = [extract_audio, {"key": "FFmpegMetadata"}, {"key": "EmbedThumbnail"}]

    return {
        # Prefer an Opus audio stream, otherwise fall back to the best audio.
        "format": "bestaudio[acodec=opus]/bestaudio",
        "outtmpl": output_template,
        "restrictfilenames": False,
        "noplaylist": True,
        "ignoreerrors": True,
        "continuedl": True,
        "split_chapters": True,
        "overwrites": False,
        "postprocessors": pp_chain,
        "consoletitle": True,
        "progress_with_newline": True,
    }
def download_one(job: Job, audio_format: str) -> str:
    """Download a single job and return a one-line status message.

    Args:
        job: The URL to fetch and the group whose directory receives it.
        audio_format: Target audio format, forwarded to ``build_ydl_opts``.

    Returns:
        ``"OK : <url>"`` on success, otherwise ``"FAIL: <url> (<reason>)"``.
        Errors are reported through the return value rather than raised, so
        one bad job does not abort the remaining ones.
    """
    try:
        out_dir = safe_dirname(job.group)
        # build_ydl_opts also creates out_dir. It is kept inside the try so a
        # directory that cannot be created is reported as FAIL for this job
        # instead of propagating to the caller.
        ydl_opts = build_ydl_opts(out_dir, audio_format)
        print(f"[DOWN] {job.url} -> {out_dir}")
        with YoutubeDL(ydl_opts) as ydl:
            # When the downloader is configured to ignore errors, a failed
            # download does not raise; the return code is the only signal.
            retcode = ydl.download([job.url])
    except Exception as e:
        return f"FAIL: {job.url} ({e})"
    if retcode:
        return f"FAIL: {job.url} (yt-dlp exit code {retcode})"
    return f"OK : {job.url}"
# Default links file, used when --links is not given on the command line.
LINK_TXT="youtube_12_composer_list.txt"
def main():
    """Parse the command line, read the links file and download every URL.

    Exits with status 1 when the links file cannot be read or contains no
    URL. Downloads run one after another unless ``--workers`` is greater
    than 1; in that case a thread pool is used and each status line is
    printed as soon as its download finishes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--links", default=LINK_TXT, help="包含分组与 URL 的文本文件,例如 links.txt")
    parser.add_argument("--audio-format", default="best",
                        choices=["best", "mp3", "m4a", "flac", "wav", "opus"],
                        help="音频格式;'best' 为保留原始音轨(推荐)")
    parser.add_argument("-j", "--workers", type=int, default=1,
                        help="并行下载线程数(谨慎过高,默认 1)")
    args = parser.parse_args()

    # A missing or unreadable links file is a user error like "no URLs
    # found" below: report it on stderr and exit 1 instead of a traceback.
    try:
        jobs = parse_links(args.links)
    except (OSError, UnicodeDecodeError) as e:
        print(f"无法读取链接文件 {args.links}: {e}", file=sys.stderr)
        sys.exit(1)

    if not jobs:
        print("未在文件中找到任何 URL。", file=sys.stderr)
        sys.exit(1)

    print(f"共 {len(jobs)} 个链接,分组数:{len({j.group for j in jobs})}")

    if args.workers <= 1:
        for job in jobs:
            msg = download_one(job, args.audio_format)
            print(msg)
    else:
        with futures.ThreadPoolExecutor(max_workers=args.workers) as ex:
            tasks = [ex.submit(download_one, job, args.audio_format) for job in jobs]
            # Print in completion order, not submission order.
            for t in futures.as_completed(tasks):
                print(t.result())


if __name__ == "__main__":
    main()