# ElevenClip-AI — backend/app/services/subtitles.py
# Author: JakgritB
# feat(editor): subtitle-first editor + AI subtitle pipeline (commit 89e1dc4)
import re
from pathlib import Path
from app.models.schemas import TranscriptSegment
def seconds_to_srt_time(value: float) -> str:
    """Format *value* (seconds) as an SRT timestamp ``HH:MM:SS,mmm``."""
    total_ms = int(round(value * 1000))
    hh, rest = divmod(total_ms, 3_600_000)
    mm, rest = divmod(rest, 60_000)
    ss, ms = divmod(rest, 1000)
    return f"{hh:02}:{mm:02}:{ss:02},{ms:03}"
def write_srt(
    path: Path, clip_start: float, clip_end: float, segments: list[TranscriptSegment]
) -> list[dict]:
    """Write an SRT file for the transcript segments overlapping the clip window.

    Segment times are shifted to be relative to ``clip_start``; segments fully
    outside [clip_start, clip_end] are skipped. If nothing overlaps, a single
    empty 3-second placeholder cue is written so the file is never empty.
    Returns the cue dicts that were written.
    """
    collected: list[dict] = []
    clip_length = clip_end - clip_start
    for segment in segments:
        # Ignore segments entirely before or after the clip window.
        if segment.end_seconds < clip_start or segment.start_seconds > clip_end:
            continue
        local_start = max(0.0, segment.start_seconds - clip_start)
        local_end = min(clip_length, segment.end_seconds - clip_start)
        # Guarantee at least 1.2s of window for the splitter to work with.
        collected.extend(
            split_timed_caption(segment.text, local_start, max(local_end, local_start + 1.2))
        )
    if not collected:
        collected = [{"start_seconds": 0.0, "end_seconds": 3.0, "text": ""}]
    lines: list[str] = []
    for number, cue in enumerate(collected, start=1):
        lines.extend(_srt_row(number, cue["start_seconds"], cue["end_seconds"], cue["text"]))
    path.write_text("\n".join(lines), encoding="utf-8")
    return collected
def write_single_caption_srt(path: Path, duration: float, text: str) -> list[dict]:
    """Write an SRT file spreading *text* across [0, duration] seconds.

    The duration is clamped to at least 1 second. Blank text produces a single
    empty cue (capped at 3s) so the file is never empty. Returns the cues.
    """
    clamped = max(duration, 1.0)
    cues = split_timed_caption(text, 0.0, clamped)
    if not cues:
        # Placeholder cue for blank input keeps downstream SRT consumers happy.
        cues = [{"start_seconds": 0.0, "end_seconds": min(clamped, 3.0), "text": ""}]
    rows: list[str] = []
    for number, cue in enumerate(cues, start=1):
        rows.extend(_srt_row(number, cue["start_seconds"], cue["end_seconds"], cue["text"]))
    path.write_text("\n".join(rows), encoding="utf-8")
    return cues
def write_srt_from_cues(path: Path, cues: list) -> list[dict]:
    """Write SRT using user-supplied per-cue timing (preferred over auto-distribution).

    Accepts list of objects with .start_seconds / .end_seconds / .text attributes
    (Pydantic SubtitleCue) or dicts with the same keys. Cues with blank text are
    dropped; a cue whose end is not after its start gets a 1-second duration.
    If nothing remains, a single empty 3-second placeholder cue is written.
    Returns the cue dicts (times rounded to ms) that were written.
    """

    def _field(cue: object, key: str, default: object) -> object:
        # BUG FIX: the old `getattr(cue, key, None) or cue.get(key, ...)` pattern
        # fell through to `.get` whenever the attribute value was falsy (0.0
        # start time, empty text), raising AttributeError for non-dict cues.
        # Dispatch on the container type instead of on truthiness.
        if isinstance(cue, dict):
            return cue.get(key, default)
        return getattr(cue, key, default)

    rows: list[str] = []
    out_cues: list[dict] = []
    index = 1
    for cue in cues:
        # `or` fallbacks here only guard against an explicit None value.
        start = float(_field(cue, "start_seconds", 0.0) or 0.0)
        end = float(_field(cue, "end_seconds", 0.0) or 0.0)
        text = str(_field(cue, "text", "") or "")
        if end <= start:
            # Degenerate timing: give the cue a minimal 1-second duration.
            end = start + 1.0
        clean_text = text.strip()
        if not clean_text:
            continue
        rows.extend(_srt_row(index, start, end, clean_text))
        out_cues.append({"start_seconds": round(start, 3), "end_seconds": round(end, 3), "text": clean_text})
        index += 1
    if not rows:
        # Never write an empty SRT file; emit one blank placeholder cue.
        out_cues = [{"start_seconds": 0.0, "end_seconds": 3.0, "text": ""}]
        rows = _srt_row(1, 0.0, 3.0, "")
    path.write_text("\n".join(rows), encoding="utf-8")
    return out_cues
def split_timed_caption(text: str, start: float, end: float) -> list[dict]:
    """Split *text* into caption cues distributed across the [start, end] window.

    Phrases come from split_caption_text(); if there are more phrases than fit
    at ~1.2s each, they are merged down with _merge_phrases(). Each returned
    dict has "start_seconds" / "end_seconds" (rounded to ms) and "text".
    Returns [] when the text is blank.
    """
    phrases = split_caption_text(text)
    if not phrases:
        return []
    # Guarantee at least 1.2s of total screen time to distribute.
    total_duration = max(end - start, 1.2)
    # Cap cue count so every cue can still get the ~1.2s minimum duration.
    max_cues = max(1, int(total_duration / 1.2))
    if len(phrases) > max_cues:
        phrases = _merge_phrases(phrases, max_cues)
    # Nominal per-cue duration, clamped to the [1.2, 4.0] second range.
    cue_duration = min(4.0, max(1.2, total_duration / len(phrases)))
    cues: list[dict] = []
    cursor = start
    for index, phrase in enumerate(phrases):
        remaining = len(phrases) - index
        # Reserve at least 1.2s for each cue still to come after this one.
        max_end = end - ((remaining - 1) * 1.2)
        cue_end = min(max_end, cursor + cue_duration)
        cue_end = max(cue_end, cursor + 1.2)
        if index == len(phrases) - 1:
            # The final cue always runs exactly to the end of the window.
            cue_end = end
        cues.append(
            {
                "start_seconds": round(cursor, 3),
                # 0.8s floor protects the last cue when `end` is very close to cursor.
                "end_seconds": round(max(cue_end, cursor + 0.8), 3),
                "text": phrase,
            }
        )
        cursor = cue_end
    return cues
def split_caption_text(text: str, max_chars: int = 42, max_words: int = 7) -> list[str]:
    """Break *text* into caption-sized phrases.

    A phrase is closed when adding the next word would exceed *max_chars*,
    when it already holds *max_words* words, or right after a word ending in
    clause punctuation. A single oversized token is hard-wrapped into
    *max_chars*-character slices. Blank text yields [].
    """
    normalized = re.sub(r"\s+", " ", text.strip())
    if not normalized:
        return []
    words = normalized.split()
    if len(words) <= 1:
        # One giant token: chunk it purely by character count.
        return [normalized[pos : pos + max_chars] for pos in range(0, len(normalized), max_chars)]
    chunks: list[str] = []
    buffer: list[str] = []
    for word in words:
        if buffer:
            tentative = " ".join(buffer + [word])
            ends_clause = re.search(r"[,.!?;:]$", buffer[-1]) is not None
            if len(tentative) > max_chars or len(buffer) >= max_words or ends_clause:
                chunks.append(" ".join(buffer))
                buffer = [word]
                continue
        buffer.append(word)
    if buffer:
        chunks.append(" ".join(buffer))
    return chunks
def _merge_phrases(phrases: list[str], target_count: int) -> list[str]:
if target_count <= 1:
return [" ".join(phrases)]
merged: list[str] = []
bucket_size = len(phrases) / target_count
for index in range(target_count):
start = round(index * bucket_size)
end = round((index + 1) * bucket_size)
merged.append(" ".join(phrases[start:end]).strip())
return [phrase for phrase in merged if phrase]
def _srt_row(index: int, start: float, end: float, text: str) -> list[str]:
    """Render one SRT cue as its four lines: counter, timing, text, blank separator."""
    timing = f"{seconds_to_srt_time(start)} --> {seconds_to_srt_time(end)}"
    return [str(index), timing, text.strip(), ""]