# NCAkit/modules/video_creator/services/libraries/video_composer.py
# Author: ismdrobiul489
# Commit af49b9c — perf: Use ultrafast preset for video export - 3-5x faster
import logging
from pathlib import Path
from typing import List, Dict
from moviepy.editor import (
VideoFileClip,
AudioFileClip,
CompositeVideoClip,
CompositeAudioClip,
TextClip,
concatenate_videoclips,
concatenate_audioclips
)
from ...schemas import Caption, Scene, CaptionPositionEnum, MusicVolumeEnum
from .ffmpeg_utils import FFmpegUtils
logger = logging.getLogger(__name__)
class VideoComposer:
    """Video composition using MoviePy (replaces Remotion)

    Stateless facade — every method is a @staticmethod. ``render`` is the
    public entry point; the underscore helpers handle cover-fit resize/crop,
    caption overlay, and background-music mixing. Output is encoded as
    H.264/AAC at 30 fps with the 'ultrafast' x264 preset.

    NOTE(review): uses MoviePy v1.x APIs (subclip/set_duration/volumex/loop);
    these were renamed in MoviePy 2.x — confirm the pinned version.
    """
    @staticmethod
    def render(
        scenes: List[Dict],
        music_path: str,
        output_path: Path,
        orientation: str = "portrait",
        caption_position: str = "bottom",
        caption_bg_color: str = "blue",
        music_volume: str = "high",
        padding_back: int = 0
    ) -> None:
        """
        Render final video with scenes, captions, and music

        Args:
            scenes: List of scene dicts with 'video', 'audio', 'captions'.
                As consumed below: scene['audio'] is {'url', 'duration'};
                scene['video'] is a single path, or a list whose items are
                plain paths (legacy) or dicts {'path', 'duration',
                'start_time'} (smart segmentation); scene['captions']
                (optional) is a list of {'text', 'startMs', 'endMs'} dicts.
            music_path: Path to background music file (falsy skips music)
            output_path: Where to save the final video
            orientation: 'portrait' (1080x1920) or 'landscape' (1920x1080)
            caption_position: 'top', 'center', or 'bottom'
            caption_bg_color: Background color for captions ('blue' is
                remapped to 'transparent' below — see inline note)
            music_volume: 'low', 'medium', 'high', or 'muted'
            padding_back: Additional padding at end in milliseconds

        Raises:
            Exception: if every clip of a multi-clip scene fails to load.
        """
        logger.info(f"Rendering video with {len(scenes)} scenes")
        # Set dimensions based on orientation
        if orientation == "portrait":
            width, height = 1080, 1920
        else:
            width, height = 1920, 1080
        # Process each scene
        video_clips = []
        total_duration = 0  # running timeline offset in seconds (forwarded to _add_captions)
        for i, scene in enumerate(scenes):
            logger.debug(f"Processing scene {i + 1}/{len(scenes)}")
            # Load narration audio; its declared duration drives how the
            # scene's visuals are trimmed, frozen, or looped below.
            audio_clip = AudioFileClip(scene["audio"]["url"])
            scene_duration = scene["audio"]["duration"]
            # Load video clip(s)
            video_input = scene["video"]
            if isinstance(video_input, list):
                # Concatenate multiple clips
                clips = []
                for item in video_input:
                    try:
                        # Handle both string paths (legacy) and dicts (new smart segmentation)
                        if isinstance(item, dict):
                            path = item["path"]
                            target_duration = item["duration"]
                            start_time = item.get("start_time", 0)
                        else:
                            path = item
                            target_duration = None
                            start_time = 0
                        # FIX 1: Load video WITHOUT audio to prevent stream conflicts
                        clip = VideoFileClip(path, audio=False)
                        # Verify actual duration using ffprobe (container metadata
                        # read by MoviePy can disagree with the real stream length)
                        actual_duration = FFmpegUtils.get_video_duration(Path(path))
                        if actual_duration > 0:
                            # If we have a start_time, we are taking a subclip
                            if start_time > 0:
                                # Ensure we don't go past the end
                                end_time = min(start_time + target_duration, actual_duration)
                                # If the segment is completely out of bounds (shouldn't happen with good logic), fix it
                                if start_time >= actual_duration:
                                    start_time = 0
                                    end_time = min(target_duration, actual_duration)
                                clip = clip.subclip(start_time, end_time)
                                # If the subclip is shorter than target (because we hit end of file),
                                # we might need to loop or extend?
                                # The calling logic should ensure 'start_time + target_duration <= actual_duration'
                                # if possible. If not, we loop the result.
                                if clip.duration < target_duration:
                                    clip = clip.loop(duration=target_duration)
                                else:
                                    clip = clip.set_duration(target_duration)
                            # Standard logic (start from 0)
                            elif target_duration and actual_duration < target_duration:
                                clip = clip.loop(duration=target_duration)
                            elif target_duration:
                                clip = clip.set_duration(target_duration)
                            elif abs(clip.duration - actual_duration) > 0.5:
                                # Legacy path (no target duration): trust ffprobe when
                                # MoviePy's reported duration drifts by more than 0.5s.
                                clip = clip.set_duration(actual_duration)
                        # Resize to target dimensions immediately
                        clip = VideoComposer._resize_and_crop(clip, width, height)
                        clips.append(clip)
                    except Exception as e:
                        # Best-effort: skip unreadable clips; the scene only fails
                        # if nothing at all loaded (checked just below).
                        logger.warning(f"Failed to load video clip {item}: {e}")
                if not clips:
                    raise Exception("No valid video clips found for scene")
                # FIX 2: Use method="chain" for better stability with identically sized clips
                video_clip = concatenate_videoclips(clips, method="chain")
            else:
                # FIX 1 (Repeated): Load without audio
                video_clip = VideoFileClip(video_input, audio=False)
                # Verify actual duration using ffprobe
                actual_duration = FFmpegUtils.get_video_duration(Path(video_input))
                if actual_duration > 0:
                    if abs(video_clip.duration - actual_duration) > 0.5:
                        video_clip = video_clip.set_duration(actual_duration)
                video_clip = VideoComposer._resize_and_crop(video_clip, width, height)
            # Set duration to match audio
            # Loop video if it's shorter than audio to prevent black screen
            if video_clip.duration < scene_duration:
                # If gap is small (< 0.5s), freeze the last frame to fill it.
                # This prevents black frames/flicker at the end of scene.
                gap = scene_duration - video_clip.duration
                if gap < 0.5:
                    logger.debug(f"Filling small gap of {gap:.3f}s by freezing last frame")
                    # Create a freeze frame of the last instant
                    # (sampled 10 ms before the end to avoid reading past EOF)
                    last_frame = video_clip.to_ImageClip(t=video_clip.duration - 0.01).set_duration(gap)
                    video_clip = concatenate_videoclips([video_clip, last_frame], method="chain")
                else:
                    # Gap is large, loop the video
                    video_clip = video_clip.loop(duration=scene_duration)
            else:
                # Video is longer, just trim it
                video_clip = video_clip.set_duration(scene_duration)
            # Attach the narration track (visual clips were loaded audio-less above)
            video_clip = video_clip.set_audio(audio_clip)
            # Add captions
            if scene.get("captions"):
                # Fix for "bad blue color": default to transparent if blue is passed
                # or if the user wants the old default.
                # Ideally, we use a semi-transparent box, but MoviePy TextClip
                # background support is limited. Transparent with stroke is safer.
                # (Reassigning the parameter is intentional: once remapped it
                # stays "transparent" for all subsequent scenes too.)
                if caption_bg_color == "blue":
                    caption_bg_color = "transparent" # Explicit string instead of None
                video_clip = VideoComposer._add_captions(
                    video_clip,
                    scene["captions"],
                    width,
                    height,
                    caption_position,
                    caption_bg_color,
                    total_duration
                )
            video_clips.append(video_clip)
            total_duration += scene_duration
        # Add padding if specified
        if padding_back > 0:
            padding_seconds = padding_back / 1000
            total_duration += padding_seconds
            # Extend last clip
            if video_clips:
                last_clip = video_clips[-1]
                # Loop the last clip for padding too
                video_clips[-1] = last_clip.loop(duration=last_clip.duration + padding_seconds)
        # Concatenate all scenes
        logger.debug("Concatenating video clips")
        # Use chain here too
        final_video = concatenate_videoclips(video_clips, method="chain")
        # Add background music
        if music_path and music_volume != "muted":
            logger.debug("Adding background music")
            final_video = VideoComposer._add_background_music(
                final_video,
                music_path,
                music_volume
            )
        # Write final video
        logger.info(f"Writing video to {output_path}")
        final_video.write_videofile(
            str(output_path),
            codec="libx264",
            audio_codec="aac",
            fps=30,
            preset="ultrafast", # Fast export (3-5x faster, ~20-30% larger file)
            threads=2,
            logger=None # Suppress moviepy progress bar
        )
        # Cleanup
        # NOTE(review): clips are only closed on the success path; an exception
        # above leaks file readers — consider a try/finally if this matters.
        final_video.close()
        for clip in video_clips:
            clip.close()
        logger.info(f"Video rendered successfully: {output_path}")
    @staticmethod
    def _resize_and_crop(clip: VideoFileClip, target_width: int, target_height: int) -> VideoFileClip:
        """Resize and crop video to match target dimensions

        "Cover" fit: scales the clip until it fills the target frame, then
        center-crops the overflowing axis (no letterboxing; edges may be lost).

        Args:
            clip: Source clip to fit.
            target_width: Output width in pixels.
            target_height: Output height in pixels.

        Returns:
            A clip sized exactly target_width x target_height.
        """
        clip_width, clip_height = clip.size
        clip_aspect = clip_width / clip_height
        target_aspect = target_width / target_height
        if clip_aspect > target_aspect:
            # Clip is wider, crop width
            new_height = target_height
            new_width = int(target_height * clip_aspect)
            resized = clip.resize(height=new_height)
            x_center = new_width / 2
            x1 = x_center - target_width / 2
            cropped = resized.crop(x1=x1, x2=x1 + target_width)
        else:
            # Clip is taller, crop height
            new_width = target_width
            new_height = int(target_width / clip_aspect)
            resized = clip.resize(width=new_width)
            y_center = new_height / 2
            y1 = y_center - target_height / 2
            cropped = resized.crop(y1=y1, y2=y1 + target_height)
        return cropped
    @staticmethod
    def _add_captions(
        video_clip: VideoFileClip,
        captions: List[Dict],
        width: int,
        height: int,
        position: str,
        bg_color: str,
        offset_seconds: float
    ) -> CompositeVideoClip:
        """Add captions to video clip

        Overlays one TextClip per caption, timed from its startMs/endMs
        (milliseconds, applied as scene-relative).

        Args:
            video_clip: Scene clip to overlay onto.
            captions: Dicts with 'text', 'startMs', 'endMs'.
            width: Frame width, used to size the caption box (90% of it).
            height: Frame height, used for vertical placement and box height.
            position: 'top', 'center', or anything else (treated as bottom).
            bg_color: Caption background color; falsy becomes 'transparent'.
            offset_seconds: NOTE(review): currently unused — caption times are
                applied as-is. If startMs is relative to the full timeline,
                this offset should be subtracted; confirm with callers.

        Returns:
            CompositeVideoClip with captions, or the unmodified clip when no
            caption could be built.
        """
        caption_clips = []
        # Determine vertical position
        if position == "top":
            y_pos = height * 0.15
        elif position == "center":
            y_pos = height * 0.5
        else: # bottom
            y_pos = height * 0.70 # Changed from 0.85 to 0.70 as requested
        for caption in captions:
            start_time = caption["startMs"] / 1000
            end_time = caption["endMs"] / 1000
            duration = end_time - start_time
            if duration <= 0:
                # Skip zero/negative-length captions
                continue
            # Create text clip
            # Use transparent background by default if None
            # Add strong stroke for visibility
            final_bg_color = bg_color if bg_color else "transparent"
            try:
                # Try caption method with fixed height to avoid NoneType error
                # Allocating 20% of height for caption box
                # Try local font first, then system fonts
                # Priority: TheBoldFont (static/fonts) > DejaVu > Liberation
                thebold_path = Path(__file__).parent.parent.parent.parent.parent / "static" / "fonts" / "THEBOLDFONT-FREEVERSION.ttf"
                dejavu_path = Path("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf")
                liberation_path = Path("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf")
                if thebold_path.exists():
                    font_name = str(thebold_path)
                elif dejavu_path.exists():
                    font_name = str(dejavu_path)
                elif liberation_path.exists():
                    font_name = str(liberation_path)
                else:
                    font_name = "DejaVu-Sans-Bold" # Fallback name
                txt_clip = TextClip(
                    caption["text"],
                    fontsize=70,
                    color="white",
                    font=font_name,
                    stroke_color="black",
                    stroke_width=1.5,
                    bg_color=final_bg_color,
                    method="caption",
                    size=(int(width * 0.9), int(height * 0.2)),
                    align="center"
                )
            except Exception as e:
                logger.warning(f"TextClip caption method failed: {e}. Falling back to label method.")
                try:
                    # Fallback to label method (no wrapping, but works)
                    # NOTE(review): font_name may be unbound here if the first
                    # try failed during font-path resolution above — would
                    # raise NameError and be caught as "label also failed".
                    txt_clip = TextClip(
                        caption["text"],
                        fontsize=60,
                        color="white",
                        font=font_name,
                        stroke_color="black",
                        stroke_width=2,
                        bg_color=final_bg_color,
                        method="label"
                    )
                except Exception as e2:
                    logger.error(f"TextClip label also failed: {e2}. Skipping caption.")
                    continue
            txt_clip = txt_clip.set_duration(duration)
            txt_clip = txt_clip.set_start(start_time)
            txt_clip = txt_clip.set_position(("center", y_pos))
            caption_clips.append(txt_clip)
        if caption_clips:
            return CompositeVideoClip([video_clip] + caption_clips)
        return video_clip
    @staticmethod
    def _add_background_music(
        video_clip: VideoFileClip,
        music_path: str,
        volume_level: str
    ) -> VideoFileClip:
        """Add background music to video

        Loops/trims the track to the video's exact length, scales its volume
        by the named level, and mixes it under any existing narration audio.

        Args:
            video_clip: Clip to receive the music track.
            music_path: Path to the music file.
            volume_level: 'low' | 'medium' | 'high' | 'muted'; unknown values
                fall back to the 'high' multiplier (0.6).

        Returns:
            The clip with the mixed (or music-only) audio attached.
        """
        # Load music
        music = AudioFileClip(music_path)
        # Loop music to match video duration
        if music.duration < video_clip.duration:
            loops_needed = int(video_clip.duration / music.duration) + 1
            music = concatenate_audioclips([music] * loops_needed)
        # Trim to video duration
        music = music.subclip(0, video_clip.duration)
        # Set volume based on level
        # ("muted" is normally filtered out by render() before reaching here)
        volume_multipliers = {
            "low": 0.2,
            "medium": 0.4,
            "high": 0.6,
            "muted": 0.0
        }
        volume = volume_multipliers.get(volume_level, 0.6)
        music = music.volumex(volume)
        # Mix with narration audio
        if video_clip.audio:
            final_audio = CompositeAudioClip([video_clip.audio, music])
            return video_clip.set_audio(final_audio)
        return video_clip.set_audio(music)