import logging
from pathlib import Path
from typing import List, Dict

from moviepy.editor import (
    VideoFileClip,
    AudioFileClip,
    CompositeVideoClip,
    CompositeAudioClip,
    TextClip,
    concatenate_videoclips,
    concatenate_audioclips,
)

from ...schemas import Caption, Scene, CaptionPositionEnum, MusicVolumeEnum
from .ffmpeg_utils import FFmpegUtils

logger = logging.getLogger(__name__)


class VideoComposer:
    """Video composition using MoviePy (replaces Remotion)"""

    @staticmethod
    def render(
        scenes: List[Dict],
        music_path: str,
        output_path: Path,
        orientation: str = "portrait",
        caption_position: str = "bottom",
        caption_bg_color: str = "blue",
        music_volume: str = "high",
        padding_back: int = 0
    ):
        """
        Render the final video with scenes, captions, and music.

        Args:
            scenes: List of scene dicts with 'video', 'audio', 'captions'
            music_path: Path to the background music file
            output_path: Where to save the final video
            orientation: 'portrait' or 'landscape'
            caption_position: 'top', 'center', or 'bottom'
            caption_bg_color: Background color for captions
            music_volume: 'low', 'medium', 'high', or 'muted'
            padding_back: Additional padding at the end, in milliseconds
        """
        logger.info(f"Rendering video with {len(scenes)} scenes")

        # Set dimensions based on orientation
        if orientation == "portrait":
            width, height = 1080, 1920
        else:
            width, height = 1920, 1080

        # Process each scene
        video_clips = []
        total_duration = 0

        for i, scene in enumerate(scenes):
            logger.debug(f"Processing scene {i + 1}/{len(scenes)}")

            # Load narration audio
            audio_clip = AudioFileClip(scene["audio"]["url"])
            scene_duration = scene["audio"]["duration"]

            # Load video clip(s)
            video_input = scene["video"]
            if isinstance(video_input, list):
                # Concatenate multiple clips
                clips = []
                for item in video_input:
                    try:
                        # Handle both string paths (legacy) and dicts
                        # (new smart segmentation)
                        if isinstance(item, dict):
                            path = item["path"]
                            target_duration = item["duration"]
                            start_time = item.get("start_time", 0)
                        else:
                            path = item
                            target_duration = None
                            start_time = 0

                        # FIX 1: Load video WITHOUT audio to prevent stream conflicts
                        clip = VideoFileClip(path, audio=False)

                        # Verify the actual duration using ffprobe
                        actual_duration = FFmpegUtils.get_video_duration(Path(path))
                        if actual_duration > 0:
                            # A positive start_time means we take a subclip
                            if start_time > 0:
                                # Ensure we don't read past the end of the file
                                end_time = min(start_time + target_duration, actual_duration)
                                # If the segment is completely out of bounds
                                # (shouldn't happen with correct upstream logic), reset it
                                if start_time >= actual_duration:
                                    start_time = 0
                                    end_time = min(target_duration, actual_duration)
                                clip = clip.subclip(start_time, end_time)
                                # The caller should ensure that
                                # start_time + target_duration <= actual_duration
                                # where possible; if the subclip still came out short
                                # because we hit the end of the file, loop it.
                                if clip.duration < target_duration:
                                    clip = clip.loop(duration=target_duration)
                                else:
                                    clip = clip.set_duration(target_duration)
                            # Standard logic (start from 0)
                            elif target_duration and actual_duration < target_duration:
                                clip = clip.loop(duration=target_duration)
                            elif target_duration:
                                clip = clip.set_duration(target_duration)
                            elif abs(clip.duration - actual_duration) > 0.5:
                                clip = clip.set_duration(actual_duration)

                        # Resize to target dimensions immediately
                        clip = VideoComposer._resize_and_crop(clip, width, height)
                        clips.append(clip)
                    except Exception as e:
                        logger.warning(f"Failed to load video clip {item}: {e}")

                if not clips:
                    raise RuntimeError("No valid video clips found for scene")

                # FIX 2: method="chain" is more stable for identically sized clips
                video_clip = concatenate_videoclips(clips, method="chain")
            else:
                # FIX 1 (repeated): load without audio
                video_clip = VideoFileClip(video_input, audio=False)

                # Verify the actual duration using ffprobe
                actual_duration = FFmpegUtils.get_video_duration(Path(video_input))
                if actual_duration > 0:
                    if abs(video_clip.duration - actual_duration) > 0.5:
                        video_clip = video_clip.set_duration(actual_duration)

                video_clip = VideoComposer._resize_and_crop(video_clip, width, height)

            # Match the video duration to the narration audio.
            # Loop the video if it's shorter, to prevent a black screen.
            if video_clip.duration < scene_duration:
                gap = scene_duration - video_clip.duration
                if gap < 0.5:
                    # Small gap (< 0.5s): freeze the last frame to fill it.
                    # This prevents black frames/flicker at the end of the scene.
                    logger.debug(f"Filling small gap of {gap:.3f}s by freezing last frame")
                    last_frame = video_clip.to_ImageClip(t=video_clip.duration - 0.01).set_duration(gap)
                    video_clip = concatenate_videoclips([video_clip, last_frame], method="chain")
                else:
                    # Large gap: loop the video
                    video_clip = video_clip.loop(duration=scene_duration)
            else:
                # Video is longer, just trim it
                video_clip = video_clip.set_duration(scene_duration)

            video_clip = video_clip.set_audio(audio_clip)

            # Add captions
            if scene.get("captions"):
                # Fix for the "bad blue color": default to transparent when blue
                # (the old default) is passed. Ideally we would use a
                # semi-transparent box, but MoviePy's TextClip background
                # support is limited; transparent with a stroke is safer.
                if caption_bg_color == "blue":
                    caption_bg_color = "transparent"  # Explicit string instead of None

                video_clip = VideoComposer._add_captions(
                    video_clip,
                    scene["captions"],
                    width,
                    height,
                    caption_position,
                    caption_bg_color,
                    total_duration
                )

            video_clips.append(video_clip)
            total_duration += scene_duration

        # Add padding if specified
        if padding_back > 0:
            padding_seconds = padding_back / 1000
            total_duration += padding_seconds

            # Extend the last clip by looping it for the padding as well
            if video_clips:
                last_clip = video_clips[-1]
                video_clips[-1] = last_clip.loop(duration=last_clip.duration + padding_seconds)

        # Concatenate all scenes (method="chain" here too)
        logger.debug("Concatenating video clips")
        final_video = concatenate_videoclips(video_clips, method="chain")

        # Add background music
        if music_path and music_volume != "muted":
            logger.debug("Adding background music")
            final_video = VideoComposer._add_background_music(
                final_video, music_path, music_volume
            )

        # Write the final video
        logger.info(f"Writing video to {output_path}")
        final_video.write_videofile(
            str(output_path),
            codec="libx264",
            audio_codec="aac",
            fps=30,
            preset="ultrafast",  # Fast export (3-5x faster, ~20-30% larger file)
            threads=2,
            logger=None  # Suppress the moviepy progress bar
        )

        # Cleanup
        final_video.close()
        for clip in video_clips:
            clip.close()

        logger.info(f"Video rendered successfully: {output_path}")

    @staticmethod
    def _resize_and_crop(clip: VideoFileClip, target_width: int, target_height: int) -> VideoFileClip:
        """Resize and center-crop a video to match the target dimensions"""
        clip_width, clip_height = clip.size
        clip_aspect = clip_width / clip_height
        target_aspect = target_width / target_height

        if clip_aspect > target_aspect:
            # Clip is wider: scale to target height, then crop the width
            new_height = target_height
            new_width = int(target_height * clip_aspect)
            resized = clip.resize(height=new_height)
            x_center = new_width / 2
            x1 = x_center - target_width / 2
            cropped = resized.crop(x1=x1, x2=x1 + target_width)
        else:
            # Clip is taller: scale to target width, then crop the height
            new_width = target_width
            new_height = int(target_width / clip_aspect)
            resized = clip.resize(width=new_width)
            y_center = new_height / 2
            y1 = y_center - target_height / 2
            cropped = resized.crop(y1=y1, y2=y1 + target_height)

        return cropped

    @staticmethod
    def _add_captions(
        video_clip: VideoFileClip,
        captions: List[Dict],
        width: int,
        height: int,
        position: str,
        bg_color: str,
        offset_seconds: float
    ) -> CompositeVideoClip:
        """Add captions to a video clip.

        Note: offset_seconds is currently unused; caption times are relative
        to the scene they belong to.
        """
        caption_clips = []

        # Determine the vertical position
        if position == "top":
            y_pos = height * 0.15
        elif position == "center":
            y_pos = height * 0.5
        else:  # bottom
            y_pos = height * 0.70  # Changed from 0.85 to 0.70 as requested

        # Resolve the caption font once, before the loop, so both the primary
        # and the fallback TextClip below can reference it safely.
        # Priority: TheBoldFont (static/fonts) > DejaVu > Liberation
        thebold_path = Path(__file__).parent.parent.parent.parent.parent / "static" / "fonts" / "THEBOLDFONT-FREEVERSION.ttf"
        dejavu_path = Path("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf")
        liberation_path = Path("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf")

        if thebold_path.exists():
            font_name = str(thebold_path)
        elif dejavu_path.exists():
            font_name = str(dejavu_path)
        elif liberation_path.exists():
            font_name = str(liberation_path)
        else:
            font_name = "DejaVu-Sans-Bold"  # Fallback font name

        for caption in captions:
            start_time = caption["startMs"] / 1000
            end_time = caption["endMs"] / 1000
            duration = end_time - start_time

            if duration <= 0:
                continue

            # Use a transparent background by default (when bg_color is falsy);
            # a strong stroke keeps the text readable without a box.
            final_bg_color = bg_color if bg_color else "transparent"

            try:
                # Use method="caption" with a fixed box size to avoid a
                # NoneType error; allocate 20% of the frame height for the box.
                txt_clip = TextClip(
                    caption["text"],
                    fontsize=70,
                    color="white",
                    font=font_name,
                    stroke_color="black",
                    stroke_width=1.5,
                    bg_color=final_bg_color,
                    method="caption",
                    size=(int(width * 0.9), int(height * 0.2)),
                    align="center"
                )
            except Exception as e:
                logger.warning(f"TextClip caption method failed: {e}. Falling back to label method.")
                try:
                    # Fall back to method="label" (no wrapping, but it works)
                    txt_clip = TextClip(
                        caption["text"],
                        fontsize=60,
                        color="white",
                        font=font_name,
                        stroke_color="black",
                        stroke_width=2,
                        bg_color=final_bg_color,
                        method="label"
                    )
                except Exception as e2:
                    logger.error(f"TextClip label also failed: {e2}. Skipping caption.")
                    continue

            txt_clip = txt_clip.set_duration(duration)
            txt_clip = txt_clip.set_start(start_time)
            txt_clip = txt_clip.set_position(("center", y_pos))
            caption_clips.append(txt_clip)

        if caption_clips:
            return CompositeVideoClip([video_clip] + caption_clips)
        return video_clip

    @staticmethod
    def _add_background_music(
        video_clip: VideoFileClip,
        music_path: str,
        volume_level: str
    ) -> VideoFileClip:
        """Add background music to the video"""
        # Load the music
        music = AudioFileClip(music_path)

        # Loop the music to cover the full video duration
        if music.duration < video_clip.duration:
            loops_needed = int(video_clip.duration / music.duration) + 1
            music = concatenate_audioclips([music] * loops_needed)

        # Trim to the video duration
        music = music.subclip(0, video_clip.duration)

        # Set the volume based on the requested level
        volume_multipliers = {
            "low": 0.2,
            "medium": 0.4,
            "high": 0.6,
            "muted": 0.0
        }
        volume = volume_multipliers.get(volume_level, 0.6)
        music = music.volumex(volume)

        # Mix with the narration audio
        if video_clip.audio:
            final_audio = CompositeAudioClip([video_clip.audio, music])
            return video_clip.set_audio(final_audio)
        return video_clip.set_audio(music)
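

# --- Usage sketch (illustrative only) ---
# A minimal example of driving VideoComposer.render() from a caller elsewhere
# in the package; this module's relative imports mean it can't be run as a
# script, so the example is kept in comments. The import path and all file
# paths below are hypothetical placeholders, but the scene shape mirrors what
# render() reads above: 'video' (a path, or a list of segment dicts with
# "path", "duration", and optional "start_time"), 'audio' with 'url' and
# 'duration' (seconds), and 'captions' with text/startMs/endMs.
#
#     from app.services.video.video_composer import VideoComposer  # placeholder path
#     from pathlib import Path
#
#     scenes = [
#         {
#             "video": "scene1.mp4",
#             "audio": {"url": "narration1.mp3", "duration": 5.0},
#             "captions": [
#                 {"text": "Hello world", "startMs": 0, "endMs": 2000},
#                 {"text": "Second caption", "startMs": 2000, "endMs": 5000},
#             ],
#         }
#     ]
#
#     VideoComposer.render(
#         scenes=scenes,
#         music_path="music.mp3",
#         output_path=Path("output.mp4"),
#         orientation="portrait",
#         caption_position="bottom",
#         music_volume="medium",
#         padding_back=500,  # 0.5 s of trailing padding
#     )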