Spaces:

robiul487
/

NCAkit

Sleeping

App Files Files Community

ismdrobiul489 committed on Dec 13, 2025

Commit

1f29180

1 Parent(s): 4bb5c2c

Add word-by-word captions to stick figure video like story_reels

Browse files

Files changed (2) hide show

modules/art_reels/router.py +3 -2
modules/art_reels/services/video_composer.py +113 -14

modules/art_reels/router.py CHANGED Viewed

@@ -217,13 +217,14 @@ async def generate_stick_figure_video(job_id: str, script: str, voice: str):
         update_job(job_id, "processing", 80)
-        # Step 6: Compose video with audio
         logger.info(f"Composing video with audio for job {job_id}")
         video_path = video_composer.compose_video(
             frame_paths=frame_paths,
             audio_path=audio_path,
             output_name=f"stick_{job_id}.mp4",
-            fps=30
         )
         update_job(job_id, "processing", 95)

         update_job(job_id, "processing", 80)
+        # Step 6: Compose video with audio and captions
         logger.info(f"Composing video with audio for job {job_id}")
         video_path = video_composer.compose_video(
             frame_paths=frame_paths,
             audio_path=audio_path,
             output_name=f"stick_{job_id}.mp4",
+            fps=30,
+            captions=captions  # Word-by-word captions from Whisper
         )
         update_job(job_id, "processing", 95)

modules/art_reels/services/video_composer.py CHANGED Viewed

@@ -1,43 +1,79 @@
 """
-Video Composer - Combines frames into final video
-Uses MoviePy for video rendering
 """
 import logging
 import os
 import uuid
-from typing import List, Optional
-from moviepy.editor import ImageSequenceClip, AudioFileClip, CompositeAudioClip
 logger = logging.getLogger(__name__)
 class VideoComposer:
     """
-    Combines image frames into final video.
     Features:
     - Frame sequence to video
     - Audio overlay
     - Background music
     """
     # Video settings
     FPS = 30
     def __init__(self, output_dir: str = "videos"):
         self.output_dir = output_dir
         os.makedirs(output_dir, exist_ok=True)
     def compose_video(
         self,
         frame_paths: List[str],
         audio_path: Optional[str] = None,
         music_path: Optional[str] = None,
         output_name: Optional[str] = None,
-        fps: int = None
     ) -> str:
         """
-        Compose video from frames.
         Args:
             frame_paths: List of frame image paths
@@ -45,6 +81,7 @@ class VideoComposer:
             music_path: Optional background music path
             output_name: Custom output filename
             fps: Frames per second
         Returns:
             Path to output video
@@ -57,7 +94,11 @@ class VideoComposer:
             logger.info(f"Composing video from {len(frame_paths)} frames")
             # Create video clip from frames
-            clip = ImageSequenceClip(frame_paths, fps=fps)
             # Add audio if provided
             audio_clips = []
@@ -69,10 +110,10 @@ class VideoComposer:
             if music_path and os.path.exists(music_path):
                 music_audio = AudioFileClip(music_path)
                 # Loop music if needed
-                if music_audio.duration < clip.duration:
-                    music_audio = music_audio.loop(duration=clip.duration)
                 else:
-                    music_audio = music_audio.subclip(0, clip.duration)
                 # Lower volume for background
                 music_audio = music_audio.volumex(0.3)
                 audio_clips.append(music_audio)
@@ -82,10 +123,10 @@ class VideoComposer:
                     final_audio = CompositeAudioClip(audio_clips)
                 else:
                     final_audio = audio_clips[0]
-                clip = clip.set_audio(final_audio)
             # Write video
-            clip.write_videofile(
                 output_path,
                 codec='libx264',
                 audio_codec='aac',
@@ -95,7 +136,7 @@ class VideoComposer:
             )
             # Cleanup
-            clip.close()
             for ac in audio_clips:
                 ac.close()
@@ -106,6 +147,64 @@ class VideoComposer:
             logger.error(f"Error composing video: {e}")
             raise
     def cleanup_frames(self, frame_paths: List[str]):
         """Delete temporary frame files"""
         for path in frame_paths:

 """
+Video Composer - Combines frames into final video with captions
+Uses MoviePy for video rendering with word-by-word captions like Story Reels
 """
 import logging
 import os
 import uuid
+from pathlib import Path
+from typing import List, Optional, Dict
+from moviepy.editor import (
+    ImageSequenceClip,
+    AudioFileClip,
+    CompositeAudioClip,
+    CompositeVideoClip,
+    TextClip
+)
 logger = logging.getLogger(__name__)
 class VideoComposer:
     """
+    Combines image frames into final video with professional captions.
     Features:
     - Frame sequence to video
     - Audio overlay
     - Background music
+    - Word-by-word captions (like Story Reels)
     """
     # Video settings
     FPS = 30
+    TARGET_WIDTH = 1080
+    TARGET_HEIGHT = 1920
+    # Caption settings (matching Story Reels)
+    CAPTION_FONT_SIZE = 72
+    CAPTION_COLOR = 'white'
+    CAPTION_STROKE_COLOR = 'black'
+    CAPTION_STROKE_WIDTH = 4
+    CAPTION_Y_POS = 0.75  # 75% down
     def __init__(self, output_dir: str = "videos"):
         self.output_dir = output_dir
         os.makedirs(output_dir, exist_ok=True)
+    def _find_font(self) -> str:
+        """Return a font to use for caption rendering.
+
+        Probes a fixed list of well-known font file locations (Linux first,
+        then Windows) and returns the first one that exists as a regular
+        file.
+
+        Returns:
+            Path of the first font file found, or the font name
+            "DejaVu-Sans-Bold" when none of the candidates exist.
+        """
+        # Ordered by preference: bold faces first so captions keep a
+        # consistent weight; regular Arial is only a last resort.
+        candidates = (
+            "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
+            "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
+            "C:/Windows/Fonts/arialbd.ttf",
+            "C:/Windows/Fonts/arial.ttf",
+        )
+        for candidate in candidates:
+            # isfile (not exists): a directory at this path is not a font
+            if os.path.isfile(candidate):
+                return candidate
+        # NOTE(review): the fallback is a font *name*, not a path; it is
+        # presumably resolved by the text renderer's own font lookup.
+        # Confirm that this font is installed in the deployment image.
+        return "DejaVu-Sans-Bold"
     def compose_video(
         self,
         frame_paths: List[str],
         audio_path: Optional[str] = None,
         music_path: Optional[str] = None,
         output_name: Optional[str] = None,
+        fps: int = None,
+        captions: Optional[List[Dict]] = None
     ) -> str:
         """
+        Compose video from frames with optional captions.
         Args:
             frame_paths: List of frame image paths
             music_path: Optional background music path
             output_name: Custom output filename
             fps: Frames per second
+            captions: Optional list of captions [{text, startMs, endMs}]
         Returns:
             Path to output video
             logger.info(f"Composing video from {len(frame_paths)} frames")
             # Create video clip from frames
+            base_clip = ImageSequenceClip(frame_paths, fps=fps)
+            # If captions provided, add them
+            if captions:
+                base_clip = self._add_captions(base_clip, captions)
             # Add audio if provided
             audio_clips = []
             if music_path and os.path.exists(music_path):
                 music_audio = AudioFileClip(music_path)
                 # Loop music if needed
+                if music_audio.duration < base_clip.duration:
+                    music_audio = music_audio.loop(duration=base_clip.duration)
                 else:
+                    music_audio = music_audio.subclip(0, base_clip.duration)
                 # Lower volume for background
                 music_audio = music_audio.volumex(0.3)
                 audio_clips.append(music_audio)
                     final_audio = CompositeAudioClip(audio_clips)
                 else:
                     final_audio = audio_clips[0]
+                base_clip = base_clip.set_audio(final_audio)
             # Write video
+            base_clip.write_videofile(
                 output_path,
                 codec='libx264',
                 audio_codec='aac',
             )
             # Cleanup
+            base_clip.close()
             for ac in audio_clips:
                 ac.close()
             logger.error(f"Error composing video: {e}")
             raise
+    def _add_captions(
+        self,
+        video_clip,
+        captions: List[Dict]
+    ):
+        """
+        Overlay word-by-word captions on a video clip.
+
+        Each usable caption becomes a text clip that is shown from its
+        startMs to its endMs (milliseconds), centred horizontally and
+        placed CAPTION_Y_POS of the way down the frame. Captions are
+        clamped to the length of the video so they can never make the
+        resulting clip longer than the base clip. Entries with no text,
+        a non-positive duration, or a start beyond the end of the video
+        are skipped. A caption that fails to render is logged and skipped
+        rather than aborting the whole video.
+
+        Args:
+            video_clip: Base video clip
+            captions: List of [{text, startMs, endMs}]
+        Returns:
+            CompositeVideoClip with captions, or the unchanged video_clip
+            when no caption could be rendered
+        """
+        font_name = self._find_font()
+        video_duration = video_clip.duration
+
+        # Lay captions out against the real frame size rather than the
+        # nominal TARGET_* constants, so frames of another resolution do
+        # not push the text off-screen.
+        frame_width, frame_height = video_clip.size
+        y_pos = frame_height * self.CAPTION_Y_POS
+        text_width = max(frame_width - 100, 1)
+
+        caption_clips = []
+        for cap in captions:
+            if not isinstance(cap, dict):
+                continue
+            text = cap.get("text", "")
+            # "or 0" also covers keys that are present but None
+            start_time = max((cap.get("startMs") or 0) / 1000, 0)
+            end_time = (cap.get("endMs") or 0) / 1000
+
+            if video_duration is not None:
+                if start_time >= video_duration:
+                    continue
+                # A composite clip lasts as long as its longest layer;
+                # clamp so a late caption cannot extend the video.
+                end_time = min(end_time, video_duration)
+
+            duration = end_time - start_time
+            if duration <= 0 or not text:
+                continue
+
+            try:
+                txt_clip = TextClip(
+                    text,
+                    fontsize=self.CAPTION_FONT_SIZE,
+                    font=font_name,
+                    color=self.CAPTION_COLOR,
+                    stroke_color=self.CAPTION_STROKE_COLOR,
+                    stroke_width=self.CAPTION_STROKE_WIDTH,
+                    method='caption',
+                    size=(text_width, None),
+                    align='center'
+                )
+                # Position at bottom center
+                txt_clip = txt_clip.set_position(('center', y_pos))
+                txt_clip = txt_clip.set_start(start_time)
+                txt_clip = txt_clip.set_duration(duration)
+                caption_clips.append(txt_clip)
+            except Exception as e:
+                # Best effort: one bad caption must not fail the render
+                logger.warning(f"Failed to create caption: {e}")
+                continue
+
+        if caption_clips:
+            logger.info(f"Added {len(caption_clips)} caption clips")
+            return CompositeVideoClip([video_clip] + caption_clips)
+        return video_clip
     def cleanup_frames(self, frame_paths: List[str]):
         """Delete temporary frame files"""
         for path in frame_paths: