ismdrobiul489 committed on
Commit
1f29180
·
1 Parent(s): 4bb5c2c

Add word-by-word captions to stick figure video like story_reels

Browse files
modules/art_reels/router.py CHANGED
@@ -217,13 +217,14 @@ async def generate_stick_figure_video(job_id: str, script: str, voice: str):
217
 
218
  update_job(job_id, "processing", 80)
219
 
220
- # Step 6: Compose video with audio
221
  logger.info(f"Composing video with audio for job {job_id}")
222
  video_path = video_composer.compose_video(
223
  frame_paths=frame_paths,
224
  audio_path=audio_path,
225
  output_name=f"stick_{job_id}.mp4",
226
- fps=30
 
227
  )
228
 
229
  update_job(job_id, "processing", 95)
 
217
 
218
  update_job(job_id, "processing", 80)
219
 
220
+ # Step 6: Compose video with audio and captions
221
  logger.info(f"Composing video with audio for job {job_id}")
222
  video_path = video_composer.compose_video(
223
  frame_paths=frame_paths,
224
  audio_path=audio_path,
225
  output_name=f"stick_{job_id}.mp4",
226
+ fps=30,
227
+ captions=captions # Word-by-word captions from Whisper
228
  )
229
 
230
  update_job(job_id, "processing", 95)
modules/art_reels/services/video_composer.py CHANGED
@@ -1,43 +1,79 @@
1
  """
2
- Video Composer - Combines frames into final video
3
- Uses MoviePy for video rendering
4
  """
5
  import logging
6
  import os
7
  import uuid
8
- from typing import List, Optional
9
- from moviepy.editor import ImageSequenceClip, AudioFileClip, CompositeAudioClip
 
 
 
 
 
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
 
14
  class VideoComposer:
15
  """
16
- Combines image frames into final video.
17
 
18
  Features:
19
  - Frame sequence to video
20
  - Audio overlay
21
  - Background music
 
22
  """
23
 
24
  # Video settings
25
  FPS = 30
 
 
 
 
 
 
 
 
 
26
 
27
  def __init__(self, output_dir: str = "videos"):
28
  self.output_dir = output_dir
29
  os.makedirs(output_dir, exist_ok=True)
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def compose_video(
32
  self,
33
  frame_paths: List[str],
34
  audio_path: Optional[str] = None,
35
  music_path: Optional[str] = None,
36
  output_name: Optional[str] = None,
37
- fps: int = None
 
38
  ) -> str:
39
  """
40
- Compose video from frames.
41
 
42
  Args:
43
  frame_paths: List of frame image paths
@@ -45,6 +81,7 @@ class VideoComposer:
45
  music_path: Optional background music path
46
  output_name: Custom output filename
47
  fps: Frames per second
 
48
 
49
  Returns:
50
  Path to output video
@@ -57,7 +94,11 @@ class VideoComposer:
57
  logger.info(f"Composing video from {len(frame_paths)} frames")
58
 
59
  # Create video clip from frames
60
- clip = ImageSequenceClip(frame_paths, fps=fps)
 
 
 
 
61
 
62
  # Add audio if provided
63
  audio_clips = []
@@ -69,10 +110,10 @@ class VideoComposer:
69
  if music_path and os.path.exists(music_path):
70
  music_audio = AudioFileClip(music_path)
71
  # Loop music if needed
72
- if music_audio.duration < clip.duration:
73
- music_audio = music_audio.loop(duration=clip.duration)
74
  else:
75
- music_audio = music_audio.subclip(0, clip.duration)
76
  # Lower volume for background
77
  music_audio = music_audio.volumex(0.3)
78
  audio_clips.append(music_audio)
@@ -82,10 +123,10 @@ class VideoComposer:
82
  final_audio = CompositeAudioClip(audio_clips)
83
  else:
84
  final_audio = audio_clips[0]
85
- clip = clip.set_audio(final_audio)
86
 
87
  # Write video
88
- clip.write_videofile(
89
  output_path,
90
  codec='libx264',
91
  audio_codec='aac',
@@ -95,7 +136,7 @@ class VideoComposer:
95
  )
96
 
97
  # Cleanup
98
- clip.close()
99
  for ac in audio_clips:
100
  ac.close()
101
 
@@ -106,6 +147,64 @@ class VideoComposer:
106
  logger.error(f"Error composing video: {e}")
107
  raise
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  def cleanup_frames(self, frame_paths: List[str]):
110
  """Delete temporary frame files"""
111
  for path in frame_paths:
 
1
  """
2
+ Video Composer - Combines frames into final video with captions
3
+ Uses MoviePy for video rendering with word-by-word captions like Story Reels
4
  """
5
  import logging
6
  import os
7
  import uuid
8
+ from pathlib import Path
9
+ from typing import List, Optional, Dict
10
+ from moviepy.editor import (
11
+ ImageSequenceClip,
12
+ AudioFileClip,
13
+ CompositeAudioClip,
14
+ CompositeVideoClip,
15
+ TextClip
16
+ )
17
 
18
  logger = logging.getLogger(__name__)
19
 
20
 
21
  class VideoComposer:
22
  """
23
+ Combines image frames into final video with professional captions.
24
 
25
  Features:
26
  - Frame sequence to video
27
  - Audio overlay
28
  - Background music
29
+ - Word-by-word captions (like Story Reels)
30
  """
31
 
32
  # Video settings
33
  FPS = 30
34
+ TARGET_WIDTH = 1080
35
+ TARGET_HEIGHT = 1920
36
+
37
+ # Caption settings (matching Story Reels)
38
+ CAPTION_FONT_SIZE = 72
39
+ CAPTION_COLOR = 'white'
40
+ CAPTION_STROKE_COLOR = 'black'
41
+ CAPTION_STROKE_WIDTH = 4
42
+ CAPTION_Y_POS = 0.75 # 75% down
43
 
44
def __init__(self, output_dir: str = "videos"):
    """Remember the output directory and make sure it exists.

    Args:
        output_dir: Directory this composer is configured with. It is
            created (including missing parents) when it does not exist
            yet; an already existing directory is left untouched.
    """
    # Create first, then record: a failure here aborts construction anyway.
    os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir
47
 
48
def _find_font(self, candidates: Optional[List[str]] = None) -> str:
    """Return a font to use for caption rendering.

    The first candidate that exists as a regular file wins. When none of
    them is present, the font *name* ``"DejaVu-Sans-Bold"`` is returned
    instead of a path so the renderer can try to resolve it itself.

    Args:
        candidates: Font file paths to probe, in priority order. When
            omitted, a built-in list of common bold sans-serif fonts on
            Linux and Windows is used.

    Returns:
        Path of the first existing candidate, or ``"DejaVu-Sans-Bold"``.
    """
    if candidates is None:
        candidates = (
            "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
            # Bold before regular: every other candidate (and the fallback)
            # is a bold face, and with the regular file listed first the
            # bold one could never be selected on Windows.
            "C:/Windows/Fonts/arialbd.ttf",
            "C:/Windows/Fonts/arial.ttf",
        )

    for path in candidates:
        # isfile() rather than exists(): a directory is not a usable font.
        if os.path.isfile(path):
            return path

    # Fallback: a font name rather than a file path.
    return "DejaVu-Sans-Bold"
65
+
66
  def compose_video(
67
  self,
68
  frame_paths: List[str],
69
  audio_path: Optional[str] = None,
70
  music_path: Optional[str] = None,
71
  output_name: Optional[str] = None,
72
+ fps: int = None,
73
+ captions: Optional[List[Dict]] = None
74
  ) -> str:
75
  """
76
+ Compose video from frames with optional captions.
77
 
78
  Args:
79
  frame_paths: List of frame image paths
 
81
  music_path: Optional background music path
82
  output_name: Custom output filename
83
  fps: Frames per second
84
+ captions: Optional list of captions [{text, startMs, endMs}]
85
 
86
  Returns:
87
  Path to output video
 
94
  logger.info(f"Composing video from {len(frame_paths)} frames")
95
 
96
  # Create video clip from frames
97
+ base_clip = ImageSequenceClip(frame_paths, fps=fps)
98
+
99
+ # If captions provided, add them
100
+ if captions:
101
+ base_clip = self._add_captions(base_clip, captions)
102
 
103
  # Add audio if provided
104
  audio_clips = []
 
110
  if music_path and os.path.exists(music_path):
111
  music_audio = AudioFileClip(music_path)
112
  # Loop music if needed
113
+ if music_audio.duration < base_clip.duration:
114
+ music_audio = music_audio.loop(duration=base_clip.duration)
115
  else:
116
+ music_audio = music_audio.subclip(0, base_clip.duration)
117
  # Lower volume for background
118
  music_audio = music_audio.volumex(0.3)
119
  audio_clips.append(music_audio)
 
123
  final_audio = CompositeAudioClip(audio_clips)
124
  else:
125
  final_audio = audio_clips[0]
126
+ base_clip = base_clip.set_audio(final_audio)
127
 
128
  # Write video
129
+ base_clip.write_videofile(
130
  output_path,
131
  codec='libx264',
132
  audio_codec='aac',
 
136
  )
137
 
138
  # Cleanup
139
+ base_clip.close()
140
  for ac in audio_clips:
141
  ac.close()
142
 
 
147
  logger.error(f"Error composing video: {e}")
148
  raise
149
 
150
def _add_captions(
    self,
    video_clip,
    captions: List[Dict]
):
    """
    Overlay word-by-word captions on a video clip.

    Every usable caption becomes a ``TextClip`` that is horizontally
    centred and placed ``CAPTION_Y_POS`` of the way down the frame, shown
    from its start time for its duration. Captions that cannot be parsed
    or rendered are skipped with a warning instead of failing the video.

    Args:
        video_clip: Base video clip. Its ``size`` and ``duration``
            attributes are used when present; otherwise
            ``TARGET_WIDTH``/``TARGET_HEIGHT`` are assumed and caption
            times are not clamped.
        captions: List of ``{text, startMs, endMs}`` dicts, times in
            milliseconds.

    Returns:
        CompositeVideoClip of the base clip plus caption clips, or
        ``video_clip`` itself when no caption was added.
    """
    if not captions:
        return video_clip

    # Lay captions out against the real frame, not the nominal target size,
    # so they stay on-screen for frames that are not 1080x1920.
    clip_size = getattr(video_clip, "size", None)
    if clip_size:
        frame_width, frame_height = clip_size
    else:
        frame_width, frame_height = self.TARGET_WIDTH, self.TARGET_HEIGHT
    clip_duration = getattr(video_clip, "duration", None)

    font_name = self._find_font()
    y_pos = frame_height * self.CAPTION_Y_POS
    text_width = max(frame_width - 100, 1)  # 50px margin on each side
    caption_clips = []

    for cap in captions:
        try:
            start_time = float(cap.get("startMs") or 0) / 1000
            end_time = float(cap.get("endMs") or 0) / 1000
            text = str(cap.get("text") or "").strip()
        except (AttributeError, TypeError, ValueError) as e:
            # One malformed entry must not abort the whole composition.
            logger.warning(f"Skipping malformed caption {cap!r}: {e}")
            continue

        start_time = max(start_time, 0.0)
        if clip_duration is not None:
            # A composite lasts as long as its longest layer; clamping keeps
            # a late caption from stretching the video past the base clip.
            end_time = min(end_time, clip_duration)
        duration = end_time - start_time

        if duration <= 0 or not text:
            continue

        try:
            txt_clip = TextClip(
                text,
                fontsize=self.CAPTION_FONT_SIZE,
                font=font_name,
                color=self.CAPTION_COLOR,
                stroke_color=self.CAPTION_STROKE_COLOR,
                stroke_width=self.CAPTION_STROKE_WIDTH,
                method='caption',
                size=(text_width, None),
                align='center'
            )

            # Position at bottom center
            txt_clip = txt_clip.set_position(('center', y_pos))
            txt_clip = txt_clip.set_start(start_time)
            txt_clip = txt_clip.set_duration(duration)

            caption_clips.append(txt_clip)

        except Exception as e:
            # Best effort: text rendering failures only cost this caption.
            logger.warning(f"Failed to create caption: {e}")
            continue

    if caption_clips:
        logger.info(f"Added {len(caption_clips)} caption clips")
        return CompositeVideoClip([video_clip] + caption_clips)

    return video_clip
207
+
208
  def cleanup_frames(self, frame_paths: List[str]):
209
  """Delete temporary frame files"""
210
  for path in frame_paths: