Commit · 071c8d3
1
Parent(s): 65ab047
Fix: Follow exact story_reels workflow - create_captions, SRTParser, FFmpegUtils
Browse files
- modules/art_reels/router.py +21 -12
modules/art_reels/router.py
CHANGED
|
@@ -189,20 +189,29 @@ async def generate_stick_figure_video(job_id: str, script: str, voice: str):
|
|
| 189 |
|
| 190 |
update_job(job_id, "processing", 30)
|
| 191 |
|
| 192 |
-
# Step 2:
|
| 193 |
-
logger.info(f"
|
| 194 |
-
captions = await asyncio.to_thread(whisper_client.transcribe, audio_path)
|
| 195 |
|
| 196 |
-
#
|
| 197 |
-
|
| 198 |
-
audio_clip = AudioFileClip(audio_path)
|
| 199 |
-
audio_duration = audio_clip.duration
|
| 200 |
-
audio_clip.close()
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
| 204 |
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
update_job(job_id, "processing", 45)
|
| 208 |
|
|
@@ -239,7 +248,7 @@ async def generate_stick_figure_video(job_id: str, script: str, voice: str):
|
|
| 239 |
audio_path=audio_path,
|
| 240 |
output_name=f"stick_{job_id}.mp4",
|
| 241 |
fps=30,
|
| 242 |
-
captions=
|
| 243 |
)
|
| 244 |
|
| 245 |
update_job(job_id, "processing", 95)
|
|
|
|
| 189 |
|
| 190 |
update_job(job_id, "processing", 30)
|
| 191 |
|
| 192 |
+
# Step 2: Generate captions with Whisper (exact story_reels workflow)
|
| 193 |
+
logger.info(f"Generating captions with Whisper for job {job_id}")
|
|
|
|
| 194 |
|
| 195 |
+
# WhisperClient.create_captions returns List[Caption]
|
| 196 |
+
captions = await asyncio.to_thread(whisper_client.create_captions, audio_path)
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
+
# Convert to dict format
|
| 199 |
+
captions_dict = [c.dict() for c in captions]
|
| 200 |
|
| 201 |
+
# Generate .srt content (for video subtitles)
|
| 202 |
+
srt_content = SRTParser.generate_srt_content(captions_dict)
|
| 203 |
+
srt_path = os.path.join(temp_dir, "voice.srt")
|
| 204 |
+
with open(srt_path, "w", encoding="utf-8") as f:
|
| 205 |
+
f.write(srt_content)
|
| 206 |
+
logger.info(f"Generated .srt with {len(captions)} captions")
|
| 207 |
+
|
| 208 |
+
# Get actual audio duration
|
| 209 |
+
from modules.video_creator.services.libraries.ffmpeg_utils import FFmpegUtils
|
| 210 |
+
audio_duration = FFmpegUtils.get_video_duration(audio_path)
|
| 211 |
+
|
| 212 |
+
# Step 3: Create 2-second chunks (for AI scene generation)
|
| 213 |
+
chunks = SRTParser.create_2s_chunks(captions_dict, audio_duration)
|
| 214 |
+
logger.info(f"Created {len(chunks)} x 2s chunks for job {job_id}")
|
| 215 |
|
| 216 |
update_job(job_id, "processing", 45)
|
| 217 |
|
|
|
|
| 248 |
audio_path=audio_path,
|
| 249 |
output_name=f"stick_{job_id}.mp4",
|
| 250 |
fps=30,
|
| 251 |
+
captions=captions_dict # Word-by-word captions from Whisper
|
| 252 |
)
|
| 253 |
|
| 254 |
update_job(job_id, "processing", 95)
|