ismdrobiul489 committed on
Commit
e3dc2da
·
1 Parent(s): ee2af1f

Upgrade Art Reels: AI stick figure with TTS, remove Bangla, use openai/gpt-oss-120b model

Browse files
modules/art_reels/router.py CHANGED
@@ -131,35 +131,102 @@ async def generate_drawing_video(job_id: str, subject: str, style: str, colors:
131
 
132
 
133
  async def generate_stick_figure_video(job_id: str, script: str, voice: str):
134
- """Background task to generate stick figure motivation video"""
135
  temp_dir = f"temp/art_{job_id}"
 
136
 
137
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  update_job(job_id, "processing", 10)
139
 
140
- # Parse script into scenes (simple keyword matching)
141
- scenes = parse_script_to_scenes(script)
 
 
 
 
 
 
 
 
 
 
142
 
143
  update_job(job_id, "processing", 30)
144
 
145
- # Generate frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  logger.info(f"Generating stick figure frames for job {job_id}")
147
- frame_paths = stick_figure.generate_motivation_frames(
148
  scenes=scenes,
149
- output_dir=temp_dir
 
 
150
  )
151
 
152
- update_job(job_id, "processing", 60)
153
 
154
- # Compose video
155
- logger.info(f"Composing video for job {job_id}")
156
  video_path = video_composer.compose_video(
157
  frame_paths=frame_paths,
 
158
  output_name=f"stick_{job_id}.mp4",
159
  fps=30
160
  )
161
 
162
- update_job(job_id, "processing", 90)
163
 
164
  # Cleanup
165
  video_composer.cleanup_frames(frame_paths)
@@ -167,10 +234,12 @@ async def generate_stick_figure_video(job_id: str, script: str, voice: str):
167
  shutil.rmtree(temp_dir)
168
 
169
  update_job(job_id, "ready", 100, video_url=f"/api/art/video/{job_id}")
170
- logger.info(f"Stick figure video ready: {job_id}")
171
 
172
  except Exception as e:
173
  logger.error(f"Error generating stick figure video: {e}")
 
 
174
  update_job(job_id, "failed", error=str(e))
175
 
176
 
 
131
 
132
 
133
  async def generate_stick_figure_video(job_id: str, script: str, voice: str):
134
+ """Background task to generate stick figure motivation video with TTS"""
135
  temp_dir = f"temp/art_{job_id}"
136
+ os.makedirs(temp_dir, exist_ok=True)
137
 
138
  try:
139
+ update_job(job_id, "processing", 5)
140
+ logger.info(f"Starting stick figure video with TTS for job {job_id}")
141
+
142
+ # Import TTS, Whisper, and AI Stick Figure
143
+ from ..story_reels.services.srt_parser import SRTParser
144
+ from .services.ai_stick_figure import AIStickFigure
145
+
146
+ # Get TTS client from app
147
+ import sys
148
+ app_module = sys.modules.get('app')
149
+ tts_client = getattr(app_module, 'tts_client', None) if app_module else None
150
+ whisper_client = getattr(app_module, 'whisper_client', None) if app_module else None
151
+
152
+ if not tts_client or not whisper_client:
153
+ # Fallback: try to get from story_reels module
154
+ try:
155
+ from modules.story_reels import get_clients
156
+ tts_client, whisper_client = get_clients()
157
+ except:
158
+ raise Exception("TTS and Whisper clients not available")
159
+
160
  update_job(job_id, "processing", 10)
161
 
162
+ # Step 1: Generate TTS audio from script
163
+ logger.info(f"Generating TTS audio for job {job_id}")
164
+ audio_path = os.path.join(temp_dir, "voice.mp3")
165
+ await asyncio.to_thread(
166
+ tts_client.generate,
167
+ text=script,
168
+ voice=voice,
169
+ output_path=audio_path
170
+ )
171
+
172
+ if not os.path.exists(audio_path):
173
+ raise Exception("TTS audio generation failed")
174
 
175
  update_job(job_id, "processing", 30)
176
 
177
+ # Step 2: Get timestamps with Whisper
178
+ logger.info(f"Transcribing audio for job {job_id}")
179
+ captions = await asyncio.to_thread(whisper_client.transcribe, audio_path)
180
+
181
+ # Step 3: Create 2-second chunks
182
+ from moviepy.editor import AudioFileClip
183
+ audio_clip = AudioFileClip(audio_path)
184
+ audio_duration = audio_clip.duration
185
+ audio_clip.close()
186
+
187
+ srt_parser = SRTParser()
188
+ chunks = srt_parser.create_2s_chunks(captions, audio_duration)
189
+
190
+ logger.info(f"Created {len(chunks)} 2-second chunks for job {job_id}")
191
+
192
+ update_job(job_id, "processing", 45)
193
+
194
+ # Step 4: Generate scenes with AI
195
+ ai_stick = AIStickFigure()
196
+ scenes = ai_stick.generate_scenes_with_ai(chunks)
197
+
198
+ # Calculate durations for each chunk
199
+ chunk_durations = []
200
+ for i, chunk in enumerate(chunks):
201
+ if i < len(chunks) - 1:
202
+ duration = chunks[i + 1].get("start_time", 2.0) - chunk.get("start_time", 0)
203
+ else:
204
+ duration = audio_duration - chunk.get("start_time", 0)
205
+ chunk_durations.append(max(0.5, duration))
206
+
207
+ update_job(job_id, "processing", 60)
208
+
209
+ # Step 5: Generate frames
210
  logger.info(f"Generating stick figure frames for job {job_id}")
211
+ frame_paths = ai_stick.generate_frames_from_scenes(
212
  scenes=scenes,
213
+ chunk_durations=chunk_durations,
214
+ output_dir=temp_dir,
215
+ fps=30
216
  )
217
 
218
+ update_job(job_id, "processing", 80)
219
 
220
+ # Step 6: Compose video with audio
221
+ logger.info(f"Composing video with audio for job {job_id}")
222
  video_path = video_composer.compose_video(
223
  frame_paths=frame_paths,
224
+ audio_path=audio_path,
225
  output_name=f"stick_{job_id}.mp4",
226
  fps=30
227
  )
228
 
229
+ update_job(job_id, "processing", 95)
230
 
231
  # Cleanup
232
  video_composer.cleanup_frames(frame_paths)
 
234
  shutil.rmtree(temp_dir)
235
 
236
  update_job(job_id, "ready", 100, video_url=f"/api/art/video/{job_id}")
237
+ logger.info(f"Stick figure video with TTS ready: {job_id}")
238
 
239
  except Exception as e:
240
  logger.error(f"Error generating stick figure video: {e}")
241
+ import traceback
242
+ logger.error(traceback.format_exc())
243
  update_job(job_id, "failed", error=str(e))
244
 
245
 
modules/art_reels/services/ai_stick_figure.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Stick Figure Generator - AI-Powered Stick Figure Animation with TTS
3
+ Uses Groq AI for scene generation, Kokoro TTS for voice, Whisper for timing
4
+ """
5
+ import logging
6
+ import os
7
+ import json
8
+ import math
9
+ from PIL import Image, ImageDraw, ImageFont
10
+ from typing import List, Tuple, Dict, Optional
11
+ from groq import Groq
12
+
13
logger = logging.getLogger(__name__)


# System prompt for AI scene generation.
# The prop vocabulary is limited to the props the renderer has drawing code
# for (crown, money, book, phone, coffee); anything else the model returns is
# silently skipped when the frame is drawn, so it must not be advertised here.
SCENE_GENERATION_PROMPT = """You are an AI that converts text into stick figure animation scenes.

For each 2-second chunk of narration, generate a scene description.

OUTPUT FORMAT (JSON array):
[
  {
    "chunk_id": 0,
    "pose": "standing|walking|running|sitting|sleeping|waving|thinking|jumping|celebrating",
    "props": ["crown", "money", "book", "phone", "coffee"],
    "text_overlay": null or "important quote here",
    "emotion": "happy|sad|thinking|excited|tired|angry",
    "action": "brief action description"
  }
]

RULES:
1. Match the pose and props to the MEANING of the text
2. Use text_overlay ONLY for important quotes or key phrases
3. Be creative with emotions and actions
4. Keep it simple - stick figures should be expressive but minimal
5. Use ONLY the poses, props and emotions listed above

EXAMPLES:
- "A rich businessman" → pose: "standing", props: ["money"], emotion: "happy"
- "He woke up from sleep" → pose: "sleeping", props: [], emotion: "tired"
- "Get ready for success" → pose: "celebrating", props: [], text_overlay: "GET READY!"
- "Walk like a king" → pose: "walking", props: ["crown"], emotion: "happy"
"""
45
+
46
+
47
class AIStickFigure:
    """
    AI-Powered Stick Figure Animation Generator.

    Pipeline:
    1. Text → TTS → Audio
    2. Audio → Whisper → 2-second chunks
    3. Chunks → Groq AI → Scene descriptions
    4. Scenes → Python Drawing → Frames
    5. Frames + Audio → Video

    This class covers steps 3 and 4: turning narration chunks into scene
    dictionaries (via Groq, with a keyword-based fallback) and rendering
    those scenes into PNG frames.
    """

    # Video dimensions (9:16 portrait)
    WIDTH = 1080
    HEIGHT = 1920

    # Stick figure size
    HEAD_RADIUS = 50
    BODY_LENGTH = 150
    ARM_LENGTH = 100
    LEG_LENGTH = 120
    LINE_WIDTH = 10

    # Colors
    BG_COLOR = (255, 255, 255)  # White
    FIGURE_COLOR = (30, 30, 30)  # Near black
    ACCENT_COLOR = (255, 87, 51)  # Orange accent

    # Font files tried in order before falling back to Pillow's built-in font.
    FONT_CANDIDATES = ("arial.ttf", "DejaVuSans.ttf")

    # keyword -> (pose, props, emotion); the first entry (in this order) that
    # occurs in a chunk decides the scene.
    KEYWORD_SCENES = {
        # Poses
        "sleep": ("sleeping", [], "tired"),
        "asleep": ("sleeping", [], "tired"),
        "wake": ("sleeping", [], "tired"),
        "run": ("running", [], "excited"),
        "running": ("running", [], "excited"),
        "walk": ("walking", [], "happy"),
        "walking": ("walking", [], "happy"),
        "sit": ("sitting", [], "thinking"),
        "sitting": ("sitting", [], "thinking"),
        "think": ("thinking", [], "thinking"),
        "thinking": ("thinking", [], "thinking"),
        "jump": ("jumping", [], "excited"),
        "celebrate": ("celebrating", [], "happy"),
        "success": ("celebrating", [], "excited"),
        "successful": ("celebrating", [], "excited"),
        # Props
        "king": ("standing", ["crown"], "happy"),
        "queen": ("standing", ["crown"], "happy"),
        "rich": ("standing", ["money"], "happy"),
        "wealthy": ("standing", ["money"], "happy"),
        "money": ("standing", ["money"], "happy"),
        "dollar": ("standing", ["money"], "happy"),
        "book": ("sitting", ["book"], "thinking"),
        "read": ("sitting", ["book"], "thinking"),
        "phone": ("standing", ["phone"], "happy"),
        "coffee": ("standing", ["coffee"], "happy"),
    }

    # Phrases that make the fallback generator show the chunk text on screen.
    OVERLAY_TRIGGERS = ("get ready", "remember", "important", "key", "ready", "success")

    def __init__(self, groq_api_key: Optional[str] = None):
        """
        Create the generator.

        Args:
            groq_api_key: Groq API key; when omitted the GROQ_API_KEY
                environment variable is used. Without any key the instance
                still works but only uses keyword-based scene generation.
        """
        self.groq_api_key = groq_api_key or os.environ.get("GROQ_API_KEY")
        if self.groq_api_key:
            self.groq = Groq(api_key=self.groq_api_key)
        else:
            self.groq = None
            logger.warning("Groq API key not found - AI scene generation disabled")

    def generate_scenes_with_ai(self, chunks: List[Dict]) -> List[Dict]:
        """
        Use Groq AI to generate scene descriptions from text chunks.

        The result always contains exactly one scene per chunk: scenes the
        model omitted (or returned in an unusable shape) are filled in with
        the keyword fallback and surplus scenes are dropped, so the rendered
        video cannot end up shorter or longer than the narration.

        Args:
            chunks: List of {chunk_id, text} from Whisper

        Returns:
            List of scene descriptions with pose, props, etc.
        """
        if not chunks:
            return []

        if not self.groq:
            # Fallback to keyword-based generation
            return self._generate_scenes_keyword(chunks)

        try:
            chunk_texts = "\n".join(
                'Chunk {}: "{}"'.format(c.get("chunk_id", i), c.get("text", ""))
                for i, c in enumerate(chunks)
            )

            user_prompt = (
                "Generate stick figure scenes for these narration chunks:\n\n"
                f"{chunk_texts}\n\n"
                f"Generate exactly {len(chunks)} scenes, one for each chunk.\n"
                "Return ONLY valid JSON array, no other text."
            )

            response = self.groq.chat.completions.create(
                model="openai/gpt-oss-120b",
                messages=[
                    {"role": "system", "content": SCENE_GENERATION_PROMPT},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.7,
                max_tokens=2000
            )

            scenes = self._parse_scenes_json(response.choices[0].message.content)
            logger.info("AI generated %d scenes for %d chunks", len(scenes), len(chunks))
            if len(scenes) != len(chunks):
                logger.warning("Scene/chunk count mismatch - aligning with keyword fallback")
            return self._align_scenes(scenes, chunks)

        except Exception as e:
            # Any API or parsing problem degrades to the offline generator
            # instead of failing the whole video job.
            logger.error(f"AI scene generation failed: {e}, using keyword fallback")
            return self._generate_scenes_keyword(chunks)

    @staticmethod
    def _parse_scenes_json(content) -> List:
        """
        Extract the JSON array of scenes from a raw model reply.

        Accepts a bare array, an array wrapped in a markdown code fence or
        surrounded by prose, a wrapping object such as {"scenes": [...]},
        and a single scene object.

        Raises:
            ValueError: if no JSON array can be recovered.
        """
        text = (content or "").strip()
        candidates = [text]
        start, end = text.find("["), text.rfind("]")
        if 0 <= start < end:
            # Outermost bracket pair: skips fences and explanatory prose.
            candidates.append(text[start:end + 1])

        for candidate in candidates:
            try:
                data = json.loads(candidate)
            except ValueError:
                continue
            if isinstance(data, dict):
                if "pose" in data:
                    data = [data]
                else:
                    data = next((v for v in data.values() if isinstance(v, list)), None)
            if isinstance(data, list):
                return data

        raise ValueError("AI response did not contain a JSON array of scenes")

    def _align_scenes(self, scenes: List, chunks: List[Dict]) -> List[Dict]:
        """Return one scene per chunk, matched by position; gaps use the keyword fallback."""
        aligned = []
        for index, chunk in enumerate(chunks):
            candidate = scenes[index] if index < len(scenes) else None
            if isinstance(candidate, dict):
                aligned.append(candidate)
            else:
                aligned.extend(self._generate_scenes_keyword([chunk]))
        return aligned

    @staticmethod
    def _mentions(text: str, phrase: str) -> bool:
        """
        Tell whether `phrase` occurs in `text` as a word, not as a fragment.

        Simple inflections (s, es, d, ed, ing) are accepted, so "jumping"
        matches "jump", but "ready" does not match "read" and "making" does
        not match "king".
        """
        import re
        pattern = r"\b" + re.escape(phrase) + r"(?:s|es|d|ed|ing)?\b"
        return re.search(pattern, text) is not None

    def _generate_scenes_keyword(self, chunks: List[Dict]) -> List[Dict]:
        """
        Fallback keyword-based scene generation.

        Each chunk gets the pose/props/emotion of the first KEYWORD_SCENES
        entry mentioned in its text (default: happy standing figure), and a
        text overlay (first 30 characters, upper-cased) when it mentions one
        of OVERLAY_TRIGGERS.
        """
        scenes = []

        for chunk in chunks:
            text = (chunk.get("text") or "").lower()
            pose, props, emotion = "standing", [], "happy"

            for keyword, (kw_pose, kw_props, kw_emotion) in self.KEYWORD_SCENES.items():
                if self._mentions(text, keyword):
                    # Copy the list so scenes never share (and mutate) one object.
                    pose, props, emotion = kw_pose, list(kw_props), kw_emotion
                    break

            text_overlay = None
            if any(self._mentions(text, trigger) for trigger in self.OVERLAY_TRIGGERS):
                text_overlay = text[:30].upper()

            scenes.append({
                "chunk_id": chunk.get("chunk_id", 0),
                "pose": pose,
                "props": props,
                "emotion": emotion,
                "text_overlay": text_overlay,
                "action": f"Scene for: {text[:50]}"
            })

        return scenes

    def _load_font(self, size: int):
        """Load the first available TrueType font at `size`, else Pillow's default font."""
        for name in self.FONT_CANDIDATES:
            try:
                return ImageFont.truetype(name, size)
            except (OSError, ImportError):
                # Font file not found / FreeType support missing: try the next one.
                continue
        return ImageFont.load_default()

    def draw_stick_figure(
        self,
        draw: "ImageDraw.ImageDraw",
        x: int,
        y: int,
        pose: str = "standing",
        emotion: str = "happy",
        props: Optional[List[str]] = None,
        scale: float = 1.0
    ):
        """
        Draw an expressive stick figure.

        Args:
            draw: Pillow drawing context to draw on
            x: Horizontal centre of the figure
            y: Vertical anchor; the head centre of upright poses is placed at
                y - BODY_LENGTH*scale - HEAD_RADIUS*scale
            pose: One of the supported pose names; unknown poses are drawn standing
            emotion: Facial expression; unknown emotions get a neutral face
            props: Prop names to draw next to the figure
            scale: Size multiplier applied to the figure dimensions
        """
        props = props or []

        # Scale dimensions
        head_r = int(self.HEAD_RADIUS * scale)
        body_len = int(self.BODY_LENGTH * scale)
        arm_len = int(self.ARM_LENGTH * scale)
        leg_len = int(self.LEG_LENGTH * scale)
        line_w = max(4, int(self.LINE_WIDTH * scale))

        # Pose-specific drawing
        pose_methods = {
            "standing": self._draw_standing,
            "walking": self._draw_walking,
            "running": self._draw_running,
            "sitting": self._draw_sitting,
            "sleeping": self._draw_sleeping,
            "waving": self._draw_waving,
            "thinking": self._draw_thinking,
            "jumping": self._draw_jumping,
            "celebrating": self._draw_celebrating,
        }

        draw_method = pose_methods.get(pose, self._draw_standing)
        draw_method(draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion)

        # Draw props (positioned relative to the upright head centre)
        for prop in props:
            self._draw_prop(draw, x, y - body_len - head_r, head_r, prop, scale)

    def _draw_head(self, draw, x, head_y, head_r, emotion, line_w):
        """Draw the head outline centred at (x, head_y) and its face."""
        draw.ellipse([x - head_r, head_y - head_r, x + head_r, head_y + head_r],
                     outline=self.FIGURE_COLOR, width=line_w)
        self._draw_face(draw, x, head_y, head_r, emotion, line_w)

    def _draw_face(self, draw, x, y, head_r, emotion, line_w):
        """Draw expressive face inside a head centred at (x, y)."""
        # Eyes
        eye_y = y - head_r // 4
        eye_offset = head_r // 3
        eye_size = head_r // 5

        if emotion == "happy":
            # Happy eyes (curved)
            draw.arc([x - eye_offset - eye_size, eye_y - eye_size,
                      x - eye_offset + eye_size, eye_y + eye_size],
                     0, 180, fill=self.FIGURE_COLOR, width=line_w // 2)
            draw.arc([x + eye_offset - eye_size, eye_y - eye_size,
                      x + eye_offset + eye_size, eye_y + eye_size],
                     0, 180, fill=self.FIGURE_COLOR, width=line_w // 2)
            # Smile
            draw.arc([x - head_r // 2, y - head_r // 4, x + head_r // 2, y + head_r // 2],
                     0, 180, fill=self.FIGURE_COLOR, width=line_w // 2)
        elif emotion == "sad":
            # Sad eyes
            draw.ellipse([x - eye_offset - eye_size, eye_y - eye_size,
                          x - eye_offset + eye_size, eye_y + eye_size],
                         fill=self.FIGURE_COLOR)
            draw.ellipse([x + eye_offset - eye_size, eye_y - eye_size,
                          x + eye_offset + eye_size, eye_y + eye_size],
                         fill=self.FIGURE_COLOR)
            # Frown
            draw.arc([x - head_r // 2, y, x + head_r // 2, y + head_r // 2],
                     180, 360, fill=self.FIGURE_COLOR, width=line_w // 2)
        elif emotion == "thinking":
            # Thinking eyes (looking up)
            draw.ellipse([x - eye_offset - eye_size, eye_y - eye_size - 5,
                          x - eye_offset + eye_size, eye_y + eye_size - 5],
                         fill=self.FIGURE_COLOR)
            draw.ellipse([x + eye_offset - eye_size, eye_y - eye_size - 5,
                          x + eye_offset + eye_size, eye_y + eye_size - 5],
                         fill=self.FIGURE_COLOR)
            # Neutral mouth
            draw.line([x - head_r // 3, y + head_r // 4, x + head_r // 3, y + head_r // 4],
                      fill=self.FIGURE_COLOR, width=line_w // 2)
        elif emotion == "excited":
            # Big excited eyes
            big_eye = eye_size * 2
            draw.ellipse([x - eye_offset - big_eye, eye_y - big_eye,
                          x - eye_offset + big_eye, eye_y + big_eye],
                         fill=self.FIGURE_COLOR)
            draw.ellipse([x + eye_offset - big_eye, eye_y - big_eye,
                          x + eye_offset + big_eye, eye_y + big_eye],
                         fill=self.FIGURE_COLOR)
            # Big smile
            draw.arc([x - head_r // 2, y - head_r // 3, x + head_r // 2, y + head_r // 2],
                     0, 180, fill=self.FIGURE_COLOR, width=line_w)
        else:
            # Default neutral
            draw.ellipse([x - eye_offset - eye_size, eye_y - eye_size,
                          x - eye_offset + eye_size, eye_y + eye_size],
                         fill=self.FIGURE_COLOR)
            draw.ellipse([x + eye_offset - eye_size, eye_y - eye_size,
                          x + eye_offset + eye_size, eye_y + eye_size],
                         fill=self.FIGURE_COLOR)
            draw.line([x - head_r // 3, y + head_r // 4, x + head_r // 3, y + head_r // 4],
                      fill=self.FIGURE_COLOR, width=line_w // 2)

    def _draw_standing(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw standing pose with expression"""
        head_y = y - body_len - head_r
        self._draw_head(draw, x, head_y, head_r, emotion, line_w)

        # Body
        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x, body_top, x, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        # Arms
        arm_y = body_top + body_len // 4
        draw.line([x, arm_y, x - arm_len, arm_y + arm_len // 3], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, arm_y, x + arm_len, arm_y + arm_len // 3], fill=self.FIGURE_COLOR, width=line_w)

        # Legs
        draw.line([x, body_bottom, x - leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, body_bottom, x + leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)

    def _draw_walking(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw walking pose"""
        head_y = y - body_len - head_r
        self._draw_head(draw, x, head_y, head_r, emotion, line_w)

        # Body leans slightly forward
        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x, body_top, x + 10, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        # Arms swing in opposite directions
        arm_y = body_top + body_len // 4
        draw.line([x, arm_y, x - arm_len // 2, arm_y + arm_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, arm_y, x + arm_len, arm_y - arm_len // 3], fill=self.FIGURE_COLOR, width=line_w)

        # Legs mid-stride
        draw.line([x + 10, body_bottom, x - leg_len, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x + 10, body_bottom, x + leg_len, body_bottom + leg_len // 2], fill=self.FIGURE_COLOR, width=line_w)

    def _draw_running(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw running pose (always with an excited face; `emotion` is ignored)"""
        head_y = y - body_len - head_r
        # Head is shifted right so the whole figure leans into the run
        self._draw_head(draw, x + 20, head_y, head_r, "excited", line_w)

        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x + 20, body_top, x + 40, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        arm_y = body_top + body_len // 4 + 20
        draw.line([x + 20, arm_y, x - arm_len, arm_y + arm_len // 3], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x + 20, arm_y, x + arm_len + 30, arm_y - arm_len // 2], fill=self.FIGURE_COLOR, width=line_w)

        draw.line([x + 40, body_bottom, x - leg_len - 20, body_bottom + leg_len // 2], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x + 40, body_bottom, x + leg_len + 40, body_bottom + leg_len // 3], fill=self.FIGURE_COLOR, width=line_w)

    def _draw_sitting(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw sitting pose"""
        # The seated figure is lowered by half a leg length
        y_offset = leg_len // 2
        head_y = y - body_len - head_r + y_offset
        self._draw_head(draw, x, head_y, head_r, emotion, line_w)

        # Shortened torso
        body_top = head_y + head_r
        body_bottom = body_top + body_len // 2
        draw.line([x, body_top, x, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        # Arms resting down to seat level
        arm_y = body_top + body_len // 6
        draw.line([x, arm_y, x - arm_len // 2, body_bottom], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, arm_y, x + arm_len // 2, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        # Chair/ground
        draw.line([x - leg_len, body_bottom, x + leg_len, body_bottom], fill=self.FIGURE_COLOR, width=line_w // 2)
        draw.line([x, body_bottom, x - leg_len // 2, body_bottom + leg_len // 2], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, body_bottom, x + leg_len // 2, body_bottom + leg_len // 2], fill=self.FIGURE_COLOR, width=line_w)

    def _draw_sleeping(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw horizontal sleeping pose (closed eyes; `emotion` is ignored)"""
        # Horizontal figure: head at the left end
        draw.ellipse([x - body_len - head_r, y - head_r, x - body_len + head_r, y + head_r],
                     outline=self.FIGURE_COLOR, width=line_w)

        # Closed eyes (lines)
        draw.line([x - body_len - head_r // 2, y - head_r // 4, x - body_len - head_r // 4, y - head_r // 4],
                  fill=self.FIGURE_COLOR, width=line_w // 2)
        draw.line([x - body_len + head_r // 4, y - head_r // 4, x - body_len + head_r // 2, y - head_r // 4],
                  fill=self.FIGURE_COLOR, width=line_w // 2)

        # Body
        draw.line([x - body_len + head_r, y, x, y], fill=self.FIGURE_COLOR, width=line_w)

        # Legs
        draw.line([x, y, x + leg_len, y + 10], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, y, x + leg_len, y - 10], fill=self.FIGURE_COLOR, width=line_w)

        # Zzz
        font = self._load_font(40)
        draw.text((x - body_len, y - head_r - 50), "Zzz", fill=(100, 100, 100), font=font)

        # Bed
        draw.rectangle([x - body_len - head_r - 20, y + head_r, x + leg_len + 20, y + head_r + 20],
                       fill=(139, 90, 43))

    def _draw_waving(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw waving pose (always with a happy face; `emotion` is ignored)"""
        head_y = y - body_len - head_r
        self._draw_head(draw, x, head_y, head_r, "happy", line_w)

        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x, body_top, x, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        arm_y = body_top + body_len // 4
        draw.line([x, arm_y, x - arm_len, arm_y + arm_len // 2], fill=self.FIGURE_COLOR, width=line_w)
        # Waving arm up
        draw.line([x, arm_y, x + arm_len // 2, arm_y - arm_len], fill=self.FIGURE_COLOR, width=line_w)
        # Hand wave lines
        draw.line([x + arm_len // 2 - 10, arm_y - arm_len - 30, x + arm_len // 2 + 10, arm_y - arm_len - 20],
                  fill=self.ACCENT_COLOR, width=3)
        draw.line([x + arm_len // 2 + 15, arm_y - arm_len - 25, x + arm_len // 2 + 35, arm_y - arm_len - 15],
                  fill=self.ACCENT_COLOR, width=3)

        draw.line([x, body_bottom, x - leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, body_bottom, x + leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)

    def _draw_thinking(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw thinking pose with thought bubble (thinking face; `emotion` is ignored)"""
        head_y = y - body_len - head_r
        self._draw_head(draw, x, head_y, head_r, "thinking", line_w)

        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x, body_top, x, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        arm_y = body_top + body_len // 4
        draw.line([x, arm_y, x - arm_len, arm_y + arm_len // 2], fill=self.FIGURE_COLOR, width=line_w)
        # Hand on chin
        draw.line([x, arm_y, x + arm_len // 3, arm_y - arm_len // 3], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x + arm_len // 3, arm_y - arm_len // 3, x + head_r // 2, head_y + head_r],
                  fill=self.FIGURE_COLOR, width=line_w)

        draw.line([x, body_bottom, x - leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, body_bottom, x + leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)

        # Thought bubble
        draw.ellipse([x + head_r + 30, head_y - head_r - 100, x + head_r + 150, head_y - head_r - 20],
                     outline=self.FIGURE_COLOR, width=3)
        draw.ellipse([x + head_r + 10, head_y - head_r - 20, x + head_r + 30, head_y - head_r],
                     fill=self.FIGURE_COLOR)

    def _draw_jumping(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw jumping pose (always with an excited face; `emotion` is ignored)"""
        y_offset = -80  # Jump up
        head_y = y - body_len - head_r + y_offset
        self._draw_head(draw, x, head_y, head_r, "excited", line_w)

        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x, body_top, x, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        # Arms up
        arm_y = body_top + body_len // 4
        draw.line([x, arm_y, x - arm_len, arm_y - arm_len // 2], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, arm_y, x + arm_len, arm_y - arm_len // 2], fill=self.FIGURE_COLOR, width=line_w)

        # Legs spread
        draw.line([x, body_bottom, x - leg_len, body_bottom + leg_len // 2], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, body_bottom, x + leg_len, body_bottom + leg_len // 2], fill=self.FIGURE_COLOR, width=line_w)

        # Jump lines
        draw.line([x - 30, body_bottom + leg_len + 30, x + 30, body_bottom + leg_len + 30],
                  fill=(200, 200, 200), width=4)

    def _draw_celebrating(self, draw, x, y, head_r, body_len, arm_len, leg_len, line_w, emotion):
        """Draw celebrating pose with confetti (excited face; `emotion` is ignored)"""
        head_y = y - body_len - head_r
        self._draw_head(draw, x, head_y, head_r, "excited", line_w)

        body_top = head_y + head_r
        body_bottom = body_top + body_len
        draw.line([x, body_top, x, body_bottom], fill=self.FIGURE_COLOR, width=line_w)

        # Both arms up
        arm_y = body_top + body_len // 4
        draw.line([x, arm_y, x - arm_len, arm_y - arm_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, arm_y, x + arm_len, arm_y - arm_len], fill=self.FIGURE_COLOR, width=line_w)

        draw.line([x, body_bottom, x - leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)
        draw.line([x, body_bottom, x + leg_len // 2, body_bottom + leg_len], fill=self.FIGURE_COLOR, width=line_w)

        # Confetti: ten randomly placed dots around the head (differs per call)
        import random
        colors = [(255, 87, 51), (255, 195, 0), (76, 175, 80), (33, 150, 243)]
        for _ in range(10):
            cx = x + random.randint(-150, 150)
            cy = head_y + random.randint(-150, 50)
            color = random.choice(colors)
            draw.ellipse([cx - 5, cy - 5, cx + 5, cy + 5], fill=color)

    def _draw_prop(self, draw, x, head_y, head_r, prop: str, scale: float):
        """
        Draw props on the figure.

        Supported props: crown, money, book, phone, coffee. Any other name is
        ignored. Offsets are fixed pixel values; `scale` is currently unused.
        """
        if prop == "crown":
            crown_y = head_y - head_r - 20
            points = [
                (x - 40, crown_y),
                (x - 25, crown_y - 40),
                (x, crown_y - 20),
                (x + 25, crown_y - 40),
                (x + 40, crown_y)
            ]
            draw.polygon(points, fill=(255, 215, 0), outline=(200, 170, 0), width=2)
        elif prop == "money":
            bag_x = x + 100
            bag_y = head_y + 150
            draw.ellipse([bag_x, bag_y, bag_x + 60, bag_y + 70], fill=(34, 139, 34), outline=(20, 100, 20), width=2)
            font = self._load_font(30)
            draw.text((bag_x + 20, bag_y + 20), "$", fill=(255, 255, 255), font=font)
        elif prop == "book":
            book_x = x + 80
            book_y = head_y + 130
            draw.rectangle([book_x, book_y, book_x + 50, book_y + 70], fill=(139, 69, 19), outline=(100, 50, 10), width=2)
            draw.line([book_x + 25, book_y, book_x + 25, book_y + 70], fill=(80, 40, 10), width=2)
        elif prop == "phone":
            phone_x = x + 90
            phone_y = head_y + 100
            draw.rectangle([phone_x, phone_y, phone_x + 30, phone_y + 50], fill=(50, 50, 50), outline=(30, 30, 30), width=2)
            draw.rectangle([phone_x + 3, phone_y + 5, phone_x + 27, phone_y + 40], fill=(100, 150, 200))
        elif prop == "coffee":
            cup_x = x + 100
            cup_y = head_y + 120
            draw.rectangle([cup_x, cup_y, cup_x + 40, cup_y + 50], fill=(255, 255, 255), outline=(200, 200, 200), width=2)
            draw.arc([cup_x + 30, cup_y + 10, cup_x + 50, cup_y + 40], -90, 90, fill=(200, 200, 200), width=3)
            # Steam
            draw.arc([cup_x + 10, cup_y - 20, cup_x + 20, cup_y], 0, 180, fill=(200, 200, 200), width=2)
            draw.arc([cup_x + 20, cup_y - 25, cup_x + 30, cup_y - 5], 180, 360, fill=(200, 200, 200), width=2)

    def add_text_overlay(
        self,
        img: "Image.Image",
        text: str,
        position: str = "bottom"
    ) -> "Image.Image":
        """
        Add stylish text overlay.

        Draws `text` horizontally centred on a semi-transparent black box.
        The image is modified in place and also returned.

        Args:
            img: Frame to draw on
            text: Text to show
            position: "center", "top", or anything else for bottom
        """
        # An RGB image only honours the alpha of the box colour when the draw
        # context is opened in RGBA blend mode; otherwise the box is opaque.
        if img.mode == "RGB":
            draw = ImageDraw.Draw(img, "RGBA")
        else:
            draw = ImageDraw.Draw(img)

        font = self._load_font(70)

        bbox = draw.textbbox((0, 0), text, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]

        if position == "center":
            pos = ((self.WIDTH - text_width) // 2, (self.HEIGHT - text_height) // 2)
        elif position == "top":
            pos = ((self.WIDTH - text_width) // 2, 150)
        else:  # bottom
            pos = ((self.WIDTH - text_width) // 2, self.HEIGHT - 200)

        # Background box; bbox[0]/bbox[1] are the glyph offsets from the
        # drawing origin, so including them keeps the box around the ink.
        padding = 20
        draw.rectangle([
            pos[0] + bbox[0] - padding, pos[1] + bbox[1] - padding,
            pos[0] + bbox[2] + padding, pos[1] + bbox[3] + padding
        ], fill=(0, 0, 0, 180))

        # Text
        draw.text(pos, text, fill=(255, 255, 255), font=font,
                  stroke_width=2, stroke_fill=(0, 0, 0))

        return img

    def create_scene_frame(self, scene: Dict) -> "Image.Image":
        """
        Create a single frame from scene description.

        Scene values come from an LLM, so they are sanitised first: a missing
        or non-string pose/emotion falls back to the defaults, and `props`
        may be null, a single string or a list.
        """
        img = Image.new('RGB', (self.WIDTH, self.HEIGHT), self.BG_COLOR)
        draw = ImageDraw.Draw(img)

        pose = scene.get("pose")
        if not isinstance(pose, str) or not pose:
            pose = "standing"

        emotion = scene.get("emotion")
        if not isinstance(emotion, str) or not emotion:
            emotion = "happy"

        props = scene.get("props") or []
        if isinstance(props, str):
            props = [props]
        props = [p for p in props if isinstance(p, str)]

        # Draw figure
        self.draw_stick_figure(draw, self.WIDTH // 2, self.HEIGHT // 2 + 150,
                               pose=pose, emotion=emotion, props=props, scale=1.5)

        # Add text overlay if present
        text_overlay = scene.get("text_overlay")
        if text_overlay:
            self.add_text_overlay(img, str(text_overlay))

        return img

    @staticmethod
    def _frame_counts(num_scenes: int, chunk_durations: List[float], fps: int) -> List[int]:
        """
        Compute how many frames each scene gets.

        Counts are derived from the rounded cumulative end time of each
        scene, so truncation cannot accumulate: the total always equals
        round(total_duration * fps). Scenes without a duration get 2.0 s.
        """
        counts = []
        elapsed = 0.0
        frames_so_far = 0
        for i in range(num_scenes):
            duration = chunk_durations[i] if i < len(chunk_durations) else 2.0
            elapsed += max(0.0, duration)
            target_total = int(round(elapsed * fps))
            counts.append(max(0, target_total - frames_so_far))
            frames_so_far += counts[-1]
        return counts

    def generate_frames_from_scenes(
        self,
        scenes: List[Dict],
        chunk_durations: List[float],
        output_dir: str,
        fps: int = 30
    ) -> List[str]:
        """
        Generate frames for all scenes with correct timing.

        Every scene is rendered once; the PNG is encoded a single time and
        then copied for the remaining frames of that scene.

        Args:
            scenes: List of scene descriptions
            chunk_durations: Duration of each chunk in seconds
            output_dir: Directory to save frames
            fps: Frames per second

        Returns:
            List of frame file paths
        """
        import shutil

        os.makedirs(output_dir, exist_ok=True)
        frame_paths = []
        frame_num = 0
        counts = self._frame_counts(len(scenes), chunk_durations, fps)

        for i, (scene, num_frames) in enumerate(zip(scenes, counts)):
            logger.info(f"Generating {num_frames} frames for scene {i}")
            if num_frames == 0:
                continue

            # Create scene frame
            frame = self.create_scene_frame(scene)

            # Save frames for duration
            encoded_path = None
            for _ in range(num_frames):
                frame_path = os.path.join(output_dir, f"frame_{frame_num:05d}.png")
                if encoded_path is None:
                    frame.save(frame_path)
                    encoded_path = frame_path
                else:
                    # Identical image: copying bytes avoids re-encoding the PNG.
                    shutil.copyfile(encoded_path, frame_path)
                frame_paths.append(frame_path)
                frame_num += 1

        logger.info(f"Generated {len(frame_paths)} total frames")
        return frame_paths
static/index.html CHANGED
@@ -645,9 +645,9 @@
645
  </p>
646
  <form id="stickForm">
647
  <div class="form-group">
648
- <label>Script (Keywords: রাজা, বড়লোক, ঘুম, দৌড়, etc.)</label>
649
  <textarea id="stickScript" rows="4"
650
- placeholder="একজন বড়লোক মানুষ সকালে ঘুম থেকে উঠল। সে মনে মনে ভাবল আজকে কি করব। Get ready for success!"></textarea>
651
  </div>
652
  <button type="submit" class="submit-btn">🎭 Generate Stick Figure Video</button>
653
  </form>
 
645
  </p>
646
  <form id="stickForm">
647
  <div class="form-group">
648
+ <label>Script (Keywords: king, rich, sleep, run, walk, think, etc.)</label>
649
  <textarea id="stickScript" rows="4"
650
+ placeholder="A rich man woke up from sleep. He thought about what to do today. Get ready for success!"></textarea>
651
  </div>
652
  <button type="submit" class="submit-btn">🎭 Generate Stick Figure Video</button>
653
  </form>