Commit 4c8e696
Parent(s): e224b41
Add complete Bar Race module with Brain, Scout, Surgeon, Artist, Director architecture

- modules/bar_race/__init__.py +11 -2
- modules/bar_race/assets/fonts/.gitkeep +1 -0
- modules/bar_race/assets/images/.gitkeep +1 -0
- modules/bar_race/assets/music/.gitkeep +3 -0
- modules/bar_race/data/__init__.py +0 -1
- modules/bar_race/data/topic_registry.py +0 -154
- modules/bar_race/router.py +110 -81
- modules/bar_race/schemas.py +32 -28
- modules/bar_race/services/__init__.py +1 -1
- modules/bar_race/services/artist.py +301 -0
- modules/bar_race/services/bar_composer.py +0 -91
- modules/bar_race/services/bar_frame.py +0 -301
- modules/bar_race/services/brain.py +225 -0
- modules/bar_race/services/data_fetcher.py +0 -134
- modules/bar_race/services/director.py +333 -0
- modules/bar_race/services/scout.py +267 -0
- modules/bar_race/services/surgeon.py +327 -0
- requirements.txt +7 -0
- static/index.html +18 -52
modules/bar_race/__init__.py
CHANGED

@@ -1,7 +1,15 @@
 """
 Bar Race Module
-
-
+Intelligent Bar Chart Race Video Generator.
+
+Architecture:
+- Brain: LLM Planner (Gemini)
+- Scout: Data Fetcher (APIs + Scraping)
+- Surgeon: Data Cleaner
+- Artist: Image Processor
+- Director: Video Generator
+
+100% standalone - no dependency on other modules.
 """
 import logging
 from fastapi import FastAPI

@@ -15,6 +23,7 @@ MODULE_DESCRIPTION = "Bar Chart Race Video Generator"
 
 _app = None
 
+
 def register(app: FastAPI, config=None):
     """Register Bar Race module routes"""
     global _app

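For orientation, a minimal sketch of how a host application might call the register() hook shown above. The module path and function come from this diff; the host-side app and file are assumptions for illustration, not part of this commit.

# Hypothetical host-side wiring (illustrative only).
from fastapi import FastAPI

from modules.bar_race import register  # register(app, config=None) as added in this diff

app = FastAPI()
register(app)  # assumed to mount the module's routes on the shared app
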
modules/bar_race/assets/fonts/.gitkeep
ADDED

@@ -0,0 +1 @@
+# Custom fonts for video rendering

modules/bar_race/assets/images/.gitkeep
ADDED

@@ -0,0 +1 @@
+# Entity images will be downloaded here during video generation

modules/bar_race/assets/music/.gitkeep
ADDED

@@ -0,0 +1,3 @@
+# Optional background music files
+# Supported formats: .mp3, .wav, .m4a, .ogg
+# Music will be automatically added if files exist here

modules/bar_race/data/__init__.py
DELETED

@@ -1 +0,0 @@
-# Data directory init

modules/bar_race/data/topic_registry.py
DELETED

@@ -1,154 +0,0 @@
-"""
-Topic Registry
-50+ pre-configured topics for bar chart race videos.
-Each topic has: title, unit, category, data source info.
-"""
-
-TOPICS = {
-    # =============================================
-    # ECONOMICS & FINANCE (most popular)
-    # =============================================
-    "gdp_nominal": {
-        "id": "gdp_nominal",
-        "title": "Richest Countries by GDP (Nominal)",
-        "unit": "Trillion USD",
-        "category": "economics",
-        "year_range": (1960, 2024),
-        "top_n": 10,
-        "description": "List of the world's richest countries",
-        "bar_color": "#4CAF50",  # Green
-    },
-    "gdp_ppp": {
-        "id": "gdp_ppp",
-        "title": "Countries by GDP (PPP)",
-        "unit": "Trillion USD",
-        "category": "economics",
-        "year_range": (1990, 2024),
-        "top_n": 10,
-        "description": "Richest countries by purchasing power",
-        "bar_color": "#2196F3",  # Blue
-    },
-    "gdp_per_capita": {
-        "id": "gdp_per_capita",
-        "title": "Richest Countries by GDP Per Capita",
-        "unit": "USD",
-        "category": "economics",
-        "year_range": (1960, 2024),
-        "top_n": 10,
-        "description": "Richest countries by per-capita income",
-        "bar_color": "#FF9800",  # Orange
-    },
-
-    # =============================================
-    # DEMOGRAPHICS & SOCIETY
-    # =============================================
-    "population": {
-        "id": "population",
-        "title": "Most Populated Countries",
-        "unit": "Million",
-        "category": "demographics",
-        "year_range": (1960, 2024),
-        "top_n": 10,
-        "description": "The world's most populous countries",
-        "bar_color": "#9C27B0",  # Purple
-    },
-    "life_expectancy": {
-        "id": "life_expectancy",
-        "title": "Countries by Life Expectancy",
-        "unit": "Years",
-        "category": "demographics",
-        "year_range": (1960, 2024),
-        "top_n": 10,
-        "description": "How long people in each country live on average",
-        "bar_color": "#E91E63",  # Pink
-    },
-
-    # =============================================
-    # TECH & DIGITAL
-    # =============================================
-    "social_media_users": {
-        "id": "social_media_users",
-        "title": "Social Media Platforms by Users",
-        "unit": "Billion Users",
-        "category": "tech",
-        "year_range": (2004, 2024),
-        "top_n": 10,
-        "description": "Facebook, YouTube, TikTok user counts",
-        "bar_color": "#00BCD4",  # Cyan
-    },
-    "browser_market_share": {
-        "id": "browser_market_share",
-        "title": "Browser Market Share",
-        "unit": "% Share",
-        "category": "tech",
-        "year_range": (2008, 2024),
-        "top_n": 8,
-        "description": "Chrome, Firefox, Edge market share",
-        "bar_color": "#3F51B5",  # Indigo
-    },
-
-    # =============================================
-    # ENTERTAINMENT
-    # =============================================
-    "youtube_subscribers": {
-        "id": "youtube_subscribers",
-        "title": "Most Subscribed YouTube Channels",
-        "unit": "Million Subscribers",
-        "category": "entertainment",
-        "year_range": (2010, 2024),
-        "top_n": 10,
-        "description": "T-Series vs MrBeast vs PewDiePie",
-        "bar_color": "#F44336",  # Red (YouTube)
-    },
-
-    # =============================================
-    # SPORTS
-    # =============================================
-    "olympic_medals": {
-        "id": "olympic_medals",
-        "title": "Countries by Olympic Gold Medals",
-        "unit": "Gold Medals",
-        "category": "sports",
-        "year_range": (1896, 2024),
-        "top_n": 10,
-        "description": "How many Olympic golds each country has won",
-        "bar_color": "#FFD700",  # Gold
-    },
-
-    # =============================================
-    # GEOPOLITICS
-    # =============================================
-    "military_spending": {
-        "id": "military_spending",
-        "title": "Countries by Military Expenditure",
-        "unit": "Billion USD",
-        "category": "geopolitics",
-        "year_range": (1990, 2024),
-        "top_n": 10,
-        "description": "How much each country spends on its military",
-        "bar_color": "#795548",  # Brown
-    },
-}
-
-
-def get_topic(topic_id: str) -> dict:
-    """Get topic configuration by ID"""
-    return TOPICS.get(topic_id.lower())
-
-
-def list_topics() -> list:
-    """List all available topics"""
-    return [
-        {
-            "id": t["id"],
-            "title": t["title"],
-            "category": t["category"],
-            "description": t["description"]
-        }
-        for t in TOPICS.values()
-    ]
-
-
-def get_topics_by_category(category: str) -> list:
-    """Get topics filtered by category"""
-    return [t for t in TOPICS.values() if t["category"] == category]

modules/bar_race/router.py
CHANGED

@@ -5,16 +5,13 @@ API endpoints for bar chart race video generation.
 import logging
 import os
 import uuid
+import shutil
 import traceback
 from typing import Dict
 from fastapi import APIRouter, BackgroundTasks, HTTPException
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse, RedirectResponse
 
-from .schemas import BarRaceRequest, JobResponse, JobStatus
-from .data.topic_registry import TOPICS, get_topic, list_topics
-from .services.data_fetcher import DataFetcher
-from .services.bar_frame import BarFrameGenerator
-from .services.bar_composer import BarComposer
+from .schemas import BarRaceRequest, JobResponse, JobStatus
 
 logger = logging.getLogger(__name__)
 

@@ -24,15 +21,24 @@ router = APIRouter()
 jobs: Dict[str, dict] = {}
 
 
-def update_job(
+def update_job(
+    job_id: str,
+    status: str,
+    progress: int = 0,
+    current_step: str = None,
+    video_url: str = None,
+    error: str = None
+):
     """Update job status"""
     if job_id in jobs:
         jobs[job_id].update({
             "status": status,
             "progress": progress,
+            "current_step": current_step,
             "video_url": video_url,
             "error": error
         })
+    logger.debug(f"Job {job_id}: {status} ({progress}%) - {current_step}")
 
 
 async def generate_bar_race_video(job_id: str, request: BarRaceRequest):

@@ -40,93 +46,119 @@ async def generate_bar_race_video(job_id: str, request: BarRaceRequest):
     temp_dir = f"temp/bar_race_{job_id}"
 
     try:
-        update_job(job_id, "processing", 5)
         os.makedirs(temp_dir, exist_ok=True)
 
-        # Get
-        if not topic_config:
-            topic_config = {
-                "title": request.topic.replace("_", " ").title(),
-                "unit": "Value",
-            }
-
-        title = topic_config.get("title", request.topic)
-        unit = topic_config.get("unit", "")
-
-        update_job(job_id, "processing", 10)
-        logger.info(f"Fetching data for topic: {request.topic}")
-
-        # Fetch data
-        data_fetcher = DataFetcher()
-        all_data = data_fetcher.fetch_data(
-            topic_id=request.topic,
-            year_start=request.year_start,
-            year_end=request.year_end,
-            top_n=request.top_n
-        )
-
-        video_url = f"/api/bar-race/video/{job_id}"
-
+        # Get API key from environment
+        gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+        # ============ BRAIN ============
+        update_job(job_id, "processing", 5, "Brain: Analyzing topic...")
+
+        from .services.brain import Brain
+        brain = Brain(gemini_api_key=gemini_api_key)
+        plan = brain.generate_plan(request.topic)
+
+        if not plan:
+            raise Exception("Brain failed to generate plan")
+
+        logger.info(f"Brain: Generated plan for entity_type={plan.get('entity_type')}")
+
+        # ============ SCOUT ============
+        update_job(job_id, "processing", 15, "Scout: Fetching data...")
+
+        from .services.scout import Scout
+        scout = Scout(temp_dir=temp_dir)
+        raw_df = scout.fetch_data(plan)
+
+        if raw_df is None or raw_df.empty:
+            raise Exception("Scout failed to fetch data")
+
+        logger.info(f"Scout: Fetched {len(raw_df)} rows")
+
+        # ============ SURGEON ============
+        update_job(job_id, "processing", 35, "Surgeon: Cleaning data...")
+
+        from .services.surgeon import Surgeon
+        surgeon = Surgeon(temp_dir=temp_dir)
+        clean_df = surgeon.clean_data(raw_df, plan)
+
+        if clean_df is None or clean_df.empty:
+            raise Exception("Surgeon failed to clean data")
+
+        logger.info(f"Surgeon: Cleaned data, {len(clean_df)} rows, {clean_df['name'].nunique()} entities")
+
+        # ============ ARTIST ============
+        update_job(job_id, "processing", 50, "Artist: Processing images...")
+
+        from .services.artist import Artist
+        artist = Artist(temp_dir=temp_dir)
+        entities = clean_df["name"].unique().tolist()
+        entity_type = plan.get("entity_type", "general")
+        image_paths = artist.process_entities(entities, entity_type)
+
+        logger.info(f"Artist: Processed {len(image_paths)} images")
+
+        # ============ DIRECTOR ============
+        update_job(job_id, "processing", 65, "Director: Generating video...")
+
+        from .services.director import Director
+        director = Director(temp_dir=temp_dir)
+        video_path = director.generate_video(
+            df=clean_df,
+            plan=plan,
+            image_paths=image_paths,
+            duration_seconds=request.duration_seconds,
+            job_id=job_id
+        )
+
+        if not video_path or not os.path.exists(video_path):
+            raise Exception("Director failed to generate video")
+
+        logger.info(f"Director: Generated video at {video_path}")
+
+        # ============ UPLOAD TO HF ============
+        update_job(job_id, "processing", 85, "Uploading to cloud storage...")
+
+        video_url = None
+        try:
+            from modules.shared.services.hf_storage import get_hf_storage
+            hf_storage = get_hf_storage()
+
+            if hf_storage and hf_storage.enabled:
+                # Upload video
+                uploaded_url = hf_storage.upload_file(
+                    local_path=video_path,
+                    remote_path=f"bar_race/{job_id}.mp4"
+                )
+                if uploaded_url:
+                    video_url = uploaded_url
+                    logger.info(f"Uploaded to HF: {video_url}")
+        except Exception as e:
+            logger.warning(f"HF upload failed, using local: {e}")
+
+        # Fallback to local URL
+        if not video_url:
+            video_url = f"/api/bar-race/video/{job_id}"
+
+        # ============ SUCCESS ============
+        update_job(job_id, "ready", 100, "Complete", video_url=video_url)
         logger.info(f"Bar race video ready: {video_url}")
 
+        # Cleanup temp files (only on success)
+        try:
+            if os.path.exists(temp_dir):
+                shutil.rmtree(temp_dir)
+                logger.info(f"Cleaned up temp directory: {temp_dir}")
+        except Exception as e:
+            logger.warning(f"Cleanup failed: {e}")
+
     except Exception as e:
         logger.error(f"Bar race generation failed: {e}")
         logger.error(traceback.format_exc())
         update_job(job_id, "failed", error=str(e))
-
-        if os.path.exists(temp_dir):
-            import shutil
-            shutil.rmtree(temp_dir, ignore_errors=True)
-
-
-@router.get("/topics")
-async def get_available_topics():
-    """Get list of available topics"""
-    return {
-        "topics": list_topics(),
-        "total": len(TOPICS)
-    }
+
+        # Keep temp files for debugging on failure
+        logger.info(f"Keeping temp directory for debugging: {temp_dir}")
 
 
 @router.post("/generate", response_model=JobResponse)

@@ -134,19 +166,16 @@ async def generate_bar_race(request: BarRaceRequest, background_tasks: Backgroun
     """
     Generate a bar chart race video.
 
+    Takes a topic and duration, returns job_id to track progress.
     """
     job_id = str(uuid.uuid4())[:8]
 
-    # Validate year range
-    if request.year_start >= request.year_end:
-        raise HTTPException(400, "year_start must be less than year_end")
-
     # Initialize job
     jobs[job_id] = {
        "job_id": job_id,
        "status": "queued",
        "progress": 0,
+       "current_step": "Initializing...",
        "video_url": None,
        "error": None
    }

@@ -157,7 +186,7 @@ async def generate_bar_race(request: BarRaceRequest, background_tasks: Backgroun
     return JobResponse(
         job_id=job_id,
         status="queued",
-        message=f"Bar race
+        message=f"Bar race generation started for topic: {request.topic}"
     )
 

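A hedged client-side sketch of driving this flow. The mount prefix /api/bar-race is inferred from the video_url built above, and a /status/{job_id} endpoint reading the jobs dict is assumed (it is not shown in this hunk); the request body fields come from BarRaceRequest in schemas.py below.

# Hypothetical client; base URL and status endpoint are assumptions.
import time
import requests

BASE = "http://localhost:8000/api/bar-race"  # assumed mount prefix

job = requests.post(f"{BASE}/generate", json={
    "topic": "Top 10 richest countries by GDP 2000-2024",
    "duration_seconds": 60,
}).json()

# Poll the assumed status endpoint until the background task finishes.
while True:
    status = requests.get(f"{BASE}/status/{job['job_id']}").json()
    print(status["progress"], status.get("current_step"))
    if status["status"] in ("ready", "failed"):
        break
    time.sleep(5)

if status["status"] == "ready":
    print("Video at:", status["video_url"])
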
modules/bar_race/schemas.py
CHANGED

@@ -3,40 +3,43 @@ Bar Race Schemas
 Pydantic models for bar chart race video generation.
 """
 from pydantic import BaseModel, Field
-from typing import Optional, List
+from typing import Optional, List, Dict, Any
 from enum import Enum
 
 
-class
-    SPORTS = "sports"
-    GEOPOLITICS = "geopolitics"
-    ENVIRONMENT = "environment"
-    UNIQUE = "unique"
+class EntityType(str, Enum):
+    """Type of entities in the bar chart"""
+    PERSON = "person"
+    COUNTRY = "country"
+    COMPANY = "company"
+    GENERAL = "general"
 
 
 class BarRaceRequest(BaseModel):
     """Request to generate a bar chart race video"""
-    topic: str = Field(..., description="Topic
+    topic: str = Field(..., description="Topic/prompt for video (e.g., 'Top 10 richest cricketers')")
+    duration_seconds: int = Field(60, ge=30, le=120, description="Video duration in seconds")
+
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "topic": "Top 10 richest countries by GDP 2000-2024",
+                "duration_seconds": 60
+            }
+        }
+
+
+class BrainPlan(BaseModel):
+    """JSON plan generated by Brain (LLM)"""
+    topic: str
+    entity_type: EntityType
+    time_config: Dict[str, Any]
+    value_intent: Dict[str, Any]
+    search_strategies: List[Dict[str, Any]]
+    source_priority: List[str]
+    data_expectation: Dict[str, Any]
+    visualization: Dict[str, Any]
+    video_meta: Dict[str, Any]
 
 
 class JobResponse(BaseModel):

@@ -49,7 +52,8 @@
 class JobStatus(BaseModel):
     """Job status response"""
     job_id: str
-    status: str # queued,
+    status: str # queued, brain, scout, surgeon, artist, director, uploading, ready, failed
     progress: int = 0
+    current_step: Optional[str] = None
     video_url: Optional[str] = None
     error: Optional[str] = None

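To illustrate the new current_step field, a JobStatus payload midway through a run might look like the sketch below; the values are invented for the example, while the step string matches those set by the router above.

# Illustrative JobStatus payload partway through a job (values are examples only).
example_status = {
    "job_id": "a1b2c3d4",
    "status": "processing",
    "progress": 35,
    "current_step": "Surgeon: Cleaning data...",
    "video_url": None,
    "error": None,
}
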
modules/bar_race/services/__init__.py
CHANGED

@@ -1 +1 @@
-# Services
+# Services package

modules/bar_race/services/artist.py
ADDED

@@ -0,0 +1,301 @@
+"""
+Artist - Image Processor
+Downloads and processes entity images for bar chart race.
+"""
+import logging
+import requests
+import os
+from PIL import Image, ImageDraw
+from typing import Dict, Any, List, Optional
+from io import BytesIO
+
+logger = logging.getLogger(__name__)
+
+
+class Artist:
+    """
+    Image Processor for Bar Race video generation.
+
+    Responsibilities:
+    - Search and download entity images
+    - Background removal (optional, if rembg available)
+    - Face detection for person entities
+    - Circular mask application
+    """
+
+    HEADERS = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+    }
+
+    # Image size for bar chart
+    IMAGE_SIZE = 80
+
+    def __init__(self, temp_dir: str):
+        self.temp_dir = temp_dir
+        self.images_dir = os.path.join(temp_dir, "images")
+        os.makedirs(self.images_dir, exist_ok=True)
+
+        # Check if rembg is available
+        self.rembg_available = False
+        try:
+            import rembg
+            self.rembg_available = True
+            logger.info("Artist: rembg available for background removal")
+        except ImportError:
+            logger.info("Artist: rembg not available, skipping background removal")
+
+    def process_entities(self, entities: List[str], entity_type: str) -> Dict[str, str]:
+        """
+        Download and process images for all entities.
+
+        Args:
+            entities: List of entity names
+            entity_type: Type of entity (person, country, company, general)
+
+        Returns:
+            Dict mapping entity name to processed image path
+        """
+        logger.info(f"Artist: Processing images for {len(entities)} entities (type: {entity_type})")
+
+        image_paths = {}
+
+        for entity in entities:
+            try:
+                image_path = self._process_entity(entity, entity_type)
+                if image_path:
+                    image_paths[entity] = image_path
+                    logger.debug(f"Artist: Processed image for {entity}")
+                else:
+                    logger.warning(f"Artist: No image found for {entity}")
+            except Exception as e:
+                logger.warning(f"Artist: Failed to process {entity}: {e}")
+
+        logger.info(f"Artist: Processed {len(image_paths)}/{len(entities)} images")
+        return image_paths
+
+    def _process_entity(self, entity: str, entity_type: str) -> Optional[str]:
+        """Process a single entity's image"""
+        # Try to get image
+        image = self._get_image(entity, entity_type)
+
+        if image is None:
+            return None
+
+        # Process image
+        try:
+            # Resize to square
+            image = image.convert("RGBA")
+            image = self._resize_to_square(image)
+
+            # Remove background if rembg available and it's a person
+            if self.rembg_available and entity_type == "person":
+                image = self._remove_background(image)
+
+            # Apply circular mask
+            image = self._apply_circular_mask(image)
+
+            # Save processed image
+            safe_name = "".join(c if c.isalnum() else "_" for c in entity)
+            output_path = os.path.join(self.images_dir, f"{safe_name}.png")
+            image.save(output_path, "PNG")
+
+            return output_path
+
+        except Exception as e:
+            logger.error(f"Artist: Error processing image for {entity}: {e}")
+            return None
+
+    def _get_image(self, entity: str, entity_type: str) -> Optional[Image.Image]:
+        """Get image for an entity"""
+
+        # Priority 1: Wikipedia Commons
+        image = self._search_wikipedia_commons(entity, entity_type)
+        if image:
+            return image
+
+        # Priority 2: DuckDuckGo image search
+        image = self._search_duckduckgo(entity, entity_type)
+        if image:
+            return image
+
+        # Priority 3: Generate placeholder
+        return self._generate_placeholder(entity)
+
+    def _search_wikipedia_commons(self, entity: str, entity_type: str) -> Optional[Image.Image]:
+        """Search Wikipedia Commons for entity image"""
+        try:
+            # For countries, search for flag
+            if entity_type == "country":
+                search_query = f"Flag of {entity}"
+            else:
+                search_query = entity
+
+            # Wikipedia API search
+            search_url = "https://en.wikipedia.org/w/api.php"
+            params = {
+                "action": "query",
+                "titles": search_query,
+                "prop": "pageimages",
+                "format": "json",
+                "pithumbsize": 200
+            }
+
+            response = requests.get(search_url, params=params, headers=self.HEADERS, timeout=10)
+            if response.status_code == 200:
+                data = response.json()
+                pages = data.get("query", {}).get("pages", {})
+
+                for page_id, page_data in pages.items():
+                    if "thumbnail" in page_data:
+                        image_url = page_data["thumbnail"]["source"]
+                        return self._download_image(image_url)
+
+        except Exception as e:
+            logger.debug(f"Artist: Wikipedia Commons search failed for {entity}: {e}")
+
+        return None
+
+    def _search_duckduckgo(self, entity: str, entity_type: str) -> Optional[Image.Image]:
+        """Search DuckDuckGo for entity image"""
+        try:
+            from duckduckgo_search import DDGS
+
+            # Build search query
+            if entity_type == "country":
+                query = f"{entity} flag icon"
+            elif entity_type == "person":
+                query = f"{entity} portrait photo"
+            else:
+                query = f"{entity} logo"
+
+            with DDGS() as ddgs:
+                results = list(ddgs.images(query, max_results=3))
+
+            for result in results:
+                image_url = result.get("image")
+                if image_url:
+                    image = self._download_image(image_url)
+                    if image:
+                        return image
+
+        except ImportError:
+            logger.debug("Artist: duckduckgo-search not available")
+        except Exception as e:
+            logger.debug(f"Artist: DuckDuckGo search failed for {entity}: {e}")
+
+        return None
+
+    def _download_image(self, url: str) -> Optional[Image.Image]:
+        """Download image from URL"""
+        try:
+            response = requests.get(url, headers=self.HEADERS, timeout=10)
+            if response.status_code == 200:
+                return Image.open(BytesIO(response.content))
+        except Exception as e:
+            logger.debug(f"Artist: Failed to download image: {e}")
+
+        return None
+
+    def _resize_to_square(self, image: Image.Image) -> Image.Image:
+        """Resize image to square, center cropping if needed"""
+        width, height = image.size
+
+        # Determine crop box for square
+        if width > height:
+            left = (width - height) // 2
+            top = 0
+            right = left + height
+            bottom = height
+        else:
+            left = 0
+            top = (height - width) // 2
+            right = width
+            bottom = top + width
+
+        # Crop to square
+        image = image.crop((left, top, right, bottom))
+
+        # Resize to target size
+        image = image.resize((self.IMAGE_SIZE, self.IMAGE_SIZE), Image.Resampling.LANCZOS)
+
+        return image
+
+    def _remove_background(self, image: Image.Image) -> Image.Image:
+        """Remove background using rembg"""
+        try:
+            import rembg
+
+            # Convert to bytes
+            img_bytes = BytesIO()
+            image.save(img_bytes, format="PNG")
+            img_bytes.seek(0)
+
+            # Remove background
+            output = rembg.remove(img_bytes.getvalue())
+
+            return Image.open(BytesIO(output))
+
+        except Exception as e:
+            logger.warning(f"Artist: Background removal failed: {e}")
+            return image
+
+    def _apply_circular_mask(self, image: Image.Image) -> Image.Image:
+        """Apply circular mask to image"""
+        # Ensure RGBA
+        if image.mode != "RGBA":
+            image = image.convert("RGBA")
+
+        size = image.size[0]
+
+        # Create circular mask
+        mask = Image.new("L", (size, size), 0)
+        draw = ImageDraw.Draw(mask)
+        draw.ellipse((0, 0, size, size), fill=255)
+
+        # Apply mask
+        output = Image.new("RGBA", (size, size), (0, 0, 0, 0))
+        output.paste(image, (0, 0), mask)
+
+        return output
+
+    def _generate_placeholder(self, entity: str) -> Image.Image:
+        """Generate a placeholder image with entity initial"""
+        size = self.IMAGE_SIZE
+
+        # Create colored background
+        colors = [
+            (74, 222, 128),   # Green
+            (251, 191, 36),   # Yellow
+            (239, 68, 68),    # Red
+            (59, 130, 246),   # Blue
+            (168, 85, 247),   # Purple
+            (20, 184, 166),   # Teal
+        ]
+
+        # Pick color based on entity name hash
+        color = colors[hash(entity) % len(colors)]
+
+        # Create image
+        image = Image.new("RGBA", (size, size), color)
+        draw = ImageDraw.Draw(image)
+
+        # Draw initial
+        initial = entity[0].upper() if entity else "?"
+
+        # Use default font
+        try:
+            from PIL import ImageFont
+            font = ImageFont.truetype("arial.ttf", size // 2)
+        except:
+            font = ImageFont.load_default()
+
+        # Center text
+        bbox = draw.textbbox((0, 0), initial, font=font)
+        text_width = bbox[2] - bbox[0]
+        text_height = bbox[3] - bbox[1]
+        x = (size - text_width) // 2
+        y = (size - text_height) // 2 - bbox[1]
+
+        draw.text((x, y), initial, fill=(255, 255, 255), font=font)
+
+        return image

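A minimal sketch of exercising Artist on its own, using only names from the file above; the temp directory and entity list are arbitrary examples, not values from this commit.

# Hypothetical standalone use of the Artist service.
from modules.bar_race.services.artist import Artist

artist = Artist(temp_dir="temp/bar_race_demo")
paths = artist.process_entities(["India", "United States", "Japan"], entity_type="country")
for name, path in paths.items():
    print(name, "->", path)  # circular PNGs saved under temp/bar_race_demo/images/
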
modules/bar_race/services/bar_composer.py
DELETED

@@ -1,91 +0,0 @@
-"""
-Bar Composer Service
-Assembles bar race frames into final video.
-No TTS - pure visual animation with optional background music.
-"""
-import logging
-import os
-import shutil
-from pathlib import Path
-from typing import List, Optional
-from moviepy.editor import ImageSequenceClip
-
-logger = logging.getLogger(__name__)
-
-
-class BarComposer:
-    """
-    Composes bar race video from frames.
-    - No TTS dependency
-    - Optional background music support
-    """
-
-    FPS = 30
-
-    def __init__(self, output_dir: str = "videos/bar_race"):
-        self.output_dir = output_dir
-        os.makedirs(output_dir, exist_ok=True)
-
-    def compose_video(
-        self,
-        frame_paths: List[str],
-        output_name: str,
-        fps: int = 30,
-        music_path: Optional[str] = None
-    ) -> str:
-        """
-        Compose video from frame sequence.
-
-        Args:
-            frame_paths: List of frame image paths
-            output_name: Output video filename
-            fps: Frames per second
-            music_path: Optional background music path
-
-        Returns:
-            Path to output video file
-        """
-        output_path = os.path.join(self.output_dir, output_name)
-
-        try:
-            logger.info(f"Composing video from {len(frame_paths)} frames...")
-
-            # Create video clip from frames
-            video_clip = ImageSequenceClip(frame_paths, fps=fps)
-
-            # Add background music if provided
-            if music_path and os.path.exists(music_path):
-                from moviepy.editor import AudioFileClip
-                audio = AudioFileClip(music_path)
-                # Loop or trim audio to match video duration
-                if audio.duration > video_clip.duration:
-                    audio = audio.subclip(0, video_clip.duration)
-                video_clip = video_clip.set_audio(audio)
-
-            # Write video
-            logger.info(f"Writing video to {output_path}")
-            video_clip.write_videofile(
-                output_path,
-                fps=fps,
-                codec="libx264",
-                audio_codec="aac" if music_path else None,
-                preset="medium",
-                threads=4,
-                logger=None
-            )
-
-            # Cleanup
-            video_clip.close()
-
-            logger.info(f"Bar race video complete: {output_path}")
-            return output_path
-
-        except Exception as e:
-            logger.error(f"Failed to compose video: {e}")
-            raise
-
-    def cleanup_frames(self, frame_dir: str):
-        """Remove temporary frame directory"""
-        if os.path.exists(frame_dir):
-            shutil.rmtree(frame_dir)
-            logger.info(f"Cleaned up frames: {frame_dir}")

DELETED
|
@@ -1,301 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Bar Frame Generator
|
| 3 |
-
Creates animated bar chart race frames using Pillow.
|
| 4 |
-
"""
|
| 5 |
-
import logging
|
| 6 |
-
import os
|
| 7 |
-
from PIL import Image, ImageDraw, ImageFont
|
| 8 |
-
from typing import Dict, List, Tuple
|
| 9 |
-
|
| 10 |
-
logger = logging.getLogger(__name__)
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class BarFrameGenerator:
|
| 14 |
-
"""
|
| 15 |
-
Generates frames for bar chart race animation.
|
| 16 |
-
- Horizontal racing bars
|
| 17 |
-
- Smooth interpolation between years
|
| 18 |
-
- Year counter display
|
| 19 |
-
"""
|
| 20 |
-
|
| 21 |
-
# Canvas dimensions (9:16 vertical)
|
| 22 |
-
WIDTH = 1080
|
| 23 |
-
HEIGHT = 1920
|
| 24 |
-
|
| 25 |
-
# Colors
|
| 26 |
-
BG_COLOR = (18, 18, 28) # Dark navy
|
| 27 |
-
TITLE_COLOR = (255, 255, 255) # White
|
| 28 |
-
YEAR_COLOR = (100, 100, 120) # Muted gray
|
| 29 |
-
BAR_LABEL_COLOR = (255, 255, 255) # White
|
| 30 |
-
VALUE_COLOR = (200, 200, 200) # Light gray
|
| 31 |
-
|
| 32 |
-
# Bar colors (will cycle through these)
|
| 33 |
-
BAR_COLORS = [
|
| 34 |
-
(74, 222, 128), # Green
|
| 35 |
-
(251, 191, 36), # Yellow
|
| 36 |
-
(239, 68, 68), # Red
|
| 37 |
-
(59, 130, 246), # Blue
|
| 38 |
-
(168, 85, 247), # Purple
|
| 39 |
-
(20, 184, 166), # Teal
|
| 40 |
-
(249, 115, 22), # Orange
|
| 41 |
-
(236, 72, 153), # Pink
|
| 42 |
-
(34, 197, 94), # Emerald
|
| 43 |
-
(99, 102, 241), # Indigo
|
| 44 |
-
]
|
| 45 |
-
|
| 46 |
-
# Layout
|
| 47 |
-
TITLE_Y = 80
|
| 48 |
-
YEAR_Y = 1700 # Large year at bottom
|
| 49 |
-
BAR_START_Y = 200
|
| 50 |
-
BAR_HEIGHT = 80
|
| 51 |
-
BAR_GAP = 30
|
| 52 |
-
BAR_MAX_WIDTH = 900
|
| 53 |
-
BAR_X_START = 160
|
| 54 |
-
|
| 55 |
-
def __init__(self):
|
| 56 |
-
self._load_fonts()
|
| 57 |
-
self.entity_colors = {} # Cache colors for entities
|
| 58 |
-
|
| 59 |
-
def _load_fonts(self):
|
| 60 |
-
"""Load fonts with fallbacks"""
|
| 61 |
-
font_paths = [
|
| 62 |
-
"C:/Windows/Fonts/arial.ttf",
|
| 63 |
-
"C:/Windows/Fonts/ArialBD.ttf",
|
| 64 |
-
"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
|
| 65 |
-
]
|
| 66 |
-
|
| 67 |
-
self.font_title = None
|
| 68 |
-
self.font_label = None
|
| 69 |
-
self.font_value = None
|
| 70 |
-
self.font_year = None
|
| 71 |
-
|
| 72 |
-
for path in font_paths:
|
| 73 |
-
if os.path.exists(path):
|
| 74 |
-
try:
|
| 75 |
-
self.font_title = ImageFont.truetype(path, 52)
|
| 76 |
-
self.font_label = ImageFont.truetype(path, 32)
|
| 77 |
-
self.font_value = ImageFont.truetype(path, 28)
|
| 78 |
-
self.font_year = ImageFont.truetype(path, 200)
|
| 79 |
-
logger.info(f"Loaded font: {path}")
|
| 80 |
-
break
|
| 81 |
-
except Exception as e:
|
| 82 |
-
logger.warning(f"Failed to load font {path}: {e}")
|
| 83 |
-
|
| 84 |
-
if not self.font_title:
|
| 85 |
-
self.font_title = ImageFont.load_default()
|
| 86 |
-
self.font_label = ImageFont.load_default()
|
| 87 |
-
self.font_value = ImageFont.load_default()
|
| 88 |
-
self.font_year = ImageFont.load_default()
|
| 89 |
-
logger.warning("Using default font")
|
| 90 |
-
|
| 91 |
-
def _get_entity_color(self, entity: str) -> Tuple[int, int, int]:
|
| 92 |
-
"""Get consistent color for an entity"""
|
| 93 |
-
if entity not in self.entity_colors:
|
| 94 |
-
color_index = len(self.entity_colors) % len(self.BAR_COLORS)
|
| 95 |
-
self.entity_colors[entity] = self.BAR_COLORS[color_index]
|
| 96 |
-
return self.entity_colors[entity]
|
| 97 |
-
|
| 98 |
-
def _draw_rounded_rect(self, draw: ImageDraw, bbox: Tuple, fill: Tuple, radius: int = 15):
|
| 99 |
-
"""Draw a rounded rectangle"""
|
| 100 |
-
draw.rounded_rectangle(bbox, radius=radius, fill=fill)
|
| 101 |
-
|
| 102 |
-
def create_frame(
|
| 103 |
-
self,
|
| 104 |
-
title: str,
|
| 105 |
-
unit: str,
|
| 106 |
-
year: float, # Can be fractional for smooth animation
|
| 107 |
-
bars_data: List[Dict], # [{name, value}, ...]
|
| 108 |
-
max_value: float
|
| 109 |
-
) -> Image.Image:
|
| 110 |
-
"""
|
| 111 |
-
Create a single frame of the bar chart race.
|
| 112 |
-
|
| 113 |
-
Args:
|
| 114 |
-
title: Chart title
|
| 115 |
-
unit: Value unit (e.g., "Trillion USD")
|
| 116 |
-
year: Current year (can be fractional)
|
| 117 |
-
bars_data: List of {name, value} sorted by value descending
|
| 118 |
-
max_value: Maximum value for scaling
|
| 119 |
-
"""
|
| 120 |
-
img = Image.new('RGB', (self.WIDTH, self.HEIGHT), self.BG_COLOR)
|
| 121 |
-
draw = ImageDraw.Draw(img)
|
| 122 |
-
|
| 123 |
-
# Title
|
| 124 |
-
title_text = title
|
| 125 |
-
bbox = draw.textbbox((0, 0), title_text, font=self.font_title)
|
| 126 |
-
title_x = (self.WIDTH - (bbox[2] - bbox[0])) // 2
|
| 127 |
-
draw.text((title_x, self.TITLE_Y), title_text, fill=self.TITLE_COLOR, font=self.font_title)
|
| 128 |
-
|
| 129 |
-
# Year (large, at bottom)
|
| 130 |
-
year_text = str(int(year))
|
| 131 |
-
bbox = draw.textbbox((0, 0), year_text, font=self.font_year)
|
| 132 |
-
year_x = (self.WIDTH - (bbox[2] - bbox[0])) // 2
|
| 133 |
-
draw.text((year_x, self.YEAR_Y), year_text, fill=self.YEAR_COLOR, font=self.font_year)
|
| 134 |
-
|
| 135 |
-
# Draw bars
|
| 136 |
-
for i, bar in enumerate(bars_data[:10]): # Max 10 bars
|
| 137 |
-
y = self.BAR_START_Y + i * (self.BAR_HEIGHT + self.BAR_GAP)
|
| 138 |
-
|
| 139 |
-
# Calculate bar width
|
| 140 |
-
bar_width = int((bar["value"] / max_value) * self.BAR_MAX_WIDTH)
|
| 141 |
-
bar_width = max(50, bar_width) # Minimum width
|
| 142 |
-
|
| 143 |
-
# Get color
|
| 144 |
-
color = self._get_entity_color(bar["name"])
|
| 145 |
-
|
| 146 |
-
# Draw bar
|
| 147 |
-
self._draw_rounded_rect(
|
| 148 |
-
draw,
|
| 149 |
-
(self.BAR_X_START, y, self.BAR_X_START + bar_width, y + self.BAR_HEIGHT),
|
| 150 |
-
color,
|
| 151 |
-
radius=10
|
| 152 |
-
)
|
| 153 |
-
|
| 154 |
-
# Draw entity name (inside bar if fits, else to the left)
|
| 155 |
-
name_text = bar["name"]
|
| 156 |
-
name_bbox = draw.textbbox((0, 0), name_text, font=self.font_label)
|
| 157 |
-
name_width = name_bbox[2] - name_bbox[0]
|
| 158 |
-
|
| 159 |
-
if name_width < bar_width - 20:
|
| 160 |
-
# Inside bar
|
| 161 |
-
name_x = self.BAR_X_START + 15
|
| 162 |
-
else:
|
| 163 |
-
# To the left of bar
|
| 164 |
-
name_x = 10
|
| 165 |
-
|
| 166 |
-
name_y = y + (self.BAR_HEIGHT - (name_bbox[3] - name_bbox[1])) // 2
|
| 167 |
-
draw.text((name_x, name_y), name_text, fill=self.BAR_LABEL_COLOR, font=self.font_label)
|
| 168 |
-
|
| 169 |
-
# Draw value (to the right of bar)
|
| 170 |
-
value_text = f"{bar['value']:.1f} {unit}"
|
| 171 |
-
value_bbox = draw.textbbox((0, 0), value_text, font=self.font_value)
|
| 172 |
-
value_x = self.BAR_X_START + bar_width + 15
|
| 173 |
-
value_y = y + (self.BAR_HEIGHT - (value_bbox[3] - value_bbox[1])) // 2
|
| 174 |
-
draw.text((value_x, value_y), value_text, fill=self.VALUE_COLOR, font=self.font_value)
|
| 175 |
-
|
| 176 |
-
return img
|
| 177 |
-
|
| 178 |
-
def interpolate_data(
|
| 179 |
-
self,
|
| 180 |
-
data_start: List[Dict],
|
| 181 |
-
data_end: List[Dict],
|
| 182 |
-
progress: float # 0.0 to 1.0
|
| 183 |
-
) -> List[Dict]:
|
| 184 |
-
"""
|
| 185 |
-
Interpolate between two years of data for smooth animation.
|
| 186 |
-
"""
|
| 187 |
-
# Create lookup for end values
|
| 188 |
-
end_values = {d["name"]: d["value"] for d in data_end}
|
| 189 |
-
|
| 190 |
-
interpolated = []
|
| 191 |
-
for d in data_start:
|
| 192 |
-
name = d["name"]
|
| 193 |
-
start_val = d["value"]
|
| 194 |
-
end_val = end_values.get(name, start_val)
|
| 195 |
-
|
| 196 |
-
# Linear interpolation
|
| 197 |
-
current_val = start_val + (end_val - start_val) * progress
|
| 198 |
-
|
| 199 |
-
interpolated.append({
|
| 200 |
-
"name": name,
|
| 201 |
-
"value": current_val
|
| 202 |
-
})
|
| 203 |
-
|
| 204 |
-
# Sort by current value
|
| 205 |
-
interpolated.sort(key=lambda x: x["value"], reverse=True)
|
| 206 |
-
|
| 207 |
-
return interpolated
|
| 208 |
-
|
| 209 |
-
def generate_frames(
|
| 210 |
-
self,
|
| 211 |
-
title: str,
|
| 212 |
-
unit: str,
|
| 213 |
-
all_data: List[Dict], # [{name, year, value}, ...]
|
| 214 |
-
year_start: int,
|
| 215 |
-
year_end: int,
|
| 216 |
-
fps: int = 30,
|
| 217 |
-
duration_seconds: int = 60,
|
| 218 |
-
output_dir: str = "temp_frames"
|
| 219 |
-
) -> List[str]:
|
| 220 |
-
"""
|
| 221 |
-
Generate all frames for the bar chart race.
|
| 222 |
-
|
| 223 |
-
Returns list of frame file paths.
|
| 224 |
-
"""
|
| 225 |
-
os.makedirs(output_dir, exist_ok=True)
|
| 226 |
-
|
| 227 |
-
# Calculate frames per year
|
| 228 |
-
total_frames = fps * duration_seconds
|
| 229 |
-
years_count = year_end - year_start
|
| 230 |
-
frames_per_year = total_frames / years_count
|
| 231 |
-
|
| 232 |
-
# Find max value for consistent scaling
|
| 233 |
-
max_value = max(d["value"] for d in all_data) * 1.1 # 10% padding
|
| 234 |
-
|
| 235 |
-
# Group data by year
|
| 236 |
-
data_by_year = {}
|
| 237 |
-
for d in all_data:
|
| 238 |
-
year = d["year"]
|
| 239 |
-
if year not in data_by_year:
|
| 240 |
-
data_by_year[year] = []
|
| 241 |
-
data_by_year[year].append({"name": d["name"], "value": d["value"]})
|
| 242 |
-
|
| 243 |
-
# Sort each year's data
|
| 244 |
-
for year in data_by_year:
|
| 245 |
-
data_by_year[year].sort(key=lambda x: x["value"], reverse=True)
|
| 246 |
-
|
| 247 |
-
frame_paths = []
|
| 248 |
-
frame_num = 0
|
| 249 |
-
|
| 250 |
-
for year in range(year_start, year_end):
|
| 251 |
-
# Get data for current and next year
|
| 252 |
-
current_data = data_by_year.get(year, [])
|
| 253 |
-
next_data = data_by_year.get(year + 1, current_data)
|
| 254 |
-
|
| 255 |
-
# Generate frames for this year transition
|
| 256 |
-
frames_for_this_year = int(frames_per_year)
|
| 257 |
-
|
| 258 |
-
for f in range(frames_for_this_year):
|
| 259 |
-
progress = f / frames_for_this_year
|
| 260 |
-
|
| 261 |
-
# Interpolate data
|
| 262 |
-
interpolated = self.interpolate_data(current_data, next_data, progress)
|
| 263 |
-
|
| 264 |
-
# Calculate display year (fractional)
|
| 265 |
-
display_year = year + progress
|
| 266 |
-
|
| 267 |
-
# Create frame
|
| 268 |
-
frame = self.create_frame(
|
| 269 |
-
title=title,
|
| 270 |
-
unit=unit,
|
| 271 |
-
year=display_year,
|
| 272 |
-
bars_data=interpolated,
|
| 273 |
-
max_value=max_value
|
| 274 |
-
)
|
| 275 |
-
|
| 276 |
-
# Save frame
|
| 277 |
-
frame_path = os.path.join(output_dir, f"frame_{frame_num:05d}.png")
|
| 278 |
-
frame.save(frame_path)
|
| 279 |
-
frame_paths.append(frame_path)
|
| 280 |
-
frame_num += 1
|
| 281 |
-
|
| 282 |
-
if frame_num % 100 == 0:
|
| 283 |
-
logger.info(f"Generated {frame_num} frames...")
|
| 284 |
-
|
| 285 |
-
# Add final frames for end year
|
| 286 |
-
final_data = data_by_year.get(year_end, [])
|
| 287 |
-
for _ in range(fps): # 1 second on final year
|
| 288 |
-
frame = self.create_frame(
|
| 289 |
-
title=title,
|
| 290 |
-
unit=unit,
|
| 291 |
-
year=year_end,
|
| 292 |
-
bars_data=final_data,
|
| 293 |
-
max_value=max_value
|
| 294 |
-
)
|
| 295 |
-
frame_path = os.path.join(output_dir, f"frame_{frame_num:05d}.png")
|
| 296 |
-
frame.save(frame_path)
|
| 297 |
-
frame_paths.append(frame_path)
|
| 298 |
-
frame_num += 1
|
| 299 |
-
|
| 300 |
-
logger.info(f"Generated total {len(frame_paths)} frames")
|
| 301 |
-
return frame_paths
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/bar_race/services/brain.py
ADDED
@@ -0,0 +1,225 @@
"""
Brain - LLM Planner
Uses Gemini API to understand user topic and generate structured JSON plan.
"""
import logging
import json
import os
from typing import Dict, Any, Optional

logger = logging.getLogger(__name__)


class Brain:
    """
    LLM Planner for Bar Race video generation.

    Responsibilities:
    - Topic understanding & decomposition
    - Entity type detection (person, country, company)
    - Search strategy generation
    - Data source priority determination
    - Visualization config generation
    """

    GEMINI_MODEL = "gemma-3-27b-it"

    SYSTEM_PROMPT = """You are an expert data analyst and video planner. Your task is to analyze a user's topic and create a structured JSON plan for generating a bar chart race video.

Given a topic like "Top 10 richest cricketers history" or "GDP by country 2000-2024", you must output a JSON plan with:

1. entity_type: "person", "country", "company", or "general"
2. time_config: start_year, end_year, granularity (year/month)
3. value_intent: what values to track (net worth, GDP, population, etc.)
4. search_strategies: list of search queries to find data
5. source_priority: ["wikipedia_rest", "worldbank_api", "web_scraping"]
6. data_expectation: numeric, sparse, needs_interpolation
7. visualization: type, top_n, smooth
8. video_meta: title for the video

IMPORTANT: Output ONLY valid JSON, no other text."""

    PLAN_TEMPLATE = """{
    "topic": "{topic}",
    "entity_type": "country",
    "time_config": {
        "start_year": 2000,
        "end_year": 2024,
        "granularity": "year"
    },
    "value_intent": {
        "primary": "GDP",
        "unit": "Trillion USD",
        "alternatives": ["gross domestic product", "economic output"]
    },
    "search_strategies": [
        {
            "intent": "ranking_history",
            "queries": [
                "GDP by country by year wikipedia",
                "world GDP ranking history table"
            ]
        }
    ],
    "source_priority": ["wikipedia_rest", "worldbank_api", "web_scraping"],
    "data_expectation": {
        "numeric": true,
        "sparse": false,
        "needs_interpolation": false
    },
    "visualization": {
        "type": "bar_chart_race",
        "top_n": 10,
        "smooth": true
    },
    "video_meta": {
        "title": "Top 10 Countries by GDP (2000-2024)"
    }
}"""

    def __init__(self, gemini_api_key: str = None):
        self.gemini_api_key = gemini_api_key or os.getenv("GEMINI_API_KEY")
        self.gemini_client = None

        if self.gemini_api_key:
            try:
                from google import genai
                self.gemini_client = genai.Client(api_key=self.gemini_api_key)
                logger.info("Brain: Gemini client initialized")
            except ImportError:
                logger.warning("google-genai package not installed")
        else:
            logger.warning("Brain: No Gemini API key, will use template-based planning")

    def generate_plan(self, topic: str) -> Dict[str, Any]:
        """
        Generate a structured plan from user topic.

        Args:
            topic: User's topic string

        Returns:
            Dict containing the structured plan
        """
        logger.info(f"Brain: Generating plan for topic: {topic}")

        if self.gemini_client:
            try:
                plan = self._generate_with_gemini(topic)
                if plan:
                    logger.info("Brain: Plan generated with Gemini")
                    return plan
            except Exception as e:
                logger.warning(f"Brain: Gemini failed, using fallback: {e}")

        # Fallback: Template-based planning
        return self._generate_fallback_plan(topic)

    def _generate_with_gemini(self, topic: str) -> Optional[Dict[str, Any]]:
        """Generate plan using Gemini API"""
        prompt = f"""Analyze this topic and create a JSON plan for a bar chart race video:

Topic: {topic}

{self.SYSTEM_PROMPT}

Output the JSON plan:"""

        response = self.gemini_client.models.generate_content(
            model=self.GEMINI_MODEL,
            contents=prompt
        )

        # Parse JSON from response
        text = response.text.strip()

        # Extract JSON if wrapped in markdown
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0].strip()
        elif "```" in text:
            text = text.split("```")[1].split("```")[0].strip()

        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            logger.error(f"Brain: Failed to parse Gemini response as JSON: {e}")
            return None

    def _generate_fallback_plan(self, topic: str) -> Dict[str, Any]:
        """Generate plan using simple heuristics when Gemini fails"""
        logger.info("Brain: Using fallback template-based planning")

        topic_lower = topic.lower()

        # Detect entity type
        if any(word in topic_lower for word in ["person", "cricketer", "player", "actor", "singer", "celebrity", "billionaire"]):
            entity_type = "person"
        elif any(word in topic_lower for word in ["country", "nation", "gdp", "population", "military"]):
            entity_type = "country"
        elif any(word in topic_lower for word in ["company", "brand", "corporation", "business"]):
            entity_type = "company"
        else:
            entity_type = "general"

        # Detect value intent
        if "gdp" in topic_lower:
            value_primary = "GDP"
            value_unit = "Trillion USD"
        elif "population" in topic_lower:
            value_primary = "population"
            value_unit = "Million"
        elif "rich" in topic_lower or "wealth" in topic_lower or "net worth" in topic_lower:
            value_primary = "net worth"
            value_unit = "Billion USD"
        elif "subscriber" in topic_lower:
            value_primary = "subscribers"
            value_unit = "Million"
        else:
            value_primary = "value"
            value_unit = ""

        # Generate search queries
        search_queries = [
            f"{topic} wikipedia",
            f"{topic} by year table",
            f"{topic} history data"
        ]

        # Build plan
        plan = {
            "topic": topic,
            "entity_type": entity_type,
            "time_config": {
                "start_year": 2000,
                "end_year": 2024,
                "granularity": "year"
            },
            "value_intent": {
                "primary": value_primary,
                "unit": value_unit,
                "alternatives": []
            },
            "search_strategies": [
                {
                    "intent": "ranking_history",
                    "queries": search_queries
                }
            ],
            "source_priority": ["wikipedia_rest", "worldbank_api", "web_scraping"],
            "data_expectation": {
                "numeric": True,
                "sparse": True,
                "needs_interpolation": True
            },
            "visualization": {
                "type": "bar_chart_race",
                "top_n": 10,
                "smooth": True
            },
            "video_meta": {
                "title": f"{topic} Evolution"
            }
        }

        logger.info(f"Brain: Generated fallback plan for entity_type={entity_type}")
        return plan
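For orientation, a minimal usage sketch of the planner (not part of the commit; the import path is assumed from the module layout above, and without GEMINI_API_KEY the heuristic fallback path is used):

# Hypothetical usage sketch, not shipped in this commit
from modules.bar_race.services.brain import Brain

brain = Brain()  # reads GEMINI_API_KEY from the environment if present
plan = brain.generate_plan("Top 10 countries by GDP 2000-2024")

# The plan drives the rest of the pipeline (Scout, Surgeon, Director)
print(plan["entity_type"])            # "country" via the heuristic fallback
print(plan["source_priority"])        # ["wikipedia_rest", "worldbank_api", "web_scraping"]
print(plan["video_meta"]["title"])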
modules/bar_race/services/data_fetcher.py
DELETED
@@ -1,134 +0,0 @@
"""
Data Fetcher Service
Fetches and normalizes data for bar chart race.
Uses AI-generated realistic data for demo purposes.
"""
import logging
from typing import List, Dict, Optional
import random

logger = logging.getLogger(__name__)


class DataFetcher:
    """
    Fetches data for bar chart race topics.
    Uses AI-generated realistic data based on topic configuration.
    """

    # Country data for various topics
    COUNTRY_DATA = {
        "gdp_nominal": {
            "entities": ["USA", "China", "Japan", "Germany", "UK", "India", "France", "Italy", "Brazil", "Canada"],
            "base_values": [10.0, 1.2, 4.5, 2.0, 1.5, 0.5, 1.4, 1.2, 0.6, 0.7],
            "growth_rates": [0.03, 0.10, 0.02, 0.02, 0.02, 0.07, 0.02, 0.01, 0.03, 0.02],
        },
        "population": {
            "entities": ["China", "India", "USA", "Indonesia", "Pakistan", "Brazil", "Nigeria", "Bangladesh", "Russia", "Mexico"],
            "base_values": [1200, 1000, 280, 210, 140, 170, 120, 130, 145, 100],
            "growth_rates": [0.005, 0.015, 0.008, 0.012, 0.020, 0.008, 0.025, 0.010, -0.002, 0.012],
        },
        "social_media_users": {
            "entities": ["Facebook", "YouTube", "WhatsApp", "Instagram", "TikTok", "Snapchat", "Twitter", "LinkedIn", "Pinterest", "Reddit"],
            "base_values": [0.1, 0.05, 0.0, 0.0, 0.0, 0.0, 0.01, 0.01, 0.0, 0.01],
            "growth_rates": [0.35, 0.40, 0.50, 0.60, 0.80, 0.30, 0.20, 0.15, 0.25, 0.20],
        },
        "youtube_subscribers": {
            "entities": ["T-Series", "MrBeast", "Cocomelon", "SET India", "PewDiePie", "Kids Diana Show", "Like Nastya", "Vlad and Niki", "Zee Music", "WWE"],
            "base_values": [1, 0.1, 0.1, 0.5, 5, 0.1, 0.1, 0.1, 0.3, 10],
            "growth_rates": [0.40, 0.50, 0.60, 0.30, 0.15, 0.55, 0.55, 0.50, 0.25, 0.05],
        },
        "military_spending": {
            "entities": ["USA", "China", "Russia", "India", "UK", "Saudi Arabia", "Germany", "France", "Japan", "South Korea"],
            "base_values": [300, 20, 50, 15, 35, 20, 30, 35, 40, 15],
            "growth_rates": [0.03, 0.12, 0.05, 0.08, 0.02, 0.08, 0.02, 0.02, 0.01, 0.05],
        },
    }

    def __init__(self):
        pass

    def fetch_data(
        self,
        topic_id: str,
        year_start: int,
        year_end: int,
        top_n: int = 10
    ) -> List[Dict]:
        """
        Fetch data for a topic.
        Returns list of {name, year, value} dicts.
        """
        topic_data = self.COUNTRY_DATA.get(topic_id)

        if not topic_data:
            # Generate generic data for unknown topics
            return self._generate_generic_data(topic_id, year_start, year_end, top_n)

        return self._generate_realistic_data(topic_data, year_start, year_end, top_n)

    def _generate_realistic_data(
        self,
        topic_data: Dict,
        year_start: int,
        year_end: int,
        top_n: int
    ) -> List[Dict]:
        """Generate realistic data based on topic configuration"""
        entities = topic_data["entities"][:top_n]
        base_values = topic_data["base_values"][:top_n]
        growth_rates = topic_data["growth_rates"][:top_n]

        data = []
        base_year = 2000  # Reference year for base values

        for year in range(year_start, year_end + 1):
            for i, entity in enumerate(entities):
                # Calculate value based on growth from base year
                years_diff = year - base_year
                value = base_values[i] * ((1 + growth_rates[i]) ** years_diff)

                # Add some random variation (±5%)
                value *= (1 + random.uniform(-0.05, 0.05))

                data.append({
                    "name": entity,
                    "year": year,
                    "value": round(value, 2)
                })

        return data

    def _generate_generic_data(
        self,
        topic_id: str,
        year_start: int,
        year_end: int,
        top_n: int
    ) -> List[Dict]:
        """Generate generic data for unknown topics"""
        logger.warning(f"No pre-configured data for topic: {topic_id}, generating generic data")

        entities = [f"Entity_{i+1}" for i in range(top_n)]

        data = []
        for year in range(year_start, year_end + 1):
            for i, entity in enumerate(entities):
                # Random growth pattern
                base = 100 - i * 5
                value = base * (1 + 0.05 * (year - year_start))
                value *= (1 + random.uniform(-0.1, 0.1))

                data.append({
                    "name": entity,
                    "year": year,
                    "value": round(value, 2)
                })

        return data

    def get_data_for_year(self, data: List[Dict], year: int, top_n: int = 10) -> List[Dict]:
        """Filter and sort data for a specific year"""
        year_data = [d for d in data if d["year"] == year]
        year_data.sort(key=lambda x: x["value"], reverse=True)
        return year_data[:top_n]
modules/bar_race/services/director.py
ADDED
@@ -0,0 +1,333 @@
"""
Director - Video Generator
Creates bar chart race animation and final video.
"""
import logging
import pandas as pd
import os
from typing import Dict, Any, Optional
import shutil

logger = logging.getLogger(__name__)


class Director:
    """
    Video Generator for Bar Race.

    Creates animated bar chart race video using:
    - bar_chart_race library for animation
    - Entity images overlay
    - Background music
    - 9:16 vertical format (1080x1920)
    """

    # Video dimensions (9:16)
    VIDEO_WIDTH = 1080
    VIDEO_HEIGHT = 1920
    FPS = 30

    def __init__(self, temp_dir: str, output_dir: str = "videos/bar_race"):
        self.temp_dir = temp_dir
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def generate_video(
        self,
        df: pd.DataFrame,
        plan: Dict[str, Any],
        image_paths: Dict[str, str],
        duration_seconds: int = 60,
        job_id: str = ""
    ) -> Optional[str]:
        """
        Generate bar chart race video.

        Args:
            df: Cleaned data with columns: name, year, value
            plan: Brain's plan with video_meta
            image_paths: Dict mapping entity name to image path
            duration_seconds: Video duration
            job_id: Job ID for output filename

        Returns:
            Path to generated video, or None if failed
        """
        logger.info(f"Director: Starting video generation for {duration_seconds}s video")

        try:
            # Prepare data for bar_chart_race
            df_pivot = self._prepare_data(df)

            if df_pivot is None or df_pivot.empty:
                logger.error("Director: Failed to prepare data")
                return None

            # Generate animation
            video_path = self._generate_bar_race(
                df_pivot=df_pivot,
                plan=plan,
                duration_seconds=duration_seconds,
                job_id=job_id
            )

            if video_path and os.path.exists(video_path):
                # Try to add background music (optional)
                video_with_music = self._add_background_music(video_path, duration_seconds)
                if video_with_music:
                    return video_with_music

                return video_path

        except Exception as e:
            logger.error(f"Director: Video generation failed: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return None

    def _add_background_music(self, video_path: str, duration_seconds: int) -> Optional[str]:
        """Add background music if available in assets/music folder"""
        music_dir = "modules/bar_race/assets/music"

        # Check if music directory exists
        if not os.path.exists(music_dir):
            logger.info("Director: No music folder found, skipping background music")
            return None

        # Find music files
        music_files = []
        for ext in [".mp3", ".wav", ".m4a", ".ogg"]:
            for f in os.listdir(music_dir):
                if f.lower().endswith(ext):
                    music_files.append(os.path.join(music_dir, f))

        if not music_files:
            logger.info("Director: No music files found, skipping background music")
            return None

        try:
            from moviepy.editor import VideoFileClip, AudioFileClip
            import random

            # Pick random music file
            music_path = random.choice(music_files)
            logger.info(f"Director: Adding background music: {music_path}")

            # Load video and audio
            video = VideoFileClip(video_path)
            audio = AudioFileClip(music_path)

            # Loop audio if shorter than video
            if audio.duration < video.duration:
                from moviepy.editor import concatenate_audioclips
                loops_needed = int(video.duration / audio.duration) + 1
                audio = concatenate_audioclips([audio] * loops_needed)

            # Trim audio to video length and lower volume
            audio = audio.subclip(0, video.duration).volumex(0.3)

            # Add audio to video
            video_with_audio = video.set_audio(audio)

            # Save with music
            output_path = video_path.replace(".mp4", "_music.mp4")
            video_with_audio.write_videofile(
                output_path,
                codec="libx264",
                audio_codec="aac",
                fps=self.FPS,
                logger=None
            )

            # Cleanup
            video.close()
            audio.close()

            # Replace original with music version
            os.remove(video_path)
            os.rename(output_path, video_path)

            logger.info(f"Director: Added background music to video")
            return video_path

        except Exception as e:
            logger.warning(f"Director: Failed to add music: {e}")
            return None

    def _prepare_data(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Prepare data for bar_chart_race (pivoted format)"""
        try:
            # Pivot: rows=year, columns=entity, values=value
            df_pivot = df.pivot(index="year", columns="name", values="value")

            # Sort by year
            df_pivot = df_pivot.sort_index()

            # Fill NaN with 0
            df_pivot = df_pivot.fillna(0)

            logger.info(f"Director: Prepared pivot table with shape {df_pivot.shape}")
            return df_pivot

        except Exception as e:
            logger.error(f"Director: Data preparation failed: {e}")
            return None

    def _generate_bar_race(
        self,
        df_pivot: pd.DataFrame,
        plan: Dict[str, Any],
        duration_seconds: int,
        job_id: str
    ) -> Optional[str]:
        """Generate bar chart race animation"""

        # Get video metadata
        video_meta = plan.get("video_meta", {})
        title = video_meta.get("title", "Bar Chart Race")
        value_unit = plan.get("value_intent", {}).get("unit", "")
        top_n = plan.get("visualization", {}).get("top_n", 10)

        output_path = os.path.join(self.output_dir, f"bar_race_{job_id}.mp4")

        try:
            import bar_chart_race as bcr

            # Calculate steps per period based on duration
            num_years = len(df_pivot)
            steps_per_period = max(10, (duration_seconds * self.FPS) // num_years)

            logger.info(f"Director: Creating animation with steps_per_period={steps_per_period}")

            # Generate bar chart race
            bcr.bar_chart_race(
                df=df_pivot,
                filename=output_path,
                orientation='h',
                sort='desc',
                n_bars=top_n,
                fixed_order=False,
                fixed_max=True,
                steps_per_period=steps_per_period,
                period_length=500,
                interpolate_period=True,
                period_label={'x': .95, 'y': .15, 'ha': 'right', 'size': 72},
                period_fmt='{x:.0f}',
                period_summary_func=None,
                perpendicular_bar_func=None,
                title=title,
                title_size=36,
                bar_size=.85,
                bar_textposition='inside',
                bar_texttemplate='{x:,.0f}',
                bar_label_size=14,
                tick_label_size=14,
                scale='linear',
                writer=None,
                fig=None,
                bar_kwargs={'alpha': .8},
                filter_column_colors=False,
                cmap='dark24',
                dpi=144
            )

            logger.info(f"Director: Generated video at {output_path}")
            return output_path

        except ImportError:
            logger.warning("Director: bar_chart_race not available, using fallback")
            return self._generate_fallback_video(df_pivot, plan, duration_seconds, job_id)
        except Exception as e:
            logger.error(f"Director: bar_chart_race failed: {e}")
            return self._generate_fallback_video(df_pivot, plan, duration_seconds, job_id)

    def _generate_fallback_video(
        self,
        df_pivot: pd.DataFrame,
        plan: Dict[str, Any],
        duration_seconds: int,
        job_id: str
    ) -> Optional[str]:
        """Fallback: Generate simple video using matplotlib and MoviePy"""
        logger.info("Director: Using fallback matplotlib animation")

        try:
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            from matplotlib.animation import FuncAnimation
            from moviepy.editor import VideoFileClip
            import tempfile

            video_meta = plan.get("video_meta", {})
            title = video_meta.get("title", "Bar Chart Race")
            top_n = plan.get("visualization", {}).get("top_n", 10)

            # Create figure with dark theme
            fig, ax = plt.subplots(figsize=(6, 10.67), facecolor='#121220')
            ax.set_facecolor('#121220')

            years = df_pivot.index.tolist()
            num_frames = duration_seconds * self.FPS
            frames_per_year = num_frames // len(years)

            # Colors for bars
            colors = plt.cm.viridis([i/top_n for i in range(top_n)])

            def update(frame):
                ax.clear()
                ax.set_facecolor('#121220')

                # Calculate current year and interpolation
                year_idx = min(frame // frames_per_year, len(years) - 1)
                year = years[year_idx]

                # Get data for current year
                data = df_pivot.loc[year].sort_values(ascending=True).tail(top_n)

                # Draw horizontal bars
                bars = ax.barh(range(len(data)), data.values, color=colors[:len(data)])

                # Labels
                ax.set_yticks(range(len(data)))
                ax.set_yticklabels(data.index, fontsize=10, color='white')
                ax.set_title(f"{title}\n{year}", fontsize=16, color='white', pad=20)

                # Style
                ax.spines['top'].set_visible(False)
                ax.spines['right'].set_visible(False)
                ax.spines['bottom'].set_color('#444')
                ax.spines['left'].set_color('#444')
                ax.tick_params(colors='#888')

                plt.tight_layout()

            # Create animation
            anim = FuncAnimation(fig, update, frames=num_frames, interval=1000/self.FPS)

            # Save to temp file
            temp_path = os.path.join(self.temp_dir, f"temp_animation_{job_id}.mp4")
            anim.save(temp_path, writer='ffmpeg', fps=self.FPS, dpi=100)
            plt.close(fig)

            # Move to output
            output_path = os.path.join(self.output_dir, f"bar_race_{job_id}.mp4")
            shutil.move(temp_path, output_path)

            logger.info(f"Director: Generated fallback video at {output_path}")
            return output_path

        except Exception as e:
            logger.error(f"Director: Fallback video generation failed: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return None

    def cleanup(self):
        """Clean up temporary files"""
        try:
            if os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
                logger.info(f"Director: Cleaned up temp directory: {self.temp_dir}")
        except Exception as e:
            logger.warning(f"Director: Cleanup failed: {e}")
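A minimal sketch of how the Director is meant to be called (illustrative only: the tiny DataFrame, temp_dir and job_id are made up; rendering needs ffmpeg plus either bar_chart_race or matplotlib and moviepy):

# Hypothetical usage sketch, not shipped in this commit
import pandas as pd
from modules.bar_race.services.director import Director

# Long-format input; _prepare_data pivots it to year x entity internally
df = pd.DataFrame([
    {"name": "USA",   "year": 2000, "value": 10.3},
    {"name": "USA",   "year": 2001, "value": 10.6},
    {"name": "China", "year": 2000, "value": 1.2},
    {"name": "China", "year": 2001, "value": 1.3},
])

plan = {
    "video_meta": {"title": "GDP demo"},
    "value_intent": {"unit": "Trillion USD"},
    "visualization": {"top_n": 10},
}

director = Director(temp_dir="/tmp/bar_race_demo")
video_path = director.generate_video(df, plan, image_paths={}, duration_seconds=30, job_id="demo")
print(video_path)  # videos/bar_race/bar_race_demo.mp4 on success, None on failure
director.cleanup()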
modules/bar_race/services/scout.py
ADDED
@@ -0,0 +1,267 @@
"""
Scout - Data Fetcher
Collects data from multiple sources based on Brain's plan.

Priority:
1. Wikipedia REST API
2. World Bank API
3. DuckDuckGo + BeautifulSoup scraping
"""
import logging
import requests
import pandas as pd
from typing import Dict, Any, List, Optional
from bs4 import BeautifulSoup
import re
import os

logger = logging.getLogger(__name__)


class Scout:
    """
    Data Fetcher for Bar Race video generation.

    Tries APIs first, falls back to web scraping.
    """

    # API endpoints
    WIKIPEDIA_API = "https://en.wikipedia.org/api/rest_v1"
    WORLDBANK_API = "https://api.worldbank.org/v2"

    # Common headers
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }

    def __init__(self, temp_dir: str):
        self.temp_dir = temp_dir
        os.makedirs(temp_dir, exist_ok=True)

    def fetch_data(self, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """
        Fetch data based on Brain's plan.

        Tries sources in priority order:
        1. Wikipedia REST API
        2. World Bank API
        3. Web scraping

        Returns:
            DataFrame with raw data, or None if all sources fail
        """
        source_priority = plan.get("source_priority", ["wikipedia_rest", "worldbank_api", "web_scraping"])
        entity_type = plan.get("entity_type", "general")
        topic = plan.get("topic", "")
        value_intent = plan.get("value_intent", {})

        df = None

        for source in source_priority:
            logger.info(f"Scout: Trying source: {source}")

            try:
                if source == "wikipedia_rest":
                    df = self._fetch_wikipedia(plan)
                elif source == "worldbank_api":
                    df = self._fetch_worldbank(plan)
                elif source == "web_scraping":
                    df = self._fetch_scraping(plan)

                if df is not None and not df.empty:
                    logger.info(f"Scout: Success with {source}, got {len(df)} rows")
                    break

            except Exception as e:
                logger.warning(f"Scout: {source} failed: {e}")
                continue

        if df is not None and not df.empty:
            # Save raw data
            raw_path = os.path.join(self.temp_dir, "raw_data.csv")
            df.to_csv(raw_path, index=False)
            logger.info(f"Scout: Saved raw data to {raw_path}")
            return df

        logger.error("Scout: All sources failed")
        return None

    def _fetch_wikipedia(self, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Fetch data from Wikipedia REST API or tables"""
        queries = []
        for strategy in plan.get("search_strategies", []):
            queries.extend(strategy.get("queries", []))

        if not queries:
            queries = [plan.get("topic", "")]

        # Try Wikipedia page tables
        for query in queries:
            try:
                # Search Wikipedia
                search_url = f"https://en.wikipedia.org/w/api.php"
                params = {
                    "action": "opensearch",
                    "search": query.replace(" wikipedia", ""),
                    "limit": 5,
                    "format": "json"
                }

                response = requests.get(search_url, params=params, headers=self.HEADERS, timeout=10)
                if response.status_code == 200:
                    results = response.json()
                    if len(results) >= 4 and results[3]:
                        # Get first result URL
                        page_url = results[3][0]

                        # Fetch page and extract tables
                        tables = pd.read_html(page_url)
                        if tables:
                            # Find table with year data
                            for table in tables:
                                if self._has_year_column(table):
                                    logger.info(f"Scout: Found table with year data from Wikipedia")
                                    return table

                            # Return largest table if no year column found
                            largest = max(tables, key=lambda t: len(t))
                            return largest

            except Exception as e:
                logger.debug(f"Scout: Wikipedia query '{query}' failed: {e}")
                continue

        return None

    def _fetch_worldbank(self, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Fetch data from World Bank API"""
        value_intent = plan.get("value_intent", {})
        primary_value = value_intent.get("primary", "").lower()
        time_config = plan.get("time_config", {})

        # Map common intents to World Bank indicators
        indicator_map = {
            "gdp": "NY.GDP.MKTP.CD",  # GDP (current USD)
            "population": "SP.POP.TOTL",  # Total population
            "life expectancy": "SP.DYN.LE00.IN",  # Life expectancy at birth
            "inflation": "FP.CPI.TOTL.ZG",  # Inflation (consumer prices)
            "military": "MS.MIL.XPND.CD",  # Military expenditure
        }

        indicator = None
        for key, value in indicator_map.items():
            if key in primary_value:
                indicator = value
                break

        if not indicator:
            logger.debug("Scout: No matching World Bank indicator found")
            return None

        try:
            # Fetch data from World Bank API
            start_year = time_config.get("start_year", 2000)
            end_year = time_config.get("end_year", 2024)

            url = f"{self.WORLDBANK_API}/country/all/indicator/{indicator}"
            params = {
                "format": "json",
                "per_page": 500,
                "date": f"{start_year}:{end_year}"
            }

            response = requests.get(url, params=params, headers=self.HEADERS, timeout=15)
            if response.status_code == 200:
                data = response.json()
                if len(data) >= 2 and data[1]:
                    records = data[1]

                    # Convert to DataFrame
                    rows = []
                    for record in records:
                        if record.get("value") is not None:
                            rows.append({
                                "name": record["country"]["value"],
                                "year": int(record["date"]),
                                "value": record["value"]
                            })

                    if rows:
                        df = pd.DataFrame(rows)
                        logger.info(f"Scout: Got {len(df)} rows from World Bank API")
                        return df

        except Exception as e:
            logger.warning(f"Scout: World Bank API failed: {e}")

        return None

    def _fetch_scraping(self, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Fallback: Search and scrape web pages"""
        queries = []
        for strategy in plan.get("search_strategies", []):
            queries.extend(strategy.get("queries", []))

        if not queries:
            queries = [f"{plan.get('topic', '')} data table"]

        # Try DuckDuckGo search
        try:
            from duckduckgo_search import DDGS

            with DDGS() as ddgs:
                for query in queries[:3]:  # Limit to 3 queries
                    results = list(ddgs.text(query, max_results=5))

                    for result in results:
                        url = result.get("href", "")
                        if not url:
                            continue

                        try:
                            # Fetch and parse tables
                            response = requests.get(url, headers=self.HEADERS, timeout=10)
                            if response.status_code == 200:
                                tables = pd.read_html(response.text)
                                if tables:
                                    for table in tables:
                                        if self._has_year_column(table):
                                            logger.info(f"Scout: Found table from {url}")
                                            return table

                                    # Return largest table
                                    largest = max(tables, key=lambda t: len(t))
                                    if len(largest) > 5:
                                        return largest

                        except Exception as e:
                            logger.debug(f"Scout: Failed to scrape {url}: {e}")
                            continue

        except ImportError:
            logger.warning("Scout: duckduckgo-search not installed")
        except Exception as e:
            logger.warning(f"Scout: DuckDuckGo search failed: {e}")

        return None

    def _has_year_column(self, df: pd.DataFrame) -> bool:
        """Check if DataFrame has a year-like column"""
        for col in df.columns:
            col_str = str(col).lower()
            # Check if column name contains year-related words
            if any(word in col_str for word in ["year", "date", "time"]):
                return True
            # Check if column values look like years
            try:
                sample = df[col].dropna().head(5)
                for val in sample:
                    if isinstance(val, (int, float)):
                        if 1900 <= val <= 2100:
                            return True
                    elif isinstance(val, str):
                        if re.match(r'^(19|20)\d{2}$', str(val)):
                            return True
            except:
                pass
        return False
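A minimal sketch of driving the Scout with a Brain-style plan (illustrative; the plan keys mirror what Brain.generate_plan returns, and the World Bank path requires network access):

# Hypothetical usage sketch, not shipped in this commit
from modules.bar_race.services.scout import Scout

plan = {
    "topic": "GDP by country",
    "value_intent": {"primary": "GDP"},
    "time_config": {"start_year": 2000, "end_year": 2024},
    "source_priority": ["worldbank_api"],   # skip Wikipedia/scraping for this demo
    "search_strategies": [],
}

scout = Scout(temp_dir="/tmp/bar_race_demo")
raw_df = scout.fetch_data(plan)              # also written to /tmp/bar_race_demo/raw_data.csv
if raw_df is not None:
    print(raw_df.head())                     # name / year / value rows from the World Bank API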
modules/bar_race/services/surgeon.py
ADDED
@@ -0,0 +1,327 @@
"""
Surgeon - Data Cleaner & Formatter
Cleans raw data and prepares it for bar chart race animation.
"""
import logging
import pandas as pd
import numpy as np
import re
import os
from typing import Dict, Any, Optional, List

logger = logging.getLogger(__name__)


class Surgeon:
    """
    Data Cleaner for Bar Race video generation.

    Responsibilities:
    - Table selection (find year columns)
    - Wide → Long / Long → Wide conversion
    - Regex cleaning ($, €, commas, references)
    - Convert strings to numeric
    - Interpolate missing years
    """

    def __init__(self, temp_dir: str):
        self.temp_dir = temp_dir

    def clean_data(self, df: pd.DataFrame, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """
        Clean and format raw data for bar chart race.

        Expected output format:
        name | year | value
        USA  | 2000 | 10.5
        USA  | 2001 | 11.2
        ...

        Returns:
            Cleaned DataFrame with columns: name, year, value
        """
        if df is None or df.empty:
            logger.error("Surgeon: No data to clean")
            return None

        logger.info(f"Surgeon: Cleaning data with shape {df.shape}")
        logger.debug(f"Surgeon: Columns: {list(df.columns)}")

        try:
            # Step 1: Identify data structure
            structure = self._identify_structure(df)
            logger.info(f"Surgeon: Data structure: {structure}")

            # Step 2: Convert to long format
            if structure == "wide":
                df_long = self._wide_to_long(df, plan)
            elif structure == "long":
                df_long = self._normalize_long(df, plan)
            else:
                df_long = self._attempt_conversion(df, plan)

            if df_long is None or df_long.empty:
                logger.error("Surgeon: Failed to convert data to long format")
                return None

            # Step 3: Clean values
            df_clean = self._clean_values(df_long)

            # Step 4: Interpolate missing years
            time_config = plan.get("time_config", {})
            df_interpolated = self._interpolate_years(
                df_clean,
                start_year=time_config.get("start_year", 2000),
                end_year=time_config.get("end_year", 2024)
            )

            # Step 5: Get top N entities
            top_n = plan.get("visualization", {}).get("top_n", 10)
            df_final = self._get_top_entities(df_interpolated, top_n)

            # Save cleaned data
            output_path = os.path.join(self.temp_dir, "bar_chart_ready.csv")
            df_final.to_csv(output_path, index=False)
            logger.info(f"Surgeon: Saved cleaned data to {output_path}, shape: {df_final.shape}")

            return df_final

        except Exception as e:
            logger.error(f"Surgeon: Cleaning failed: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return None

    def _identify_structure(self, df: pd.DataFrame) -> str:
        """Identify if data is wide or long format"""
        # Wide format: years as columns (2000, 2001, 2002...)
        year_columns = []
        for col in df.columns:
            try:
                year = int(str(col))
                if 1900 <= year <= 2100:
                    year_columns.append(col)
            except:
                pass

        if len(year_columns) > 3:
            return "wide"

        # Long format: year column with values
        for col in df.columns:
            col_lower = str(col).lower()
            if "year" in col_lower or "date" in col_lower:
                return "long"

        return "unknown"

    def _wide_to_long(self, df: pd.DataFrame, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Convert wide format to long format"""
        # Find year columns
        year_columns = []
        non_year_columns = []

        for col in df.columns:
            try:
                year = int(str(col))
                if 1900 <= year <= 2100:
                    year_columns.append(col)
                else:
                    non_year_columns.append(col)
            except:
                non_year_columns.append(col)

        if not year_columns:
            return None

        # Find name column (first non-year column with strings)
        name_col = None
        for col in non_year_columns:
            if df[col].dtype == object:
                name_col = col
                break

        if name_col is None and non_year_columns:
            name_col = non_year_columns[0]

        if name_col is None:
            return None

        # Melt to long format
        df_long = df.melt(
            id_vars=[name_col],
            value_vars=year_columns,
            var_name="year",
            value_name="value"
        )

        # Rename columns
        df_long.columns = ["name", "year", "value"]

        return df_long

    def _normalize_long(self, df: pd.DataFrame, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Normalize long format data"""
        # Find relevant columns
        name_col = None
        year_col = None
        value_col = None

        for col in df.columns:
            col_lower = str(col).lower()

            if name_col is None and any(word in col_lower for word in ["name", "country", "entity", "player"]):
                name_col = col
            elif year_col is None and any(word in col_lower for word in ["year", "date", "time"]):
                year_col = col
            elif value_col is None and any(word in col_lower for word in ["value", "amount", "gdp", "population", "worth"]):
                value_col = col

        # Fallback: use first string column as name, numeric columns for year/value
        if name_col is None:
            for col in df.columns:
                if df[col].dtype == object:
                    name_col = col
                    break

        if value_col is None:
            # Use last numeric column as value
            for col in reversed(list(df.columns)):
                if col != year_col and pd.api.types.is_numeric_dtype(df[col]):
                    value_col = col
                    break

        if not all([name_col, year_col, value_col]):
            logger.warning(f"Surgeon: Could not identify columns. name={name_col}, year={year_col}, value={value_col}")
            return None

        # Select and rename
        df_long = df[[name_col, year_col, value_col]].copy()
        df_long.columns = ["name", "year", "value"]

        return df_long

    def _attempt_conversion(self, df: pd.DataFrame, plan: Dict[str, Any]) -> Optional[pd.DataFrame]:
        """Attempt to convert unknown format"""
        # Try treating first column as name, rest as years/values
        if len(df.columns) >= 2:
            name_col = df.columns[0]

            # Check if other columns might be years
            potential_years = []
            for col in df.columns[1:]:
                try:
                    year = int(str(col))
                    if 1900 <= year <= 2100:
                        potential_years.append(col)
                except:
                    pass

            if potential_years:
                return self._wide_to_long(df, plan)

        return None

    def _clean_values(self, df: pd.DataFrame) -> pd.DataFrame:
        """Clean values: remove symbols, convert to numeric"""
        df = df.copy()

        # Clean name column
        df["name"] = df["name"].astype(str).str.strip()
        df["name"] = df["name"].str.replace(r'\[.*?\]', '', regex=True)  # Remove references like [1]

        # Clean year column
        df["year"] = pd.to_numeric(df["year"], errors="coerce")

        # Clean value column
        def clean_value(val):
            if pd.isna(val):
                return np.nan
            if isinstance(val, (int, float)):
                return float(val)

            # Convert to string and clean
            val_str = str(val)

            # Remove currency symbols and commas
            val_str = re.sub(r'[$€£¥₹,]', '', val_str)

            # Remove references like [1], [a]
            val_str = re.sub(r'\[.*?\]', '', val_str)

            # Handle multipliers (billion, million, trillion)
            multiplier = 1
            val_lower = val_str.lower()
            if "trillion" in val_lower:
                multiplier = 1e12
                val_str = re.sub(r'trillion', '', val_str, flags=re.IGNORECASE)
            elif "billion" in val_lower:
                multiplier = 1e9
                val_str = re.sub(r'billion', '', val_str, flags=re.IGNORECASE)
            elif "million" in val_lower:
                multiplier = 1e6
                val_str = re.sub(r'million', '', val_str, flags=re.IGNORECASE)

            # Extract numeric value
            match = re.search(r'[-+]?\d*\.?\d+', val_str)
            if match:
                return float(match.group()) * multiplier

            return np.nan

        df["value"] = df["value"].apply(clean_value)

        # Drop rows with missing data
        df = df.dropna(subset=["name", "year", "value"])

        # Convert year to int
        df["year"] = df["year"].astype(int)

        return df

    def _interpolate_years(self, df: pd.DataFrame, start_year: int, end_year: int) -> pd.DataFrame:
        """Interpolate missing years for each entity"""
        entities = df["name"].unique()
        all_years = list(range(start_year, end_year + 1))

        result_dfs = []

        for entity in entities:
            entity_df = df[df["name"] == entity].copy()

            if entity_df.empty:
                continue

            # Create full year index
            full_df = pd.DataFrame({"year": all_years})
            full_df["name"] = entity

            # Merge with existing data
            merged = full_df.merge(entity_df[["year", "value"]], on="year", how="left")

            # Interpolate missing values
            merged["value"] = merged["value"].interpolate(method="linear")

            # Forward/backward fill remaining NaN
            merged["value"] = merged["value"].fillna(method="ffill").fillna(method="bfill")

            result_dfs.append(merged)

        if result_dfs:
            return pd.concat(result_dfs, ignore_index=True)
        return df

    def _get_top_entities(self, df: pd.DataFrame, top_n: int = 10) -> pd.DataFrame:
        """Get top N entities based on maximum value"""
        # Calculate max value for each entity
        max_values = df.groupby("name")["value"].max().sort_values(ascending=False)

        # Get top N entity names
        top_entities = max_values.head(top_n).index.tolist()

        # Filter dataframe
        df_top = df[df["name"].isin(top_entities)]

        logger.info(f"Surgeon: Selected top {len(top_entities)} entities: {top_entities}")

        return df_top
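A minimal sketch of the Surgeon on a small wide-style table of the kind pd.read_html typically returns (illustrative values; it shows symbol stripping, the trillion multiplier, and linear interpolation over the missing years):

# Hypothetical usage sketch, not shipped in this commit
import pandas as pd
from modules.bar_race.services.surgeon import Surgeon

raw = pd.DataFrame({
    "Country": ["USA", "China"],
    "2000": ["$10.3 trillion", "$1.2 trillion[1]"],
    "2005": ["$13.0 trillion", "$2.3 trillion"],
    "2010": ["$15.0 trillion", "$6.1 trillion"],
})

plan = {
    "time_config": {"start_year": 2000, "end_year": 2010},
    "visualization": {"top_n": 10},
}

surgeon = Surgeon(temp_dir="/tmp/bar_race_demo")
clean = surgeon.clean_data(raw, plan)
# Long format (name, year, value): symbols removed, values in USD,
# and the in-between years filled by interpolation; also saved to bar_chart_ready.csv
print(clean.head(11))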
requirements.txt
CHANGED
@@ -29,3 +29,10 @@ imageio-ffmpeg>=0.4.9
 # Trends Analysis
 pytrends
 pandas
+
+# Bar Race Module
+bar_chart_race
+beautifulsoup4
+lxml
+duckduckgo-search
+rembg
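With these dependencies in place, the four services are designed to chain into one job; a rough end-to-end sketch (the temp directory, job id, empty image_paths, and import layout are assumptions, and the Artist image step is omitted):

# Hypothetical end-to-end wiring, not shipped in this commit
from modules.bar_race.services.brain import Brain
from modules.bar_race.services.scout import Scout
from modules.bar_race.services.surgeon import Surgeon
from modules.bar_race.services.director import Director

temp_dir = "/tmp/bar_race_job"

plan = Brain().generate_plan("Top 10 countries by GDP 2000-2024")   # Brain: topic -> plan
raw_df = Scout(temp_dir).fetch_data(plan)                           # Scout: plan -> raw table (or None)
clean_df = Surgeon(temp_dir).clean_data(raw_df, plan)               # Surgeon: raw -> name/year/value (or None)

# Each stage can return None; real code should stop and report the failed step
video = Director(temp_dir).generate_video(
    clean_df, plan, image_paths={}, duration_seconds=60, job_id="job1"
)
print(video)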
static/index.html
CHANGED
@@ -656,50 +656,22 @@
 
             <form id="barRaceForm">
                 <div class="form-group">
-                    <label>Topic *</label>
-                    <
-                    <option value="youtube_subscribers">YouTube Subscribers</option>
-                    <option value="military_spending">Military Expenditure</option>
-                    <option value="olympic_medals">Olympic Gold Medals</option>
-                    <option value="life_expectancy">Life Expectancy</option>
-                    <option value="browser_market_share">Browser Market Share</option>
-                    </select>
-                </div>
-
-                <div class="form-row" style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem;">
-                    <div class="form-group">
-                        <label>Start Year</label>
-                        <input type="number" id="barRaceYearStart" value="2000" min="1960" max="2024">
-                    </div>
-                    <div class="form-group">
-                        <label>End Year</label>
-                        <input type="number" id="barRaceYearEnd" value="2024" min="1960" max="2024">
-                    </div>
+                    <label>Topic / Prompt *</label>
+                    <input type="text" id="barRaceTopic" placeholder="e.g., Top 10 richest countries by GDP 2000-2024"
+                        required>
+                    <small style="color: var(--text-secondary); display: block; margin-top: 0.5rem;">
+                        Enter any topic - the AI will find data and create the video
+                    </small>
                 </div>
 
-                <div class="form-
-                    <
-                    </select>
-                </div>
-                <div class="form-group">
-                    <label>Duration (seconds)</label>
-                    <select id="barRaceDuration">
-                    <option value="30">30s</option>
-                    <option value="60" selected>60s</option>
-                    <option value="90">90s</option>
-                    <option value="120">120s</option>
-                    </select>
-                </div>
+                <div class="form-group">
+                    <label>Duration</label>
+                    <select id="barRaceDuration">
+                        <option value="30">30 seconds</option>
+                        <option value="60" selected>60 seconds</option>
+                        <option value="90">90 seconds</option>
+                        <option value="120">120 seconds</option>
+                    </select>
                 </div>
 
                 <button type="submit" class="btn btn-primary" style="width: 100%;">📊 Generate Bar Race Video</button>
@@ -1241,9 +1213,6 @@
             status.innerHTML = '⏳ Starting bar race generation...';
 
             const topic = document.getElementById('barRaceTopic').value;
-            const yearStart = parseInt(document.getElementById('barRaceYearStart').value);
-            const yearEnd = parseInt(document.getElementById('barRaceYearEnd').value);
-            const topN = parseInt(document.getElementById('barRaceTopN').value);
             const duration = parseInt(document.getElementById('barRaceDuration').value);
 
             try {
@@ -1252,18 +1221,14 @@
                 headers: { 'Content-Type': 'application/json' },
                 body: JSON.stringify({
                     topic: topic,
-                    year_end: yearEnd,
-                    top_n: topN,
-                    duration_seconds: duration,
-                    fps: 30
+                    duration_seconds: duration
                 })
             });
 
             const data = await response.json();
             if (!response.ok) throw new Error(data.detail || 'Failed to start');
 
-            status.innerHTML = `⏳ Job started: ${data.job_id}.
+            status.innerHTML = `⏳ Job started: ${data.job_id}. Analyzing topic...`;
             pollBarRaceStatus(data.job_id);
 
         } catch (err) {
@@ -1286,7 +1251,8 @@
                     status.className = 'status error';
                     status.innerHTML = '❌ Failed: ' + (data.error || 'Unknown error');
                 } else {
+                    const step = data.current_step || data.status;
+                    status.innerHTML = `⏳ ${step} (${data.progress}%)`;
                     setTimeout(poll, 2000);
                 }
             } catch (err) {