diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..9e9e31b4f7d7542fb228634f6026fa489b2f5950
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,57 @@
+# NCAkit Environment Configuration
+# Copy this file to .env and fill in your values
+
+# ===================
+# Video Creator Module
+# ===================
+
+# Pexels API key for background videos (Required)
+# Get from: https://www.pexels.com/api/
+PEXELS_API_KEY=your_pexels_api_key_here
+
+# Kokoro TTS endpoint URL (Required)
+# Example: https://your-username-kokoro-tts.hf.space
+HF_TTS=https://your-tts-endpoint.hf.space
+
+# Whisper model for captions (Optional, default: tiny.en)
+# Options: tiny.en, base.en, small.en, medium.en, large
+WHISPER_MODEL=tiny.en
+
+# ===================
+# Server Configuration
+# ===================
+
+# Server port (Optional, default: 8880)
+PORT=8880
+
+# Log level (Optional, default: info)
+# Options: debug, info, warning, error
+LOG_LEVEL=info
+
+# Running in Docker? (Optional, default: false)
+DOCKER=false
+
+# Custom data directory (Optional)
+# DATA_DIR_PATH=/path/to/data
+
+# ===================
+# Add new module configs below
+# ===================
+
+# ===================
+# Story Reels Module (Image Generation)
+# ===================
+
+# NVIDIA API Key (PRIMARY - stable-diffusion-3-medium)
+# Get from: https://build.nvidia.com/
+NVIDIA_API_KEY=nvapi-your_key_here
+
+# Cloudflare Worker URL (FALLBACK)
+CF_URL=https://image-api.yourworker.workers.dev
+
+# Cloudflare API Key (FALLBACK)
+CF_API=your_api_key_here
+
+# Gemini API Key (Required for AI script generation)
+# Get from: https://aistudio.google.com/apikey
+GEMINI_API_KEY=your_gemini_api_key_here
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..6b9df027c422c392e807f993b1545166115fb074
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.mp3 filter=lfs diff=lfs merge=lfs -text
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..ede1185bbc69e98baa018eda640337fae7c7d387
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,41 @@
+# NCAkit Docker Configuration for Hugging Face Spaces
+FROM python:3.11-slim
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ ffmpeg \
+ libsndfile1 \
+ git \
+ && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user for HF Spaces
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+ PATH=/home/user/.local/bin:$PATH
+
+# Set working directory
+WORKDIR $HOME/app
+
+# Copy requirements first for caching
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --user -r requirements.txt
+
+# Copy application code
+COPY --chown=user . .
+
+# Create data directories
+RUN mkdir -p $HOME/app/data $HOME/app/videos $HOME/app/temp
+
+# Environment for HF Spaces
+ENV DOCKER=true
+ENV PORT=8880
+ENV LOG_LEVEL=info
+ENV DATA_DIR_PATH=$HOME/app/data
+ENV VIDEOS_DIR=$HOME/app/videos
+
+# Expose REST API port
+EXPOSE 8880
+
+# Run the application
+CMD ["python", "app.py"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..382d14caf01122abf304ea2447fe463ba1d24fa2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,110 @@
+# NCAkit - Neural Content Automation Toolkit 🤖
+
+A modular Python toolkit for content automation, featuring video creation, text-to-speech, and more.
+
+## ✨ Features
+
+- 🎬 **Video Creator** - Short-form videos with TTS, captions, and music
+- 🔌 **Modular Architecture** - Easy to add new features
+- 🌐 **REST API** - FastAPI with auto-generated docs
+- 🚀 **Ready for Deployment** - Docker & Hugging Face Spaces
+
+## 🏗️ Project Structure
+
+```
+NCAkit/
+├── app.py # Main FastAPI application
+├── config.py # Unified configuration
+├── requirements.txt # All dependencies
+├── Dockerfile # Docker deployment
+│
+├── core/ # Shared infrastructure
+│ ├── module_registry.py # Auto module discovery
+│ └── utils/ # Shared utilities
+│
+├── modules/ # Feature modules
+│ ├── video_creator/ # Video creation module
+│ │ ├── router.py # API endpoints
+│ │ ├── schemas.py # Pydantic models
+│ │ └── services/ # Core logic
+│ └── _template/ # Template for new modules
+│
+└── static/ # Web UI & assets
+```
+
+## 🚀 Quick Start
+
+### Install
+
+```bash
+cd NCAkit
+pip install -r requirements.txt
+```
+
+### Configure
+
+```bash
+cp .env.example .env
+# Edit .env with your API keys
+```
+
+### Run
+
+```bash
+python app.py
+# Or: uvicorn app:app --host 0.0.0.0 --port 8880 --reload
+```
+
+### Access
+
+- **Web UI**: http://localhost:8880
+- **API Docs**: http://localhost:8880/docs
+- **Modules**: http://localhost:8880/api/modules
+
+## 📡 API Endpoints
+
+| Module | Endpoint | Method | Description |
+|--------|----------|--------|-------------|
+| System | `/health` | GET | Health check |
+| System | `/api/modules` | GET | List modules |
+| Video | `/api/video/short-video` | POST | Create video |
+| Video | `/api/video/short-video/{id}/status` | GET | Check status |
+| Video | `/api/video/short-video/{id}` | GET | Download video |
+
+## 🔧 Adding New Modules
+
+1. Copy `modules/_template/` to `modules/your_module/`
+2. Update `MODULE_NAME`, `MODULE_PREFIX` in `__init__.py`
+3. Implement router and services
+4. Restart server - auto-discovered!
+
+```python
+# modules/your_module/__init__.py
+MODULE_NAME = "your_module"
+MODULE_PREFIX = "/api/your-feature"
+
+def register(app, config):
+ from .router import router
+ app.include_router(router, prefix=MODULE_PREFIX)
+```
+
+## 🐳 Docker
+
+```bash
+docker build -t ncakit .
+docker run -p 8880:8880 --env-file .env ncakit
+```
+
+## ⚙️ Environment Variables
+
+| Variable | Required | Default | Module |
+|----------|----------|---------|--------|
+| `PEXELS_API_KEY` | ✅ | - | Video Creator |
+| `HF_TTS` | ✅ | - | Video Creator |
+| `WHISPER_MODEL` | ❌ | tiny.en | Video Creator |
+| `PORT` | ❌ | 8880 | Server |
+| `LOG_LEVEL` | ❌ | info | Server |
+
+## 📄 License
+
+MIT
diff --git a/__pycache__/app.cpython-313.pyc b/__pycache__/app.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6bb190d01c17ed83c4fab9df715209c619f62707
Binary files /dev/null and b/__pycache__/app.cpython-313.pyc differ
diff --git a/__pycache__/config.cpython-313.pyc b/__pycache__/config.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3bbb5078776ea190cfe10b8fe29c9a340e0e684f
Binary files /dev/null and b/__pycache__/config.cpython-313.pyc differ
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ab92e8c97381c8783c4bd79a1e419b977aa5567
--- /dev/null
+++ b/app.py
@@ -0,0 +1,115 @@
+"""
+NCAkit - Neural Content Automation Toolkit
+Main FastAPI Application with Modular Architecture
+"""
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+import logging
+from pathlib import Path
+import sys
+
+from config import config
+from core.module_registry import registry
+
+# Setup logging
+logging.basicConfig(
+ level=config.log_level.upper(),
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+ handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger(__name__)
+
+# Create FastAPI app
+app = FastAPI(
+ title="NCAkit - Neural Content Automation Toolkit",
+ description="""
+ # NCAkit REST API
+
+ A modular toolkit for content automation with multiple feature modules.
+
+ ## Available Modules
+
+ - 🎬 **Video Creator** - Create short-form videos with TTS, captions, and music
+ - 📱 More modules coming soon...
+
+ ## How It Works
+
+ 1. Each module has its own API prefix (e.g., `/api/video/`)
+ 2. Modules are auto-discovered and registered on startup
+ 3. Check `/api/modules` for list of available modules
+ """,
+ version="1.0.0",
+ contact={
+ "name": "NCAkit",
+ "url": "https://github.com/your-repo/ncakit"
+ }
+)
+
+# Add CORS middleware
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"],
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+
+@app.on_event("startup")
+async def startup_event():
+ """Initialize all modules on startup"""
+ logger.info("Starting NCAkit...")
+
+ # Ensure directories exist
+ config.ensure_directories()
+
+ # Register all modules
+ num_modules = registry.register_all(app, config)
+ logger.info(f"Loaded {num_modules} module(s)")
+
+ logger.info(f"NCAkit started successfully on port {config.port}")
+
+
+@app.get("/health", tags=["System"])
+async def health_check():
+ """Health check endpoint"""
+ return {"status": "ok", "toolkit": "ncakit"}
+
+
+@app.get("/api/modules", tags=["System"])
+async def list_modules():
+ """List all available modules"""
+ return {
+ "modules": registry.list_modules()
+ }
+
+
+@app.get("/")
+async def read_root():
+ """Serve the web UI"""
+ static_path = Path(__file__).parent / "static" / "index.html"
+ if static_path.exists():
+ return FileResponse(static_path)
+ return {
+ "message": "NCAkit - Neural Content Automation Toolkit",
+ "docs": "/docs",
+ "modules": "/api/modules"
+ }
+
+
+# Mount static files if they exist
+static_dir = Path(__file__).parent / "static"
+if static_dir.exists():
+ app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
+
+
+if __name__ == "__main__":
+ import uvicorn
+ uvicorn.run(
+ "app:app",
+ host="0.0.0.0",
+ port=config.port,
+ log_level=config.log_level.lower()
+ )
diff --git a/config.py b/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bb50f30201bed27781cdef941fc1dd02632d99b
--- /dev/null
+++ b/config.py
@@ -0,0 +1,115 @@
+"""
+Base Configuration for NCAkit
+Provides centralized configuration management for all modules.
+"""
+import os
+from pathlib import Path
+from pydantic_settings import BaseSettings
+from typing import Optional, Dict, Any
+
+
+class BaseConfig(BaseSettings):
+ """
+ Base configuration class that all module configs should extend.
+ Provides common settings and utilities.
+ """
+
+ # Server Configuration
+ port: int = 8880
+ log_level: str = "info"
+ debug: bool = False
+
+ # Environment
+ docker: bool = False
+ dev: bool = False
+ data_dir_path: Optional[str] = None
+
+ class Config:
+ env_file = ".env"
+ case_sensitive = False
+ extra = "ignore"
+
+ @property
+ def base_data_dir(self) -> Path:
+ """Get the base data directory path"""
+ if self.data_dir_path:
+ return Path(self.data_dir_path)
+
+ if self.docker:
+ return Path("/data")
+
+ # For local development
+ home = Path.home()
+ return home / ".ncakit"
+
+ def ensure_base_directories(self):
+ """Ensure base directories exist"""
+ self.base_data_dir.mkdir(parents=True, exist_ok=True)
+
+
+class NCAkitConfig(BaseConfig):
+ """
+ Main NCAkit configuration.
+ Aggregates all module-specific settings.
+ """
+
+ # ===================
+ # Video Creator Module Config
+ # ===================
+ pexels_api_key: Optional[str] = None
+ hf_tts: Optional[str] = None
+ whisper_model: str = "tiny.en"
+ whisper_verbose: bool = False
+ concurrency: int = 1
+ video_cache_size_in_bytes: int = 2684354560 # 2.5GB
+
+ # ===================
+ # Add new module configs here
+ # Example:
+ # openai_api_key: Optional[str] = None
+ # ===================
+
+ # ===================
+ # Story Reels Module Config
+ # ===================
+ nvidia_api_key: Optional[str] = None # NVIDIA API key (primary)
+ cf_url: Optional[str] = None # Cloudflare Worker URL (fallback)
+ cf_api: Optional[str] = None # Cloudflare API key (fallback)
+ gemini_api_key: Optional[str] = None # For AI script generation
+
+ @property
+ def videos_dir_path(self) -> Path:
+ """Directory for storing generated videos"""
+ path = self.base_data_dir / "videos"
+ path.mkdir(parents=True, exist_ok=True)
+ return path
+
+ @property
+ def temp_dir_path(self) -> Path:
+ """Directory for temporary files"""
+ path = self.base_data_dir / "temp"
+ path.mkdir(parents=True, exist_ok=True)
+ return path
+
+ @property
+ def whisper_model_dir(self) -> Path:
+ """Directory for Whisper models"""
+ path = self.base_data_dir / "whisper_models"
+ path.mkdir(parents=True, exist_ok=True)
+ return path
+
+ @property
+ def music_dir_path(self) -> Path:
+ """Directory for music files"""
+ return Path(__file__).parent / "static" / "music"
+
+ def ensure_directories(self):
+ """Ensure all required directories exist"""
+ self.ensure_base_directories()
+ self.videos_dir_path.mkdir(parents=True, exist_ok=True)
+ self.temp_dir_path.mkdir(parents=True, exist_ok=True)
+ self.whisper_model_dir.mkdir(parents=True, exist_ok=True)
+
+
+# Global config instance
+config = NCAkitConfig()
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8931f5e8489dab08686d638f2752a43697f9c03
--- /dev/null
+++ b/core/__init__.py
@@ -0,0 +1 @@
+# NCAkit - Neural Content Automation Toolkit
diff --git a/core/__pycache__/module_registry.cpython-313.pyc b/core/__pycache__/module_registry.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..77504862dca3df8467448e15f813a663ece8beec
Binary files /dev/null and b/core/__pycache__/module_registry.cpython-313.pyc differ
diff --git a/core/module_registry.py b/core/module_registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1802efb600c9e2a10c0c8cc34c3ac18eaf8f07d
--- /dev/null
+++ b/core/module_registry.py
@@ -0,0 +1,145 @@
+"""
+Module Registry for NCAkit
+Handles automatic discovery and registration of feature modules.
+"""
+import importlib
+import pkgutil
+import logging
+from pathlib import Path
+from typing import List, Dict, Any, Callable
+from fastapi import FastAPI
+
+logger = logging.getLogger(__name__)
+
+
+class ModuleInfo:
+ """Information about a registered module"""
+ def __init__(
+ self,
+ name: str,
+ prefix: str,
+ description: str = "",
+        register_fn: Callable | None = None
+ ):
+ self.name = name
+ self.prefix = prefix
+ self.description = description
+ self.register_fn = register_fn
+
+
+class ModuleRegistry:
+ """
+ Centralized registry for all NCAkit modules.
+
+ Each module must have an __init__.py with:
+ - MODULE_NAME: str
+ - MODULE_PREFIX: str
+ - MODULE_DESCRIPTION: str (optional)
+ - register(app, config): function
+ """
+
+ def __init__(self):
+ self._modules: Dict[str, ModuleInfo] = {}
+ self._initialized: bool = False
+
+ def discover_modules(self, modules_package: str = "modules") -> List[str]:
+ """
+ Discover all available modules in the modules package.
+ Returns list of module names.
+ """
+ discovered = []
+
+ try:
+ package = importlib.import_module(modules_package)
+ package_path = Path(package.__file__).parent
+
+ for finder, name, is_pkg in pkgutil.iter_modules([str(package_path)]):
+ # Skip private/template modules
+ if name.startswith('_'):
+ continue
+
+ if is_pkg:
+ discovered.append(name)
+ logger.debug(f"Discovered module: {name}")
+
+ except Exception as e:
+ logger.error(f"Error discovering modules: {e}")
+
+ return discovered
+
+ def load_module(self, module_name: str, modules_package: str = "modules") -> ModuleInfo | None:
+ """Load a single module and return its info"""
+ try:
+ full_module_name = f"{modules_package}.{module_name}"
+ module = importlib.import_module(full_module_name)
+
+ # Check required attributes
+ if not hasattr(module, 'register'):
+ logger.warning(f"Module {module_name} has no register function, skipping")
+ return None
+
+ # Get module metadata
+ name = getattr(module, 'MODULE_NAME', module_name)
+ prefix = getattr(module, 'MODULE_PREFIX', f"/api/{module_name}")
+ description = getattr(module, 'MODULE_DESCRIPTION', "")
+
+ info = ModuleInfo(
+ name=name,
+ prefix=prefix,
+ description=description,
+ register_fn=module.register
+ )
+
+ self._modules[name] = info
+ logger.info(f"Loaded module: {name} (prefix: {prefix})")
+ return info
+
+ except Exception as e:
+ logger.error(f"Failed to load module {module_name}: {e}")
+ return None
+
+ def register_all(self, app: FastAPI, config: Any) -> int:
+ """
+ Register all discovered modules with the FastAPI app.
+ Returns number of successfully registered modules.
+ """
+ if self._initialized:
+ logger.warning("Modules already initialized")
+ return len(self._modules)
+
+ # Discover modules
+ module_names = self.discover_modules()
+
+ registered = 0
+ for name in module_names:
+ info = self.load_module(name)
+ if info and info.register_fn:
+ try:
+ info.register_fn(app, config)
+ registered += 1
+ logger.info(f"Registered module: {info.name}")
+ except Exception as e:
+ logger.error(f"Failed to register module {name}: {e}")
+
+ self._initialized = True
+ logger.info(f"Registered {registered}/{len(module_names)} modules")
+ return registered
+
+ def get_module(self, name: str) -> ModuleInfo | None:
+ """Get info about a specific module"""
+ return self._modules.get(name)
+
+ def list_modules(self) -> List[Dict[str, str]]:
+ """List all registered modules"""
+ return [
+ {
+ "name": info.name,
+ "prefix": info.prefix,
+ "description": info.description
+ }
+ for info in self._modules.values()
+ ]
+
+
+# Global registry instance
+registry = ModuleRegistry()
diff --git a/core/utils/__init__.py b/core/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a414d72dab8f400f2768aabfe6bda3edf03d4be
--- /dev/null
+++ b/core/utils/__init__.py
@@ -0,0 +1 @@
+# Core Utilities
diff --git a/modules/__init__.py b/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b58ce0610cb5363ffde27f98a05ba0c5cb044e4
--- /dev/null
+++ b/modules/__init__.py
@@ -0,0 +1 @@
+# NCAkit Modules
diff --git a/modules/_template/__init__.py b/modules/_template/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..27b1ced60214e5de65895b762add7f370d4203d4
--- /dev/null
+++ b/modules/_template/__init__.py
@@ -0,0 +1,2 @@
+# Module Template - DO NOT USE DIRECTLY
+# Copy this folder to create a new module
diff --git a/modules/_template/module.py b/modules/_template/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..746c866887303b21fac83cf85236b8c20a9c2a1e
--- /dev/null
+++ b/modules/_template/module.py
@@ -0,0 +1,38 @@
+"""
+Module Template for NCAkit
+Copy this folder and rename to create a new module.
+
+Usage:
+1. Copy _template folder to modules/your_module_name/
+2. Update MODULE_NAME, MODULE_PREFIX, MODULE_DESCRIPTION
+3. Implement your router and services
+4. The module will be auto-discovered on startup
+"""
+from fastapi import FastAPI
+
+# ===================
+# Module Metadata
+# ===================
+MODULE_NAME = "template"
+MODULE_PREFIX = "/api/template"
+MODULE_DESCRIPTION = "Template module - copy and modify for your feature"
+
+
+def register(app: FastAPI, config):
+ """
+ Register this module with the main FastAPI app.
+ Called automatically by module_registry.
+
+ Args:
+ app: FastAPI application instance
+ config: NCAkitConfig instance with all settings
+ """
+ from .router import router
+
+ # You can initialize services here and attach to app.state
+ # Example:
+ # from .services import MyService
+ # app.state.my_service = MyService(config)
+
+ # Register the router
+ app.include_router(router, prefix=MODULE_PREFIX, tags=[MODULE_NAME])
diff --git a/modules/_template/router.py b/modules/_template/router.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fc889918e270d04c53541031fde802b9e8ab8bf
--- /dev/null
+++ b/modules/_template/router.py
@@ -0,0 +1,24 @@
+"""
+Template Router - Define your API endpoints here
+"""
+from fastapi import APIRouter
+
+router = APIRouter()
+
+
+@router.get("/")
+async def template_root():
+ """Example endpoint - replace with your implementation"""
+ return {"message": "Template module is working!"}
+
+
+@router.get("/example")
+async def example_endpoint():
+ """Another example endpoint"""
+ return {"data": "This is example data"}
+
+
+# Add more endpoints as needed
+# @router.post("/create")
+# async def create_something(request: YourRequestModel):
+# ...
diff --git a/modules/_template/schemas.py b/modules/_template/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..71bb1cc286194198f2fd4d0f116a3e6baf128255
--- /dev/null
+++ b/modules/_template/schemas.py
@@ -0,0 +1,20 @@
+"""
+Template Schemas - Define your Pydantic models here
+"""
+from pydantic import BaseModel, Field
+from typing import Optional, List
+
+
+class ExampleRequest(BaseModel):
+ """Example request model"""
+ name: str = Field(..., description="Name field")
+ value: Optional[int] = Field(None, description="Optional value")
+
+
+class ExampleResponse(BaseModel):
+ """Example response model"""
+ success: bool
+ data: dict
+
+
+# Add more models as needed
diff --git a/modules/story_reels/__init__.py b/modules/story_reels/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0503a505494296b7edb58f10c7abd46b0b50b8ff
--- /dev/null
+++ b/modules/story_reels/__init__.py
@@ -0,0 +1,97 @@
+"""
+Story Reels Module for NCAkit
+Character-consistent story video generation using Cloudflare AI.
+"""
+from fastapi import FastAPI
+import logging
+
+# Module Metadata
+MODULE_NAME = "story_reels"
+MODULE_PREFIX = "/api/story"
+MODULE_DESCRIPTION = "Generate character-consistent story videos from text scripts"
+
+logger = logging.getLogger(__name__)
+
+
+def register(app: FastAPI, config):
+ """
+ Register the story reels module with FastAPI.
+ Initializes all services and adds routes.
+ """
+ from .router import router, set_story_creator
+ from .services.cloudflare_client import CloudflareClient
+ from .services.script_generator import ScriptGenerator
+ from .services.story_creator import StoryCreator
+
+ logger.info("Registering story_reels module...")
+
+ # Validate configs
+
+ if not config.gemini_api_key:
+ logger.warning("GEMINI_API_KEY missing! AI script generation will fail.")
+
+ # Reuse TTS client from video_creator if available
+ tts_client = getattr(app.state, 'tts_client', None)
+ whisper_client = getattr(app.state, 'whisper_client', None)
+
+ # If video_creator not loaded, initialize our own clients
+ if not tts_client:
+ logger.info("Initializing TTS client for story_reels...")
+ from modules.video_creator.services.libraries.tts_client import TTSClient
+ tts_client = TTSClient(config.hf_tts)
+ app.state.tts_client = tts_client
+
+ if not whisper_client:
+ logger.info("Initializing Whisper client for story_reels...")
+ from modules.video_creator.services.libraries.whisper_client import WhisperClient
+ whisper_client = WhisperClient(
+ model_name=config.whisper_model,
+ model_dir=config.whisper_model_dir
+ )
+ app.state.whisper_client = whisper_client
+
+ # Initialize Script Generator (Gemini)
+ logger.info("Initializing script generator (Gemini)...")
+ script_generator = ScriptGenerator(config.gemini_api_key or "")
+
+ # Initialize NVIDIA client (PRIMARY)
+ nvidia_client = None
+ if config.nvidia_api_key:
+ logger.info("Initializing NVIDIA client (primary)...")
+ from .services.nvidia_client import NvidiaClient
+ nvidia_client = NvidiaClient(config.nvidia_api_key)
+ else:
+ logger.warning("NVIDIA_API_KEY missing! Using Cloudflare only.")
+
+ # Initialize Cloudflare client (FALLBACK)
+ cloudflare_client = None
+ if config.cf_url and config.cf_api:
+ logger.info("Initializing Cloudflare client (fallback)...")
+ cloudflare_client = CloudflareClient(
+ api_url=config.cf_url,
+ api_key=config.cf_api
+ )
+ else:
+ logger.warning("CF_URL or CF_API missing! No fallback available.")
+
+ # Initialize story creator
+ logger.info("Initializing story creator...")
+ story_creator = StoryCreator(
+ config=config,
+ tts_client=tts_client,
+ whisper_client=whisper_client,
+ nvidia_client=nvidia_client,
+ cloudflare_client=cloudflare_client,
+ script_generator=script_generator
+ )
+
+ # Set the global story creator in the router
+ set_story_creator(story_creator)
+
+ # Store in app state
+ app.state.story_creator = story_creator
+
+ # Register routes
+ app.include_router(router, prefix=MODULE_PREFIX, tags=["Story Reels"])
+
+ logger.info("story_reels module registered successfully!")
diff --git a/modules/story_reels/__pycache__/__init__.cpython-313.pyc b/modules/story_reels/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b28cd695cd0b8946bf0761f9e557a7d9281f23b
Binary files /dev/null and b/modules/story_reels/__pycache__/__init__.cpython-313.pyc differ
diff --git a/modules/story_reels/__pycache__/router.cpython-313.pyc b/modules/story_reels/__pycache__/router.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..666b2d2af10e8bdeb09447f8c8497020dcdc232a
Binary files /dev/null and b/modules/story_reels/__pycache__/router.cpython-313.pyc differ
diff --git a/modules/story_reels/__pycache__/schemas.cpython-313.pyc b/modules/story_reels/__pycache__/schemas.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1fd6051995a4b40e28645aec1cca9dbdc0eab005
Binary files /dev/null and b/modules/story_reels/__pycache__/schemas.cpython-313.pyc differ
diff --git a/modules/story_reels/router.py b/modules/story_reels/router.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d0e6de5c2ca9dbb1433e08f78c7037b5ad73d5d
--- /dev/null
+++ b/modules/story_reels/router.py
@@ -0,0 +1,117 @@
+"""
+Story Reels Router - API Endpoints
+"""
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse
+import logging
+
+from .schemas import (
+ GenerateVideoRequest,
+ GenerateVideoResponse,
+ VideoStatusResponse,
+ PreviewResponse,
+ JobStatus
+)
+from .services.story_creator import StoryCreator
+
+logger = logging.getLogger(__name__)
+
+# Will be set during module registration
+story_creator: StoryCreator | None = None
+
+
+def set_story_creator(creator: StoryCreator):
+ """Set the global story creator instance"""
+ global story_creator
+ story_creator = creator
+
+
+router = APIRouter()
+
+
+@router.post("/generate",
+ response_model=GenerateVideoResponse,
+ status_code=201,
+ summary="Generate story video",
+ description="Generate a character-consistent story video from script"
+)
+async def generate_video(request: GenerateVideoRequest):
+ """
+ Main video generation endpoint.
+
+ - Converts script to speech (TTS)
+ - Generates captions (Whisper)
+ - Creates character-consistent images (Cloudflare)
+ - Composes final video (MoviePy)
+ """
+ try:
+ logger.info(f"Generating video for topic: {request.topic}")
+
+ job_id = story_creator.add_to_queue(
+ topic=request.topic,
+ script=request.script,
+ character_profile=request.character_profile,
+ voice=request.voice
+ )
+
+ return GenerateVideoResponse(
+ job_id=job_id,
+ status=JobStatus.queued,
+ message="Video generation started"
+ )
+
+ except Exception as e:
+ logger.error(f"Error starting generation: {e}", exc_info=True)
+ raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.get("/status/{job_id}",
+ response_model=VideoStatusResponse,
+ summary="Get job status",
+ description="Check the processing status of a video generation job"
+)
+async def get_status(job_id: str):
+ """Get video generation status"""
+ status = story_creator.get_status(job_id)
+ return VideoStatusResponse(**status)
+
+
+@router.get("/preview/{job_id}/{scene_id}",
+ response_model=PreviewResponse,
+ summary="Get scene preview",
+ description="Get preview of a generated scene image"
+)
+async def get_preview(job_id: str, scene_id: int):
+ """Get scene preview"""
+ scene = story_creator.get_preview(job_id, scene_id)
+
+ if not scene:
+ raise HTTPException(status_code=404, detail="Scene not found")
+
+ return PreviewResponse(
+ scene_id=scene["scene_id"],
+ image_url=scene["image_path"],
+ prompt=scene["prompt"]
+ )
+
+
+@router.get("/download/{job_id}",
+ summary="Download video",
+ description="Download the generated video file",
+ responses={
+ 200: {"description": "Video file", "content": {"video/mp4": {}}},
+ 404: {"description": "Video not found"}
+ }
+)
+async def download_video(job_id: str):
+ """Download generated video"""
+ video_path = story_creator.get_video_path(job_id)
+
+ if not video_path or not video_path.exists():
+ raise HTTPException(status_code=404, detail="Video not found")
+
+ return FileResponse(
+ video_path,
+ media_type="video/mp4",
+ filename=f"story_{job_id}.mp4"
+ )
diff --git a/modules/story_reels/schemas.py b/modules/story_reels/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..678dd5a77c8bb5c64a86625e704ccdc8ddc5cefe
--- /dev/null
+++ b/modules/story_reels/schemas.py
@@ -0,0 +1,114 @@
+"""
+Story Reels Pydantic Schemas
+Character-consistent video generation from text scripts
+"""
+from pydantic import BaseModel, Field
+from typing import List, Optional
+from enum import Enum
+
+
+class StyleEnum(str, Enum):
+ """Available image styles"""
+ semi_realistic = "semi-realistic"
+ anime = "anime"
+ cartoon = "cartoon"
+ realistic = "realistic"
+ watercolor = "watercolor"
+
+
+class CameraEnum(str, Enum):
+ """Camera shot types"""
+ close_up = "close-up"
+ medium = "medium shot"
+ wide = "wide shot"
+ side = "side view"
+ front = "front view"
+
+
+class JobStatus(str, Enum):
+ """Job processing status"""
+ queued = "queued"
+ processing = "processing"
+ generating_audio = "generating_audio"
+ generating_images = "generating_images"
+ composing_video = "composing_video"
+ ready = "ready"
+ failed = "failed"
+
+
+# ===================
+# Character Profile
+# ===================
+
+class CharacterProfile(BaseModel):
+ """Character definition for consistency"""
+ name: str = Field(..., description="Character name")
+ age: str = Field("25", description="Character age")
+ gender: str = Field("male", description="male/female")
+ hair: str = Field("short black hair", description="Hair description")
+ skin: str = Field("light brown", description="Skin tone")
+ face: str = Field("", description="Face features (optional)")
+ clothes: str = Field("casual clothes", description="Clothing description")
+ style: StyleEnum = Field(StyleEnum.semi_realistic, description="Art style")
+ seed: int = Field(432891, description="Fixed seed for consistency")
+
+
+# ===================
+# Scene
+# ===================
+
+class SceneInput(BaseModel):
+ """Scene from script segment"""
+ scene_id: int
+ scene_text: str = Field(..., description="Scene description")
+ camera: CameraEnum = Field(CameraEnum.medium, description="Camera angle")
+ pose: str = Field("standing", description="Character pose")
+ lighting: str = Field("natural light", description="Lighting description")
+ duration: float = Field(4.0, description="Scene duration in seconds")
+
+
+class GeneratedScene(BaseModel):
+ """Scene with generated content"""
+ scene_id: int
+ prompt: str
+ image_url: str
+ duration: float
+
+
+# ===================
+# API Request/Response
+# ===================
+
+class GenerateVideoRequest(BaseModel):
+ """Main video generation request"""
+ topic: str = Field(..., description="Video topic/title")
+ script: str = Field("", description="Full story script (optional - auto-generated if empty)")
+ character_profile: Optional[CharacterProfile] = Field(
+ default=None,
+ description="Character profile for consistency (optional)"
+ )
+ voice: str = Field("af_heart", description="TTS voice")
+
+
+class GenerateVideoResponse(BaseModel):
+ """Response after starting generation"""
+ job_id: str
+ status: JobStatus = JobStatus.queued
+ message: str = "Video generation started"
+
+
+class VideoStatusResponse(BaseModel):
+ """Job status response"""
+ job_id: str
+ status: JobStatus
+ progress: int = Field(0, description="Progress 0-100")
+ video_url: Optional[str] = None
+ duration: Optional[float] = None
+ error: Optional[str] = None
+
+
+class PreviewResponse(BaseModel):
+ """Scene preview response"""
+ scene_id: int
+ image_url: str
+ prompt: str
diff --git a/modules/story_reels/services/__init__.py b/modules/story_reels/services/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb87d350e8973a44ec3b0ed2b61e159b65a34407
--- /dev/null
+++ b/modules/story_reels/services/__init__.py
@@ -0,0 +1 @@
+# Story Reels Services
diff --git a/modules/story_reels/services/__pycache__/cloudflare_client.cpython-313.pyc b/modules/story_reels/services/__pycache__/cloudflare_client.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6cde4e4c105711ce28fbed4918e4db8e571e3fc4
Binary files /dev/null and b/modules/story_reels/services/__pycache__/cloudflare_client.cpython-313.pyc differ
diff --git a/modules/story_reels/services/__pycache__/nvidia_client.cpython-313.pyc b/modules/story_reels/services/__pycache__/nvidia_client.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..399d24b10c704611b06be3eb6b0b5873c25d3cb2
Binary files /dev/null and b/modules/story_reels/services/__pycache__/nvidia_client.cpython-313.pyc differ
diff --git a/modules/story_reels/services/__pycache__/prompt_builder.cpython-313.pyc b/modules/story_reels/services/__pycache__/prompt_builder.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..abc92e840912ce5f90c42c9fddac2b558fdaf969
Binary files /dev/null and b/modules/story_reels/services/__pycache__/prompt_builder.cpython-313.pyc differ
diff --git a/modules/story_reels/services/__pycache__/script_generator.cpython-313.pyc b/modules/story_reels/services/__pycache__/script_generator.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..971d7b7c3b18b0f242826a5fd29e44ceec47308c
Binary files /dev/null and b/modules/story_reels/services/__pycache__/script_generator.cpython-313.pyc differ
diff --git a/modules/story_reels/services/__pycache__/srt_parser.cpython-313.pyc b/modules/story_reels/services/__pycache__/srt_parser.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d73e777948a97916607f658582947a695fc0fa46
Binary files /dev/null and b/modules/story_reels/services/__pycache__/srt_parser.cpython-313.pyc differ
diff --git a/modules/story_reels/services/__pycache__/story_creator.cpython-313.pyc b/modules/story_reels/services/__pycache__/story_creator.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3609ebb9bcb4fe8b9631b418aff01e55fa41d3c2
Binary files /dev/null and b/modules/story_reels/services/__pycache__/story_creator.cpython-313.pyc differ
diff --git a/modules/story_reels/services/cloudflare_client.py b/modules/story_reels/services/cloudflare_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a05249c3e002ff6bf7e2abf90a6889052b7e356
--- /dev/null
+++ b/modules/story_reels/services/cloudflare_client.py
@@ -0,0 +1,198 @@
+"""
+Cloudflare Workers AI Client
+Text-to-image generation with character consistency
+Uses custom Cloudflare Worker endpoint
+"""
+import logging
+import time
+import requests
+from typing import Optional, List, Dict
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
class CloudflareClient:
    """
    Client for Cloudflare Workers AI image generation.

    Talks to a custom Cloudflare Worker endpoint that proxies Workers AI
    text-to-image models and returns raw image bytes.
    """

    # Default model requested from the worker
    DEFAULT_MODEL = "@cf/stabilityai/stable-diffusion-xl-base-1.0"

    def __init__(self, api_url: str, api_key: str):
        """
        Initialize Cloudflare client.

        Args:
            api_url: Custom Cloudflare Worker URL (CF_URL)
            api_key: API key for authentication (CF_API)
        """
        self.api_url = api_url
        self.api_key = api_key

    def generate_image(
        self,
        prompt: str,
        seed: Optional[int] = None,
        width: int = 1080,
        height: int = 1920,
        quality: int = 90
    ) -> bytes:
        """
        Generate an image from a text prompt.

        Args:
            prompt: Text prompt for image generation
            seed: Fixed seed for reproducibility; omitted from the payload
                when None so the worker chooses its own
            width: Image width (9:16 portrait = 1080)
            height: Image height (9:16 portrait = 1920)
            quality: Image quality (1-100)

        Returns:
            Image bytes (PNG format)

        Raises:
            RuntimeError: On any HTTP/network failure or non-2xx response.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "prompt": prompt,
            "model": self.DEFAULT_MODEL,
            "width": width,
            "height": height,
            "format": "png",
            "quality": quality,
            "download": True
        }

        # Add seed for character consistency if provided
        if seed is not None:
            payload["seed"] = seed

        logger.debug(f"Generating image with prompt: {prompt[:100]}...")

        try:
            response = requests.post(
                self.api_url,
                headers=headers,
                json=payload,
                timeout=120
            )
            response.raise_for_status()

            # Worker returns raw image bytes in the response body
            return response.content

        except requests.exceptions.RequestException as e:
            logger.error(f"Cloudflare API error: {e}")
            # Log the worker's error body (truncated) to aid debugging
            if getattr(e, 'response', None) is not None:
                logger.error(f"Response: {e.response.text[:500]}")
            # Chain the original exception so the root cause is preserved
            raise RuntimeError(f"Image generation failed: {e}") from e

    def generate_and_save(
        self,
        prompt: str,
        output_path: Path,
        seed: Optional[int] = None,
        **kwargs
    ) -> Path:
        """Generate an image and write it to output_path (parent dirs created)."""
        image_bytes = self.generate_image(prompt, seed=seed, **kwargs)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_bytes(image_bytes)

        logger.info(f"Saved image to {output_path}")
        return output_path

    @staticmethod
    def test_connection(api_url: str, api_key: str) -> bool:
        """Issue a tiny generation request; return True if it succeeds."""
        try:
            client = CloudflareClient(api_url, api_key)
            client.generate_image("test", width=256, height=256)
            return True
        except Exception as e:
            logger.error(f"Connection test failed: {e}")
            return False

    def generate_batch(
        self,
        prompts: List[tuple],
        output_dir: Path,
        seed: Optional[int] = None,
        batch_size: int = 5,
        delay_seconds: float = 1.0,
        **kwargs
    ) -> List[Dict]:
        """
        Generate images in batches to save API credits.

        Pattern: Generate `batch_size`, pause, next batch, pause...
        A per-image failure is recorded (path=None, error=...) and does
        not abort the rest of the batch.

        Args:
            prompts: List of (prompt_id, prompt_text) tuples
            output_dir: Directory to save images
            seed: Fixed seed for character consistency
            batch_size: Images per batch (default 5)
            delay_seconds: Delay between images in batch (default 1s)

        Returns:
            List of dicts: {id, path, prompt} on success,
            {id, path=None, error} on failure.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        generated = []
        total = len(prompts)

        # Walk the prompt list in batch_size strides
        for batch_start in range(0, total, batch_size):
            batch_end = min(batch_start + batch_size, total)
            batch = prompts[batch_start:batch_end]

            logger.info(f"Processing batch {batch_start//batch_size + 1}: images {batch_start+1}-{batch_end} of {total}")

            # Process each image in the batch with a small delay between them
            for i, (prompt_id, prompt_text) in enumerate(batch):
                try:
                    output_path = output_dir / f"scene_{prompt_id:03d}.png"

                    # Generate and save
                    self.generate_and_save(
                        prompt=prompt_text,
                        output_path=output_path,
                        seed=seed,
                        **kwargs
                    )

                    generated.append({
                        "id": prompt_id,
                        "path": str(output_path),
                        "prompt": prompt_text
                    })

                    logger.debug(f"Generated image {prompt_id}/{total}")

                    # Delay between images (not after last one in batch)
                    if i < len(batch) - 1:
                        time.sleep(delay_seconds)

                except Exception as e:
                    # Record the failure and keep going with the rest
                    logger.error(f"Failed to generate image {prompt_id}: {e}")
                    generated.append({
                        "id": prompt_id,
                        "path": None,
                        "error": str(e)
                    })

            # Batch complete - small pause before next batch
            if batch_end < total:
                logger.info("Batch complete. Waiting before next batch...")
                time.sleep(delay_seconds * 2)

        successful = len([g for g in generated if g.get("path")])
        logger.info(f"Batch generation complete: {successful}/{total} images generated")

        return generated
diff --git a/modules/story_reels/services/nvidia_client.py b/modules/story_reels/services/nvidia_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5db84f18a2a33212c9c36f7c6029cf4b65787b0
--- /dev/null
+++ b/modules/story_reels/services/nvidia_client.py
@@ -0,0 +1,235 @@
+"""
+NVIDIA Image Generation Client
+Uses Stable Diffusion 3 Medium for high-quality 9:16 images
+FIRST CHOICE - Falls back to Cloudflare on error
+"""
+import logging
+import time
+import requests
+import base64
+from typing import Optional, List, Dict
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
class NvidiaClient:
    """
    Client for NVIDIA AI image generation.

    Uses stable-diffusion-3-medium with a fixed 9:16 aspect ratio.
    Primary backend; callers fall back to Cloudflare on failure.
    """

    # Fixed model endpoint - stable-diffusion-3-medium
    INVOKE_URL = "https://ai.api.nvidia.com/v1/genai/stabilityai/stable-diffusion-3-medium"

    def __init__(self, api_key: str):
        """
        Initialize NVIDIA client.

        Args:
            api_key: NVIDIA API key (nvapi-xxx)
        """
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Accept": "application/json",
        }

    def generate_image(
        self,
        prompt: str,
        seed: int = 0,
        steps: int = 50,
        cfg_scale: float = 5
    ) -> bytes:
        """
        Generate an image from a text prompt.

        Args:
            prompt: Text prompt for image generation
            seed: Random seed for reproducibility
            steps: Number of diffusion steps (default 50)
            cfg_scale: Guidance scale (default 5)

        Returns:
            Image bytes (decoded from the base64 the API returns)

        Raises:
            RuntimeError: On network/HTTP failure, a non-JSON body, or a
                response that carries no recognizable image data.
        """
        # Stable Diffusion 3 Medium payload - 9:16 aspect ratio
        payload = {
            "prompt": prompt,
            "cfg_scale": cfg_scale,
            "aspect_ratio": "9:16",  # Portrait for reels
            "seed": seed,
            "steps": steps,
            "negative_prompt": ""
        }

        logger.debug(f"NVIDIA generating image with prompt: {prompt[:100]}...")

        try:
            response = requests.post(
                self.INVOKE_URL,
                headers=self.headers,
                json=payload,
                timeout=120
            )
            response.raise_for_status()
            response_body = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body from response.json()
            logger.error(f"NVIDIA API error: {e}")
            raise RuntimeError(f"NVIDIA image generation failed: {e}") from e

        # Extract base64 - the API response has several observed shapes
        image_b64 = None

        # Format 1: Direct image field
        if isinstance(response_body, dict) and "image" in response_body:
            image_b64 = response_body["image"]

        # Format 2: Artifacts array with base64 field
        elif isinstance(response_body, dict) and "artifacts" in response_body:
            artifacts = response_body.get("artifacts")
            if artifacts and isinstance(artifacts, list) and len(artifacts) > 0:
                image_b64 = artifacts[0].get("base64")

        # Format 3: Array of dicts with image_b64/base64 field
        elif isinstance(response_body, list) and len(response_body) > 0:
            first = response_body[0]
            if isinstance(first, dict):  # guard: list items may not be dicts
                image_b64 = first.get("image_b64") or first.get("base64")

        if image_b64:
            # Decode base64 payload to raw image bytes
            image_data = base64.b64decode(image_b64)
            logger.info("NVIDIA image generated successfully")
            return image_data

        logger.error(f"NVIDIA: Could not find image data. Keys: {response_body.keys() if isinstance(response_body, dict) else 'list'}")
        raise RuntimeError("No image data in NVIDIA response")

    def generate_and_save(
        self,
        prompt: str,
        output_path: Path,
        seed: int = 0,
        **kwargs
    ) -> Path:
        """Generate an image and write it to output_path (parent dirs created)."""
        image_bytes = self.generate_image(prompt, seed=seed, **kwargs)

        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_bytes(image_bytes)

        logger.info(f"Saved NVIDIA image to {output_path}")
        return output_path

    @staticmethod
    def test_connection(api_key: str) -> bool:
        """Issue a low-step generation request; return True if it succeeds."""
        try:
            client = NvidiaClient(api_key)
            client.generate_image("test", steps=10)
            return True
        except Exception as e:
            logger.error(f"NVIDIA connection test failed: {e}")
            return False

    def generate_batch(
        self,
        prompts: List[tuple],
        output_dir: Path,
        seed: int = 0,
        batch_size: int = 5,
        delay_seconds: float = 1.0,
        fallback_client=None,
        **kwargs
    ) -> List[Dict]:
        """
        Generate images in batches with fallback support.

        Pattern: Generate `batch_size`, pause, next batch...
        If NVIDIA fails for an image, the fallback client (Cloudflare) is
        tried for that same image before giving up on it.

        Args:
            prompts: List of (prompt_id, prompt_text) tuples
            output_dir: Directory to save images
            seed: Fixed seed for character consistency
            batch_size: Images per batch (default 5)
            delay_seconds: Delay between images (default 1s)
            fallback_client: Cloudflare client for fallback

        Returns:
            List of dicts: {id, path, prompt} on success,
            {id, path=None, error} when both backends failed.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        generated = []
        total = len(prompts)

        for batch_start in range(0, total, batch_size):
            batch_end = min(batch_start + batch_size, total)
            batch = prompts[batch_start:batch_end]

            logger.info(f"NVIDIA batch {batch_start//batch_size + 1}: images {batch_start+1}-{batch_end} of {total}")

            for i, (prompt_id, prompt_text) in enumerate(batch):
                output_path = output_dir / f"scene_{prompt_id:03d}.png"
                success = False

                # Try NVIDIA first (primary backend)
                try:
                    self.generate_and_save(
                        prompt=prompt_text,
                        output_path=output_path,
                        seed=seed,
                        **kwargs
                    )
                    success = True
                    logger.debug(f"NVIDIA: Generated image {prompt_id}/{total}")

                except Exception as e:
                    logger.warning(f"NVIDIA failed for image {prompt_id}: {e}")

                    # Fallback to Cloudflare for this one image
                    if fallback_client:
                        try:
                            logger.info(f"Falling back to Cloudflare for image {prompt_id}")
                            fallback_client.generate_and_save(
                                prompt=prompt_text,
                                output_path=output_path,
                                seed=seed,
                                width=1080,
                                height=1920
                            )
                            success = True
                            logger.info(f"Cloudflare fallback successful for image {prompt_id}")
                        except Exception as cf_e:
                            logger.error(f"Cloudflare fallback also failed: {cf_e}")

                if success:
                    generated.append({
                        "id": prompt_id,
                        "path": str(output_path),
                        "prompt": prompt_text
                    })
                else:
                    generated.append({
                        "id": prompt_id,
                        "path": None,
                        "error": "Both NVIDIA and Cloudflare failed"
                    })

                # Delay between images (not after the last one in a batch)
                if i < len(batch) - 1:
                    time.sleep(delay_seconds)

            # Batch complete - pause before next
            if batch_end < total:
                logger.info("Batch complete. Waiting before next batch...")
                time.sleep(delay_seconds * 2)

        successful = len([g for g in generated if g.get("path")])
        logger.info(f"Batch complete: {successful}/{total} images generated")

        return generated
diff --git a/modules/story_reels/services/prompt_builder.py b/modules/story_reels/services/prompt_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..59952fff3b2c16af8cde7d655e6293c8acf709c7
--- /dev/null
+++ b/modules/story_reels/services/prompt_builder.py
@@ -0,0 +1,147 @@
+"""
+Prompt Builder for Character-Consistent Images
+Builds detailed prompts with character profile injection
+"""
+import logging
+from typing import Optional, List
+from ..schemas import CharacterProfile, SceneInput, CameraEnum
+
+logger = logging.getLogger(__name__)
+
+
class PromptBuilder:
    """
    Assembles image-generation prompts that keep one character looking
    the same across every scene of a story.

    Strategy:
    1. Fixed character description in every prompt
    2. Same seed across all images
    3. Consistency keywords
    4. Style anchor
    """

    # Keywords appended to reinforce cross-image consistency
    CONSISTENCY_KEYWORDS = [
        "same character throughout",
        "consistent appearance",
        "consistent face",
        "character reference"
    ]

    def __init__(self, character_profile: Optional[CharacterProfile] = None):
        self.character = character_profile

    def build_character_description(self) -> str:
        """Return a comma-joined physical description of the character."""
        profile = self.character
        if not profile:
            return ""

        pieces = []

        # Name first
        if profile.name:
            pieces.append(f"a character named {profile.name}")

        # Age/gender only when both are present
        if profile.age and profile.gender:
            pieces.append(f"{profile.age} year old {profile.gender}")

        # Physical features, then clothing
        if profile.hair:
            pieces.append(profile.hair)
        if profile.skin:
            pieces.append(f"{profile.skin} skin")
        if profile.face:
            pieces.append(profile.face)
        if profile.clothes:
            pieces.append(f"wearing {profile.clothes}")

        return ", ".join(pieces)

    def build_scene_prompt(self, scene: SceneInput) -> str:
        """
        Build the full prompt for one scene.

        Format:
        [style], [character description], [scene text], [camera], [lighting], [consistency keywords]
        """
        # Style anchor comes first (important for consistency)
        if self.character and self.character.style:
            style_anchor = f"{self.character.style.value} style artwork"
        else:
            style_anchor = "semi-realistic style artwork"

        segments = [style_anchor]

        # Character description injected into every prompt
        description = self.build_character_description()
        if description:
            segments.append(description)

        # Scene body and camera angle
        segments.append(scene.scene_text)
        segments.append(scene.camera.value)

        # Optional pose and lighting hints
        if scene.pose:
            segments.append(f"{scene.pose} pose")
        if scene.lighting:
            segments.append(scene.lighting)

        # Two consistency keywords plus quality boosters
        segments += self.CONSISTENCY_KEYWORDS[:2]
        segments += ["high quality", "detailed", "professional illustration"]

        prompt = ", ".join(segments)
        logger.debug(f"Built prompt: {prompt[:150]}...")

        return prompt

    def build_prompts_for_scenes(self, scenes: List[SceneInput]) -> List[str]:
        """Build one prompt per scene, preserving order."""
        prompts = []
        for current in scenes:
            prompts.append(self.build_scene_prompt(current))
        return prompts

    @staticmethod
    def create_scenes_from_segments(
        segments: List[dict],
        default_camera: CameraEnum = CameraEnum.medium
    ) -> List[SceneInput]:
        """
        Create SceneInput objects from SRT segments.

        Args:
            segments: List of {text, start_ms, end_ms, duration}

        Returns:
            List of SceneInput objects
        """
        scene_list = []

        for index, seg in enumerate(segments, start=1):
            length = seg.get('duration', 4.0)
            # Integer durations are treated as milliseconds — presumably
            # from ms-based callers; verify against caller
            if isinstance(length, int):
                length = length / 1000

            scene_list.append(SceneInput(
                scene_id=index,
                scene_text=seg.get('text', ''),
                camera=default_camera,
                pose="natural pose",
                lighting="natural lighting",
                duration=length
            ))

        return scene_list
diff --git a/modules/story_reels/services/script_generator.py b/modules/story_reels/services/script_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe23fe1e500d1502679bb40c65045a49c256c374
--- /dev/null
+++ b/modules/story_reels/services/script_generator.py
@@ -0,0 +1,256 @@
+"""
+Script Generator using Gemini API
+Generates story scripts from topics for TTS narration
+"""
+import logging
+import requests
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
class ScriptGenerator:
    """
    Generates story scripts using Google Gemini API.

    Features:
    - Topic → Full narration script (<=1000 chars)
    - Character-aware script generation
    - Optimized for TTS output
    """

    GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

    # System prompt for script generation
    SYSTEM_PROMPT = """You are a professional script writer for short-form video content (TikTok, Reels, Shorts).

RULES:
1. Write a narration script for the given topic
2. Maximum 1000 characters (STRICT LIMIT)
3. Write in a natural, engaging voice
4. Focus on storytelling - beginning, middle, end
5. Use simple, clear sentences for TTS
6. NO emojis, NO hashtags, NO special formatting
7. Output ONLY the script text, nothing else

If a character is provided, write the story from their perspective or about them."""

    def __init__(self, api_key: str):
        self.api_key = api_key

    def generate_script(
        self,
        topic: str,
        character_name: Optional[str] = None,
        max_chars: int = 1000
    ) -> str:
        """
        Generate a story script from topic.

        Args:
            topic: Story topic/idea
            character_name: Optional character name to include
            max_chars: Maximum character limit (default 1000)

        Returns:
            Generated script text (trimmed to max_chars at a word boundary)

        Raises:
            RuntimeError: If the HTTP request fails or the response does
                not contain the expected candidates/content structure.
        """
        # Build the prompt
        user_prompt = f"Topic: {topic}"

        if character_name:
            user_prompt += f"\nMain Character: {character_name}"

        user_prompt += f"\n\nWrite a short narration script (max {max_chars} characters)."

        logger.info(f"Generating script for topic: {topic[:50]}...")

        try:
            response = requests.post(
                f"{self.GEMINI_API_URL}?key={self.api_key}",
                headers={"Content-Type": "application/json"},
                json={
                    "contents": [
                        {
                            "role": "user",
                            "parts": [{"text": self.SYSTEM_PROMPT + "\n\n" + user_prompt}]
                        }
                    ],
                    "generationConfig": {
                        "temperature": 0.7,
                        "maxOutputTokens": 500,
                        "topP": 0.9
                    }
                },
                timeout=30
            )
            response.raise_for_status()

            data = response.json()

            # Extract text from response
            script = data["candidates"][0]["content"]["parts"][0]["text"]

            # Enforce character limit, cutting at the last full word
            if len(script) > max_chars:
                script = script[:max_chars].rsplit(' ', 1)[0] + "."

            logger.info(f"Generated script: {len(script)} chars")
            return script.strip()

        except requests.exceptions.RequestException as e:
            logger.error(f"Gemini API error: {e}")
            # Chain the original exception so the root cause is preserved
            raise RuntimeError(f"Script generation failed: {e}") from e
        except (KeyError, IndexError) as e:
            logger.error(f"Failed to parse Gemini response: {e}")
            raise RuntimeError("Invalid response from Gemini API") from e

    @staticmethod
    def test_connection(api_key: str) -> bool:
        """Probe the Gemini API with a tiny request; True on success."""
        try:
            gen = ScriptGenerator(api_key)
            gen.generate_script("test", max_chars=50)
            return True
        except Exception:
            # Broad catch is deliberate: any failure means "not connected"
            return False

    # System prompt for image prompt generation
    IMAGE_PROMPT_SYSTEM = """You are an expert at creating detailed image prompts for AI image generation.

Your task: Generate detailed image prompts for each 2-second scene of a story video.

CONTEXT:
- Full story script is provided so you understand the narrative
- Each 2-second chunk needs a visual prompt
- Character profile (if provided) must be consistent in EVERY prompt
- Images should tell the story visually

RULES FOR PROMPTS:
1. Be detailed and specific (50-100 words each)
2. Include: scene description, character pose/action, camera angle, lighting, mood
3. Add style keywords at the end (semi-realistic, detailed, high quality)
4. DO NOT include text/dialogue in prompts
5. Keep character appearance CONSISTENT across all prompts
6. Use cinematographic language (close-up, wide shot, etc.)

OUTPUT FORMAT:
Return ONLY valid JSON array, no markdown, no explanation:
[
  {"chunk_id": 1, "prompt": "detailed prompt here..."},
  {"chunk_id": 2, "prompt": "detailed prompt here..."}
]"""

    def generate_image_prompts(
        self,
        full_script: str,
        chunks: list,
        character_profile: Optional[dict] = None,
        max_batch: int = 30
    ) -> list:
        """
        Generate detailed image prompts for all 2-second chunks.

        Chunks are sent to Gemini in batches of `max_batch`; a failed
        batch falls back to simple text-derived prompts so the pipeline
        never stalls on a bad API response.

        Args:
            full_script: Complete narration script (for context)
            chunks: List of {chunk_id, text, duration} from SRTParser
            character_profile: Optional character dict
            max_batch: Max chunks per API call (default 30)

        Returns:
            List of {chunk_id, prompt} dicts
        """
        import json

        all_prompts = []
        total_chunks = len(chunks)

        # Split into batches if too many chunks
        for batch_start in range(0, total_chunks, max_batch):
            batch_end = min(batch_start + max_batch, total_chunks)
            batch_chunks = chunks[batch_start:batch_end]

            logger.info(f"Generating prompts for chunks {batch_start+1}-{batch_end} of {total_chunks}")

            # Build user prompt
            user_prompt = f"""FULL STORY SCRIPT:
{full_script}

"""
            if character_profile:
                user_prompt += f"""CHARACTER PROFILE:
- Name: {character_profile.get('name', 'Main character')}
- Age: {character_profile.get('age', '25')}
- Gender: {character_profile.get('gender', 'male')}
- Hair: {character_profile.get('hair', 'short black hair')}
- Skin: {character_profile.get('skin', 'light skin')}
- Clothes: {character_profile.get('clothes', 'casual clothes')}
- Style: {character_profile.get('style', 'semi-realistic')}

IMPORTANT: Include this character description in EVERY prompt!

"""

            user_prompt += "2-SECOND CHUNKS TO GENERATE PROMPTS FOR:\n"
            for chunk in batch_chunks:
                user_prompt += f"- Chunk {chunk['chunk_id']}: \"{chunk['text']}\"\n"

            user_prompt += "\nGenerate detailed image prompts for each chunk. Return ONLY JSON array."

            try:
                response = requests.post(
                    f"{self.GEMINI_API_URL}?key={self.api_key}",
                    headers={"Content-Type": "application/json"},
                    json={
                        "contents": [
                            {
                                "role": "user",
                                "parts": [{"text": self.IMAGE_PROMPT_SYSTEM + "\n\n" + user_prompt}]
                            }
                        ],
                        "generationConfig": {
                            "temperature": 0.7,
                            "maxOutputTokens": 4000,
                            "topP": 0.9
                        }
                    },
                    timeout=60
                )
                response.raise_for_status()

                data = response.json()
                text = data["candidates"][0]["content"]["parts"][0]["text"]

                # Clean response - strip a markdown code fence if present
                text = text.strip()
                if text.startswith("```"):
                    text = text.split("```")[1]
                    if text.startswith("json"):
                        text = text[4:]
                text = text.strip()

                # Parse JSON
                batch_prompts = json.loads(text)
                all_prompts.extend(batch_prompts)

                logger.info(f"Generated {len(batch_prompts)} prompts in batch")

            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse JSON response: {e}")
                # Fallback: create simple prompts from the chunk text
                for chunk in batch_chunks:
                    all_prompts.append({
                        "chunk_id": chunk["chunk_id"],
                        "prompt": f"{chunk['text']}, semi-realistic style, high quality, detailed"
                    })
            except Exception as e:
                logger.error(f"Gemini API error: {e}")
                # Fallback for any other failure (network, bad structure)
                for chunk in batch_chunks:
                    all_prompts.append({
                        "chunk_id": chunk["chunk_id"],
                        "prompt": f"{chunk['text']}, semi-realistic style, high quality"
                    })

        logger.info(f"Generated {len(all_prompts)} total image prompts")
        return all_prompts
diff --git a/modules/story_reels/services/srt_parser.py b/modules/story_reels/services/srt_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..470e47dbd0034888cf66cc142e34d1476a7c3de6
--- /dev/null
+++ b/modules/story_reels/services/srt_parser.py
@@ -0,0 +1,214 @@
+"""
+SRT Parser for Story Reels
+Parses SRT segments and calculates scene durations
+"""
+import re
+import logging
+from typing import List, Dict
+from pathlib import Path
+import math
+
+logger = logging.getLogger(__name__)
+
+
+class SRTParser:
+ """
+ Parses SRT files and calculates image counts based on 2s rule.
+ """
+
+ # SRT timestamp regex
+ TIMESTAMP_PATTERN = re.compile(
+ r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2}),(\d{3})'
+ )
+
+ @staticmethod
+ def parse_timestamp(h: str, m: str, s: str, ms: str) -> int:
+ """Convert timestamp to milliseconds"""
+ return int(h) * 3600000 + int(m) * 60000 + int(s) * 1000 + int(ms)
+
+ @classmethod
+ def parse_srt_content(cls, srt_content: str) -> List[Dict]:
+ """
+ Parse SRT content into segments.
+
+ Returns:
+ List of {text, start_ms, end_ms, duration_ms, image_count}
+ """
+ segments = []
+ blocks = srt_content.strip().split('\n\n')
+
+ for block in blocks:
+ lines = block.strip().split('\n')
+ if len(lines) < 3:
+ continue
+
+ # Skip sequence number (line 0)
+ timestamp_line = lines[1]
+ text_lines = lines[2:]
+
+ match = cls.TIMESTAMP_PATTERN.match(timestamp_line)
+ if not match:
+ continue
+
+ start_ms = cls.parse_timestamp(*match.groups()[:4])
+ end_ms = cls.parse_timestamp(*match.groups()[4:])
+ duration_ms = end_ms - start_ms
+
+ # Calculate image count (2 seconds per image)
+ duration_s = duration_ms / 1000
+ image_count = max(1, math.ceil(duration_s / 2))
+
+ segments.append({
+ 'text': ' '.join(text_lines),
+ 'start_ms': start_ms,
+ 'end_ms': end_ms,
+ 'duration_ms': duration_ms,
+ 'duration': duration_s,
+ 'image_count': image_count
+ })
+
+ logger.info(f"Parsed {len(segments)} SRT segments")
+ return segments
+
+ @classmethod
+ def parse_srt_file(cls, srt_path: Path) -> List[Dict]:
+ """Parse SRT file"""
+ content = srt_path.read_text(encoding='utf-8')
+ return cls.parse_srt_content(content)
+
+ @staticmethod
+ def calculate_total_images(segments: List[Dict]) -> int:
+ """Calculate total images needed"""
+ return sum(seg.get('image_count', 1) for seg in segments)
+
+ @staticmethod
+ def calculate_total_duration(segments: List[Dict]) -> float:
+ """Calculate total duration in seconds"""
+ return sum(seg.get('duration', 0) for seg in segments)
+
+ @classmethod
+ def segments_from_captions(cls, captions: List[Dict]) -> List[Dict]:
+ """
+ Convert Whisper captions to segments.
+
+ Args:
+ captions: List from WhisperClient [{text, startMs, endMs}]
+
+ Returns:
+ Segments with image_count calculated
+ """
+ segments = []
+
+ for cap in captions:
+ start_ms = cap.get('startMs', 0)
+ end_ms = cap.get('endMs', 0)
+ duration_ms = end_ms - start_ms
+ duration_s = duration_ms / 1000
+
+ segments.append({
+ 'text': cap.get('text', ''),
+ 'start_ms': start_ms,
+ 'end_ms': end_ms,
+ 'duration_ms': duration_ms,
+ 'duration': duration_s,
+ 'image_count': max(1, math.ceil(duration_s / 2))
+ })
+
+ return segments
+
+ @classmethod
+ def create_2s_chunks(cls, captions: List[Dict], total_duration: float) -> List[Dict]:
+ """
+ Create 2-second chunks for image generation.
+
+ This is SEPARATE from .srt captions:
+ - .srt = original Whisper captions (for video subtitles)
+ - 2s chunks = for image prompt generation
+
+ Args:
+ captions: Original Whisper captions
+ total_duration: Total audio duration in seconds
+
+ Returns:
+ List of 2-second chunks with text for image prompts
+ """
+ # Flatten all caption text with timing
+ all_words = []
+ for cap in captions:
+ start_ms = cap.get('startMs', 0)
+ end_ms = cap.get('endMs', 0)
+ text = cap.get('text', '').strip()
+ if text:
+ all_words.append({
+ 'text': text,
+ 'start_ms': start_ms,
+ 'end_ms': end_ms
+ })
+
+ # Calculate number of 2-second chunks
+ num_chunks = max(1, math.ceil(total_duration / 2))
+ chunk_duration_ms = 2000 # 2 seconds
+
+ chunks = []
+
+ for i in range(num_chunks):
+ chunk_start = i * chunk_duration_ms
+ chunk_end = min((i + 1) * chunk_duration_ms, int(total_duration * 1000))
+
+ # Last chunk might be shorter
+ actual_duration = (chunk_end - chunk_start) / 1000
+
+ # Find words that fall within this chunk
+ chunk_texts = []
+ for word in all_words:
+ # Word overlaps with chunk
+ if word['end_ms'] > chunk_start and word['start_ms'] < chunk_end:
+ chunk_texts.append(word['text'])
+
+ # Combine texts for this chunk
+ chunk_text = ' '.join(chunk_texts) if chunk_texts else f"Scene {i + 1}"
+
+ chunks.append({
+ 'chunk_id': i + 1,
+ 'text': chunk_text,
+ 'start_ms': chunk_start,
+ 'end_ms': chunk_end,
+ 'duration': actual_duration
+ })
+
+ logger.info(f"Created {len(chunks)} x 2-second chunks for image generation")
+ return chunks
+
+ @staticmethod
+ def generate_srt_content(captions: List[Dict]) -> str:
+ """
+ Generate .srt file content from Whisper captions.
+ This will be embedded in the final video.
+
+ Args:
+ captions: Original Whisper captions
+
+ Returns:
+ SRT formatted string
+ """
+ srt_lines = []
+
+ for i, cap in enumerate(captions, 1):
+ start_ms = cap.get('startMs', 0)
+ end_ms = cap.get('endMs', 0)
+ text = cap.get('text', '').strip()
+
+ # Format timestamps: HH:MM:SS,mmm
+ def format_time(ms):
+ hours = ms // 3600000
+ minutes = (ms % 3600000) // 60000
+ seconds = (ms % 60000) // 1000
+ millis = ms % 1000
+ return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
+
+ srt_lines.append(str(i))
+ srt_lines.append(f"{format_time(start_ms)} --> {format_time(end_ms)}")
+ srt_lines.append(text)
+ srt_lines.append("")
+
+ return '\n'.join(srt_lines)
diff --git a/modules/story_reels/services/story_creator.py b/modules/story_reels/services/story_creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..992533bafdd3694130ffd39615b0d61f4a60927c
--- /dev/null
+++ b/modules/story_reels/services/story_creator.py
@@ -0,0 +1,583 @@
+"""
+Story Creator - Main Pipeline Orchestrator
+Coordinates TTS, Whisper, Cloudflare, and MoviePy
+"""
+import asyncio
+import logging
+import time
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from ..schemas import (
+ CharacterProfile,
+ SceneInput,
+ JobStatus,
+ GeneratedScene
+)
+from .cloudflare_client import CloudflareClient
+from .prompt_builder import PromptBuilder
+from .srt_parser import SRTParser
+from .script_generator import ScriptGenerator
+
+logger = logging.getLogger(__name__)
+
+
+class StoryCreator:
+ """
+ Main orchestrator for story-to-video pipeline.
+
+ Pipeline:
+ 1. Script → TTS → voice.mp3
+ 2. voice.mp3 → Whisper → segments
+ 3. Segments → PromptBuilder → prompts
+ 4. Prompts → Cloudflare → images
+ 5. Images + Audio → MoviePy → video
+ """
+
+ def __init__(
+ self,
+ config,
+ tts_client,
+ whisper_client,
+ nvidia_client=None, # PRIMARY
+ cloudflare_client=None, # FALLBACK
+ script_generator: ScriptGenerator = None
+ ):
+ self.config = config
+ self.tts = tts_client
+ self.whisper = whisper_client
+ self.nvidia = nvidia_client # PRIMARY
+ self.cloudflare = cloudflare_client # FALLBACK
+ self.script_gen = script_generator
+
+ # Job tracking
+ self.jobs: Dict[str, Dict] = {}
+ self.queue: List[Dict] = []
+ self.processing = False
+
+ def add_to_queue(
+ self,
+ topic: str,
+ script: str,
+ character_profile: Optional[CharacterProfile] = None,
+ voice: str = "af_heart"
+ ) -> str:
+ """
+ Add story to generation queue.
+
+ Returns:
+ job_id for tracking
+ """
+ job_id = str(uuid.uuid4()).replace('-', '')[:16]
+
+ job = {
+ "id": job_id,
+ "topic": topic,
+ "script": script,
+ "character": character_profile,
+ "voice": voice,
+ "status": JobStatus.queued,
+ "progress": 0,
+ "created_at": datetime.now().isoformat(),
+ "video_url": None,
+ "duration": None,
+ "error": None,
+ "scenes": []
+ }
+
+ self.jobs[job_id] = job
+ self.queue.append(job)
+
+ logger.info(f"Added job {job_id} to queue. Queue length: {len(self.queue)}")
+
+ # Start processing if not already running
+ if not self.processing:
+ asyncio.create_task(self.process_queue())
+
+ return job_id
+
+ async def process_queue(self):
+ """Process jobs in queue"""
+ if self.processing:
+ return
+
+ self.processing = True
+
+ try:
+ while self.queue:
+ job = self.queue[0]
+ job_id = job["id"]
+
+ logger.info(f"Processing job {job_id}")
+
+ try:
+ await self._process_job(job)
+ job["status"] = JobStatus.ready
+ job["progress"] = 100
+ logger.info(f"Job {job_id} completed successfully")
+ except Exception as e:
+ logger.error(f"Job {job_id} failed: {e}", exc_info=True)
+ job["status"] = JobStatus.failed
+ job["error"] = str(e)
+ finally:
+ self.queue.pop(0)
+ finally:
+ self.processing = False
+
+    async def _process_job(self, job: Dict):
+        """
+        Run one job through the full pipeline, mutating the job dict in place.
+
+        Stages: optional Gemini script generation -> TTS -> Whisper captions
+        (.srt plus 2-second chunks) -> AI image prompts -> image generation
+        (NVIDIA primary / Cloudflare fallback, in parallel when both exist)
+        -> MoviePy composition.
+
+        Raises:
+            Exception: any stage failure propagates to process_queue(), which
+                marks the job failed. Temp files are cleaned up either way.
+        """
+        job_id = job["id"]
+        # Per-job scratch directory under the configured temp root
+        temp_dir = self.config.temp_dir_path / job_id
+        temp_dir.mkdir(parents=True, exist_ok=True)
+
+        temp_files = []
+
+        try:
+            # ====================
+            # Step 0: Generate Script (if not provided)
+            # ====================
+            script = job["script"]
+
+            if not script or script.strip() == "":
+                logger.info(f"[{job_id}] Generating script from topic using Gemini...")
+                job["progress"] = 5
+
+                char_name = job["character"].name if job["character"] else None
+                script = self.script_gen.generate_script(
+                    topic=job["topic"],
+                    character_name=char_name,
+                    max_chars=1000
+                )
+                job["script"] = script
+                logger.info(f"[{job_id}] Generated script: {len(script)} chars")
+
+            # ====================
+            # Step 1: Generate TTS
+            # ====================
+            job["status"] = JobStatus.generating_audio
+            job["progress"] = 10
+
+            logger.info(f"[{job_id}] Generating TTS audio...")
+
+            audio_data, tts_duration = await self.tts.generate(
+                script,
+                job["voice"]
+            )
+
+            wav_path = temp_dir / "voice.wav"
+            mp3_path = temp_dir / "voice.mp3"
+            temp_files.extend([wav_path, mp3_path])
+
+            # Import FFmpegUtils from video_creator (function-scope import to
+            # avoid a hard module dependency at import time)
+            from modules.video_creator.services.libraries.ffmpeg_utils import FFmpegUtils
+
+            FFmpegUtils.save_audio_as_wav(audio_data, wav_path)
+            FFmpegUtils.save_audio_as_mp3(audio_data, mp3_path)
+
+            # Get actual duration via ffprobe; tts_duration from the TTS
+            # client is not trusted/used beyond this point
+            audio_duration = FFmpegUtils.get_video_duration(wav_path)
+            logger.info(f"[{job_id}] Audio generated: {audio_duration:.2f}s")
+
+            job["progress"] = 25
+
+            # ====================
+            # Step 2: Generate Captions (Whisper)
+            # ====================
+            logger.info(f"[{job_id}] Generating captions with Whisper...")
+
+            captions = self.whisper.create_captions(str(wav_path))
+            captions_dict = [c.dict() for c in captions]
+
+            # OUTPUT 1: .srt content (for video subtitles)
+            srt_content = SRTParser.generate_srt_content(captions_dict)
+            srt_path = temp_dir / "voice.srt"
+            srt_path.write_text(srt_content, encoding='utf-8')
+            temp_files.append(srt_path)
+            logger.info(f"[{job_id}] Generated .srt with {len(captions)} captions")
+
+            # OUTPUT 2: 2-second chunks (one image prompt per chunk)
+            image_chunks = SRTParser.create_2s_chunks(captions_dict, audio_duration)
+            logger.info(f"[{job_id}] Created {len(image_chunks)} x 2s chunks for images")
+
+            job["progress"] = 40
+            job["srt_path"] = str(srt_path)
+
+            # ====================
+            # Step 3: Generate Image Prompts using AI
+            # ====================
+            job["status"] = JobStatus.generating_images
+            logger.info(f"[{job_id}] Generating AI-powered image prompts...")
+
+            # Convert character profile to dict if exists (plain dict is what
+            # the prompt generator expects)
+            char_dict = None
+            if job["character"]:
+                char_dict = {
+                    "name": job["character"].name,
+                    "age": job["character"].age,
+                    "gender": job["character"].gender,
+                    "hair": job["character"].hair,
+                    "skin": job["character"].skin,
+                    "clothes": job["character"].clothes,
+                    "style": job["character"].style.value if hasattr(job["character"].style, 'value') else str(job["character"].style)
+                }
+
+            # Generate all image prompts at once using Gemini
+            # Input: Full script (context) + 2s chunks → Output: JSON array of prompts
+            ai_prompts = self.script_gen.generate_image_prompts(
+                full_script=script,
+                chunks=image_chunks,
+                character_profile=char_dict
+            )
+
+            logger.info(f"[{job_id}] AI generated {len(ai_prompts)} image prompts")
+
+            job["progress"] = 50
+
+            # ====================
+            # Step 4: Generate Images (PARALLEL - NVIDIA + Cloudflare)
+            # ====================
+            # If both APIs available: split images for ~2x speed
+            # 1 second delay between each request (rate limit safe)
+
+            # Fixed seed keeps the character visually consistent across scenes
+            seed = job["character"].seed if job["character"] else 432891
+
+            # Build (chunk_id, prompt) pairs from AI-generated prompts
+            prompts_list = []
+            for p in ai_prompts:
+                prompts_list.append((p["chunk_id"], p["prompt"]))
+
+            total_images = len(prompts_list)
+            logger.info(f"[{job_id}] Generating {total_images} images...")
+
+            # Check which APIs are available
+            has_nvidia = self.nvidia is not None
+            has_cloudflare = self.cloudflare is not None
+
+            if has_nvidia and has_cloudflare:
+                # PARALLEL MODE: NVIDIA 70%, Cloudflare 30% (NVIDIA has better quality)
+                logger.info(f"[{job_id}] Parallel mode: NVIDIA 70% + Cloudflare 30%")
+
+                import threading
+
+                # Split: first 70% to NVIDIA, remaining 30% to Cloudflare
+                nvidia_count = int(total_images * 0.7)
+                if nvidia_count == 0:
+                    nvidia_count = 1
+
+                nvidia_prompts = prompts_list[:nvidia_count]
+                cloudflare_prompts = prompts_list[nvidia_count:]
+
+                # Get indices
+                nvidia_indices = [p[0] for p in nvidia_prompts]
+                cloudflare_indices = [p[0] for p in cloudflare_prompts]
+
+                nvidia_results = []
+                cloudflare_results = []
+
+                def nvidia_worker():
+                    """NVIDIA: 5 requests → wait → next 5"""
+                    # NOTE(review): nonlocal is not required for .append-only
+                    # use, but is harmless
+                    nonlocal nvidia_results
+                    batch_size = 5
+                    for batch_start in range(0, len(nvidia_prompts), batch_size):
+                        batch = nvidia_prompts[batch_start:batch_start + batch_size]
+                        logger.info(f"NVIDIA batch {batch_start//batch_size + 1}: {len(batch)} images")
+
+                        for orig_idx, prompt in batch:
+                            try:
+                                output_path = temp_dir / f"scene_{orig_idx:03d}.png"
+                                self.nvidia.generate_and_save(prompt, output_path, seed=seed)
+                                nvidia_results.append({"id": orig_idx, "path": str(output_path), "prompt": prompt})
+                                logger.debug(f"NVIDIA: {orig_idx}")
+                            except Exception as e:
+                                # Failed scenes are recorded with path=None and
+                                # skipped when building generated_scenes below
+                                logger.error(f"NVIDIA failed {orig_idx}: {e}")
+                                nvidia_results.append({"id": orig_idx, "path": None, "error": str(e)})
+                            time.sleep(1.0)  # 1s delay between requests in same batch
+
+                        # Batch complete - wait before next batch
+                        if batch_start + batch_size < len(nvidia_prompts):
+                            logger.info("NVIDIA batch complete, waiting...")
+                            time.sleep(2.0)
+
+                def cloudflare_worker():
+                    """Cloudflare: 5 requests → wait → next 5"""
+                    nonlocal cloudflare_results
+                    batch_size = 5
+                    for batch_start in range(0, len(cloudflare_prompts), batch_size):
+                        batch = cloudflare_prompts[batch_start:batch_start + batch_size]
+                        logger.info(f"Cloudflare batch {batch_start//batch_size + 1}: {len(batch)} images")
+
+                        for orig_idx, prompt in batch:
+                            try:
+                                output_path = temp_dir / f"scene_{orig_idx:03d}.png"
+                                self.cloudflare.generate_and_save(prompt, output_path, seed=seed, width=1080, height=1920)
+                                cloudflare_results.append({"id": orig_idx, "path": str(output_path), "prompt": prompt})
+                                logger.debug(f"Cloudflare: {orig_idx}")
+                            except Exception as e:
+                                logger.error(f"Cloudflare failed {orig_idx}: {e}")
+                                cloudflare_results.append({"id": orig_idx, "path": None, "error": str(e)})
+                            time.sleep(1.0)  # 1s delay between requests in same batch
+
+                        # Batch complete - wait before next batch
+                        if batch_start + batch_size < len(cloudflare_prompts):
+                            logger.info("Cloudflare batch complete, waiting...")
+                            time.sleep(2.0)
+
+                # Run both in parallel (each has its own batch counter).
+                # Threads block the event loop's thread only at join();
+                # NOTE(review): consider asyncio.to_thread to avoid blocking.
+                t1 = threading.Thread(target=nvidia_worker)
+                t2 = threading.Thread(target=cloudflare_worker)
+                t1.start()
+                t2.start()
+                t1.join()
+                t2.join()
+
+                # Combine results, restoring original chunk order
+                batch_results = nvidia_results + cloudflare_results
+                batch_results.sort(key=lambda x: x["id"])
+
+            elif has_nvidia:
+                # NVIDIA only with 1s delay
+                logger.info(f"[{job_id}] NVIDIA only mode")
+                batch_results = self.nvidia.generate_batch(
+                    prompts=prompts_list,
+                    output_dir=temp_dir,
+                    seed=seed,
+                    batch_size=5,
+                    delay_seconds=1.0
+                )
+            elif has_cloudflare:
+                # Cloudflare only with 1s delay
+                logger.info(f"[{job_id}] Cloudflare only mode")
+                batch_results = self.cloudflare.generate_batch(
+                    prompts=prompts_list,
+                    output_dir=temp_dir,
+                    seed=seed,
+                    batch_size=5,
+                    delay_seconds=1.0,
+                    width=1080,
+                    height=1920
+                )
+            else:
+                raise Exception("No image generation client available!")
+
+            # Build generated_scenes from batch results (failed images,
+            # i.e. path=None, are dropped)
+            generated_scenes = []
+            for result in batch_results:
+                if result.get("path"):
+                    temp_files.append(Path(result["path"]))
+
+                    # Find matching chunk for duration (default 2.0s)
+                    scene_duration = 2.0
+                    for chunk in image_chunks:
+                        if chunk['chunk_id'] == result["id"]:
+                            scene_duration = chunk['duration']
+                            break
+
+                    generated_scenes.append({
+                        "scene_id": result["id"],
+                        "prompt": result["prompt"],
+                        "image_path": result["path"],
+                        "duration": scene_duration
+                    })
+
+            logger.info(f"[{job_id}] Generated {len(generated_scenes)}/{len(ai_prompts)} images")
+
+            job["scenes"] = generated_scenes
+            job["progress"] = 80
+
+            # ====================
+            # Step 5: Compose Video
+            # ====================
+            job["status"] = JobStatus.composing_video
+            logger.info(f"[{job_id}] Composing final video...")
+
+            output_path = self.config.videos_dir_path / f"{job_id}.mp4"
+
+            await self._compose_video(
+                scenes=generated_scenes,
+                audio_path=mp3_path,
+                output_path=output_path
+            )
+
+            job["video_url"] = str(output_path)
+            job["duration"] = audio_duration
+            job["progress"] = 100
+
+            logger.info(f"[{job_id}] Video saved to {output_path}")
+
+        finally:
+            # Cleanup temp files (best-effort: never mask a pipeline error)
+            for temp_file in temp_files:
+                if temp_file.exists():
+                    try:
+                        temp_file.unlink()
+                    except:
+                        pass
+
+            # Remove temp directory (rmdir only succeeds if empty)
+            if temp_dir.exists():
+                try:
+                    temp_dir.rmdir()
+                except:
+                    pass
+
+    async def _compose_video(
+        self,
+        scenes: List[Dict],
+        audio_path: Path,
+        output_path: Path
+    ):
+        """
+        Compose video from images and audio using MoviePy.
+
+        Effects:
+        - Crossfade transitions (0.3s) between scenes
+        - Subtle Ken Burns zoom (1.05x) for dynamic feel
+        - Fade in at start, fade out at end
+
+        Args:
+            scenes: ordered dicts with 'image_path' and 'duration' keys.
+            audio_path: narration audio; its duration drives the final length.
+            output_path: destination .mp4 path.
+        """
+        # NOTE(review): CompositeVideoClip and vfx are imported but unused here
+        from moviepy.editor import (
+            ImageClip,
+            AudioFileClip,
+            concatenate_videoclips,
+            CompositeVideoClip,
+            vfx
+        )
+
+        # Constants
+        CROSSFADE_DURATION = 0.3  # Transition duration
+        ZOOM_FACTOR = 1.05        # Subtle zoom (1.05 = 5% zoom)
+        FADE_DURATION = 0.5       # Fade in/out duration
+        TARGET_HEIGHT = 1920      # Portrait
+        TARGET_WIDTH = 1080
+
+        # Load audio first to get exact duration
+        audio = AudioFileClip(str(audio_path))
+        audio_duration = audio.duration
+
+        # Create video clips from images with effects
+        clips = []
+        total_video_duration = 0
+        total_scenes = len(scenes)
+
+        for i, scene in enumerate(scenes):
+            image_path = scene["image_path"]
+            duration = scene["duration"]
+
+            # For the last clip, adjust duration to match audio
+            if i == total_scenes - 1:
+                remaining = audio_duration - total_video_duration
+                if remaining > 0:
+                    duration = remaining
+
+            # Create image clip
+            clip = ImageClip(image_path).set_duration(duration)
+
+            # Resize to portrait (1080x1920)
+            clip = clip.resize(height=TARGET_HEIGHT)
+
+            # Scene position-based effects
+            # Hook (first 2 clips): Zoom OUT (start big, end normal) - grabs attention
+            # Middle clips: Subtle zoom IN (Ken Burns)
+            # Outro (last clip): Static with fade out
+
+            if i < 2:
+                # HOOK: Zoom OUT effect (1.1 → 1.0) - dynamic attention grabber
+                # duration is captured via the default arg so each iteration's
+                # lambda sees its own clip length (avoids late-binding bug)
+                def make_zoom_out(t, clip_duration=duration):
+                    zoom = 1.1 - (0.1 * (t / clip_duration))  # 1.1 to 1.0
+                    return zoom
+                clip = clip.resize(lambda t: make_zoom_out(t))
+
+            elif i < total_scenes - 1:
+                # MIDDLE: Ken Burns zoom IN (1.0 → 1.05)
+                def make_zoom_in(t, clip_duration=duration):
+                    zoom = 1.0 + (ZOOM_FACTOR - 1.0) * (t / clip_duration)
+                    return zoom
+                clip = clip.resize(lambda t: make_zoom_in(t))
+
+            # Last clip stays static (no zoom)
+
+            # NOTE(review): resizing to a fixed width AND height stretches the
+            # frame to 1080x1920 rather than center-cropping as the original
+            # comment claimed -- confirm whether aspect distortion is intended
+            clip = clip.resize(width=TARGET_WIDTH, height=TARGET_HEIGHT)
+
+            # Transitions: crossfade for smooth scene changes (NOT on first 2 clips)
+            # Hook clips: NO crossfade, clean direct cut
+            if i >= 2 and duration > CROSSFADE_DURATION:
+                clip = clip.crossfadein(CROSSFADE_DURATION)
+
+            # NO fade in for Hook (first 2 clips) - start immediately visible!
+            # Only fade out at the very end
+            if i == total_scenes - 1:
+                clip = clip.fadeout(FADE_DURATION)
+
+            clips.append(clip)
+            total_video_duration += duration
+
+        # Concatenate with crossfade transitions (negative padding overlaps
+        # consecutive clips by the crossfade duration)
+        if len(clips) > 1:
+            video = concatenate_videoclips(clips, method="compose", padding=-CROSSFADE_DURATION)
+        else:
+            video = clips[0]
+
+        # Final safety: match video length to audio exactly
+        # NOTE(review): only trims when the video is LONGER; a video shorter
+        # than the audio is left mismatched -- confirm intent
+        if abs(video.duration - audio_duration) > 0.1:
+            if video.duration > audio_duration:
+                video = video.subclip(0, audio_duration)
+
+        video = video.set_audio(audio)
+
+        # Write final video
+        logger.info(f"Writing video with effects: crossfade={CROSSFADE_DURATION}s, zoom={ZOOM_FACTOR}x")
+        video.write_videofile(
+            str(output_path),
+            fps=24,
+            codec='libx264',
+            audio_codec='aac',
+            threads=4,
+            preset='medium'
+        )
+
+        # Cleanup: release ffmpeg readers held by MoviePy
+        video.close()
+        audio.close()
+
+ def get_status(self, job_id: str) -> Dict:
+ """Get job status"""
+ job = self.jobs.get(job_id)
+
+ if not job:
+ return {
+ "job_id": job_id,
+ "status": JobStatus.failed,
+ "error": "Job not found"
+ }
+
+ return {
+ "job_id": job_id,
+ "status": job["status"],
+ "progress": job["progress"],
+ "video_url": job.get("video_url"),
+ "duration": job.get("duration"),
+ "error": job.get("error")
+ }
+
+ def get_preview(self, job_id: str, scene_id: int) -> Optional[Dict]:
+ """Get scene preview"""
+ job = self.jobs.get(job_id)
+
+ if not job or not job.get("scenes"):
+ return None
+
+ for scene in job["scenes"]:
+ if scene["scene_id"] == scene_id:
+ return scene
+
+ return None
+
+ def get_video_path(self, job_id: str) -> Optional[Path]:
+ """Get video file path"""
+ job = self.jobs.get(job_id)
+
+ if not job or not job.get("video_url"):
+ return None
+
+ return Path(job["video_url"])
diff --git a/modules/video_creator/__init__.py b/modules/video_creator/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b11ce136c2b748709bb5143b837e1b4a4bea9550
--- /dev/null
+++ b/modules/video_creator/__init__.py
@@ -0,0 +1,81 @@
+"""
+Video Creator Module for NCAkit
+Creates short-form videos with TTS, captions, background videos, and music.
+"""
+from fastapi import FastAPI
+import logging
+
+# Module Metadata
+MODULE_NAME = "video_creator"
+MODULE_PREFIX = "/api/video"
+MODULE_DESCRIPTION = "Create short-form videos with TTS, captions, and background music"
+
+logger = logging.getLogger(__name__)
+
+
+def register(app: FastAPI, config):
+    """
+    Register the video creator module with FastAPI.
+    Initializes all services and adds routes.
+
+    Args:
+        app: FastAPI application to mount routes and state on.
+        config: app config; must expose pexels_api_key, hf_tts,
+            whisper_model, whisper_model_dir and music_dir_path.
+    """
+    # Local imports so the module is only loaded when actually registered
+    from .router import router, set_short_creator
+    from .services.libraries.tts_client import TTSClient
+    from .services.libraries.whisper_client import WhisperClient
+    from .services.libraries.pexels_client import PexelsClient
+    from .services.music_manager import MusicManager
+    from .services.short_creator import ShortCreator
+
+    logger.info("Registering video_creator module...")
+
+    # Validate environment variables (warn only -- registration proceeds and
+    # the corresponding features fail at request time)
+    if not config.pexels_api_key:
+        logger.warning("PEXELS_API_KEY is missing! Video generation will fail.")
+
+    if not config.hf_tts:
+        logger.warning("HF_TTS is missing! TTS will fail.")
+
+    # Initialize TTS client
+    logger.info("Initializing TTS client...")
+    tts_client = TTSClient(config.hf_tts)
+
+    # Initialize Whisper client
+    logger.info("Initializing Whisper client...")
+    whisper_client = WhisperClient(
+        model_name=config.whisper_model,
+        model_dir=config.whisper_model_dir
+    )
+
+    # Initialize Pexels client
+    logger.info("Initializing Pexels client...")
+    pexels_client = PexelsClient(config.pexels_api_key)
+
+    # Initialize music manager; missing music files degrade to an empty dir
+    logger.info("Initializing music manager...")
+    music_manager = MusicManager(config.music_dir_path)
+    try:
+        music_manager.ensure_music_files_exist()
+    except FileNotFoundError as e:
+        logger.error(f"Music setup error: {e}")
+        logger.warning("Creating empty music directory")
+        config.music_dir_path.mkdir(parents=True, exist_ok=True)
+
+    # Initialize short creator (the pipeline orchestrator)
+    logger.info("Initializing short creator...")
+    short_creator = ShortCreator(
+        config=config,
+        tts_client=tts_client,
+        whisper_client=whisper_client,
+        pexels_client=pexels_client,
+        music_manager=music_manager
+    )
+
+    # Set the global short creator in the router (module-level singleton)
+    set_short_creator(short_creator)
+
+    # Store in app state for access from other modules if needed
+    app.state.video_creator = short_creator
+
+    # Register routes
+    app.include_router(router, prefix=MODULE_PREFIX, tags=["Video Creator"])
+
+    logger.info("video_creator module registered successfully!")
diff --git a/modules/video_creator/__pycache__/__init__.cpython-313.pyc b/modules/video_creator/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5daa2e0eff8c4d9d40a4648cf83a795a84bb8b44
Binary files /dev/null and b/modules/video_creator/__pycache__/__init__.cpython-313.pyc differ
diff --git a/modules/video_creator/__pycache__/router.cpython-313.pyc b/modules/video_creator/__pycache__/router.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1b4209a3fce6d022a5ba34984c82d32c16b35e35
Binary files /dev/null and b/modules/video_creator/__pycache__/router.cpython-313.pyc differ
diff --git a/modules/video_creator/router.py b/modules/video_creator/router.py
new file mode 100644
index 0000000000000000000000000000000000000000..94c7747dfeba18a2380c768ad6eb1314a37385cf
--- /dev/null
+++ b/modules/video_creator/router.py
@@ -0,0 +1,130 @@
+"""
+Video Creator Router - API Endpoints
+"""
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse
+import logging
+
+from .schemas import (
+ CreateVideoRequest,
+ CreateVideoResponse,
+ VideoStatusResponse,
+ VideoListResponse,
+ VideoListItem
+)
+from .services.short_creator import ShortCreator
+
+logger = logging.getLogger(__name__)
+
+# This will be set when the module registers
+short_creator: ShortCreator = None
+
+
+def set_short_creator(creator: ShortCreator):
+    """Set the global short creator instance.
+
+    Called once from the module's register(); the route handlers below read
+    the module-level `short_creator` and will fail if this was never called.
+    """
+    global short_creator
+    short_creator = creator
+
+
+router = APIRouter()
+
+
+@router.post("/short-video",
+ response_model=CreateVideoResponse,
+ status_code=201,
+ summary="Create a new video",
+ description="Create a new short video from text scenes. Returns a video ID to track progress."
+)
+async def create_short_video(request: CreateVideoRequest):
+ """Create a new short video"""
+ try:
+ logger.info(f"Creating short video with {len(request.scenes)} scenes")
+
+ video_id = short_creator.add_to_queue(
+ request.scenes,
+ request.config
+ )
+
+ return CreateVideoResponse(videoId=video_id)
+
+ except Exception as e:
+ logger.error(f"Error creating video: {e}", exc_info=True)
+ raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.get("/short-video/{video_id}/status",
+ response_model=VideoStatusResponse,
+ summary="Get video status",
+ description="Check the processing status of a video (processing, ready, or failed)"
+)
+async def get_video_status(video_id: str):
+ """Get the status of a video"""
+ status = short_creator.get_status(video_id)
+ return VideoStatusResponse(status=status)
+
+
+@router.get("/short-video/{video_id}",
+ summary="Download video",
+ description="Download the generated video file (MP4 format)",
+ responses={
+ 200: {"description": "Video file", "content": {"video/mp4": {}}},
+ 404: {"description": "Video not found"}
+ }
+)
+async def get_video(video_id: str):
+ """Download/stream a video"""
+ video_path = short_creator.get_video_path(video_id)
+
+ if not video_path.exists():
+ raise HTTPException(status_code=404, detail="Video not found")
+
+ return FileResponse(
+ video_path,
+ media_type="video/mp4",
+ filename=f"{video_id}.mp4"
+ )
+
+
+@router.get("/short-videos",
+ response_model=VideoListResponse,
+ summary="List all videos",
+ description="Get a list of all videos with their current status"
+)
+async def list_videos():
+ """List all videos"""
+ videos = short_creator.list_all_videos()
+ return VideoListResponse(
+ videos=[VideoListItem(**v) for v in videos]
+ )
+
+
+@router.delete("/short-video/{video_id}",
+ summary="Delete video",
+ description="Delete a video by its ID"
+)
+async def delete_video(video_id: str):
+ """Delete a video"""
+ try:
+ short_creator.delete_video(video_id)
+ return {"success": True}
+ except Exception as e:
+ logger.error(f"Error deleting video: {e}")
+ raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.get("/voices",
+    summary="List TTS voices",
+    description="Get all available text-to-speech voice options"
+)
+async def get_voices():
+    """List available TTS voices (delegates to the ShortCreator service)."""
+    return short_creator.get_available_voices()
+
+
+@router.get("/music-tags",
+    summary="List music moods",
+    description="Get all available background music mood options"
+)
+async def get_music_tags():
+    """List available music moods (delegates to the ShortCreator service)."""
+    return short_creator.get_available_music_tags()
diff --git a/modules/video_creator/schemas.py b/modules/video_creator/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..45f58f8375990bd50577e3cd48af1548cc410b13
--- /dev/null
+++ b/modules/video_creator/schemas.py
@@ -0,0 +1,143 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional, Literal
+from enum import Enum
+
+
+class VoiceEnum(str, Enum):
+    """Available TTS voices.
+
+    Values are passed through as the TTS voice identifier. The naming
+    appears to follow an <accent+gender>_<name> scheme (af_/am_/bf_/bm_)
+    -- TODO confirm against the Kokoro TTS endpoint.
+    """
+    af_heart = "af_heart"
+    af_alloy = "af_alloy"
+    af_aoede = "af_aoede"
+    af_bella = "af_bella"
+    af_jessica = "af_jessica"
+    af_kore = "af_kore"
+    af_nicole = "af_nicole"
+    af_nova = "af_nova"
+    af_river = "af_river"
+    af_sarah = "af_sarah"
+    af_sky = "af_sky"
+    am_adam = "am_adam"
+    am_echo = "am_echo"
+    am_eric = "am_eric"
+    am_fenrir = "am_fenrir"
+    am_liam = "am_liam"
+    am_michael = "am_michael"
+    am_onyx = "am_onyx"
+    am_puck = "am_puck"
+    am_santa = "am_santa"
+    bf_emma = "bf_emma"
+    bf_isabella = "bf_isabella"
+    bm_george = "bm_george"
+    bm_lewis = "bm_lewis"
+    bf_alice = "bf_alice"
+    bf_lily = "bf_lily"
+    bm_daniel = "bm_daniel"
+    bm_fable = "bm_fable"
+
+
+class MusicMoodEnum(str, Enum):
+    """Available music moods.
+
+    Note: some VALUES contain a slash ("euphoric/high", "funny/quirky")
+    while the member names do not -- API clients must send the value.
+    """
+    sad = "sad"
+    melancholic = "melancholic"
+    happy = "happy"
+    euphoric = "euphoric/high"
+    excited = "excited"
+    chill = "chill"
+    uneasy = "uneasy"
+    angry = "angry"
+    dark = "dark"
+    hopeful = "hopeful"
+    contemplative = "contemplative"
+    funny = "funny/quirky"
+
+
+class OrientationEnum(str, Enum):
+    """Video orientation (portrait for shorts/reels, landscape otherwise)."""
+    portrait = "portrait"
+    landscape = "landscape"
+
+
+class CaptionPositionEnum(str, Enum):
+    """Caption position on video (vertical placement of the subtitle band)."""
+    top = "top"
+    center = "center"
+    bottom = "bottom"
+
+
+class MusicVolumeEnum(str, Enum):
+    """Music volume level ('muted' disables background music entirely)."""
+    low = "low"
+    medium = "medium"
+    high = "high"
+    muted = "muted"
+
+
+class VideoStatus(str, Enum):
+    """Video processing status as reported by the status endpoint."""
+    processing = "processing"
+    ready = "ready"
+    failed = "failed"
+
+
+class SceneInput(BaseModel):
+    """Input for a single scene in the video."""
+    text: str = Field(..., description="Text to be narrated in this scene")
+    # NOTE(review): the alias equals the field name, so it is redundant here
+    searchTerms: List[str] = Field(..., description="Keywords for finding background video", alias="searchTerms")
+
+    class Config:
+        # Accept both field names and aliases on input (pydantic v2 option)
+        populate_by_name = True
+
+
+class RenderConfig(BaseModel):
+    """Configuration for video rendering; every field has a usable default,
+    so an empty config is valid (see CreateVideoRequest.config)."""
+    paddingBack: Optional[int] = Field(0, description="End screen duration in milliseconds")
+    music: Optional[MusicMoodEnum] = Field(None, description="Background music mood")
+    captionPosition: CaptionPositionEnum = Field(CaptionPositionEnum.bottom, description="Caption position")
+    captionBackgroundColor: str = Field("blue", description="Caption background color")
+    voice: VoiceEnum = Field(VoiceEnum.af_heart, description="TTS voice")
+    orientation: OrientationEnum = Field(OrientationEnum.portrait, description="Video orientation")
+    musicVolume: MusicVolumeEnum = Field(MusicVolumeEnum.high, description="Background music volume")
+
+    class Config:
+        populate_by_name = True
+
+
+class CreateVideoRequest(BaseModel):
+    """Request to create a short video; at least one scene is required."""
+    scenes: List[SceneInput] = Field(..., min_length=1, description="List of scenes for the video")
+    config: Optional[RenderConfig] = Field(default_factory=RenderConfig, description="Render configuration")
+
+
+class CreateVideoResponse(BaseModel):
+    """Response after creating a video; the ID is used to poll status."""
+    videoId: str = Field(..., description="Unique ID for the created video")
+
+
+class VideoStatusResponse(BaseModel):
+    """Response for video status check."""
+    status: VideoStatus = Field(..., description="Current status of the video")
+
+
+class VideoListItem(BaseModel):
+    """Single video (id + status) in the list endpoint's response."""
+    id: str
+    status: VideoStatus
+
+
+class VideoListResponse(BaseModel):
+    """Response for listing all videos."""
+    videos: List[VideoListItem]
+
+
+class Caption(BaseModel):
+    """Caption with timing information (start/end offsets in milliseconds)."""
+    text: str
+    startMs: int
+    endMs: int
+
+
+class Scene(BaseModel):
+    """Processed scene with all media."""
+    captions: List[Caption]
+    video: str  # Path to video file
+    # presumably {'url': ..., 'duration': ...} -- TODO confirm against producer
+    audio: dict  # Audio info with 'url' and 'duration'
diff --git a/modules/video_creator/services/__init__.py b/modules/video_creator/services/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/modules/video_creator/services/__pycache__/__init__.cpython-313.pyc b/modules/video_creator/services/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5aca70f9fad19762b2f4ea60eee95a78f9317347
Binary files /dev/null and b/modules/video_creator/services/__pycache__/__init__.cpython-313.pyc differ
diff --git a/modules/video_creator/services/__pycache__/short_creator.cpython-313.pyc b/modules/video_creator/services/__pycache__/short_creator.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3a676c191c1b9bd94ea42f32f22b14bec5d87212
Binary files /dev/null and b/modules/video_creator/services/__pycache__/short_creator.cpython-313.pyc differ
diff --git a/modules/video_creator/services/libraries/__init__.py b/modules/video_creator/services/libraries/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/modules/video_creator/services/libraries/__pycache__/__init__.cpython-313.pyc b/modules/video_creator/services/libraries/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d075a1826c8444303c2135a3f2869dcc5b79379b
Binary files /dev/null and b/modules/video_creator/services/libraries/__pycache__/__init__.cpython-313.pyc differ
diff --git a/modules/video_creator/services/libraries/__pycache__/tts_client.cpython-313.pyc b/modules/video_creator/services/libraries/__pycache__/tts_client.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3358153b4b61be9966bf66571a2bf5ef51025b0
Binary files /dev/null and b/modules/video_creator/services/libraries/__pycache__/tts_client.cpython-313.pyc differ
diff --git a/modules/video_creator/services/libraries/__pycache__/whisper_client.cpython-313.pyc b/modules/video_creator/services/libraries/__pycache__/whisper_client.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6d434a263d749c6a5f4240b9a582aef2e9352dd9
Binary files /dev/null and b/modules/video_creator/services/libraries/__pycache__/whisper_client.cpython-313.pyc differ
diff --git a/modules/video_creator/services/libraries/ffmpeg_utils.py b/modules/video_creator/services/libraries/ffmpeg_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..680a588680d82907c6d14c81db0153b51579e062
--- /dev/null
+++ b/modules/video_creator/services/libraries/ffmpeg_utils.py
@@ -0,0 +1,191 @@
+import subprocess
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
class FFmpegUtils:
    """Utilities for audio and video processing via ffmpeg/ffprobe subprocesses.

    All methods are static and shell out to the `ffmpeg`/`ffprobe` binaries,
    which must be on PATH. Failures raise subprocess.CalledProcessError
    (except get_video_duration, which degrades to 0.0).
    """

    @staticmethod
    def save_audio_as_wav(audio_data: bytes, output_path: Path):
        """
        Save audio data as WAV file (normalized for Whisper).

        Args:
            audio_data: Raw audio bytes (WAV format from TTS)
            output_path: Where to save the normalized WAV

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails to transcode.
        """
        logger.debug(f"Saving normalized WAV to {output_path}")

        # ffmpeg needs a real input file, so stage the raw bytes on disk first.
        temp_input = output_path.parent / f"temp_{output_path.name}"
        temp_input.write_bytes(audio_data)

        try:
            # Normalize audio for Whisper (16kHz, mono, 16-bit PCM)
            subprocess.run([
                "ffmpeg",
                "-i", str(temp_input),
                "-ar", "16000",        # 16kHz sample rate
                "-ac", "1",            # Mono
                "-sample_fmt", "s16",  # 16-bit PCM
                "-y",                  # Overwrite
                str(output_path)
            ], check=True, capture_output=True)

            logger.debug(f"Saved normalized WAV: {output_path}")
        except subprocess.CalledProcessError as e:
            # Surface ffmpeg's stderr so the failure is diagnosable in logs.
            logger.error(f"ffmpeg failed normalizing {output_path}: {e.stderr.decode(errors='replace')}")
            raise
        finally:
            # Always clean up the staged input, even on failure.
            if temp_input.exists():
                temp_input.unlink()

    @staticmethod
    def save_audio_as_mp3(audio_data: bytes, output_path: Path):
        """
        Convert audio data to MP3.

        Args:
            audio_data: Raw audio bytes (WAV format from TTS)
            output_path: Where to save the MP3

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails to transcode.
        """
        logger.debug(f"Converting to MP3: {output_path}")

        # Stage input bytes on disk for ffmpeg.
        temp_input = output_path.parent / f"temp_{output_path.name}.wav"
        temp_input.write_bytes(audio_data)

        try:
            # Convert to MP3 with LAME at VBR quality 2 (high quality).
            subprocess.run([
                "ffmpeg",
                "-i", str(temp_input),
                "-codec:a", "libmp3lame",
                "-qscale:a", "2",  # High quality
                "-y",              # Overwrite
                str(output_path)
            ], check=True, capture_output=True)

            logger.debug(f"Saved MP3: {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(f"ffmpeg failed converting {output_path} to MP3: {e.stderr.decode(errors='replace')}")
            raise
        finally:
            if temp_input.exists():
                temp_input.unlink()

    @staticmethod
    def get_video_duration(file_path: Path) -> float:
        """
        Get duration of video file in seconds using ffprobe.

        Args:
            file_path: Path to video file

        Returns:
            Duration in seconds, or 0.0 if ffprobe fails or is unavailable
            (callers treat 0.0 as "unknown duration").
        """
        try:
            cmd = [
                "ffprobe",
                "-v", "error",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                str(file_path)
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            return float(result.stdout.strip())
        except Exception as e:
            # Deliberately broad: missing binary, bad file, or unparsable
            # output all degrade to "unknown" rather than crashing the render.
            logger.error(f"Failed to get video duration for {file_path}: {e}")
            return 0.0

    @staticmethod
    def normalize_video(input_path: Path, output_path: Path):
        """
        Normalize video to standard format (H.264, 30fps, AAC) to fix
        seeking/black screen issues.

        Args:
            input_path: Path to source video
            output_path: Path to save normalized video

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails.
        """
        logger.debug(f"Normalizing video: {input_path} -> {output_path}")

        try:
            cmd = [
                "ffmpeg",
                "-i", str(input_path),
                "-c:v", "libx264",
                "-preset", "fast",
                "-r", "30",
                "-c:a", "aac",
                "-pix_fmt", "yuv420p",  # widest player compatibility
                "-y",
                str(output_path)
            ]

            subprocess.run(cmd, check=True, capture_output=True)
            logger.debug(f"Normalized video saved to {output_path}")

        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to normalize video {input_path}: {e.stderr.decode(errors='replace')}")
            raise
        except Exception as e:
            logger.error(f"Error normalizing video {input_path}: {e}")
            raise

    @staticmethod
    def cut_video(input_path: Path, output_path: Path, start_time: float, duration: float):
        """
        Cut a segment from a video file using FFmpeg.

        Args:
            input_path: Source video
            output_path: Destination for the segment
            start_time: Start time in seconds
            duration: Duration of the segment in seconds

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails.
        """
        try:
            # -ss before -i: fast input seeking; re-encode for frame accuracy.
            cmd = [
                "ffmpeg",
                "-ss", str(start_time),
                "-i", str(input_path),
                "-t", str(duration),
                "-c:v", "libx264",
                "-preset", "fast",
                "-c:a", "aac",
                "-y",
                str(output_path)
            ]

            subprocess.run(cmd, check=True, capture_output=True)

        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to cut video {input_path}: {e.stderr.decode(errors='replace')}")
            raise

    @staticmethod
    def image_to_video(input_path: Path, output_path: Path, duration: float):
        """
        Convert image to a still video of specific duration.

        Args:
            input_path: Path to source image (jpg, png, etc.)
            output_path: Path to save the output video
            duration: Duration of the video in seconds

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails.
        """
        try:
            cmd = [
                "ffmpeg",
                "-loop", "1",  # repeat the single frame for the whole clip
                "-i", str(input_path),
                "-t", str(duration),
                "-c:v", "libx264",
                "-pix_fmt", "yuv420p",
                # Fit inside 1080x1920 portrait, letterboxing to preserve aspect.
                "-vf", "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2",
                "-r", "30",
                "-y",
                str(output_path)
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            logger.debug(f"Created video from image: {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to convert image to video: {e.stderr.decode(errors='replace')}")
            raise
diff --git a/modules/video_creator/services/libraries/pexels_client.py b/modules/video_creator/services/libraries/pexels_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..2349a059b968acc33382fbc24daf2dad9bd001f2
--- /dev/null
+++ b/modules/video_creator/services/libraries/pexels_client.py
@@ -0,0 +1,223 @@
+import requests
+import logging
+from typing import List, Optional
+from pathlib import Path
+import random
+
+logger = logging.getLogger(__name__)
+
+
class PexelsClient:
    """Client for Pexels API to fetch background videos and photos.

    Search helpers swallow network/API errors (logging them) and return None
    so the caller can fall back to other search terms.
    """

    def __init__(self, api_key: str):
        """
        Initialize Pexels client

        Args:
            api_key: Pexels API key
        """
        self.api_key = api_key
        self.base_url = "https://api.pexels.com/videos"
        # Pexels expects the raw API key in the Authorization header (no "Bearer" prefix).
        self.headers = {"Authorization": api_key}
        # Generic fallback queries used when the caller's search terms return nothing.
        self.joker_terms = ["nature", "globe", "space", "ocean"]

    def find_video(
        self,
        search_terms: List[str],
        duration: float,
        exclude_ids: Optional[List[int]] = None,
        orientation: str = "portrait"
    ) -> dict:
        """
        Find a suitable video from Pexels

        Args:
            search_terms: Keywords to search for
            duration: Required video duration in seconds
            exclude_ids: List of video IDs to exclude
            orientation: 'portrait' or 'landscape'

        Returns:
            Dict with 'id', 'url' and 'duration' of the selected video

        Raises:
            Exception: If neither the search terms nor the joker terms
                yield any usable video.
        """
        exclude_ids = exclude_ids or []

        # Try user-provided search terms first
        for term in search_terms:
            video = self._search_and_select(term, duration, exclude_ids, orientation)
            if video:
                return video

        # Fall back to joker terms
        logger.info(f"No videos found for {search_terms}, using joker terms")
        for term in self.joker_terms:
            video = self._search_and_select(term, duration, exclude_ids, orientation)
            if video:
                return video

        raise Exception("No suitable videos found on Pexels")

    def _search_and_select(
        self,
        query: str,
        min_duration: float,
        exclude_ids: List[int],
        orientation: str
    ) -> Optional[dict]:
        """Search for videos and select a suitable one.

        Returns None (never raises) on API errors, empty results or network
        failures, so find_video() can simply try the next search term.
        """
        try:
            logger.debug(f"Searching Pexels for: {query} ({orientation})")

            response = requests.get(
                f"{self.base_url}/search",
                headers=self.headers,
                params={
                    "query": query,
                    "orientation": orientation,
                    "per_page": 15,
                    "size": "medium"  # Good balance of quality and file size
                },
                timeout=10
            )

            if response.status_code != 200:
                logger.warning(f"Pexels API error: {response.status_code}")
                return None

            data = response.json()
            videos = data.get("videos", [])

            if not videos:
                logger.debug(f"No videos found for query: {query}")
                return None

            # Filter suitable videos
            suitable_videos = []
            for video in videos:
                if video["id"] in exclude_ids:
                    continue

                # Get video file URL (HD or SD)
                video_files = video.get("video_files", [])
                if not video_files:
                    continue

                # Sort by pixel count so index 0 is the highest resolution.
                video_files = sorted(
                    video_files,
                    key=lambda x: x.get("width", 0) * x.get("height", 0),
                    reverse=True
                )

                # Find appropriate quality based on orientation
                target_width = 1080 if orientation == "portrait" else 1920
                target_height = 1920 if orientation == "portrait" else 1080

                selected_file = None
                for vf in video_files:
                    # Look for files close to our target resolution (within 300px on each axis)
                    if vf.get("width") and vf.get("height"):
                        if (abs(vf["width"] - target_width) < 300 and
                                abs(vf["height"] - target_height) < 300):
                            selected_file = vf
                            break

                # Fallback to highest quality if no exact match
                if not selected_file and video_files:
                    selected_file = video_files[0]

                if selected_file and selected_file.get("link"):
                    suitable_videos.append({
                        "id": video["id"],
                        "url": selected_file["link"],
                        # duration may be absent in the API payload; 0 means unknown
                        "duration": video.get("duration", 0)
                    })

            if not suitable_videos:
                return None

            # Filter by duration if possible
            # Try to find videos that are at least 50% of the requested duration
            # to avoid stitching too many tiny clips
            duration_threshold = min(min_duration * 0.5, 15)  # Cap at 15s requirement
            long_enough_videos = [v for v in suitable_videos if v["duration"] >= duration_threshold]

            if long_enough_videos:
                # Random pick among long-enough candidates for variety between runs.
                selected = random.choice(long_enough_videos)
                logger.info(f"Selected Pexels video ID {selected['id']} (duration: {selected['duration']}s) for query '{query}'")
                return selected

            # Fallback to any suitable video
            selected = random.choice(suitable_videos)
            logger.info(f"Selected Pexels video ID {selected['id']} (duration: {selected['duration']}s) for query '{query}' (fallback)")
            return selected

        except Exception as e:
            # Broad catch keeps a single bad term / network hiccup from
            # aborting the whole search; the caller tries the next term.
            logger.error(f"Error searching Pexels: {e}")
            return None

    def find_photo(
        self,
        query: str,
        orientation: str = "portrait"
    ) -> Optional[dict]:
        """
        Find a suitable photo from Pexels

        Args:
            query: Search term
            orientation: 'portrait' or 'landscape'

        Returns:
            Dict with 'id', 'url' and 'type' of the photo, or None if the
            search failed or returned no results.
        """
        try:
            logger.debug(f"Searching Pexels for photo: {query} ({orientation})")

            # Pexels Photo API endpoint (separate host path from the videos API)
            url = "https://api.pexels.com/v1/search"

            response = requests.get(
                url,
                headers=self.headers,
                params={
                    "query": query,
                    "orientation": orientation,
                    "per_page": 15,
                    "size": "large"
                },
                timeout=10
            )

            if response.status_code != 200:
                logger.warning(f"Pexels Photo API error: {response.status_code}")
                return None

            data = response.json()
            photos = data.get("photos", [])

            if not photos:
                logger.debug(f"No photos found for query: {query}")
                return None

            # Select a random photo
            photo = random.choice(photos)

            # Get URL (prefer original or large2x)
            src = photo.get("src", {})
            url = src.get("original") or src.get("large2x") or src.get("large")

            if not url:
                return None

            logger.info(f"Selected Pexels photo ID {photo['id']} for query '{query}'")
            return {
                "id": photo["id"],
                "url": url,
                "type": "photo"
            }

        except Exception as e:
            # Same best-effort policy as video search: log and return None.
            logger.error(f"Error searching Pexels photos: {e}")
            return None
diff --git a/modules/video_creator/services/libraries/tts_client.py b/modules/video_creator/services/libraries/tts_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..a04b71de6183466d680972571c31a85d32af7b3a
--- /dev/null
+++ b/modules/video_creator/services/libraries/tts_client.py
@@ -0,0 +1,106 @@
+import aiohttp
+import struct
+import logging
+from typing import Tuple
+
+logger = logging.getLogger(__name__)
+
+
class TTSClient:
    """Client for Kokoro TTS via Hugging Face Cloud API"""

    def __init__(self, api_url: str):
        """
        Initialize TTS client

        Args:
            api_url: Base URL for the TTS API (HF_TTS environment variable)
        """
        self.api_url = api_url.rstrip('/')
        logger.info(f"Using cloud TTS API at {self.api_url}")

    async def generate(self, text: str, voice: str) -> Tuple[bytes, float]:
        """
        Generate speech from text

        Args:
            text: Text to convert to speech
            voice: Voice identifier (e.g., 'af_heart', 'am_adam')

        Returns:
            Tuple of (audio_bytes, duration_seconds)

        Raises:
            Exception: If the TTS API responds with a non-200 status.
        """
        endpoint = f"{self.api_url}/v1/audio/speech"

        logger.debug(f"Generating audio with voice={voice}, text_length={len(text)}")

        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint,
                json={
                    "model": "kokoro",
                    "input": text,
                    "voice": voice
                },
                headers={"Content-Type": "application/json"},
                timeout=aiohttp.ClientTimeout(total=30)
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"TTS API error ({response.status}): {error_text}")

                audio_data = await response.read()
                duration = self._estimate_audio_duration(audio_data)

                logger.debug(f"Generated audio: {len(audio_data)} bytes, {duration:.2f}s")
                return audio_data, duration

    def _estimate_audio_duration(self, audio_buffer: bytes) -> float:
        """
        Estimate audio duration in seconds from a WAV buffer.

        Parses the canonical 44-byte RIFF header (byte rate at offset 28,
        data-chunk size at offset 40) and derives duration = data_size /
        byte_rate. Falls back to assuming 16-bit mono PCM at 24 kHz (the
        Kokoro output format -- TODO confirm against the TTS endpoint) when
        the header is missing or malformed. Never returns a negative value.
        """
        # Too short to contain a standard 44-byte RIFF header.
        if len(audio_buffer) < 44:
            return 0.0

        # Check if it's a valid WAV file (starts with 'RIFF')
        if audio_buffer[:4] != b'RIFF':
            # Fallback estimation: raw PCM, 16-bit mono @ 24 kHz.
            return max(0.0, (len(audio_buffer) - 44) / (2 * 24000))

        try:
            # Canonical WAV layout: byte rate at bytes 28-31, data size at 40-43.
            byte_rate = struct.unpack('<I', audio_buffer[28:32])[0]
            data_size = struct.unpack('<I', audio_buffer[40:44])[0]
            if byte_rate > 0:
                return data_size / byte_rate
            # Zero byte rate in header: fall back to the assumed PCM format.
            return data_size / (2 * 24000)
        except struct.error:
            # Truncated/garbled header: estimate from the payload size.
            return max(0.0, (len(audio_buffer) - 44) / (2 * 24000))

    @staticmethod
    def get_available_voices() -> list:
        """Return list of available TTS voices"""
        return [
            "af_heart", "af_alloy", "af_aoede", "af_bella", "af_jessica",
            "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
            "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
            "am_michael", "am_onyx", "am_puck", "am_santa",
            "bf_emma", "bf_isabella", "bm_george", "bm_lewis",
            "bf_alice", "bf_lily", "bm_daniel", "bm_fable"
        ]
diff --git a/modules/video_creator/services/libraries/video_composer.py b/modules/video_creator/services/libraries/video_composer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f869fa57c344965aeff07cd89bda96a11aaad83e
--- /dev/null
+++ b/modules/video_creator/services/libraries/video_composer.py
@@ -0,0 +1,360 @@
+import logging
+from pathlib import Path
+from typing import List, Dict
+from moviepy.editor import (
+ VideoFileClip,
+ AudioFileClip,
+ CompositeVideoClip,
+ CompositeAudioClip,
+ TextClip,
+ concatenate_videoclips,
+ concatenate_audioclips
+)
+from models.schemas import Caption, Scene, CaptionPositionEnum, MusicVolumeEnum
+from video_creator.libraries.ffmpeg_utils import FFmpegUtils
+
+logger = logging.getLogger(__name__)
+
+
class VideoComposer:
    """Video composition using MoviePy (replaces Remotion)"""

    @staticmethod
    def render(
        scenes: List[Dict],
        music_path: str,
        output_path: Path,
        orientation: str = "portrait",
        caption_position: str = "bottom",
        caption_bg_color: str = "blue",
        music_volume: str = "high",
        padding_back: int = 0
    ):
        """
        Render final video with scenes, captions, and music

        Args:
            scenes: List of scene dicts with 'video', 'audio', 'captions'
            music_path: Path to background music file
            output_path: Where to save the final video
            orientation: 'portrait' or 'landscape'
            caption_position: 'top', 'center', or 'bottom'
            caption_bg_color: Background color for captions
            music_volume: 'low', 'medium', 'high', or 'muted'
            padding_back: Additional padding at end in milliseconds

        Raises:
            Exception: If a scene has no loadable video clips, or if
                MoviePy/ffmpeg fails while encoding the output.
        """
        logger.info(f"Rendering video with {len(scenes)} scenes")

        # Set dimensions based on orientation
        if orientation == "portrait":
            width, height = 1080, 1920
        else:
            width, height = 1920, 1080

        # Process each scene
        video_clips = []
        total_duration = 0

        for i, scene in enumerate(scenes):
            logger.debug(f"Processing scene {i + 1}/{len(scenes)}")
            # Load narration audio; its duration drives the scene length.
            audio_clip = AudioFileClip(scene["audio"]["url"])
            scene_duration = scene["audio"]["duration"]

            # Load video clip(s)
            video_input = scene["video"]
            if isinstance(video_input, list):
                # Concatenate multiple clips
                clips = []
                for item in video_input:
                    try:
                        # Handle both string paths (legacy) and dicts (new smart segmentation)
                        if isinstance(item, dict):
                            path = item["path"]
                            target_duration = item["duration"]
                            start_time = item.get("start_time", 0)
                        else:
                            path = item
                            target_duration = None
                            start_time = 0

                        # Load video WITHOUT audio to prevent stream conflicts
                        clip = VideoFileClip(path, audio=False)

                        # Verify actual duration using ffprobe (moviepy's own
                        # metadata can be wrong for some containers).
                        actual_duration = FFmpegUtils.get_video_duration(Path(path))

                        if actual_duration > 0:
                            # If we have a start_time, we are taking a subclip
                            if start_time > 0:
                                # Ensure we don't go past the end
                                end_time = min(start_time + target_duration, actual_duration)
                                # If the segment is completely out of bounds, restart from 0
                                if start_time >= actual_duration:
                                    start_time = 0
                                    end_time = min(target_duration, actual_duration)

                                clip = clip.subclip(start_time, end_time)

                                # If the subclip is shorter than target (we hit the
                                # end of the file), loop it out to the target length.
                                if clip.duration < target_duration:
                                    clip = clip.loop(duration=target_duration)
                                else:
                                    clip = clip.set_duration(target_duration)

                            # Standard logic (start from 0)
                            elif target_duration and actual_duration < target_duration:
                                clip = clip.loop(duration=target_duration)
                            elif target_duration:
                                clip = clip.set_duration(target_duration)
                            elif abs(clip.duration - actual_duration) > 0.5:
                                # Trust ffprobe over moviepy when they disagree.
                                clip = clip.set_duration(actual_duration)

                        # Resize to target dimensions immediately
                        clip = VideoComposer._resize_and_crop(clip, width, height)
                        clips.append(clip)
                    except Exception as e:
                        # Skip unreadable clips; the scene only fails below if
                        # none of its clips loaded.
                        logger.warning(f"Failed to load video clip {item}: {e}")

                if not clips:
                    raise Exception("No valid video clips found for scene")

                # method="chain" is more stable for identically sized clips
                video_clip = concatenate_videoclips(clips, method="chain")
            else:
                # Single source path: load without audio (see above)
                video_clip = VideoFileClip(video_input, audio=False)
                # Verify actual duration using ffprobe
                actual_duration = FFmpegUtils.get_video_duration(Path(video_input))
                if actual_duration > 0:
                    if abs(video_clip.duration - actual_duration) > 0.5:
                        video_clip = video_clip.set_duration(actual_duration)

                video_clip = VideoComposer._resize_and_crop(video_clip, width, height)

            # Set duration to match audio.
            # Loop video if it's shorter than audio to prevent black screen.
            if video_clip.duration < scene_duration:
                # If gap is small (< 0.5s), freeze the last frame to fill it.
                # This prevents black frames/flicker at the end of scene.
                gap = scene_duration - video_clip.duration
                if gap < 0.5:
                    logger.debug(f"Filling small gap of {gap:.3f}s by freezing last frame")
                    # Create a freeze frame of the last instant
                    last_frame = video_clip.to_ImageClip(t=video_clip.duration - 0.01).set_duration(gap)
                    video_clip = concatenate_videoclips([video_clip, last_frame], method="chain")
                else:
                    # Gap is large, loop the video
                    video_clip = video_clip.loop(duration=scene_duration)
            else:
                # Video is longer, just trim it
                video_clip = video_clip.set_duration(scene_duration)

            video_clip = video_clip.set_audio(audio_clip)

            # Add captions
            if scene.get("captions"):
                # MoviePy TextClip background support is limited; a transparent
                # background with a stroke is safer than the old blue default.
                if caption_bg_color == "blue":
                    caption_bg_color = "transparent"  # Explicit string instead of None

                video_clip = VideoComposer._add_captions(
                    video_clip,
                    scene["captions"],
                    width,
                    height,
                    caption_position,
                    caption_bg_color,
                    total_duration
                )

            video_clips.append(video_clip)
            total_duration += scene_duration

        # Add padding if specified
        if padding_back > 0:
            padding_seconds = padding_back / 1000
            total_duration += padding_seconds
            # Extend the last clip by looping it through the padding.
            if video_clips:
                last_clip = video_clips[-1]
                video_clips[-1] = last_clip.loop(duration=last_clip.duration + padding_seconds)

        # Concatenate all scenes (chain: every clip has the same frame size)
        logger.debug("Concatenating video clips")
        final_video = concatenate_videoclips(video_clips, method="chain")

        # Add background music
        if music_path and music_volume != "muted":
            logger.debug("Adding background music")
            final_video = VideoComposer._add_background_music(
                final_video,
                music_path,
                music_volume
            )

        # Write final video. Cleanup runs in a finally block so the clip
        # readers (ffmpeg subprocesses / file handles) are released even if
        # the encode fails.
        logger.info(f"Writing video to {output_path}")
        try:
            final_video.write_videofile(
                str(output_path),
                codec="libx264",
                audio_codec="aac",
                fps=30,
                preset="medium",
                threads=2,
                logger=None  # Suppress moviepy progress bar
            )
        finally:
            final_video.close()
            for clip in video_clips:
                clip.close()

        logger.info(f"Video rendered successfully: {output_path}")

    @staticmethod
    def _resize_and_crop(clip: VideoFileClip, target_width: int, target_height: int) -> VideoFileClip:
        """Resize and center-crop a clip to exactly target_width x target_height,
        preserving the source aspect ratio (no stretching)."""
        clip_width, clip_height = clip.size
        clip_aspect = clip_width / clip_height
        target_aspect = target_width / target_height

        if clip_aspect > target_aspect:
            # Clip is wider than the target: scale to height, crop excess width.
            new_height = target_height
            new_width = int(target_height * clip_aspect)
            resized = clip.resize(height=new_height)
            x_center = new_width / 2
            x1 = x_center - target_width / 2
            cropped = resized.crop(x1=x1, x2=x1 + target_width)
        else:
            # Clip is taller than the target: scale to width, crop excess height.
            new_width = target_width
            new_height = int(target_width / clip_aspect)
            resized = clip.resize(width=new_width)
            y_center = new_height / 2
            y1 = y_center - target_height / 2
            cropped = resized.crop(y1=y1, y2=y1 + target_height)

        return cropped

    @staticmethod
    def _add_captions(
        video_clip: VideoFileClip,
        captions: List[Dict],
        width: int,
        height: int,
        position: str,
        bg_color: str,
        offset_seconds: float
    ) -> CompositeVideoClip:
        """Overlay caption TextClips on a scene clip.

        Caption timestamps (startMs/endMs) are scene-relative; clips are
        composited before scene concatenation, so no global offset is
        applied. NOTE: offset_seconds is currently unused and kept only for
        interface stability.
        """
        caption_clips = []

        # Determine vertical position
        if position == "top":
            y_pos = height * 0.15
        elif position == "center":
            y_pos = height * 0.5
        else:  # bottom
            y_pos = height * 0.70  # Changed from 0.85 to 0.70 as requested

        # Resolve the caption font once: it is loop-invariant, and hoisting it
        # out of the try block guarantees the fallback branch below can never
        # hit a NameError if TextClip construction fails early.
        font_path = Path(__file__).parent.parent.parent / "static" / "fonts" / "TheBoldFont.ttf"
        font_name = str(font_path) if font_path.exists() else "Liberation-Sans-Bold"

        # Use transparent background by default if None was passed.
        final_bg_color = bg_color if bg_color else "transparent"

        for caption in captions:
            start_time = caption["startMs"] / 1000
            end_time = caption["endMs"] / 1000
            duration = end_time - start_time

            if duration <= 0:
                continue

            try:
                # "caption" method wraps text within a fixed box (90% width,
                # 20% height) and avoids the NoneType-size error.
                txt_clip = TextClip(
                    caption["text"],
                    fontsize=70,
                    color="white",
                    font=font_name,
                    stroke_color="black",
                    stroke_width=1.5,
                    bg_color=final_bg_color,
                    method="caption",
                    size=(int(width * 0.9), int(height * 0.2)),
                    align="center"
                )
            except Exception as e:
                logger.warning(f"TextClip caption method failed: {e}. Falling back to label method.")
                # Fallback to label method (no wrapping, but works)
                txt_clip = TextClip(
                    caption["text"],
                    fontsize=60,
                    color="white",
                    font=font_name,
                    stroke_color="black",
                    stroke_width=2,
                    bg_color=final_bg_color,
                    method="label"
                )

            txt_clip = txt_clip.set_duration(duration)
            txt_clip = txt_clip.set_start(start_time)
            txt_clip = txt_clip.set_position(("center", y_pos))

            caption_clips.append(txt_clip)

        if caption_clips:
            return CompositeVideoClip([video_clip] + caption_clips)
        return video_clip

    @staticmethod
    def _add_background_music(
        video_clip: VideoFileClip,
        music_path: str,
        volume_level: str
    ) -> VideoFileClip:
        """Mix looped background music under the clip's existing audio.

        Args:
            video_clip: Clip to receive the music track.
            music_path: Path to the music file.
            volume_level: 'low', 'medium', 'high' or 'muted' (unknown values
                fall back to the 'high' multiplier).
        """
        # Load music
        music = AudioFileClip(music_path)

        # Loop music to match video duration
        if music.duration < video_clip.duration:
            loops_needed = int(video_clip.duration / music.duration) + 1
            music = concatenate_audioclips([music] * loops_needed)

        # Trim to video duration
        music = music.subclip(0, video_clip.duration)

        # Set volume based on level
        volume_multipliers = {
            "low": 0.2,
            "medium": 0.4,
            "high": 0.6,
            "muted": 0.0
        }
        volume = volume_multipliers.get(volume_level, 0.6)
        music = music.volumex(volume)

        # Mix with narration audio
        if video_clip.audio:
            final_audio = CompositeAudioClip([video_clip.audio, music])
            return video_clip.set_audio(final_audio)

        return video_clip.set_audio(music)
diff --git a/modules/video_creator/services/libraries/whisper_client.py b/modules/video_creator/services/libraries/whisper_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5e5470b9a8c741f6f91221916d46caf2c0777e5
--- /dev/null
+++ b/modules/video_creator/services/libraries/whisper_client.py
@@ -0,0 +1,81 @@
+import logging
+from pathlib import Path
+from faster_whisper import WhisperModel
+from typing import List
+from models.schemas import Caption
+
+logger = logging.getLogger(__name__)
+
+
class WhisperClient:
    """Wraps faster-whisper to turn narration audio into word-level captions."""

    def __init__(self, model_name: str = "tiny.en", model_dir: Path = None):
        """
        Initialize Whisper client

        Args:
            model_name: Whisper model to use (tiny.en, base.en, medium.en, etc.)
            model_dir: Directory to store/load models
        """
        self.model_name = model_name
        self.model_dir = None if model_dir is None else str(model_dir)

        logger.info(f"Loading Whisper model: {model_name}")

        # CPU with int8 quantization keeps memory/latency low.
        self.model = WhisperModel(
            model_name,
            device="cpu",
            compute_type="int8",
            download_root=self.model_dir,
        )

        logger.info("Whisper model loaded successfully")

    def create_captions(self, audio_path: str) -> List[Caption]:
        """
        Generate captions from audio file

        Args:
            audio_path: Path to audio file (WAV format preferred)

        Returns:
            List of Caption objects with text and timing
        """
        logger.debug(f"Transcribing audio: {audio_path}")

        # Word-level timestamps plus VAD so silence doesn't produce captions.
        segments, _info = self.model.transcribe(
            audio_path,
            word_timestamps=True,
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=500),
        )

        captions: List[Caption] = []

        for segment in segments:
            words = segment.words
            if not words:
                continue

            for w in words:
                token = w.word

                # Drop special tokens and whitespace-only words.
                if token.startswith('[') or token.strip() == '':
                    continue

                # A token with no leading space continues the previous word
                # (sub-word piece), so glue it onto the last caption.
                is_continuation = (
                    bool(captions)
                    and not token.startswith(' ')
                    and not captions[-1].text.endswith(' ')
                )

                if is_continuation:
                    captions[-1].text += token.strip()
                    captions[-1].endMs = int(w.end * 1000)
                else:
                    captions.append(Caption(
                        text=token.strip(),
                        startMs=int(w.start * 1000),
                        endMs=int(w.end * 1000),
                    ))

        logger.debug(f"Generated {len(captions)} captions from {audio_path}")
        return captions
diff --git a/modules/video_creator/services/music_manager.py b/modules/video_creator/services/music_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc367ed7b376910d73fed68be4f1f638d3c167c9
--- /dev/null
+++ b/modules/video_creator/services/music_manager.py
@@ -0,0 +1,130 @@
+import json
+import logging
+from pathlib import Path
+from typing import List, Optional, Dict
+import random
+
+logger = logging.getLogger(__name__)
+
+
class MusicManager:
    """Manages background music files and metadata"""

    def __init__(self, music_dir: Path):
        """
        Initialize music manager

        Args:
            music_dir: Directory containing music files and metadata
        """
        self.music_dir = music_dir
        # Lazily-built track list; None until the first directory scan.
        self._music_cache: Optional[List[Dict]] = None

    def ensure_music_files_exist(self):
        """Verify music files directory exists"""
        if not self.music_dir.exists():
            raise FileNotFoundError(
                f"Music directory not found: {self.music_dir}. "
                "Please copy music files from the original project."
            )

        music_files = list(self.music_dir.glob("*.mp3"))
        if not music_files:
            raise FileNotFoundError(
                f"No MP3 files found in {self.music_dir}. "
                "Please copy music files from the original project."
            )

        logger.info(f"Found {len(music_files)} music files")

    def get_music_list(self) -> List[Dict]:
        """
        Get list of available music with metadata

        Returns:
            List of dicts with 'path', 'mood', 'filename' keys
        """
        if self._music_cache is not None:
            return self._music_cache

        # Optional sidecar JSON mapping filename -> {"mood": ...}.
        metadata_path = self.music_dir / "music_metadata.json"
        metadata = {}
        if metadata_path.exists():
            try:
                metadata = json.loads(metadata_path.read_text())
            except Exception as e:
                logger.warning(f"Failed to load music metadata: {e}")

        tracks = []
        for mp3 in self.music_dir.glob("*.mp3"):
            name = mp3.name
            # Prefer the sidecar's mood; otherwise guess from the filename.
            mood = metadata.get(name, {}).get("mood") or self._infer_mood_from_filename(name)
            tracks.append({
                "path": str(mp3),
                "mood": mood,
                "filename": name,
            })

        self._music_cache = tracks
        return tracks

    def find_music(self, mood: Optional[str] = None) -> Dict:
        """
        Pick a music track, preferring the requested mood.

        Args:
            mood: Music mood (sad, happy, chill, etc.) or None for random

        Returns:
            Dict with music info

        Raises:
            ValueError: If no music files are available.
        """
        tracks = self.get_music_list()

        if not tracks:
            raise ValueError("No music files available")

        if mood:
            matching = [t for t in tracks if t["mood"] == mood]
            if matching:
                return random.choice(matching)
            logger.warning(f"No music found for mood '{mood}', using random")

        # No mood requested, or no match: any track will do.
        return random.choice(tracks)

    def get_available_moods(self) -> List[str]:
        """Get list of available music moods"""
        return sorted({t["mood"] for t in self.get_music_list() if t["mood"]})

    @staticmethod
    def _infer_mood_from_filename(filename: str) -> str:
        """Infer mood from filename (fallback if no metadata)"""
        lowered = filename.lower()

        keyword_map = {
            "sad": ("sad", "melancholy", "emotional"),
            "happy": ("happy", "joyful", "upbeat"),
            "chill": ("chill", "relax", "calm", "ambient"),
            "dark": ("dark", "suspense", "mysterious"),
            "excited": ("excited", "energetic", "uplifting"),
            "angry": ("angry", "intense", "aggressive"),
        }

        for mood, keywords in keyword_map.items():
            if any(kw in lowered for kw in keywords):
                return mood

        return "chill"  # Default mood when nothing matches
diff --git a/modules/video_creator/services/short_creator.py b/modules/video_creator/services/short_creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdba06a73c14e5fc55c71a87c03ec8bb2a8850b2
--- /dev/null
+++ b/modules/video_creator/services/short_creator.py
@@ -0,0 +1,444 @@
+import asyncio
+import logging
+import uuid
+from pathlib import Path
+from typing import List, Dict, Optional
+import aiohttp
+import requests
+
+# Use relative imports for module-local schemas
+from ..schemas import (
+ SceneInput, RenderConfig, VideoStatus, Scene, Caption
+)
+from .libraries.tts_client import TTSClient
+from .libraries.whisper_client import WhisperClient
+from .libraries.pexels_client import PexelsClient
+from .libraries.ffmpeg_utils import FFmpegUtils
+from .libraries.video_composer import VideoComposer
+from .music_manager import MusicManager
+
+logger = logging.getLogger(__name__)
+
+
class ShortCreator:
    """Main video creation orchestrator.

    Queues render jobs and processes them one at a time: each scene gets
    TTS audio, Whisper captions, and a Pexels background (video first,
    still-photo fallback), then everything is composed with background
    music into a single short by VideoComposer.  Heavy work runs in a
    worker thread so API status queries stay responsive.
    """

    def __init__(
        self,
        config: "Config",
        tts_client: TTSClient,
        whisper_client: WhisperClient,
        pexels_client: PexelsClient,
        music_manager: MusicManager
    ):
        # NOTE(review): ``Config`` is never imported in this module, so the
        # original unquoted annotation raised NameError the moment this
        # class was defined (signature annotations are evaluated eagerly).
        # The annotation is quoted as a minimal fix — TODO: add the real
        # import once the module that declares Config is confirmed.
        self.config = config
        self.tts = tts_client
        self.whisper = whisper_client
        self.pexels = pexels_client
        self.music_manager = music_manager
        self.queue: List[Dict] = []  # FIFO of pending jobs ({"id", "scenes", "config"})
        self.processing = False      # True while process_queue() is draining

    def add_to_queue(self, scenes: List[SceneInput], config: RenderConfig) -> str:
        """
        Add video to processing queue.

        Must be called from a running event loop: it schedules
        process_queue() with asyncio.create_task().

        Returns:
            video_id for tracking
        """
        video_id = str(uuid.uuid4()).replace('-', '')[:24]  # Similar to cuid

        self.queue.append({
            "id": video_id,
            "scenes": scenes,
            "config": config
        })

        logger.info(f"Added video {video_id} to queue. Queue length: {len(self.queue)}")

        # Start processing if not already running
        if not self.processing:
            asyncio.create_task(self.process_queue())

        return video_id

    async def process_queue(self):
        """Process queued videos one at a time (FIFO) until the queue is empty."""
        if self.processing:
            return

        self.processing = True

        try:
            while self.queue:
                item = self.queue[0]
                video_id = item["id"]

                logger.info(f"Processing video {video_id}")

                try:
                    # Run video creation in a background thread to keep API responsive
                    # This allows status checks while video is being processed
                    await asyncio.to_thread(
                        self._create_short_sync,
                        video_id,
                        item["scenes"],
                        item["config"]
                    )
                    logger.info(f"Successfully created video {video_id}")
                except Exception as e:
                    logger.error(f"Failed to create video {video_id}: {e}", exc_info=True)
                    # Mark as failed by creating a .failed marker file
                    # (get_status checks for this file).
                    failed_marker = self.config.videos_dir_path / f"{video_id}.failed"
                    failed_marker.write_text(str(e))
                finally:
                    # The item stays at queue[0] while processing so that
                    # get_status reports it as "processing"; pop it only
                    # once it has succeeded or failed.
                    self.queue.pop(0)
        finally:
            self.processing = False

    def _create_short_sync(
        self,
        video_id: str,
        input_scenes: List[SceneInput],
        config: RenderConfig
    ):
        """Synchronous wrapper for create_short - runs in a separate thread.

        A fresh event loop is created because this executes inside
        asyncio.to_thread(), where no loop is running.
        """
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self.create_short(video_id, input_scenes, config))
        finally:
            loop.close()

    async def create_short(
        self,
        video_id: str,
        input_scenes: List[SceneInput],
        config: RenderConfig
    ):
        """Create the short video.

        For each scene: generate TTS audio, derive captions from the WAV,
        fetch a background (Pexels video, falling back to a still photo
        converted to video), and trim it to the audio length.  Scenes are
        then rendered with music to videos_dir_path/<video_id>.mp4 via an
        atomic tmp-file rename.

        Raises:
            Exception: when no visual content can be found for a scene,
                or the final render does not produce an output file.
        """
        scenes = []
        total_duration = 0
        exclude_video_ids = []  # Pexels IDs already used, to avoid repeats
        temp_files = []

        orientation = config.orientation.value

        try:
            # Process each scene
            for i, scene_input in enumerate(input_scenes):
                logger.debug(f"Processing scene {i + 1}/{len(input_scenes)}")

                # Generate TTS audio
                audio_data, tts_duration = await self.tts.generate(
                    scene_input.text,
                    config.voice.value
                )

                # Save audio files
                temp_id = str(uuid.uuid4()).replace('-', '')[:12]
                wav_path = self.config.temp_dir_path / f"{temp_id}.wav"
                mp3_path = self.config.temp_dir_path / f"{temp_id}.mp3"
                video_path = self.config.temp_dir_path / f"{temp_id}.mp4"

                temp_files.extend([wav_path, mp3_path, video_path])

                # Save and convert audio
                FFmpegUtils.save_audio_as_wav(audio_data, wav_path)
                FFmpegUtils.save_audio_as_mp3(audio_data, mp3_path)

                # Get ACTUAL audio duration from WAV file (TTS estimate is often wrong!)
                audio_duration = FFmpegUtils.get_video_duration(wav_path)
                logger.info(f"Scene {i+1}: TTS reported {tts_duration:.2f}s, actual WAV duration: {audio_duration:.2f}s")

                # Add padding to last scene
                if i + 1 == len(input_scenes) and config.paddingBack:
                    audio_duration += config.paddingBack / 1000

                # Generate captions
                captions = self.whisper.create_captions(str(wav_path))

                # Find and download background video(s)
                video_paths = []

                # Simplified Scene Construction: One Video Per Scene
                # User Request: "Remove restrictions. One video per scene equal to audio."
                # User Request: "Video must be 9:16. Use image if needed."

                # Force portrait for 9:16 (overrides config.orientation for
                # the Pexels search and the final render).
                orientation = "portrait"

                keywords = scene_input.searchTerms
                if not keywords:
                    keywords = ["general"]

                # Handle both string and list inputs for searchTerms
                # If it's a string, use it directly; if list, use first item
                if isinstance(keywords, str):
                    keyword = keywords  # Use the whole string
                elif isinstance(keywords, list) and len(keywords) > 0:
                    keyword = keywords[0] if isinstance(keywords[0], str) else str(keywords[0])
                else:
                    keyword = "general"

                logger.debug(f"Using search keyword: '{keyword}' from searchTerms: {keywords}")

                # Try to find a video that is at least as long as the audio
                search_duration = max(audio_duration, 5.0)

                video_found = False
                video_path = None
                temp_vid_id = str(uuid.uuid4()).replace('-', '')[:12]

                try:
                    # 1. Try Video Search
                    pexels_video = self.pexels.find_video(
                        keyword,
                        search_duration,
                        exclude_video_ids,
                        orientation
                    )

                    video_path = self.config.temp_dir_path / f"{temp_vid_id}.mp4"
                    temp_files.append(video_path)

                    # Download video
                    logger.debug(f"Downloading video for '{keyword}' (Target: {audio_duration:.2f}s)")
                    response = requests.get(pexels_video["url"], stream=True, timeout=30)
                    response.raise_for_status()

                    with open(video_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)

                    # Verify file size (a tiny file means a failed/HTML response)
                    if video_path.stat().st_size < 1024:
                        logger.warning(f"Downloaded video {video_path} is too small")
                        raise Exception("Downloaded video is invalid")

                    # Normalize video, then swap it into place of the raw download
                    norm_path = video_path.with_suffix(".norm.mp4")
                    FFmpegUtils.normalize_video(video_path, norm_path)
                    video_path.unlink()
                    norm_path.rename(video_path)

                    video_found = True
                    exclude_video_ids.append(pexels_video["id"])

                except Exception as e:
                    logger.warning(f"Video search/download failed for '{keyword}': {e}. Trying photo fallback.")
                    video_found = False

                # 2. Photo Fallback
                if not video_found:
                    try:
                        logger.info(f"Attempting photo fallback for '{keyword}'")
                        pexels_photo = self.pexels.find_photo(keyword, orientation)

                        if pexels_photo:
                            # Download photo
                            photo_path = self.config.temp_dir_path / f"{temp_vid_id}.jpg"
                            temp_files.append(photo_path)

                            response = requests.get(pexels_photo["url"], stream=True, timeout=30)
                            response.raise_for_status()

                            with open(photo_path, 'wb') as f:
                                for chunk in response.iter_content(chunk_size=8192):
                                    f.write(chunk)

                            # Convert photo to video
                            video_path = self.config.temp_dir_path / f"{temp_vid_id}_img.mp4"
                            temp_files.append(video_path)

                            FFmpegUtils.image_to_video(photo_path, video_path, audio_duration)
                            video_found = True
                            logger.info(f"Created video from photo {pexels_photo['id']}")

                    except Exception as e:
                        logger.error(f"Photo fallback failed: {e}")

                if not video_found or not video_path or not video_path.exists():
                    raise Exception(f"Failed to find any visual content for '{keyword}'")

                # Get actual duration (whether video or image-video)
                vid_duration = FFmpegUtils.get_video_duration(video_path)

                # Determine cut duration
                take_duration = min(vid_duration, audio_duration)

                logger.info(f"Using {take_duration:.2f}s of content for scene (Audio: {audio_duration:.2f}s)")

                # Physically cut/trim to ensure exact match
                final_clip_path = self.config.temp_dir_path / f"{temp_vid_id}_cut.mp4"
                temp_files.append(final_clip_path)

                FFmpegUtils.cut_video(video_path, final_clip_path, 0, take_duration)

                # Verify actual cut duration
                actual_cut_dur = FFmpegUtils.get_video_duration(final_clip_path)

                video_paths.append({
                    "path": str(final_clip_path),
                    "duration": actual_cut_dur,
                    "keyword": keyword
                })

                # Build scene dict
                scenes.append({
                    "captions": [c.dict() for c in captions],
                    "video": video_paths,
                    "audio": {
                        "url": str(mp3_path),
                        "duration": audio_duration
                    }
                })

                total_duration += audio_duration

            # Add padding to total duration
            if config.paddingBack:
                total_duration += config.paddingBack / 1000

            # Select background music
            music_mood = config.music.value if config.music else None
            selected_music = self.music_manager.find_music(music_mood)

            logger.info(f"Selected music: {selected_music['filename']} (mood: {selected_music['mood']})")

            # Render final video
            output_path = self.config.videos_dir_path / f"{video_id}.mp4"
            # Use a temp path for atomic write to prevent premature "ready" status
            temp_output_path = self.config.videos_dir_path / f"{video_id}.tmp.mp4"

            try:
                VideoComposer.render(
                    scenes=scenes,
                    music_path=selected_music["path"],
                    output_path=temp_output_path,
                    orientation=orientation,
                    caption_position=config.captionPosition.value,
                    caption_bg_color=config.captionBackgroundColor,
                    music_volume=config.musicVolume.value,
                    padding_back=config.paddingBack
                )

                # Atomic rename to final path
                if temp_output_path.exists():
                    temp_output_path.rename(output_path)
                    logger.info(f"Video {video_id} created successfully at {output_path}")
                else:
                    raise Exception("Rendered file not found at temp path")

            except Exception:
                # Cleanup partial render on failure
                if temp_output_path.exists():
                    temp_output_path.unlink()
                raise  # bare re-raise preserves the original traceback (was: raise e)
        finally:
            # Always remove intermediate files — including on failure, so
            # aborted jobs do not leak temp audio/video on disk (the
            # original only cleaned up on the success path).
            for temp_file in temp_files:
                if temp_file.exists():
                    temp_file.unlink()

    def get_status(self, video_id: str) -> VideoStatus:
        """Get video processing status.

        Resolution order: in queue → processing; final .mp4 exists →
        ready; .tmp.mp4 exists → processing; .failed marker → failed;
        worker active but nothing on disk yet → processing; otherwise
        failed (unknown id).
        """
        # Check if in queue (waiting or being processed)
        if any(item["id"] == video_id for item in self.queue):
            return VideoStatus.processing

        # Check if final video exists (READY)
        video_path = self.config.videos_dir_path / f"{video_id}.mp4"
        if video_path.exists():
            return VideoStatus.ready

        # Check if temp file exists (still rendering = PROCESSING)
        temp_path = self.config.videos_dir_path / f"{video_id}.tmp.mp4"
        if temp_path.exists():
            return VideoStatus.processing

        # Check if failed marker exists
        failed_marker = self.config.videos_dir_path / f"{video_id}.failed"
        if failed_marker.exists():
            return VideoStatus.failed

        # If processing flag is active but video not found, it might be in early stages
        if self.processing:
            return VideoStatus.processing

        # Video not found at all
        return VideoStatus.failed

    def get_video_path(self, video_id: str) -> Path:
        """Return the expected path of the final rendered video file."""
        return self.config.videos_dir_path / f"{video_id}.mp4"

    def delete_video(self, video_id: str):
        """Delete the rendered video file, if it exists."""
        video_path = self.get_video_path(video_id)
        if video_path.exists():
            video_path.unlink()
            logger.info(f"Deleted video {video_id}")

    def list_all_videos(self) -> List[Dict]:
        """List all videos (on disk and queued) with their status."""
        videos = []

        # Get all MP4 files (exclude temp files)
        for video_file in self.config.videos_dir_path.glob("*.mp4"):
            # Skip temp files (*.tmp.mp4)
            if ".tmp." in video_file.name:
                continue
            video_id = video_file.stem
            videos.append({
                "id": video_id,
                "status": self.get_status(video_id).value
            })

        # Add videos in queue that are not on disk yet
        for item in self.queue:
            if not any(v["id"] == item["id"] for v in videos):
                videos.append({
                    "id": item["id"],
                    "status": VideoStatus.processing.value
                })

        return videos

    def get_available_voices(self) -> List[str]:
        """Get list of available TTS voices."""
        return TTSClient.list_available_voices()

    def _plan_segments(self, duration: float) -> List[float]:
        """
        Deterministic segmentation algorithm (Even Split Strategy):
        - Segments between 2-5 seconds
        - Avoid 1-second clips
        - Sum exactly equals duration
        - Distribute duration evenly to maximize segment length
        """
        if duration <= 5.0:
            return [duration]

        # Calculate optimal number of segments.
        # We want segments as close to 5.0 as possible, but >= 2.0
        num_segments = int(duration / 5.0)
        if duration % 5.0 > 0:
            num_segments += 1

        segment_duration = duration / num_segments

        # Create list of equal segments
        segments = [segment_duration] * num_segments

        # Handle floating point precision errors by absorbing the
        # remainder into the final segment.
        current_sum = sum(segments)
        diff = duration - current_sum
        if abs(diff) > 0.0001:
            segments[-1] += diff

        return segments

    def get_available_music_tags(self) -> List[str]:
        """Get list of available music moods."""
        return self.music_manager.get_available_moods()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e8cef854bdd45ef4239e3d83d831e802a0f51bb0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,23 @@
+# Required Python packages for NCAkit
+
+# Core Framework
+fastapi
+uvicorn[standard]
+pydantic
+pydantic-settings
+
+# HTTP & Async
+requests
+aiohttp
+
+# Video Processing
+moviepy==1.0.3
+pydub
+Pillow<10.0.0
+numpy<2.0.0
+
+# AI/ML
+faster-whisper
+
+# Utilities
+python-multipart
diff --git a/static/app.js b/static/app.js
new file mode 100644
index 0000000000000000000000000000000000000000..67405f8ac211e2a28202994dc953c56a30f36d76
--- /dev/null
+++ b/static/app.js
@@ -0,0 +1,177 @@
+// API base URL
+const API_BASE = '';
+
+// DOM elements
+const videoForm = document.getElementById('videoForm');
+const scenesContainer = document.getElementById('scenesContainer');
+const addSceneBtn = document.getElementById('addScene');
+const statusDiv = document.getElementById('status');
+const videosListDiv = document.getElementById('videosList');
+
+let sceneCount = 1;
+
+// Add new scene
+addSceneBtn.addEventListener('click', () => {
+ sceneCount++;
+ const sceneDiv = document.createElement('div');
+ sceneDiv.className = 'scene';
+ sceneDiv.innerHTML = `
+
No videos yet. Create one above!
'; + return; + } + + videosListDiv.innerHTML = data.videos.map(video => ` +Error loading videos
'; + } +} + +// Delete video +async function deleteVideo(videoId) { + if (!confirm('Are you sure you want to delete this video?')) { + return; + } + + try { + await fetch(`${API_BASE}/api/short-video/${videoId}`, { + method: 'DELETE' + }); + loadVideos(); + } catch (error) { + alert('Error deleting video'); + } +} + +// Load videos on page load +loadVideos(); + +// Auto-refresh videos list every 10 seconds +setInterval(loadVideos, 10000); diff --git a/static/fonts/TheBoldFont.ttf b/static/fonts/TheBoldFont.ttf new file mode 100644 index 0000000000000000000000000000000000000000..e790788d68d32db53f06a52f2185284b258db76c Binary files /dev/null and b/static/fonts/TheBoldFont.ttf differ diff --git a/static/index.html b/static/index.html new file mode 100644 index 0000000000000000000000000000000000000000..5cb733ec6adc3f5922df9c31492710138021da9f --- /dev/null +++ b/static/index.html @@ -0,0 +1,568 @@ + + + + + + +Neural Content Automation Toolkit
++ AI generates script → TTS → Character images → Final video +
+ + + + ++ Scene-based video with TTS, captions, and Pexels backgrounds +
+ + + + +