diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..9e9e31b4f7d7542fb228634f6026fa489b2f5950 --- /dev/null +++ b/.env.example @@ -0,0 +1,57 @@ +# NCAkit Environment Configuration +# Copy this file to .env and fill in your values + +# =================== +# Video Creator Module +# =================== + +# Pexels API key for background videos (Required) +# Get from: https://www.pexels.com/api/ +PEXELS_API_KEY=your_pexels_api_key_here + +# Kokoro TTS endpoint URL (Required) +# Example: https://your-username-kokoro-tts.hf.space +HF_TTS=https://your-tts-endpoint.hf.space + +# Whisper model for captions (Optional, default: tiny.en) +# Options: tiny.en, base.en, small.en, medium.en, large +WHISPER_MODEL=tiny.en + +# =================== +# Server Configuration +# =================== + +# Server port (Optional, default: 8880) +PORT=8880 + +# Log level (Optional, default: info) +# Options: debug, info, warning, error +LOG_LEVEL=info + +# Running in Docker? 
(Optional, default: false) +DOCKER=false + +# Custom data directory (Optional) +# DATA_DIR_PATH=/path/to/data + +# =================== +# Add new module configs below +# =================== + +# =================== +# Story Reels Module (Image Generation) +# =================== + +# NVIDIA API Key (PRIMARY - stable-diffusion-3-medium) +# Get from: https://build.nvidia.com/ +NVIDIA_API_KEY=nvapi-your_key_here + +# Cloudflare Worker URL (FALLBACK) +CF_URL=https://image-api.yourworker.workers.dev + +# Cloudflare API Key (FALLBACK) +CF_API=your_api_key_here + +# Gemini API Key (Required for AI script generation) +# Get from: https://aistudio.google.com/apikey +GEMINI_API_KEY=your_gemini_api_key_here diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..6b9df027c422c392e807f993b1545166115fb074 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.mp3 filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ede1185bbc69e98baa018eda640337fae7c7d387 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,41 @@ +# NCAkit Docker Configuration for Hugging Face Spaces +FROM python:3.11-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + libsndfile1 \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user for HF Spaces +RUN useradd -m -u 1000 user +USER user +ENV HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH + +# Set working directory +WORKDIR $HOME/app + +# Copy requirements first for caching +COPY --chown=user requirements.txt . +RUN pip install --no-cache-dir --user -r requirements.txt + +# Copy application code +COPY --chown=user . . 
+ +# Create data directories +RUN mkdir -p $HOME/app/data $HOME/app/videos $HOME/app/temp + +# Environment for HF Spaces +ENV DOCKER=true +ENV PORT=8880 +ENV LOG_LEVEL=info +ENV DATA_DIR=$HOME/app/data +ENV VIDEOS_DIR=$HOME/app/videos + +# Expose REST API port +EXPOSE 8880 + +# Run the application +CMD ["python", "app.py"] diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..382d14caf01122abf304ea2447fe463ba1d24fa2 --- /dev/null +++ b/README.md @@ -0,0 +1,110 @@ +# NCAkit - Neural Content Automation Toolkit 🤖 + +A modular Python toolkit for content automation, featuring video creation, text-to-speech, and more. + +## ✨ Features + +- 🎬 **Video Creator** - Short-form videos with TTS, captions, and music +- 🔌 **Modular Architecture** - Easy to add new features +- 🌐 **REST API** - FastAPI with auto-generated docs +- 🚀 **Ready for Deployment** - Docker & Hugging Face Spaces + +## 🏗️ Project Structure + +``` +NCAkit/ +├── app.py # Main FastAPI application +├── config.py # Unified configuration +├── requirements.txt # All dependencies +├── Dockerfile # Docker deployment +│ +├── core/ # Shared infrastructure +│ ├── module_registry.py # Auto module discovery +│ └── utils/ # Shared utilities +│ +├── modules/ # Feature modules +│ ├── video_creator/ # Video creation module +│ │ ├── router.py # API endpoints +│ │ ├── schemas.py # Pydantic models +│ │ └── services/ # Core logic +│ └── _template/ # Template for new modules +│ +└── static/ # Web UI & assets +``` + +## 🚀 Quick Start + +### Install + +```bash +cd NCAkit +pip install -r requirements.txt +``` + +### Configure + +```bash +cp .env.example .env +# Edit .env with your API keys +``` + +### Run + +```bash +python app.py +# Or: uvicorn app:app --host 0.0.0.0 --port 8880 --reload +``` + +### Access + +- **Web UI**: http://localhost:8880 +- **API Docs**: http://localhost:8880/docs +- **Modules**: http://localhost:8880/api/modules + +## 📡 API Endpoints + +| Module | Endpoint | 
Method | Description | +|--------|----------|--------|-------------| +| System | `/health` | GET | Health check | +| System | `/api/modules` | GET | List modules | +| Video | `/api/video/short-video` | POST | Create video | +| Video | `/api/video/short-video/{id}/status` | GET | Check status | +| Video | `/api/video/short-video/{id}` | GET | Download video | + +## 🔧 Adding New Modules + +1. Copy `modules/_template/` to `modules/your_module/` +2. Update `MODULE_NAME`, `MODULE_PREFIX` in `__init__.py` +3. Implement router and services +4. Restart server - auto-discovered! + +```python +# modules/your_module/__init__.py +MODULE_NAME = "your_module" +MODULE_PREFIX = "/api/your-feature" + +def register(app, config): + from .router import router + app.include_router(router, prefix=MODULE_PREFIX) +``` + +## 🐳 Docker + +```bash +docker build -t ncakit . +docker run -p 8880:8880 --env-file .env ncakit +``` + +## ⚙️ Environment Variables + +| Variable | Required | Default | Module | +|----------|----------|---------|--------| +| `PEXELS_API_KEY` | ✅ | - | Video Creator | +| `HF_TTS` | ✅ | - | Video Creator | +| `WHISPER_MODEL` | ❌ | tiny.en | Video Creator | +| `PORT` | ❌ | 8880 | Server | +| `LOG_LEVEL` | ❌ | info | Server | + +## 📄 License + +MIT diff --git a/__pycache__/app.cpython-313.pyc b/__pycache__/app.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6bb190d01c17ed83c4fab9df715209c619f62707 Binary files /dev/null and b/__pycache__/app.cpython-313.pyc differ diff --git a/__pycache__/config.cpython-313.pyc b/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3bbb5078776ea190cfe10b8fe29c9a340e0e684f Binary files /dev/null and b/__pycache__/config.cpython-313.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..8ab92e8c97381c8783c4bd79a1e419b977aa5567 --- /dev/null +++ b/app.py @@ -0,0 +1,115 @@ +""" +NCAkit - Neural Content 
Automation Toolkit +Main FastAPI Application with Modular Architecture +""" +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse +import logging +from pathlib import Path +import sys + +from config import config +from core.module_registry import registry + +# Setup logging +logging.basicConfig( + level=config.log_level.upper(), + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[logging.StreamHandler(sys.stdout)] +) +logger = logging.getLogger(__name__) + +# Create FastAPI app +app = FastAPI( + title="NCAkit - Neural Content Automation Toolkit", + description=""" + # NCAkit REST API + + A modular toolkit for content automation with multiple feature modules. + + ## Available Modules + + - 🎬 **Video Creator** - Create short-form videos with TTS, captions, and music + - 📱 More modules coming soon... + + ## How It Works + + 1. Each module has its own API prefix (e.g., `/api/video/`) + 2. Modules are auto-discovered and registered on startup + 3. 
Check `/api/modules` for list of available modules + """, + version="1.0.0", + contact={ + "name": "NCAkit", + "url": "https://github.com/your-repo/ncakit" + } +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.on_event("startup") +async def startup_event(): + """Initialize all modules on startup""" + logger.info("Starting NCAkit...") + + # Ensure directories exist + config.ensure_directories() + + # Register all modules + num_modules = registry.register_all(app, config) + logger.info(f"Loaded {num_modules} module(s)") + + logger.info(f"NCAkit started successfully on port {config.port}") + + +@app.get("/health", tags=["System"]) +async def health_check(): + """Health check endpoint""" + return {"status": "ok", "toolkit": "ncakit"} + + +@app.get("/api/modules", tags=["System"]) +async def list_modules(): + """List all available modules""" + return { + "modules": registry.list_modules() + } + + +@app.get("/") +async def read_root(): + """Serve the web UI""" + static_path = Path(__file__).parent / "static" / "index.html" + if static_path.exists(): + return FileResponse(static_path) + return { + "message": "NCAkit - Neural Content Automation Toolkit", + "docs": "/docs", + "modules": "/api/modules" + } + + +# Mount static files if they exist +static_dir = Path(__file__).parent / "static" +if static_dir.exists(): + app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") + + +if __name__ == "__main__": + import uvicorn + uvicorn.run( + "app:app", + host="0.0.0.0", + port=config.port, + log_level=config.log_level.lower() + ) diff --git a/config.py b/config.py new file mode 100644 index 0000000000000000000000000000000000000000..9bb50f30201bed27781cdef941fc1dd02632d99b --- /dev/null +++ b/config.py @@ -0,0 +1,115 @@ +""" +Base Configuration for NCAkit +Provides centralized configuration management for all modules. 
+""" +import os +from pathlib import Path +from pydantic_settings import BaseSettings +from typing import Optional, Dict, Any + + +class BaseConfig(BaseSettings): + """ + Base configuration class that all module configs should extend. + Provides common settings and utilities. + """ + + # Server Configuration + port: int = 8880 + log_level: str = "info" + debug: bool = False + + # Environment + docker: bool = False + dev: bool = False + data_dir_path: Optional[str] = None + + class Config: + env_file = ".env" + case_sensitive = False + extra = "ignore" + + @property + def base_data_dir(self) -> Path: + """Get the base data directory path""" + if self.data_dir_path: + return Path(self.data_dir_path) + + if self.docker: + return Path("/data") + + # For local development + home = Path.home() + return home / ".ncakit" + + def ensure_base_directories(self): + """Ensure base directories exist""" + self.base_data_dir.mkdir(parents=True, exist_ok=True) + + +class NCAkitConfig(BaseConfig): + """ + Main NCAkit configuration. + Aggregates all module-specific settings. 
+ """ + + # =================== + # Video Creator Module Config + # =================== + pexels_api_key: Optional[str] = None + hf_tts: Optional[str] = None + whisper_model: str = "tiny.en" + whisper_verbose: bool = False + concurrency: int = 1 + video_cache_size_in_bytes: int = 2684354560 # 2.5GB + + # =================== + # Add new module configs here + # Example: + # openai_api_key: Optional[str] = None + # =================== + + # =================== + # Story Reels Module Config + # =================== + nvidia_api_key: Optional[str] = None # NVIDIA API key (primary) + cf_url: Optional[str] = None # Cloudflare Worker URL (fallback) + cf_api: Optional[str] = None # Cloudflare API key (fallback) + gemini_api_key: Optional[str] = None # For AI script generation + + @property + def videos_dir_path(self) -> Path: + """Directory for storing generated videos""" + path = self.base_data_dir / "videos" + path.mkdir(parents=True, exist_ok=True) + return path + + @property + def temp_dir_path(self) -> Path: + """Directory for temporary files""" + path = self.base_data_dir / "temp" + path.mkdir(parents=True, exist_ok=True) + return path + + @property + def whisper_model_dir(self) -> Path: + """Directory for Whisper models""" + path = self.base_data_dir / "whisper_models" + path.mkdir(parents=True, exist_ok=True) + return path + + @property + def music_dir_path(self) -> Path: + """Directory for music files""" + return Path(__file__).parent / "static" / "music" + + def ensure_directories(self): + """Ensure all required directories exist""" + self.ensure_base_directories() + self.videos_dir_path.mkdir(parents=True, exist_ok=True) + self.temp_dir_path.mkdir(parents=True, exist_ok=True) + self.whisper_model_dir.mkdir(parents=True, exist_ok=True) + + +# Global config instance +config = NCAkitConfig() diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b8931f5e8489dab08686d638f2752a43697f9c03 --- /dev/null +++ 
b/core/__init__.py @@ -0,0 +1 @@ +# NCAkit - Neural Content Automation Toolkit diff --git a/core/__pycache__/module_registry.cpython-313.pyc b/core/__pycache__/module_registry.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..77504862dca3df8467448e15f813a663ece8beec Binary files /dev/null and b/core/__pycache__/module_registry.cpython-313.pyc differ diff --git a/core/module_registry.py b/core/module_registry.py new file mode 100644 index 0000000000000000000000000000000000000000..d1802efb600c9e2a10c0c8cc34c3ac18eaf8f07d --- /dev/null +++ b/core/module_registry.py @@ -0,0 +1,145 @@ +""" +Module Registry for NCAkit +Handles automatic discovery and registration of feature modules. +""" +import importlib +import pkgutil +import logging +from pathlib import Path +from typing import List, Dict, Any, Callable +from fastapi import FastAPI + +logger = logging.getLogger(__name__) + + +class ModuleInfo: + """Information about a registered module""" + def __init__( + self, + name: str, + prefix: str, + description: str = "", + register_fn: Callable = None + ): + self.name = name + self.prefix = prefix + self.description = description + self.register_fn = register_fn + + +class ModuleRegistry: + """ + Centralized registry for all NCAkit modules. + + Each module must have an __init__.py with: + - MODULE_NAME: str + - MODULE_PREFIX: str + - MODULE_DESCRIPTION: str (optional) + - register(app, config): function + """ + + def __init__(self): + self._modules: Dict[str, ModuleInfo] = {} + self._initialized: bool = False + + def discover_modules(self, modules_package: str = "modules") -> List[str]: + """ + Discover all available modules in the modules package. + Returns list of module names. 
+ """ + discovered = [] + + try: + package = importlib.import_module(modules_package) + package_path = Path(package.__file__).parent + + for finder, name, is_pkg in pkgutil.iter_modules([str(package_path)]): + # Skip private/template modules + if name.startswith('_'): + continue + + if is_pkg: + discovered.append(name) + logger.debug(f"Discovered module: {name}") + + except Exception as e: + logger.error(f"Error discovering modules: {e}") + + return discovered + + def load_module(self, module_name: str, modules_package: str = "modules") -> ModuleInfo | None: + """Load a single module and return its info""" + try: + full_module_name = f"{modules_package}.{module_name}" + module = importlib.import_module(full_module_name) + + # Check required attributes + if not hasattr(module, 'register'): + logger.warning(f"Module {module_name} has no register function, skipping") + return None + + # Get module metadata + name = getattr(module, 'MODULE_NAME', module_name) + prefix = getattr(module, 'MODULE_PREFIX', f"/api/{module_name}") + description = getattr(module, 'MODULE_DESCRIPTION', "") + + info = ModuleInfo( + name=name, + prefix=prefix, + description=description, + register_fn=module.register + ) + + self._modules[name] = info + logger.info(f"Loaded module: {name} (prefix: {prefix})") + return info + + except Exception as e: + logger.error(f"Failed to load module {module_name}: {e}") + return None + + def register_all(self, app: FastAPI, config: Any) -> int: + """ + Register all discovered modules with the FastAPI app. + Returns number of successfully registered modules. 
+ """ + if self._initialized: + logger.warning("Modules already initialized") + return len(self._modules) + + # Discover modules + module_names = self.discover_modules() + + registered = 0 + for name in module_names: + info = self.load_module(name) + if info and info.register_fn: + try: + info.register_fn(app, config) + registered += 1 + logger.info(f"Registered module: {info.name}") + except Exception as e: + logger.error(f"Failed to register module {name}: {e}") + + self._initialized = True + logger.info(f"Registered {registered}/{len(module_names)} modules") + return registered + + def get_module(self, name: str) -> ModuleInfo | None: + """Get info about a specific module""" + return self._modules.get(name) + + def list_modules(self) -> List[Dict[str, str]]: + """List all registered modules""" + return [ + { + "name": info.name, + "prefix": info.prefix, + "description": info.description + } + for info in self._modules.values() + ] + + +# Global registry instance +registry = ModuleRegistry() diff --git a/core/utils/__init__.py b/core/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6a414d72dab8f400f2768aabfe6bda3edf03d4be --- /dev/null +++ b/core/utils/__init__.py @@ -0,0 +1 @@ +# Core Utilities diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b58ce0610cb5363ffde27f98a05ba0c5cb044e4 --- /dev/null +++ b/modules/__init__.py @@ -0,0 +1 @@ +# NCAkit Modules diff --git a/modules/_template/__init__.py b/modules/_template/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..27b1ced60214e5de65895b762add7f370d4203d4 --- /dev/null +++ b/modules/_template/__init__.py @@ -0,0 +1,2 @@ +# Module Template - DO NOT USE DIRECTLY +# Copy this folder to create a new module diff --git a/modules/_template/module.py b/modules/_template/module.py new file mode 100644 index 
0000000000000000000000000000000000000000..746c866887303b21fac83cf85236b8c20a9c2a1e --- /dev/null +++ b/modules/_template/module.py @@ -0,0 +1,38 @@ +""" +Module Template for NCAkit +Copy this folder and rename to create a new module. + +Usage: +1. Copy _template folder to modules/your_module_name/ +2. Update MODULE_NAME, MODULE_PREFIX, MODULE_DESCRIPTION +3. Implement your router and services +4. The module will be auto-discovered on startup +""" +from fastapi import FastAPI + +# =================== +# Module Metadata +# =================== +MODULE_NAME = "template" +MODULE_PREFIX = "/api/template" +MODULE_DESCRIPTION = "Template module - copy and modify for your feature" + + +def register(app: FastAPI, config): + """ + Register this module with the main FastAPI app. + Called automatically by module_registry. + + Args: + app: FastAPI application instance + config: NCAkitConfig instance with all settings + """ + from .router import router + + # You can initialize services here and attach to app.state + # Example: + # from .services import MyService + # app.state.my_service = MyService(config) + + # Register the router + app.include_router(router, prefix=MODULE_PREFIX, tags=[MODULE_NAME]) diff --git a/modules/_template/router.py b/modules/_template/router.py new file mode 100644 index 0000000000000000000000000000000000000000..1fc889918e270d04c53541031fde802b9e8ab8bf --- /dev/null +++ b/modules/_template/router.py @@ -0,0 +1,24 @@ +""" +Template Router - Define your API endpoints here +""" +from fastapi import APIRouter + +router = APIRouter() + + +@router.get("/") +async def template_root(): + """Example endpoint - replace with your implementation""" + return {"message": "Template module is working!"} + + +@router.get("/example") +async def example_endpoint(): + """Another example endpoint""" + return {"data": "This is example data"} + + +# Add more endpoints as needed +# @router.post("/create") +# async def create_something(request: YourRequestModel): +# ... 
diff --git a/modules/_template/schemas.py b/modules/_template/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..71bb1cc286194198f2fd4d0f116a3e6baf128255 --- /dev/null +++ b/modules/_template/schemas.py @@ -0,0 +1,20 @@ +""" +Template Schemas - Define your Pydantic models here +""" +from pydantic import BaseModel, Field +from typing import Optional, List + + +class ExampleRequest(BaseModel): + """Example request model""" + name: str = Field(..., description="Name field") + value: Optional[int] = Field(None, description="Optional value") + + +class ExampleResponse(BaseModel): + """Example response model""" + success: bool + data: dict + + +# Add more models as needed diff --git a/modules/story_reels/__init__.py b/modules/story_reels/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0503a505494296b7edb58f10c7abd46b0b50b8ff --- /dev/null +++ b/modules/story_reels/__init__.py @@ -0,0 +1,97 @@ +""" +Story Reels Module for NCAkit +Character-consistent story video generation using Cloudflare AI. +""" +from fastapi import FastAPI +import logging + +# Module Metadata +MODULE_NAME = "story_reels" +MODULE_PREFIX = "/api/story" +MODULE_DESCRIPTION = "Generate character-consistent story videos from text scripts" + +logger = logging.getLogger(__name__) + + +def register(app: FastAPI, config): + """ + Register the story reels module with FastAPI. + Initializes all services and adds routes. + """ + from .router import router, set_story_creator + from .services.cloudflare_client import CloudflareClient + from .services.script_generator import ScriptGenerator + from .services.story_creator import StoryCreator + + logger.info("Registering story_reels module...") + + # Validate configs + + if not config.gemini_api_key: + logger.warning("GEMINI_API_KEY missing! 
AI script generation will fail.") + + # Reuse TTS client from video_creator if available + tts_client = getattr(app.state, 'tts_client', None) + whisper_client = getattr(app.state, 'whisper_client', None) + + # If video_creator not loaded, initialize our own clients + if not tts_client: + logger.info("Initializing TTS client for story_reels...") + from modules.video_creator.services.libraries.tts_client import TTSClient + tts_client = TTSClient(config.hf_tts) + app.state.tts_client = tts_client + + if not whisper_client: + logger.info("Initializing Whisper client for story_reels...") + from modules.video_creator.services.libraries.whisper_client import WhisperClient + whisper_client = WhisperClient( + model_name=config.whisper_model, + model_dir=config.whisper_model_dir + ) + app.state.whisper_client = whisper_client + + # Initialize Script Generator (Gemini) + logger.info("Initializing script generator (Gemini)...") + script_generator = ScriptGenerator(config.gemini_api_key or "") + + # Initialize NVIDIA client (PRIMARY) + nvidia_client = None + if config.nvidia_api_key: + logger.info("Initializing NVIDIA client (primary)...") + from .services.nvidia_client import NvidiaClient + nvidia_client = NvidiaClient(config.nvidia_api_key) + else: + logger.warning("NVIDIA_API_KEY missing! Using Cloudflare only.") + + # Initialize Cloudflare client (FALLBACK) + cloudflare_client = None + if config.cf_url and config.cf_api: + logger.info("Initializing Cloudflare client (fallback)...") + cloudflare_client = CloudflareClient( + api_url=config.cf_url, + api_key=config.cf_api + ) + else: + logger.warning("CF_URL or CF_API missing! 
No fallback available.") + + # Initialize story creator + logger.info("Initializing story creator...") + story_creator = StoryCreator( + config=config, + tts_client=tts_client, + whisper_client=whisper_client, + nvidia_client=nvidia_client, + cloudflare_client=cloudflare_client, + script_generator=script_generator + ) + + # Set the global story creator in the router + set_story_creator(story_creator) + + # Store in app state + app.state.story_creator = story_creator + + # Register routes + app.include_router(router, prefix=MODULE_PREFIX, tags=["Story Reels"]) + + logger.info("story_reels module registered successfully!") diff --git a/modules/story_reels/__pycache__/__init__.cpython-313.pyc b/modules/story_reels/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b28cd695cd0b8946bf0761f9e557a7d9281f23b Binary files /dev/null and b/modules/story_reels/__pycache__/__init__.cpython-313.pyc differ diff --git a/modules/story_reels/__pycache__/router.cpython-313.pyc b/modules/story_reels/__pycache__/router.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..666b2d2af10e8bdeb09447f8c8497020dcdc232a Binary files /dev/null and b/modules/story_reels/__pycache__/router.cpython-313.pyc differ diff --git a/modules/story_reels/__pycache__/schemas.cpython-313.pyc b/modules/story_reels/__pycache__/schemas.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fd6051995a4b40e28645aec1cca9dbdc0eab005 Binary files /dev/null and b/modules/story_reels/__pycache__/schemas.cpython-313.pyc differ diff --git a/modules/story_reels/router.py b/modules/story_reels/router.py new file mode 100644 index 0000000000000000000000000000000000000000..2d0e6de5c2ca9dbb1433e08f78c7037b5ad73d5d --- /dev/null +++ b/modules/story_reels/router.py @@ -0,0 +1,117 @@ +""" +Story Reels Router - API Endpoints +""" +from fastapi import APIRouter, HTTPException +from fastapi.responses import 
FileResponse +import logging + +from .schemas import ( + GenerateVideoRequest, + GenerateVideoResponse, + VideoStatusResponse, + PreviewResponse, + JobStatus +) +from .services.story_creator import StoryCreator + +logger = logging.getLogger(__name__) + +# Will be set during module registration +story_creator: StoryCreator = None + + +def set_story_creator(creator: StoryCreator): + """Set the global story creator instance""" + global story_creator + story_creator = creator + + +router = APIRouter() + + +@router.post("/generate", + response_model=GenerateVideoResponse, + status_code=201, + summary="Generate story video", + description="Generate a character-consistent story video from script" +) +async def generate_video(request: GenerateVideoRequest): + """ + Main video generation endpoint. + + - Converts script to speech (TTS) + - Generates captions (Whisper) + - Creates character-consistent images (Cloudflare) + - Composes final video (MoviePy) + """ + try: + logger.info(f"Generating video for topic: {request.topic}") + + job_id = story_creator.add_to_queue( + topic=request.topic, + script=request.script, + character_profile=request.character_profile, + voice=request.voice + ) + + return GenerateVideoResponse( + job_id=job_id, + status=JobStatus.queued, + message="Video generation started" + ) + + except Exception as e: + logger.error(f"Error starting generation: {e}", exc_info=True) + raise HTTPException(status_code=400, detail=str(e)) + + +@router.get("/status/{job_id}", + response_model=VideoStatusResponse, + summary="Get job status", + description="Check the processing status of a video generation job" +) +async def get_status(job_id: str): + """Get video generation status""" + status = story_creator.get_status(job_id) + return VideoStatusResponse(**status) + + +@router.get("/preview/{job_id}/{scene_id}", + response_model=PreviewResponse, + summary="Get scene preview", + description="Get preview of a generated scene image" +) +async def get_preview(job_id: str, 
scene_id: int): + """Get scene preview""" + scene = story_creator.get_preview(job_id, scene_id) + + if not scene: + raise HTTPException(status_code=404, detail="Scene not found") + + return PreviewResponse( + scene_id=scene["scene_id"], + image_url=scene["image_path"], + prompt=scene["prompt"] + ) + + +@router.get("/download/{job_id}", + summary="Download video", + description="Download the generated video file", + responses={ + 200: {"description": "Video file", "content": {"video/mp4": {}}}, + 404: {"description": "Video not found"} + } +) +async def download_video(job_id: str): + """Download generated video""" + video_path = story_creator.get_video_path(job_id) + + if not video_path or not video_path.exists(): + raise HTTPException(status_code=404, detail="Video not found") + + return FileResponse( + video_path, + media_type="video/mp4", + filename=f"story_{job_id}.mp4" + ) diff --git a/modules/story_reels/schemas.py b/modules/story_reels/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..678dd5a77c8bb5c64a86625e704ccdc8ddc5cefe --- /dev/null +++ b/modules/story_reels/schemas.py @@ -0,0 +1,114 @@ +""" +Story Reels Pydantic Schemas +Character-consistent video generation from text scripts +""" +from pydantic import BaseModel, Field +from typing import List, Optional +from enum import Enum + + +class StyleEnum(str, Enum): + """Available image styles""" + semi_realistic = "semi-realistic" + anime = "anime" + cartoon = "cartoon" + realistic = "realistic" + watercolor = "watercolor" + + +class CameraEnum(str, Enum): + """Camera shot types""" + close_up = "close-up" + medium = "medium shot" + wide = "wide shot" + side = "side view" + front = "front view" + + +class JobStatus(str, Enum): + """Job processing status""" + queued = "queued" + processing = "processing" + generating_audio = "generating_audio" + generating_images = "generating_images" + composing_video = "composing_video" + ready = "ready" + failed = "failed" + + +# 
=================== +# Character Profile +# =================== + +class CharacterProfile(BaseModel): + """Character definition for consistency""" + name: str = Field(..., description="Character name") + age: str = Field("25", description="Character age") + gender: str = Field("male", description="male/female") + hair: str = Field("short black hair", description="Hair description") + skin: str = Field("light brown", description="Skin tone") + face: str = Field("", description="Face features (optional)") + clothes: str = Field("casual clothes", description="Clothing description") + style: StyleEnum = Field(StyleEnum.semi_realistic, description="Art style") + seed: int = Field(432891, description="Fixed seed for consistency") + + +# =================== +# Scene +# =================== + +class SceneInput(BaseModel): + """Scene from script segment""" + scene_id: int + scene_text: str = Field(..., description="Scene description") + camera: CameraEnum = Field(CameraEnum.medium, description="Camera angle") + pose: str = Field("standing", description="Character pose") + lighting: str = Field("natural light", description="Lighting description") + duration: float = Field(4.0, description="Scene duration in seconds") + + +class GeneratedScene(BaseModel): + """Scene with generated content""" + scene_id: int + prompt: str + image_url: str + duration: float + + +# =================== +# API Request/Response +# =================== + +class GenerateVideoRequest(BaseModel): + """Main video generation request""" + topic: str = Field(..., description="Video topic/title") + script: str = Field("", description="Full story script (optional - auto-generated if empty)") + character_profile: Optional[CharacterProfile] = Field( + default=None, + description="Character profile for consistency (optional)" + ) + voice: str = Field("af_heart", description="TTS voice") + + +class GenerateVideoResponse(BaseModel): + """Response after starting generation""" + job_id: str + status: JobStatus = 
JobStatus.queued + message: str = "Video generation started" + + +class VideoStatusResponse(BaseModel): + """Job status response""" + job_id: str + status: JobStatus + progress: int = Field(0, description="Progress 0-100") + video_url: Optional[str] = None + duration: Optional[float] = None + error: Optional[str] = None + + +class PreviewResponse(BaseModel): + """Scene preview response""" + scene_id: int + image_url: str + prompt: str diff --git a/modules/story_reels/services/__init__.py b/modules/story_reels/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..eb87d350e8973a44ec3b0ed2b61e159b65a34407 --- /dev/null +++ b/modules/story_reels/services/__init__.py @@ -0,0 +1 @@ +# Story Reels Services diff --git a/modules/story_reels/services/__pycache__/cloudflare_client.cpython-313.pyc b/modules/story_reels/services/__pycache__/cloudflare_client.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6cde4e4c105711ce28fbed4918e4db8e571e3fc4 Binary files /dev/null and b/modules/story_reels/services/__pycache__/cloudflare_client.cpython-313.pyc differ diff --git a/modules/story_reels/services/__pycache__/nvidia_client.cpython-313.pyc b/modules/story_reels/services/__pycache__/nvidia_client.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..399d24b10c704611b06be3eb6b0b5873c25d3cb2 Binary files /dev/null and b/modules/story_reels/services/__pycache__/nvidia_client.cpython-313.pyc differ diff --git a/modules/story_reels/services/__pycache__/prompt_builder.cpython-313.pyc b/modules/story_reels/services/__pycache__/prompt_builder.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..abc92e840912ce5f90c42c9fddac2b558fdaf969 Binary files /dev/null and b/modules/story_reels/services/__pycache__/prompt_builder.cpython-313.pyc differ diff --git a/modules/story_reels/services/__pycache__/script_generator.cpython-313.pyc 
b/modules/story_reels/services/__pycache__/script_generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..971d7b7c3b18b0f242826a5fd29e44ceec47308c Binary files /dev/null and b/modules/story_reels/services/__pycache__/script_generator.cpython-313.pyc differ diff --git a/modules/story_reels/services/__pycache__/srt_parser.cpython-313.pyc b/modules/story_reels/services/__pycache__/srt_parser.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d73e777948a97916607f658582947a695fc0fa46 Binary files /dev/null and b/modules/story_reels/services/__pycache__/srt_parser.cpython-313.pyc differ diff --git a/modules/story_reels/services/__pycache__/story_creator.cpython-313.pyc b/modules/story_reels/services/__pycache__/story_creator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3609ebb9bcb4fe8b9631b418aff01e55fa41d3c2 Binary files /dev/null and b/modules/story_reels/services/__pycache__/story_creator.cpython-313.pyc differ diff --git a/modules/story_reels/services/cloudflare_client.py b/modules/story_reels/services/cloudflare_client.py new file mode 100644 index 0000000000000000000000000000000000000000..7a05249c3e002ff6bf7e2abf90a6889052b7e356 --- /dev/null +++ b/modules/story_reels/services/cloudflare_client.py @@ -0,0 +1,198 @@ +""" +Cloudflare Workers AI Client +Text-to-image generation with character consistency +Uses custom Cloudflare Worker endpoint +""" +import logging +import time +import requests +from typing import Optional, List, Dict +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class CloudflareClient: + """ + Client for Cloudflare Workers AI image generation. + Uses custom worker endpoint for image generation. + """ + + # Default model + DEFAULT_MODEL = "@cf/stabilityai/stable-diffusion-xl-base-1.0" + + def __init__(self, api_url: str, api_key: str): + """ + Initialize Cloudflare client. 
+ + Args: + api_url: Custom Cloudflare Worker URL (CF_URL) + api_key: API key for authentication (CF_API) + """ + self.api_url = api_url + self.api_key = api_key + + def generate_image( + self, + prompt: str, + seed: Optional[int] = None, + width: int = 1080, + height: int = 1920, + quality: int = 90 + ) -> bytes: + """ + Generate image from prompt. + + Args: + prompt: Text prompt for image generation + seed: Fixed seed for reproducibility + width: Image width (9:16 portrait = 1080) + height: Image height (9:16 portrait = 1920) + quality: Image quality (1-100) + + Returns: + Image bytes (PNG format) + """ + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + + payload = { + "prompt": prompt, + "model": self.DEFAULT_MODEL, + "width": width, + "height": height, + "format": "png", + "quality": quality, + "download": True + } + + # Add seed for consistency if provided + if seed is not None: + payload["seed"] = seed + + logger.debug(f"Generating image with prompt: {prompt[:100]}...") + + try: + response = requests.post( + self.api_url, + headers=headers, + json=payload, + timeout=120 + ) + response.raise_for_status() + + # Worker returns raw image bytes + return response.content + + except requests.exceptions.RequestException as e: + logger.error(f"Cloudflare API error: {e}") + if hasattr(e, 'response') and e.response is not None: + logger.error(f"Response: {e.response.text[:500]}") + raise Exception(f"Image generation failed: {e}") + + def generate_and_save( + self, + prompt: str, + output_path: Path, + seed: Optional[int] = None, + **kwargs + ) -> Path: + """Generate image and save to file""" + image_bytes = self.generate_image(prompt, seed=seed, **kwargs) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(image_bytes) + + logger.info(f"Saved image to {output_path}") + return output_path + + @staticmethod + def test_connection(api_url: str, api_key: str) -> bool: + """Test API 
connection""" + try: + client = CloudflareClient(api_url, api_key) + client.generate_image("test", width=256, height=256) + return True + except Exception as e: + logger.error(f"Connection test failed: {e}") + return False + + def generate_batch( + self, + prompts: List[tuple], + output_dir: Path, + seed: Optional[int] = None, + batch_size: int = 5, + delay_seconds: float = 1.0, + **kwargs + ) -> List[Dict]: + """ + Generate images in batches to save API credits. + + Pattern: Generate 5, wait, next 5, wait... + + Args: + prompts: List of (prompt_id, prompt_text) tuples + output_dir: Directory to save images + seed: Fixed seed for character consistency + batch_size: Images per batch (default 5) + delay_seconds: Delay between images in batch (default 1s) + + Returns: + List of generated image info dicts + """ + output_dir.mkdir(parents=True, exist_ok=True) + generated = [] + total = len(prompts) + + # Split into batches of 5 + for batch_start in range(0, total, batch_size): + batch_end = min(batch_start + batch_size, total) + batch = prompts[batch_start:batch_end] + + logger.info(f"Processing batch {batch_start//batch_size + 1}: images {batch_start+1}-{batch_end} of {total}") + + # Process each image in the batch with 1s delay + for i, (prompt_id, prompt_text) in enumerate(batch): + try: + output_path = output_dir / f"scene_{prompt_id:03d}.png" + + # Generate and save + self.generate_and_save( + prompt=prompt_text, + output_path=output_path, + seed=seed, + **kwargs + ) + + generated.append({ + "id": prompt_id, + "path": str(output_path), + "prompt": prompt_text + }) + + logger.debug(f"Generated image {prompt_id}/{total}") + + # Delay between images (not after last one in batch) + if i < len(batch) - 1: + time.sleep(delay_seconds) + + except Exception as e: + logger.error(f"Failed to generate image {prompt_id}: {e}") + generated.append({ + "id": prompt_id, + "path": None, + "error": str(e) + }) + + # Batch complete - small pause before next batch + if batch_end < 
total: + logger.info(f"Batch complete. Waiting before next batch...") + time.sleep(delay_seconds * 2) + + successful = len([g for g in generated if g.get("path")]) + logger.info(f"Batch generation complete: {successful}/{total} images generated") + + return generated diff --git a/modules/story_reels/services/nvidia_client.py b/modules/story_reels/services/nvidia_client.py new file mode 100644 index 0000000000000000000000000000000000000000..e5db84f18a2a33212c9c36f7c6029cf4b65787b0 --- /dev/null +++ b/modules/story_reels/services/nvidia_client.py @@ -0,0 +1,235 @@ +""" +NVIDIA Image Generation Client +Uses Stable Diffusion 3 Medium for high-quality 9:16 images +FIRST CHOICE - Falls back to Cloudflare on error +""" +import logging +import time +import requests +import base64 +from typing import Optional, List, Dict +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class NvidiaClient: + """ + Client for NVIDIA AI image generation. + Uses stable-diffusion-3-medium with 9:16 aspect ratio. + """ + + # Fixed model - stable-diffusion-3-medium + INVOKE_URL = "https://ai.api.nvidia.com/v1/genai/stabilityai/stable-diffusion-3-medium" + + def __init__(self, api_key: str): + """ + Initialize NVIDIA client. + + Args: + api_key: NVIDIA API key (nvapi-xxx) + """ + self.api_key = api_key + self.headers = { + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", + } + + def generate_image( + self, + prompt: str, + seed: int = 0, + steps: int = 50, + cfg_scale: float = 5 + ) -> bytes: + """ + Generate image from prompt. 
+ + Args: + prompt: Text prompt for image generation + seed: Random seed for reproducibility + steps: Number of diffusion steps (default 50) + cfg_scale: Guidance scale (default 5) + + Returns: + Image bytes (PNG format) + """ + # Stable Diffusion 3 Medium payload - 9:16 aspect ratio + payload = { + "prompt": prompt, + "cfg_scale": cfg_scale, + "aspect_ratio": "9:16", # Portrait for reels + "seed": seed, + "steps": steps, + "negative_prompt": "" + } + + logger.debug(f"NVIDIA generating image with prompt: {prompt[:100]}...") + + try: + response = requests.post( + self.INVOKE_URL, + headers=self.headers, + json=payload, + timeout=120 + ) + response.raise_for_status() + response_body = response.json() + + # Extract base64 - handle multiple response formats + image_b64 = None + + # Format 1: Direct image field + if isinstance(response_body, dict) and "image" in response_body: + image_b64 = response_body["image"] + + # Format 2: Artifacts array with base64 field + elif isinstance(response_body, dict) and "artifacts" in response_body: + artifacts = response_body.get("artifacts") + if artifacts and isinstance(artifacts, list) and len(artifacts) > 0: + image_b64 = artifacts[0].get("base64") + + # Format 3: Array with image_b64 field + elif isinstance(response_body, list) and len(response_body) > 0: + if "image_b64" in response_body[0]: + image_b64 = response_body[0].get("image_b64") + elif "base64" in response_body[0]: + image_b64 = response_body[0].get("base64") + + if image_b64: + # Decode base64 to bytes + image_data = base64.b64decode(image_b64) + logger.info(f"NVIDIA image generated successfully") + return image_data + else: + logger.error(f"NVIDIA: Could not find image data. 
Keys: {response_body.keys() if isinstance(response_body, dict) else 'list'}") + raise Exception("No image data in NVIDIA response") + + except requests.exceptions.RequestException as e: + logger.error(f"NVIDIA API error: {e}") + raise Exception(f"NVIDIA image generation failed: {e}") + + def generate_and_save( + self, + prompt: str, + output_path: Path, + seed: int = 0, + **kwargs + ) -> Path: + """Generate image and save to file""" + image_bytes = self.generate_image(prompt, seed=seed, **kwargs) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(image_bytes) + + logger.info(f"Saved NVIDIA image to {output_path}") + return output_path + + @staticmethod + def test_connection(api_key: str) -> bool: + """Test API connection""" + try: + client = NvidiaClient(api_key) + client.generate_image("test", steps=10) + return True + except Exception as e: + logger.error(f"NVIDIA connection test failed: {e}") + return False + + def generate_batch( + self, + prompts: List[tuple], + output_dir: Path, + seed: int = 0, + batch_size: int = 5, + delay_seconds: float = 1.0, + fallback_client=None, + **kwargs + ) -> List[Dict]: + """ + Generate images in batches with fallback support. + + Pattern: Generate 5, wait, next 5... + If NVIDIA fails, try Cloudflare fallback. 
+ + Args: + prompts: List of (prompt_id, prompt_text) tuples + output_dir: Directory to save images + seed: Fixed seed for character consistency + batch_size: Images per batch (default 5) + delay_seconds: Delay between images (default 1s) + fallback_client: Cloudflare client for fallback + + Returns: + List of generated image info dicts + """ + output_dir.mkdir(parents=True, exist_ok=True) + generated = [] + total = len(prompts) + + for batch_start in range(0, total, batch_size): + batch_end = min(batch_start + batch_size, total) + batch = prompts[batch_start:batch_end] + + logger.info(f"NVIDIA batch {batch_start//batch_size + 1}: images {batch_start+1}-{batch_end} of {total}") + + for i, (prompt_id, prompt_text) in enumerate(batch): + output_path = output_dir / f"scene_{prompt_id:03d}.png" + success = False + + # Try NVIDIA first + try: + self.generate_and_save( + prompt=prompt_text, + output_path=output_path, + seed=seed, + **kwargs + ) + success = True + logger.debug(f"NVIDIA: Generated image {prompt_id}/{total}") + + except Exception as e: + logger.warning(f"NVIDIA failed for image {prompt_id}: {e}") + + # Fallback to Cloudflare + if fallback_client: + try: + logger.info(f"Falling back to Cloudflare for image {prompt_id}") + fallback_client.generate_and_save( + prompt=prompt_text, + output_path=output_path, + seed=seed, + width=1080, + height=1920 + ) + success = True + logger.info(f"Cloudflare fallback successful for image {prompt_id}") + except Exception as cf_e: + logger.error(f"Cloudflare fallback also failed: {cf_e}") + + if success: + generated.append({ + "id": prompt_id, + "path": str(output_path), + "prompt": prompt_text + }) + else: + generated.append({ + "id": prompt_id, + "path": None, + "error": "Both NVIDIA and Cloudflare failed" + }) + + # Delay between images + if i < len(batch) - 1: + time.sleep(delay_seconds) + + # Batch complete - pause before next + if batch_end < total: + logger.info("Batch complete. 
Waiting before next batch...") + time.sleep(delay_seconds * 2) + + successful = len([g for g in generated if g.get("path")]) + logger.info(f"Batch complete: {successful}/{total} images generated") + + return generated diff --git a/modules/story_reels/services/prompt_builder.py b/modules/story_reels/services/prompt_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..59952fff3b2c16af8cde7d655e6293c8acf709c7 --- /dev/null +++ b/modules/story_reels/services/prompt_builder.py @@ -0,0 +1,147 @@ +""" +Prompt Builder for Character-Consistent Images +Builds detailed prompts with character profile injection +""" +import logging +from typing import Optional, List +from ..schemas import CharacterProfile, SceneInput, CameraEnum + +logger = logging.getLogger(__name__) + + +class PromptBuilder: + """ + Builds prompts for image generation with character consistency. + + Strategy: + 1. Fixed character description in every prompt + 2. Same seed across all images + 3. Consistency keywords + 4. 
Style anchor + """ + + # Consistency keywords to add + CONSISTENCY_KEYWORDS = [ + "same character throughout", + "consistent appearance", + "consistent face", + "character reference" + ] + + def __init__(self, character_profile: Optional[CharacterProfile] = None): + self.character = character_profile + + def build_character_description(self) -> str: + """Build detailed character description string""" + if not self.character: + return "" + + parts = [] + + # Name and basics + if self.character.name: + parts.append(f"a character named {self.character.name}") + + # Age and gender + if self.character.age and self.character.gender: + parts.append(f"{self.character.age} year old {self.character.gender}") + + # Physical features + if self.character.hair: + parts.append(self.character.hair) + if self.character.skin: + parts.append(f"{self.character.skin} skin") + if self.character.face: + parts.append(self.character.face) + + # Clothing + if self.character.clothes: + parts.append(f"wearing {self.character.clothes}") + + return ", ".join(parts) + + def build_scene_prompt(self, scene: SceneInput) -> str: + """ + Build full prompt for a scene. + + Format: + [style], [character description], [scene text], [camera], [lighting], [consistency keywords] + """ + parts = [] + + # 1. Style anchor (important for consistency) + if self.character and self.character.style: + parts.append(f"{self.character.style.value} style artwork") + else: + parts.append("semi-realistic style artwork") + + # 2. Character description (injected for consistency) + char_desc = self.build_character_description() + if char_desc: + parts.append(char_desc) + + # 3. Scene description + parts.append(scene.scene_text) + + # 4. Camera angle + parts.append(scene.camera.value) + + # 5. Pose if specified + if scene.pose: + parts.append(f"{scene.pose} pose") + + # 6. Lighting + if scene.lighting: + parts.append(scene.lighting) + + # 7. Consistency keywords + parts.extend(self.CONSISTENCY_KEYWORDS[:2]) + + # 8. 
Quality keywords + parts.extend([ + "high quality", + "detailed", + "professional illustration" + ]) + + prompt = ", ".join(parts) + logger.debug(f"Built prompt: {prompt[:150]}...") + + return prompt + + def build_prompts_for_scenes(self, scenes: List[SceneInput]) -> List[str]: + """Build prompts for all scenes""" + return [self.build_scene_prompt(scene) for scene in scenes] + + @staticmethod + def create_scenes_from_segments( + segments: List[dict], + default_camera: CameraEnum = CameraEnum.medium + ) -> List[SceneInput]: + """ + Create SceneInput objects from SRT segments. + + Args: + segments: List of {text, start_ms, end_ms, duration} + + Returns: + List of SceneInput objects + """ + scenes = [] + + for i, seg in enumerate(segments): + duration = seg.get('duration', 4.0) + if isinstance(duration, int): + duration = duration / 1000 # Convert ms to seconds + + scene = SceneInput( + scene_id=i + 1, + scene_text=seg.get('text', ''), + camera=default_camera, + pose="natural pose", + lighting="natural lighting", + duration=duration + ) + scenes.append(scene) + + return scenes diff --git a/modules/story_reels/services/script_generator.py b/modules/story_reels/services/script_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..fe23fe1e500d1502679bb40c65045a49c256c374 --- /dev/null +++ b/modules/story_reels/services/script_generator.py @@ -0,0 +1,256 @@ +""" +Script Generator using Gemini API +Generates story scripts from topics for TTS narration +""" +import logging +import requests +from typing import Optional + +logger = logging.getLogger(__name__) + + +class ScriptGenerator: + """ + Generates story scripts using Google Gemini API. 
+ + Features: + - Topic → Full narration script (<=1000 chars) + - Character-aware script generation + - Optimized for TTS output + """ + + GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" + + # System prompt for script generation + SYSTEM_PROMPT = """You are a professional script writer for short-form video content (TikTok, Reels, Shorts). + +RULES: +1. Write a narration script for the given topic +2. Maximum 1000 characters (STRICT LIMIT) +3. Write in a natural, engaging voice +4. Focus on storytelling - beginning, middle, end +5. Use simple, clear sentences for TTS +6. NO emojis, NO hashtags, NO special formatting +7. Output ONLY the script text, nothing else + +If a character is provided, write the story from their perspective or about them.""" + + def __init__(self, api_key: str): + self.api_key = api_key + + def generate_script( + self, + topic: str, + character_name: Optional[str] = None, + max_chars: int = 1000 + ) -> str: + """ + Generate a story script from topic. + + Args: + topic: Story topic/idea + character_name: Optional character name to include + max_chars: Maximum character limit (default 1000) + + Returns: + Generated script text + """ + # Build the prompt + user_prompt = f"Topic: {topic}" + + if character_name: + user_prompt += f"\nMain Character: {character_name}" + + user_prompt += f"\n\nWrite a short narration script (max {max_chars} characters)." 
+ + logger.info(f"Generating script for topic: {topic[:50]}...") + + try: + response = requests.post( + f"{self.GEMINI_API_URL}?key={self.api_key}", + headers={"Content-Type": "application/json"}, + json={ + "contents": [ + { + "role": "user", + "parts": [{"text": self.SYSTEM_PROMPT + "\n\n" + user_prompt}] + } + ], + "generationConfig": { + "temperature": 0.7, + "maxOutputTokens": 500, + "topP": 0.9 + } + }, + timeout=30 + ) + response.raise_for_status() + + data = response.json() + + # Extract text from response + script = data["candidates"][0]["content"]["parts"][0]["text"] + + # Enforce character limit + if len(script) > max_chars: + script = script[:max_chars].rsplit(' ', 1)[0] + "." + + logger.info(f"Generated script: {len(script)} chars") + return script.strip() + + except requests.exceptions.RequestException as e: + logger.error(f"Gemini API error: {e}") + raise Exception(f"Script generation failed: {e}") + except (KeyError, IndexError) as e: + logger.error(f"Failed to parse Gemini response: {e}") + raise Exception("Invalid response from Gemini API") + + @staticmethod + def test_connection(api_key: str) -> bool: + """Test API connection""" + try: + gen = ScriptGenerator(api_key) + gen.generate_script("test", max_chars=50) + return True + except: + return False + + # System prompt for image prompt generation + IMAGE_PROMPT_SYSTEM = """You are an expert at creating detailed image prompts for AI image generation. + +Your task: Generate detailed image prompts for each 2-second scene of a story video. + +CONTEXT: +- Full story script is provided so you understand the narrative +- Each 2-second chunk needs a visual prompt +- Character profile (if provided) must be consistent in EVERY prompt +- Images should tell the story visually + +RULES FOR PROMPTS: +1. Be detailed and specific (50-100 words each) +2. Include: scene description, character pose/action, camera angle, lighting, mood +3. Add style keywords at the end (semi-realistic, detailed, high quality) +4. 
DO NOT include text/dialogue in prompts +5. Keep character appearance CONSISTENT across all prompts +6. Use cinematographic language (close-up, wide shot, etc.) + +OUTPUT FORMAT: +Return ONLY valid JSON array, no markdown, no explanation: +[ + {"chunk_id": 1, "prompt": "detailed prompt here..."}, + {"chunk_id": 2, "prompt": "detailed prompt here..."} +]""" + + def generate_image_prompts( + self, + full_script: str, + chunks: list, + character_profile: dict = None, + max_batch: int = 30 + ) -> list: + """ + Generate detailed image prompts for all 2-second chunks. + + Args: + full_script: Complete narration script (for context) + chunks: List of {chunk_id, text, duration} from SRTParser + character_profile: Optional character dict + max_batch: Max chunks per API call (default 30) + + Returns: + List of {chunk_id, prompt} dicts + """ + import json + + all_prompts = [] + total_chunks = len(chunks) + + # Split into batches if too many chunks + for batch_start in range(0, total_chunks, max_batch): + batch_end = min(batch_start + max_batch, total_chunks) + batch_chunks = chunks[batch_start:batch_end] + + logger.info(f"Generating prompts for chunks {batch_start+1}-{batch_end} of {total_chunks}") + + # Build user prompt + user_prompt = f"""FULL STORY SCRIPT: +{full_script} + +""" + if character_profile: + user_prompt += f"""CHARACTER PROFILE: +- Name: {character_profile.get('name', 'Main character')} +- Age: {character_profile.get('age', '25')} +- Gender: {character_profile.get('gender', 'male')} +- Hair: {character_profile.get('hair', 'short black hair')} +- Skin: {character_profile.get('skin', 'light skin')} +- Clothes: {character_profile.get('clothes', 'casual clothes')} +- Style: {character_profile.get('style', 'semi-realistic')} + +IMPORTANT: Include this character description in EVERY prompt! 
+ +""" + + user_prompt += "2-SECOND CHUNKS TO GENERATE PROMPTS FOR:\n" + for chunk in batch_chunks: + user_prompt += f"- Chunk {chunk['chunk_id']}: \"{chunk['text']}\"\n" + + user_prompt += "\nGenerate detailed image prompts for each chunk. Return ONLY JSON array." + + try: + response = requests.post( + f"{self.GEMINI_API_URL}?key={self.api_key}", + headers={"Content-Type": "application/json"}, + json={ + "contents": [ + { + "role": "user", + "parts": [{"text": self.IMAGE_PROMPT_SYSTEM + "\n\n" + user_prompt}] + } + ], + "generationConfig": { + "temperature": 0.7, + "maxOutputTokens": 4000, + "topP": 0.9 + } + }, + timeout=60 + ) + response.raise_for_status() + + data = response.json() + text = data["candidates"][0]["content"]["parts"][0]["text"] + + # Clean response - remove markdown if present + text = text.strip() + if text.startswith("```"): + text = text.split("```")[1] + if text.startswith("json"): + text = text[4:] + text = text.strip() + + # Parse JSON + batch_prompts = json.loads(text) + all_prompts.extend(batch_prompts) + + logger.info(f"Generated {len(batch_prompts)} prompts in batch") + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON response: {e}") + # Fallback: create simple prompts + for chunk in batch_chunks: + all_prompts.append({ + "chunk_id": chunk["chunk_id"], + "prompt": f"{chunk['text']}, semi-realistic style, high quality, detailed" + }) + except Exception as e: + logger.error(f"Gemini API error: {e}") + # Fallback + for chunk in batch_chunks: + all_prompts.append({ + "chunk_id": chunk["chunk_id"], + "prompt": f"{chunk['text']}, semi-realistic style, high quality" + }) + + logger.info(f"Generated {len(all_prompts)} total image prompts") + return all_prompts diff --git a/modules/story_reels/services/srt_parser.py b/modules/story_reels/services/srt_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..470e47dbd0034888cf66cc142e34d1476a7c3de6 --- /dev/null +++ 
b/modules/story_reels/services/srt_parser.py @@ -0,0 +1,214 @@ +""" +SRT Parser for Story Reels +Parses SRT segments and calculates scene durations +""" +import re +import logging +from typing import List, Dict +from pathlib import Path +import math + +logger = logging.getLogger(__name__) + + +class SRTParser: + """ + Parses SRT files and calculates image counts based on 2s rule. + """ + + # SRT timestamp regex + TIMESTAMP_PATTERN = re.compile( + r'(\d{2}):(\d{2}):(\d{2}),(\d{3})\s*-->\s*(\d{2}):(\d{2}):(\d{2}),(\d{3})' + ) + + @staticmethod + def parse_timestamp(h: str, m: str, s: str, ms: str) -> int: + """Convert timestamp to milliseconds""" + return int(h) * 3600000 + int(m) * 60000 + int(s) * 1000 + int(ms) + + @classmethod + def parse_srt_content(cls, srt_content: str) -> List[Dict]: + """ + Parse SRT content into segments. + + Returns: + List of {text, start_ms, end_ms, duration_ms, image_count} + """ + segments = [] + blocks = srt_content.strip().split('\n\n') + + for block in blocks: + lines = block.strip().split('\n') + if len(lines) < 3: + continue + + # Skip sequence number (line 0) + timestamp_line = lines[1] + text_lines = lines[2:] + + match = cls.TIMESTAMP_PATTERN.match(timestamp_line) + if not match: + continue + + start_ms = cls.parse_timestamp(*match.groups()[:4]) + end_ms = cls.parse_timestamp(*match.groups()[4:]) + duration_ms = end_ms - start_ms + + # Calculate image count (2 seconds per image) + duration_s = duration_ms / 1000 + image_count = max(1, math.ceil(duration_s / 2)) + + segments.append({ + 'text': ' '.join(text_lines), + 'start_ms': start_ms, + 'end_ms': end_ms, + 'duration_ms': duration_ms, + 'duration': duration_s, + 'image_count': image_count + }) + + logger.info(f"Parsed {len(segments)} SRT segments") + return segments + + @classmethod + def parse_srt_file(cls, srt_path: Path) -> List[Dict]: + """Parse SRT file""" + content = srt_path.read_text(encoding='utf-8') + return cls.parse_srt_content(content) + + @staticmethod + def 
calculate_total_images(segments: List[Dict]) -> int: + """Calculate total images needed""" + return sum(seg.get('image_count', 1) for seg in segments) + + @staticmethod + def calculate_total_duration(segments: List[Dict]) -> float: + """Calculate total duration in seconds""" + return sum(seg.get('duration', 0) for seg in segments) + + @classmethod + def segments_from_captions(cls, captions: List[Dict]) -> List[Dict]: + """ + Convert Whisper captions to segments. + + Args: + captions: List from WhisperClient [{text, startMs, endMs}] + + Returns: + Segments with image_count calculated + """ + segments = [] + + for cap in captions: + start_ms = cap.get('startMs', 0) + end_ms = cap.get('endMs', 0) + duration_ms = end_ms - start_ms + duration_s = duration_ms / 1000 + + segments.append({ + 'text': cap.get('text', ''), + 'start_ms': start_ms, + 'end_ms': end_ms, + 'duration_ms': duration_ms, + 'duration': duration_s, + 'image_count': max(1, math.ceil(duration_s / 2)) + }) + + return segments + + @classmethod + def create_2s_chunks(cls, captions: List[Dict], total_duration: float) -> List[Dict]: + """ + Create 2-second chunks for image generation. 
+ + This is SEPARATE from .srt captions: + - .srt = original Whisper captions (for video subtitles) + - 2s chunks = for image prompt generation + + Args: + captions: Original Whisper captions + total_duration: Total audio duration in seconds + + Returns: + List of 2-second chunks with text for image prompts + """ + # Flatten all caption text with timing + all_words = [] + for cap in captions: + start_ms = cap.get('startMs', 0) + end_ms = cap.get('endMs', 0) + text = cap.get('text', '').strip() + if text: + all_words.append({ + 'text': text, + 'start_ms': start_ms, + 'end_ms': end_ms + }) + + # Calculate number of 2-second chunks + num_chunks = max(1, math.ceil(total_duration / 2)) + chunk_duration_ms = 2000 # 2 seconds + + chunks = [] + + for i in range(num_chunks): + chunk_start = i * chunk_duration_ms + chunk_end = min((i + 1) * chunk_duration_ms, int(total_duration * 1000)) + + # Last chunk might be shorter + actual_duration = (chunk_end - chunk_start) / 1000 + + # Find words that fall within this chunk + chunk_texts = [] + for word in all_words: + # Word overlaps with chunk + if word['end_ms'] > chunk_start and word['start_ms'] < chunk_end: + chunk_texts.append(word['text']) + + # Combine texts for this chunk + chunk_text = ' '.join(chunk_texts) if chunk_texts else f"Scene {i + 1}" + + chunks.append({ + 'chunk_id': i + 1, + 'text': chunk_text, + 'start_ms': chunk_start, + 'end_ms': chunk_end, + 'duration': actual_duration + }) + + logger.info(f"Created {len(chunks)} x 2-second chunks for image generation") + return chunks + + @staticmethod + def generate_srt_content(captions: List[Dict]) -> str: + """ + Generate .srt file content from Whisper captions. + This will be embedded in the final video. 
+ + Args: + captions: Original Whisper captions + + Returns: + SRT formatted string + """ + srt_lines = [] + + for i, cap in enumerate(captions, 1): + start_ms = cap.get('startMs', 0) + end_ms = cap.get('endMs', 0) + text = cap.get('text', '').strip() + + # Format timestamps: HH:MM:SS,mmm + def format_time(ms): + hours = ms // 3600000 + minutes = (ms % 3600000) // 60000 + seconds = (ms % 60000) // 1000 + millis = ms % 1000 + return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}" + + srt_lines.append(str(i)) + srt_lines.append(f"{format_time(start_ms)} --> {format_time(end_ms)}") + srt_lines.append(text) + srt_lines.append("") + + return '\n'.join(srt_lines) diff --git a/modules/story_reels/services/story_creator.py b/modules/story_reels/services/story_creator.py new file mode 100644 index 0000000000000000000000000000000000000000..992533bafdd3694130ffd39615b0d61f4a60927c --- /dev/null +++ b/modules/story_reels/services/story_creator.py @@ -0,0 +1,583 @@ +""" +Story Creator - Main Pipeline Orchestrator +Coordinates TTS, Whisper, Cloudflare, and MoviePy +""" +import asyncio +import logging +import uuid +from pathlib import Path +from typing import Dict, List, Optional +from datetime import datetime + +from ..schemas import ( + CharacterProfile, + SceneInput, + JobStatus, + GeneratedScene +) +from .cloudflare_client import CloudflareClient +from .prompt_builder import PromptBuilder +from .srt_parser import SRTParser +from .script_generator import ScriptGenerator + +logger = logging.getLogger(__name__) + + +class StoryCreator: + """ + Main orchestrator for story-to-video pipeline. + + Pipeline: + 1. Script → TTS → voice.mp3 + 2. voice.mp3 → Whisper → segments + 3. Segments → PromptBuilder → prompts + 4. Prompts → Cloudflare → images + 5. 
Images + Audio → MoviePy → video + """ + + def __init__( + self, + config, + tts_client, + whisper_client, + nvidia_client=None, # PRIMARY + cloudflare_client=None, # FALLBACK + script_generator: ScriptGenerator = None + ): + self.config = config + self.tts = tts_client + self.whisper = whisper_client + self.nvidia = nvidia_client # PRIMARY + self.cloudflare = cloudflare_client # FALLBACK + self.script_gen = script_generator + + # Job tracking + self.jobs: Dict[str, Dict] = {} + self.queue: List[Dict] = [] + self.processing = False + + def add_to_queue( + self, + topic: str, + script: str, + character_profile: Optional[CharacterProfile] = None, + voice: str = "af_heart" + ) -> str: + """ + Add story to generation queue. + + Returns: + job_id for tracking + """ + job_id = str(uuid.uuid4()).replace('-', '')[:16] + + job = { + "id": job_id, + "topic": topic, + "script": script, + "character": character_profile, + "voice": voice, + "status": JobStatus.queued, + "progress": 0, + "created_at": datetime.now().isoformat(), + "video_url": None, + "duration": None, + "error": None, + "scenes": [] + } + + self.jobs[job_id] = job + self.queue.append(job) + + logger.info(f"Added job {job_id} to queue. 
Queue length: {len(self.queue)}") + + # Start processing if not already running + if not self.processing: + asyncio.create_task(self.process_queue()) + + return job_id + + async def process_queue(self): + """Process jobs in queue""" + if self.processing: + return + + self.processing = True + + try: + while self.queue: + job = self.queue[0] + job_id = job["id"] + + logger.info(f"Processing job {job_id}") + + try: + await self._process_job(job) + job["status"] = JobStatus.ready + job["progress"] = 100 + logger.info(f"Job {job_id} completed successfully") + except Exception as e: + logger.error(f"Job {job_id} failed: {e}", exc_info=True) + job["status"] = JobStatus.failed + job["error"] = str(e) + finally: + self.queue.pop(0) + finally: + self.processing = False + + async def _process_job(self, job: Dict): + """Process a single job through the pipeline""" + job_id = job["id"] + temp_dir = self.config.temp_dir_path / job_id + temp_dir.mkdir(parents=True, exist_ok=True) + + temp_files = [] + + try: + # ==================== + # Step 0: Generate Script (if not provided) + # ==================== + script = job["script"] + + if not script or script.strip() == "": + logger.info(f"[{job_id}] Generating script from topic using Gemini...") + job["progress"] = 5 + + char_name = job["character"].name if job["character"] else None + script = self.script_gen.generate_script( + topic=job["topic"], + character_name=char_name, + max_chars=1000 + ) + job["script"] = script + logger.info(f"[{job_id}] Generated script: {len(script)} chars") + + # ==================== + # Step 1: Generate TTS + # ==================== + job["status"] = JobStatus.generating_audio + job["progress"] = 10 + + logger.info(f"[{job_id}] Generating TTS audio...") + + audio_data, tts_duration = await self.tts.generate( + script, + job["voice"] + ) + + wav_path = temp_dir / "voice.wav" + mp3_path = temp_dir / "voice.mp3" + temp_files.extend([wav_path, mp3_path]) + + # Import FFmpegUtils from video_creator + from 
modules.video_creator.services.libraries.ffmpeg_utils import FFmpegUtils + + FFmpegUtils.save_audio_as_wav(audio_data, wav_path) + FFmpegUtils.save_audio_as_mp3(audio_data, mp3_path) + + # Get actual duration + audio_duration = FFmpegUtils.get_video_duration(wav_path) + logger.info(f"[{job_id}] Audio generated: {audio_duration:.2f}s") + + job["progress"] = 25 + + # ==================== + # Step 2: Generate Captions (Whisper) + # ==================== + logger.info(f"[{job_id}] Generating captions with Whisper...") + + captions = self.whisper.create_captions(str(wav_path)) + captions_dict = [c.dict() for c in captions] + + # OUTPUT 1: .srt content (for video subtitles) + srt_content = SRTParser.generate_srt_content(captions_dict) + srt_path = temp_dir / "voice.srt" + srt_path.write_text(srt_content, encoding='utf-8') + temp_files.append(srt_path) + logger.info(f"[{job_id}] Generated .srt with {len(captions)} captions") + + # OUTPUT 2: 2-second chunks (for image prompts) + image_chunks = SRTParser.create_2s_chunks(captions_dict, audio_duration) + logger.info(f"[{job_id}] Created {len(image_chunks)} x 2s chunks for images") + + job["progress"] = 40 + job["srt_path"] = str(srt_path) + + # ==================== + # Step 3: Generate Image Prompts using AI + # ==================== + job["status"] = JobStatus.generating_images + logger.info(f"[{job_id}] Generating AI-powered image prompts...") + + # Convert character profile to dict if exists + char_dict = None + if job["character"]: + char_dict = { + "name": job["character"].name, + "age": job["character"].age, + "gender": job["character"].gender, + "hair": job["character"].hair, + "skin": job["character"].skin, + "clothes": job["character"].clothes, + "style": job["character"].style.value if hasattr(job["character"].style, 'value') else str(job["character"].style) + } + + # Generate all image prompts at once using Gemini + # Input: Full script (context) + 2s chunks → Output: JSON array of prompts + ai_prompts = 
self.script_gen.generate_image_prompts( + full_script=script, + chunks=image_chunks, + character_profile=char_dict + ) + + logger.info(f"[{job_id}] AI generated {len(ai_prompts)} image prompts") + + job["progress"] = 50 + + # ==================== + # Step 4: Generate Images (PARALLEL - NVIDIA + Cloudflare) + # ==================== + # If both APIs available: split images 50/50 for 2x speed + # 1 second delay between each request (rate limit safe) + + seed = job["character"].seed if job["character"] else 432891 + + # Build prompts list from AI-generated prompts + prompts_list = [] + for p in ai_prompts: + prompts_list.append((p["chunk_id"], p["prompt"])) + + total_images = len(prompts_list) + logger.info(f"[{job_id}] Generating {total_images} images...") + + # Check which APIs are available + has_nvidia = self.nvidia is not None + has_cloudflare = self.cloudflare is not None + + if has_nvidia and has_cloudflare: + # PARALLEL MODE: NVIDIA 70%, Cloudflare 30% (NVIDIA has better quality) + logger.info(f"[{job_id}] Parallel mode: NVIDIA 70% + Cloudflare 30%") + + import threading + + # Split: first 70% to NVIDIA, remaining 30% to Cloudflare + nvidia_count = int(total_images * 0.7) + if nvidia_count == 0: + nvidia_count = 1 + + nvidia_prompts = prompts_list[:nvidia_count] + cloudflare_prompts = prompts_list[nvidia_count:] + + # Get indices + nvidia_indices = [p[0] for p in nvidia_prompts] + cloudflare_indices = [p[0] for p in cloudflare_prompts] + + nvidia_results = [] + cloudflare_results = [] + + def nvidia_worker(): + """NVIDIA: 5 requests → wait → next 5""" + nonlocal nvidia_results + batch_size = 5 + for batch_start in range(0, len(nvidia_prompts), batch_size): + batch = nvidia_prompts[batch_start:batch_start + batch_size] + logger.info(f"NVIDIA batch {batch_start//batch_size + 1}: {len(batch)} images") + + for orig_idx, prompt in batch: + try: + output_path = temp_dir / f"scene_{orig_idx:03d}.png" + self.nvidia.generate_and_save(prompt, output_path, seed=seed) + 
nvidia_results.append({"id": orig_idx, "path": str(output_path), "prompt": prompt}) + logger.debug(f"NVIDIA: {orig_idx}") + except Exception as e: + logger.error(f"NVIDIA failed {orig_idx}: {e}") + nvidia_results.append({"id": orig_idx, "path": None, "error": str(e)}) + time.sleep(1.0) # 1s delay between requests in same batch + + # Batch complete - wait before next batch + if batch_start + batch_size < len(nvidia_prompts): + logger.info("NVIDIA batch complete, waiting...") + time.sleep(2.0) + + def cloudflare_worker(): + """Cloudflare: 5 requests → wait → next 5""" + nonlocal cloudflare_results + batch_size = 5 + for batch_start in range(0, len(cloudflare_prompts), batch_size): + batch = cloudflare_prompts[batch_start:batch_start + batch_size] + logger.info(f"Cloudflare batch {batch_start//batch_size + 1}: {len(batch)} images") + + for orig_idx, prompt in batch: + try: + output_path = temp_dir / f"scene_{orig_idx:03d}.png" + self.cloudflare.generate_and_save(prompt, output_path, seed=seed, width=1080, height=1920) + cloudflare_results.append({"id": orig_idx, "path": str(output_path), "prompt": prompt}) + logger.debug(f"Cloudflare: {orig_idx}") + except Exception as e: + logger.error(f"Cloudflare failed {orig_idx}: {e}") + cloudflare_results.append({"id": orig_idx, "path": None, "error": str(e)}) + time.sleep(1.0) # 1s delay between requests in same batch + + # Batch complete - wait before next batch + if batch_start + batch_size < len(cloudflare_prompts): + logger.info("Cloudflare batch complete, waiting...") + time.sleep(2.0) + + # Run both in parallel (each has its own batch counter) + t1 = threading.Thread(target=nvidia_worker) + t2 = threading.Thread(target=cloudflare_worker) + t1.start() + t2.start() + t1.join() + t2.join() + + # Combine results + batch_results = nvidia_results + cloudflare_results + batch_results.sort(key=lambda x: x["id"]) + + elif has_nvidia: + # NVIDIA only with 1s delay + logger.info(f"[{job_id}] NVIDIA only mode") + batch_results = 
self.nvidia.generate_batch( + prompts=prompts_list, + output_dir=temp_dir, + seed=seed, + batch_size=5, + delay_seconds=1.0 + ) + elif has_cloudflare: + # Cloudflare only with 1s delay + logger.info(f"[{job_id}] Cloudflare only mode") + batch_results = self.cloudflare.generate_batch( + prompts=prompts_list, + output_dir=temp_dir, + seed=seed, + batch_size=5, + delay_seconds=1.0, + width=1080, + height=1920 + ) + else: + raise Exception("No image generation client available!") + + # Build generated_scenes from batch results + generated_scenes = [] + for result in batch_results: + if result.get("path"): + temp_files.append(Path(result["path"])) + + # Find matching chunk for duration + scene_duration = 2.0 + for chunk in image_chunks: + if chunk['chunk_id'] == result["id"]: + scene_duration = chunk['duration'] + break + + generated_scenes.append({ + "scene_id": result["id"], + "prompt": result["prompt"], + "image_path": result["path"], + "duration": scene_duration + }) + + logger.info(f"[{job_id}] Generated {len(generated_scenes)}/{len(ai_prompts)} images") + + job["scenes"] = generated_scenes + job["progress"] = 80 + + # ==================== + # Step 5: Compose Video + # ==================== + job["status"] = JobStatus.composing_video + logger.info(f"[{job_id}] Composing final video...") + + output_path = self.config.videos_dir_path / f"{job_id}.mp4" + + await self._compose_video( + scenes=generated_scenes, + audio_path=mp3_path, + output_path=output_path + ) + + job["video_url"] = str(output_path) + job["duration"] = audio_duration + job["progress"] = 100 + + logger.info(f"[{job_id}] Video saved to {output_path}") + + finally: + # Cleanup temp files + for temp_file in temp_files: + if temp_file.exists(): + try: + temp_file.unlink() + except: + pass + + # Remove temp directory + if temp_dir.exists(): + try: + temp_dir.rmdir() + except: + pass + + async def _compose_video( + self, + scenes: List[Dict], + audio_path: Path, + output_path: Path + ): + """ + Compose video 
from images and audio using MoviePy. + + Effects: + - Crossfade transitions (0.3s) between scenes + - Subtle Ken Burns zoom (1.05x) for dynamic feel + - Fade in at start, fade out at end + """ + from moviepy.editor import ( + ImageClip, + AudioFileClip, + concatenate_videoclips, + CompositeVideoClip, + vfx + ) + + # Constants + CROSSFADE_DURATION = 0.3 # Transition duration + ZOOM_FACTOR = 1.05 # Subtle zoom (1.05 = 5% zoom) + FADE_DURATION = 0.5 # Fade in/out duration + TARGET_HEIGHT = 1920 # Portrait + TARGET_WIDTH = 1080 + + # Load audio first to get exact duration + audio = AudioFileClip(str(audio_path)) + audio_duration = audio.duration + + # Create video clips from images with effects + clips = [] + total_video_duration = 0 + total_scenes = len(scenes) + + for i, scene in enumerate(scenes): + image_path = scene["image_path"] + duration = scene["duration"] + + # For the last clip, adjust duration to match audio + if i == total_scenes - 1: + remaining = audio_duration - total_video_duration + if remaining > 0: + duration = remaining + + # Create image clip + clip = ImageClip(image_path).set_duration(duration) + + # Resize to portrait (1080x1920) + clip = clip.resize(height=TARGET_HEIGHT) + + # Scene position-based effects + # Hook (first 2 clips): Zoom OUT (start big, end normal) - grabs attention + # Middle clips: Subtle zoom IN (Ken Burns) + # Outro (last clip): Static with fade out + + if i < 2: + # HOOK: Zoom OUT effect (1.1 → 1.0) - dynamic attention grabber + def make_zoom_out(t, clip_duration=duration): + zoom = 1.1 - (0.1 * (t / clip_duration)) # 1.1 to 1.0 + return zoom + clip = clip.resize(lambda t: make_zoom_out(t)) + + elif i < total_scenes - 1: + # MIDDLE: Ken Burns zoom IN (1.0 → 1.05) + def make_zoom_in(t, clip_duration=duration): + zoom = 1.0 + (ZOOM_FACTOR - 1.0) * (t / clip_duration) + return zoom + clip = clip.resize(lambda t: make_zoom_in(t)) + + # Last clip stays static (no zoom) + + # Center crop after zoom (to maintain 1080x1920) + clip = 
clip.resize(width=TARGET_WIDTH, height=TARGET_HEIGHT) + + # Transitions: crossfade for smooth scene changes (NOT on first 2 clips) + # Hook clips: NO crossfade, clean direct cut + if i >= 2 and duration > CROSSFADE_DURATION: + clip = clip.crossfadein(CROSSFADE_DURATION) + + # NO fade in for Hook (first 2 clips) - start immediately visible! + # Only fade out at the very end + if i == total_scenes - 1: + clip = clip.fadeout(FADE_DURATION) + + clips.append(clip) + total_video_duration += duration + + # Concatenate with crossfade transitions + if len(clips) > 1: + video = concatenate_videoclips(clips, method="compose", padding=-CROSSFADE_DURATION) + else: + video = clips[0] + + # Final safety: match video length to audio exactly + if abs(video.duration - audio_duration) > 0.1: + if video.duration > audio_duration: + video = video.subclip(0, audio_duration) + + video = video.set_audio(audio) + + # Write final video + logger.info(f"Writing video with effects: crossfade={CROSSFADE_DURATION}s, zoom={ZOOM_FACTOR}x") + video.write_videofile( + str(output_path), + fps=24, + codec='libx264', + audio_codec='aac', + threads=4, + preset='medium' + ) + + # Cleanup + video.close() + audio.close() + + def get_status(self, job_id: str) -> Dict: + """Get job status""" + job = self.jobs.get(job_id) + + if not job: + return { + "job_id": job_id, + "status": JobStatus.failed, + "error": "Job not found" + } + + return { + "job_id": job_id, + "status": job["status"], + "progress": job["progress"], + "video_url": job.get("video_url"), + "duration": job.get("duration"), + "error": job.get("error") + } + + def get_preview(self, job_id: str, scene_id: int) -> Optional[Dict]: + """Get scene preview""" + job = self.jobs.get(job_id) + + if not job or not job.get("scenes"): + return None + + for scene in job["scenes"]: + if scene["scene_id"] == scene_id: + return scene + + return None + + def get_video_path(self, job_id: str) -> Optional[Path]: + """Get video file path""" + job = 
self.jobs.get(job_id) + + if not job or not job.get("video_url"): + return None + + return Path(job["video_url"]) diff --git a/modules/video_creator/__init__.py b/modules/video_creator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b11ce136c2b748709bb5143b837e1b4a4bea9550 --- /dev/null +++ b/modules/video_creator/__init__.py @@ -0,0 +1,81 @@ +""" +Video Creator Module for NCAkit +Creates short-form videos with TTS, captions, background videos, and music. +""" +from fastapi import FastAPI +import logging + +# Module Metadata +MODULE_NAME = "video_creator" +MODULE_PREFIX = "/api/video" +MODULE_DESCRIPTION = "Create short-form videos with TTS, captions, and background music" + +logger = logging.getLogger(__name__) + + +def register(app: FastAPI, config): + """ + Register the video creator module with FastAPI. + Initializes all services and adds routes. + """ + from .router import router, set_short_creator + from .services.libraries.tts_client import TTSClient + from .services.libraries.whisper_client import WhisperClient + from .services.libraries.pexels_client import PexelsClient + from .services.music_manager import MusicManager + from .services.short_creator import ShortCreator + + logger.info("Registering video_creator module...") + + # Validate environment variables + if not config.pexels_api_key: + logger.warning("PEXELS_API_KEY is missing! Video generation will fail.") + + if not config.hf_tts: + logger.warning("HF_TTS is missing! 
TTS will fail.") + + # Initialize TTS client + logger.info("Initializing TTS client...") + tts_client = TTSClient(config.hf_tts) + + # Initialize Whisper client + logger.info("Initializing Whisper client...") + whisper_client = WhisperClient( + model_name=config.whisper_model, + model_dir=config.whisper_model_dir + ) + + # Initialize Pexels client + logger.info("Initializing Pexels client...") + pexels_client = PexelsClient(config.pexels_api_key) + + # Initialize music manager + logger.info("Initializing music manager...") + music_manager = MusicManager(config.music_dir_path) + try: + music_manager.ensure_music_files_exist() + except FileNotFoundError as e: + logger.error(f"Music setup error: {e}") + logger.warning("Creating empty music directory") + config.music_dir_path.mkdir(parents=True, exist_ok=True) + + # Initialize short creator + logger.info("Initializing short creator...") + short_creator = ShortCreator( + config=config, + tts_client=tts_client, + whisper_client=whisper_client, + pexels_client=pexels_client, + music_manager=music_manager + ) + + # Set the global short creator in the router + set_short_creator(short_creator) + + # Store in app state for access from other modules if needed + app.state.video_creator = short_creator + + # Register routes + app.include_router(router, prefix=MODULE_PREFIX, tags=["Video Creator"]) + + logger.info("video_creator module registered successfully!") diff --git a/modules/video_creator/__pycache__/__init__.cpython-313.pyc b/modules/video_creator/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5daa2e0eff8c4d9d40a4648cf83a795a84bb8b44 Binary files /dev/null and b/modules/video_creator/__pycache__/__init__.cpython-313.pyc differ diff --git a/modules/video_creator/__pycache__/router.cpython-313.pyc b/modules/video_creator/__pycache__/router.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b4209a3fce6d022a5ba34984c82d32c16b35e35 
Binary files /dev/null and b/modules/video_creator/__pycache__/router.cpython-313.pyc differ diff --git a/modules/video_creator/router.py b/modules/video_creator/router.py new file mode 100644 index 0000000000000000000000000000000000000000..94c7747dfeba18a2380c768ad6eb1314a37385cf --- /dev/null +++ b/modules/video_creator/router.py @@ -0,0 +1,130 @@ +""" +Video Creator Router - API Endpoints +""" +from fastapi import APIRouter, HTTPException +from fastapi.responses import FileResponse +import logging + +from .schemas import ( + CreateVideoRequest, + CreateVideoResponse, + VideoStatusResponse, + VideoListResponse, + VideoListItem +) +from .services.short_creator import ShortCreator + +logger = logging.getLogger(__name__) + +# This will be set when the module registers +short_creator: ShortCreator = None + + +def set_short_creator(creator: ShortCreator): + """Set the global short creator instance""" + global short_creator + short_creator = creator + + +router = APIRouter() + + +@router.post("/short-video", + response_model=CreateVideoResponse, + status_code=201, + summary="Create a new video", + description="Create a new short video from text scenes. Returns a video ID to track progress." 
+) +async def create_short_video(request: CreateVideoRequest): + """Create a new short video""" + try: + logger.info(f"Creating short video with {len(request.scenes)} scenes") + + video_id = short_creator.add_to_queue( + request.scenes, + request.config + ) + + return CreateVideoResponse(videoId=video_id) + + except Exception as e: + logger.error(f"Error creating video: {e}", exc_info=True) + raise HTTPException(status_code=400, detail=str(e)) + + +@router.get("/short-video/{video_id}/status", + response_model=VideoStatusResponse, + summary="Get video status", + description="Check the processing status of a video (processing, ready, or failed)" +) +async def get_video_status(video_id: str): + """Get the status of a video""" + status = short_creator.get_status(video_id) + return VideoStatusResponse(status=status) + + +@router.get("/short-video/{video_id}", + summary="Download video", + description="Download the generated video file (MP4 format)", + responses={ + 200: {"description": "Video file", "content": {"video/mp4": {}}}, + 404: {"description": "Video not found"} + } +) +async def get_video(video_id: str): + """Download/stream a video""" + video_path = short_creator.get_video_path(video_id) + + if not video_path.exists(): + raise HTTPException(status_code=404, detail="Video not found") + + return FileResponse( + video_path, + media_type="video/mp4", + filename=f"{video_id}.mp4" + ) + + +@router.get("/short-videos", + response_model=VideoListResponse, + summary="List all videos", + description="Get a list of all videos with their current status" +) +async def list_videos(): + """List all videos""" + videos = short_creator.list_all_videos() + return VideoListResponse( + videos=[VideoListItem(**v) for v in videos] + ) + + +@router.delete("/short-video/{video_id}", + summary="Delete video", + description="Delete a video by its ID" +) +async def delete_video(video_id: str): + """Delete a video""" + try: + short_creator.delete_video(video_id) + return {"success": 
True} + except Exception as e: + logger.error(f"Error deleting video: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.get("/voices", + summary="List TTS voices", + description="Get all available text-to-speech voice options" +) +async def get_voices(): + """List available TTS voices""" + return short_creator.get_available_voices() + + +@router.get("/music-tags", + summary="List music moods", + description="Get all available background music mood options" +) +async def get_music_tags(): + """List available music moods""" + return short_creator.get_available_music_tags() diff --git a/modules/video_creator/schemas.py b/modules/video_creator/schemas.py new file mode 100644 index 0000000000000000000000000000000000000000..45f58f8375990bd50577e3cd48af1548cc410b13 --- /dev/null +++ b/modules/video_creator/schemas.py @@ -0,0 +1,143 @@ +from pydantic import BaseModel, Field +from typing import List, Optional, Literal +from enum import Enum + + +class VoiceEnum(str, Enum): + """Available TTS voices""" + af_heart = "af_heart" + af_alloy = "af_alloy" + af_aoede = "af_aoede" + af_bella = "af_bella" + af_jessica = "af_jessica" + af_kore = "af_kore" + af_nicole = "af_nicole" + af_nova = "af_nova" + af_river = "af_river" + af_sarah = "af_sarah" + af_sky = "af_sky" + am_adam = "am_adam" + am_echo = "am_echo" + am_eric = "am_eric" + am_fenrir = "am_fenrir" + am_liam = "am_liam" + am_michael = "am_michael" + am_onyx = "am_onyx" + am_puck = "am_puck" + am_santa = "am_santa" + bf_emma = "bf_emma" + bf_isabella = "bf_isabella" + bm_george = "bm_george" + bm_lewis = "bm_lewis" + bf_alice = "bf_alice" + bf_lily = "bf_lily" + bm_daniel = "bm_daniel" + bm_fable = "bm_fable" + + +class MusicMoodEnum(str, Enum): + """Available music moods""" + sad = "sad" + melancholic = "melancholic" + happy = "happy" + euphoric = "euphoric/high" + excited = "excited" + chill = "chill" + uneasy = "uneasy" + angry = "angry" + dark = "dark" + hopeful = "hopeful" + contemplative = 
"contemplative" + funny = "funny/quirky" + + +class OrientationEnum(str, Enum): + """Video orientation""" + portrait = "portrait" + landscape = "landscape" + + +class CaptionPositionEnum(str, Enum): + """Caption position on video""" + top = "top" + center = "center" + bottom = "bottom" + + +class MusicVolumeEnum(str, Enum): + """Music volume level""" + low = "low" + medium = "medium" + high = "high" + muted = "muted" + + +class VideoStatus(str, Enum): + """Video processing status""" + processing = "processing" + ready = "ready" + failed = "failed" + + +class SceneInput(BaseModel): + """Input for a single scene in the video""" + text: str = Field(..., description="Text to be narrated in this scene") + searchTerms: List[str] = Field(..., description="Keywords for finding background video", alias="searchTerms") + + class Config: + populate_by_name = True + + +class RenderConfig(BaseModel): + """Configuration for video rendering""" + paddingBack: Optional[int] = Field(0, description="End screen duration in milliseconds") + music: Optional[MusicMoodEnum] = Field(None, description="Background music mood") + captionPosition: CaptionPositionEnum = Field(CaptionPositionEnum.bottom, description="Caption position") + captionBackgroundColor: str = Field("blue", description="Caption background color") + voice: VoiceEnum = Field(VoiceEnum.af_heart, description="TTS voice") + orientation: OrientationEnum = Field(OrientationEnum.portrait, description="Video orientation") + musicVolume: MusicVolumeEnum = Field(MusicVolumeEnum.high, description="Background music volume") + + class Config: + populate_by_name = True + + +class CreateVideoRequest(BaseModel): + """Request to create a short video""" + scenes: List[SceneInput] = Field(..., min_length=1, description="List of scenes for the video") + config: Optional[RenderConfig] = Field(default_factory=RenderConfig, description="Render configuration") + + +class CreateVideoResponse(BaseModel): + """Response after creating a video""" + 
videoId: str = Field(..., description="Unique ID for the created video") + + +class VideoStatusResponse(BaseModel): + """Response for video status check""" + status: VideoStatus = Field(..., description="Current status of the video") + + +class VideoListItem(BaseModel): + """Single video in the list""" + id: str + status: VideoStatus + + +class VideoListResponse(BaseModel): + """Response for listing all videos""" + videos: List[VideoListItem] + + +class Caption(BaseModel): + """Caption with timing information""" + text: str + startMs: int + endMs: int + + +class Scene(BaseModel): + """Processed scene with all media""" + captions: List[Caption] + video: str # Path to video file + audio: dict # Audio info with 'url' and 'duration' diff --git a/modules/video_creator/services/__init__.py b/modules/video_creator/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/video_creator/services/__pycache__/__init__.cpython-313.pyc b/modules/video_creator/services/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5aca70f9fad19762b2f4ea60eee95a78f9317347 Binary files /dev/null and b/modules/video_creator/services/__pycache__/__init__.cpython-313.pyc differ diff --git a/modules/video_creator/services/__pycache__/short_creator.cpython-313.pyc b/modules/video_creator/services/__pycache__/short_creator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a676c191c1b9bd94ea42f32f22b14bec5d87212 Binary files /dev/null and b/modules/video_creator/services/__pycache__/short_creator.cpython-313.pyc differ diff --git a/modules/video_creator/services/libraries/__init__.py b/modules/video_creator/services/libraries/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/modules/video_creator/services/libraries/__pycache__/__init__.cpython-313.pyc b/modules/video_creator/services/libraries/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d075a1826c8444303c2135a3f2869dcc5b79379b Binary files /dev/null and b/modules/video_creator/services/libraries/__pycache__/__init__.cpython-313.pyc differ diff --git a/modules/video_creator/services/libraries/__pycache__/tts_client.cpython-313.pyc b/modules/video_creator/services/libraries/__pycache__/tts_client.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3358153b4b61be9966bf66571a2bf5ef51025b0 Binary files /dev/null and b/modules/video_creator/services/libraries/__pycache__/tts_client.cpython-313.pyc differ diff --git a/modules/video_creator/services/libraries/__pycache__/whisper_client.cpython-313.pyc b/modules/video_creator/services/libraries/__pycache__/whisper_client.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d434a263d749c6a5f4240b9a582aef2e9352dd9 Binary files /dev/null and b/modules/video_creator/services/libraries/__pycache__/whisper_client.cpython-313.pyc differ diff --git a/modules/video_creator/services/libraries/ffmpeg_utils.py b/modules/video_creator/services/libraries/ffmpeg_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..680a588680d82907c6d14c81db0153b51579e062 --- /dev/null +++ b/modules/video_creator/services/libraries/ffmpeg_utils.py @@ -0,0 +1,191 @@ +import subprocess +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class FFmpegUtils: + """Utilities for audio and video processing with FFmpeg""" + + @staticmethod + def save_audio_as_wav(audio_data: bytes, output_path: Path): + """ + Save audio data as WAV file (normalized for Whisper) + + Args: + audio_data: Raw audio bytes (WAV format from TTS) + output_path: Where to save the normalized WAV + """ + logger.debug(f"Saving 
logger = logging.getLogger(__name__)


class FFmpegUtils:
    """Thin static wrappers around the ffmpeg/ffprobe CLI binaries.

    Every method shells out with ``check=True`` and ``capture_output=True``,
    so a failing ffmpeg invocation raises ``subprocess.CalledProcessError``.
    """

    @staticmethod
    def save_audio_as_wav(audio_data: bytes, output_path: Path):
        """Normalize raw TTS audio into a Whisper-friendly WAV file.

        Output is 16 kHz, mono, 16-bit PCM — the format faster-whisper expects.

        NOTE(review): the signature here is reconstructed from the call site
        ``FFmpegUtils.save_audio_as_wav(audio_data, wav_path)`` — confirm
        against the original definition.

        Args:
            audio_data: Raw audio bytes (WAV from TTS).
            output_path: Destination path for the normalized WAV.
        """
        logger.debug(f"Saving normalized WAV to {output_path}")

        # ffmpeg needs a real input file, so stage the bytes next to the output.
        scratch = output_path.parent / f"temp_{output_path.name}"
        scratch.write_bytes(audio_data)

        command = [
            "ffmpeg",
            "-i", str(scratch),
            "-ar", "16000",        # 16kHz sample rate
            "-ac", "1",            # Mono
            "-sample_fmt", "s16",  # 16-bit PCM
            "-y",                  # Overwrite
            str(output_path),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True)
            logger.debug(f"Saved normalized WAV: {output_path}")
        finally:
            # Always drop the staging file, even when ffmpeg fails.
            if scratch.exists():
                scratch.unlink()

    @staticmethod
    def save_audio_as_mp3(audio_data: bytes, output_path: Path):
        """Convert audio data to MP3.

        Args:
            audio_data: Raw audio bytes (WAV format from TTS).
            output_path: Where to save the MP3.
        """
        logger.debug(f"Converting to MP3: {output_path}")

        scratch = output_path.parent / f"temp_{output_path.name}.wav"
        scratch.write_bytes(audio_data)

        command = [
            "ffmpeg",
            "-i", str(scratch),
            "-codec:a", "libmp3lame",
            "-qscale:a", "2",  # High quality (VBR ~190kbps)
            "-y",              # Overwrite
            str(output_path),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True)
            logger.debug(f"Saved MP3: {output_path}")
        finally:
            if scratch.exists():
                scratch.unlink()

    @staticmethod
    def get_video_duration(file_path: Path) -> float:
        """Get duration of a media file in seconds using ffprobe.

        Works for audio files too (callers probe WAVs with it).

        Args:
            file_path: Path to the media file.

        Returns:
            Duration in seconds, or 0.0 on any failure (missing file,
            missing ffprobe binary, unparsable output).
        """
        probe = [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            str(file_path),
        ]
        try:
            completed = subprocess.run(probe, capture_output=True, text=True, check=True)
            return float(completed.stdout.strip())
        except Exception as e:
            # Deliberate best-effort: callers treat 0.0 as "unknown duration".
            logger.error(f"Failed to get video duration for {file_path}: {e}")
            return 0.0

    @staticmethod
    def normalize_video(input_path: Path, output_path: Path):
        """Normalize video to standard format (H.264, 30fps, AAC) to fix seeking/black screen issues.

        Args:
            input_path: Path to source video.
            output_path: Path to save normalized video.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits non-zero.
        """
        logger.debug(f"Normalizing video: {input_path} -> {output_path}")

        command = [
            "ffmpeg",
            "-i", str(input_path),
            "-c:v", "libx264",
            "-preset", "fast",
            "-r", "30",
            "-c:a", "aac",
            "-pix_fmt", "yuv420p",  # widest player compatibility
            "-y",
            str(output_path),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True)
            logger.debug(f"Normalized video saved to {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to normalize video {input_path}: {e.stderr.decode()}")
            raise e
        except Exception as e:
            logger.error(f"Error normalizing video {input_path}: {e}")
            raise e

    @staticmethod
    def cut_video(input_path: Path, output_path: Path, start_time: float, duration: float):
        """Cut a segment from a video file using FFmpeg.

        ``-ss`` is placed before ``-i`` for fast (keyframe) seeking; the
        segment is re-encoded so the cut is frame-accurate.

        Args:
            input_path: Source video.
            output_path: Destination for the segment.
            start_time: Start time in seconds.
            duration: Duration of the segment in seconds.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits non-zero.
        """
        command = [
            "ffmpeg",
            "-ss", str(start_time),
            "-i", str(input_path),
            "-t", str(duration),
            "-c:v", "libx264",
            "-preset", "fast",
            "-c:a", "aac",
            "-y",
            str(output_path),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to cut video {input_path}: {e.stderr.decode()}")
            raise e

    @staticmethod
    def image_to_video(input_path: Path, output_path: Path, duration: float):
        """Convert a still image to a video of a specific duration.

        The image is letterboxed/padded into a 9:16 (1080x1920) frame at 30fps.

        Args:
            input_path: Path to source image (jpg, png, etc.).
            output_path: Path to save the output video.
            duration: Duration of the video in seconds.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits non-zero.
        """
        command = [
            "ffmpeg",
            "-loop", "1",
            "-i", str(input_path),
            "-t", str(duration),
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            # Fit inside 1080x1920, then pad to exactly that canvas, centered.
            "-vf", "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2",
            "-r", "30",
            "-y",
            str(output_path),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True)
            logger.debug(f"Created video from image: {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to convert image to video: {e.stderr.decode()}")
            raise e
logger = logging.getLogger(__name__)


class PexelsClient:
    """Client for Pexels API to fetch background videos and photos."""

    def __init__(self, api_key: str):
        """
        Initialize Pexels client

        Args:
            api_key: Pexels API key
        """
        self.api_key = api_key
        self.base_url = "https://api.pexels.com/videos"
        self.headers = {"Authorization": api_key}
        # Generic fallback queries used when the caller's terms find nothing.
        self.joker_terms = ["nature", "globe", "space", "ocean"]

    def find_video(
        self,
        search_terms: Union[str, List[str]],
        duration: float,
        exclude_ids: Optional[List[int]] = None,
        orientation: str = "portrait"
    ) -> dict:
        """
        Find a suitable video from Pexels

        Args:
            search_terms: Keyword(s) to search for — a single string or a
                list of strings. (FIX: callers pass a bare string; iterating
                it directly would search letter-by-letter.)
            duration: Required video duration in seconds
            exclude_ids: List of video IDs to exclude
            orientation: 'portrait' or 'landscape'

        Returns:
            Dict with 'id' and 'url' of the selected video

        Raises:
            Exception: if neither the given terms nor the joker terms match.
        """
        exclude_ids = exclude_ids or []

        # Normalize: accept both a single keyword and a list of keywords.
        if isinstance(search_terms, str):
            terms = [search_terms]
        else:
            terms = list(search_terms)

        # Try user-provided search terms first
        for term in terms:
            video = self._search_and_select(term, duration, exclude_ids, orientation)
            if video:
                return video

        # Fall back to joker terms
        logger.info(f"No videos found for {search_terms}, using joker terms")
        for term in self.joker_terms:
            video = self._search_and_select(term, duration, exclude_ids, orientation)
            if video:
                return video

        raise Exception("No suitable videos found on Pexels")

    def _search_and_select(
        self,
        query: str,
        min_duration: float,
        exclude_ids: List[int],
        orientation: str
    ) -> Optional[dict]:
        """Search for videos and select a suitable one.

        Returns None (never raises) on API errors or empty results, so the
        caller can keep trying other terms.
        """
        try:
            logger.debug(f"Searching Pexels for: {query} ({orientation})")

            response = requests.get(
                f"{self.base_url}/search",
                headers=self.headers,
                params={
                    "query": query,
                    "orientation": orientation,
                    "per_page": 15,
                    "size": "medium"  # Good balance of quality and file size
                },
                timeout=10
            )

            if response.status_code != 200:
                logger.warning(f"Pexels API error: {response.status_code}")
                return None

            data = response.json()
            videos = data.get("videos", [])

            if not videos:
                logger.debug(f"No videos found for query: {query}")
                return None

            # Filter suitable videos
            suitable_videos = []
            for video in videos:
                if video["id"] in exclude_ids:
                    continue

                # Get video file URL (HD or SD)
                video_files = video.get("video_files", [])
                if not video_files:
                    continue

                # Sort by pixel count, best first
                video_files = sorted(
                    video_files,
                    key=lambda x: x.get("width", 0) * x.get("height", 0),
                    reverse=True
                )

                # Find appropriate quality based on orientation
                target_width = 1080 if orientation == "portrait" else 1920
                target_height = 1920 if orientation == "portrait" else 1080

                selected_file = None
                for vf in video_files:
                    # Look for files close to our target resolution
                    if vf.get("width") and vf.get("height"):
                        if (abs(vf["width"] - target_width) < 300 and
                                abs(vf["height"] - target_height) < 300):
                            selected_file = vf
                            break

                # Fallback to highest quality if no exact match
                if not selected_file and video_files:
                    selected_file = video_files[0]

                if selected_file and selected_file.get("link"):
                    suitable_videos.append({
                        "id": video["id"],
                        "url": selected_file["link"],
                        "duration": video.get("duration", 0)
                    })

            if not suitable_videos:
                return None

            # Prefer videos at least 50% of the requested duration
            # to avoid stitching too many tiny clips (threshold capped at 15s).
            duration_threshold = min(min_duration * 0.5, 15)
            long_enough_videos = [v for v in suitable_videos if v["duration"] >= duration_threshold]

            if long_enough_videos:
                selected = random.choice(long_enough_videos)
                logger.info(f"Selected Pexels video ID {selected['id']} (duration: {selected['duration']}s) for query '{query}'")
                return selected

            # Fallback to any suitable video
            selected = random.choice(suitable_videos)
            logger.info(f"Selected Pexels video ID {selected['id']} (duration: {selected['duration']}s) for query '{query}' (fallback)")
            return selected

        except Exception as e:
            logger.error(f"Error searching Pexels: {e}")
            return None

    def find_photo(
        self,
        query: str,
        orientation: str = "portrait"
    ) -> Optional[dict]:
        """
        Find a suitable photo from Pexels

        Args:
            query: Search term
            orientation: 'portrait' or 'landscape'

        Returns:
            Dict with 'id', 'url' and 'type' of the photo, or None on failure.
        """
        try:
            logger.debug(f"Searching Pexels for photo: {query} ({orientation})")

            # Pexels Photo API endpoint (different host path than videos)
            url = "https://api.pexels.com/v1/search"

            response = requests.get(
                url,
                headers=self.headers,
                params={
                    "query": query,
                    "orientation": orientation,
                    "per_page": 15,
                    "size": "large"
                },
                timeout=10
            )

            if response.status_code != 200:
                logger.warning(f"Pexels Photo API error: {response.status_code}")
                return None

            data = response.json()
            photos = data.get("photos", [])

            if not photos:
                logger.debug(f"No photos found for query: {query}")
                return None

            # Select a random photo
            photo = random.choice(photos)

            # Get URL (prefer original or large2x)
            src = photo.get("src", {})
            url = src.get("original") or src.get("large2x") or src.get("large")

            if not url:
                return None

            logger.info(f"Selected Pexels photo ID {photo['id']} for query '{query}'")
            return {
                "id": photo["id"],
                "url": url,
                "type": "photo"
            }

        except Exception as e:
            logger.error(f"Error searching Pexels photos: {e}")
            return None
import struct
import logging
from typing import Tuple

logger = logging.getLogger(__name__)


class TTSClient:
    """Client for Kokoro TTS via Hugging Face Cloud API"""

    def __init__(self, api_url: str):
        """
        Initialize TTS client

        Args:
            api_url: Base URL for the TTS API (HF_TTS environment variable)
        """
        self.api_url = api_url.rstrip('/')
        logger.info(f"Using cloud TTS API at {self.api_url}")

    async def generate(self, text: str, voice: str) -> Tuple[bytes, float]:
        """
        Generate speech from text

        Args:
            text: Text to convert to speech
            voice: Voice identifier (e.g., 'af_heart', 'am_adam')

        Returns:
            Tuple of (audio_bytes, duration_seconds)

        Raises:
            Exception: on any non-200 response from the TTS API.
        """
        # Lazy import so the module stays importable when aiohttp is absent
        # (only this coroutine needs it).
        import aiohttp

        endpoint = f"{self.api_url}/v1/audio/speech"

        logger.debug(f"Generating audio with voice={voice}, text_length={len(text)}")

        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint,
                json={
                    "model": "kokoro",
                    "input": text,
                    "voice": voice
                },
                headers={"Content-Type": "application/json"},
                timeout=aiohttp.ClientTimeout(total=30)
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"TTS API error ({response.status}): {error_text}")

                audio_data = await response.read()
                duration = self._estimate_audio_duration(audio_data)

                logger.debug(f"Generated audio: {len(audio_data)} bytes, {duration:.2f}s")
                return audio_data, duration

    def _estimate_audio_duration(self, audio_buffer: bytes) -> float:
        """
        Estimate audio duration from a WAV buffer.

        WAV format: 44 byte canonical header, then PCM data. The header's
        byte-rate field (bytes 28-31) and data-chunk size (bytes 40-43) give
        the exact duration: data_size / byte_rate.

        FIX: the previous fallback returned a *negative* duration for buffers
        shorter than 44 bytes; the estimate is now clamped at 0.0.

        NOTE(review): assumes the 'data' chunk starts at offset 36 (true for
        ffmpeg/Kokoro output, not for WAVs with extra metadata chunks).
        """
        # Fallback: assume 16-bit mono 24kHz PCM after a 44-byte header.
        fallback = max(0.0, (len(audio_buffer) - 44) / (2 * 24000))

        if len(audio_buffer) < 44 or audio_buffer[:4] != b'RIFF':
            return fallback

        try:
            # byte_rate = sample_rate * channels * bytes_per_sample
            byte_rate = struct.unpack('<I', audio_buffer[28:32])[0]
            # Size of the PCM payload in the 'data' chunk
            data_size = struct.unpack('<I', audio_buffer[40:44])[0]
            if byte_rate > 0:
                return data_size / byte_rate
            return fallback
        except struct.error:
            return fallback

    def get_available_voices(self) -> list:
        """Return list of available TTS voices"""
        return [
            "af_heart", "af_alloy", "af_aoede", "af_bella", "af_jessica",
            "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
            "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
            "am_michael", "am_onyx", "am_puck", "am_santa",
            "bf_emma", "bf_isabella", "bm_george", "bm_lewis",
            "bf_alice", "bf_lily", "bm_daniel", "bm_fable"
        ]
logger = logging.getLogger(__name__)


class VideoComposer:
    """Video composition using MoviePy (replaces Remotion)"""

    @staticmethod
    def render(
        scenes: List[Dict],
        music_path: str,
        output_path: Path,
        orientation: str = "portrait",
        caption_position: str = "bottom",
        caption_bg_color: str = "blue",
        music_volume: str = "high",
        padding_back: int = 0
    ):
        """
        Render final video with scenes, captions, and music

        Args:
            scenes: List of scene dicts with 'video', 'audio', 'captions'
            music_path: Path to background music file
            output_path: Where to save the final video
            orientation: 'portrait' or 'landscape'
            caption_position: 'top', 'center', or 'bottom'
            caption_bg_color: Background color for captions
            music_volume: 'low', 'medium', 'high', or 'muted'
            padding_back: Additional padding at end in milliseconds
        """
        logger.info(f"Rendering video with {len(scenes)} scenes")

        # Set dimensions based on orientation
        if orientation == "portrait":
            width, height = 1080, 1920
        else:
            width, height = 1920, 1080

        # Process each scene
        video_clips = []
        total_duration = 0  # running offset in seconds, forwarded to _add_captions

        for i, scene in enumerate(scenes):
            logger.debug(f"Processing scene {i + 1}/{len(scenes)}")
            # Load narration audio
            audio_clip = AudioFileClip(scene["audio"]["url"])
            scene_duration = scene["audio"]["duration"]

            # Load video clip(s)
            video_input = scene["video"]
            if isinstance(video_input, list):
                # Concatenate multiple clips
                clips = []
                for item in video_input:
                    try:
                        # Handle both string paths (legacy) and dicts (new smart segmentation)
                        if isinstance(item, dict):
                            path = item["path"]
                            target_duration = item["duration"]
                            start_time = item.get("start_time", 0)
                        else:
                            path = item
                            target_duration = None
                            start_time = 0

                        # FIX 1: Load video WITHOUT audio to prevent stream conflicts
                        clip = VideoFileClip(path, audio=False)

                        # Verify actual duration using ffprobe (MoviePy's metadata
                        # duration can disagree with the container's real length)
                        actual_duration = FFmpegUtils.get_video_duration(Path(path))

                        if actual_duration > 0:
                            # If we have a start_time, we are taking a subclip
                            if start_time > 0:
                                # Ensure we don't go past the end
                                end_time = min(start_time + target_duration, actual_duration)
                                # If the segment is completely out of bounds (shouldn't happen with good logic), fix it
                                if start_time >= actual_duration:
                                    start_time = 0
                                    end_time = min(target_duration, actual_duration)

                                clip = clip.subclip(start_time, end_time)

                                # If the subclip is shorter than target (because we hit end of file),
                                # we might need to loop or extend?
                                # The calling logic should ensure 'start_time + target_duration <= actual_duration'
                                # if possible. If not, we loop the result.
                                if clip.duration < target_duration:
                                    clip = clip.loop(duration=target_duration)
                                else:
                                    clip = clip.set_duration(target_duration)

                            # Standard logic (start from 0)
                            elif target_duration and actual_duration < target_duration:
                                clip = clip.loop(duration=target_duration)
                            elif target_duration:
                                clip = clip.set_duration(target_duration)
                            elif abs(clip.duration - actual_duration) > 0.5:
                                clip = clip.set_duration(actual_duration)

                        # Resize to target dimensions immediately
                        clip = VideoComposer._resize_and_crop(clip, width, height)
                        clips.append(clip)
                    except Exception as e:
                        logger.warning(f"Failed to load video clip {item}: {e}")

                if not clips:
                    raise Exception("No valid video clips found for scene")

                # FIX 2: Use method="chain" for better stability with identically sized clips
                video_clip = concatenate_videoclips(clips, method="chain")
            else:
                # FIX 1 (Repeated): Load without audio
                video_clip = VideoFileClip(video_input, audio=False)
                # Verify actual duration using ffprobe
                actual_duration = FFmpegUtils.get_video_duration(Path(video_input))
                if actual_duration > 0:
                    if abs(video_clip.duration - actual_duration) > 0.5:
                        video_clip = video_clip.set_duration(actual_duration)

                video_clip = VideoComposer._resize_and_crop(video_clip, width, height)

            # Set duration to match audio
            # Loop video if it's shorter than audio to prevent black screen
            if video_clip.duration < scene_duration:
                # If gap is small (< 0.5s), freeze the last frame to fill it.
                # This prevents black frames/flicker at the end of scene.
                gap = scene_duration - video_clip.duration
                if gap < 0.5:
                    logger.debug(f"Filling small gap of {gap:.3f}s by freezing last frame")
                    # Create a freeze frame of the last instant
                    last_frame = video_clip.to_ImageClip(t=video_clip.duration - 0.01).set_duration(gap)
                    video_clip = concatenate_videoclips([video_clip, last_frame], method="chain")
                else:
                    # Gap is large, loop the video
                    video_clip = video_clip.loop(duration=scene_duration)
            else:
                # Video is longer, just trim it
                video_clip = video_clip.set_duration(scene_duration)

            video_clip = video_clip.set_audio(audio_clip)

            # Add captions
            if scene.get("captions"):
                # Fix for "bad blue color": default to transparent if blue is passed
                # or if the user wants the old default.
                # Ideally, we use a semi-transparent box, but MoviePy TextClip
                # background support is limited. Transparent with stroke is safer.
                # NOTE(review): this rebinds the parameter, so the override
                # persists for all subsequent scenes too (intended?).
                if caption_bg_color == "blue":
                    caption_bg_color = "transparent"  # Explicit string instead of None

                video_clip = VideoComposer._add_captions(
                    video_clip,
                    scene["captions"],
                    width,
                    height,
                    caption_position,
                    caption_bg_color,
                    total_duration
                )

            video_clips.append(video_clip)
            total_duration += scene_duration

        # Add padding if specified
        if padding_back > 0:
            padding_seconds = padding_back / 1000
            total_duration += padding_seconds
            # Extend last clip
            if video_clips:
                last_clip = video_clips[-1]
                # Loop the last clip for padding too
                video_clips[-1] = last_clip.loop(duration=last_clip.duration + padding_seconds)

        # Concatenate all scenes
        logger.debug("Concatenating video clips")
        # Use chain here too
        final_video = concatenate_videoclips(video_clips, method="chain")

        # Add background music
        if music_path and music_volume != "muted":
            logger.debug("Adding background music")
            final_video = VideoComposer._add_background_music(
                final_video,
                music_path,
                music_volume
            )

        # Write final video
        logger.info(f"Writing video to {output_path}")
        final_video.write_videofile(
            str(output_path),
            codec="libx264",
            audio_codec="aac",
            fps=30,
            preset="medium",
            threads=2,
            logger=None  # Suppress moviepy progress bar
        )

        # Cleanup: release file handles held by MoviePy readers
        final_video.close()
        for clip in video_clips:
            clip.close()

        logger.info(f"Video rendered successfully: {output_path}")

    @staticmethod
    def _resize_and_crop(clip: VideoFileClip, target_width: int, target_height: int) -> VideoFileClip:
        """Resize and crop video to match target dimensions (center crop,
        preserving aspect ratio — no letterboxing)."""
        clip_width, clip_height = clip.size
        clip_aspect = clip_width / clip_height
        target_aspect = target_width / target_height

        if clip_aspect > target_aspect:
            # Clip is wider, crop width
            new_height = target_height
            new_width = int(target_height * clip_aspect)
            resized = clip.resize(height=new_height)
            x_center = new_width / 2
            x1 = x_center - target_width / 2
            cropped = resized.crop(x1=x1, x2=x1 + target_width)
        else:
            # Clip is taller, crop height
            new_width = target_width
            new_height = int(target_width / clip_aspect)
            resized = clip.resize(width=new_width)
            y_center = new_height / 2
            y1 = y_center - target_height / 2
            cropped = resized.crop(y1=y1, y2=y1 + target_height)

        return cropped
resized.crop(x1=x1, x2=x1 + target_width) + else: + # Clip is taller, crop height + new_width = target_width + new_height = int(target_width / clip_aspect) + resized = clip.resize(width=new_width) + y_center = new_height / 2 + y1 = y_center - target_height / 2 + cropped = resized.crop(y1=y1, y2=y1 + target_height) + + return cropped + + @staticmethod + def _add_captions( + video_clip: VideoFileClip, + captions: List[Dict], + width: int, + height: int, + position: str, + bg_color: str, + offset_seconds: float + ) -> CompositeVideoClip: + """Add captions to video clip""" + caption_clips = [] + + # Determine vertical position + if position == "top": + y_pos = height * 0.15 + elif position == "center": + y_pos = height * 0.5 + else: # bottom + y_pos = height * 0.70 # Changed from 0.85 to 0.70 as requested + + for caption in captions: + start_time = caption["startMs"] / 1000 + end_time = caption["endMs"] / 1000 + duration = end_time - start_time + + if duration <= 0: + continue + + # Create text clip + # Use transparent background by default if None + # Add strong stroke for visibility + final_bg_color = bg_color if bg_color else "transparent" + + try: + # Try caption method with fixed height to avoid NoneType error + # Allocating 20% of height for caption box + + # Use TheBoldFont.ttf + font_path = Path(__file__).parent.parent.parent / "static" / "fonts" / "TheBoldFont.ttf" + font_name = str(font_path) if font_path.exists() else "Liberation-Sans-Bold" + + txt_clip = TextClip( + caption["text"], + fontsize=70, + color="white", + font=font_name, + stroke_color="black", + stroke_width=1.5, + bg_color=final_bg_color, + method="caption", + size=(int(width * 0.9), int(height * 0.2)), + align="center" + ) + except Exception as e: + logger.warning(f"TextClip caption method failed: {e}. 
Falling back to label method.") + # Fallback to label method (no wrapping, but works) + txt_clip = TextClip( + caption["text"], + fontsize=60, + color="white", + font=font_name, + stroke_color="black", + stroke_width=2, + bg_color=final_bg_color, + method="label" + ) + + txt_clip = txt_clip.set_duration(duration) + txt_clip = txt_clip.set_start(start_time) + txt_clip = txt_clip.set_position(("center", y_pos)) + + caption_clips.append(txt_clip) + + if caption_clips: + return CompositeVideoClip([video_clip] + caption_clips) + return video_clip + + @staticmethod + def _add_background_music( + video_clip: VideoFileClip, + music_path: str, + volume_level: str + ) -> VideoFileClip: + """Add background music to video""" + # Load music + music = AudioFileClip(music_path) + + # Loop music to match video duration + if music.duration < video_clip.duration: + loops_needed = int(video_clip.duration / music.duration) + 1 + music = concatenate_audioclips([music] * loops_needed) + + # Trim to video duration + music = music.subclip(0, video_clip.duration) + + # Set volume based on level + volume_multipliers = { + "low": 0.2, + "medium": 0.4, + "high": 0.6, + "muted": 0.0 + } + volume = volume_multipliers.get(volume_level, 0.6) + music = music.volumex(volume) + + # Mix with narration audio + if video_clip.audio: + final_audio = CompositeAudioClip([video_clip.audio, music]) + return video_clip.set_audio(final_audio) + + return video_clip.set_audio(music) diff --git a/modules/video_creator/services/libraries/whisper_client.py b/modules/video_creator/services/libraries/whisper_client.py new file mode 100644 index 0000000000000000000000000000000000000000..f5e5470b9a8c741f6f91221916d46caf2c0777e5 --- /dev/null +++ b/modules/video_creator/services/libraries/whisper_client.py @@ -0,0 +1,81 @@ +import logging +from pathlib import Path +from faster_whisper import WhisperModel +from typing import List +from models.schemas import Caption + +logger = logging.getLogger(__name__) + + +class 
WhisperClient: + """Client for faster-whisper caption generation""" + + def __init__(self, model_name: str = "tiny.en", model_dir: Path = None): + """ + Initialize Whisper client + + Args: + model_name: Whisper model to use (tiny.en, base.en, medium.en, etc.) + model_dir: Directory to store/load models + """ + self.model_name = model_name + self.model_dir = str(model_dir) if model_dir else None + + logger.info(f"Loading Whisper model: {model_name}") + + # Use CPU with int8 quantization for efficiency + self.model = WhisperModel( + model_name, + device="cpu", + compute_type="int8", + download_root=self.model_dir + ) + + logger.info("Whisper model loaded successfully") + + def create_captions(self, audio_path: str) -> List[Caption]: + """ + Generate captions from audio file + + Args: + audio_path: Path to audio file (WAV format preferred) + + Returns: + List of Caption objects with text and timing + """ + logger.debug(f"Transcribing audio: {audio_path}") + + # Transcribe with word-level timestamps + segments, info = self.model.transcribe( + audio_path, + word_timestamps=True, + vad_filter=True, # Voice activity detection to filter silence + vad_parameters=dict(min_silence_duration_ms=500) + ) + + captions: List[Caption] = [] + + for segment in segments: + if not segment.words: + continue + + for word in segment.words: + # Skip special tokens + if word.word.startswith('[') or word.word.strip() == '': + continue + + # Merge with previous caption if no space and previous doesn't end with space + if (captions and + not word.word.startswith(' ') and + not captions[-1].text.endswith(' ')): + captions[-1].text += word.word.strip() + captions[-1].endMs = int(word.end * 1000) + else: + captions.append(Caption( + text=word.word.strip(), + startMs=int(word.start * 1000), + endMs=int(word.end * 1000) + )) + + logger.debug(f"Generated {len(captions)} captions from {audio_path}") + return captions diff --git a/modules/video_creator/services/music_manager.py 
logger = logging.getLogger(__name__)


class MusicManager:
    """Manages background music files and metadata"""

    def __init__(self, music_dir: Path):
        """
        Initialize music manager

        Args:
            music_dir: Directory containing music files and metadata
        """
        self.music_dir = music_dir
        # Lazily-built list of track dicts; None until first scan.
        self._music_cache: Optional[List[Dict]] = None

    def ensure_music_files_exist(self):
        """Verify the music directory exists and contains at least one MP3.

        Raises:
            FileNotFoundError: when the directory or its MP3s are missing.
        """
        if not self.music_dir.exists():
            raise FileNotFoundError(
                f"Music directory not found: {self.music_dir}. "
                "Please copy music files from the original project."
            )

        music_files = list(self.music_dir.glob("*.mp3"))
        if not music_files:
            raise FileNotFoundError(
                f"No MP3 files found in {self.music_dir}. "
                "Please copy music files from the original project."
            )

        logger.info(f"Found {len(music_files)} music files")

    def get_music_list(self) -> List[Dict]:
        """
        Get list of available music with metadata

        Returns:
            List of dicts with 'path', 'mood', 'filename' keys
        """
        # Serve the cached scan when available.
        if self._music_cache is not None:
            return self._music_cache

        # Optional sidecar file mapping filename -> {"mood": ...}.
        metadata = self._load_metadata()

        tracks = []
        for track_file in self.music_dir.glob("*.mp3"):
            name = track_file.name
            # Prefer curated metadata; otherwise guess from the filename.
            mood = metadata.get(name, {}).get("mood") or self._infer_mood_from_filename(name)
            tracks.append({"path": str(track_file), "mood": mood, "filename": name})

        self._music_cache = tracks
        return tracks

    def _load_metadata(self) -> Dict:
        """Read music_metadata.json if present; empty dict on any failure."""
        metadata_path = self.music_dir / "music_metadata.json"
        if not metadata_path.exists():
            return {}
        try:
            with open(metadata_path, 'r') as f:
                return json.load(f)
        except Exception as e:
            logger.warning(f"Failed to load music metadata: {e}")
            return {}

    def find_music(self, mood: Optional[str] = None) -> Dict:
        """
        Find music file by mood

        Args:
            mood: Music mood (sad, happy, chill, etc.) or None for random

        Returns:
            Dict with music info

        Raises:
            ValueError: when no music files are available at all.
        """
        tracks = self.get_music_list()
        if not tracks:
            raise ValueError("No music files available")

        if mood:
            matching = [t for t in tracks if t["mood"] == mood]
            if matching:
                return random.choice(matching)
            logger.warning(f"No music found for mood '{mood}', using random")

        # No mood requested (or no match): pick any track.
        return random.choice(tracks)

    def get_available_moods(self) -> List[str]:
        """Get sorted list of distinct music moods present in the library."""
        return sorted({t["mood"] for t in self.get_music_list() if t["mood"]})

    @staticmethod
    def _infer_mood_from_filename(filename: str) -> str:
        """Infer mood from filename (fallback if no metadata)."""
        lowered = filename.lower()

        mood_keywords = {
            "sad": ["sad", "melancholy", "emotional"],
            "happy": ["happy", "joyful", "upbeat"],
            "chill": ["chill", "relax", "calm", "ambient"],
            "dark": ["dark", "suspense", "mysterious"],
            "excited": ["excited", "energetic", "uplifting"],
            "angry": ["angry", "intense", "aggressive"],
        }

        # First mood (in declaration order) whose keywords hit wins.
        for mood, keywords in mood_keywords.items():
            if any(kw in lowered for kw in keywords):
                return mood

        return "chill"  # Default mood
logger = logging.getLogger(__name__)


class ShortCreator:
    """Main video creation orchestrator"""

    def __init__(
        self,
        # FIX: annotations quoted as forward references — `Config` is never
        # imported in this module, so the bare annotation raised NameError at
        # class-definition time (annotations in signatures are evaluated
        # eagerly without `from __future__ import annotations`).
        config: "Config",
        tts_client: "TTSClient",
        whisper_client: "WhisperClient",
        pexels_client: "PexelsClient",
        music_manager: "MusicManager"
    ):
        """Wire up the service clients used by the rendering pipeline."""
        self.config = config
        self.tts = tts_client
        self.whisper = whisper_client
        self.pexels = pexels_client
        self.music_manager = music_manager
        # FIFO of pending render jobs: {"id", "scenes", "config"} dicts.
        self.queue: List[Dict] = []
        # True while process_queue() is draining the queue.
        self.processing = False

    def add_to_queue(self, scenes: "List[SceneInput]", config: "RenderConfig") -> str:
        """
        Add video to processing queue

        Args:
            scenes: Scene inputs (text + search terms) for the video.
            config: Render configuration (voice, orientation, music, ...).

        Returns:
            video_id for tracking (24 hex chars, cuid-like)
        """
        video_id = str(uuid.uuid4()).replace('-', '')[:24]  # Similar to cuid

        self.queue.append({
            "id": video_id,
            "scenes": scenes,
            "config": config
        })

        logger.info(f"Added video {video_id} to queue. Queue length: {len(self.queue)}")

        # Start processing if not already running
        # NOTE(review): requires a running event loop (called from an async
        # request handler); create_task raises RuntimeError otherwise.
        if not self.processing:
            asyncio.create_task(self.process_queue())

        return video_id

    async def process_queue(self):
        """Process videos in the queue, one at a time, until it is empty.

        Failures are recorded as `<video_id>.failed` marker files so status
        endpoints can report them; processing then continues with the next job.
        """
        if self.processing:
            return

        self.processing = True

        try:
            while self.queue:
                item = self.queue[0]
                video_id = item["id"]

                logger.info(f"Processing video {video_id}")

                try:
                    # Run video creation in a background thread to keep API responsive
                    # This allows status checks while video is being processed
                    await asyncio.to_thread(
                        self._create_short_sync,
                        video_id,
                        item["scenes"],
                        item["config"]
                    )
                    logger.info(f"Successfully created video {video_id}")
                except Exception as e:
                    logger.error(f"Failed to create video {video_id}: {e}", exc_info=True)
                    # Mark as failed by creating a .failed marker file
                    failed_marker = self.config.videos_dir_path / f"{video_id}.failed"
                    failed_marker.write_text(str(e))
                finally:
                    self.queue.pop(0)
        finally:
            self.processing = False

    def _create_short_sync(
        self,
        video_id: str,
        input_scenes: "List[SceneInput]",
        config: "RenderConfig"
    ):
        """Synchronous wrapper for create_short - runs in a separate thread.

        Spins up a private event loop because the worker thread created by
        asyncio.to_thread has none.
        """
        # Create a new event loop for this thread
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self.create_short(video_id, input_scenes, config))
        finally:
            loop.close()
files + temp_id = str(uuid.uuid4()).replace('-', '')[:12] + wav_path = self.config.temp_dir_path / f"{temp_id}.wav" + mp3_path = self.config.temp_dir_path / f"{temp_id}.mp3" + video_path = self.config.temp_dir_path / f"{temp_id}.mp4" + + temp_files.extend([wav_path, mp3_path, video_path]) + + # Save and convert audio + FFmpegUtils.save_audio_as_wav(audio_data, wav_path) + FFmpegUtils.save_audio_as_mp3(audio_data, mp3_path) + + # Get ACTUAL audio duration from WAV file (TTS estimate is often wrong!) + audio_duration = FFmpegUtils.get_video_duration(wav_path) + logger.info(f"Scene {i+1}: TTS reported {tts_duration:.2f}s, actual WAV duration: {audio_duration:.2f}s") + + # Add padding to last scene + if i + 1 == len(input_scenes) and config.paddingBack: + audio_duration += config.paddingBack / 1000 + + # Generate captions + captions = self.whisper.create_captions(str(wav_path)) + + # Find and download background video(s) + video_paths = [] + + # Simplified Scene Construction: One Video Per Scene + # User Request: "Remove restrictions. One video per scene equal to audio." + # User Request: "Video must be 9:16. Use image if needed." + + # Force portrait for 9:16 + orientation = "portrait" + + keywords = scene_input.searchTerms + if not keywords: + keywords = ["general"] + + # Handle both string and list inputs for searchTerms + # If it's a string, use it directly; if list, use first item + if isinstance(keywords, str): + keyword = keywords # Use the whole string + elif isinstance(keywords, list) and len(keywords) > 0: + keyword = keywords[0] if isinstance(keywords[0], str) else str(keywords[0]) + else: + keyword = "general" + + logger.debug(f"Using search keyword: '{keyword}' from searchTerms: {keywords}") + + # Try to find a video that is at least as long as the audio + search_duration = max(audio_duration, 5.0) + + video_found = False + video_path = None + temp_vid_id = str(uuid.uuid4()).replace('-', '')[:12] + + try: + # 1. 
Try Video Search + pexels_video = self.pexels.find_video( + keyword, + search_duration, + exclude_video_ids, + orientation + ) + + video_path = self.config.temp_dir_path / f"{temp_vid_id}.mp4" + temp_files.append(video_path) + + # Download video + logger.debug(f"Downloading video for '{keyword}' (Target: {audio_duration:.2f}s)") + response = requests.get(pexels_video["url"], stream=True, timeout=30) + response.raise_for_status() + + with open(video_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + # Verify file size + if video_path.stat().st_size < 1024: + logger.warning(f"Downloaded video {video_path} is too small") + raise Exception("Downloaded video is invalid") + + # Normalize video + norm_path = video_path.with_suffix(".norm.mp4") + FFmpegUtils.normalize_video(video_path, norm_path) + video_path.unlink() + norm_path.rename(video_path) + + video_found = True + exclude_video_ids.append(pexels_video["id"]) + + except Exception as e: + logger.warning(f"Video search/download failed for '{keyword}': {e}. Trying photo fallback.") + video_found = False + + # 2. 
Photo Fallback + if not video_found: + try: + logger.info(f"Attempting photo fallback for '{keyword}'") + pexels_photo = self.pexels.find_photo(keyword, orientation) + + if pexels_photo: + # Download photo + photo_path = self.config.temp_dir_path / f"{temp_vid_id}.jpg" + temp_files.append(photo_path) + + response = requests.get(pexels_photo["url"], stream=True, timeout=30) + response.raise_for_status() + + with open(photo_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + + # Convert photo to video + video_path = self.config.temp_dir_path / f"{temp_vid_id}_img.mp4" + temp_files.append(video_path) + + FFmpegUtils.image_to_video(photo_path, video_path, audio_duration) + video_found = True + logger.info(f"Created video from photo {pexels_photo['id']}") + + except Exception as e: + logger.error(f"Photo fallback failed: {e}") + + if not video_found or not video_path or not video_path.exists(): + raise Exception(f"Failed to find any visual content for '{keyword}'") + + # Get actual duration (whether video or image-video) + vid_duration = FFmpegUtils.get_video_duration(video_path) + + # Determine cut duration + take_duration = min(vid_duration, audio_duration) + + logger.info(f"Using {take_duration:.2f}s of content for scene (Audio: {audio_duration:.2f}s)") + + # Physically cut/trim to ensure exact match + final_clip_path = self.config.temp_dir_path / f"{temp_vid_id}_cut.mp4" + temp_files.append(final_clip_path) + + FFmpegUtils.cut_video(video_path, final_clip_path, 0, take_duration) + + # Verify actual cut duration + actual_cut_dur = FFmpegUtils.get_video_duration(final_clip_path) + + video_paths.append({ + "path": str(final_clip_path), + "duration": actual_cut_dur, + "keyword": keyword + }) + + # Build scene dict + scenes.append({ + "captions": [c.dict() for c in captions], + "video": video_paths, + "audio": { + "url": str(mp3_path), + "duration": audio_duration + } + }) + + total_duration += audio_duration + + # Add padding to 
total duration + if config.paddingBack: + total_duration += config.paddingBack / 1000 + + # Select background music + music_mood = config.music.value if config.music else None + selected_music = self.music_manager.find_music(music_mood) + + logger.info(f"Selected music: {selected_music['filename']} (mood: {selected_music['mood']})") + + # Render final video + output_path = self.config.videos_dir_path / f"{video_id}.mp4" + # Use a temp path for atomic write to prevent premature "ready" status + temp_output_path = self.config.videos_dir_path / f"{video_id}.tmp.mp4" + + try: + VideoComposer.render( + scenes=scenes, + music_path=selected_music["path"], + output_path=temp_output_path, + orientation=orientation, + caption_position=config.captionPosition.value, + caption_bg_color=config.captionBackgroundColor, + music_volume=config.musicVolume.value, + padding_back=config.paddingBack + ) + + # Atomic rename to final path + if temp_output_path.exists(): + temp_output_path.rename(output_path) + logger.info(f"Video {video_id} created successfully at {output_path}") + else: + raise Exception("Rendered file not found at temp path") + + except Exception as e: + # Cleanup temp file on failure + if temp_output_path.exists(): + temp_output_path.unlink() + raise e + + # Cleanup temp files + for temp_file in temp_files: + if temp_file.exists(): + temp_file.unlink() + + def get_status(self, video_id: str) -> VideoStatus: + """Get video processing status""" + # Check if in queue (waiting or being processed) + if any(item["id"] == video_id for item in self.queue): + return VideoStatus.processing + + # Check if final video exists (READY) + video_path = self.config.videos_dir_path / f"{video_id}.mp4" + if video_path.exists(): + return VideoStatus.ready + + # Check if temp file exists (still rendering = PROCESSING) + temp_path = self.config.videos_dir_path / f"{video_id}.tmp.mp4" + if temp_path.exists(): + return VideoStatus.processing + + # Check if failed marker exists + failed_marker = 
self.config.videos_dir_path / f"{video_id}.failed" + if failed_marker.exists(): + return VideoStatus.failed + + # If processing flag is active but video not found, it might be in early stages + if self.processing: + return VideoStatus.processing + + # Video not found at all + return VideoStatus.failed + + def get_video_path(self, video_id: str) -> Path: + """Get path to video file""" + return self.config.videos_dir_path / f"{video_id}.mp4" + + def delete_video(self, video_id: str): + """Delete video file""" + video_path = self.get_video_path(video_id) + if video_path.exists(): + video_path.unlink() + logger.info(f"Deleted video {video_id}") + + def list_all_videos(self) -> List[Dict]: + """List all videos with their status""" + videos = [] + + # Get all MP4 files (exclude temp files) + for video_file in self.config.videos_dir_path.glob("*.mp4"): + # Skip temp files (*.tmp.mp4) + if ".tmp." in video_file.name: + continue + video_id = video_file.stem + videos.append({ + "id": video_id, + "status": self.get_status(video_id).value + }) + + # Add videos in queue + for item in self.queue: + if not any(v["id"] == item["id"] for v in videos): + videos.append({ + "id": item["id"], + "status": VideoStatus.processing.value + }) + + return videos + + def get_available_voices(self) -> List[str]: + """Get list of available TTS voices""" + return TTSClient.list_available_voices() + + def _plan_segments(self, duration: float) -> List[float]: + """ + Deterministic segmentation algorithm (Even Split Strategy): + - Segments between 2-5 seconds + - Avoid 1-second clips + - Sum exactly equals duration + - Distribute duration evenly to maximize segment length + """ + if duration <= 5.0: + return [duration] + + # Calculate optimal number of segments + # We want segments as close to 5.0 as possible, but >= 2.0 + num_segments = int(duration / 5.0) + if duration % 5.0 > 0: + num_segments += 1 + + segment_duration = duration / num_segments + + # Create list of equal segments + segments = 
[segment_duration] * num_segments + + # Handle floating point precision errors + current_sum = sum(segments) + diff = duration - current_sum + if abs(diff) > 0.0001: + segments[-1] += diff + + return segments + + def get_available_music_tags(self) -> List[str]: + """Get list of available music moods""" + return self.music_manager.get_available_moods() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e8cef854bdd45ef4239e3d83d831e802a0f51bb0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +# Required Python packages for NCAkit + +# Core Framework +fastapi +uvicorn[standard] +pydantic +pydantic-settings + +# HTTP & Async +requests +aiohttp + +# Video Processing +moviepy==1.0.3 +pydub +Pillow<10.0.0 +numpy<2.0.0 + +# AI/ML +faster-whisper + +# Utilities +python-multipart diff --git a/static/app.js b/static/app.js new file mode 100644 index 0000000000000000000000000000000000000000..67405f8ac211e2a28202994dc953c56a30f36d76 --- /dev/null +++ b/static/app.js @@ -0,0 +1,177 @@ +// API base URL +const API_BASE = ''; + +// DOM elements +const videoForm = document.getElementById('videoForm'); +const scenesContainer = document.getElementById('scenesContainer'); +const addSceneBtn = document.getElementById('addScene'); +const statusDiv = document.getElementById('status'); +const videosListDiv = document.getElementById('videosList'); + +let sceneCount = 1; + +// Add new scene +addSceneBtn.addEventListener('click', () => { + sceneCount++; + const sceneDiv = document.createElement('div'); + sceneDiv.className = 'scene'; + sceneDiv.innerHTML = ` +
+ + +
+
+ + +
+ `; + scenesContainer.appendChild(sceneDiv); +}); + +// Submit form +videoForm.addEventListener('submit', async (e) => { + e.preventDefault(); + + // Gather scenes + const scenes = Array.from(document.querySelectorAll('.scene')).map(scene => { + const text = scene.querySelector('.scene-text').value; + const keywords = scene.querySelector('.scene-keywords').value; + return { + text, + searchTerms: keywords.split(',').map(k => k.trim()).filter(k => k) + }; + }); + + // Gather config + const config = { + orientation: document.getElementById('orientation').value, + voice: document.getElementById('voice').value, + music: document.getElementById('music').value || null, + musicVolume: document.getElementById('musicVolume').value, + captionPosition: document.getElementById('captionPosition').value, + paddingBack: 0 + }; + + // Show processing status + showStatus('Creating video... This may take a few minutes.', 'processing'); + + try { + const response = await fetch(`${API_BASE}/api/short-video`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ scenes, config }) + }); + + if (!response.ok) { + throw new Error('Failed to create video'); + } + + const data = await response.json(); + const videoId = data.videoId; + + showStatus(`Video queued! 
ID: ${videoId}`, 'success'); + + // Start polling for status + pollVideoStatus(videoId); + + // Refresh video list + setTimeout(loadVideos, 1000); + + } catch (error) { + showStatus(`Error: ${error.message}`, 'error'); + } +}); + +// Poll video status +async function pollVideoStatus(videoId) { + const maxAttempts = 120; // 10 minutes + let attempts = 0; + + const interval = setInterval(async () => { + attempts++; + + if (attempts > maxAttempts) { + clearInterval(interval); + showStatus('Video processing timeout', 'error'); + return; + } + + try { + const response = await fetch(`${API_BASE}/api/short-video/${videoId}/status`); + const data = await response.json(); + + if (data.status === 'ready') { + clearInterval(interval); + showStatus('Video ready! Check your videos list below.', 'success'); + loadVideos(); + } else if (data.status === 'failed') { + clearInterval(interval); + showStatus('Video processing failed', 'error'); + loadVideos(); + } + } catch (error) { + console.error('Error polling status:', error); + } + }, 5000); // Poll every 5 seconds +} + +// Show status message +function showStatus(message, type) { + statusDiv.textContent = message; + statusDiv.className = `status ${type}`; + statusDiv.classList.remove('hidden'); +} + +// Load videos list +async function loadVideos() { + try { + const response = await fetch(`${API_BASE}/api/short-videos`); + const data = await response.json(); + + if (data.videos.length === 0) { + videosListDiv.innerHTML = '

No videos yet. Create one above!

'; + return; + } + + videosListDiv.innerHTML = data.videos.map(video => ` +
+

Video ID: ${video.id}

+ ${video.status.toUpperCase()} +
+ ${video.status === 'ready' ? ` + Download + ` : ''} + +
+
+ `).join(''); + + } catch (error) { + console.error('Error loading videos:', error); + videosListDiv.innerHTML = '

Error loading videos

'; + } +} + +// Delete video +async function deleteVideo(videoId) { + if (!confirm('Are you sure you want to delete this video?')) { + return; + } + + try { + await fetch(`${API_BASE}/api/short-video/${videoId}`, { + method: 'DELETE' + }); + loadVideos(); + } catch (error) { + alert('Error deleting video'); + } +} + +// Load videos on page load +loadVideos(); + +// Auto-refresh videos list every 10 seconds +setInterval(loadVideos, 10000); diff --git a/static/fonts/TheBoldFont.ttf b/static/fonts/TheBoldFont.ttf new file mode 100644 index 0000000000000000000000000000000000000000..e790788d68d32db53f06a52f2185284b258db76c Binary files /dev/null and b/static/fonts/TheBoldFont.ttf differ diff --git a/static/index.html b/static/index.html new file mode 100644 index 0000000000000000000000000000000000000000..5cb733ec6adc3f5922df9c31492710138021da9f --- /dev/null +++ b/static/index.html @@ -0,0 +1,568 @@ + + + + + + + NCAkit - Neural Content Automation + + + + + +
+
+

🤖 NCAkit

+

Neural Content Automation Toolkit

+
+ + +
+ + +
+ + +
+
+

🎬 Create Story Reel

+

+ AI generates script → TTS → Character images → Final video +

+ +
+
+ + +
+ +
+ + +
+ + +
+

👤 Character Profile (for consistency)

+ +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+ + +
+
+ +
+ + +
+ + +
+ + +
+
+ + +
+
+

📹 Create Short Video

+

+ Scene-based video with TTS, captions, and Pexels backgrounds +

+ +
+
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ + + + +
+ + +
+
+
+ + + + + \ No newline at end of file diff --git a/static/music/Aurora on the Boulevard - National Sweetheart.mp3 b/static/music/Aurora on the Boulevard - National Sweetheart.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3dcd816964c5053e7ea92d5b6115f2b5d00427b9 --- /dev/null +++ b/static/music/Aurora on the Boulevard - National Sweetheart.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3ac883ec261ca6350edcbaf4af803c621446503e0a7b8a194abfce74d646bf +size 1672716 diff --git a/static/music/Baby Animals Playing - Joel Cummins.mp3 b/static/music/Baby Animals Playing - Joel Cummins.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..12db9dde3fd8f3427afe1a1f453841ce44c7bb31 --- /dev/null +++ b/static/music/Baby Animals Playing - Joel Cummins.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57011ecd4b2260d07155ae2bd7c69bb968e72baaa8a03ee0149c83f2f9ac31fa +size 1535417 diff --git a/static/music/Banjo Doops - Joel Cummins.mp3 b/static/music/Banjo Doops - Joel Cummins.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6b8661a003bf4ea020e57ca86a19e79b1b7492c4 --- /dev/null +++ b/static/music/Banjo Doops - Joel Cummins.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db27dcc03d6c9e686c66298778a91ab8c83d58abc10a3f41826ee4e2d82eaf4 +size 1204079 diff --git a/static/music/Buckle Up - Jeremy Korpas.mp3 b/static/music/Buckle Up - Jeremy Korpas.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..82dba16fb0254fa507688eba6ef6852e9ed33168 --- /dev/null +++ b/static/music/Buckle Up - Jeremy Korpas.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca08e6d344bb9d491d5e58bada2231831cce85b7f8d28bf8c7284eb8fcb2b0c5 +size 1586826 diff --git a/static/music/Cafecito por la Manana - Cumbia Deli.mp3 b/static/music/Cafecito por la Manana - Cumbia Deli.mp3 new file mode 100644 index 
0000000000000000000000000000000000000000..0dd3fa1391b666f5692595d736adeda951da59ab --- /dev/null +++ b/static/music/Cafecito por la Manana - Cumbia Deli.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fced893aef0edb289f3b83734eb297466b4a81bb97c5cb9f037fdfc51e449b1c +size 2388680 diff --git a/static/music/Champion - Telecasted.mp3 b/static/music/Champion - Telecasted.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e0fcb8308440f6e07777f95bba9a723c70525a1d --- /dev/null +++ b/static/music/Champion - Telecasted.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c930b57cb0e8146b8e1c7ac4941a59c23ce06fb45d028f561450bbd010fb39a +size 1751397 diff --git a/static/music/Crystaline - Quincas Moreira.mp3 b/static/music/Crystaline - Quincas Moreira.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a8742ee5f2c6d66d754b60f594f238699bd5522a --- /dev/null +++ b/static/music/Crystaline - Quincas Moreira.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d833de4846cea810032fbcd67d38589fd3842787fbbd6f5fe63d66bc8d4a9a +size 1736977 diff --git a/static/music/Curse of the Witches - Jimena Contreras.mp3 b/static/music/Curse of the Witches - Jimena Contreras.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..20c8f8bbe2f9f409ad16eac9e3f168062a977b04 --- /dev/null +++ b/static/music/Curse of the Witches - Jimena Contreras.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c02103fff292eadc79c81013f53b4be1f5aa2efca405649cd5fc1ed58fe3256 +size 1226336 diff --git a/static/music/Delayed Baggage - Ryan Stasik.mp3 b/static/music/Delayed Baggage - Ryan Stasik.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..14bf9c8775af935c97fdef1791375661f44524d5 --- /dev/null +++ b/static/music/Delayed Baggage - Ryan Stasik.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a95b53a89fd0ace25d880337d7068be44af2b176e4e97d723e7ed50c1c5bbd80 +size 1370218 diff --git a/static/music/Final Soliloquy - Asher Fulero.mp3 b/static/music/Final Soliloquy - Asher Fulero.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a8f7493fe6534cb4983f49d61388a3871d1d2724 --- /dev/null +++ b/static/music/Final Soliloquy - Asher Fulero.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c6e7662e8fff552562bac2a1343ab86b6b1dd684a907a9abeae5c17ec15fd5 +size 2204360 diff --git a/static/music/Heartbeat Of The Wind - Asher Fulero.mp3 b/static/music/Heartbeat Of The Wind - Asher Fulero.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..705196df09856392d878ae3f52884d0151c73033 --- /dev/null +++ b/static/music/Heartbeat Of The Wind - Asher Fulero.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5b46b86ab2991ec06d13b455f1af79867d4d695acd5fec9828b3a59de7dfc5 +size 1540119 diff --git a/static/music/Honey, I Dismembered The Kids - Ezra Lipp.mp3 b/static/music/Honey, I Dismembered The Kids - Ezra Lipp.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d6d53e1919c568c796628da6c6ce704bf7375449 --- /dev/null +++ b/static/music/Honey, I Dismembered The Kids - Ezra Lipp.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30afe0545017a4f6b4f73981ac77ea9a3f8421bad947e3667ecbabe8ca376e95 +size 1885562 diff --git a/static/music/Hopeful - Nat Keefe.mp3 b/static/music/Hopeful - Nat Keefe.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..adf0f7a89c5bf1538b4161279e4a84d1170f723a --- /dev/null +++ b/static/music/Hopeful - Nat Keefe.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3b69ef1fde3eb9d909a6fbd2201f0a19eaa475bcd06eb7336ede03486ec903 +size 2166430 diff --git a/static/music/Hopeful Freedom - Asher Fulero.mp3 b/static/music/Hopeful Freedom - Asher Fulero.mp3 new file mode 100644 
index 0000000000000000000000000000000000000000..e2aa7e00e1d09490b15cc6fd92f5f92f2fa3c081 --- /dev/null +++ b/static/music/Hopeful Freedom - Asher Fulero.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7fc2f3bc3ea56dc13352440bf0b2bbf714f1f5e1d4a5bc73e713c821ec1fd97 +size 2118156 diff --git a/static/music/Hopeless - Jimena Contreras.mp3 b/static/music/Hopeless - Jimena Contreras.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e3e8b657ea9f0c687a08debc759866a6210be048 --- /dev/null +++ b/static/music/Hopeless - Jimena Contreras.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e0c4b8e407fb8e7c22f4ceeeeb4749bad620c5b58e89664d3c139d87d3b7bd +size 3028471 diff --git a/static/music/Jetski - Telecasted.mp3 b/static/music/Jetski - Telecasted.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..235af9056e09fbb38393abd3fbc9678f6e3ff455 --- /dev/null +++ b/static/music/Jetski - Telecasted.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7054bb7371f10601ddbafc02c42a116b3792459f752505408345a8216b95933d +size 1802806 diff --git a/static/music/Like It Loud - Dyalla.mp3 b/static/music/Like It Loud - Dyalla.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..7634daa7ed345600e402e3927ad955cea8c52c8f --- /dev/null +++ b/static/music/Like It Loud - Dyalla.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca736b697e05c6e9430ef586e670e86828ef916b7a3cccf935a8f8f1a5d1dc4 +size 1988380 diff --git a/static/music/Name The Time And Place - Telecasted.mp3 b/static/music/Name The Time And Place - Telecasted.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..4a87a371a71d717695bbd681d277681fc4a844c7 --- /dev/null +++ b/static/music/Name The Time And Place - Telecasted.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f24a708a6e1c9f692a5ca2dc93afb5c54fa3449411834621d43d18c9ab01cdf0 +size 1791208 diff --git a/static/music/Night Hunt - Jimena Contreras.mp3 b/static/music/Night Hunt - Jimena Contreras.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a901aba318ce8baa64c1a8e9e1a7545fb598f7df --- /dev/null +++ b/static/music/Night Hunt - Jimena Contreras.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f997bec6195a3059cab18ac9c3ee4306bd9040574c4db375fd04591de3ec59fc +size 1086842 diff --git a/static/music/No.2 Remembering Her - Esther Abrami.mp3 b/static/music/No.2 Remembering Her - Esther Abrami.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..912762fc90b11505e4e5618532758d9ceb3f273b --- /dev/null +++ b/static/music/No.2 Remembering Her - Esther Abrami.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8503df3e61a6290037c7b2e5e36c1cab6fa7e2c74d74b490b3b07a62c86fc09b +size 1727573 diff --git a/static/music/Oh Please - Telecasted.mp3 b/static/music/Oh Please - Telecasted.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..51ca3caaac62fa64ce9b9f44fc53e77a55cc0c26 --- /dev/null +++ b/static/music/Oh Please - Telecasted.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5174ce5016ce824ed1329b5aa28961a42fe1acca47f7008494f53b6f4cc710bc +size 1947002 diff --git a/static/music/On The Hunt - Andrew Langdon.mp3 b/static/music/On The Hunt - Andrew Langdon.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..76ba9e0048d6ac6cd1f32e43e72c7b486c2b202e --- /dev/null +++ b/static/music/On The Hunt - Andrew Langdon.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9686ecbaaf9a7ce6ddaf1aed182ab883d12dd2f2e966225c5f1b7bdff4067c39 +size 1152670 diff --git a/static/music/Organic Guitar House - Dyalla.mp3 b/static/music/Organic Guitar House - Dyalla.mp3 new file mode 100644 index 
0000000000000000000000000000000000000000..7d9b04f51fdf0605a4471e3f6455b0c444820256 --- /dev/null +++ b/static/music/Organic Guitar House - Dyalla.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5855965526e3ce3685dffff638404aed9a05ed7d1db286a5a000ebf71fc9cdc +size 1988380 diff --git a/static/music/Phantom - Density & Time.mp3 b/static/music/Phantom - Density & Time.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d5d5f12f21c3fbd1f9aa178d0517b898faeba197 --- /dev/null +++ b/static/music/Phantom - Density & Time.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b29026275c5707f9eeda72b63eee835ddc47291fe15602530f2012c2ea2537e +size 2167684 diff --git a/static/music/Restless Heart - Jimena Contreras.mp3 b/static/music/Restless Heart - Jimena Contreras.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a7ee470933416683155c53911b3256b1f5c145bb --- /dev/null +++ b/static/music/Restless Heart - Jimena Contreras.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f2a509b09b7a15e316b3b094f69da264ec84ec0a65adacece1e3c5a1a95a27 +size 1152670 diff --git a/static/music/Seagull - Telecasted.mp3 b/static/music/Seagull - Telecasted.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..51d9a2d097b0b1f97561e668640b1fd6971cd1a6 --- /dev/null +++ b/static/music/Seagull - Telecasted.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb61692738dce3171784fbcf512a7de60b0aed34ed735d42e562c7be59b66669 +size 1516922 diff --git a/static/music/Sinister - Anno Domini Beats.mp3 b/static/music/Sinister - Anno Domini Beats.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..7a6332606fbd9736e133d9f9844d782075026ae5 --- /dev/null +++ b/static/music/Sinister - Anno Domini Beats.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6c0c7b79de29c1c8bad70bb539f2869732a88a80c5caf8d0b06b4cc8b500863d +size 2634754 diff --git a/static/music/Sly Sky - Telecasted.mp3 b/static/music/Sly Sky - Telecasted.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..9f2e4c020b0b4931be4af0f78bad4d0746f4076d --- /dev/null +++ b/static/music/Sly Sky - Telecasted.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e408c01a422abc234e407295740347d7a831cfc114995272309ae578294b9e8 +size 1915968 diff --git a/static/music/Touch - Anno Domini Beats.mp3 b/static/music/Touch - Anno Domini Beats.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..848d5fb949db3a6d96fead3175d7777f49ca2113 --- /dev/null +++ b/static/music/Touch - Anno Domini Beats.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5dae3ddcec93488e24eacc135b58172ef0ea95d26b699b747b7f031656cd54 +size 2042924 diff --git a/static/music/Traversing - Godmode.mp3 b/static/music/Traversing - Godmode.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f8573c9293749feb88cc0a28cd7bbb2a05b1965f --- /dev/null +++ b/static/music/Traversing - Godmode.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eecdec09b692ed6ee80064062c7ba5a86c2ed814fa27cf8e60936d33620f6b0 +size 1177748 diff --git a/static/music/Twin Engines - Jeremy Korpas.mp3 b/static/music/Twin Engines - Jeremy Korpas.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..79f7c67901d45ef9f2cbe8c8ce047628f595560a --- /dev/null +++ b/static/music/Twin Engines - Jeremy Korpas.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa0a1bb512f1f8f64557c1b60a73d774ee6f8376f3ab5ee4df85b39ec95ed71 +size 1505637 diff --git a/static/style.css b/static/style.css new file mode 100644 index 0000000000000000000000000000000000000000..537a2c2e7b373caa19c005f877d9c6245df3a949 --- /dev/null +++ b/static/style.css @@ -0,0 +1,230 @@ +* { + margin: 0; + 
padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + min-height: 100vh; + padding: 20px; +} + +.container { + max-width: 1200px; + margin: 0 auto; +} + +header { + background: white; + padding: 30px; + border-radius: 12px; + margin-bottom: 30px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); + text-align: center; +} + +header h1 { + color: #333; + margin-bottom: 10px; +} + +header p { + color: #666; + font-size: 16px; +} + +main { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 30px; +} + +@media (max-width: 968px) { + main { + grid-template-columns: 1fr; + } +} + +section { + background: white; + padding: 30px; + border-radius: 12px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); +} + +h2 { + color: #333; + margin-bottom: 20px; + font-size: 24px; +} + +h3 { + color: #555; + margin: 20px 0 15px; + font-size: 18px; +} + +.form-group { + margin-bottom: 20px; +} + +label { + display: block; + margin-bottom: 8px; + color: #555; + font-weight: 500; +} + +.input { + width: 100%; + padding: 12px; + border: 2px solid #e0e0e0; + border-radius: 8px; + font-size: 14px; + transition: border-color 0.3s; +} + +.input:focus { + outline: none; + border-color: #667eea; +} + +textarea.input { + resize: vertical; + font-family: inherit; +} + +.scene { + background: #f8f9fa; + padding: 20px; + border-radius: 8px; + margin-bottom: 15px; + border: 2px solid #e0e0e0; +} + +.btn { + padding: 12px 24px; + border: none; + border-radius: 8px; + font-size: 16px; + font-weight: 600; + cursor: pointer; + transition: all 0.3s; +} + +.btn-primary { + background: #667eea; + color: white; + width: 100%; +} + +.btn-primary:hover { + background: #5568d3; + transform: translateY(-2px); + box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4); +} + +.btn-secondary { + background: #e0e0e0; + color: #333; + margin-bottom: 20px; +} + 
+.btn-secondary:hover { + background: #d0d0d0; +} + +.form-actions { + margin-top: 30px; +} + +.status { + margin-top: 20px; + padding: 15px; + border-radius: 8px; + text-align: center; + font-weight: 500; +} + +.status.success { + background: #d4edda; + color: #155724; + border: 1px solid #c3e6cb; +} + +.status.error { + background: #f8d7da; + color: #721c24; + border: 1px solid #f5c6cb; +} + +.status.processing { + background: #fff3cd; + color: #856404; + border: 1px solid #ffeaa7; +} + +.hidden { + display: none; +} + +.videos-grid { + display: grid; + gap: 20px; +} + +.video-card { + background: #f8f9fa; + padding: 20px; + border-radius: 8px; + border: 2px solid #e0e0e0; +} + +.video-card h3 { + margin: 0 0 10px 0; + font-size: 16px; + color: #333; +} + +.video-status { + display: inline-block; + padding: 4px 12px; + border-radius: 20px; + font-size: 12px; + font-weight: 600; + margin-bottom: 15px; +} + +.video-status.processing { + background: #fff3cd; + color: #856404; +} + +.video-status.ready { + background: #d4edda; + color: #155724; +} + +.video-status.failed { + background: #f8d7da; + color: #721c24; +} + +.video-actions { + display: flex; + gap: 10px; +} + +.video-actions .btn { + padding: 8px 16px; + font-size: 14px; + flex: 1; +} + +.loading { + text-align: center; + color: #666; + padding: 40px; +}