""" ZeroGPU-compatible entrypoint using gradio.Server. Server extends FastAPI, so all your existing API routes work unchanged. """ from __future__ import annotations import os # 1. Lightweight imports only at top level import spaces import gradio as gr from gradio import Server from gradio.data_classes import FileData from fastapi import Request from slowapi.errors import RateLimitExceeded from slowapi import _rate_limit_exceeded_handler TTS_ENGINE = os.getenv("TTS_ENGINE", "chatterbox").lower() # 2. Create Server instead of FastAPI # Name it 'demo' so HF Space picks it up automatically demo = Server() # ----------------------------------------------------- # INTEGRATE SERVER.PY ROUTES # ----------------------------------------------------- from server import router, limiter, enforce_content_length_limit from tools_api import router as tools_router demo.include_router(router) demo.include_router(tools_router) demo.state.limiter = limiter demo.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) # Apply content length middleware to the main app @demo.middleware("http") async def content_length_middleware(request: Request, call_next): return await enforce_content_length_limit(request, call_next) @demo.get("/api/health") def health(): return {"status": "ok", "tts": TTS_ENGINE} # ----------------------------------------------------- # ZERO GPU FUNCTION — lazy-loads torch/CUDA # ----------------------------------------------------- @spaces.GPU(duration=60) def run_pipeline(job_id: str): from pipeline import process_job return process_job(job_id) # ----------------------------------------------------- # GRADIO API INTEGRATION (this is what ZeroGPU detects) # ----------------------------------------------------- @demo.api(name="run_pipeline") def api_run_pipeline(job_id: str): """ Exposed through Gradio's API engine. ZeroGPU will allocate a GPU when this endpoint is called. """ return run_pipeline(job_id) # ----------------------------------------------------- # OPTIONAL: Gradio UI (if you still want a basic UI) # ----------------------------------------------------- with gr.Blocks(title="VideoVoice API") as ui: gr.Markdown(f"# VideoVoice API ({TTS_ENGINE.upper()})") job_id_box = gr.Textbox(label="Job ID") output_box = gr.Textbox(label="Result") btn = gr.Button("Run Pipeline") btn.click(fn=run_pipeline, inputs=job_id_box, outputs=output_box) # Mount the UI onto the Server instance gr.mount_gradio_app(demo, ui, path="/ui") # ----------------------------------------------------- # ENTRYPOINT # ----------------------------------------------------- if __name__ == "__main__": demo.launch(show_error=True)