Spaces:
Running on Zero
Running on Zero
| """ | |
| ZeroGPU-compatible entrypoint using gradio.Server. | |
| Server extends FastAPI, so all your existing API routes work unchanged. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| # 1. Lightweight imports only at top level | |
| import spaces | |
| import gradio as gr | |
| from gradio import Server | |
| from gradio.data_classes import FileData | |
| from fastapi import Request | |
| from slowapi.errors import RateLimitExceeded | |
| from slowapi import _rate_limit_exceeded_handler | |
# Name of the text-to-speech backend: taken from the TTS_ENGINE environment
# variable, falling back to "chatterbox", and always stored in lower case.
TTS_ENGINE = os.environ.get("TTS_ENGINE", "chatterbox").lower()
# 2. Create a gradio Server instead of a plain FastAPI app.
# Name it 'demo' so the HF Space picks it up automatically.
# Routers, exception handlers and the UI mount below all attach to this object.
demo = Server()
# -----------------------------------------------------
# INTEGRATE SERVER.PY ROUTES
# -----------------------------------------------------
# These project imports sit below the creation of `demo` rather than in the
# top-of-file import group.
# NOTE(review): presumably deliberate (import order / startup cost) - confirm
# before moving them.
from server import router, limiter, enforce_content_length_limit
from tools_api import router as tools_router

# Attach the routes declared in server.py and tools_api.py to the app.
demo.include_router(router)
demo.include_router(tools_router)

# Rate limiting (slowapi): publish the shared limiter on the app state and
# install slowapi's handler for RateLimitExceeded errors.
demo.state.limiter = limiter
demo.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# Apply content length middleware to the main app.
# Without the registration decorator the function below is defined but never
# runs, so the size limit would not be enforced on any request.
@demo.middleware("http")
async def content_length_middleware(request: Request, call_next):
    """Run every HTTP request through the shared content-length guard.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request to the next handler
            in the middleware chain.

    Returns:
        Whatever ``enforce_content_length_limit`` returns for this request.
    """
    # All policy (limit value, rejection response) lives in server.py; this
    # wrapper only hooks it into the app's middleware stack.
    return await enforce_content_length_limit(request, call_next)
# Register the handler as a route; an undecorated function is never reachable
# over HTTP.
# NOTE(review): the "/health" path is inferred from the function name -
# confirm it matches what clients and monitors expect.
@demo.get("/health")
def health() -> dict[str, str]:
    """Report liveness and the configured TTS engine.

    Returns:
        A dict with ``"status"`` fixed to ``"ok"`` and ``"tts"`` set to the
        module-level ``TTS_ENGINE`` value.
    """
    return {"status": "ok", "tts": TTS_ENGINE}
# -----------------------------------------------------
# ZERO GPU FUNCTION — lazy-loads torch/CUDA
# -----------------------------------------------------
# ZeroGPU only attaches a GPU to functions wrapped in `spaces.GPU`; without
# the decorator this call would run without a device.
# NOTE(review): the default time budget is used here - pass `duration=...`
# if jobs can exceed it.
@spaces.GPU
def run_pipeline(job_id: str):
    """Process one job through the pipeline.

    Args:
        job_id: Identifier of the job, passed through to
            ``pipeline.process_job`` unchanged.

    Returns:
        The value returned by ``pipeline.process_job(job_id)``.
    """
    # Imported inside the function so the heavy pipeline module is only
    # loaded when a job actually runs, not at module import time.
    from pipeline import process_job

    return process_job(job_id)
# -----------------------------------------------------
# GRADIO API INTEGRATION (this is what ZeroGPU detects)
# -----------------------------------------------------
# Register the function with the Server's API engine; an undecorated function
# is not exposed as an endpoint.
# NOTE(review): the endpoint name is inferred - confirm it matches what
# API clients call.
@demo.api(name="run_pipeline")
def api_run_pipeline(job_id: str):
    """Expose ``run_pipeline`` through Gradio's API engine.

    This is a thin wrapper: any GPU scheduling is the responsibility of
    ``run_pipeline`` itself.

    Args:
        job_id: Identifier of the job to process.

    Returns:
        The value returned by ``run_pipeline(job_id)``.
    """
    return run_pipeline(job_id)
# -----------------------------------------------------
# OPTIONAL: Gradio UI (if you still want a basic UI)
# -----------------------------------------------------
# Minimal form: a job ID goes in, the pipeline result comes out.
# Components are created in display order inside the Blocks context.
with gr.Blocks(title="VideoVoice API") as ui:
    # Heading shows the active TTS engine in upper case.
    gr.Markdown(f"# VideoVoice API ({TTS_ENGINE.upper()})")
    job_id_box = gr.Textbox(label="Job ID")
    output_box = gr.Textbox(label="Result")
    btn = gr.Button("Run Pipeline")
    # The button calls run_pipeline directly with the textbox value and
    # writes the return value to the result box.
    btn.click(fn=run_pipeline, inputs=job_id_box, outputs=output_box)

# Mount the UI onto the Server instance under the /ui path.
gr.mount_gradio_app(demo, ui, path="/ui")
# -----------------------------------------------------
# ENTRYPOINT
# -----------------------------------------------------
# Only launch when executed as a script; importing this module just builds
# `demo` without starting a server.
if __name__ == "__main__":
    demo.launch(show_error=True)