File size: 2,732 Bytes
02ad302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b7cd5f
02ad302
 
5b7cd5f
02ad302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
ZeroGPU-compatible entrypoint using gradio.Server.
Server extends FastAPI, so all your existing API routes work unchanged.
"""
from __future__ import annotations

import os

# 1. Lightweight imports only at top level
import spaces
import gradio as gr
from gradio import Server
from gradio.data_classes import FileData
from fastapi import Request
from slowapi.errors import RateLimitExceeded
from slowapi import _rate_limit_exceeded_handler

# Name of the TTS backend, read once at import time from the TTS_ENGINE
# environment variable. Defaults to "chatterbox" and is normalized to lowercase.
TTS_ENGINE = os.getenv("TTS_ENGINE", "chatterbox").lower()

# 2. Create Server instead of FastAPI
# Name it 'demo' so HF Space picks it up automatically
demo = Server()

# -----------------------------------------------------
# INTEGRATE SERVER.PY ROUTES
# -----------------------------------------------------
# NOTE(review): these project imports sit below `demo = Server()` instead of
# in the top-of-file import group; presumably deliberate (import order /
# startup cost) — confirm before moving them.
from server import router, limiter, enforce_content_length_limit
from tools_api import router as tools_router

# Register the routes defined in server.py and tools_api.py on this Server.
demo.include_router(router)
demo.include_router(tools_router)
# Expose the slowapi limiter on the app state and install slowapi's handler
# for RateLimitExceeded errors.
demo.state.limiter = limiter
demo.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Apply content length middleware to the main app
@demo.middleware("http")
async def content_length_middleware(request: Request, call_next):
    """Delegate every HTTP request to ``enforce_content_length_limit``.

    The helper imported from ``server`` is given the incoming request and the
    next handler in the chain; whatever it returns becomes the response.
    """
    response = await enforce_content_length_limit(request, call_next)
    return response

@demo.get("/api/health")
def health():
    """Return a fixed "ok" status together with the configured TTS engine."""
    return dict(status="ok", tts=TTS_ENGINE)

# -----------------------------------------------------
# ZERO GPU FUNCTION — lazy-loads torch/CUDA
# -----------------------------------------------------
@spaces.GPU(duration=60)
def run_pipeline(job_id: str):
    """Run the processing pipeline for one job under the ``spaces.GPU`` decorator.

    ``pipeline`` is imported inside the function body, so importing this
    module does not import it; the import happens when the function is called.

    Args:
        job_id: Identifier handed unchanged to ``pipeline.process_job``.

    Returns:
        Whatever ``process_job`` returns for that job.
    """
    from pipeline import process_job as _process_job

    outcome = _process_job(job_id)
    return outcome

# -----------------------------------------------------
# GRADIO API INTEGRATION (this is what ZeroGPU detects)
# -----------------------------------------------------
@demo.api(name="run_pipeline")
def api_run_pipeline(job_id: str):
    """
    Gradio API endpoint registered under the name "run_pipeline".

    Forwards ``job_id`` to the GPU-decorated ``run_pipeline`` function and
    returns its result unchanged. Per this file's design, going through
    Gradio's API engine is what lets ZeroGPU allocate a GPU for the call.
    """
    result = run_pipeline(job_id)
    return result

# -----------------------------------------------------
# OPTIONAL: Gradio UI (if you still want a basic UI)
# -----------------------------------------------------
with gr.Blocks(title="VideoVoice API") as ui:
    # Component creation order is kept as-is: heading, input, output, button.
    gr.Markdown(f"# VideoVoice API ({TTS_ENGINE.upper()})")
    job_id_box = gr.Textbox(label="Job ID")
    output_box = gr.Textbox(label="Result")
    btn = gr.Button("Run Pipeline")
    # A click passes the Job ID text to run_pipeline and writes the returned
    # value into the Result box.
    btn.click(run_pipeline, [job_id_box], [output_box])

# Attach the Blocks UI to the Server instance under the /ui path
gr.mount_gradio_app(app=demo, blocks=ui, path="/ui")

# -----------------------------------------------------
# ENTRYPOINT
# -----------------------------------------------------
def _main():
    """Launch the server, passing ``show_error=True``."""
    demo.launch(show_error=True)


if __name__ == "__main__":
    _main()