hazardtln committed on
Commit
863559c
·
1 Parent(s): 72c5e69

Optimized server for concurrency and added full engine warmup

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. App/backend/main.py +29 -6
.gitignore CHANGED
@@ -19,3 +19,5 @@ App/backend/*.wav
19
  .vscode
20
  .idea
21
  .DS_Store
 
 
 
19
  .vscode
20
  .idea
21
  .DS_Store
22
+ # Test outputs
23
+ test_outputs/
App/backend/main.py CHANGED
@@ -23,15 +23,25 @@ log = logging.getLogger("uvicorn.error")
23
  BACKEND_DIR = os.path.dirname(os.path.abspath(__file__))
24
  MODELS_DIR = os.path.join(BACKEND_DIR, "models")
25
 
26
- # Monkey-patch ONNX Runtime for better provider selection
27
  _original_inf_session = onnxruntime.InferenceSession
28
  def _patched_inf_session(path_or_bytes, sess_options=None, providers=None, *args, **kwargs):
 
 
 
 
 
 
 
 
29
  if providers is None or providers == ['CPUExecutionProvider']:
30
  available = onnxruntime.get_available_providers()
 
31
  preferred = [p for p in ['CUDAExecutionProvider', 'DmlExecutionProvider', 'CoreMLExecutionProvider'] if p in available]
32
  preferred.append('CPUExecutionProvider')
33
  providers = preferred
34
- return _original_inf_session(path_or_bytes, sess_options, providers, *args, **kwargs)
 
35
  onnxruntime.InferenceSession = _patched_inf_session
36
 
37
  # Load engines lazily
@@ -101,11 +111,24 @@ def synthesize_supertonic(text: str, voice_id: str, speed: float = 1.0, quality:
101
  async def lifespan(app: FastAPI):
102
  log.info("Warming up TTS engines...")
103
  try:
104
- # Warmup default piper
105
- synthesize_piper("Ready.", "en_US-lessac-low", None)
106
- log.info("Piper engine ready.")
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  except Exception as e:
108
- log.warning(f"Piper warmup failed: {e}")
109
  yield
110
 
111
  app = FastAPI(lifespan=lifespan)
 
23
  BACKEND_DIR = os.path.dirname(os.path.abspath(__file__))
24
  MODELS_DIR = os.path.join(BACKEND_DIR, "models")
25
 
26
+ # Monkey-patch ONNX Runtime for better provider selection and thread management
27
  _original_inf_session = onnxruntime.InferenceSession
28
  def _patched_inf_session(path_or_bytes, sess_options=None, providers=None, *args, **kwargs):
29
+ if sess_options is None:
30
+ sess_options = onnxruntime.SessionOptions()
31
+
32
+ # Crucial for shared server environments: Limit threads per request to allow better
33
+ # concurrency without CPU lockup (especially on Hugging Face free spaces)
34
+ sess_options.intra_op_num_threads = 1
35
+ sess_options.inter_op_num_threads = 1
36
+
37
  if providers is None or providers == ['CPUExecutionProvider']:
38
  available = onnxruntime.get_available_providers()
39
+ # Prefer GPU if available
40
  preferred = [p for p in ['CUDAExecutionProvider', 'DmlExecutionProvider', 'CoreMLExecutionProvider'] if p in available]
41
  preferred.append('CPUExecutionProvider')
42
  providers = preferred
43
+
44
+ return _original_inf_session(path_or_bytes, sess_options=sess_options, providers=providers, *args, **kwargs)
45
  onnxruntime.InferenceSession = _patched_inf_session
46
 
47
  # Load engines lazily
 
111
  async def lifespan(app: FastAPI):
112
  log.info("Warming up TTS engines...")
113
  try:
114
+ # 1. Warm up Supertonic (Premium)
115
+ get_supertonic()
116
+ log.info("Supertonic engine ready.")
117
+
118
+ # 2. Warm up Standard Piper (US Female)
119
+ get_piper_voice("en_US-lessac-low")
120
+ log.info("Piper (standard) ready.")
121
+
122
+ # 3. Warm up Multi-speaker Piper (LibriTTS) - This is the default in UI
123
+ get_piper_voice("en_US-libritts_r-medium")
124
+ log.info("Piper (multi-speaker) ready.")
125
+
126
+ # 4. Perform a tiny synthesis to ensure inference graph is built
127
+ synthesize_piper("READY", "en_US-lessac-low", None)
128
+ log.info("All TTS engines fully primed.")
129
+
130
  except Exception as e:
131
+ log.warning(f"Engine warmup incomplete: {e}")
132
  yield
133
 
134
  app = FastAPI(lifespan=lifespan)