IAMCB committed on
Commit
74ba223
·
1 Parent(s): 6890329

Update the UI and API

Browse files
app.py CHANGED
@@ -1,116 +1,221 @@
1
  """
2
  Kokoro-TTS Local Generator
3
  -------------------------
4
- A Gradio interface for the Kokoro-TTS-Local text-to-speech system.
5
- Supports multiple voices and audio formats, with cross-platform compatibility.
6
 
7
  Key Features:
8
  - Multiple voice models support (26+ voices)
9
- - Real-time generation with progress logging
10
  - WAV, MP3, and AAC output formats
 
11
  - Network sharing capabilities
12
  - Cross-platform compatibility (Windows, macOS, Linux)
13
-
14
- Dependencies:
15
- - kokoro: Official Kokoro TTS library
16
- - gradio: Web interface framework
17
- - soundfile: Audio file handling
18
- - pydub: Audio format conversion
19
  """
20
 
21
  import gradio as gr
22
- import os
23
- import sys
24
  import platform
25
- from datetime import datetime
26
  import shutil
27
  from pathlib import Path
28
  import soundfile as sf
29
  from pydub import AudioSegment
30
  import torch
31
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  from models import (
33
  list_available_voices, build_model,
34
  generate_speech
35
  )
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Global configuration
38
- CONFIG_FILE = "tts_config.json" # Stores user preferences and paths
39
- DEFAULT_OUTPUT_DIR = "outputs" # Directory for generated audio files
40
- SAMPLE_RATE = 24000 # Updated from 22050 to match new model
41
 
42
- # Initialize model globally
43
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
44
  model = None
 
 
 
 
 
 
 
 
 
45
 
46
- def get_available_voices():
47
- """Get list of available voice models."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  try:
49
- # Initialize model to trigger voice downloads
50
- global model
51
  if model is None:
52
- print("Initializing model and downloading voices...")
53
  model = build_model(None, device)
 
 
 
 
 
 
 
 
 
 
54
 
55
  voices = list_available_voices()
56
  if not voices:
57
- print("No voices found after initialization. Attempting to download...")
58
- download_voice_files() # Try downloading again
59
- voices = list_available_voices()
60
 
61
- print("Available voices:", voices)
62
  return voices
63
  except Exception as e:
64
- print(f"Error getting voices: {e}")
65
  return []
66
 
67
- def convert_audio(input_path: str, output_path: str, format: str):
68
  """Convert audio to specified format."""
69
  try:
70
- if format == "wav":
71
  return input_path
 
 
72
  audio = AudioSegment.from_wav(input_path)
73
- if format == "mp3":
 
74
  audio.export(output_path, format="mp3", bitrate="192k")
75
- elif format == "aac":
76
  audio.export(output_path, format="aac", bitrate="192k")
 
 
 
 
 
77
  return output_path
78
  except Exception as e:
79
- print(f"Error converting audio: {e}")
80
  return input_path
81
 
82
- def generate_tts_with_logs(voice_name, text, format):
83
- """Generate TTS audio with progress logging."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  global model
85
 
86
  try:
87
  # Initialize model if needed
88
- if model is None:
89
- print("Initializing model...")
90
- model = build_model(None, device)
91
 
92
  # Create output directory
93
- os.makedirs(DEFAULT_OUTPUT_DIR, exist_ok=True)
94
 
95
  # Generate base filename from text
96
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
97
- base_name = f"tts_{timestamp}"
98
- wav_path = os.path.join(DEFAULT_OUTPUT_DIR, f"{base_name}.wav")
 
 
 
 
99
 
100
  # Generate speech
101
- print(f"\nGenerating speech for: '{text}'")
102
- print(f"Using voice: {voice_name}")
103
 
104
- generator = model(text, voice=f"voices/{voice_name}.pt", speed=1.0, split_pattern=r'\n+')
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  all_audio = []
107
- for gs, ps, audio in generator:
108
  if audio is not None:
109
  if isinstance(audio, np.ndarray):
110
  audio = torch.from_numpy(audio).float()
111
  all_audio.append(audio)
112
- print(f"Generated segment: {gs}")
113
- print(f"Phonemes: {ps}")
114
 
115
  if not all_audio:
116
  raise Exception("No audio generated")
@@ -118,67 +223,341 @@ def generate_tts_with_logs(voice_name, text, format):
118
  # Combine audio segments and save
119
  final_audio = torch.cat(all_audio, dim=0)
120
  sf.write(wav_path, final_audio.numpy(), SAMPLE_RATE)
 
121
 
122
  # Convert to requested format if needed
123
- if format != "wav":
124
- output_path = os.path.join(DEFAULT_OUTPUT_DIR, f"{base_name}.{format}")
125
- return convert_audio(wav_path, output_path, format)
126
 
127
  return wav_path
128
 
129
  except Exception as e:
130
- print(f"Error generating speech: {e}")
131
  import traceback
132
- traceback.print_exc()
133
  return None
134
 
135
- def create_interface(server_name="0.0.0.0", server_port=7860):
136
- """Create and launch the Gradio interface."""
 
137
 
138
  # Get available voices
139
  voices = get_available_voices()
140
  if not voices:
141
- print("No voices found! Please check the voices directory.")
142
- return
143
-
 
 
 
 
 
 
 
 
 
144
  # Create interface
145
- with gr.Blocks(title="Kokoro TTS Generator") as interface:
146
- gr.Markdown("# Kokoro TTS Generator")
 
147
 
148
  with gr.Row():
149
- with gr.Column():
 
150
  voice = gr.Dropdown(
151
  choices=voices,
152
- value=voices[0] if voices else None,
153
  label="Voice"
154
  )
 
155
  text = gr.Textbox(
156
- lines=3,
157
  placeholder="Enter text to convert to speech...",
158
- label="Text"
159
  )
160
- format = gr.Radio(
 
161
  choices=["wav", "mp3", "aac"],
162
- value="wav",
163
  label="Output Format"
164
  )
165
- generate = gr.Button("Generate Speech")
 
 
 
 
 
 
 
 
 
166
 
167
- with gr.Column():
168
  output = gr.Audio(label="Generated Audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- generate.click(
171
- fn=generate_tts_with_logs,
172
- inputs=[voice, text, format],
173
- outputs=output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- # Launch interface
177
- interface.launch(
178
- server_name=server_name,
179
- server_port=server_port,
180
- share=True
181
- )
 
182
 
183
  if __name__ == "__main__":
184
- create_interface()
 
1
  """
2
  Kokoro-TTS Local Generator
3
  -------------------------
4
+ A high-performance text-to-speech system with both Gradio UI and REST API support.
5
+ Provides multiple voice models, audio formats, and cross-platform compatibility.
6
 
7
  Key Features:
8
  - Multiple voice models support (26+ voices)
9
+ - Real-time generation with progress tracking
10
  - WAV, MP3, and AAC output formats
11
+ - REST API for programmatic access
12
  - Network sharing capabilities
13
  - Cross-platform compatibility (Windows, macOS, Linux)
14
+ - Configurable caching and model management
 
 
 
 
 
15
  """
16
 
17
  import gradio as gr
18
+ import json
 
19
  import platform
20
+
21
  import shutil
22
  from pathlib import Path
23
  import soundfile as sf
24
  from pydub import AudioSegment
25
  import torch
26
  import numpy as np
27
+ import time
28
+ import uuid
29
+ from typing import Dict, List, Optional, Union, Tuple, Generator
30
+ import threading
31
+ import os
32
+ import sys
33
+ import time
34
+ import socket
35
+ import threading
36
+ import logging
37
+ from datetime import datetime
38
+ from werkzeug.middleware.dispatcher import DispatcherMiddleware
39
+ from werkzeug.serving import run_simple
40
+ # Import Kokoro models
41
  from models import (
42
  list_available_voices, build_model,
43
  generate_speech
44
  )
45
 
46
+ # Flask for API
47
+ from flask import Flask, request, jsonify, send_file
48
+
49
+ # Configure logging
50
+ logging.basicConfig(
51
+ level=logging.INFO,
52
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
53
+ handlers=[
54
+ logging.StreamHandler(),
55
+ logging.FileHandler("kokoro_tts.log")
56
+ ]
57
+ )
58
+ logger = logging.getLogger("kokoro_tts")
59
+
60
  # Global configuration
61
+ CONFIG_FILE = "tts_config.json"
62
+ DEFAULT_OUTPUT_DIR = "outputs"
63
+ SAMPLE_RATE = 24000
64
 
65
+ # Model and configuration
66
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
67
+ logger.info(f"Using device: {device}")
68
  model = None
69
+ config = {
70
+ "output_dir": DEFAULT_OUTPUT_DIR,
71
+ "default_voice": None,
72
+ "default_format": "wav",
73
+ "api_enabled": True,
74
+ "api_port": 5000,
75
+ "ui_port": 7860,
76
+ "share_ui": True
77
+ }
78
 
79
+ def load_config() -> Dict:
80
+ """Load configuration from file or create default."""
81
+ try:
82
+ if os.path.exists(CONFIG_FILE):
83
+ with open(CONFIG_FILE, 'r') as f:
84
+ loaded_config = json.load(f)
85
+ # Update with any new config options
86
+ for k, v in config.items():
87
+ if k not in loaded_config:
88
+ loaded_config[k] = v
89
+ return loaded_config
90
+ else:
91
+ save_config(config)
92
+ return config
93
+ except Exception as e:
94
+ logger.error(f"Error loading config: {e}")
95
+ return config
96
+
97
+ def save_config(config_data: Dict) -> None:
98
+ """Save configuration to file."""
99
+ try:
100
+ with open(CONFIG_FILE, 'w') as f:
101
+ json.dump(config_data, f, indent=4)
102
+ except Exception as e:
103
+ logger.error(f"Error saving config: {e}")
104
+
105
+ def initialize_model() -> None:
106
+ """Initialize the TTS model."""
107
+ global model
108
  try:
 
 
109
  if model is None:
110
+ logger.info("Initializing Kokoro TTS model...")
111
  model = build_model(None, device)
112
+ logger.info("Model initialization complete")
113
+ except Exception as e:
114
+ logger.error(f"Error initializing model: {e}")
115
+ raise
116
+
117
+ def get_available_voices() -> List[str]:
118
+ """Get list of available voice models."""
119
+ try:
120
+ # Initialize model to trigger voice downloads
121
+ initialize_model()
122
 
123
  voices = list_available_voices()
124
  if not voices:
125
+ logger.warning("No voices found after initialization.")
 
 
126
 
127
+ logger.info(f"Available voices: {voices}")
128
  return voices
129
  except Exception as e:
130
+ logger.error(f"Error getting voices: {e}")
131
  return []
132
 
133
+ def convert_audio(input_path: str, output_format: str) -> str:
134
  """Convert audio to specified format."""
135
  try:
136
+ if output_format == "wav":
137
  return input_path
138
+
139
+ output_path = os.path.splitext(input_path)[0] + f".{output_format}"
140
  audio = AudioSegment.from_wav(input_path)
141
+
142
+ if output_format == "mp3":
143
  audio.export(output_path, format="mp3", bitrate="192k")
144
+ elif output_format == "aac":
145
  audio.export(output_path, format="aac", bitrate="192k")
146
+ else:
147
+ logger.warning(f"Unsupported format: {output_format}, defaulting to wav")
148
+ return input_path
149
+
150
+ logger.info(f"Converted audio to {output_format}: {output_path}")
151
  return output_path
152
  except Exception as e:
153
+ logger.error(f"Error converting audio: {e}")
154
  return input_path
155
 
156
+ def generate_tts(
157
+ text: str,
158
+ voice_name: str,
159
+ output_format: str = "wav",
160
+ output_path: Optional[str] = None,
161
+ speed: float = 1.0
162
+ ) -> Optional[str]:
163
+ """
164
+ Generate TTS audio and return the path to the generated file.
165
+
166
+ Args:
167
+ text: Text to convert to speech
168
+ voice_name: Name of the voice to use
169
+ output_format: Output audio format (wav, mp3, aac)
170
+ output_path: Optional custom output path
171
+ speed: Speech speed multiplier
172
+
173
+ Returns:
174
+ Path to the generated audio file, or None if generation failed
175
+ """
176
  global model
177
 
178
  try:
179
  # Initialize model if needed
180
+ initialize_model()
 
 
181
 
182
  # Create output directory
183
+ os.makedirs(config["output_dir"], exist_ok=True)
184
 
185
  # Generate base filename from text
186
+ if output_path:
187
+ base_path = output_path
188
+ wav_path = os.path.splitext(base_path)[0] + ".wav"
189
+ else:
190
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
191
+ base_name = f"tts_{timestamp}_{str(uuid.uuid4())[:8]}"
192
+ wav_path = os.path.join(config["output_dir"], f"{base_name}.wav")
193
 
194
  # Generate speech
195
+ logger.info(f"Generating speech for text: '{text[:50]}...' using voice: {voice_name}")
 
196
 
197
+ # Prepare voice path
198
+ voice_path = f"voices/{voice_name}.pt"
199
+ if not os.path.exists(voice_path):
200
+ logger.warning(f"Voice file not found: {voice_path}")
201
+ voices = get_available_voices()
202
+ if not voices:
203
+ raise Exception("No voices available")
204
+ if voice_name not in voices:
205
+ logger.warning(f"Using default voice instead of {voice_name}")
206
+ voice_name = voices[0]
207
+ voice_path = f"voices/{voice_name}.pt"
208
+
209
+ # Generate speech
210
+ generator = model(text, voice=voice_path, speed=speed, split_pattern=r'\n+')
211
 
212
  all_audio = []
213
+ for i, (gs, ps, audio) in enumerate(generator):
214
  if audio is not None:
215
  if isinstance(audio, np.ndarray):
216
  audio = torch.from_numpy(audio).float()
217
  all_audio.append(audio)
218
+ logger.debug(f"Generated segment {i+1}: {gs[:30]}...")
 
219
 
220
  if not all_audio:
221
  raise Exception("No audio generated")
 
223
  # Combine audio segments and save
224
  final_audio = torch.cat(all_audio, dim=0)
225
  sf.write(wav_path, final_audio.numpy(), SAMPLE_RATE)
226
+ logger.info(f"Saved WAV file to {wav_path}")
227
 
228
  # Convert to requested format if needed
229
+ if output_format != "wav":
230
+ output_file = convert_audio(wav_path, output_format)
231
+ return output_file
232
 
233
  return wav_path
234
 
235
  except Exception as e:
236
+ logger.error(f"Error generating speech: {e}")
237
  import traceback
238
+ logger.error(traceback.format_exc())
239
  return None
240
 
241
+ # UI INTERFACE
242
+ def create_ui_interface():
243
+ """Create and return the Gradio interface."""
244
 
245
  # Get available voices
246
  voices = get_available_voices()
247
  if not voices:
248
+ logger.error("No voices found! Please check the voices directory.")
249
+ # Don't return None, continue with empty list to allow UI to still load
250
+ voices = []
251
+
252
+ # Set default voice
253
+ default_voice = config.get("default_voice")
254
+ if not default_voice or default_voice not in voices:
255
+ default_voice = voices[0] if voices else None
256
+ if default_voice:
257
+ config["default_voice"] = default_voice
258
+ save_config(config)
259
+
260
  # Create interface
261
+ with gr.Blocks(title="CB's TTS Generator") as interface:
262
+ gr.Markdown("# **Welcome to CB's TTS Generator**")
263
+ gr.Markdown("There are multiple voices available for you to choose. This TTS is powered by Kokoro.")
264
 
265
  with gr.Row():
266
+ with gr.Column(scale=1):
267
+ # Group voice selection and text input without using Box
268
  voice = gr.Dropdown(
269
  choices=voices,
270
+ value=default_voice,
271
  label="Voice"
272
  )
273
+
274
  text = gr.Textbox(
275
+ lines=8,
276
  placeholder="Enter text to convert to speech...",
277
+ label="Text Input"
278
  )
279
+
280
+ format_choice = gr.Radio(
281
  choices=["wav", "mp3", "aac"],
282
+ value=config.get("default_format", "wav"),
283
  label="Output Format"
284
  )
285
+
286
+ speed = gr.Slider(
287
+ minimum=0.5,
288
+ maximum=2.0,
289
+ value=1.0,
290
+ step=0.1,
291
+ label="Speech Speed"
292
+ )
293
+
294
+ generate_btn = gr.Button("Generate Speech", variant="primary")
295
 
296
+ with gr.Column(scale=1):
297
  output = gr.Audio(label="Generated Audio")
298
+
299
+ status = gr.Textbox(label="Status", interactive=False)
300
+
301
+ # Function to update status and generate speech
302
+ def generate_wrapper(voice_name, text_input, format_choice, speed_value):
303
+ if not text_input.strip():
304
+ return None, "Error: Please enter some text to convert."
305
+
306
+ try:
307
+ output_path = generate_tts(
308
+ text=text_input,
309
+ voice_name=voice_name,
310
+ output_format=format_choice,
311
+ speed=speed_value
312
+ )
313
+
314
+ if output_path:
315
+ return output_path, f"Success! Generated audio with voice: {voice_name}"
316
+ else:
317
+ return None, "Error: Failed to generate audio. Check logs for details."
318
+ except Exception as e:
319
+ logger.error(f"UI generation error: {e}")
320
+ return None, f"Error: {str(e)}"
321
+
322
+ generate_btn.click(
323
+ fn=generate_wrapper,
324
+ inputs=[voice, text, format_choice, speed],
325
+ outputs=[output, status]
326
+ )
327
+
328
+ # Add movie quote examples if we have voices
329
+ if voices:
330
+ gr.Examples(
331
+ [
332
+ ["May the Force be with you.", default_voice, "wav", 1.0],
333
+ ["Here's looking at you, kid.", default_voice, "mp3", 1.0],
334
+ ["I'll be back.", default_voice, "wav", 1.0],
335
+ ["Houston, we have a problem.", default_voice, "mp3", 1.0]
336
+ ],
337
+ fn=generate_wrapper,
338
+ inputs=[text, voice, format_choice, speed],
339
+ outputs=[output, status]
340
+ )
341
+
342
+ return interface
343
+
344
+ # API SERVER
345
+ def create_api_server() -> Flask:
346
+ """Create and configure the Flask API server."""
347
+ app = Flask("KokoroTTS-API")
348
+
349
+ @app.route('/api/voices', methods=['GET'])
350
+ def api_voices():
351
+ """Get available voices."""
352
+ try:
353
+ voices = get_available_voices()
354
+ return jsonify({"voices": voices, "default": config.get("default_voice")})
355
+ except Exception as e:
356
+ logger.error(f"API error in voices: {e}")
357
+ return jsonify({"error": str(e)}), 500
358
+
359
+ @app.route('/api/tts', methods=['POST'])
360
+ def api_tts():
361
+ """Generate speech from text."""
362
+ try:
363
+ data = request.json
364
+
365
+ if not data or 'text' not in data:
366
+ return jsonify({"error": "Missing 'text' field"}), 400
367
+
368
+ text = data['text']
369
+ voice = data.get('voice', config.get("default_voice"))
370
+ output_format = data.get('format', config.get("default_format", "wav"))
371
+ speed = float(data.get('speed', 1.0))
372
+
373
+ # Create a dedicated output filename for this request
374
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
375
+ request_id = str(uuid.uuid4())[:8]
376
+ filename = f"api_tts_{timestamp}_{request_id}.{output_format}"
377
+ output_path = os.path.join(config["output_dir"], filename)
378
+
379
+ # Generate audio
380
+ generated_path = generate_tts(
381
+ text=text,
382
+ voice_name=voice,
383
+ output_format=output_format,
384
+ output_path=output_path,
385
+ speed=speed
386
+ )
387
+
388
+ if not generated_path or not os.path.exists(generated_path):
389
+ logger.error(f"Generated path doesn't exist: {generated_path}")
390
+ return jsonify({"error": "Failed to generate audio file"}), 500
391
+
392
+ # Verify file size
393
+ file_size = os.path.getsize(generated_path)
394
+ if file_size < 100: # Very small file likely indicates an error
395
+ logger.error(f"Generated file is too small ({file_size} bytes)")
396
+ return jsonify({"error": "Generated audio file appears to be empty or corrupted"}), 500
397
+
398
+ logger.info(f"Sending audio file: {generated_path} ({file_size} bytes)")
399
+
400
+ # Return audio file
401
+ return send_file(
402
+ generated_path,
403
+ as_attachment=True,
404
+ download_name=f"tts_output.{output_format}",
405
+ mimetype=f"audio/{output_format}" if output_format != "aac" else "audio/aac"
406
+ )
407
+
408
+ except Exception as e:
409
+ logger.error(f"API error in TTS: {e}")
410
+ import traceback
411
+ logger.error(traceback.format_exc())
412
+ return jsonify({"error": str(e)}), 500
413
+
414
+ # Add a health check endpoint
415
+ @app.route('/api/health', methods=['GET'])
416
+ def api_health():
417
+ """Health check endpoint."""
418
+ return jsonify({
419
+ "status": "ok",
420
+ "model_loaded": model is not None,
421
+ "voices_count": len(get_available_voices())
422
+ })
423
+
424
+ @app.route('/api/config', methods=['GET', 'PUT'])
425
+ def api_config():
426
+ """Get or update configuration."""
427
+ if request.method == 'GET':
428
+ return jsonify(config)
429
+ else:
430
+ try:
431
+ data = request.json
432
+ # Only update specific fields
433
+ for key in ['output_dir', 'default_voice', 'default_format']:
434
+ if key in data:
435
+ config[key] = data[key]
436
 
437
+ save_config(config)
438
+ return jsonify({"status": "success", "config": config})
439
+ except Exception as e:
440
+ logger.error(f"API error updating config: {e}")
441
+ return jsonify({"error": str(e)}), 500
442
+
443
+ return app
444
+
445
+ # SERVER LAUNCH FUNCTIONS
446
+ def launch_api(host="0.0.0.0", port=None):
447
+ """Launch the API server in a separate thread."""
448
+ if not config.get("api_enabled", True):
449
+ logger.info("API server disabled in configuration")
450
+ return
451
+
452
+ api_port = port or config.get("api_port", 5000)
453
+ logger.info(f"Launching API server on port {api_port}")
454
+
455
+ app = create_api_server()
456
+
457
+ def run_api_server():
458
+ try:
459
+ # Use Werkzeug development server for simplicity
460
+ from werkzeug.serving import run_simple
461
+ run_simple(host, api_port, app, threaded=True, use_reloader=False)
462
+ except Exception as e:
463
+ logger.error(f"Error in API server: {e}")
464
+ import traceback
465
+ logger.error(traceback.format_exc())
466
+
467
+ # Start in a daemon thread
468
+ api_thread = threading.Thread(target=run_api_server, daemon=True)
469
+ api_thread.start()
470
+
471
+ # Give the server a moment to start
472
+ time.sleep(1)
473
+ logger.info(f"API server running at http://{host}:{api_port}")
474
+ return api_thread
475
+
476
+ def launch_ui(server_name="0.0.0.0", server_port=None, share=None):
477
+ """Launch the Gradio UI server."""
478
+ port = server_port or config.get("ui_port", 7860)
479
+ share_ui = share if share is not None else config.get("share_ui", True)
480
+
481
+ logger.info(f"Launching UI on port {port} (share={share_ui})")
482
+ interface = create_ui_interface()
483
+ if interface:
484
+ interface.queue() # Enable queuing for better concurrent handling
485
+ interface.launch(
486
+ server_name=server_name,
487
+ server_port=port,
488
+ share=share_ui,
489
+ prevent_thread_lock=True # Important - don't block the main thread
490
  )
491
+ logger.info(f"UI server running at http://{server_name}:{port}")
492
+ return True
493
+ else:
494
+ logger.error("Failed to create UI interface")
495
+ return False
496
+
497
+ # MAIN APPLICATION
498
+ def main():
499
+ """Main application entry point."""
500
+ print("\n" + "="*50)
501
+ print("Starting Kokoro-TTS")
502
+ print("="*50)
503
+
504
+ # Load configuration and create output directory
505
+ global config
506
+ config = load_config()
507
+ os.makedirs(config["output_dir"], exist_ok=True)
508
+
509
+ # Initialize model
510
+ try:
511
+ initialize_model()
512
+ except Exception as e:
513
+ logger.error(f"Failed to initialize model: {e}")
514
+ print(f"ERROR: Failed to initialize model: {e}")
515
+ sys.exit(1)
516
+
517
+ # Get the network IP address for WSL access
518
+ hostname = socket.gethostname()
519
+ network_ip = socket.gethostbyname(hostname)
520
+
521
+ # Check if we are running on Hugging Face Spaces or if you want to combine the servers locally
522
+ if os.environ.get("HF_SPACE") is not None or os.environ.get("SINGLE_PORT") == "1":
523
+ # Create the API Flask app and the Gradio interface
524
+ api_app = create_api_server()
525
+ interface = create_ui_interface()
526
+
527
+ # Combine the Gradio app and Flask API under the same port using DispatcherMiddleware
528
+ # All routes under '/api' go to the Flask API, all other routes go to Gradio.
529
+ combined_app = DispatcherMiddleware(interface.app, {
530
+ '/api': api_app
531
+ })
532
+
533
+ # Use the UI port (or any single port you want)
534
+ port = config.get("ui_port", 7860)
535
+ print(f"Combined UI and API running on port: {port}")
536
+ print(f"Localhost: http://localhost:{port}")
537
+ print(f"Network: http://{network_ip}:{port}")
538
+
539
+ # Run the combined app on a single port
540
+ run_simple("0.0.0.0", port, combined_app, use_reloader=False, threaded=True)
541
+ else:
542
+ # Local deployment: run API and UI separately
543
+ if config.get("api_enabled", True):
544
+ launch_api() # launches API on its own thread (port 5000 by default)
545
+ ui_thread = threading.Thread(target=launch_ui, daemon=True)
546
+ ui_thread.start()
547
+
548
+ print(f"UI (localhost): http://localhost:{config.get('ui_port', 7860)}")
549
+ print(f"UI (network): http://{network_ip}:{config.get('ui_port', 7860)}")
550
+ if config.get("api_enabled", True):
551
+ print(f"API (localhost): http://localhost:{config.get('api_port', 5000)}")
552
+ print(f"API (network): http://{network_ip}:{config.get('api_port', 5000)}")
553
 
554
+ # Keep the main thread alive
555
+ try:
556
+ while True:
557
+ time.sleep(1)
558
+ except KeyboardInterrupt:
559
+ print("\nShutting down servers...")
560
+ print("Press Ctrl+C again to force quit")
561
 
562
  if __name__ == "__main__":
563
+ main()
kokoro_tts.log ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-03-08 14:56:44,108 - kokoro_tts - INFO - Using device: cuda
2
+ 2025-03-08 14:56:44,113 - kokoro_tts - INFO - Initializing Kokoro TTS model...
3
+ 2025-03-08 14:56:49,854 - kokoro_tts - INFO - Model initialization complete
4
+ 2025-03-08 14:56:49,855 - kokoro_tts - INFO - Launching API server on port 5000
5
+ 2025-03-08 14:56:49,861 - kokoro_tts - INFO - Launching UI on port 7860 (share=True)
6
+ 2025-03-08 14:56:49,870 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
7
+ 2025-03-08 14:56:49,997 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
8
+ * Running on all addresses (0.0.0.0)
9
+ * Running on http://127.0.0.1:5000
10
+ * Running on http://172.21.207.14:5000
11
+ 2025-03-08 14:56:50,009 - werkzeug - INFO - Press CTRL+C to quit
12
+ 2025-03-08 14:56:50,345 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
13
+ 2025-03-08 14:56:50,375 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
14
+ 2025-03-08 14:56:51,198 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
15
+ 2025-03-08 14:56:51,654 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
16
+ 2025-03-08 14:56:56,056 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 14:56:56] "GET / HTTP/1.1" 404 -
17
+ 2025-03-08 14:56:56,351 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 14:56:56] "GET /favicon.ico HTTP/1.1" 404 -
18
+ 2025-03-08 14:56:59,424 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 14:56:59] "GET / HTTP/1.1" 404 -
19
+ 2025-03-08 14:58:06,168 - kokoro_tts - INFO - Generating speech for text: 'The quick brown fox jumps over the lazy dog. How v...' using voice: af_alloy
20
+ 2025-03-08 14:58:14,010 - kokoro_tts - INFO - Saved WAV file to outputs/tts_20250308_145806_1ca41e08.wav
21
+ 2025-03-08 14:58:49,050 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 14:58:49] "GET /api/generate HTTP/1.1" 404 -
22
+ 2025-03-08 14:58:54,675 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 14:58:54] "GET /api/tts HTTP/1.1" 405 -
23
+ 2025-03-08 15:05:11,950 - kokoro_tts - INFO - Using device: cuda
24
+ 2025-03-08 15:09:56,416 - kokoro_tts - INFO - Using device: cuda
25
+ 2025-03-08 15:09:56,422 - kokoro_tts - INFO - Initializing Kokoro TTS model...
26
+ 2025-03-08 15:09:57,714 - kokoro_tts - INFO - Model initialization complete
27
+ 2025-03-08 15:09:57,715 - kokoro_tts - INFO - Launching API server on port 5000
28
+ 2025-03-08 15:09:57,721 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
29
+ * Running on all addresses (0.0.0.0)
30
+ * Running on http://127.0.0.1:5000
31
+ * Running on http://172.21.207.14:5000
32
+ 2025-03-08 15:09:57,722 - werkzeug - INFO - Press CTRL+C to quit
33
+ 2025-03-08 15:09:58,510 - kokoro_tts - INFO - API server running at http://0.0.0.0:5000
34
+ 2025-03-08 15:09:58,510 - kokoro_tts - INFO - Launching UI on port 7860 (share=True)
35
+ 2025-03-08 15:09:58,516 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
36
+ 2025-03-08 15:09:59,077 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
37
+ 2025-03-08 15:09:59,108 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
38
+ 2025-03-08 15:09:59,816 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
39
+ 2025-03-08 15:10:00,404 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
40
+ 2025-03-08 15:10:01,533 - kokoro_tts - INFO - UI server running at http://0.0.0.0:7860
41
+ 2025-03-08 15:16:15,943 - kokoro_tts - INFO - Using device: cuda
42
+ 2025-03-08 15:19:02,083 - kokoro_tts - INFO - Using device: cuda
43
+ 2025-03-08 15:19:02,089 - kokoro_tts - INFO - Initializing Kokoro TTS model...
44
+ 2025-03-08 15:19:08,491 - kokoro_tts - INFO - Model initialization complete
45
+ 2025-03-08 15:20:27,420 - kokoro_tts - INFO - Using device: cuda
46
+ 2025-03-08 15:20:27,426 - kokoro_tts - INFO - Initializing Kokoro TTS model...
47
+ 2025-03-08 15:20:32,732 - kokoro_tts - INFO - Model initialization complete
48
+ 2025-03-08 15:20:32,732 - kokoro_tts - INFO - Launching API server on port 5000
49
+ 2025-03-08 15:20:32,739 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
50
+ * Running on all addresses (0.0.0.0)
51
+ * Running on http://127.0.0.1:5000
52
+ * Running on http://172.21.207.14:5000
53
+ 2025-03-08 15:20:32,739 - werkzeug - INFO - Press CTRL+C to quit
54
+ 2025-03-08 15:20:33,768 - kokoro_tts - INFO - API server running at http://0.0.0.0:5000
55
+ 2025-03-08 15:20:33,771 - kokoro_tts - INFO - Launching UI on port 7860 (share=True)
56
+ 2025-03-08 15:20:33,775 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
57
+ 2025-03-08 15:20:34,340 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
58
+ 2025-03-08 15:20:34,372 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
59
+ 2025-03-08 15:20:35,122 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
60
+ 2025-03-08 15:20:35,664 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
61
+ 2025-03-08 15:20:37,069 - kokoro_tts - INFO - UI server running at http://0.0.0.0:7860
62
+ 2025-03-08 15:21:04,433 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:21:04] "GET / HTTP/1.1" 404 -
63
+ 2025-03-08 15:21:10,598 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:21:10] "GET /api HTTP/1.1" 404 -
64
+ 2025-03-08 15:21:13,723 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:21:13] "GET /api/tts HTTP/1.1" 405 -
65
+ 2025-03-08 15:22:13,648 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
66
+ 2025-03-08 15:22:13,649 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:22:13] "GET /api/health HTTP/1.1" 200 -
67
+ 2025-03-08 15:22:13,671 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
68
+ 2025-03-08 15:22:13,672 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:22:13] "GET /api/voices HTTP/1.1" 200 -
69
+ 2025-03-08 15:27:20,170 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
70
+ 2025-03-08 15:27:20,171 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:27:20] "GET /api/health HTTP/1.1" 200 -
71
+ 2025-03-08 15:27:36,688 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
72
+ 2025-03-08 15:27:36,689 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:27:36] "GET /api/voices HTTP/1.1" 200 -
73
+ 2025-03-08 15:27:40,728 - kokoro_tts - INFO - Generating speech for text: 'hey how are you doing tonight...' using voice: af_alloy
74
+ 2025-03-08 15:27:42,744 - kokoro_tts - INFO - Saved WAV file to outputs/api_tts_20250308_152740_8976817e.wav
75
+ 2025-03-08 15:27:42,747 - kokoro_tts - INFO - Sending audio file: outputs/api_tts_20250308_152740_8976817e.wav (124844 bytes)
76
+ 2025-03-08 15:27:42,756 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:27:42] "POST /api/tts HTTP/1.1" 200 -
77
+ 2025-03-08 15:28:37,081 - werkzeug - INFO - 172.21.192.1 - - [08/Mar/2025 15:28:37] "GET / HTTP/1.1" 404 -
78
+ 2025-03-08 15:33:10,577 - kokoro_tts - INFO - Using device: cuda
79
+ 2025-03-08 15:33:10,582 - kokoro_tts - INFO - Initializing Kokoro TTS model...
80
+ 2025-03-08 15:33:16,937 - kokoro_tts - INFO - Model initialization complete
81
+ 2025-03-08 15:33:16,937 - kokoro_tts - INFO - Launching API server on port 5000
82
+ 2025-03-08 15:33:16,944 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
83
+ * Running on all addresses (0.0.0.0)
84
+ * Running on http://127.0.0.1:5000
85
+ * Running on http://172.21.207.14:5000
86
+ 2025-03-08 15:33:16,945 - werkzeug - INFO - Press CTRL+C to quit
87
+ 2025-03-08 15:33:18,021 - kokoro_tts - INFO - API server running at http://0.0.0.0:5000
88
+ 2025-03-08 15:33:18,023 - kokoro_tts - INFO - Launching UI on port 7860 (share=True)
89
+ 2025-03-08 15:33:18,029 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
90
+ 2025-03-08 15:33:18,866 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
91
+ 2025-03-08 15:35:04,472 - kokoro_tts - INFO - Using device: cuda
92
+ 2025-03-08 15:35:04,478 - kokoro_tts - INFO - Initializing Kokoro TTS model...
93
+ 2025-03-08 15:35:12,275 - kokoro_tts - INFO - Model initialization complete
94
+ 2025-03-08 15:35:12,276 - kokoro_tts - INFO - Launching API server on port 5000
95
+ 2025-03-08 15:35:12,284 - werkzeug - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
96
+ * Running on all addresses (0.0.0.0)
97
+ * Running on http://127.0.0.1:5000
98
+ * Running on http://172.21.207.14:5000
99
+ 2025-03-08 15:35:12,284 - werkzeug - INFO - Press CTRL+C to quit
100
+ 2025-03-08 15:35:13,335 - kokoro_tts - INFO - API server running at http://0.0.0.0:5000
101
+ 2025-03-08 15:35:13,336 - kokoro_tts - INFO - Launching UI on port 7860 (share=True)
102
+ 2025-03-08 15:35:13,341 - kokoro_tts - INFO - Available voices: ['af_alloy', 'af_aoede', 'af_bella', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky', 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa', 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily', 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis', 'em_alex', 'em_santa', 'ff_siwis', 'hf_alpha', 'hf_beta', 'hm_omega', 'hm_psi', 'jf_alpha', 'jf_nezumi', 'jf_tebukuro', 'jm_kumo', 'pf_dora', 'pm_alex', 'pm_santa', 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoyi']
103
+ 2025-03-08 15:35:18,920 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
104
+ 2025-03-08 15:35:13,415 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
105
+ 2025-03-08 15:35:14,191 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
106
+ 2025-03-08 15:35:14,719 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
107
+ 2025-03-08 15:35:16,100 - kokoro_tts - INFO - UI server running at http://0.0.0.0:7860
output.mp3 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"detail":"Not Found"}
output.wav ADDED
Binary file (125 kB). View file
 
outputs/api_tts_20250308_152740_8976817e.wav ADDED
Binary file (125 kB). View file
 
outputs/tts_20250308_145806_1ca41e08.wav ADDED
Binary file (287 kB). View file
 
text.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Interactive CLI client for testing the Kokoro-TTS HTTP API."""
import sys

import requests

# Base URL of the running TTS server. Adjust host/port if needed.
# NOTE: no trailing slash — every endpoint below is built as
# f"{BASE_URL}/api/...", so a trailing slash would yield double-slash
# URLs ("//api/health") that the server routes as Not Found.
BASE_URL = "http://172.21.207.14:5000"
7
+
8
def check_api_health():
    """Query the /api/health endpoint and print the server's status."""
    try:
        response = requests.get(f"{BASE_URL}/api/health")
        if response.status_code != 200:
            print("Error:", response.text)
        else:
            # Successful probe: show the decoded JSON health payload.
            print("API Health:", response.json())
    except Exception as e:
        print("Error connecting to API Health endpoint:", e)
19
+
20
def check_voices():
    """Query /api/voices and print the default voice plus all available ones."""
    try:
        response = requests.get(f"{BASE_URL}/api/voices")
        if response.status_code != 200:
            print("Error:", response.text)
            return
        payload = response.json()
        print("\nDefault Voice:", payload.get("default", "None"))
        print("Available Voices:")
        for name in payload.get("voices", []):
            print(" -", name)
    except Exception as e:
        print("Error connecting to API Voices endpoint:", e)
36
+
37
def generate_speech():
    """Prompt for text and parameters, POST to /api/tts, and save the audio.

    Reads text, voice, output format, and speed from stdin; a blank voice is
    resolved against the server's reported default. The returned audio bytes
    are written to ``output.<format>`` in the current directory.
    """
    text = input("Enter text to convert to speech: ").strip()
    if not text:
        print("Error: Text cannot be empty.")
        return

    voice = input("Enter voice to use (leave blank for default): ").strip()
    if voice == "":
        # Resolve the server's default voice so the request names it explicitly.
        try:
            response = requests.get(f"{BASE_URL}/api/voices")
            if response.status_code == 200:
                voice = response.json().get("default", "")
                print(f"Using default voice: {voice}")
            else:
                print("Unable to get default voice. Using empty voice name.")
        except Exception as e:
            print("Error getting default voice:", e)

    output_format = input("Enter output format (wav/mp3/aac, default: wav): ").strip().lower() or "wav"
    try:
        speed = float(input("Enter speech speed (default: 1.0): ").strip() or 1.0)
    except ValueError:
        print("Invalid speed input. Using default speed 1.0.")
        speed = 1.0

    payload = {
        "text": text,
        "voice": voice,
        "format": output_format,
        "speed": speed,
    }

    print("\nGenerating speech...")
    try:
        response = requests.post(f"{BASE_URL}/api/tts", json=payload)
        if response.status_code == 200:
            content_type = response.headers.get("Content-Type", "")
            filename = f"output.{output_format}"
            with open(filename, "wb") as f:
                f.write(response.content)
            # Bug fix: interpolate the actual saved file name — the message
            # previously printed a literal placeholder instead of `filename`.
            print(f"Audio generated and saved as '{filename}' (Content-Type: {content_type})")
        else:
            print("Error generating speech:", response.text)
    except Exception as e:
        print("Error connecting to API TTS endpoint:", e)
84
+
85
def main():
    """Run the interactive menu loop for exercising the TTS API endpoints."""
    # Dispatch table: menu choice -> handler. Option 4 (quit) is handled
    # separately because it exits the process rather than calling an endpoint.
    actions = {
        "1": check_api_health,
        "2": check_voices,
        "3": generate_speech,
    }
    while True:
        print("\n==== Kokoro-TTS CLI Test ====")
        print("1. Check API Health")
        print("2. Check Voices")
        print("3. Generate Speech")
        print("4. Quit")
        choice = input("Select an option (1-4): ").strip()

        if choice == "4":
            print("Exiting.")
            sys.exit(0)
        handler = actions.get(choice)
        if handler is not None:
            handler()
        else:
            print("Invalid option. Please choose a number between 1 and 4.")

if __name__ == "__main__":
    main()
tts_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "outputs",
3
+ "cache_voices": true,
4
+ "default_voice": "af_alloy",
5
+ "default_format": "wav",
6
+ "api_enabled": true,
7
+ "api_port": 5000,
8
+ "ui_port": 7860,
9
+ "share_ui": true
10
+ }