# Spaces: Sleeping  (appears to be web-page status residue captured with the file; not part of the script)
import os
import re
import sys

import numpy as np
import torch
from pydub import AudioSegment
from scipy.io.wavfile import write

from kokoro import generate
from models import build_model
# Text synthesized when no command-line argument is supplied.
DEFAULT_TEXT = "Hello world"

VOICE_NAME = [
    'af',  # Default voice is a 50-50 mix of Bella & Sarah
    'af_bella', 'af_sarah', 'am_adam', 'am_michael',
    'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',
    'af_nicole', 'af_sky',
][3]

MODEL_PATH = 'kokoro-v0_19.pth'
OUTPUT_DIR = './outputs'
# Sample rate handed to scipy.io.wavfile.write for the generated audio.
SAMPLE_RATE = 24000
SEPARATOR = "\n--------------------\n"

# Path separators, characters rejected by common filesystems, and control
# characters. Anything matching is replaced before the text is used as a
# file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def output_stem(text, fallback="output"):
    """Return a file-name stem derived from the first word of *text*.

    The first whitespace-delimited word is used. Path separators and other
    unsafe characters are replaced with ``_`` so the result can never point
    outside the output directory, and leading dots are dropped so the file
    is not hidden. If nothing usable remains (empty or blank text, or a
    word made only of dots), *fallback* is returned.
    """
    words = text.split()
    first_word = words[0] if words else ""
    stem = _UNSAFE_FILENAME_CHARS.sub("_", first_word).lstrip(".")
    return stem or fallback


def to_int16_pcm(audio_data):
    """Peak-normalize *audio_data* and scale it to 16-bit integer samples.

    Returns an ``int16`` NumPy array with the same shape as the input.
    Empty or all-zero input yields zeros instead of raising or dividing
    by zero.
    """
    samples = np.asarray(audio_data)
    if samples.size == 0:
        return np.zeros(samples.shape, dtype=np.int16)

    peak = np.max(np.abs(samples))
    if peak == 0:
        # Silent clip: there is nothing to normalize against.
        return np.zeros(samples.shape, dtype=np.int16)

    return (samples / peak * 32767).astype(np.int16)


def main(argv=None):
    """Synthesize speech for the given text and save it as WAV and MP3.

    *argv* is the list of command-line arguments without the program name;
    when ``None``, ``sys.argv[1:]`` is used. The first argument, if any,
    replaces the default text. Exits with status 1 when the model file
    named by ``MODEL_PATH`` does not exist.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    text = DEFAULT_TEXT
    if args:
        text = args[0]
        # NOTE(review): indentation was lost in the captured source; this
        # echo is assumed to belong to the argument branch -- confirm.
        print("Got text: ", text)

    voicepack = torch.load(f'voices/{VOICE_NAME}.pt', weights_only=True).to('cpu')
    print(f'Loaded voice: {VOICE_NAME}\n')

    if not os.path.exists(MODEL_PATH):
        print(SEPARATOR)
        print(f"Error: Model file '{MODEL_PATH}' does not exist.")
        sys.exit(1)

    model = build_model(MODEL_PATH, 'cpu')
    print(SEPARATOR)
    print("Model loaded.")

    # The first character of the voice name is passed as `lang`
    # (presumably a language/accent code -- verify against kokoro.generate).
    audio_data, _phonemes = generate(model, text, voicepack, lang=VOICE_NAME[0])

    scaled_audio = to_int16_pcm(audio_data)

    # Make sure the destination exists so a fresh checkout does not fail
    # at the write step.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    stem = output_stem(text)

    # Save as WAV
    wav_path = f'{OUTPUT_DIR}/{stem}.wav'
    write(wav_path, SAMPLE_RATE, scaled_audio)
    print(SEPARATOR)
    print(f'[SYSTEM] WAV file saved at: {wav_path}')

    # Optional: Convert to MP3 using pydub
    mp3_path = f'{OUTPUT_DIR}/{stem}.mp3'
    audio_segment = AudioSegment.from_file(wav_path, format="wav")
    audio_segment.export(mp3_path, format="mp3")
    print(f'[SYSTEM] MP3 file saved at: {mp3_path}')


if __name__ == "__main__":
    main()