When I used this model, I discovered two issues.
#1
by Chris7v7 - opened
Since the model card does not mention which voices are supported, I used the original "tara" voice.
1. When I attempt to generate a Thai phrase, the audio ends with a long stretch of static noise without any speech.
2. When I attempt to generate a longer Thai sentence, the audio starts abnormally, plays normally in the middle, but becomes abnormal again towards the end.
This is my code:
from orpheus_tts import OrpheusModel
import wave
import time
def main():
    """Generate Thai speech with Orpheus TTS and write it to a 24 kHz mono WAV.

    Streams audio chunks from the model into ``thai.wav`` and prints how long
    generation took versus the duration of audio produced.
    """
    model = OrpheusModel(model_name="kadirnar/Orpheus-Porjai-Thai")
    prompt = '''ฉันอยากไปเที่ยวเชียงใหม่'''
    start_time = time.monotonic()
    # NOTE(review): "tara" is one of the base (English) Orpheus voices; a Thai
    # fine-tune may expect a different voice name — confirm against the model card.
    syn_tokens = model.generate_speech(
        prompt=prompt,
        voice="tara",
    )
    with wave.open("thai.wav", "wb") as wf:
        wf.setnchannels(1)      # mono
        wf.setsampwidth(2)      # 16-bit PCM
        wf.setframerate(24000)  # model emits 24 kHz audio
        total_frames = 0
        # Hoisted loop invariant: bytes per audio frame (sample width * channels).
        bytes_per_frame = wf.getsampwidth() * wf.getnchannels()
        for audio_chunk in syn_tokens:  # output streaming
            total_frames += len(audio_chunk) // bytes_per_frame
            wf.writeframes(audio_chunk)
        duration = total_frames / wf.getframerate()
    end_time = time.monotonic()
    print(f"It took {end_time - start_time} seconds to generate {duration:.2f} seconds of audio")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()