"""Tiny FastAPI service that serves synthesized speech as raw PCM frames.

The audio is produced with gTTS (MP3), converted to mono WAV by the
``ffmpeg`` executable, and the WAV frames (without the header) are returned
from the ``/audio`` endpoint.
"""

import io
import os
import subprocess
import tempfile
import wave

import numpy as np
import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from gtts import gTTS

DEFAULT_TEXT = "Hello, this is ESP32 audio test"
DEFAULT_SAMPLE_RATE = 24000

app = FastAPI()


def generate_speech_pcm(
    text: str = DEFAULT_TEXT,
    sample_rate: int = DEFAULT_SAMPLE_RATE,
) -> bytes:
    """Synthesize *text* and return the audio as raw PCM frame bytes.

    The text is rendered to MP3 by gTTS, resampled by ffmpeg to a mono WAV
    file at *sample_rate* Hz, and the WAV frame data is read back with the
    ``wave`` module. The returned bytes contain only the frames, not the
    WAV header.

    Args:
        text: Sentence to speak. Defaults to the original test phrase.
        sample_rate: Output sample rate passed to ffmpeg's ``-ar`` option.

    Returns:
        The frame bytes of the converted WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    tts = gTTS(text=text, lang="en")

    # A temporary directory guarantees both intermediate files are removed,
    # even when synthesis or conversion raises. It also avoids writing to a
    # path whose handle is still open, which fails on Windows.
    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = os.path.join(workdir, "speech.mp3")
        wav_path = os.path.join(workdir, "speech.wav")

        tts.save(mp3_path)

        # "ffmpeg" is resolved via PATH. check=True surfaces a failed
        # conversion here instead of as a confusing wave.open() error later.
        # NOTE(review): sample width is whatever ffmpeg's WAV muxer picks by
        # default (presumably 16-bit little-endian) - confirm for the client.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-i", mp3_path,
                "-ar", str(sample_rate),
                "-ac", "1",
                "-f", "wav",
                wav_path,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )

        with wave.open(wav_path, "rb") as wav_reader:
            return wav_reader.readframes(wav_reader.getnframes())


@app.get("/audio")
def stream_audio():
    """Serve the synthesized test phrase as an ``application/octet-stream``."""
    pcm_bytes = generate_speech_pcm()
    return StreamingResponse(
        io.BytesIO(pcm_bytes),
        media_type="application/octet-stream",
    )


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)