Arnavkumar01 committed on
Commit
6ee9d08
·
1 Parent(s): ccd9d86

Changes made in requirements.txt, dockerfile and main.py to handle twilio websocket connection

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -11
  2. main.py +319 -99
  3. requirements.txt +1 -0
Dockerfile CHANGED
@@ -1,29 +1,30 @@
1
  # 1. Start with a lean and official Python base image
2
  FROM python:3.10-slim
3
 
4
- # Install dependencies needed for psycopg2
5
- RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*
6
 
7
  # 2. Set the working directory inside the container
8
  WORKDIR /app
9
 
10
- # 3. Create a non-root user and set up the cache
11
  RUN useradd -m -u 1000 user
12
  RUN mkdir -p /app/.cache && chown -R user:user /app/.cache
13
  ENV HF_HOME="/app/.cache"
14
  USER user
15
 
16
- # Add the local bin directory to the PATH (good practice)
17
  ENV PATH="/home/user/.local/bin:${PATH}"
18
 
19
- # 4. Copy and install requirements
20
  COPY --chown=user:user requirements.txt .
21
  RUN pip install --no-cache-dir -r requirements.txt
22
 
23
- # 5. Copy your application code
24
- COPY --chown=user:user main.py .
25
 
26
- # 6. --- START OF FIX ---
27
- # Define the command to run your application using the full path to gunicorn
28
- CMD /home/user/.local/bin/gunicorn --bind 0.0.0.0:7860 --workers 1 --worker-class uvicorn.workers.UvicornWorker main:app
29
- # --- END OF FIX ---
 
 
1
# 1. Start with a lean and official Python base image
FROM python:3.10-slim

# Install dependencies for psycopg2 and audio processing
RUN apt-get update && apt-get install -y libpq-dev ffmpeg && rm -rf /var/lib/apt/lists/*

# 2. Set the working directory inside the container
WORKDIR /app

# 3. Create a non-root user and set up cache
RUN useradd -m -u 1000 user
RUN mkdir -p /app/.cache && chown -R user:user /app/.cache
ENV HF_HOME="/app/.cache"
USER user

# Add local bin directory to PATH
ENV PATH="/home/user/.local/bin:${PATH}"

# Ensure Python writes logs straight to the container log stream
# (without this, stdout is block-buffered and log lines appear late or
# not at all when the process is killed).
ENV PYTHONUNBUFFERED=1

# 4. Copy and install dependencies
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# 5. Copy the app source code
COPY --chown=user:user . .

# 6. Expose the port used by Hugging Face Spaces
EXPOSE 7860

# 7. Run the FastAPI app using Uvicorn (better for WebSockets)
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -4,8 +4,9 @@ import logging
4
  import json
5
  import re
6
  from contextlib import asynccontextmanager
7
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, status, Depends, Header, HTTPException
8
- from fastapi.concurrency import run_in_threadpool # Import for handling blocking calls
 
9
  from pydantic import BaseModel
10
  from dotenv import load_dotenv
11
  from openai import OpenAI
@@ -14,8 +15,14 @@ from langchain_huggingface import HuggingFaceEmbeddings
14
  from langchain_postgres.vectorstores import PGVector
15
  from sqlalchemy import create_engine
16
 
 
 
 
 
 
 
 
17
  # --- SETUP ---
18
- # Suppress noisy logs from underlying libraries
19
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
20
  logging.getLogger('tensorflow').setLevel(logging.ERROR)
21
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -42,15 +49,24 @@ TABLE_DESCRIPTIONS = """
42
  - "feedback_source": Customer feedback and ratings for projects.
43
  """
44
 
 
 
 
 
 
 
45
  # --- GLOBAL VARIABLES FOR LIFESPAN ---
46
- # These will be populated at startup
47
  embeddings = None
48
  vector_store = None
49
 
50
- # --- FASTAPI LIFESPAN MANAGEMENT ---
 
 
 
 
 
51
  @asynccontextmanager
52
  async def lifespan(app: FastAPI):
53
- # This code runs on startup
54
  global embeddings, vector_store
55
  logging.info(f"Initializing embedding model: '{EMBEDDING_MODEL}'...")
56
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
@@ -61,17 +77,15 @@ async def lifespan(app: FastAPI):
61
  vector_store = PGVector(
62
  connection=engine,
63
  collection_name=COLLECTION_NAME,
64
- embeddings=embeddings, # <-- CRITICAL FIX: Corrected parameter name
65
  )
66
  logging.info("Successfully connected to the vector store.")
67
  yield
68
- # This code would run on shutdown (if needed)
69
  logging.info("Application shutting down.")
70
 
71
- # --- INITIALIZE FastAPI APP WITH LIFESPAN ---
 
72
  app = FastAPI(lifespan=lifespan)
73
- client_openai = OpenAI(api_key=OPENAI_API_KEY)
74
- client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
75
 
76
 
77
  # --- PROMPTS ---
@@ -87,6 +101,7 @@ You are a query analysis agent. Your task is to transform a user's query into a
87
  4. If no specific status keywords are mentioned (e.g., the user asks generally about projects in a location), set the filter table to null.
88
  5. Respond ONLY with a JSON object containing "search_query" and "filter_table" (which should be the table name string or null).
89
  """
 
90
  ANSWER_SYSTEM_PROMPT = """
91
  You are an expert AI assistant for a premier real estate developer.
92
  ## YOUR PERSONA
@@ -101,127 +116,332 @@ You are an expert AI assistant for a premier real estate developer.
101
  3. **Stay on Topic:** Only answer questions related to real estate.
102
  """
103
 
104
- # --- HELPER FUNCTIONS ---
105
- def transcribe_audio(audio_bytes: bytes) -> str:
106
- """This is a blocking function."""
 
107
  try:
108
- with open("temp_audio.wav", "wb") as f: f.write(audio_bytes)
109
- with open("temp_audio.wav", "rb") as audio_file:
110
- transcript = client_openai.audio.transcriptions.create(model="whisper-1", file=audio_file)
111
- return transcript.text
 
 
 
 
112
  except Exception as e:
113
- logging.error(f"Error during transcription: {e}")
114
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  async def formulate_search_plan(user_query: str) -> dict:
117
  logging.info("Formulating search plan with Planner LLM...")
118
- try:
119
- response = client_openai.chat.completions.create( # This can be async if using an async client
120
- model=PLANNER_MODEL,
121
- messages=[{"role": "user", "content": QUERY_FORMULATION_PROMPT.format(user_query=user_query)}],
122
- response_format={"type": "json_object"},
123
- temperature=0.0
124
- )
125
- plan = json.loads(response.choices[0].message.content)
126
- logging.info(f"Search plan received: {plan}")
127
- return plan
128
- except Exception as e:
129
- logging.error(f"Error in Planner LLM call: {e}")
130
- return {"search_query": user_query, "filter_table": None}
 
 
 
131
 
132
  async def get_agent_response(user_text: str) -> str:
133
- """Runs the full RAG and generation logic for a given text query."""
134
- search_plan = await formulate_search_plan(user_text)
135
- search_query = search_plan.get("search_query", user_text)
136
- filter_table = search_plan.get("filter_table")
137
-
138
- # --- START OF MODIFICATION ---
139
- search_filter = {"source_table": filter_table} if filter_table else {}
140
- if search_filter:
141
- logging.info(f"Applying initial filter: {search_filter}")
142
-
143
- # First attempt: A specific, filtered search
144
- retrieved_docs = vector_store.similarity_search(search_query, k=3, filter=search_filter)
145
-
146
- # If the first attempt finds nothing, try a broader search
147
- if not retrieved_docs:
148
- logging.info("Initial search returned no results. Performing a broader fallback search.")
149
- retrieved_docs = vector_store.similarity_search(search_query, k=3) # No filter this time
150
- # --- END OF MODIFICATION ---
151
-
152
- context_text = "\n\n".join([doc.page_content for doc in retrieved_docs])
153
- logging.info(f"Retrieved Context: {context_text[:500]}...")
154
-
155
- final_prompt_messages = [
156
- {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
157
- {"role": "system", "content": f"Use the following CONTEXT to answer:\n{context_text}"},
158
- {"role": "user", "content": f"My original question was: '{user_text}'"}
159
- ]
160
- final_response = client_openai.chat.completions.create(
161
- model=ANSWERER_MODEL,
162
- messages=final_prompt_messages
163
- )
164
- return final_response.choices[0].message.content
165
 
166
- # --- Add this new function ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  async def verify_token(x_auth_token: str = Header(...)):
168
- """A dependency to verify the shared secret token."""
169
  if not SHARED_SECRET or x_auth_token != SHARED_SECRET:
170
  logging.warning("Authentication failed for /test-text-query.")
171
  raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing authentication token")
172
  logging.info("Authentication successful for /test-text-query.")
173
 
 
174
  # --- API Endpoints ---
175
  class TextQuery(BaseModel):
176
  query: str
177
 
 
178
  @app.post("/test-text-query", dependencies=[Depends(verify_token)])
179
  async def test_text_query_endpoint(query: TextQuery):
180
- """Endpoint for text-based testing via Swagger UI."""
181
  logging.info(f"Received text query: {query.query}")
182
  response_text = await get_agent_response(query.query)
183
  logging.info(f"Generated text response: {response_text}")
184
  return {"response": response_text}
185
 
 
 
186
  @app.websocket("/listen")
187
  async def websocket_endpoint(websocket: WebSocket):
188
- auth_token = websocket.headers.get("x-auth-token")
189
- if not SHARED_SECRET or auth_token != SHARED_SECRET:
190
- logging.warning(f"Authentication failed. Closing connection.")
191
- await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
192
- return
193
-
194
  await websocket.accept()
195
- logging.info("Authentication successful. Call connected.")
 
 
196
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  while True:
198
- message = await websocket.receive_json()
199
- audio_base64 = message.get('audio')
200
- if not audio_base64: continue
 
201
 
202
- # PERFORMANCE FIX: Run blocking transcription in a separate thread
203
- user_text = await run_in_threadpool(
204
- transcribe_audio, base64.b64decode(audio_base64)
205
- )
206
- logging.info(f"User said: {user_text}")
207
- if not user_text.strip(): continue
208
-
209
- agent_response_text = await get_agent_response(user_text)
210
- logging.info(f"AI Responded: {agent_response_text}")
211
-
212
- # PERFORMANCE FIX: Run blocking audio generation in a separate thread
213
- audio_output = await run_in_threadpool(
214
- client_elevenlabs.generate,
215
- text=agent_response_text,
216
- voice=ELEVENLABS_VOICE_NAME,
217
- model="eleven_multilingual_v2"
218
- )
219
- response_audio_base64 = base64.b64encode(audio_output).decode('utf-8')
220
- await websocket.send_json({'audio': response_audio_base64})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  except WebSocketDisconnect:
223
- logging.info("Call disconnected.")
224
  except Exception as e:
225
  logging.error(f"An error occurred in the main loop: {e}", exc_info=True)
226
  finally:
227
- await websocket.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import json
5
  import re
6
  from contextlib import asynccontextmanager
7
+ from typing import Optional
8
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, status, Depends, Header, HTTPException
9
+ from fastapi.concurrency import run_in_threadpool
10
  from pydantic import BaseModel
11
  from dotenv import load_dotenv
12
  from openai import OpenAI
 
15
  from langchain_postgres.vectorstores import PGVector
16
  from sqlalchemy import create_engine
17
 
18
+ # --- NEW IMPORTS FOR TWILIO INTEGRATION ---
19
+ import asyncio
20
+ import audioop
21
+ import wave
22
+ import io
23
+ from pydub import AudioSegment
24
+
25
  # --- SETUP ---
 
26
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
27
  logging.getLogger('tensorflow').setLevel(logging.ERROR)
28
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
49
  - "feedback_source": Customer feedback and ratings for projects.
50
  """
51
 
52
+ # VAD Configuration
53
+ SILENCE_THRESHOLD = 1000 # RMS threshold for speech detection (tune based on testing)
54
+ MAX_AUDIO_BYTES = 80000 # Max buffer ~10s at 8kHz (prevent overflow)
55
+ # Max loop iterations to avoid infinite loops (safety)
56
+ MAX_LOOP_COUNT = 1200
57
+
58
  # --- GLOBAL VARIABLES FOR LIFESPAN ---
 
59
  embeddings = None
60
  vector_store = None
61
 
62
+ # Initialize clients (will be used after load_dotenv)
63
+ client_openai = OpenAI(api_key=OPENAI_API_KEY)
64
+ client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
65
+
66
+
67
+ # --- LIFESPAN / STARTUP ---
68
  @asynccontextmanager
69
  async def lifespan(app: FastAPI):
 
70
  global embeddings, vector_store
71
  logging.info(f"Initializing embedding model: '{EMBEDDING_MODEL}'...")
72
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 
77
  vector_store = PGVector(
78
  connection=engine,
79
  collection_name=COLLECTION_NAME,
80
+ embeddings=embeddings,
81
  )
82
  logging.info("Successfully connected to the vector store.")
83
  yield
 
84
  logging.info("Application shutting down.")
85
 
86
+
87
+ # --- FASTAPI APP ---
88
  app = FastAPI(lifespan=lifespan)
 
 
89
 
90
 
91
  # --- PROMPTS ---
 
101
  4. If no specific status keywords are mentioned (e.g., the user asks generally about projects in a location), set the filter table to null.
102
  5. Respond ONLY with a JSON object containing "search_query" and "filter_table" (which should be the table name string or null).
103
  """
104
+
105
  ANSWER_SYSTEM_PROMPT = """
106
  You are an expert AI assistant for a premier real estate developer.
107
  ## YOUR PERSONA
 
116
  3. **Stay on Topic:** Only answer questions related to real estate.
117
  """
118
 
119
+
120
+ # --- HELPER FUNCTIONS (sync helpers executed in threadpool) ---
121
def convert_mulaw_to_wav_bytes(mulaw_bytes: bytes) -> bytes:
    """Wrap raw 8 kHz mu-law audio bytes in an in-memory 16-bit mono WAV file.

    Returns the complete WAV file as bytes, or b'' if conversion fails.
    """
    try:
        # Decode mu-law to 16-bit linear PCM, then write a standard WAV header.
        linear_pcm = audioop.ulaw2lin(mulaw_bytes, 2)
        buffer = io.BytesIO()
        with wave.open(buffer, 'wb') as out:
            out.setnchannels(1)      # mono
            out.setsampwidth(2)      # 16-bit samples
            out.setframerate(8000)   # Twilio telephony sample rate
            out.writeframes(linear_pcm)
        return buffer.getvalue()
    except Exception as e:
        logging.error(f"Error converting mulaw to WAV: {e}", exc_info=True)
        return b''
135
+
136
+
137
def transcribe_audio_sync(audio_wav_bytes: bytes) -> str:
    """Blocking Whisper transcription with up to three attempts.

    Intended to run inside run_in_threadpool. Returns the transcript text,
    transliterated to Roman script when Devanagari characters are detected,
    or "" after three consecutive failures.
    """
    attempts = 3
    for attempt in range(attempts):
        try:
            # The OpenAI client needs a file-like object with a filename.
            wav_stream = io.BytesIO(audio_wav_bytes)
            wav_stream.name = "stream.wav"
            text = client_openai.audio.transcriptions.create(model="whisper-1", file=wav_stream).text

            # Devanagari characters present -> ask the chat model for a
            # Roman-script (Hinglish) rendering of the same text.
            if re.search(r'[\u0900-\u097F]', text):
                translit_prompt = f"Transliterate this Hindi text to Roman script (Hinglish style): {text}"
                reply = client_openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": translit_prompt}],
                    temperature=0.0
                )
                text = reply.choices[0].message.content

            return text
        except Exception as e:
            logging.error(f"Error during transcription (attempt {attempt+1}): {e}", exc_info=True)
            if attempt == attempts - 1:
                return ""
161
+
162
+
163
def convert_audio_to_mulaw_sync(audio_bytes: bytes) -> bytes:
    """Blocking conversion of arbitrary audio bytes to 8 kHz mono mu-law.

    This is the wire format Twilio Media Streams expects. Retries up to three
    times; returns b'' if every attempt fails.
    """
    for attempt in range(3):
        try:
            # Normalise to 8 kHz mono PCM first, then mu-law encode the samples.
            segment = (
                AudioSegment.from_file(io.BytesIO(audio_bytes))
                .set_frame_rate(8000)
                .set_channels(1)
            )
            return audioop.lin2ulaw(segment.raw_data, 2)
        except Exception as e:
            logging.error(f"Error converting audio to mulaw (attempt {attempt+1}): {e}", exc_info=True)
            if attempt == 2:
                return b''
177
+
178
+
179
def generate_elevenlabs_sync(text: str, voice: str, model: str = "eleven_multilingual_v2", output_format: str = "mp3_44100_128") -> bytes:
    """Synchronous ElevenLabs generation wrapper for run_in_threadpool.

    Always returns the synthesized audio as a single ``bytes`` object
    (or ``b''`` after three failed attempts), so callers can safely call
    ``len()`` on it and feed it to pydub.
    """
    for attempt in range(3):
        try:
            # The ElevenLabs client call is synchronous in this codebase
            audio = client_elevenlabs.generate(
                text=text,
                voice=voice,
                model=model,
                output_format=output_format
            )
            # BUG FIX: several ElevenLabs SDK versions stream the result back
            # as an iterator of byte chunks rather than a bytes object.
            # Flatten it here so downstream truthiness checks and audio
            # decoding do not break on a generator.
            if isinstance(audio, (bytes, bytearray)):
                return bytes(audio)
            return b"".join(audio)
        except Exception as e:
            logging.error(f"Error in ElevenLabs generate (attempt {attempt+1}): {e}", exc_info=True)
            if attempt == 2:
                return b''
194
+
195
+
196
+ # --- LLM / RAG helpers (async, but will call sync via threadpool when appropriate) ---
197
async def formulate_search_plan(user_query: str) -> dict:
    """Ask the planner LLM to turn the raw user query into a search plan.

    Returns a dict with "search_query" and "filter_table" keys; on repeated
    failure falls back to the raw query with no table filter.
    """
    logging.info("Formulating search plan with Planner LLM...")
    last_attempt = 2
    for attempt in range(3):
        try:
            planner_messages = [
                {"role": "user", "content": QUERY_FORMULATION_PROMPT.format(user_query=user_query)}
            ]
            response = client_openai.chat.completions.create(
                model=PLANNER_MODEL,
                messages=planner_messages,
                response_format={"type": "json_object"},
                temperature=0.0
            )
            plan = json.loads(response.choices[0].message.content)
            logging.info(f"Search plan received: {plan}")
            return plan
        except Exception as e:
            logging.error(f"Error in Planner LLM call (attempt {attempt+1}): {e}", exc_info=True)
            if attempt == last_attempt:
                # Degraded mode: search with the user's own words, unfiltered.
                return {"search_query": user_query, "filter_table": None}
214
+
215
 
216
async def get_agent_response(user_text: str) -> str:
    """Run the plan -> retrieve -> generate RAG pipeline for one text query.

    Retries the whole pipeline up to three times; returns an apology string
    if every attempt fails.
    """
    for attempt in range(3):
        try:
            # Plan: let the planner LLM pick a refined query and optional table filter.
            plan = await formulate_search_plan(user_text)
            query = plan.get("search_query", user_text)
            table = plan.get("filter_table")

            search_filter = {"source_table": table} if table else {}
            if search_filter:
                logging.info(f"Applying initial filter: {search_filter}")

            # Retrieve: filtered search first, then an unfiltered fallback.
            docs = vector_store.similarity_search(query, k=3, filter=search_filter)
            if not docs:
                logging.info("Initial search returned no results. Performing a broader fallback search.")
                docs = vector_store.similarity_search(query, k=3)

            context_text = "\n\n".join(doc.page_content for doc in docs)
            logging.info(f"Retrieved Context (preview): {context_text[:500]}...")

            # Generate: answer grounded in the retrieved context.
            completion = client_openai.chat.completions.create(
                model=ANSWERER_MODEL,
                messages=[
                    {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
                    {"role": "system", "content": f"Use the following CONTEXT to answer:\n{context_text}"},
                    {"role": "user", "content": f"My original question was: '{user_text}'"}
                ]
            )
            return completion.choices[0].message.content
        except Exception as e:
            logging.error(f"Error in get_agent_response (attempt {attempt+1}): {e}", exc_info=True)
            if attempt == 2:
                return "Sorry, I couldn't generate a response. Please try again."
251
+
252
+
253
+ # --- AUTH DEPENDENCY ---
254
async def verify_token(x_auth_token: str = Header(...)):
    """Dependency to verify the shared secret token.

    Raises HTTP 401 when SHARED_SECRET is unset or the header does not match.
    """
    token_ok = bool(SHARED_SECRET) and x_auth_token == SHARED_SECRET
    if not token_ok:
        logging.warning("Authentication failed for /test-text-query.")
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing authentication token")
    logging.info("Authentication successful for /test-text-query.")
260
 
261
+
262
  # --- API Endpoints ---
263
class TextQuery(BaseModel):
    """Request body for /test-text-query: a single free-text question."""
    query: str
265
 
266
+
267
@app.post("/test-text-query", dependencies=[Depends(verify_token)])
async def test_text_query_endpoint(query: TextQuery):
    """Text-only testing endpoint (e.g. via Swagger UI); auth enforced by verify_token."""
    logging.info(f"Received text query: {query.query}")
    answer = await get_agent_response(query.query)
    logging.info(f"Generated text response: {answer}")
    return {"response": answer}
273
 
274
+
275
+ # --- WEBHOOK / WEBSOCKET FOR TWILIO STREAMING ---
276
@app.websocket("/listen")
async def websocket_endpoint(websocket: WebSocket):
    """Twilio Media Streams WebSocket endpoint.

    Protocol: the first frame must be a 'start' event carrying the auth token
    and streamSid in customParameters; subsequent 'media' events carry base64
    mu-law audio chunks, and a 'stop' event ends the call. Speech is buffered
    with an RMS-based VAD; a 1 s receive timeout is treated as end-of-utterance
    and triggers process_audio_buffer(), which replies with synthesized audio.
    """
    await websocket.accept()
    logging.info("WebSocket connection accepted from Twilio.")
    stream_sid: Optional[str] = None

    try:
        first_message = await websocket.receive_json()
        event = first_message.get("event")

        if event != "start":
            logging.error("Expected 'start' message. Closing.")
            await websocket.close(code=status.WS_1003_UNSUPPORTED_DATA)
            return

        start_data = first_message.get("start", {})
        custom_params = start_data.get("customParameters", {})
        if not custom_params:
            logging.error("Missing customParameters in start event. Closing.")
            await websocket.close(code=status.WS_1003_UNSUPPORTED_DATA)
            return

        auth_token = custom_params.get("x-auth-token")
        stream_sid = start_data.get("streamSid")

        if not SHARED_SECRET or auth_token != SHARED_SECRET:
            logging.warning("Authentication failed. Invalid token. Closing connection.")
            await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
            return

        logging.info(f"Authentication successful. Stream SID: {stream_sid}")
        logging.debug(f"Full start message: {first_message}")

        # Main loop with VAD using timeout + RMS threshold
        accumulated_audio_mulaw = b''
        # BUG FIX: the counter previously incremented on EVERY received
        # message. Twilio sends a media frame roughly every 20 ms, so
        # MAX_LOOP_COUNT (1200) was reached after ~24 seconds of audio and
        # the call was dropped mid-conversation. Count only idle (timed-out)
        # iterations and reset on traffic, so the guard now means
        # "MAX_LOOP_COUNT seconds of continuous silence" — matching its
        # stated hang-prevention intent.
        idle_counter = 0

        while True:
            if idle_counter > MAX_LOOP_COUNT:
                logging.info("Max loop count reached. Exiting to prevent hang.")
                break

            try:
                message_str = await asyncio.wait_for(websocket.receive_text(), timeout=1.0)
                idle_counter = 0  # real traffic arrived; reset the idle guard
                message = json.loads(message_str)
                event = message.get("event")

                if event == "media":
                    payload = message['media']['payload']
                    mulaw_chunk = base64.b64decode(payload)

                    # Compute RMS to avoid buffering pure silence / static
                    try:
                        pcm_chunk = audioop.ulaw2lin(mulaw_chunk, 2)
                        rms = audioop.rms(pcm_chunk, 2)
                    except Exception as e:
                        logging.debug(f"Could not compute RMS on chunk: {e}")
                        rms = 0

                    if rms > SILENCE_THRESHOLD:
                        accumulated_audio_mulaw += mulaw_chunk
                        logging.debug(f"Buffered audio chunk; RMS={rms}, total_bytes={len(accumulated_audio_mulaw)}")
                    else:
                        logging.debug(f"Ignored low-energy chunk; RMS={rms}")

                    # Safety: if buffer too large, process it
                    if len(accumulated_audio_mulaw) > MAX_AUDIO_BYTES:
                        logging.info(f"Max audio buffer reached ({len(accumulated_audio_mulaw)} bytes). Processing buffer.")
                        await process_audio_buffer(websocket, stream_sid or "", accumulated_audio_mulaw)
                        accumulated_audio_mulaw = b''

                elif event == "stop":
                    logging.info("Twilio stream sent 'stop' event.")
                    # Process remaining buffered audio before breaking
                    if accumulated_audio_mulaw:
                        logging.info(f"Processing remaining audio on stop event ({len(accumulated_audio_mulaw)} bytes).")
                        await process_audio_buffer(websocket, stream_sid or "", accumulated_audio_mulaw)
                        accumulated_audio_mulaw = b''
                    break

                else:
                    logging.debug(f"Ignored unknown event type: {event}")

            except asyncio.TimeoutError:
                idle_counter += 1
                # VAD trigger: no new data within timeout -> treat as end-of-speech
                if accumulated_audio_mulaw:
                    logging.info(f"End of speech detected (timeout). Processing {len(accumulated_audio_mulaw)} bytes.")
                    await process_audio_buffer(websocket, stream_sid or "", accumulated_audio_mulaw)
                    accumulated_audio_mulaw = b''

            except (ValueError, json.JSONDecodeError) as e:
                logging.warning(f"Invalid message received: {e}. Skipping this message.")
            except WebSocketDisconnect:
                logging.info("WebSocket disconnected by client.")
                break

    except WebSocketDisconnect:
        logging.info("Call disconnected during start phase.")
    except Exception as e:
        logging.error(f"An error occurred in the main loop: {e}", exc_info=True)
    finally:
        # Close may already have happened (disconnect / auth failure) — ignore.
        try:
            await websocket.close()
        except Exception:
            pass
385
+
386
+
387
+ # --- PROCESS AUDIO BUFFER (async wrapper that uses sync helpers in threadpool) ---
388
async def process_audio_buffer(websocket: WebSocket, stream_sid: str, accumulated_audio_mulaw: bytes):
    """Full conversational turn: mu-law buffer -> WAV -> transcript -> LLM -> TTS -> Twilio.

    Blocking helpers run in the threadpool; each stage bails out early (with a
    log line) when it produces nothing, so a failed stage never cascades.
    """
    logging.info(f"Processing audio buffer of {len(accumulated_audio_mulaw)} bytes...")

    # Stage 1: wrap the raw mu-law bytes in a WAV container (CPU-bound, threadpool).
    wav_audio = await run_in_threadpool(convert_mulaw_to_wav_bytes, accumulated_audio_mulaw)
    if not wav_audio:
        logging.warning("WAV conversion produced no bytes. Skipping processing.")
        return

    # Stage 2: speech-to-text (blocking network call, threadpool).
    transcript = await run_in_threadpool(transcribe_audio_sync, wav_audio)
    if not transcript or not transcript.strip():
        logging.info("Transcription empty; skipping further processing.")
        return

    transcript = transcript.strip()
    logging.info(f"User said: {transcript}")

    # Stage 3: RAG + LLM answer (async).
    reply_text = await get_agent_response(transcript)
    logging.info(f"AI Responded (preview): {reply_text[:200]}")

    if not reply_text or not reply_text.strip():
        logging.warning("Agent generated empty response; skipping TTS.")
        return

    # Stage 4: text-to-speech via ElevenLabs (retries live inside the helper).
    tts_audio = await run_in_threadpool(generate_elevenlabs_sync, reply_text, ELEVENLABS_VOICE_NAME)
    if not tts_audio:
        logging.error("ElevenLabs returned no audio bytes; skipping sending audio.")
        return

    # Stage 5: re-encode to 8 kHz mu-law — the only format Twilio will play.
    twilio_payload = await run_in_threadpool(convert_audio_to_mulaw_sync, tts_audio)
    if not twilio_payload:
        logging.error("Conversion to mulaw failed; skipping sending audio.")
        return

    # Stage 6: base64-encode and ship the audio back over the media stream.
    try:
        await websocket.send_json({
            "event": "media",
            "streamSid": stream_sid,
            "media": {"payload": base64.b64encode(twilio_payload).decode('utf-8')}
        })
        logging.info("Sent AI audio response back to Twilio.")
    except Exception as e:
        logging.error(f"Failed to send AI audio to Twilio: {e}", exc_info=True)
        return

    # Stage 7: 'clear' tells Twilio to flush anything still queued for playback.
    try:
        await websocket.send_json({"event": "clear", "streamSid": stream_sid})
        logging.info("Sent clear event to Twilio.")
    except Exception as e:
        logging.error(f"Failed to send 'clear' event: {e}", exc_info=True)
445
+
446
+
447
+ # End of file
requirements.txt CHANGED
@@ -6,6 +6,7 @@ elevenlabs==2.17.0
6
  gunicorn==23.0.0
7
  psycopg2-binary==2.9.10
8
  pandas==2.2.3
 
9
  python-dotenv==1.1.0
10
  sentence-transformers==5.1.1
11
  langchain-huggingface==0.3.1
 
6
  gunicorn==23.0.0
7
  psycopg2-binary==2.9.10
8
  pandas==2.2.3
9
+ pydub==0.25.1
10
  python-dotenv==1.1.0
11
  sentence-transformers==5.1.1
12
  langchain-huggingface==0.3.1