Arnavkumar01 commited on
Commit
b4343ae
·
1 Parent(s): 6317391

Initial commit of my FastAPI application

Browse files
Files changed (4) hide show
  1. Dockerfile +22 -0
  2. docker-compose.yml +26 -0
  3. main.py +207 -0
  4. requirements.txt +15 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# 1. Start with a lean and official Python base image
FROM python:3.10-slim

# PostgreSQL client headers for the database driver stack
RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*

# 2. Set the working directory inside the container
WORKDIR /app

# 3. Copy only the requirements file first to leverage Docker's caching
COPY requirements.txt .

# 4. Install all Python dependencies from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# 5. Copy the application code into the container.
# The app is a single module, main.py, placed at /app/main.py.
COPY main.py .

# Default port for local runs; PaaS platforms (Heroku/Railway/etc.) override
# this at runtime. Without a default, the shell-form CMD below expands $PORT
# to an empty string and gunicorn fails to bind.
ENV PORT=8000

# 6. Run the application with Gunicorn managing a Uvicorn worker.
# Shell form is used deliberately so the shell expands $PORT.
CMD gunicorn --bind 0.0.0.0:$PORT --workers 1 --worker-class uvicorn.workers.UvicornWorker --timeout 0 "main:app"
docker-compose.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# docker-compose.yml
services:
  # The RAG voice-agent API service
  rag-api:
    # Build the image from the Dockerfile in the current directory (.)
    build: .

    # Load environment variables (API keys, DB URL, shared secret)
    # from the .env file in this directory.
    env_file:
      - .env

    # Map port 8000 on the host to port 8000 inside the container.
    # The FastAPI app is reachable at http://localhost:8000
    ports:
      - "8000:8000"

    # Mount the project directory over /app so host-side edits are visible
    # inside the container for live reloading. (The previous mount,
    # ./app:/app/app, never covered main.py — which the Dockerfile copies
    # to /app/main.py — so --reload had nothing to watch.)
    volumes:
      - .:/app

    # Override the image's default command to enable --reload for development,
    # so Gunicorn restarts automatically when a file is saved.
    command: gunicorn --bind 0.0.0.0:8000 --workers 1 --worker-class uvicorn.workers.UvicornWorker --timeout 0 --reload "main:app"
main.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import logging
4
+ import json
5
+ import re
6
+ from contextlib import asynccontextmanager
7
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, status
8
+ from fastapi.concurrency import run_in_threadpool # Import for handling blocking calls
9
+ from pydantic import BaseModel
10
+ from dotenv import load_dotenv
11
+ from openai import OpenAI
12
+ from elevenlabs.client import ElevenLabs
13
+ from langchain_huggingface import HuggingFaceEmbeddings
14
+ from langchain_postgres.vectorstores import PGVector
15
+ from sqlalchemy import create_engine
16
+
17
# --- SETUP ---
# Suppress noisy logs from underlying libraries
# (TensorFlow is presumably pulled in transitively by the embedding stack —
# NOTE(review): confirm; it is not imported directly here).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
logging.getLogger('tensorflow').setLevel(logging.ERROR)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()
NEON_DATABASE_URL = os.getenv("NEON_DATABASE_URL")    # Postgres/pgvector connection string
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")          # Whisper transcription + chat models
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")  # text-to-speech
SHARED_SECRET = os.getenv("SHARED_SECRET")            # websocket auth token (x-auth-token header)

# --- CONFIGURATION ---
COLLECTION_NAME = "real_estate_embeddings"   # PGVector collection to search
EMBEDDING_MODEL = "hkunlp/instructor-large"  # HuggingFace model used for query embeddings
ELEVENLABS_VOICE_NAME = "Leo"                # TTS voice for spoken replies
PLANNER_MODEL = "gpt-4o-mini"                # cheaper model that plans retrieval
ANSWERER_MODEL = "gpt-4o"                    # stronger model that writes the final answer
# Catalogue of source tables, passed verbatim to the planner prompt so the
# LLM can choose a "filter_table" value.
TABLE_DESCRIPTIONS = """
- "ongoing_projects_source": Details about projects currently under construction.
- "upcoming_projects_source": Information on future planned projects.
- "completed_projects_source": Facts about projects that are already finished.
- "historical_sales_source": Specific sales records, including price, date, and property ID.
- "past_customers_source": Information about previous customers.
- "feedback_source": Customer feedback and ratings for projects.
"""

# --- GLOBAL VARIABLES FOR LIFESPAN ---
# These will be populated at startup by the lifespan handler below.
embeddings = None     # HuggingFaceEmbeddings instance
vector_store = None   # PGVector store bound to the Neon database
49
+
50
# --- FASTAPI LIFESPAN MANAGEMENT ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the embedding model and connect to the vector store at startup.

    Populates the module-level ``embeddings`` and ``vector_store`` globals
    used by the request handlers; disposes the DB engine on shutdown.
    """
    global embeddings, vector_store

    # Fail fast with a clear message instead of an opaque SQLAlchemy error
    # deep inside create_engine() when the env var is missing.
    if not NEON_DATABASE_URL:
        raise RuntimeError("NEON_DATABASE_URL environment variable is not set.")

    logging.info(f"Initializing embedding model: '{EMBEDDING_MODEL}'...")
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    logging.info("Embedding model loaded successfully.")

    logging.info(f"Connecting to vector store '{COLLECTION_NAME}'...")
    engine = create_engine(NEON_DATABASE_URL)
    vector_store = PGVector(
        connection=engine,
        collection_name=COLLECTION_NAME,
        embeddings=embeddings,  # keyword name required by langchain-postgres
    )
    logging.info("Successfully connected to the vector store.")
    yield
    # Shutdown: release pooled database connections.
    engine.dispose()
    logging.info("Application shutting down.")
70
+
71
# --- INITIALIZE FastAPI APP WITH LIFESPAN ---
app = FastAPI(lifespan=lifespan)
# Module-level API clients shared by all requests. Both are synchronous SDK
# clients, so calls to them must not run directly on the event loop.
client_openai = OpenAI(api_key=OPENAI_API_KEY)
client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
75
+
76
+
77
# --- PROMPTS ---
# NOTE: this is an f-string, so {TABLE_DESCRIPTIONS} is substituted at import
# time, while the doubled braces {{user_query}} survive as a literal
# {user_query} placeholder for the later .format(user_query=...) call.
QUERY_FORMULATION_PROMPT = f"""
You are a query analysis agent. Your task is to transform a user's query into a precise search query for a vector database and determine the correct table to filter by.
**Available Tables:**
{TABLE_DESCRIPTIONS}
**User's Query:** "{{user_query}}"
**Your Task:**
1. Rephrase the user's query into a clear, keyword-focused English question suitable for a database search.
2. Identify the single most relevant table from the list above to find the answer.
3. Respond ONLY with a JSON object containing "search_query" and "filter_table".
"""
# System prompt for the answerer model: persona, fixed business facts, and
# response rules. The retrieved CONTEXT is supplied separately as a second
# system message by get_agent_response().
ANSWER_SYSTEM_PROMPT = """
You are an expert AI assistant for a premier real estate developer.
## YOUR PERSONA
- You are professional, helpful, and highly knowledgeable. Your tone should be polite and articulate.
## CORE BUSINESS KNOWLEDGE
- **Operational Cities:** We are currently operational in Mohali, Delhi NCR, and Chandigarh.
- **Property Types:** We offer luxury apartments, villas, and commercial properties.
- **Budget Range:** Our residential properties typically range from 60 lakhs to 15 crores.
## CORE RULES
1. **Language Adaptation:** If the user's original query was in Hinglish, respond in Hinglish. If in English, respond in English.
2. **Fact-Based Answers:** Use the provided CONTEXT to answer the user's question. If the context is empty, use your Core Business Knowledge.
3. **Stay on Topic:** Only answer questions related to real estate.
"""
101
+
102
# --- HELPER FUNCTIONS ---
def transcribe_audio(audio_bytes: bytes) -> str:
    """Transcribe raw audio bytes with Whisper.

    Blocking — callers run this via ``run_in_threadpool``. Returns "" on any
    failure so the caller can simply skip the message.
    """
    try:
        # The OpenAI SDK accepts a (filename, bytes) tuple directly, so no
        # temp file is needed. The previous fixed "temp_audio.wav" path was a
        # race between concurrent requests and was never cleaned up.
        transcript = client_openai.audio.transcriptions.create(
            model="whisper-1",
            file=("audio.wav", audio_bytes),
        )
        return transcript.text
    except Exception as e:
        logging.error(f"Error during transcription: {e}")
        return ""
113
+
114
async def formulate_search_plan(user_query: str) -> dict:
    """Ask the planner LLM to rewrite the query and pick a table filter.

    Returns a dict with "search_query" and "filter_table". Falls back to the
    raw query with no filter if the LLM call or JSON parsing fails.
    """
    logging.info("Formulating search plan with Planner LLM...")
    try:
        # The sync OpenAI client would block the event loop inside this async
        # function; run the call in a worker thread instead.
        response = await run_in_threadpool(
            client_openai.chat.completions.create,
            model=PLANNER_MODEL,
            messages=[{"role": "user", "content": QUERY_FORMULATION_PROMPT.format(user_query=user_query)}],
            response_format={"type": "json_object"},  # forces valid JSON output
            temperature=0.0,  # deterministic planning
        )
        plan = json.loads(response.choices[0].message.content)
        logging.info(f"Search plan received: {plan}")
        return plan
    except Exception as e:
        logging.error(f"Error in Planner LLM call: {e}")
        return {"search_query": user_query, "filter_table": None}
129
+
130
async def get_agent_response(user_text: str) -> str:
    """Run the full RAG pipeline for a text query and return the answer text.

    Steps: plan the retrieval, run a filtered similarity search, then ask the
    answerer model with the retrieved context attached.
    """
    search_plan = await formulate_search_plan(user_text)
    search_query = search_plan.get("search_query", user_text)
    filter_table = search_plan.get("filter_table")

    search_filter = {"source_table": filter_table} if filter_table else {}
    if search_filter:
        logging.info(f"Applying filter: {search_filter}")

    # similarity_search is synchronous (it embeds the query and hits
    # Postgres); keep it off the event loop.
    retrieved_docs = await run_in_threadpool(
        vector_store.similarity_search, search_query, k=3, filter=search_filter
    )
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
    logging.info(f"Retrieved Context: {context_text[:500]}...")

    final_prompt_messages = [
        {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
        {"role": "system", "content": f"Use the following CONTEXT to answer:\n{context_text}"},
        {"role": "user", "content": f"My original question was: '{user_text}'"},
    ]
    # Sync OpenAI client call — also moved to a worker thread to avoid
    # blocking the event loop.
    final_response = await run_in_threadpool(
        client_openai.chat.completions.create,
        model=ANSWERER_MODEL,
        messages=final_prompt_messages,
    )
    return final_response.choices[0].message.content
153
+
154
# --- API Endpoints ---
class TextQuery(BaseModel):
    """Request body for POST /test-text-query."""
    # The user's question as plain text.
    query: str
157
+
158
@app.post("/test-text-query")
async def test_text_query_endpoint(query: TextQuery):
    """Endpoint for text-based testing via Swagger UI."""
    # Log the inbound question, run the RAG pipeline, log and wrap the answer.
    logging.info(f"Received text query: {query.query}")
    answer = await get_agent_response(query.query)
    logging.info(f"Generated text response: {answer}")
    return {"response": answer}
165
+
166
@app.websocket("/listen")
async def websocket_endpoint(websocket: WebSocket):
    """Authenticated voice loop: receive base64 audio, reply with base64 TTS.

    Protocol: client sends JSON messages {"audio": <base64 wav>}; the server
    transcribes, runs the RAG agent, synthesizes speech, and replies with
    {"audio": <base64 audio>}.
    """
    # Shared-secret auth via header; also rejects when the secret itself is
    # unconfigured, so a missing env var never means "open access".
    auth_token = websocket.headers.get("x-auth-token")
    if not SHARED_SECRET or auth_token != SHARED_SECRET:
        logging.warning("Authentication failed. Closing connection.")
        await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
        return

    await websocket.accept()
    logging.info("Authentication successful. Call connected.")
    try:
        while True:
            message = await websocket.receive_json()
            audio_base64 = message.get('audio')
            if not audio_base64:
                continue

            # Blocking transcription runs in a worker thread.
            user_text = await run_in_threadpool(
                transcribe_audio, base64.b64decode(audio_base64)
            )
            logging.info(f"User said: {user_text}")
            if not user_text.strip():
                continue

            agent_response_text = await get_agent_response(user_text)
            logging.info(f"AI Responded: {agent_response_text}")

            # Blocking audio generation runs in a worker thread.
            # NOTE(review): elevenlabs>=2 removed ElevenLabs.generate in favor
            # of client.text_to_speech.convert(voice_id=..., model_id=...);
            # with the pinned elevenlabs==2.17.0 this call likely needs
            # migrating — confirm against the installed SDK.
            audio_output = await run_in_threadpool(
                client_elevenlabs.generate,
                text=agent_response_text,
                voice=ELEVENLABS_VOICE_NAME,
                model="eleven_multilingual_v2"
            )
            # The SDK may yield an iterator of byte chunks rather than raw
            # bytes; normalize before base64-encoding.
            if not isinstance(audio_output, (bytes, bytearray)):
                audio_output = b"".join(audio_output)
            response_audio_base64 = base64.b64encode(audio_output).decode('utf-8')
            await websocket.send_json({'audio': response_audio_base64})

    except WebSocketDisconnect:
        # The client hung up; the socket is already closed. Do NOT close()
        # again — the old `finally: await websocket.close()` raised
        # RuntimeError on this path.
        logging.info("Call disconnected.")
    except Exception as e:
        logging.error(f"An error occurred in the main loop: {e}", exc_info=True)
        # Close our side only after an unexpected error; guard against the
        # socket having died while we were handling it.
        try:
            await websocket.close()
        except RuntimeError:
            pass
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.13
2
+ uvicorn==0.34.3
3
+ websockets==15.0.1
4
+ openai==2.3.0
5
+ elevenlabs==2.17.0
6
+ gunicorn==23.0.0
7
+ psycopg2-binary==2.9.10
8
+ pandas==2.2.3
9
+ python-dotenv==1.1.0
10
+ sentence-transformers==5.1.1
11
+ langchain-huggingface==0.3.1
12
+ langchain-postgres==0.0.15
13
+ langchain-openai==0.3.35
14
+ langdetect==1.0.9
15
+ SQLAlchemy==2.0.40