sajith-0701 commited on
Commit
03faf26
·
1 Parent(s): 1cff1e5
.gitignore CHANGED
@@ -8,3 +8,7 @@ uploads/
8
  .next
9
  dist
10
  .vercel
 
 
 
 
 
8
  .next
9
  dist
10
  .vercel
11
+ inter
12
+ Resume.pdf
13
+ LANGGRAPH_AND_TOOLS.md
14
+ WORKFLOW.md
backend/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes the backend directory a Python package.
backend/config.py CHANGED
@@ -1,4 +1,5 @@
1
  from pydantic_settings import BaseSettings
 
2
  from functools import lru_cache
3
  import os
4
  from dotenv import load_dotenv
@@ -38,6 +39,26 @@ class Settings(BaseSettings):
38
  env_file = ".env"
39
  extra = "ignore"
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  @lru_cache()
43
  def get_settings() -> Settings:
 
1
  from pydantic_settings import BaseSettings
2
+ from pydantic import field_validator
3
  from functools import lru_cache
4
  import os
5
  from dotenv import load_dotenv
 
39
  env_file = ".env"
40
  extra = "ignore"
41
 
42
+ @field_validator("MONGO_URI")
43
+ @classmethod
44
+ def validate_mongo_uri(cls, value: str) -> str:
45
+ v = (value or "").strip().lower()
46
+ if "localhost" in v or "127.0.0.1" in v:
47
+ raise ValueError("MONGO_URI must point to MongoDB Atlas, not localhost")
48
+ if not v.startswith("mongodb+srv://"):
49
+ raise ValueError("MONGO_URI must use mongodb+srv:// for cloud deployment")
50
+ return value
51
+
52
+ @field_validator("REDIS_URL")
53
+ @classmethod
54
+ def validate_redis_url(cls, value: str) -> str:
55
+ v = (value or "").strip().lower()
56
+ if "localhost" in v or "127.0.0.1" in v:
57
+ raise ValueError("REDIS_URL must point to a cloud Redis instance, not localhost")
58
+ if not (v.startswith("redis://") or v.startswith("rediss://")):
59
+ raise ValueError("REDIS_URL must start with redis:// or rediss://")
60
+ return value
61
+
62
 
63
  @lru_cache()
64
  def get_settings() -> Settings:
backend/database.py CHANGED
@@ -36,9 +36,17 @@ async def connect_db():
36
  )
37
 
38
  # Test connections
39
- await mongo_client.admin.command("ping")
40
- await redis_client.ping()
41
- print("✅ Connected to MongoDB Atlas and Redis")
 
 
 
 
 
 
 
 
42
 
43
 
44
  async def close_db():
 
36
  )
37
 
38
  # Test connections
39
+ try:
40
+ await mongo_client.admin.command("ping")
41
+ print("✅ Connected to MongoDB Atlas")
42
+ except Exception as e:
43
+ print(f"❌ Failed to connect to MongoDB: {e}")
44
+
45
+ try:
46
+ await redis_client.ping()
47
+ print("✅ Connected to Redis")
48
+ except Exception as e:
49
+ print(f"⚠️ Failed to connect to Redis (URL might be invalid or unreachable): {e}")
50
 
51
 
52
  async def close_db():
backend/main.py CHANGED
@@ -4,6 +4,8 @@ from fastapi.middleware.cors import CORSMiddleware
4
  from fastapi.staticfiles import StaticFiles
5
  import os
6
 
 
 
7
  from config import get_settings
8
  from database import connect_db, close_db
9
 
 
4
  from fastapi.staticfiles import StaticFiles
5
  import os
6
 
7
+
8
+
9
  from config import get_settings
10
  from database import connect_db, close_db
11
 
backend/models/collections.py CHANGED
@@ -6,6 +6,8 @@ SKILLS = "skills"
6
  JOB_ROLES = "job_roles"
7
  ROLE_REQUIREMENTS = "role_requirements"
8
  QUESTIONS = "questions"
 
 
9
  SESSIONS = "sessions"
10
  ANSWERS = "answers"
11
  RESULTS = "results"
 
6
  JOB_ROLES = "job_roles"
7
  ROLE_REQUIREMENTS = "role_requirements"
8
  QUESTIONS = "questions"
9
+ TOPICS = "topics"
10
+ TOPIC_QUESTIONS = "topic_questions"
11
  SESSIONS = "sessions"
12
  ANSWERS = "answers"
13
  RESULTS = "results"
backend/requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  fastapi==0.115.0
2
  uvicorn[standard]==0.30.0
3
  motor==3.5.0
 
4
  redis[hiredis]==5.0.0
5
  python-jose[cryptography]==3.3.0
6
  passlib[bcrypt]==1.7.4
@@ -8,6 +9,10 @@ bcrypt==4.0.1
8
  python-multipart==0.0.9
9
  google-genai==1.5.0
10
  langgraph==0.2.0
 
11
  pydantic-settings==2.5.0
 
12
  python-dotenv==1.0.1
13
  aiofiles==24.1.0
 
 
 
1
  fastapi==0.115.0
2
  uvicorn[standard]==0.30.0
3
  motor==3.5.0
4
+ pymongo<4.9
5
  redis[hiredis]==5.0.0
6
  python-jose[cryptography]==3.3.0
7
  passlib[bcrypt]==1.7.4
 
9
  python-multipart==0.0.9
10
  google-genai==1.5.0
11
  langgraph==0.2.0
12
+ langchain-core==0.2.43
13
  pydantic-settings==2.5.0
14
+ email-validator==2.2.0
15
  python-dotenv==1.0.1
16
  aiofiles==24.1.0
17
+ pypdf==5.4.0
18
+ python-docx==1.1.2
backend/routers/admin.py CHANGED
@@ -1,14 +1,20 @@
1
- from fastapi import APIRouter, Depends, HTTPException, Query
 
2
  from auth.jwt import require_role, get_current_user
3
  from schemas.admin import (
4
  JobRoleCreate, JobRoleUpdate,
5
  QuestionCreate, QuestionUpdate,
6
  RoleRequirementCreate,
 
7
  )
8
  from services.admin_service import (
9
  create_role, update_role, delete_role, list_roles,
10
- create_question, update_question, delete_question, list_questions,
 
 
11
  create_requirement, list_requirements, delete_requirement,
 
 
12
  )
13
  from services.analytics_service import get_admin_analytics
14
 
@@ -69,10 +75,18 @@ async def delete_role_endpoint(
69
  @router.get("/questions")
70
  async def get_questions(
71
  role_id: str = Query(None),
 
 
 
72
  current_user: dict = Depends(require_role("admin")),
73
  ):
74
  """List questions, optionally filtered by role."""
75
- questions = await list_questions(role_id)
 
 
 
 
 
76
  return {"questions": questions}
77
 
78
 
@@ -84,6 +98,8 @@ async def create_question_endpoint(
84
  """Create a new question (admin only)."""
85
  result = await create_question(
86
  role_id=request.role_id,
 
 
87
  question=request.question,
88
  difficulty=request.difficulty,
89
  category=request.category,
@@ -92,6 +108,64 @@ async def create_question_endpoint(
92
  return result
93
 
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  @router.put("/questions/{question_id}")
96
  async def update_question_endpoint(
97
  question_id: str,
@@ -130,6 +204,76 @@ async def get_requirements(
130
  return {"requirements": requirements}
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  @router.post("/requirements")
134
  async def create_requirement_endpoint(
135
  request: RoleRequirementCreate,
@@ -165,3 +309,63 @@ async def get_analytics(
165
  """Get admin analytics dashboard data."""
166
  analytics = await get_admin_analytics()
167
  return analytics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from fastapi import APIRouter, Depends, HTTPException, Query, UploadFile, File, Form
3
  from auth.jwt import require_role, get_current_user
4
  from schemas.admin import (
5
  JobRoleCreate, JobRoleUpdate,
6
  QuestionCreate, QuestionUpdate,
7
  RoleRequirementCreate,
8
+ TopicCreate, TopicUpdate, TopicPublishUpdate,
9
  )
10
  from services.admin_service import (
11
  create_role, update_role, delete_role, list_roles,
12
+ create_question, update_question, delete_question, list_questions, get_question_by_id,
13
+ create_topic, list_topics, update_topic, delete_topic, set_topic_publish_status,
14
+ import_questions_from_pdf,
15
  create_requirement, list_requirements, delete_requirement,
16
+ list_quit_interviews, list_admin_reports, get_admin_report_detail,
17
+ list_admin_users, delete_admin_user,
18
  )
19
  from services.analytics_service import get_admin_analytics
20
 
 
75
  @router.get("/questions")
76
  async def get_questions(
77
  role_id: str = Query(None),
78
+ topic_id: str = Query(None),
79
+ interview_type: str = Query(None),
80
+ difficulty: str = Query(None),
81
  current_user: dict = Depends(require_role("admin")),
82
  ):
83
  """List questions, optionally filtered by role."""
84
+ questions = await list_questions(
85
+ role_id=role_id,
86
+ topic_id=topic_id,
87
+ interview_type=interview_type,
88
+ difficulty=difficulty,
89
+ )
90
  return {"questions": questions}
91
 
92
 
 
98
  """Create a new question (admin only)."""
99
  result = await create_question(
100
  role_id=request.role_id,
101
+ topic_id=request.topic_id,
102
+ interview_type=request.interview_type,
103
  question=request.question,
104
  difficulty=request.difficulty,
105
  category=request.category,
 
108
  return result
109
 
110
 
111
+ @router.get("/questions/{question_id}")
112
+ async def get_question_by_id_endpoint(
113
+ question_id: str,
114
+ current_user: dict = Depends(require_role("admin")),
115
+ ):
116
+ """Get one question by id (admin only)."""
117
+ try:
118
+ question = await get_question_by_id(question_id)
119
+ return question
120
+ except ValueError as e:
121
+ raise HTTPException(status_code=404, detail=str(e))
122
+
123
+
124
+ @router.post("/questions/upload")
125
+ async def upload_questions_pdf_endpoint(
126
+ interview_type: str = Form("resume"),
127
+ role_id: str | None = Form(None),
128
+ topic_id: str | None = Form(None),
129
+ subjects: str | None = Form(None),
130
+ file: UploadFile = File(...),
131
+ current_user: dict = Depends(require_role("admin")),
132
+ ):
133
+ """Upload a PDF and extract interview questions (admin only)."""
134
+ if not file.filename:
135
+ raise HTTPException(status_code=400, detail="No file provided")
136
+
137
+ if not file.filename.lower().endswith(".pdf"):
138
+ raise HTTPException(status_code=400, detail="Only PDF files are supported for question import")
139
+
140
+ content = await file.read()
141
+ if len(content) > 10 * 1024 * 1024:
142
+ raise HTTPException(status_code=400, detail="File too large. Maximum 10MB")
143
+
144
+ parsed_subjects = []
145
+ if subjects:
146
+ try:
147
+ parsed_subjects = json.loads(subjects)
148
+ if not isinstance(parsed_subjects, list):
149
+ raise ValueError
150
+ except Exception:
151
+ parsed_subjects = [s.strip() for s in subjects.split(",") if s.strip()]
152
+
153
+ try:
154
+ result = await import_questions_from_pdf(
155
+ role_id=role_id,
156
+ topic_id=topic_id,
157
+ interview_type=interview_type,
158
+ subjects=parsed_subjects,
159
+ filename=file.filename,
160
+ file_content=content,
161
+ )
162
+ return result
163
+ except ValueError as e:
164
+ raise HTTPException(status_code=400, detail=str(e))
165
+ except Exception as e:
166
+ raise HTTPException(status_code=500, detail=f"Failed to import questions from PDF: {str(e)}")
167
+
168
+
169
  @router.put("/questions/{question_id}")
170
  async def update_question_endpoint(
171
  question_id: str,
 
204
  return {"requirements": requirements}
205
 
206
 
207
+ # ─── Topics ───
208
+
209
+ @router.get("/topics")
210
+ async def get_topics(current_user: dict = Depends(get_current_user)):
211
+ """List all topic categories (accessible by all authenticated users)."""
212
+ only_published = current_user.get("role") != "admin"
213
+ topics = await list_topics(only_published=only_published)
214
+ return {"topics": topics}
215
+
216
+
217
+ @router.post("/topics")
218
+ async def create_topic_endpoint(
219
+ request: TopicCreate,
220
+ current_user: dict = Depends(require_role("admin")),
221
+ ):
222
+ """Create a topic category (admin only)."""
223
+ try:
224
+ result = await create_topic(name=request.name, description=request.description)
225
+ return result
226
+ except ValueError as e:
227
+ raise HTTPException(status_code=400, detail=str(e))
228
+
229
+
230
+ @router.put("/topics/{topic_id}")
231
+ async def update_topic_endpoint(
232
+ topic_id: str,
233
+ request: TopicUpdate,
234
+ current_user: dict = Depends(require_role("admin")),
235
+ ):
236
+ """Update a topic category (admin only)."""
237
+ try:
238
+ result = await update_topic(topic_id, request.model_dump())
239
+ return result
240
+ except ValueError as e:
241
+ raise HTTPException(status_code=400, detail=str(e))
242
+
243
+
244
+ @router.delete("/topics/{topic_id}")
245
+ async def delete_topic_endpoint(
246
+ topic_id: str,
247
+ current_user: dict = Depends(require_role("admin")),
248
+ ):
249
+ """Delete a topic category and its topic questions (admin only)."""
250
+ success = await delete_topic(topic_id)
251
+ if not success:
252
+ raise HTTPException(status_code=404, detail="Topic not found")
253
+ return {"message": "Topic deleted"}
254
+
255
+
256
+ @router.put("/topics/{topic_id}/publish")
257
+ async def publish_topic_endpoint(
258
+ topic_id: str,
259
+ request: TopicPublishUpdate,
260
+ current_user: dict = Depends(require_role("admin")),
261
+ ):
262
+ """Publish/unpublish a topic for student interview selection (admin only)."""
263
+ try:
264
+ result = await set_topic_publish_status(
265
+ topic_id,
266
+ request.is_published,
267
+ timer_enabled=request.timer_enabled,
268
+ timer_seconds=request.timer_seconds,
269
+ )
270
+ return result
271
+ except ValueError as e:
272
+ detail = str(e)
273
+ status_code = 404 if "not found" in detail.lower() else 400
274
+ raise HTTPException(status_code=status_code, detail=detail)
275
+
276
+
277
  @router.post("/requirements")
278
  async def create_requirement_endpoint(
279
  request: RoleRequirementCreate,
 
309
  """Get admin analytics dashboard data."""
310
  analytics = await get_admin_analytics()
311
  return analytics
312
+
313
+
314
+ @router.get("/quit-interviews")
315
+ async def get_quit_interviews(
316
+ limit: int = Query(100, ge=1, le=500),
317
+ current_user: dict = Depends(require_role("admin")),
318
+ ):
319
+ """Get full details about interviews quit by users."""
320
+ items = await list_quit_interviews(limit=limit)
321
+ return {"items": items}
322
+
323
+
324
+ @router.get("/reports")
325
+ async def get_admin_reports(
326
+ limit: int = Query(100, ge=1, le=500),
327
+ current_user: dict = Depends(require_role("admin")),
328
+ ):
329
+ """Get all interview report summaries for admin."""
330
+ items = await list_admin_reports(limit=limit)
331
+ return {"items": items}
332
+
333
+
334
+ @router.get("/reports/{session_id}")
335
+ async def get_admin_report_by_session(
336
+ session_id: str,
337
+ current_user: dict = Depends(require_role("admin")),
338
+ ):
339
+ """Get full report details for a specific interview session (admin only)."""
340
+ try:
341
+ item = await get_admin_report_detail(session_id=session_id)
342
+ return item
343
+ except ValueError as e:
344
+ raise HTTPException(status_code=404, detail=str(e))
345
+
346
+
347
+ @router.get("/users")
348
+ async def get_admin_users(
349
+ limit: int = Query(500, ge=1, le=1000),
350
+ current_user: dict = Depends(require_role("admin")),
351
+ ):
352
+ """List users for admin management."""
353
+ items = await list_admin_users(limit=limit)
354
+ return {"items": items}
355
+
356
+
357
+ @router.delete("/users/{user_id}")
358
+ async def delete_admin_user_endpoint(
359
+ user_id: str,
360
+ current_user: dict = Depends(require_role("admin")),
361
+ ):
362
+ """Delete a student user and related records (admin only)."""
363
+ try:
364
+ success = await delete_admin_user(user_id, current_user["user_id"])
365
+ if not success:
366
+ raise HTTPException(status_code=404, detail="User not found")
367
+ return {"message": "User deleted"}
368
+ except ValueError as e:
369
+ detail = str(e)
370
+ status_code = 404 if "not found" in detail.lower() else 400
371
+ raise HTTPException(status_code=status_code, detail=detail)
backend/routers/interview.py CHANGED
@@ -3,10 +3,11 @@ from auth.jwt import get_current_user
3
  from schemas.interview import (
4
  StartInterviewRequest,
5
  SubmitAnswerRequest,
 
6
  InterviewStartResponse,
7
  AnswerResponse,
8
  )
9
- from services.interview_service import start_interview, submit_answer
10
  from services.evaluation_service import generate_report
11
 
12
  router = APIRouter()
@@ -22,6 +23,9 @@ async def start_interview_endpoint(
22
  result = await start_interview(
23
  user_id=current_user["user_id"],
24
  role_id=request.role_id,
 
 
 
25
  )
26
  return result
27
  except Exception as e:
@@ -47,6 +51,37 @@ async def submit_answer_endpoint(
47
  raise HTTPException(status_code=500, detail=str(e))
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  @router.get("/report")
51
  async def get_interview_report(
52
  session_id: str,
 
3
  from schemas.interview import (
4
  StartInterviewRequest,
5
  SubmitAnswerRequest,
6
+ QuitInterviewRequest,
7
  InterviewStartResponse,
8
  AnswerResponse,
9
  )
10
+ from services.interview_service import start_interview, submit_answer, quit_interview
11
  from services.evaluation_service import generate_report
12
 
13
  router = APIRouter()
 
23
  result = await start_interview(
24
  user_id=current_user["user_id"],
25
  role_id=request.role_id,
26
+ custom_role=request.custom_role,
27
+ interview_type=request.interview_type,
28
+ topic_id=request.topic_id,
29
  )
30
  return result
31
  except Exception as e:
 
51
  raise HTTPException(status_code=500, detail=str(e))
52
 
53
 
54
+ @router.post("/quit")
55
+ async def quit_interview_endpoint(
56
+ request: QuitInterviewRequest,
57
+ current_user: dict = Depends(get_current_user),
58
+ ):
59
+ """Quit an in-progress interview and generate a partial report if answers exist."""
60
+ try:
61
+ quit_result = await quit_interview(
62
+ session_id=request.session_id,
63
+ user_id=current_user["user_id"],
64
+ )
65
+
66
+ report = None
67
+ if quit_result.get("report_generated"):
68
+ report = await generate_report(
69
+ session_id=request.session_id,
70
+ user_id=current_user["user_id"],
71
+ )
72
+
73
+ return {
74
+ "session_id": request.session_id,
75
+ "report_generated": bool(report),
76
+ "report": report,
77
+ "message": quit_result.get("message", "Interview quit"),
78
+ }
79
+ except ValueError as e:
80
+ raise HTTPException(status_code=400, detail=str(e))
81
+ except Exception as e:
82
+ raise HTTPException(status_code=500, detail=str(e))
83
+
84
+
85
  @router.get("/report")
86
  async def get_interview_report(
87
  session_id: str,
backend/routers/profile.py CHANGED
@@ -1,8 +1,9 @@
1
- from fastapi import APIRouter, Depends
2
  from auth.jwt import get_current_user
3
  from database import get_db
4
  from models.collections import USERS, RESUMES, SKILLS
5
  from utils.helpers import str_objectid
 
6
  from bson import ObjectId
7
 
8
  router = APIRouter()
@@ -32,6 +33,7 @@ async def get_profile(current_user: dict = Depends(get_current_user)):
32
  "filename": resume.get("original_filename", ""),
33
  "uploaded_at": resume.get("uploaded_at", ""),
34
  "parsed_text": resume.get("parsed_text", ""),
 
35
  }
36
  else:
37
  profile["resume"] = None
@@ -39,6 +41,7 @@ async def get_profile(current_user: dict = Depends(get_current_user)):
39
  # Get skills
40
  skills_doc = await db[SKILLS].find_one({"user_id": current_user["user_id"]})
41
  profile["skills"] = skills_doc.get("skills", []) if skills_doc else []
 
42
 
43
  return profile
44
 
@@ -49,7 +52,7 @@ async def update_user_skills(
49
  ):
50
  """Update the current user's extracted skills."""
51
  db = get_db()
52
- skills = request_data.get("skills", [])
53
 
54
  # Upsert the skills document for this user
55
  await db[SKILLS].update_one(
@@ -59,3 +62,24 @@ async def update_user_skills(
59
  )
60
 
61
  return {"message": "Skills updated successfully", "skills": skills}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
  from auth.jwt import get_current_user
3
  from database import get_db
4
  from models.collections import USERS, RESUMES, SKILLS
5
  from utils.helpers import str_objectid
6
+ from utils.skills import normalize_skill_list, cluster_skills
7
  from bson import ObjectId
8
 
9
  router = APIRouter()
 
33
  "filename": resume.get("original_filename", ""),
34
  "uploaded_at": resume.get("uploaded_at", ""),
35
  "parsed_text": resume.get("parsed_text", ""),
36
+ "parsed_data": resume.get("parsed_data", {}),
37
  }
38
  else:
39
  profile["resume"] = None
 
41
  # Get skills
42
  skills_doc = await db[SKILLS].find_one({"user_id": current_user["user_id"]})
43
  profile["skills"] = skills_doc.get("skills", []) if skills_doc else []
44
+ profile["clustered_skills"] = cluster_skills(profile["skills"])
45
 
46
  return profile
47
 
 
52
  ):
53
  """Update the current user's extracted skills."""
54
  db = get_db()
55
+ skills = normalize_skill_list(request_data.get("skills", []))
56
 
57
  # Upsert the skills document for this user
58
  await db[SKILLS].update_one(
 
62
  )
63
 
64
  return {"message": "Skills updated successfully", "skills": skills}
65
+
66
+
67
+ @router.put("/resume-data")
68
+ async def update_resume_data(
69
+ request_data: dict,
70
+ current_user: dict = Depends(get_current_user)
71
+ ):
72
+ """Update the detailed parsed data of the user's resume."""
73
+ db = get_db()
74
+ parsed_data = request_data.get("parsed_data", {})
75
+
76
+ # Update only the parsed_data property inside the RESUMES collection
77
+ result = await db[RESUMES].update_one(
78
+ {"user_id": current_user["user_id"]},
79
+ {"$set": {"parsed_data": parsed_data}}
80
+ )
81
+
82
+ if result.matched_count == 0:
83
+ raise HTTPException(status_code=404, detail="Resume not found. Upload a resume first.")
84
+
85
+ return {"message": "Resume details updated successfully", "parsed_data": parsed_data}
backend/schemas/admin.py CHANGED
@@ -23,7 +23,9 @@ class JobRoleResponse(BaseModel):
23
 
24
 
25
  class QuestionCreate(BaseModel):
26
- role_id: str
 
 
27
  question: str
28
  difficulty: str = "medium"
29
  category: Optional[str] = None
@@ -39,13 +41,38 @@ class QuestionUpdate(BaseModel):
39
 
40
  class QuestionResponse(BaseModel):
41
  id: str
42
- role_id: str
 
 
43
  question: str
44
  difficulty: str
45
  category: Optional[str] = None
46
  created_at: str
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  class RoleRequirementCreate(BaseModel):
50
  role_id: str
51
  skill: str
 
23
 
24
 
25
  class QuestionCreate(BaseModel):
26
+ role_id: Optional[str] = None
27
+ topic_id: Optional[str] = None
28
+ interview_type: str = "resume"
29
  question: str
30
  difficulty: str = "medium"
31
  category: Optional[str] = None
 
41
 
42
  class QuestionResponse(BaseModel):
43
  id: str
44
+ role_id: Optional[str] = None
45
+ topic_id: Optional[str] = None
46
+ interview_type: str = "resume"
47
  question: str
48
  difficulty: str
49
  category: Optional[str] = None
50
  created_at: str
51
 
52
 
53
+ class TopicCreate(BaseModel):
54
+ name: str
55
+ description: Optional[str] = None
56
+
57
+
58
+ class TopicUpdate(BaseModel):
59
+ name: Optional[str] = None
60
+ description: Optional[str] = None
61
+
62
+
63
+ class TopicPublishUpdate(BaseModel):
64
+ is_published: bool
65
+ timer_enabled: Optional[bool] = None
66
+ timer_seconds: Optional[int] = None
67
+
68
+
69
+ class TopicResponse(BaseModel):
70
+ id: str
71
+ name: str
72
+ description: Optional[str] = None
73
+ created_at: str
74
+
75
+
76
  class RoleRequirementCreate(BaseModel):
77
  role_id: str
78
  skill: str
backend/schemas/interview.py CHANGED
@@ -4,6 +4,9 @@ from typing import Optional, List, Dict
4
 
5
  class StartInterviewRequest(BaseModel):
6
  role_id: Optional[str] = None
 
 
 
7
 
8
 
9
  class SubmitAnswerRequest(BaseModel):
@@ -12,6 +15,10 @@ class SubmitAnswerRequest(BaseModel):
12
  answer: str
13
 
14
 
 
 
 
 
15
  class InterviewQuestion(BaseModel):
16
  question_id: str
17
  question: str
@@ -33,6 +40,12 @@ class AnswerResponse(BaseModel):
33
  message: str = ""
34
 
35
 
 
 
 
 
 
 
36
  class QuestionScore(BaseModel):
37
  question: str
38
  answer: str
 
4
 
5
  class StartInterviewRequest(BaseModel):
6
  role_id: Optional[str] = None
7
+ custom_role: Optional[str] = None
8
+ interview_type: Optional[str] = "resume"
9
+ topic_id: Optional[str] = None
10
 
11
 
12
  class SubmitAnswerRequest(BaseModel):
 
15
  answer: str
16
 
17
 
18
+ class QuitInterviewRequest(BaseModel):
19
+ session_id: str
20
+
21
+
22
  class InterviewQuestion(BaseModel):
23
  question_id: str
24
  question: str
 
40
  message: str = ""
41
 
42
 
43
+ class QuitInterviewResponse(BaseModel):
44
+ session_id: str
45
+ report_generated: bool = False
46
+ message: str = ""
47
+
48
+
49
  class QuestionScore(BaseModel):
50
  question: str
51
  answer: str
backend/schemas/resume.py CHANGED
@@ -12,3 +12,17 @@ class ResumeResponse(BaseModel):
12
 
13
  class UpdateSkillsRequest(BaseModel):
14
  skills: List[str]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  class UpdateSkillsRequest(BaseModel):
14
  skills: List[str]
15
+
16
+ class ParsedDataPayload(BaseModel):
17
+ name: Optional[str] = None
18
+ email: Optional[str] = None
19
+ phone: Optional[str] = None
20
+ location: Optional[str] = None
21
+ recommended_roles: Optional[List[str]] = []
22
+ experience_summary: Optional[str] = None
23
+ experience: Optional[List[dict]] = []
24
+ education: Optional[List[dict]] = []
25
+ projects: Optional[List[dict]] = []
26
+
27
+ class UpdateResumeDataRequest(BaseModel):
28
+ parsed_data: ParsedDataPayload
backend/services/admin_service.py CHANGED
@@ -1,7 +1,12 @@
1
  from bson import ObjectId
 
 
 
2
  from database import get_db
3
- from models.collections import JOB_ROLES, ROLE_REQUIREMENTS, QUESTIONS
4
  from utils.helpers import utc_now, str_objectid, str_objectids
 
 
5
 
6
 
7
  # ─── Job Roles ───
@@ -47,18 +52,37 @@ async def list_roles() -> list:
47
 
48
  # ─── Questions ───
49
 
50
- async def create_question(role_id: str, question: str, difficulty: str = "medium",
51
- category: str = None, expected_answer: str = None) -> dict:
 
 
 
 
 
 
 
52
  db = get_db()
 
 
 
 
 
 
 
 
 
 
53
  doc = {
54
  "role_id": role_id,
 
 
55
  "question": question,
56
  "difficulty": difficulty,
57
  "category": category,
58
  "expected_answer": expected_answer,
59
  "created_at": utc_now(),
60
  }
61
- result = await db[QUESTIONS].insert_one(doc)
62
  doc["_id"] = result.inserted_id
63
  return str_objectid(doc)
64
 
@@ -69,8 +93,14 @@ async def update_question(question_id: str, data: dict) -> dict:
69
  if not update_data:
70
  raise ValueError("No fields to update")
71
  update_data["updated_at"] = utc_now()
72
- await db[QUESTIONS].update_one({"_id": ObjectId(question_id)}, {"$set": update_data})
 
 
 
 
73
  doc = await db[QUESTIONS].find_one({"_id": ObjectId(question_id)})
 
 
74
  if not doc:
75
  raise ValueError("Question not found")
76
  return str_objectid(doc)
@@ -79,17 +109,326 @@ async def update_question(question_id: str, data: dict) -> dict:
79
  async def delete_question(question_id: str) -> bool:
80
  db = get_db()
81
  result = await db[QUESTIONS].delete_one({"_id": ObjectId(question_id)})
 
 
 
82
  return result.deleted_count > 0
83
 
84
 
85
- async def list_questions(role_id: str = None) -> list:
 
 
 
 
 
86
  db = get_db()
87
- query = {"role_id": role_id} if role_id else {}
88
- cursor = db[QUESTIONS].find(query).sort("created_at", -1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  docs = await cursor.to_list(length=200)
90
  return str_objectids(docs)
91
 
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # ─── Role Requirements ───
94
 
95
  async def create_requirement(role_id: str, skill: str, level: str = "intermediate") -> dict:
@@ -116,3 +455,174 @@ async def delete_requirement(req_id: str) -> bool:
116
  db = get_db()
117
  result = await db[ROLE_REQUIREMENTS].delete_one({"_id": ObjectId(req_id)})
118
  return result.deleted_count > 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from bson import ObjectId
2
+ import json
3
+ import re
4
+ from datetime import datetime
5
  from database import get_db
6
+ from models.collections import JOB_ROLES, ROLE_REQUIREMENTS, QUESTIONS, TOPICS, TOPIC_QUESTIONS, SESSIONS, USERS, RESULTS, RESUMES, SKILLS, ANSWERS
7
  from utils.helpers import utc_now, str_objectid, str_objectids
8
+ from utils.gemini import call_gemini
9
+ from utils.resume_text import extract_resume_text
10
 
11
 
12
  # ─── Job Roles ───
 
52
 
53
  # ─── Questions ───
54
 
55
+ async def create_question(
56
+ role_id: str = None,
57
+ topic_id: str = None,
58
+ interview_type: str = "resume",
59
+ question: str = "",
60
+ difficulty: str = "medium",
61
+ category: str = None,
62
+ expected_answer: str = None,
63
+ ) -> dict:
64
  db = get_db()
65
+ interview_type = (interview_type or "resume").strip().lower()
66
+ if interview_type not in {"resume", "topic"}:
67
+ raise ValueError("interview_type must be either 'resume' or 'topic'")
68
+
69
+ if interview_type == "resume" and not role_id:
70
+ raise ValueError("role_id is required for resume interview questions")
71
+ if interview_type == "topic" and not topic_id:
72
+ raise ValueError("topic_id is required for topic interview questions")
73
+
74
+ collection = QUESTIONS if interview_type == "resume" else TOPIC_QUESTIONS
75
  doc = {
76
  "role_id": role_id,
77
+ "topic_id": topic_id,
78
+ "interview_type": interview_type,
79
  "question": question,
80
  "difficulty": difficulty,
81
  "category": category,
82
  "expected_answer": expected_answer,
83
  "created_at": utc_now(),
84
  }
85
+ result = await db[collection].insert_one(doc)
86
  doc["_id"] = result.inserted_id
87
  return str_objectid(doc)
88
 
 
93
  if not update_data:
94
  raise ValueError("No fields to update")
95
  update_data["updated_at"] = utc_now()
96
+ # Try resume question collection first, then topic question collection.
97
+ result = await db[QUESTIONS].update_one({"_id": ObjectId(question_id)}, {"$set": update_data})
98
+ if result.matched_count == 0:
99
+ await db[TOPIC_QUESTIONS].update_one({"_id": ObjectId(question_id)}, {"$set": update_data})
100
+
101
  doc = await db[QUESTIONS].find_one({"_id": ObjectId(question_id)})
102
+ if not doc:
103
+ doc = await db[TOPIC_QUESTIONS].find_one({"_id": ObjectId(question_id)})
104
  if not doc:
105
  raise ValueError("Question not found")
106
  return str_objectid(doc)
 
109
  async def delete_question(question_id: str) -> bool:
110
  db = get_db()
111
  result = await db[QUESTIONS].delete_one({"_id": ObjectId(question_id)})
112
+ if result.deleted_count > 0:
113
+ return True
114
+ result = await db[TOPIC_QUESTIONS].delete_one({"_id": ObjectId(question_id)})
115
  return result.deleted_count > 0
116
 
117
 
118
+ async def list_questions(
119
+ role_id: str = None,
120
+ topic_id: str = None,
121
+ interview_type: str = None,
122
+ difficulty: str = None,
123
+ ) -> list:
124
  db = get_db()
125
+ interview_type = (interview_type or "").strip().lower()
126
+ difficulty = (difficulty or "").strip().lower()
127
+
128
+ docs = []
129
+ if interview_type in {"", "resume"}:
130
+ query = {"role_id": role_id} if role_id else {}
131
+ if difficulty:
132
+ query["difficulty"] = difficulty
133
+ cursor = db[QUESTIONS].find(query).sort("created_at", -1)
134
+ resume_docs = await cursor.to_list(length=200)
135
+ docs.extend(resume_docs)
136
+
137
+ if interview_type in {"", "topic"}:
138
+ query = {"topic_id": topic_id} if topic_id else {}
139
+ if difficulty:
140
+ query["difficulty"] = difficulty
141
+ cursor = db[TOPIC_QUESTIONS].find(query).sort("created_at", -1)
142
+ topic_docs = await cursor.to_list(length=200)
143
+ docs.extend(topic_docs)
144
+
145
+ docs.sort(key=lambda d: d.get("created_at", ""), reverse=True)
146
+ return str_objectids(docs)
147
+
148
+
149
+ async def get_question_by_id(question_id: str) -> dict:
150
+ db = get_db()
151
+ doc = await db[QUESTIONS].find_one({"_id": ObjectId(question_id)})
152
+ if not doc:
153
+ doc = await db[TOPIC_QUESTIONS].find_one({"_id": ObjectId(question_id)})
154
+ if not doc:
155
+ raise ValueError("Question not found")
156
+ return str_objectid(doc)
157
+
158
+
159
+ # ─── Topics ───
160
+
161
+ async def create_topic(name: str, description: str = None) -> dict:
162
+ db = get_db()
163
+ existing = await db[TOPICS].find_one({"name": {"$regex": f"^{re.escape(name)}$", "$options": "i"}})
164
+ if existing:
165
+ raise ValueError("Topic already exists")
166
+
167
+ doc = {
168
+ "name": name,
169
+ "description": description,
170
+ "is_published": False,
171
+ "timer_enabled": False,
172
+ "timer_seconds": None,
173
+ "created_at": utc_now(),
174
+ }
175
+ result = await db[TOPICS].insert_one(doc)
176
+ doc["_id"] = result.inserted_id
177
+ return str_objectid(doc)
178
+
179
+
180
+ async def list_topics(only_published: bool = False) -> list:
181
+ db = get_db()
182
+ query = {"is_published": True} if only_published else {}
183
+ cursor = db[TOPICS].find(query).sort("created_at", -1)
184
  docs = await cursor.to_list(length=200)
185
  return str_objectids(docs)
186
 
187
 
188
+ async def update_topic(topic_id: str, data: dict) -> dict:
189
+ db = get_db()
190
+ update_data = {k: v for k, v in data.items() if v is not None}
191
+ if not update_data:
192
+ raise ValueError("No fields to update")
193
+ update_data["updated_at"] = utc_now()
194
+ await db[TOPICS].update_one({"_id": ObjectId(topic_id)}, {"$set": update_data})
195
+ doc = await db[TOPICS].find_one({"_id": ObjectId(topic_id)})
196
+ if not doc:
197
+ raise ValueError("Topic not found")
198
+ return str_objectid(doc)
199
+
200
+
201
+ async def delete_topic(topic_id: str) -> bool:
202
+ db = get_db()
203
+ await db[TOPIC_QUESTIONS].delete_many({"topic_id": topic_id})
204
+ result = await db[TOPICS].delete_one({"_id": ObjectId(topic_id)})
205
+ return result.deleted_count > 0
206
+
207
+
208
+ async def set_topic_publish_status(
209
+ topic_id: str,
210
+ is_published: bool,
211
+ timer_enabled: bool | None = None,
212
+ timer_seconds: int | None = None,
213
+ ) -> dict:
214
+ db = get_db()
215
+
216
+ update_data = {
217
+ "is_published": is_published,
218
+ "updated_at": utc_now(),
219
+ }
220
+
221
+ if timer_enabled is not None:
222
+ update_data["timer_enabled"] = bool(timer_enabled)
223
+
224
+ if timer_seconds is not None:
225
+ if timer_seconds <= 0:
226
+ raise ValueError("timer_seconds must be greater than 0")
227
+ update_data["timer_seconds"] = int(timer_seconds)
228
+
229
+ if timer_enabled is False:
230
+ update_data["timer_seconds"] = None
231
+
232
+ await db[TOPICS].update_one(
233
+ {"_id": ObjectId(topic_id)},
234
+ {"$set": update_data},
235
+ )
236
+ doc = await db[TOPICS].find_one({"_id": ObjectId(topic_id)})
237
+ if not doc:
238
+ raise ValueError("Topic not found")
239
+ return str_objectid(doc)
240
+
241
+
242
+ def _extract_json_object(text: str) -> str:
243
+ value = (text or "").strip()
244
+ if value.startswith("```"):
245
+ value = value.split("\n", 1)[1]
246
+ if value.endswith("```"):
247
+ value = value.rsplit("```", 1)[0]
248
+ value = value.strip()
249
+
250
+ if value.startswith("{") and value.endswith("}"):
251
+ return value
252
+
253
+ start = value.find("{")
254
+ end = value.rfind("}")
255
+ if start != -1 and end != -1 and end > start:
256
+ return value[start:end + 1]
257
+
258
+ return value
259
+
260
+
261
+ def _normalize_subject(subject: str, allowed_subjects: list[str]) -> str:
262
+ raw = (subject or "").strip().lower()
263
+ if not raw:
264
+ return ""
265
+
266
+ for allowed in allowed_subjects:
267
+ if raw == allowed.lower():
268
+ return allowed
269
+
270
+ for allowed in allowed_subjects:
271
+ a = allowed.lower()
272
+ if raw in a or a in raw:
273
+ return allowed
274
+
275
+ return ""
276
+
277
+
278
+ async def import_questions_from_pdf(
279
+ role_id: str | None,
280
+ topic_id: str | None,
281
+ interview_type: str,
282
+ subjects: list[str] | None,
283
+ filename: str,
284
+ file_content: bytes,
285
+ ) -> dict:
286
+ db = get_db()
287
+
288
+ interview_type = (interview_type or "resume").strip().lower()
289
+ if interview_type not in {"resume", "topic"}:
290
+ raise ValueError("interview_type must be either 'resume' or 'topic'")
291
+
292
+ clean_subjects = []
293
+ for item in (subjects or []):
294
+ value = (item or "").strip()
295
+ if value and value.lower() not in [s.lower() for s in clean_subjects]:
296
+ clean_subjects.append(value)
297
+
298
+ if interview_type == "resume" and not role_id:
299
+ raise ValueError("role_id is required for resume question import")
300
+
301
+ if interview_type == "topic" and not topic_id:
302
+ raise ValueError("topic_id is required for topic question import")
303
+
304
+ if interview_type == "resume" and not clean_subjects:
305
+ raise ValueError("At least one subject is required")
306
+
307
+ text = extract_resume_text(filename, file_content)
308
+ if not text or len(text) < 20:
309
+ raise ValueError("Could not extract readable text from PDF")
310
+
311
+ topic_name = ""
312
+ if interview_type == "topic" and topic_id:
313
+ topic_doc = await db[TOPICS].find_one({"_id": ObjectId(topic_id)})
314
+ if not topic_doc:
315
+ raise ValueError("Topic not found")
316
+ topic_name = (topic_doc.get("name") or "").strip()
317
+
318
+ if interview_type == "topic":
319
+ prompt = f"""You are extracting topic-specific interview questions from a document.
320
+
321
+ Target topic: {topic_name or "General"}
322
+
323
+ Rules:
324
+ 1. Extract only actual interview questions relevant to the target topic.
325
+ 2. Ignore headings, instructions, answers, explanations, and duplicates.
326
+ 3. Keep each question concise and interview-ready.
327
+ 4. Assign a difficulty: easy, medium, or hard.
328
+
329
+ Return ONLY valid JSON in this format:
330
+ {{
331
+ "questions": [
332
+ {{"question": "...", "difficulty": "medium"}}
333
+ ]
334
+ }}
335
+
336
+ Document text:
337
+ ---
338
+ {text}
339
+ ---"""
340
+ else:
341
+ prompt = f"""You are extracting interview questions from a document.
342
+
343
+ Allowed subjects (must choose one of these for each question): {', '.join(clean_subjects)}
344
+
345
+ Rules:
346
+ 1. Extract only actual interview questions from the document.
347
+ 2. Ignore headings, instructions, answers, explanations, and duplicates.
348
+ 3. Assign each extracted question to ONE allowed subject from the list above.
349
+ 4. Assign a difficulty: easy, medium, or hard.
350
+ 5. Keep question text clean and concise.
351
+
352
+ Return ONLY valid JSON in this format:
353
+ {{
354
+ "questions": [
355
+ {{"question": "...", "subject": "...", "difficulty": "medium"}}
356
+ ]
357
+ }}
358
+
359
+ Document text:
360
+ ---
361
+ {text}
362
+ ---"""
363
+
364
+ raw = await call_gemini(prompt)
365
+ parsed_text = _extract_json_object(raw)
366
+ try:
367
+ parsed = json.loads(parsed_text)
368
+ except json.JSONDecodeError as exc:
369
+ raise ValueError("Failed to parse extracted questions from AI response") from exc
370
+
371
+ items = parsed.get("questions", []) if isinstance(parsed, dict) else []
372
+ if not isinstance(items, list) or not items:
373
+ raise ValueError("No questions were extracted from this PDF")
374
+
375
+ allowed_difficulties = {"easy", "medium", "hard"}
376
+ docs = []
377
+ seen = set()
378
+
379
+ for item in items:
380
+ if not isinstance(item, dict):
381
+ continue
382
+
383
+ q_text = (item.get("question") or "").strip()
384
+ if len(q_text) < 8:
385
+ continue
386
+ q_text = re.sub(r"\s+", " ", q_text)
387
+
388
+ if interview_type == "topic":
389
+ subject = topic_name or "Topic"
390
+ else:
391
+ subject = _normalize_subject(item.get("subject", ""), clean_subjects)
392
+ if not subject:
393
+ continue
394
+
395
+ difficulty = (item.get("difficulty") or "medium").strip().lower()
396
+ if difficulty not in allowed_difficulties:
397
+ difficulty = "medium"
398
+
399
+ key = q_text.lower()
400
+ if key in seen:
401
+ continue
402
+ seen.add(key)
403
+
404
+ docs.append(
405
+ {
406
+ "role_id": role_id,
407
+ "topic_id": topic_id,
408
+ "interview_type": interview_type,
409
+ "question": q_text,
410
+ "difficulty": difficulty,
411
+ "category": subject,
412
+ "source": "pdf_upload",
413
+ "created_at": utc_now(),
414
+ }
415
+ )
416
+
417
+ if not docs:
418
+ if interview_type == "topic":
419
+ raise ValueError("No valid topic questions found in this PDF")
420
+ raise ValueError("No valid questions found after subject filtering")
421
+
422
+ collection = QUESTIONS if interview_type == "resume" else TOPIC_QUESTIONS
423
+ result = await db[collection].insert_many(docs)
424
+ return {
425
+ "inserted_count": len(result.inserted_ids),
426
+ "subjects": clean_subjects,
427
+ "interview_type": interview_type,
428
+ "topic_id": topic_id,
429
+ }
430
+
431
+
432
  # ─── Role Requirements ───
433
 
434
  async def create_requirement(role_id: str, skill: str, level: str = "intermediate") -> dict:
 
455
  db = get_db()
456
  result = await db[ROLE_REQUIREMENTS].delete_one({"_id": ObjectId(req_id)})
457
  return result.deleted_count > 0
458
+
459
+
460
+ async def list_quit_interviews(limit: int = 100) -> list:
461
+ """List interviews quit by users with full admin-facing details."""
462
+ db = get_db()
463
+
464
+ cursor = db[SESSIONS].find(
465
+ {"status": {"$in": ["quit", "quit_with_report"]}}
466
+ ).sort("quit_at", -1).limit(limit)
467
+ sessions = await cursor.to_list(length=limit)
468
+
469
+ output = []
470
+ for session in sessions:
471
+ user_id = session.get("user_id")
472
+ user_doc = None
473
+ if user_id:
474
+ try:
475
+ user_doc = await db[USERS].find_one({"_id": ObjectId(user_id)})
476
+ except Exception:
477
+ user_doc = await db[USERS].find_one({"id": user_id})
478
+
479
+ result_doc = await db[RESULTS].find_one({"session_id": session.get("session_id")})
480
+
481
+ quit_at = session.get("quit_at")
482
+ quit_dt = None
483
+ if isinstance(quit_at, str):
484
+ try:
485
+ quit_dt = datetime.fromisoformat(quit_at.replace("Z", "+00:00"))
486
+ except Exception:
487
+ quit_dt = None
488
+
489
+ output.append(
490
+ {
491
+ "session_id": session.get("session_id"),
492
+ "user_id": user_id,
493
+ "user_name": (user_doc or {}).get("name", "Unknown"),
494
+ "user_email": (user_doc or {}).get("email", "Unknown"),
495
+ "role_title": session.get("role_title", "Unknown"),
496
+ "status": session.get("status"),
497
+ "quit_reason": session.get("quit_reason", "user_requested"),
498
+ "answered_count": session.get("answered_count", 0),
499
+ "max_questions": session.get("max_questions", 0),
500
+ "quit_at": quit_at,
501
+ "quit_day": quit_dt.strftime("%A") if quit_dt else None,
502
+ "quit_date": quit_dt.strftime("%Y-%m-%d") if quit_dt else None,
503
+ "quit_time": quit_dt.strftime("%H:%M:%S %Z") if quit_dt else None,
504
+ "report_generated": bool(result_doc),
505
+ "overall_score": (result_doc or {}).get("overall_score"),
506
+ "total_questions_evaluated": (result_doc or {}).get("total_questions", 0),
507
+ "strengths": (result_doc or {}).get("strengths", []),
508
+ "weaknesses": (result_doc or {}).get("weaknesses", []),
509
+ "recommendations": (result_doc or {}).get("recommendations", []),
510
+ }
511
+ )
512
+
513
+ return output
514
+
515
+
516
+ async def list_admin_reports(limit: int = 100) -> list:
517
+ """List all interview results for admin overview."""
518
+ db = get_db()
519
+ cursor = db[RESULTS].find().sort("completed_at", -1).limit(limit)
520
+ reports = await cursor.to_list(length=limit)
521
+
522
+ output = []
523
+ for report in reports:
524
+ user_id = report.get("user_id")
525
+ user_doc = None
526
+ if user_id:
527
+ try:
528
+ user_doc = await db[USERS].find_one({"_id": ObjectId(user_id)})
529
+ except Exception:
530
+ user_doc = await db[USERS].find_one({"id": user_id})
531
+
532
+ output.append(
533
+ {
534
+ "session_id": report.get("session_id"),
535
+ "user_id": user_id,
536
+ "user_name": (user_doc or {}).get("name", "Unknown"),
537
+ "user_email": (user_doc or {}).get("email", "Unknown"),
538
+ "role_title": report.get("role_title", "Unknown"),
539
+ "overall_score": report.get("overall_score", 0),
540
+ "total_questions": report.get("total_questions", 0),
541
+ "completed_at": report.get("completed_at", ""),
542
+ "session_status": report.get("session_status", "completed"),
543
+ "is_quit": bool(report.get("is_quit", False)),
544
+ }
545
+ )
546
+
547
+ return output
548
+
549
+
550
+ async def get_admin_report_detail(session_id: str) -> dict:
551
+ """Get full interview result detail for admin view."""
552
+ db = get_db()
553
+ report = await db[RESULTS].find_one({"session_id": session_id})
554
+ if not report:
555
+ raise ValueError("Report not found")
556
+
557
+ user_id = report.get("user_id")
558
+ user_doc = None
559
+ if user_id:
560
+ try:
561
+ user_doc = await db[USERS].find_one({"_id": ObjectId(user_id)})
562
+ except Exception:
563
+ user_doc = await db[USERS].find_one({"id": user_id})
564
+
565
+ payload = str_objectid(report)
566
+ payload["user_name"] = (user_doc or {}).get("name", "Unknown")
567
+ payload["user_email"] = (user_doc or {}).get("email", "Unknown")
568
+ return payload
569
+
570
+
571
+ async def list_admin_users(limit: int = 500) -> list:
572
+ """List users for admin management with lightweight activity stats."""
573
+ db = get_db()
574
+
575
+ user_cursor = db[USERS].find({"role": "student"}, {"password": 0}).sort("created_at", -1).limit(limit)
576
+ users = await user_cursor.to_list(length=limit)
577
+
578
+ interview_counts = await db[SESSIONS].aggregate([
579
+ {"$group": {"_id": "$user_id", "count": {"$sum": 1}}},
580
+ ]).to_list(length=2000)
581
+ report_counts = await db[RESULTS].aggregate([
582
+ {"$group": {"_id": "$user_id", "count": {"$sum": 1}}},
583
+ ]).to_list(length=2000)
584
+
585
+ interview_map = {str(item.get("_id")): item.get("count", 0) for item in interview_counts}
586
+ report_map = {str(item.get("_id")): item.get("count", 0) for item in report_counts}
587
+
588
+ output = []
589
+ for user in users:
590
+ normalized = str_objectid(user)
591
+ user_id = normalized.get("id", "")
592
+ output.append(
593
+ {
594
+ "id": user_id,
595
+ "name": normalized.get("name", ""),
596
+ "email": normalized.get("email", ""),
597
+ "role": normalized.get("role", "student"),
598
+ "created_at": normalized.get("created_at", ""),
599
+ "interview_count": interview_map.get(user_id, 0),
600
+ "report_count": report_map.get(user_id, 0),
601
+ }
602
+ )
603
+
604
+ return output
605
+
606
+
607
+ async def delete_admin_user(target_user_id: str, current_admin_user_id: str) -> bool:
608
+ """Delete a user and associated data. Admin users cannot be deleted from this endpoint."""
609
+ db = get_db()
610
+
611
+ if target_user_id == current_admin_user_id:
612
+ raise ValueError("You cannot delete your own account")
613
+
614
+ user_doc = await db[USERS].find_one({"_id": ObjectId(target_user_id)})
615
+ if not user_doc:
616
+ raise ValueError("User not found")
617
+
618
+ if user_doc.get("role") == "admin":
619
+ raise ValueError("Admin users cannot be deleted from this page")
620
+
621
+ await db[RESUMES].delete_many({"user_id": target_user_id})
622
+ await db[SKILLS].delete_many({"user_id": target_user_id})
623
+ await db[SESSIONS].delete_many({"user_id": target_user_id})
624
+ await db[ANSWERS].delete_many({"user_id": target_user_id})
625
+ await db[RESULTS].delete_many({"user_id": target_user_id})
626
+
627
+ result = await db[USERS].delete_one({"_id": ObjectId(target_user_id)})
628
+ return result.deleted_count > 0
backend/services/analytics_service.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from database import get_db
2
  from models.collections import RESULTS, SESSIONS, USERS
3
  from utils.helpers import str_objectid, str_objectids
@@ -27,6 +29,14 @@ async def get_admin_analytics() -> dict:
27
  # Total students
28
  total_students = await db[USERS].count_documents({"role": "student"})
29
 
 
 
 
 
 
 
 
 
30
  # Total interviews
31
  total_interviews = await db[RESULTS].count_documents({})
32
 
@@ -74,6 +84,8 @@ async def get_admin_analytics() -> dict:
74
 
75
  return {
76
  "total_students": total_students,
 
 
77
  "total_interviews": total_interviews,
78
  "average_score": avg_score,
79
  "top_performers": top_performers,
 
1
+ from datetime import datetime, timezone
2
+
3
  from database import get_db
4
  from models.collections import RESULTS, SESSIONS, USERS
5
  from utils.helpers import str_objectid, str_objectids
 
29
  # Total students
30
  total_students = await db[USERS].count_documents({"role": "student"})
31
 
32
+ # Users with in-progress interview sessions.
33
+ active_user_ids = await db[SESSIONS].distinct("user_id", {"status": "in_progress"})
34
+ live_users = len([uid for uid in active_user_ids if uid])
35
+
36
+ # New students created since start of current UTC day.
37
+ day_start = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0).isoformat()
38
+ new_users_today = await db[USERS].count_documents({"role": "student", "created_at": {"$gte": day_start}})
39
+
40
  # Total interviews
41
  total_interviews = await db[RESULTS].count_documents({})
42
 
 
84
 
85
  return {
86
  "total_students": total_students,
87
+ "live_users": live_users,
88
+ "new_users_today": new_users_today,
89
  "total_interviews": total_interviews,
90
  "average_score": avg_score,
91
  "top_performers": top_performers,
backend/services/evaluation_service.py CHANGED
@@ -1,8 +1,19 @@
1
  from database import get_db, get_redis
 
2
  from models.collections import RESULTS, ANSWERS, SESSIONS
3
  from utils.helpers import utc_now
4
  from utils.gemini import evaluate_interview
5
- from services.interview_service import get_session_qa
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  async def generate_report(session_id: str, user_id: str) -> dict:
@@ -15,7 +26,7 @@ async def generate_report(session_id: str, user_id: str) -> dict:
15
  if existing:
16
  existing["id"] = str(existing["_id"])
17
  del existing["_id"]
18
- return existing
19
 
20
  # Get session info
21
  session = await db[SESSIONS].find_one({"session_id": session_id})
@@ -26,6 +37,8 @@ async def generate_report(session_id: str, user_id: str) -> dict:
26
  raise ValueError("Unauthorized access to session")
27
 
28
  role_title = session.get("role_title", "Software Developer")
 
 
29
 
30
  # Get all Q&A from Redis
31
  qa_pairs = await get_session_qa(session_id)
@@ -40,6 +53,9 @@ async def generate_report(session_id: str, user_id: str) -> dict:
40
  "session_id": session_id,
41
  "user_id": user_id,
42
  "role_title": role_title,
 
 
 
43
  "overall_score": evaluation.get("overall_score", 0),
44
  "total_questions": len(qa_pairs),
45
  "detailed_scores": evaluation.get("detailed_scores", []),
@@ -48,7 +64,7 @@ async def generate_report(session_id: str, user_id: str) -> dict:
48
  "recommendations": evaluation.get("recommendations", []),
49
  "completed_at": utc_now(),
50
  }
51
- await db[RESULTS].insert_one(result_doc)
52
 
53
  # Store final answers in MongoDB
54
  for qa in qa_pairs:
@@ -69,6 +85,7 @@ async def generate_report(session_id: str, user_id: str) -> dict:
69
  keys_to_delete = [
70
  f"session:{session_id}",
71
  f"session:{session_id}:questions",
 
72
  f"session:{session_id}:answers",
73
  ]
74
  for qid in question_ids:
@@ -78,6 +95,23 @@ async def generate_report(session_id: str, user_id: str) -> dict:
78
  if keys_to_delete:
79
  await redis.delete(*keys_to_delete)
80
 
81
- result_doc["id"] = str(result_doc["_id"])
82
- del result_doc["_id"]
83
- return result_doc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from database import get_db, get_redis
2
+ from bson import ObjectId
3
  from models.collections import RESULTS, ANSWERS, SESSIONS
4
  from utils.helpers import utc_now
5
  from utils.gemini import evaluate_interview
6
+ from services.interview_service import get_session_qa, cleanup_interview_local_state
7
+
8
+
9
+ def _json_safe(value):
10
+ if isinstance(value, ObjectId):
11
+ return str(value)
12
+ if isinstance(value, dict):
13
+ return {k: _json_safe(v) for k, v in value.items()}
14
+ if isinstance(value, list):
15
+ return [_json_safe(item) for item in value]
16
+ return value
17
 
18
 
19
  async def generate_report(session_id: str, user_id: str) -> dict:
 
26
  if existing:
27
  existing["id"] = str(existing["_id"])
28
  del existing["_id"]
29
+ return _json_safe(existing)
30
 
31
  # Get session info
32
  session = await db[SESSIONS].find_one({"session_id": session_id})
 
37
  raise ValueError("Unauthorized access to session")
38
 
39
  role_title = session.get("role_title", "Software Developer")
40
+ session_status = session.get("status", "completed")
41
+ quit_at = session.get("quit_at")
42
 
43
  # Get all Q&A from Redis
44
  qa_pairs = await get_session_qa(session_id)
 
53
  "session_id": session_id,
54
  "user_id": user_id,
55
  "role_title": role_title,
56
+ "session_status": session_status,
57
+ "is_quit": session_status in {"quit", "quit_with_report"},
58
+ "quit_at": quit_at,
59
  "overall_score": evaluation.get("overall_score", 0),
60
  "total_questions": len(qa_pairs),
61
  "detailed_scores": evaluation.get("detailed_scores", []),
 
64
  "recommendations": evaluation.get("recommendations", []),
65
  "completed_at": utc_now(),
66
  }
67
+ inserted = await db[RESULTS].insert_one(result_doc)
68
 
69
  # Store final answers in MongoDB
70
  for qa in qa_pairs:
 
85
  keys_to_delete = [
86
  f"session:{session_id}",
87
  f"session:{session_id}:questions",
88
+ f"session:{session_id}:pending_questions",
89
  f"session:{session_id}:answers",
90
  ]
91
  for qid in question_ids:
 
95
  if keys_to_delete:
96
  await redis.delete(*keys_to_delete)
97
 
98
+ if session_status in {"quit", "quit_with_report"}:
99
+ await db[SESSIONS].update_one(
100
+ {"session_id": session_id},
101
+ {
102
+ "$set": {
103
+ "status": "quit_with_report",
104
+ "report_generated_at": utc_now(),
105
+ }
106
+ },
107
+ )
108
+ elif session_status == "completed":
109
+ await db[SESSIONS].update_one(
110
+ {"session_id": session_id},
111
+ {"$set": {"status": "completed_with_report", "report_generated_at": utc_now()}},
112
+ )
113
+
114
+ cleanup_interview_local_state(session_id)
115
+
116
+ result_doc["id"] = str(inserted.inserted_id)
117
+ return _json_safe(result_doc)
backend/services/interview_graph.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional, TypedDict
2
+
3
+ from langgraph.graph import END, StateGraph
4
+
5
+ from utils.gemini import generate_interview_question
6
+
7
+
8
+ class InterviewGraphState(TypedDict, total=False):
9
+ role_title: str
10
+ skills: List[str]
11
+ previous_questions: List[str]
12
+ previous_answer: Optional[str]
13
+ question_count: int
14
+ max_questions: int
15
+ current_difficulty: str
16
+ next_difficulty: str
17
+ question_stage: str
18
+ is_complete: bool
19
+ question_data: Dict[str, Any]
20
+
21
+
22
+ FOUNDATION_QUESTION_LIMIT = 3
23
+
24
+
25
+ def _difficulty_for_question_number(question_number: int, foundation_limit: int = FOUNDATION_QUESTION_LIMIT) -> str:
26
+ if question_number <= foundation_limit:
27
+ return "easy"
28
+ if question_number <= foundation_limit + 3:
29
+ return "medium"
30
+ return "hard"
31
+
32
+
33
+ async def _check_completion(state: InterviewGraphState) -> InterviewGraphState:
34
+ question_count = int(state.get("question_count", 0))
35
+ max_questions = int(state.get("max_questions", 10))
36
+ return {"is_complete": question_count >= max_questions}
37
+
38
+
39
+ def _route_after_completion(state: InterviewGraphState) -> str:
40
+ return "end" if state.get("is_complete") else "difficulty"
41
+
42
+
43
+ async def _set_next_difficulty(state: InterviewGraphState) -> InterviewGraphState:
44
+ question_count = int(state.get("question_count", 0))
45
+ # We are generating the next question, so use question_count + 1.
46
+ next_question_number = question_count + 1
47
+ stage = "foundation" if next_question_number <= FOUNDATION_QUESTION_LIMIT else "deep"
48
+ return {
49
+ "next_difficulty": _difficulty_for_question_number(next_question_number),
50
+ "question_stage": stage,
51
+ }
52
+
53
+
54
+ async def _generate_question(state: InterviewGraphState) -> InterviewGraphState:
55
+ role_title = state.get("role_title", "Software Developer")
56
+ skills = state.get("skills", ["general"])
57
+ previous_questions = state.get("previous_questions", [])
58
+ previous_answer = state.get("previous_answer")
59
+ difficulty = state.get("next_difficulty", state.get("current_difficulty", "medium"))
60
+ question_stage = state.get("question_stage", "deep")
61
+
62
+ question_data = await generate_interview_question(
63
+ skills=skills,
64
+ role_title=role_title,
65
+ previous_questions=previous_questions,
66
+ previous_answer=previous_answer,
67
+ difficulty=difficulty,
68
+ question_stage=question_stage,
69
+ foundation_limit=FOUNDATION_QUESTION_LIMIT,
70
+ )
71
+
72
+ return {
73
+ "question_data": question_data,
74
+ "current_difficulty": question_data.get("difficulty", difficulty),
75
+ }
76
+
77
+
78
+ def _build_graph():
79
+ graph = StateGraph(InterviewGraphState)
80
+
81
+ graph.add_node("check", _check_completion)
82
+ graph.add_node("difficulty", _set_next_difficulty)
83
+ graph.add_node("generate", _generate_question)
84
+
85
+ graph.set_entry_point("check")
86
+ graph.add_conditional_edges(
87
+ "check",
88
+ _route_after_completion,
89
+ {
90
+ "end": END,
91
+ "difficulty": "difficulty",
92
+ },
93
+ )
94
+ graph.add_edge("difficulty", "generate")
95
+ graph.add_edge("generate", END)
96
+
97
+ return graph.compile()
98
+
99
+
100
+ _INTERVIEW_GRAPH = _build_graph()
101
+
102
+
103
+ async def run_interview_graph(state: InterviewGraphState) -> InterviewGraphState:
104
+ result = await _INTERVIEW_GRAPH.ainvoke(state)
105
+ return result
backend/services/interview_service.py CHANGED
@@ -1,46 +1,369 @@
1
  import json
 
2
  from database import get_db, get_redis
3
- from models.collections import SESSIONS, JOB_ROLES, SKILLS, QUESTIONS
4
  from utils.helpers import generate_id, utc_now, str_objectid
5
- from utils.gemini import generate_interview_question
 
6
 
7
  MAX_QUESTIONS = 10
8
  SESSION_TTL = 7200 # 2 hours
 
 
9
 
 
 
 
10
 
11
- async def start_interview(user_id: str, role_id: str = None) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  """Start a new interview session."""
 
 
 
 
 
 
13
  db = get_db()
14
  redis = get_redis()
15
 
16
  # Get user skills
17
  skills_doc = await db[SKILLS].find_one({"user_id": user_id})
18
- skills = skills_doc.get("skills", ["general"]) if skills_doc else ["general"]
 
19
 
20
  # Get role
21
  role_title = "Software Developer"
22
- if role_id:
 
 
23
  from bson import ObjectId
24
- role = await db[JOB_ROLES].find_one({"_id": ObjectId(role_id)})
25
- if role:
26
- role_title = role["title"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Check for existing questions in question bank
29
  bank_questions = []
30
- if role_id:
31
- cursor = db[QUESTIONS].find({"role_id": role_id}).limit(5)
32
- async for q in cursor:
33
- bank_questions.append(q["question"])
34
-
35
- # Generate first question
36
- question_data = await generate_interview_question(
37
- skills=skills,
 
 
38
  role_title=role_title,
39
- difficulty="medium",
 
 
 
 
 
 
40
  )
 
 
41
 
42
  session_id = generate_id()
43
- question_id = generate_id()
44
 
45
  # Create session in MongoDB
46
  session_doc = {
@@ -49,9 +372,10 @@ async def start_interview(user_id: str, role_id: str = None) -> dict:
49
  "role_id": role_id,
50
  "role_title": role_title,
51
  "status": "in_progress",
 
52
  "question_count": 1,
53
  "max_questions": MAX_QUESTIONS,
54
- "current_difficulty": "medium",
55
  "started_at": utc_now(),
56
  }
57
  await db[SESSIONS].insert_one(session_doc)
@@ -60,36 +384,53 @@ async def start_interview(user_id: str, role_id: str = None) -> dict:
60
  session_state = {
61
  "user_id": user_id,
62
  "role_title": role_title,
63
- "skills": json.dumps(skills),
 
 
 
 
64
  "question_count": 1,
 
 
 
65
  "max_questions": MAX_QUESTIONS,
66
- "current_difficulty": "medium",
 
67
  "status": "in_progress",
68
  }
69
  await redis.hset(f"session:{session_id}", mapping=session_state)
70
  await redis.expire(f"session:{session_id}", SESSION_TTL)
71
 
72
- # Store question in Redis
73
- q_data = {
74
- "question_id": question_id,
75
- "question": question_data.get("question", "Tell me about yourself."),
76
- "difficulty": question_data.get("difficulty", "medium"),
77
- "category": question_data.get("category", "general"),
78
- }
79
- await redis.hset(f"session:{session_id}:q:{question_id}", mapping=q_data)
80
- await redis.rpush(f"session:{session_id}:questions", question_id)
81
- await redis.expire(f"session:{session_id}:q:{question_id}", SESSION_TTL)
82
- await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
83
 
84
  return {
85
  "session_id": session_id,
 
 
 
 
 
 
 
86
  "question": {
87
- "question_id": question_id,
88
- "question": q_data["question"],
89
- "difficulty": q_data["difficulty"],
90
  "question_number": 1,
91
  "total_questions": MAX_QUESTIONS,
92
  },
 
 
 
 
93
  "message": "Interview started. Good luck!",
94
  }
95
 
@@ -118,12 +459,23 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
118
  await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
119
 
120
  question_count = int(session.get("question_count", 1))
 
 
 
121
  max_questions = int(session.get("max_questions", MAX_QUESTIONS))
 
 
 
 
 
122
 
123
  # Check if interview is complete
124
- if question_count >= max_questions:
125
  # Mark session as completed
126
- await redis.hset(f"session:{session_id}", "status", "completed")
 
 
 
127
  await db[SESSIONS].update_one(
128
  {"session_id": session_id},
129
  {"$set": {"status": "completed", "completed_at": utc_now()}},
@@ -135,74 +487,142 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
135
  "message": "Interview complete! Generating your report...",
136
  }
137
 
138
- # Adjust difficulty based on question count
139
- difficulty = _adjust_difficulty(question_count, session.get("current_difficulty", "medium"))
140
-
141
- # Get previous questions from Redis
142
- question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
143
- previous_questions = []
144
- for qid in question_ids:
145
- q = await redis.hgetall(f"session:{session_id}:q:{qid}")
146
- if q:
147
- previous_questions.append(q.get("question", ""))
148
-
149
- # Get the current question text for context
150
- current_q = await redis.hgetall(f"session:{session_id}:q:{question_id}")
151
-
152
- skills = json.loads(session.get("skills", "[]"))
153
- role_title = session.get("role_title", "Software Developer")
154
-
155
- # Generate next question
156
- next_question_data = await generate_interview_question(
157
- skills=skills,
158
- role_title=role_title,
159
- previous_questions=previous_questions,
160
- previous_answer=answer,
161
- difficulty=difficulty,
162
- )
163
-
164
- new_question_id = generate_id()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  new_count = question_count + 1
166
-
167
- # Store new question in Redis
168
- q_data = {
169
- "question_id": new_question_id,
170
- "question": next_question_data.get("question", "Can you elaborate further?"),
171
- "difficulty": next_question_data.get("difficulty", difficulty),
172
- "category": next_question_data.get("category", "general"),
173
- }
174
- await redis.hset(f"session:{session_id}:q:{new_question_id}", mapping=q_data)
175
- await redis.rpush(f"session:{session_id}:questions", new_question_id)
176
- await redis.expire(f"session:{session_id}:q:{new_question_id}", SESSION_TTL)
177
 
178
  # Update session state
179
  await redis.hset(f"session:{session_id}", mapping={
180
  "question_count": str(new_count),
181
- "current_difficulty": difficulty,
 
 
182
  })
183
 
 
 
 
184
  return {
185
  "session_id": session_id,
186
  "next_question": {
187
- "question_id": new_question_id,
188
- "question": q_data["question"],
189
- "difficulty": q_data["difficulty"],
190
- "question_number": new_count,
191
  "total_questions": max_questions,
192
  },
193
  "is_complete": False,
194
- "message": f"Question {new_count} of {max_questions}",
195
  }
196
 
197
 
198
- def _adjust_difficulty(question_number: int, current: str) -> str:
199
- """Dynamically adjust difficulty based on progress."""
200
- if question_number <= 3:
201
- return "easy"
202
- elif question_number <= 6:
203
- return "medium"
204
- else:
205
- return "hard"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
 
208
  async def get_session_qa(session_id: str) -> list:
@@ -225,3 +645,9 @@ async def get_session_qa(session_id: str) -> list:
225
  })
226
 
227
  return qa_pairs
 
 
 
 
 
 
 
1
  import json
2
+ import asyncio
3
  from database import get_db, get_redis
4
+ from models.collections import SESSIONS, JOB_ROLES, SKILLS, QUESTIONS, TOPICS, TOPIC_QUESTIONS, ROLE_REQUIREMENTS
5
  from utils.helpers import generate_id, utc_now, str_objectid
6
+ from utils.skills import normalize_skill_list, find_matching_skills, find_missing_skills, build_interview_focus_skills
7
+ from services.interview_graph import run_interview_graph
8
 
9
  MAX_QUESTIONS = 10
10
  SESSION_TTL = 7200 # 2 hours
11
+ BATCH_SIZE = 5
12
+ PREGEN_MIN_PENDING = 2
13
 
14
+ # Local process memory summary requested in workflow.
15
+ _LOCAL_SUMMARIES: dict[str, str] = {}
16
+ _PREGEN_IN_FLIGHT: set[str] = set()
17
 
18
+
19
+ def _safe_json_list(value: str) -> list:
20
+ try:
21
+ data = json.loads(value or "[]")
22
+ return data if isinstance(data, list) else []
23
+ except Exception:
24
+ return []
25
+
26
+
27
+ def _update_local_summary(session_id: str, question: str, answer: str) -> None:
28
+ existing = _LOCAL_SUMMARIES.get(session_id, "")
29
+ combined = f"{existing}\nQ: {question}\nA: {answer}".strip()
30
+ # Keep summary bounded in memory.
31
+ _LOCAL_SUMMARIES[session_id] = combined[-1500:]
32
+
33
+
34
+ async def _get_generated_question_texts(redis, session_id: str) -> list[str]:
35
+ qids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
36
+ questions = []
37
+ for qid in qids:
38
+ q = await redis.hgetall(f"session:{session_id}:q:{qid}")
39
+ if q and q.get("question"):
40
+ questions.append(q["question"])
41
+ return questions
42
+
43
+
44
+ async def _generate_question_batch(
45
+ role_title: str,
46
+ skills: list[str],
47
+ previous_questions: list[str],
48
+ generated_count: int,
49
+ max_questions: int,
50
+ current_difficulty: str,
51
+ local_summary: str | None,
52
+ batch_size: int,
53
+ ) -> tuple[list[dict], str]:
54
+ remaining = max(0, max_questions - generated_count)
55
+ target = min(batch_size, remaining)
56
+ if target <= 0:
57
+ return [], current_difficulty
58
+
59
+ generated: list[dict] = []
60
+ rolling_questions = list(previous_questions)
61
+ rolling_difficulty = current_difficulty
62
+ rolling_count = generated_count
63
+
64
+ for i in range(target):
65
+ state = {
66
+ "role_title": role_title,
67
+ "skills": skills,
68
+ "previous_questions": rolling_questions,
69
+ # Feed the local summary once per batch as extra context.
70
+ "previous_answer": local_summary if i == 0 else None,
71
+ "question_count": rolling_count,
72
+ "max_questions": max_questions,
73
+ "current_difficulty": rolling_difficulty,
74
+ }
75
+ graph_result = await run_interview_graph(state)
76
+ q_data = graph_result.get("question_data", {})
77
+ difficulty = q_data.get("difficulty", graph_result.get("current_difficulty", "medium"))
78
+ generated.append(
79
+ {
80
+ "question": q_data.get("question", "Can you explain your approach?"),
81
+ "difficulty": difficulty,
82
+ "category": q_data.get("category", "general"),
83
+ }
84
+ )
85
+ rolling_questions.append(generated[-1]["question"])
86
+ rolling_count += 1
87
+ rolling_difficulty = difficulty
88
+
89
+ return generated, rolling_difficulty
90
+
91
+
92
+ async def _append_batch_to_redis(redis, session_id: str, batch: list[dict]) -> list[str]:
93
+ created_ids: list[str] = []
94
+ for item in batch:
95
+ qid = generate_id()
96
+ created_ids.append(qid)
97
+ await redis.hset(
98
+ f"session:{session_id}:q:{qid}",
99
+ mapping={
100
+ "question_id": qid,
101
+ "question": item.get("question", "Can you explain your approach?"),
102
+ "difficulty": item.get("difficulty", "medium"),
103
+ "category": item.get("category", "general"),
104
+ },
105
+ )
106
+ await redis.rpush(f"session:{session_id}:questions", qid)
107
+ await redis.expire(f"session:{session_id}:q:{qid}", SESSION_TTL)
108
+ if created_ids:
109
+ await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
110
+ return created_ids
111
+
112
+
113
+ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
114
+ """Start a topic-wise interview with admin-created questions."""
115
+ db = get_db()
116
+ redis = get_redis()
117
+
118
+ topic = await db[TOPICS].find_one({"_id": __import__("bson").ObjectId(topic_id)})
119
+ if not topic:
120
+ raise ValueError("Topic not found")
121
+ if not topic.get("is_published", False):
122
+ raise ValueError("This topic interview is not published yet")
123
+
124
+ topic_questions = await db[TOPIC_QUESTIONS].find({"topic_id": topic_id}).sort("created_at", -1).to_list(length=200)
125
+ if not topic_questions:
126
+ raise ValueError("No questions found for selected topic")
127
+
128
+ timer_enabled = bool(topic.get("timer_enabled", False))
129
+ timer_seconds = topic.get("timer_seconds") if timer_enabled else None
130
+
131
+ total_questions = min(MAX_QUESTIONS, len(topic_questions))
132
+ selected = topic_questions[:total_questions]
133
+
134
+ session_id = generate_id()
135
+ _LOCAL_SUMMARIES[session_id] = ""
136
+
137
+ session_doc = {
138
+ "session_id": session_id,
139
+ "user_id": user_id,
140
+ "role_id": None,
141
+ "role_title": topic.get("name", "Topic Interview"),
142
+ "topic_id": topic_id,
143
+ "interview_type": "topic",
144
+ "status": "in_progress",
145
+ "question_count": 1,
146
+ "max_questions": total_questions,
147
+ "current_difficulty": selected[0].get("difficulty", "medium"),
148
+ "timer_enabled": timer_enabled,
149
+ "timer_seconds": timer_seconds,
150
+ "started_at": utc_now(),
151
+ }
152
+ await db[SESSIONS].insert_one(session_doc)
153
+
154
+ session_state = {
155
+ "user_id": user_id,
156
+ "role_title": topic.get("name", "Topic Interview"),
157
+ "topic_id": topic_id,
158
+ "interview_type": "topic",
159
+ "skills": json.dumps([topic.get("name", "general")]),
160
+ "user_skills": json.dumps([]),
161
+ "required_skills": json.dumps([]),
162
+ "matched_skills": json.dumps([]),
163
+ "missing_skills": json.dumps([]),
164
+ "question_count": 1,
165
+ "answered_count": 0,
166
+ "served_count": 1,
167
+ "generated_count": total_questions,
168
+ "max_questions": total_questions,
169
+ "current_difficulty": selected[0].get("difficulty", "medium"),
170
+ "timer_enabled": str(timer_enabled),
171
+ "timer_seconds": str(timer_seconds or ""),
172
+ "status": "in_progress",
173
+ }
174
+ await redis.hset(f"session:{session_id}", mapping=session_state)
175
+ await redis.expire(f"session:{session_id}", SESSION_TTL)
176
+
177
+ created_ids = []
178
+ for q in selected:
179
+ qid = generate_id()
180
+ created_ids.append(qid)
181
+ await redis.hset(
182
+ f"session:{session_id}:q:{qid}",
183
+ mapping={
184
+ "question_id": qid,
185
+ "question": q.get("question", "Can you explain this concept?"),
186
+ "difficulty": q.get("difficulty", "medium"),
187
+ "category": q.get("category", topic.get("name", "topic")),
188
+ },
189
+ )
190
+ await redis.rpush(f"session:{session_id}:questions", qid)
191
+ await redis.expire(f"session:{session_id}:q:{qid}", SESSION_TTL)
192
+ await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
193
+
194
+ first_id = created_ids[0]
195
+ pending_ids = created_ids[1:]
196
+ if pending_ids:
197
+ await redis.rpush(f"session:{session_id}:pending_questions", *pending_ids)
198
+ await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
199
+
200
+ first_q_data = await redis.hgetall(f"session:{session_id}:q:{first_id}")
201
+ return {
202
+ "session_id": session_id,
203
+ "interview_type": "topic",
204
+ "topic": {
205
+ "topic_id": topic_id,
206
+ "name": topic.get("name", "Topic Interview"),
207
+ "description": topic.get("description", ""),
208
+ },
209
+ "skill_alignment": {
210
+ "user_skills": [],
211
+ "required_skills": [topic.get("name", "")],
212
+ "matched_skills": [],
213
+ "missing_skills": [],
214
+ "interview_focus": [topic.get("name", "")],
215
+ },
216
+ "question": {
217
+ "question_id": first_id,
218
+ "question": first_q_data.get("question", "Can you explain this concept?"),
219
+ "difficulty": first_q_data.get("difficulty", "medium"),
220
+ "question_number": 1,
221
+ "total_questions": total_questions,
222
+ },
223
+ "timer": {
224
+ "enabled": timer_enabled,
225
+ "seconds": timer_seconds,
226
+ },
227
+ "message": "Topic interview started. Good luck!",
228
+ }
229
+
230
+
231
+ async def _async_pregenerate_next_batch(session_id: str) -> None:
232
+ redis = get_redis()
233
+ try:
234
+ session = await redis.hgetall(f"session:{session_id}")
235
+ if not session or session.get("status") != "in_progress":
236
+ return
237
+
238
+ pending_len = await redis.llen(f"session:{session_id}:pending_questions")
239
+ generated_count = int(session.get("generated_count", 0))
240
+ max_questions = int(session.get("max_questions", MAX_QUESTIONS))
241
+
242
+ if pending_len >= PREGEN_MIN_PENDING or generated_count >= max_questions:
243
+ return
244
+
245
+ previous_questions = await _get_generated_question_texts(redis, session_id)
246
+ skills = _safe_json_list(session.get("skills", "[]"))
247
+ role_title = session.get("role_title", "Software Developer")
248
+ current_difficulty = session.get("current_difficulty", "medium")
249
+ local_summary = _LOCAL_SUMMARIES.get(session_id)
250
+
251
+ batch, last_difficulty = await _generate_question_batch(
252
+ role_title=role_title,
253
+ skills=skills,
254
+ previous_questions=previous_questions,
255
+ generated_count=generated_count,
256
+ max_questions=max_questions,
257
+ current_difficulty=current_difficulty,
258
+ local_summary=local_summary,
259
+ batch_size=BATCH_SIZE,
260
+ )
261
+ if not batch:
262
+ return
263
+
264
+ new_ids = await _append_batch_to_redis(redis, session_id, batch)
265
+ if new_ids:
266
+ await redis.rpush(f"session:{session_id}:pending_questions", *new_ids)
267
+ await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
268
+ await redis.hset(
269
+ f"session:{session_id}",
270
+ mapping={
271
+ "generated_count": str(generated_count + len(new_ids)),
272
+ "current_difficulty": last_difficulty,
273
+ },
274
+ )
275
+ finally:
276
+ _PREGEN_IN_FLIGHT.discard(session_id)
277
+
278
+
279
+ def _schedule_pregen(session_id: str, answered_count: int) -> None:
280
+ # Start pre-generation after user answers Q1 and Q2, then keep it topped up.
281
+ if answered_count < 2:
282
+ return
283
+ if session_id in _PREGEN_IN_FLIGHT:
284
+ return
285
+ _PREGEN_IN_FLIGHT.add(session_id)
286
+ asyncio.create_task(_async_pregenerate_next_batch(session_id))
287
+
288
+
289
+ async def start_interview(
290
+ user_id: str,
291
+ role_id: str = None,
292
+ custom_role: str = None,
293
+ interview_type: str = "resume",
294
+ topic_id: str = None,
295
+ ) -> dict:
296
  """Start a new interview session."""
297
+ interview_type = (interview_type or "resume").strip().lower()
298
+ if interview_type == "topic":
299
+ if not topic_id:
300
+ raise ValueError("topic_id is required for topic interviews")
301
+ return await _start_topic_interview(user_id=user_id, topic_id=topic_id)
302
+
303
  db = get_db()
304
  redis = get_redis()
305
 
306
  # Get user skills
307
  skills_doc = await db[SKILLS].find_one({"user_id": user_id})
308
+ user_skills = skills_doc.get("skills", ["general"]) if skills_doc else ["general"]
309
+ user_skills = normalize_skill_list(user_skills)
310
 
311
  # Get role
312
  role_title = "Software Developer"
313
+ if custom_role:
314
+ role_title = custom_role
315
+ elif role_id:
316
  from bson import ObjectId
317
+ try:
318
+ role = await db[JOB_ROLES].find_one({"_id": ObjectId(role_id)})
319
+ if role:
320
+ role_title = role["title"]
321
+ except Exception:
322
+ # If it's not a valid ObjectId, assume it's a raw generic title passed from frontend
323
+ role_title = role_id
324
+
325
+ # Compare role requirements with user skills when admin role requirements exist.
326
+ required_skills = []
327
+ if role_id and not custom_role:
328
+ req_cursor = db[ROLE_REQUIREMENTS].find({"role_id": role_id})
329
+ req_docs = await req_cursor.to_list(length=100)
330
+ required_skills = [d.get("skill", "") for d in req_docs if d.get("skill")]
331
+
332
+ matched_role_skills = find_matching_skills(user_skills, required_skills)
333
+ missing_role_skills = find_missing_skills(user_skills, required_skills)
334
+
335
+ # Prioritize matched required skills and compress them into cluster-aware focus areas.
336
+ base_skills_for_interview = matched_role_skills if matched_role_skills else user_skills
337
+ skills_for_interview = build_interview_focus_skills(base_skills_for_interview)
338
+ if not skills_for_interview:
339
+ skills_for_interview = ["general"]
340
 
341
  # Check for existing questions in question bank
342
  bank_questions = []
343
+ if role_id and not custom_role:
344
+ try:
345
+ cursor = db[QUESTIONS].find({"role_id": role_id}).limit(5)
346
+ async for q in cursor:
347
+ bank_questions.append(q["question"])
348
+ except Exception:
349
+ pass
350
+
351
+ # Workflow: generate first batch upfront, store in Redis, serve Q1.
352
+ initial_batch, last_difficulty = await _generate_question_batch(
353
  role_title=role_title,
354
+ skills=skills_for_interview,
355
+ previous_questions=[],
356
+ generated_count=0,
357
+ max_questions=MAX_QUESTIONS,
358
+ current_difficulty="medium",
359
+ local_summary=None,
360
+ batch_size=BATCH_SIZE,
361
  )
362
+ if not initial_batch:
363
+ raise ValueError("Failed to generate initial interview questions")
364
 
365
  session_id = generate_id()
366
+ _LOCAL_SUMMARIES[session_id] = ""
367
 
368
  # Create session in MongoDB
369
  session_doc = {
 
372
  "role_id": role_id,
373
  "role_title": role_title,
374
  "status": "in_progress",
375
+ "interview_type": "resume",
376
  "question_count": 1,
377
  "max_questions": MAX_QUESTIONS,
378
+ "current_difficulty": initial_batch[0].get("difficulty", "medium"),
379
  "started_at": utc_now(),
380
  }
381
  await db[SESSIONS].insert_one(session_doc)
 
384
  session_state = {
385
  "user_id": user_id,
386
  "role_title": role_title,
387
+ "skills": json.dumps(skills_for_interview),
388
+ "user_skills": json.dumps(user_skills),
389
+ "required_skills": json.dumps(normalize_skill_list(required_skills)),
390
+ "matched_skills": json.dumps(matched_role_skills),
391
+ "missing_skills": json.dumps(missing_role_skills),
392
  "question_count": 1,
393
+ "answered_count": 0,
394
+ "served_count": 1,
395
+ "generated_count": len(initial_batch),
396
  "max_questions": MAX_QUESTIONS,
397
+ "current_difficulty": last_difficulty,
398
+ "interview_type": "resume",
399
  "status": "in_progress",
400
  }
401
  await redis.hset(f"session:{session_id}", mapping=session_state)
402
  await redis.expire(f"session:{session_id}", SESSION_TTL)
403
 
404
+ # Store batch in Redis and queue remaining for later serving.
405
+ new_ids = await _append_batch_to_redis(redis, session_id, initial_batch)
406
+ first_id = new_ids[0]
407
+ pending_ids = new_ids[1:]
408
+ if pending_ids:
409
+ await redis.rpush(f"session:{session_id}:pending_questions", *pending_ids)
410
+ await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
411
+
412
+ first_q_data = await redis.hgetall(f"session:{session_id}:q:{first_id}")
 
 
413
 
414
  return {
415
  "session_id": session_id,
416
+ "skill_alignment": {
417
+ "user_skills": user_skills,
418
+ "required_skills": normalize_skill_list(required_skills),
419
+ "matched_skills": matched_role_skills,
420
+ "missing_skills": missing_role_skills,
421
+ "interview_focus": skills_for_interview,
422
+ },
423
  "question": {
424
+ "question_id": first_id,
425
+ "question": first_q_data.get("question", "Tell me about yourself."),
426
+ "difficulty": first_q_data.get("difficulty", "medium"),
427
  "question_number": 1,
428
  "total_questions": MAX_QUESTIONS,
429
  },
430
+ "timer": {
431
+ "enabled": False,
432
+ "seconds": None,
433
+ },
434
  "message": "Interview started. Good luck!",
435
  }
436
 
 
459
  await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
460
 
461
  question_count = int(session.get("question_count", 1))
462
+ answered_count = int(session.get("answered_count", 0)) + 1
463
+ served_count = int(session.get("served_count", 1))
464
+ generated_count = int(session.get("generated_count", 0))
465
  max_questions = int(session.get("max_questions", MAX_QUESTIONS))
466
+ interview_type = session.get("interview_type", "resume")
467
+
468
+ # Update local summary in-memory (requested local summary step).
469
+ current_q = await redis.hgetall(f"session:{session_id}:q:{question_id}")
470
+ _update_local_summary(session_id, current_q.get("question", ""), answer)
471
 
472
  # Check if interview is complete
473
+ if answered_count >= max_questions:
474
  # Mark session as completed
475
+ await redis.hset(
476
+ f"session:{session_id}",
477
+ mapping={"status": "completed", "answered_count": str(answered_count)},
478
+ )
479
  await db[SESSIONS].update_one(
480
  {"session_id": session_id},
481
  {"$set": {"status": "completed", "completed_at": utc_now()}},
 
487
  "message": "Interview complete! Generating your report...",
488
  }
489
 
490
+ # Serve from pending queue first.
491
+ next_question_id = await redis.lpop(f"session:{session_id}:pending_questions")
492
+
493
+ # If queue is empty, generate only for resume interviews.
494
+ if not next_question_id:
495
+ if interview_type == "topic":
496
+ await redis.hset(
497
+ f"session:{session_id}",
498
+ mapping={"status": "completed", "answered_count": str(answered_count)},
499
+ )
500
+ await db[SESSIONS].update_one(
501
+ {"session_id": session_id},
502
+ {"$set": {"status": "completed", "completed_at": utc_now()}},
503
+ )
504
+ return {
505
+ "session_id": session_id,
506
+ "next_question": None,
507
+ "is_complete": True,
508
+ "message": "Interview complete! Generating your report...",
509
+ }
510
+
511
+ previous_questions = await _get_generated_question_texts(redis, session_id)
512
+ skills = _safe_json_list(session.get("skills", "[]"))
513
+ role_title = session.get("role_title", "Software Developer")
514
+
515
+ sync_batch, last_difficulty = await _generate_question_batch(
516
+ role_title=role_title,
517
+ skills=skills,
518
+ previous_questions=previous_questions,
519
+ generated_count=generated_count,
520
+ max_questions=max_questions,
521
+ current_difficulty=session.get("current_difficulty", "medium"),
522
+ local_summary=_LOCAL_SUMMARIES.get(session_id),
523
+ batch_size=BATCH_SIZE,
524
+ )
525
+ new_ids = await _append_batch_to_redis(redis, session_id, sync_batch)
526
+ generated_count += len(new_ids)
527
+ if new_ids:
528
+ next_question_id = new_ids[0]
529
+ if len(new_ids) > 1:
530
+ await redis.rpush(f"session:{session_id}:pending_questions", *new_ids[1:])
531
+ await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
532
+ await redis.hset(
533
+ f"session:{session_id}",
534
+ mapping={
535
+ "generated_count": str(generated_count),
536
+ "current_difficulty": last_difficulty,
537
+ },
538
+ )
539
+
540
+ if not next_question_id:
541
+ raise ValueError("Unable to fetch or generate next question")
542
+
543
+ q_data = await redis.hgetall(f"session:{session_id}:q:{next_question_id}")
544
+ next_difficulty = q_data.get("difficulty", session.get("current_difficulty", "medium"))
545
  new_count = question_count + 1
546
+ new_served_count = served_count + 1
 
 
 
 
 
 
 
 
 
 
547
 
548
  # Update session state
549
  await redis.hset(f"session:{session_id}", mapping={
550
  "question_count": str(new_count),
551
+ "answered_count": str(answered_count),
552
+ "served_count": str(new_served_count),
553
+ "current_difficulty": next_difficulty,
554
  })
555
 
556
+ if interview_type == "resume":
557
+ _schedule_pregen(session_id, answered_count)
558
+
559
  return {
560
  "session_id": session_id,
561
  "next_question": {
562
+ "question_id": next_question_id,
563
+ "question": q_data.get("question", "Can you elaborate further?"),
564
+ "difficulty": q_data.get("difficulty", "medium"),
565
+ "question_number": new_served_count,
566
  "total_questions": max_questions,
567
  },
568
  "is_complete": False,
569
+ "message": f"Question {new_served_count} of {max_questions}",
570
  }
571
 
572
 
573
+ async def quit_interview(session_id: str, user_id: str) -> dict:
574
+ """Mark an interview as quit and indicate whether a partial report can be generated."""
575
+ db = get_db()
576
+ redis = get_redis()
577
+
578
+ session = await db[SESSIONS].find_one({"session_id": session_id})
579
+ if not session:
580
+ raise ValueError("Session not found")
581
+ if session.get("user_id") != user_id:
582
+ raise ValueError("Unauthorized access to session")
583
+
584
+ if session.get("status") in {"completed", "quit", "quit_with_report"}:
585
+ return {
586
+ "session_id": session_id,
587
+ "report_generated": session.get("status") == "quit_with_report",
588
+ "message": "Interview already finalized",
589
+ }
590
+
591
+ quit_at = utc_now()
592
+
593
+ # Update Redis state if still present.
594
+ redis_session_key = f"session:{session_id}"
595
+ redis_session = await redis.hgetall(redis_session_key)
596
+ answered_count = int(redis_session.get("answered_count", 0)) if redis_session else 0
597
+ if redis_session:
598
+ await redis.hset(
599
+ redis_session_key,
600
+ mapping={
601
+ "status": "quit",
602
+ "quit_at": quit_at,
603
+ },
604
+ )
605
+ await redis.expire(redis_session_key, SESSION_TTL)
606
+
607
+ # Persist quit metadata for admin visibility.
608
+ await db[SESSIONS].update_one(
609
+ {"session_id": session_id},
610
+ {
611
+ "$set": {
612
+ "status": "quit",
613
+ "quit_at": quit_at,
614
+ "quit_reason": "user_requested",
615
+ "answered_count": answered_count,
616
+ }
617
+ },
618
+ )
619
+
620
+ has_answers = answered_count > 0
621
+ return {
622
+ "session_id": session_id,
623
+ "report_generated": has_answers,
624
+ "message": "Interview quit successfully" if has_answers else "Interview quit. No answers to evaluate yet.",
625
+ }
626
 
627
 
628
  async def get_session_qa(session_id: str) -> list:
 
645
  })
646
 
647
  return qa_pairs
648
+
649
+
650
+ def cleanup_interview_local_state(session_id: str) -> None:
651
+ """Cleanup process-local state for a completed session."""
652
+ _LOCAL_SUMMARIES.pop(session_id, None)
653
+ _PREGEN_IN_FLIGHT.discard(session_id)
backend/services/resume_service.py CHANGED
@@ -4,6 +4,8 @@ from database import get_db
4
  from models.collections import RESUMES, SKILLS
5
  from utils.helpers import utc_now, str_objectid
6
  from utils.gemini import parse_resume_with_gemini
 
 
7
  from config import get_settings
8
 
9
  settings = get_settings()
@@ -20,12 +22,14 @@ async def upload_and_parse_resume(user_id: str, filename: str, file_content: byt
20
  async with aiofiles.open(file_path, "wb") as f:
21
  await f.write(file_content)
22
 
23
- # Read file text (for parsing)
24
- resume_text = file_content.decode("utf-8", errors="ignore")
25
 
26
  # Parse with Gemini
27
  parsed_data = await parse_resume_with_gemini(resume_text)
28
- skills = parsed_data.get("skills", [])
 
 
29
 
30
  # Upsert resume document
31
  resume_doc = {
@@ -50,6 +54,7 @@ async def upload_and_parse_resume(user_id: str, filename: str, file_content: byt
50
  {"$set": {
51
  "user_id": user_id,
52
  "skills": skills,
 
53
  "updated_at": utc_now(),
54
  }},
55
  upsert=True,
 
4
  from models.collections import RESUMES, SKILLS
5
  from utils.helpers import utc_now, str_objectid
6
  from utils.gemini import parse_resume_with_gemini
7
+ from utils.resume_text import extract_resume_text
8
+ from utils.skills import normalize_skill_list
9
  from config import get_settings
10
 
11
  settings = get_settings()
 
22
  async with aiofiles.open(file_path, "wb") as f:
23
  await f.write(file_content)
24
 
25
+ # Extract readable text by file type before sending to Gemini.
26
+ resume_text = extract_resume_text(filename, file_content)
27
 
28
  # Parse with Gemini
29
  parsed_data = await parse_resume_with_gemini(resume_text)
30
+ raw_skills = parsed_data.get("skills", [])
31
+ skills = normalize_skill_list(raw_skills)
32
+ parsed_data["skills"] = skills
33
 
34
  # Upsert resume document
35
  resume_doc = {
 
54
  {"$set": {
55
  "user_id": user_id,
56
  "skills": skills,
57
+ "raw_skills": raw_skills,
58
  "updated_at": utc_now(),
59
  }},
60
  upsert=True,
backend/utils/gemini.py CHANGED
@@ -1,5 +1,9 @@
1
  from google import genai
2
  from config import get_settings
 
 
 
 
3
 
4
  settings = get_settings()
5
 
@@ -11,6 +15,7 @@ async def call_gemini(prompt: str, system_instruction: str = None) -> str:
11
  config = {}
12
  if system_instruction:
13
  config["system_instruction"] = system_instruction
 
14
 
15
  response = client.models.generate_content(
16
  model=settings.GEMINI_MODEL,
@@ -20,14 +25,62 @@ async def call_gemini(prompt: str, system_instruction: str = None) -> str:
20
  return response.text
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  async def parse_resume_with_gemini(resume_text: str) -> dict:
24
  """Parse resume text and extract structured data using Gemini."""
25
  prompt = f"""Analyze the following resume and extract structured information.
26
- Return a JSON object with these fields:
27
- - "skills": list of technical and soft skills
28
- - "experience_summary": brief summary of work experience
29
- - "education": list of educational qualifications
30
- - "projects": list of notable projects
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  Resume text:
33
  ---
@@ -37,19 +90,37 @@ Resume text:
37
  Return ONLY valid JSON, no markdown formatting."""
38
 
39
  result = await call_gemini(prompt)
40
- # Clean up markdown code blocks if present
41
- result = result.strip()
42
- if result.startswith("```"):
43
- result = result.split("\n", 1)[1]
44
- if result.endswith("```"):
45
- result = result.rsplit("```", 1)[0]
46
- result = result.strip()
47
-
48
- import json
49
  try:
50
- return json.loads(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except json.JSONDecodeError:
52
- return {"skills": [], "experience_summary": result, "education": [], "projects": []}
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
  async def generate_interview_question(
@@ -58,9 +129,13 @@ async def generate_interview_question(
58
  previous_questions: list = None,
59
  previous_answer: str = None,
60
  difficulty: str = "medium",
 
 
61
  ) -> dict:
62
  """Generate an interview question using Gemini."""
63
- context = f"Role: {role_title}\nCandidate Skills: {', '.join(skills)}\nDifficulty: {difficulty}"
 
 
64
 
65
  if previous_questions:
66
  context += f"\n\nPrevious questions asked (do NOT repeat these):\n"
@@ -71,13 +146,19 @@ async def generate_interview_question(
71
  context += f"\nCandidate's last answer: {previous_answer}"
72
  context += "\nGenerate a follow-up question based on this answer to probe deeper."
73
 
74
- prompt = f"""{context}
 
75
 
76
  Generate ONE interview question for this candidate. The question should:
77
  1. Be relevant to the role and candidate's skills
78
  2. Match the {difficulty} difficulty level
79
  3. Be clear and specific
80
  4. Test practical knowledge
 
 
 
 
 
81
 
82
  Return ONLY a JSON object with:
83
  - "question": the interview question text
@@ -85,16 +166,10 @@ Return ONLY a JSON object with:
85
  - "category": the skill category this tests
86
 
87
  Return ONLY valid JSON, no markdown formatting."""
 
 
88
 
89
- result = await call_gemini(prompt)
90
- result = result.strip()
91
- if result.startswith("```"):
92
- result = result.split("\n", 1)[1]
93
- if result.endswith("```"):
94
- result = result.rsplit("```", 1)[0]
95
- result = result.strip()
96
-
97
- import json
98
  try:
99
  return json.loads(result)
100
  except json.JSONDecodeError:
@@ -111,7 +186,8 @@ async def evaluate_interview(questions_and_answers: list, role_title: str) -> di
111
  for i, qa in enumerate(questions_and_answers, 1):
112
  qa_text += f"\nQ{i}: {qa['question']}\nA{i}: {qa['answer']}\n"
113
 
114
- prompt = f"""You are an expert technical interviewer evaluating a candidate for the role: {role_title}
 
115
 
116
  Here are the interview questions and the candidate's answers:
117
  {qa_text}
@@ -128,16 +204,10 @@ Evaluate the candidate and return a JSON object with:
128
  - "recommendations": list of 3-5 actionable recommendations
129
 
130
  Be fair but thorough. Return ONLY valid JSON, no markdown formatting."""
 
 
131
 
132
- result = await call_gemini(prompt)
133
- result = result.strip()
134
- if result.startswith("```"):
135
- result = result.split("\n", 1)[1]
136
- if result.endswith("```"):
137
- result = result.rsplit("```", 1)[0]
138
- result = result.strip()
139
-
140
- import json
141
  try:
142
  return json.loads(result)
143
  except json.JSONDecodeError:
 
1
  from google import genai
2
  from config import get_settings
3
+ from utils.skills import normalize_skill_list
4
+ import json
5
+ import re
6
+ from langchain_core.prompts import PromptTemplate
7
 
8
  settings = get_settings()
9
 
 
15
  config = {}
16
  if system_instruction:
17
  config["system_instruction"] = system_instruction
18
+ config["response_mime_type"] = "application/json"
19
 
20
  response = client.models.generate_content(
21
  model=settings.GEMINI_MODEL,
 
25
  return response.text
26
 
27
 
28
+ def _extract_json_object(text: str) -> str:
29
+ value = (text or "").strip()
30
+ if value.startswith("```"):
31
+ value = value.split("\n", 1)[1]
32
+ if value.endswith("```"):
33
+ value = value.rsplit("```", 1)[0]
34
+ value = value.strip()
35
+
36
+ if value.startswith("{") and value.endswith("}"):
37
+ return value
38
+
39
+ # Fallback when model wraps JSON with extra text.
40
+ start = value.find("{")
41
+ end = value.rfind("}")
42
+ if start != -1 and end != -1 and end > start:
43
+ return value[start:end + 1]
44
+
45
+ return value
46
+
47
+
48
+ def _fallback_skill_scan(resume_text: str) -> list:
49
+ common = [
50
+ "python", "java", "javascript", "typescript", "react", "next.js", "node.js",
51
+ "fastapi", "django", "flask", "spring", "mongodb", "postgresql", "mysql",
52
+ "redis", "docker", "kubernetes", "aws", "gcp", "azure", "git", "linux",
53
+ "rest api", "graphql", "machine learning", "data analysis", "sql",
54
+ ]
55
+ text = (resume_text or "").lower()
56
+ found = []
57
+ for skill in common:
58
+ pattern = r"\b" + re.escape(skill.lower()) + r"\b"
59
+ if re.search(pattern, text):
60
+ found.append(skill)
61
+ return normalize_skill_list(found)
62
+
63
+
64
  async def parse_resume_with_gemini(resume_text: str) -> dict:
65
  """Parse resume text and extract structured data using Gemini."""
66
  prompt = f"""Analyze the following resume and extract structured information.
67
+ CRITICAL INSTRUCTION FOR SKILLS:
68
+ 1) Extract concrete tools/technologies/frameworks/languages from the resume text.
69
+ 2) Exclude vague traits such as "hardworking", "leadership", "problem solving", "communication".
70
+ 3) If a line contains multiple skills (comma-separated), split them into separate list items.
71
+ 4) Do NOT add skills that are not present in the resume.
72
+
73
+ Return a JSON object with these exact fields:
74
+ - "name": full name of the candidate (string or null)
75
+ - "email": candidate's email address (string or null)
76
+ - "phone": candidate's phone number (string or null)
77
+ - "location": candidate's location/address (string or null)
78
+ - "skills": list of technical and soft skills verbatim from the text (array of strings)
79
+ - "recommended_roles": list of 3-5 recommended job role titles the user is qualified for based on these skills (array of strings)
80
+ - "experience_summary": brief summary of work experience (string)
81
+ - "experience": list of dictionaries, each with "company", "role", "duration", and "description"
82
+ - "education": list of dictionaries, each with "institution", "degree", "graduation_year"
83
+ - "projects": list of dictionaries, each with "name" and "description"
84
 
85
  Resume text:
86
  ---
 
90
  Return ONLY valid JSON, no markdown formatting."""
91
 
92
  result = await call_gemini(prompt)
93
+ result = _extract_json_object(result)
94
+
 
 
 
 
 
 
 
95
  try:
96
+ parsed = json.loads(result)
97
+ parsed.setdefault("name", None)
98
+ parsed.setdefault("email", None)
99
+ parsed.setdefault("phone", None)
100
+ parsed.setdefault("location", None)
101
+ parsed.setdefault("recommended_roles", [])
102
+ parsed.setdefault("experience_summary", "")
103
+ parsed.setdefault("experience", [])
104
+ parsed.setdefault("education", [])
105
+ parsed.setdefault("projects", [])
106
+
107
+ parsed["skills"] = normalize_skill_list(parsed.get("skills", []))
108
+ if not parsed["skills"]:
109
+ parsed["skills"] = _fallback_skill_scan(resume_text)
110
+ return parsed
111
  except json.JSONDecodeError:
112
+ return {
113
+ "name": None,
114
+ "email": None,
115
+ "phone": None,
116
+ "location": None,
117
+ "skills": _fallback_skill_scan(resume_text),
118
+ "recommended_roles": [],
119
+ "experience_summary": result,
120
+ "experience": [],
121
+ "education": [],
122
+ "projects": []
123
+ }
124
 
125
 
126
  async def generate_interview_question(
 
129
  previous_questions: list = None,
130
  previous_answer: str = None,
131
  difficulty: str = "medium",
132
+ question_stage: str = "deep",
133
+ foundation_limit: int = 3,
134
  ) -> dict:
135
  """Generate an interview question using Gemini."""
136
+ context = f"Role: {role_title}\nCandidate Skill Focus Areas: {', '.join(skills)}\nDifficulty: {difficulty}"
137
+ context += f"\nCurrent Stage: {question_stage}"
138
+ context += f"\nFoundation Question Limit: {foundation_limit}"
139
 
140
  if previous_questions:
141
  context += f"\n\nPrevious questions asked (do NOT repeat these):\n"
 
146
  context += f"\nCandidate's last answer: {previous_answer}"
147
  context += "\nGenerate a follow-up question based on this answer to probe deeper."
148
 
149
+ prompt_template = PromptTemplate.from_template(
150
+ """{context}
151
 
152
  Generate ONE interview question for this candidate. The question should:
153
  1. Be relevant to the role and candidate's skills
154
  2. Match the {difficulty} difficulty level
155
  3. Be clear and specific
156
  4. Test practical knowledge
157
+ 5. If a skill is a cluster label like "Deep Learning (CNN, LSTM)", pick one member skill from that cluster and ask a concrete question on it
158
+ 6. Rotate topics to avoid repeatedly asking from the same cluster
159
+ 7. If Current Stage is "foundation": ask only core/fundamental basics
160
+ 8. If Current Stage is "deep": DO NOT ask basic definition/foundation questions; ask applied, scenario-based, debugging, optimization, or trade-off questions only
161
+ 9. Treat Foundation Question Limit as a strict cap: once foundation stage is done, never return to foundation-style prompts
162
 
163
  Return ONLY a JSON object with:
164
  - "question": the interview question text
 
166
  - "category": the skill category this tests
167
 
168
  Return ONLY valid JSON, no markdown formatting."""
169
+ )
170
+ prompt = prompt_template.format(context=context, difficulty=difficulty)
171
 
172
+ result = _extract_json_object(await call_gemini(prompt))
 
 
 
 
 
 
 
 
173
  try:
174
  return json.loads(result)
175
  except json.JSONDecodeError:
 
186
  for i, qa in enumerate(questions_and_answers, 1):
187
  qa_text += f"\nQ{i}: {qa['question']}\nA{i}: {qa['answer']}\n"
188
 
189
+ prompt_template = PromptTemplate.from_template(
190
+ """You are an expert technical interviewer evaluating a candidate for the role: {role_title}
191
 
192
  Here are the interview questions and the candidate's answers:
193
  {qa_text}
 
204
  - "recommendations": list of 3-5 actionable recommendations
205
 
206
  Be fair but thorough. Return ONLY valid JSON, no markdown formatting."""
207
+ )
208
+ prompt = prompt_template.format(role_title=role_title, qa_text=qa_text)
209
 
210
+ result = _extract_json_object(await call_gemini(prompt))
 
 
 
 
 
 
 
 
211
  try:
212
  return json.loads(result)
213
  except json.JSONDecodeError:
backend/utils/resume_text.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+
3
+
4
+ def _extract_pdf_text(file_content: bytes) -> str:
5
+ from pypdf import PdfReader
6
+
7
+ reader = PdfReader(io.BytesIO(file_content))
8
+ pages = []
9
+ for page in reader.pages:
10
+ pages.append(page.extract_text() or "")
11
+ return "\n".join(pages)
12
+
13
+
14
+ def _extract_docx_text(file_content: bytes) -> str:
15
+ from docx import Document
16
+
17
+ doc = Document(io.BytesIO(file_content))
18
+ paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
19
+ return "\n".join(paragraphs)
20
+
21
+
22
+ def extract_resume_text(filename: str, file_content: bytes) -> str:
23
+ ext = (filename or "").lower().rsplit(".", 1)
24
+ ext = f".{ext[-1]}" if len(ext) > 1 else ""
25
+
26
+ if ext == ".pdf":
27
+ text = _extract_pdf_text(file_content)
28
+ elif ext == ".docx":
29
+ text = _extract_docx_text(file_content)
30
+ else:
31
+ # Fallback path for txt/doc and unknown formats.
32
+ text = file_content.decode("utf-8", errors="ignore")
33
+
34
+ cleaned = text.replace("\x00", " ")
35
+ cleaned = "\n".join(line.strip() for line in cleaned.splitlines() if line.strip())
36
+ return cleaned
backend/utils/skills.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Iterable, List, Set
3
+
4
+
5
+ _SKILL_ALIASES = {
6
+ "node": "Node.js",
7
+ "nodejs": "Node.js",
8
+ "node.js": "Node.js",
9
+ "express": "Express.js",
10
+ "expressjs": "Express.js",
11
+ "express.js": "Express.js",
12
+ "react": "React",
13
+ "reactjs": "React",
14
+ "react.js": "React",
15
+ "next": "Next.js",
16
+ "nextjs": "Next.js",
17
+ "next.js": "Next.js",
18
+ "js": "JavaScript",
19
+ "javascript": "JavaScript",
20
+ "ts": "TypeScript",
21
+ "typescript": "TypeScript",
22
+ "py": "Python",
23
+ "mongo": "MongoDB",
24
+ "mongodb": "MongoDB",
25
+ "postgres": "PostgreSQL",
26
+ "postgresql": "PostgreSQL",
27
+ "mysql": "MySQL",
28
+ "aws": "AWS",
29
+ "gcp": "GCP",
30
+ "azure": "Azure",
31
+ "ci/cd": "CI/CD",
32
+ "ci cd": "CI/CD",
33
+ "rest": "REST API",
34
+ "rest api": "REST API",
35
+ "fastapi": "FastAPI",
36
+ "langchain": "LangChain",
37
+ "langgraph": "LangGraph",
38
+ "langsmith": "LangSmith",
39
+ "rag": "RAG",
40
+ "rag pipeline": "RAG Pipelines",
41
+ "rag pipelines": "RAG Pipelines",
42
+ "chromadb": "ChromaDB",
43
+ "scikit learn": "Scikit-learn",
44
+ "scikit-learn": "Scikit-learn",
45
+ "pytorch": "PyTorch",
46
+ "llama": "Llama",
47
+ "llama 4": "Llama 4",
48
+ "gemini api": "Gemini API",
49
+ "sentence transformers": "Sentence Transformers",
50
+ "e5 multilingual embeddings": "E5 Multilingual Embeddings",
51
+ "cnn": "CNN",
52
+ "cnns": "CNN",
53
+ "rnn": "RNN",
54
+ "rnns": "RNN",
55
+ "gan": "GAN",
56
+ "gans": "GAN",
57
+ "bert": "BERT",
58
+ "bert fine tuning": "BERT Fine-tuning",
59
+ "ocr": "OCR",
60
+ "ocr based extraction": "OCR Based Extraction",
61
+ "k means": "K-Means",
62
+ "cross validation": "Cross-validation",
63
+ "oop": "OOP",
64
+ "ml": "Machine Learning",
65
+ }
66
+
67
+
68
+ _SKILL_CLUSTER_RULES = [
69
+ (
70
+ "Deep Learning",
71
+ ["cnn", "rnn", "lstm", "gru", "gan", "transformers", "bert", "pytorch", "tensorflow", "encoder decoder"],
72
+ ),
73
+ (
74
+ "Machine Learning",
75
+ [
76
+ "machine learning",
77
+ "random forest",
78
+ "svm",
79
+ "logistic regression",
80
+ "linear regression",
81
+ "k means",
82
+ "model evaluation",
83
+ "cross validation",
84
+ "scikit learn",
85
+ ],
86
+ ),
87
+ (
88
+ "LLM and GenAI",
89
+ [
90
+ "langchain",
91
+ "langgraph",
92
+ "langsmith",
93
+ "prompt engineering",
94
+ "rag",
95
+ "rag pipeline",
96
+ "rag pipelines",
97
+ "semantic search",
98
+ "gemini api",
99
+ "llama",
100
+ "embedding models",
101
+ "e5 multilingual embeddings",
102
+ "sentence transformers",
103
+ ],
104
+ ),
105
+ (
106
+ "Data and Databases",
107
+ ["sql", "mysql", "postgresql", "mongodb", "pinecone", "chromadb", "vector similarity search"],
108
+ ),
109
+ (
110
+ "Backend and APIs",
111
+ ["python", "java", "javascript", "typescript", "fastapi", "django", "flask", "node", "express", "rest api"],
112
+ ),
113
+ (
114
+ "Cloud and DevOps",
115
+ ["docker", "kubernetes", "aws", "gcp", "azure", "git", "github", "ci cd"],
116
+ ),
117
+ (
118
+ "Document AI and OCR",
119
+ ["ocr", "ocr based extraction", "document extraction"],
120
+ ),
121
+ ]
122
+
123
+
124
+ def _normalize_key(value: str) -> str:
125
+ value = value.strip().lower()
126
+ value = re.sub(r"[\u2010-\u2015]", "-", value)
127
+ value = value.replace("&", " and ")
128
+ value = re.sub(r"[^a-z0-9+#.\-/ ]+", " ", value)
129
+ value = value.replace("/", " ")
130
+ value = value.replace("-", " ")
131
+ value = re.sub(r"\s+", " ", value).strip()
132
+ return value
133
+
134
+
135
+ def canonicalize_skill(skill: str) -> str:
136
+ if not isinstance(skill, str):
137
+ return ""
138
+
139
+ cleaned = skill.strip()
140
+ if not cleaned:
141
+ return ""
142
+
143
+ normalized = _normalize_key(cleaned)
144
+ if normalized in _SKILL_ALIASES:
145
+ return _SKILL_ALIASES[normalized]
146
+
147
+ # Keep all-caps acronyms readable (e.g., SQL, API, OOP).
148
+ if cleaned.isupper() and len(cleaned) <= 6:
149
+ return cleaned
150
+
151
+ return " ".join(part.capitalize() for part in normalized.split(" "))
152
+
153
+
154
+ def _split_skill_chunks(skill: str) -> List[str]:
155
+ if not isinstance(skill, str):
156
+ return []
157
+
158
+ parts = re.split(r",|\||;", skill)
159
+ chunks = []
160
+ for part in parts:
161
+ candidate = part.strip()
162
+ if not candidate:
163
+ continue
164
+ chunks.append(candidate)
165
+ return chunks
166
+
167
+
168
+ def normalize_skill_list(skills: Iterable[str], limit: int = 80) -> List[str]:
169
+ unique: List[str] = []
170
+ seen: Set[str] = set()
171
+
172
+ for raw in skills or []:
173
+ for token in _split_skill_chunks(raw):
174
+ canon = canonicalize_skill(token)
175
+ if not canon:
176
+ continue
177
+ key = _normalize_key(canon)
178
+ if key in seen:
179
+ continue
180
+ seen.add(key)
181
+ unique.append(canon)
182
+ if len(unique) >= limit:
183
+ return unique
184
+
185
+ return unique
186
+
187
+
188
+ def _classify_cluster(skill: str) -> str | None:
189
+ key = _normalize_key(skill)
190
+ if not key:
191
+ return None
192
+
193
+ for cluster_name, rules in _SKILL_CLUSTER_RULES:
194
+ for rule in rules:
195
+ if rule in key or key in rule:
196
+ return cluster_name
197
+ return None
198
+
199
+
200
+ def cluster_skills(skills: Iterable[str], max_members_per_cluster: int = 4) -> List[dict]:
201
+ """Return grouped skills with compact labels for UI and prompting."""
202
+ normalized = normalize_skill_list(skills)
203
+ grouped: dict[str, list[str]] = {}
204
+
205
+ for skill in normalized:
206
+ cluster_name = _classify_cluster(skill)
207
+ if not cluster_name:
208
+ continue
209
+ grouped.setdefault(cluster_name, [])
210
+ if skill not in grouped[cluster_name]:
211
+ grouped[cluster_name].append(skill)
212
+
213
+ # Prefer denser clusters first for cleaner UX.
214
+ ordered = sorted(grouped.items(), key=lambda item: len(item[1]), reverse=True)
215
+
216
+ result = []
217
+ for cluster_name, members in ordered:
218
+ sampled = members[:max_members_per_cluster]
219
+ label = f"{cluster_name} ({', '.join(sampled)})"
220
+ result.append(
221
+ {
222
+ "cluster": cluster_name,
223
+ "members": members,
224
+ "label": label,
225
+ "count": len(members),
226
+ }
227
+ )
228
+
229
+ return result
230
+
231
+
232
+ def build_interview_focus_skills(skills: Iterable[str], max_clusters: int = 6, max_extras: int = 2) -> List[str]:
233
+ """Build a compact, cluster-aware skill list for interview question generation."""
234
+ normalized = normalize_skill_list(skills)
235
+ grouped = cluster_skills(normalized)
236
+
237
+ focus = [g["label"] for g in grouped[:max_clusters]]
238
+
239
+ # Add a couple of non-clustered items so niche tools are not ignored.
240
+ extras = []
241
+ clustered_members = {m for g in grouped for m in g["members"]}
242
+ for skill in normalized:
243
+ if skill in clustered_members:
244
+ continue
245
+ extras.append(skill)
246
+ if len(extras) >= max_extras:
247
+ break
248
+
249
+ combined = focus + extras
250
+ return combined if combined else normalized[: max_clusters + max_extras]
251
+
252
+
253
+ def skill_match(candidate_skill: str, required_skill: str) -> bool:
254
+ c_key = _normalize_key(canonicalize_skill(candidate_skill))
255
+ r_key = _normalize_key(canonicalize_skill(required_skill))
256
+ if not c_key or not r_key:
257
+ return False
258
+ if c_key == r_key:
259
+ return True
260
+
261
+ # Soft phrase matching for related forms like "rest api" vs "restful api".
262
+ if c_key in r_key or r_key in c_key:
263
+ return True
264
+
265
+ return False
266
+
267
+
268
+ def find_matching_skills(candidate_skills: Iterable[str], required_skills: Iterable[str]) -> List[str]:
269
+ matched: List[str] = []
270
+ for req in required_skills or []:
271
+ for cand in candidate_skills or []:
272
+ if skill_match(cand, req):
273
+ matched.append(canonicalize_skill(req))
274
+ break
275
+ return normalize_skill_list(matched)
276
+
277
+
278
+ def find_missing_skills(candidate_skills: Iterable[str], required_skills: Iterable[str]) -> List[str]:
279
+ missing: List[str] = []
280
+ for req in required_skills or []:
281
+ has_match = False
282
+ for cand in candidate_skills or []:
283
+ if skill_match(cand, req):
284
+ has_match = True
285
+ break
286
+ if not has_match:
287
+ missing.append(canonicalize_skill(req))
288
+ return normalize_skill_list(missing)