Spaces:
Sleeping
Sleeping
Commit ·
5837391
1
Parent(s): be9a4dd
v3.1
Browse files — Updated the interview flow and implemented GPU support for TTS and STT
- backend/database.py +3 -0
- backend/routers/interview.py +80 -0
- backend/routers/speech.py +53 -8
- backend/services/evaluation_service.py +4 -0
- backend/services/gemini_service.py +281 -0
- backend/services/interview_service.py +1064 -298
- backend/services/latency_service.py +179 -0
- backend/services/queue_service.py +194 -0
- backend/services/stt_service.py +96 -9
- backend/services/tts_service.py +67 -9
- backend/utils/gemini.py +188 -10
- resume-jd-verification-2026-04-10T05-15-44-248Z.pdf +646 -0
backend/database.py
CHANGED
|
@@ -27,7 +27,10 @@ async def connect_db():
|
|
| 27 |
await db.sessions.create_index("user_id")
|
| 28 |
await db.results.create_index("session_id")
|
| 29 |
await db.results.create_index("user_id")
|
|
|
|
|
|
|
| 30 |
await db.questions.create_index("role_id")
|
|
|
|
| 31 |
|
| 32 |
# Redis
|
| 33 |
redis_client = aioredis.from_url(
|
|
|
|
| 27 |
await db.sessions.create_index("user_id")
|
| 28 |
await db.results.create_index("session_id")
|
| 29 |
await db.results.create_index("user_id")
|
| 30 |
+
await db.answers.create_index("user_id")
|
| 31 |
+
await db.answers.create_index("session_id")
|
| 32 |
await db.questions.create_index("role_id")
|
| 33 |
+
await db.jd_verifications.create_index([("user_id", 1), ("cache_key", 1)])
|
| 34 |
|
| 35 |
# Redis
|
| 36 |
redis_client = aioredis.from_url(
|
backend/routers/interview.py
CHANGED
|
@@ -12,9 +12,11 @@ from services.interview_service import (
|
|
| 12 |
start_interview,
|
| 13 |
verify_resume_job_description,
|
| 14 |
submit_answer,
|
|
|
|
| 15 |
quit_interview,
|
| 16 |
)
|
| 17 |
from services.evaluation_service import generate_report
|
|
|
|
| 18 |
|
| 19 |
router = APIRouter()
|
| 20 |
|
|
@@ -41,6 +43,28 @@ async def start_interview_endpoint(
|
|
| 41 |
raise HTTPException(status_code=500, detail=str(e))
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
@router.post("/verify")
|
| 45 |
async def verify_resume_job_description_endpoint(
|
| 46 |
request: VerifyResumeJdRequest,
|
|
@@ -80,6 +104,43 @@ async def submit_answer_endpoint(
|
|
| 80 |
raise HTTPException(status_code=500, detail=str(e))
|
| 81 |
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
@router.post("/quit")
|
| 84 |
async def quit_interview_endpoint(
|
| 85 |
request: QuitInterviewRequest,
|
|
@@ -111,6 +172,25 @@ async def quit_interview_endpoint(
|
|
| 111 |
raise HTTPException(status_code=500, detail=str(e))
|
| 112 |
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
@router.get("/report")
|
| 115 |
async def get_interview_report(
|
| 116 |
session_id: str,
|
|
|
|
| 12 |
start_interview,
|
| 13 |
verify_resume_job_description,
|
| 14 |
submit_answer,
|
| 15 |
+
get_next_question,
|
| 16 |
quit_interview,
|
| 17 |
)
|
| 18 |
from services.evaluation_service import generate_report
|
| 19 |
+
from services.latency_service import get_latency_metrics, reset_latency_metrics
|
| 20 |
|
| 21 |
router = APIRouter()
|
| 22 |
|
|
|
|
| 43 |
raise HTTPException(status_code=500, detail=str(e))
|
| 44 |
|
| 45 |
|
| 46 |
+
@router.post("/start_interview")
async def start_interview_compat_endpoint(
    request: StartInterviewRequest,
    current_user: dict = Depends(get_current_user),
):
    """Compatibility endpoint aligned with alternate API naming.

    Forwards the request fields together with the authenticated user's id to
    ``start_interview`` and returns its result unchanged.

    Raises:
        HTTPException: 400 when the service raises ``ValueError``, 500 for any
            other unexpected failure. An ``HTTPException`` raised further
            down the stack is passed through with its own status code.
    """
    try:
        return await start_interview(
            user_id=current_user["user_id"],
            role_id=request.role_id,
            custom_role=request.custom_role,
            interview_type=request.interview_type,
            topic_id=request.topic_id,
            job_description_id=request.job_description_id,
        )
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # catch-all below would rewrite its status code to 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 66 |
+
|
| 67 |
+
|
| 68 |
@router.post("/verify")
|
| 69 |
async def verify_resume_job_description_endpoint(
|
| 70 |
request: VerifyResumeJdRequest,
|
|
|
|
| 104 |
raise HTTPException(status_code=500, detail=str(e))
|
| 105 |
|
| 106 |
|
| 107 |
+
@router.post("/submit_answer")
async def submit_answer_compat_endpoint(
    request: SubmitAnswerRequest,
    current_user: dict = Depends(get_current_user),
):
    """Compatibility endpoint aligned with alternate API naming.

    Forwards ``session_id``, ``question_id`` and ``answer`` from the request
    to ``submit_answer`` and returns its result unchanged.

    NOTE(review): the authenticated user is not forwarded to
    ``submit_answer``; confirm session ownership is enforced elsewhere.

    Raises:
        HTTPException: 400 when the service raises ``ValueError``, 500 for any
            other unexpected failure. An ``HTTPException`` raised further
            down the stack is passed through with its own status code.
    """
    # The dependency is declared only to require authentication.
    _ = current_user
    try:
        return await submit_answer(
            session_id=request.session_id,
            question_id=request.question_id,
            answer=request.answer,
        )
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # catch-all below would rewrite its status code to 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@router.get("/next_question")
async def get_next_question_endpoint(
    session_id: str,
    current_user: dict = Depends(get_current_user),
):
    """Preview next queued question without modifying answer state.

    Delegates to ``get_next_question`` with the session id and the
    authenticated user's id, returning its result unchanged.

    Raises:
        HTTPException: 400 when the service raises ``ValueError``, 500 for any
            other unexpected failure. An ``HTTPException`` raised further
            down the stack is passed through with its own status code.
    """
    try:
        return await get_next_question(
            session_id=session_id,
            user_id=current_user["user_id"],
        )
    except HTTPException:
        # HTTPException is an Exception subclass; without this clause the
        # catch-all below would rewrite its status code to 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 142 |
+
|
| 143 |
+
|
| 144 |
@router.post("/quit")
|
| 145 |
async def quit_interview_endpoint(
|
| 146 |
request: QuitInterviewRequest,
|
|
|
|
| 172 |
raise HTTPException(status_code=500, detail=str(e))
|
| 173 |
|
| 174 |
|
| 175 |
+
@router.get("/latency")
async def interview_latency_metrics(
    sample_size: int = 500,
    current_user: dict = Depends(get_current_user),
):
    """Return STT/submit/Gemini latency metrics (p50 and p95).

    ``sample_size`` is passed straight through to ``get_latency_metrics``.
    The user dependency is declared only to require authentication.
    """
    _ = current_user
    metrics = await get_latency_metrics(sample_size=sample_size)
    return metrics
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
@router.post("/latency/reset")
async def reset_interview_latency_metrics(
    current_user: dict = Depends(get_current_user),
):
    """Clear recorded latency samples so a fresh before/after comparison can begin.

    The user dependency is declared only to require authentication; the
    response is whatever ``reset_latency_metrics`` returns.
    """
    _ = current_user
    outcome = await reset_latency_metrics()
    return outcome
|
| 192 |
+
|
| 193 |
+
|
| 194 |
@router.get("/report")
|
| 195 |
async def get_interview_report(
|
| 196 |
session_id: str,
|
backend/routers/speech.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
| 2 |
from fastapi.responses import Response
|
| 3 |
from pydantic import BaseModel
|
|
|
|
| 4 |
|
| 5 |
from auth.jwt import get_current_user
|
| 6 |
-
from services.tts_service import synthesize_wav, warmup_xtts_model
|
| 7 |
from services.stt_service import transcribe_audio_bytes, warmup_whisper_model
|
|
|
|
| 8 |
|
| 9 |
router = APIRouter()
|
| 10 |
|
|
@@ -17,15 +19,34 @@ class SpeechSynthesisRequest(BaseModel):
|
|
| 17 |
@router.get("/health")
|
| 18 |
async def speech_health(current_user: dict = Depends(get_current_user)):
|
| 19 |
"""Check whether speech route is available for authenticated users."""
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
@router.post("/warmup")
|
| 24 |
async def speech_warmup(current_user: dict = Depends(get_current_user)):
|
| 25 |
"""Warm XTTS model so first interview playback does not hit cold-start delay."""
|
| 26 |
-
|
|
|
|
| 27 |
await warmup_whisper_model()
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
@router.post("/synthesize")
|
|
@@ -42,13 +63,34 @@ async def synthesize_speech(
|
|
| 42 |
except RuntimeError as e:
|
| 43 |
# XTTS may be in cold-start transition; warm once and retry before failing.
|
| 44 |
try:
|
| 45 |
-
await warmup_xtts_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
wav_bytes = await synthesize_wav(request.text, request.voice_gender)
|
| 47 |
return Response(content=wav_bytes, media_type="audio/wav")
|
| 48 |
-
except
|
|
|
|
|
|
|
| 49 |
raise HTTPException(status_code=503, detail=str(e))
|
| 50 |
except Exception as e:
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
@router.post("/transcribe")
|
|
@@ -58,6 +100,7 @@ async def transcribe_speech(
|
|
| 58 |
current_user: dict = Depends(get_current_user),
|
| 59 |
):
|
| 60 |
"""Transcribe uploaded interview audio using Whisper model."""
|
|
|
|
| 61 |
try:
|
| 62 |
payload = await audio.read()
|
| 63 |
text = await transcribe_audio_bytes(
|
|
@@ -65,7 +108,9 @@ async def transcribe_speech(
|
|
| 65 |
filename=audio.filename or "speech.webm",
|
| 66 |
language=language,
|
| 67 |
)
|
| 68 |
-
|
|
|
|
|
|
|
| 69 |
except ValueError as e:
|
| 70 |
raise HTTPException(status_code=400, detail=str(e))
|
| 71 |
except RuntimeError as e:
|
|
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
| 2 |
from fastapi.responses import Response
|
| 3 |
from pydantic import BaseModel
|
| 4 |
+
from time import perf_counter
|
| 5 |
|
| 6 |
from auth.jwt import get_current_user
|
| 7 |
+
from services.tts_service import synthesize_wav, warmup_xtts_model, get_xtts_warmup_state
|
| 8 |
from services.stt_service import transcribe_audio_bytes, warmup_whisper_model
|
| 9 |
+
from services.latency_service import record_latency
|
| 10 |
|
| 11 |
router = APIRouter()
|
| 12 |
|
|
|
|
| 19 |
@router.get("/health")
async def speech_health(current_user: dict = Depends(get_current_user)):
    """Report that the speech route is reachable for authenticated users.

    The payload also exposes whether XTTS is warm, taken from the
    ``is_warm`` entry of ``get_xtts_warmup_state()``.
    """
    _ = current_user
    warm_state = get_xtts_warmup_state()
    xtts_ready = bool(warm_state.get("is_warm"))
    return {"status": "ok", "service": "speech", "xtts_ready": xtts_ready}
|
| 29 |
|
| 30 |
|
| 31 |
@router.post("/warmup")
async def speech_warmup(current_user: dict = Depends(get_current_user)):
    """Warm the XTTS and Whisper models ahead of the first interview playback.

    Returns a small readiness payload once XTTS reports success.

    Raises:
        HTTPException: 503 when ``warmup_xtts_model`` reports failure; the
            detail carries the warmup state's ``last_error`` when present.
    """
    _ = current_user
    xtts_ready = await warmup_xtts_model()
    await warmup_whisper_model()

    # Read the state after both warmups, mirroring the original call order.
    state = get_xtts_warmup_state()
    if xtts_ready:
        return {
            "status": "ok",
            "message": "speech model warmed",
            "xtts_ready": True,
        }

    reason = state.get('last_error') or 'unknown error'
    raise HTTPException(status_code=503, detail=f"XTTS warmup failed: {reason}")
|
| 50 |
|
| 51 |
|
| 52 |
@router.post("/synthesize")
|
|
|
|
| 63 |
except RuntimeError as e:
|
| 64 |
# XTTS may be in cold-start transition; warm once and retry before failing.
|
| 65 |
try:
|
| 66 |
+
xtts_ready = await warmup_xtts_model()
|
| 67 |
+
if not xtts_ready:
|
| 68 |
+
state = get_xtts_warmup_state()
|
| 69 |
+
raise HTTPException(
|
| 70 |
+
status_code=503,
|
| 71 |
+
detail=f"XTTS warmup failed: {state.get('last_error') or str(e)}",
|
| 72 |
+
)
|
| 73 |
wav_bytes = await synthesize_wav(request.text, request.voice_gender)
|
| 74 |
return Response(content=wav_bytes, media_type="audio/wav")
|
| 75 |
+
except HTTPException:
|
| 76 |
+
raise
|
| 77 |
+
except Exception:
|
| 78 |
raise HTTPException(status_code=503, detail=str(e))
|
| 79 |
except Exception as e:
|
| 80 |
+
# Retry once after explicit warmup even for non-RuntimeError failures.
|
| 81 |
+
try:
|
| 82 |
+
xtts_ready = await warmup_xtts_model()
|
| 83 |
+
if xtts_ready:
|
| 84 |
+
wav_bytes = await synthesize_wav(request.text, request.voice_gender)
|
| 85 |
+
return Response(content=wav_bytes, media_type="audio/wav")
|
| 86 |
+
except Exception:
|
| 87 |
+
pass
|
| 88 |
+
|
| 89 |
+
state = get_xtts_warmup_state()
|
| 90 |
+
raise HTTPException(
|
| 91 |
+
status_code=503,
|
| 92 |
+
detail=f"Speech synthesis backend unavailable: {state.get('last_error') or str(e)}",
|
| 93 |
+
)
|
| 94 |
|
| 95 |
|
| 96 |
@router.post("/transcribe")
|
|
|
|
| 100 |
current_user: dict = Depends(get_current_user),
|
| 101 |
):
|
| 102 |
"""Transcribe uploaded interview audio using Whisper model."""
|
| 103 |
+
started_at = perf_counter()
|
| 104 |
try:
|
| 105 |
payload = await audio.read()
|
| 106 |
text = await transcribe_audio_bytes(
|
|
|
|
| 108 |
filename=audio.filename or "speech.webm",
|
| 109 |
language=language,
|
| 110 |
)
|
| 111 |
+
elapsed_ms = (perf_counter() - started_at) * 1000.0
|
| 112 |
+
await record_latency("stt_ms", elapsed_ms)
|
| 113 |
+
return {"text": text, "stt_ms": round(elapsed_ms, 2)}
|
| 114 |
except ValueError as e:
|
| 115 |
raise HTTPException(status_code=400, detail=str(e))
|
| 116 |
except RuntimeError as e:
|
backend/services/evaluation_service.py
CHANGED
|
@@ -102,6 +102,10 @@ async def generate_report(session_id: str, user_id: str) -> dict:
|
|
| 102 |
f"session:{session_id}",
|
| 103 |
f"session:{session_id}:questions",
|
| 104 |
f"session:{session_id}:pending_questions",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
f"session:{session_id}:answers",
|
| 106 |
]
|
| 107 |
for qid in question_ids:
|
|
|
|
| 102 |
f"session:{session_id}",
|
| 103 |
f"session:{session_id}:questions",
|
| 104 |
f"session:{session_id}:pending_questions",
|
| 105 |
+
f"session:{session_id}:question_queue",
|
| 106 |
+
f"session:{session_id}:question_backlog",
|
| 107 |
+
f"session:{session_id}:context_cache",
|
| 108 |
+
f"session:{session_id}:asked_questions_set",
|
| 109 |
f"session:{session_id}:answers",
|
| 110 |
]
|
| 111 |
for qid in question_ids:
|
backend/services/gemini_service.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
from utils.gemini import call_gemini
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _extract_json_object(text: str) -> str:
|
| 8 |
+
value = (text or "").strip()
|
| 9 |
+
if value.startswith("```"):
|
| 10 |
+
value = value.split("\n", 1)[1]
|
| 11 |
+
if value.endswith("```"):
|
| 12 |
+
value = value.rsplit("```", 1)[0]
|
| 13 |
+
value = value.strip()
|
| 14 |
+
|
| 15 |
+
if value.startswith("{") and value.endswith("}"):
|
| 16 |
+
return value
|
| 17 |
+
|
| 18 |
+
start = value.find("{")
|
| 19 |
+
end = value.rfind("}")
|
| 20 |
+
if start != -1 and end != -1 and end > start:
|
| 21 |
+
return value[start:end + 1]
|
| 22 |
+
|
| 23 |
+
return value
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _extract_json_array(text: str) -> str:
|
| 27 |
+
value = (text or "").strip()
|
| 28 |
+
if value.startswith("```"):
|
| 29 |
+
value = value.split("\n", 1)[1]
|
| 30 |
+
if value.endswith("```"):
|
| 31 |
+
value = value.rsplit("```", 1)[0]
|
| 32 |
+
value = value.strip()
|
| 33 |
+
|
| 34 |
+
if value.startswith("[") and value.endswith("]"):
|
| 35 |
+
return value
|
| 36 |
+
|
| 37 |
+
start = value.find("[")
|
| 38 |
+
end = value.rfind("]")
|
| 39 |
+
if start != -1 and end != -1 and end > start:
|
| 40 |
+
return value[start:end + 1]
|
| 41 |
+
|
| 42 |
+
return value
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _fallback_score(answer: str) -> int:
|
| 46 |
+
text = (answer or "").strip().lower()
|
| 47 |
+
words = len(text.split())
|
| 48 |
+
weak = any(marker in text for marker in ["not sure", "maybe", "i think", "dont know", "don't know"])
|
| 49 |
+
|
| 50 |
+
if words < 10:
|
| 51 |
+
return 35
|
| 52 |
+
if words < 25:
|
| 53 |
+
return 55
|
| 54 |
+
if weak:
|
| 55 |
+
return 50
|
| 56 |
+
if words > 80:
|
| 57 |
+
return 75
|
| 58 |
+
return 65
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
async def generate_resume_seed_questions(
|
| 62 |
+
role_title: str,
|
| 63 |
+
resume_summary: str,
|
| 64 |
+
resume_skills: list[str],
|
| 65 |
+
jd_title: str,
|
| 66 |
+
jd_description: str,
|
| 67 |
+
jd_required_skills: list[str],
|
| 68 |
+
excluded_questions: list[str],
|
| 69 |
+
count: int = 2,
|
| 70 |
+
) -> list[dict]:
|
| 71 |
+
count = max(1, int(count or 2))
|
| 72 |
+
|
| 73 |
+
payload = {
|
| 74 |
+
"role_title": role_title,
|
| 75 |
+
"resume_summary": resume_summary,
|
| 76 |
+
"resume_skills": resume_skills,
|
| 77 |
+
"jd_title": jd_title,
|
| 78 |
+
"jd_description": jd_description,
|
| 79 |
+
"jd_required_skills": jd_required_skills,
|
| 80 |
+
"excluded_questions": excluded_questions[-25:] if excluded_questions else [],
|
| 81 |
+
"count": count,
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
prompt = f"""Generate exactly {count} resume interview questions.
|
| 85 |
+
|
| 86 |
+
Input JSON:
|
| 87 |
+
{json.dumps(payload, ensure_ascii=True)}
|
| 88 |
+
|
| 89 |
+
Rules:
|
| 90 |
+
1) Questions must be strictly from JD required skills and role context.
|
| 91 |
+
2) Use resume context for relevance.
|
| 92 |
+
3) Do not repeat or paraphrase excluded_questions.
|
| 93 |
+
4) Keep questions concise and practical.
|
| 94 |
+
|
| 95 |
+
Return ONLY valid JSON array with objects:
|
| 96 |
+
- question (string)
|
| 97 |
+
- difficulty (easy|medium|hard)
|
| 98 |
+
- category (string)
|
| 99 |
+
"""
|
| 100 |
+
|
| 101 |
+
try:
|
| 102 |
+
result = _extract_json_array(
|
| 103 |
+
await call_gemini(
|
| 104 |
+
prompt,
|
| 105 |
+
max_attempts=1,
|
| 106 |
+
request_timeout_seconds=3.5,
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
data = json.loads(result)
|
| 110 |
+
if not isinstance(data, list):
|
| 111 |
+
raise ValueError("seed output is not a list")
|
| 112 |
+
|
| 113 |
+
output = []
|
| 114 |
+
for item in data[:count]:
|
| 115 |
+
if not isinstance(item, dict):
|
| 116 |
+
item = {}
|
| 117 |
+
output.append(
|
| 118 |
+
{
|
| 119 |
+
"question": (item.get("question") or "").strip(),
|
| 120 |
+
"difficulty": item.get("difficulty") if item.get("difficulty") in {"easy", "medium", "hard"} else "medium",
|
| 121 |
+
"category": item.get("category") or "resume-seed",
|
| 122 |
+
}
|
| 123 |
+
)
|
| 124 |
+
return [q for q in output if q.get("question")]
|
| 125 |
+
except Exception:
|
| 126 |
+
base_skill = jd_required_skills[0] if jd_required_skills else (resume_skills[0] if resume_skills else "this role")
|
| 127 |
+
fallback = []
|
| 128 |
+
for i in range(count):
|
| 129 |
+
fallback.append(
|
| 130 |
+
{
|
| 131 |
+
"question": (
|
| 132 |
+
f"Explain your hands-on experience with {base_skill} in a project relevant to {role_title}."
|
| 133 |
+
if i == 0
|
| 134 |
+
else f"What trade-offs did you consider when working with {base_skill}?"
|
| 135 |
+
),
|
| 136 |
+
"difficulty": "medium",
|
| 137 |
+
"category": "resume-seed",
|
| 138 |
+
}
|
| 139 |
+
)
|
| 140 |
+
return fallback
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
async def evaluate_and_generate_followup(
    role_title: str,
    required_skills: list[str],
    recent_context: list[dict],
    current_question: str,
    current_answer: str,
    excluded_questions: list[str],
) -> dict:
    """Score the current answer and propose one follow-up question.

    Makes a single short-timeout ``call_gemini`` request. If the call fails or
    the reply is not a JSON object, a heuristic score from ``_fallback_score``
    and a templated follow-up are returned instead.

    Args:
        role_title: Role the candidate is interviewing for.
        required_skills: Skills the follow-up must stay within.
        recent_context: Earlier Q/A entries; only the last 3 are sent.
        current_question: Question that was just asked.
        current_answer: Candidate's answer to evaluate.
        excluded_questions: Earlier questions; only the last 25 are sent.

    Returns:
        Dict with ``score`` (int clamped to 0-100), ``feedback``,
        ``followup_question`` (may be empty when the model gives none),
        ``difficulty`` (easy|medium|hard) and ``category``.
    """
    import logging  # local import keeps this block self-contained

    payload = {
        "role_title": role_title,
        "required_skills": required_skills,
        "recent_context": recent_context[-3:] if recent_context else [],
        "current_question": current_question,
        "current_answer": current_answer,
        "excluded_questions": excluded_questions[-25:] if excluded_questions else [],
    }

    prompt = f"""You are a strict technical interviewer.

Input JSON:
{json.dumps(payload, ensure_ascii=True)}

Task:
1) Evaluate current_answer for current_question.
2) Generate one non-duplicate follow-up question.

Rules:
1) Follow-up must stay within required_skills only.
2) Use recent_context for continuity.
3) Do not repeat/paraphrase excluded_questions.
4) Score should reflect conceptual correctness, not verbosity.

Return ONLY valid JSON object:
{{
"score": 0-100,
"feedback": "short technical feedback",
"followup_question": "...",
"difficulty": "easy|medium|hard",
"category": "..."
}}
"""

    try:
        raw = await call_gemini(
            prompt,
            max_attempts=1,
            request_timeout_seconds=2.8,
        )
        data = json.loads(_extract_json_object(raw))
        if not isinstance(data, dict):
            raise ValueError("evaluation output is not an object")

        # Accept "85", 85.5 and similar. An unusable score falls back to the
        # heuristic without discarding the model's feedback and follow-up.
        try:
            score = int(float(data.get("score", 0)))
        except (TypeError, ValueError, OverflowError):
            score = _fallback_score(current_answer)
        score = max(0, min(100, score))

        feedback = data.get("feedback")
        feedback = feedback.strip() if isinstance(feedback, str) else ""

        followup = data.get("followup_question")
        followup = followup.strip() if isinstance(followup, str) else ""

        difficulty = data.get("difficulty")
        if not (isinstance(difficulty, str) and difficulty in {"easy", "medium", "hard"}):
            difficulty = "medium"

        return {
            "score": score,
            "feedback": feedback or "Answer reviewed.",
            "followup_question": followup,
            "difficulty": difficulty,
            "category": data.get("category") or "follow-up",
        }
    except Exception:
        # Best-effort by design: log the cause, then serve a heuristic result.
        logging.getLogger(__name__).warning(
            "Answer evaluation failed; using heuristic fallback",
            exc_info=True,
        )
        fallback_skill = required_skills[0] if required_skills else "the selected role requirement"
        return {
            "score": _fallback_score(current_answer),
            "feedback": "Try to explain the mechanism, trade-offs, and one concrete example.",
            "followup_question": f"Can you walk me through a real scenario where you applied {fallback_skill} and what trade-offs you handled?",
            "difficulty": "medium",
            "category": "follow-up",
        }
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
async def generate_topic_followup_batch(
|
| 214 |
+
topic_name: str,
|
| 215 |
+
qa_pairs: list[dict],
|
| 216 |
+
excluded_questions: list[str],
|
| 217 |
+
count: int = 3,
|
| 218 |
+
) -> list[dict]:
|
| 219 |
+
count = max(1, int(count or 3))
|
| 220 |
+
|
| 221 |
+
payload = {
|
| 222 |
+
"topic": topic_name,
|
| 223 |
+
"qa_pairs": qa_pairs,
|
| 224 |
+
"excluded_questions": excluded_questions[-30:] if excluded_questions else [],
|
| 225 |
+
"count": count,
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
prompt = f"""Generate exactly {count} topic-focused technical follow-up questions.
|
| 229 |
+
|
| 230 |
+
Input JSON:
|
| 231 |
+
{json.dumps(payload, ensure_ascii=True)}
|
| 232 |
+
|
| 233 |
+
Rules:
|
| 234 |
+
1) Stay in topic scope only.
|
| 235 |
+
2) Build on candidate weak points from qa_pairs.
|
| 236 |
+
3) Do not repeat/paraphrase excluded_questions.
|
| 237 |
+
|
| 238 |
+
Return ONLY valid JSON array with objects:
|
| 239 |
+
- question (string)
|
| 240 |
+
- difficulty (easy|medium|hard)
|
| 241 |
+
- category (string)
|
| 242 |
+
"""
|
| 243 |
+
|
| 244 |
+
try:
|
| 245 |
+
result = _extract_json_array(
|
| 246 |
+
await call_gemini(
|
| 247 |
+
prompt,
|
| 248 |
+
max_attempts=1,
|
| 249 |
+
request_timeout_seconds=3.5,
|
| 250 |
+
)
|
| 251 |
+
)
|
| 252 |
+
data = json.loads(result)
|
| 253 |
+
if not isinstance(data, list):
|
| 254 |
+
raise ValueError("topic output is not a list")
|
| 255 |
+
|
| 256 |
+
out = []
|
| 257 |
+
for item in data[:count]:
|
| 258 |
+
if not isinstance(item, dict):
|
| 259 |
+
item = {}
|
| 260 |
+
text = (item.get("question") or "").strip()
|
| 261 |
+
if not text:
|
| 262 |
+
continue
|
| 263 |
+
out.append(
|
| 264 |
+
{
|
| 265 |
+
"question": text,
|
| 266 |
+
"difficulty": item.get("difficulty") if item.get("difficulty") in {"easy", "medium", "hard"} else "medium",
|
| 267 |
+
"category": item.get("category") or topic_name,
|
| 268 |
+
}
|
| 269 |
+
)
|
| 270 |
+
return out
|
| 271 |
+
except Exception:
|
| 272 |
+
fallback = []
|
| 273 |
+
for i in range(count):
|
| 274 |
+
fallback.append(
|
| 275 |
+
{
|
| 276 |
+
"question": f"In {topic_name}, explain how you would solve a real production issue and why.",
|
| 277 |
+
"difficulty": "medium" if i < 2 else "hard",
|
| 278 |
+
"category": topic_name,
|
| 279 |
+
}
|
| 280 |
+
)
|
| 281 |
+
return fallback
|
backend/services/interview_service.py
CHANGED
|
@@ -2,15 +2,32 @@ import json
|
|
| 2 |
import asyncio
|
| 3 |
import random
|
| 4 |
import re
|
|
|
|
| 5 |
from bson import ObjectId
|
| 6 |
from database import get_db, get_redis
|
| 7 |
-
from models.collections import SESSIONS, USERS, JOB_ROLES, SKILLS, QUESTIONS, TOPICS, TOPIC_QUESTIONS, RESUMES, JD_VERIFICATIONS
|
| 8 |
from utils.helpers import generate_id, utc_now, str_objectid
|
| 9 |
from utils.skills import normalize_skill_list, build_interview_focus_skills
|
| 10 |
from services.interview_graph import run_interview_graph
|
| 11 |
from utils.gemini import generate_interview_question_batch, analyze_resume_vs_job_description
|
| 12 |
from services.job_description_service import get_job_description_for_user
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
from services.tts_service import prefetch_wav
|
|
|
|
| 14 |
|
| 15 |
MAX_QUESTIONS = 20
|
| 16 |
RESUME_MAX_QUESTIONS = 10
|
|
@@ -20,10 +37,19 @@ BATCH_SIZE = 5
|
|
| 20 |
PREGEN_MIN_PENDING = 2
|
| 21 |
FOLLOWUP_AI_COUNT = 2
|
| 22 |
FOLLOWUP_BANK_COUNT = 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Local process memory summary requested in workflow.
|
| 25 |
_LOCAL_SUMMARIES: dict[str, str] = {}
|
| 26 |
_PREGEN_IN_FLIGHT: set[str] = set()
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def _safe_json_list(value: str) -> list:
|
|
@@ -34,6 +60,36 @@ def _safe_json_list(value: str) -> list:
|
|
| 34 |
return []
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def _update_local_summary(session_id: str, question: str, answer: str) -> None:
|
| 38 |
existing = _LOCAL_SUMMARIES.get(session_id, "")
|
| 39 |
combined = f"{existing}\nQ: {question}\nA: {answer}".strip()
|
|
@@ -73,6 +129,32 @@ def _schedule_question_audio_prefetch(questions: list[str], voice_gender: str) -
|
|
| 73 |
pass
|
| 74 |
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
def _normalize_bank_difficulty(value: str) -> str:
|
| 77 |
difficulty = (value or "medium").strip().lower()
|
| 78 |
if difficulty not in {"easy", "medium", "hard"}:
|
|
@@ -136,13 +218,275 @@ async def _resolve_role_title(db, role_id: str | None, custom_role: str | None)
|
|
| 136 |
return "Software Developer"
|
| 137 |
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
async def verify_resume_job_description(
|
| 140 |
user_id: str,
|
| 141 |
role_id: str = None,
|
| 142 |
custom_role: str = None,
|
| 143 |
job_description_id: str = None,
|
| 144 |
) -> dict:
|
| 145 |
-
"""Run resume-vs-job-description verification without starting an interview.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
if not job_description_id:
|
| 147 |
raise ValueError("job_description_id is required for verification")
|
| 148 |
|
|
@@ -163,8 +507,59 @@ async def verify_resume_job_description(
|
|
| 163 |
resume_summary = "\n".join([part for part in summary_parts if part]).strip() or "No summary available"
|
| 164 |
|
| 165 |
role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
|
|
|
|
| 166 |
selected_jd = await get_job_description_for_user(user_id, job_description_id)
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
jd_alignment = await analyze_resume_vs_job_description(
|
| 169 |
role_title=role_title,
|
| 170 |
resume_skills=resume_skills if resume_skills else ["general"],
|
|
@@ -176,7 +571,7 @@ async def verify_resume_job_description(
|
|
| 176 |
|
| 177 |
resume_snapshot = {
|
| 178 |
"filename": resume_doc.get("original_filename") or resume_doc.get("filename") or "",
|
| 179 |
-
"uploaded_at":
|
| 180 |
"skills": resume_skills,
|
| 181 |
"parsed_data": {
|
| 182 |
"name": parsed_data.get("name"),
|
|
@@ -190,36 +585,36 @@ async def verify_resume_job_description(
|
|
| 190 |
|
| 191 |
verification_id = generate_id()
|
| 192 |
saved_at = utc_now()
|
| 193 |
-
|
| 194 |
-
{
|
| 195 |
-
"verification_id": verification_id,
|
| 196 |
-
"user_id": user_id,
|
| 197 |
-
"role_id": role_id,
|
| 198 |
-
"custom_role": custom_role,
|
| 199 |
-
"role_title": role_title,
|
| 200 |
-
"job_description": {
|
| 201 |
-
"id": selected_jd.get("id"),
|
| 202 |
-
"title": selected_jd.get("title"),
|
| 203 |
-
"company": selected_jd.get("company"),
|
| 204 |
-
"description": selected_jd.get("description"),
|
| 205 |
-
"required_skills": selected_jd.get("required_skills", []) or [],
|
| 206 |
-
},
|
| 207 |
-
"resume_snapshot": resume_snapshot,
|
| 208 |
-
"jd_alignment": jd_alignment,
|
| 209 |
-
"created_at": saved_at,
|
| 210 |
-
}
|
| 211 |
-
)
|
| 212 |
-
|
| 213 |
-
return {
|
| 214 |
"verification_id": verification_id,
|
| 215 |
-
"
|
|
|
|
|
|
|
| 216 |
"role_title": role_title,
|
| 217 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
"resume_snapshot": resume_snapshot,
|
| 219 |
"jd_alignment": jd_alignment,
|
| 220 |
-
"
|
|
|
|
| 221 |
}
|
| 222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
async def _get_generated_question_texts(redis, session_id: str) -> list[str]:
|
| 225 |
qids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
|
|
@@ -604,7 +999,7 @@ async def _generate_mixed_followup_batch(
|
|
| 604 |
|
| 605 |
|
| 606 |
async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
| 607 |
-
"""Start
|
| 608 |
db = get_db()
|
| 609 |
redis = get_redis()
|
| 610 |
|
|
@@ -614,18 +1009,21 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
|
| 614 |
if not topic.get("is_published", False):
|
| 615 |
raise ValueError("This topic interview is not published yet")
|
| 616 |
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
|
| 621 |
timer_enabled = bool(topic.get("timer_enabled", False))
|
| 622 |
timer_seconds = topic.get("timer_seconds") if timer_enabled else None
|
| 623 |
|
| 624 |
-
total_questions = min(MAX_QUESTIONS, len(topic_questions))
|
| 625 |
-
# Randomize question selection for each interview session
|
| 626 |
-
random.shuffle(topic_questions)
|
| 627 |
-
selected = topic_questions[:total_questions]
|
| 628 |
-
|
| 629 |
session_id = generate_id()
|
| 630 |
_LOCAL_SUMMARIES[session_id] = ""
|
| 631 |
|
|
@@ -636,6 +1034,48 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
|
| 636 |
user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
|
| 637 |
speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
|
| 638 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
session_doc = {
|
| 640 |
"session_id": session_id,
|
| 641 |
"user_id": user_id,
|
|
@@ -645,16 +1085,17 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
|
| 645 |
"interview_type": "topic",
|
| 646 |
"status": "in_progress",
|
| 647 |
"question_count": 1,
|
| 648 |
-
"max_questions":
|
| 649 |
-
"current_difficulty":
|
| 650 |
"metrics_gemini_calls": 0,
|
| 651 |
"metrics_gemini_questions": 0,
|
| 652 |
-
"metrics_bank_questions":
|
| 653 |
-
"metrics_bank_shortfall": 0,
|
| 654 |
-
"metrics_generation_batches":
|
| 655 |
"speech_voice_gender": speech_voice_gender,
|
| 656 |
"timer_enabled": timer_enabled,
|
| 657 |
"timer_seconds": timer_seconds,
|
|
|
|
| 658 |
"started_at": utc_now(),
|
| 659 |
}
|
| 660 |
await db[SESSIONS].insert_one(session_doc)
|
|
@@ -672,53 +1113,26 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
|
| 672 |
"question_count": 1,
|
| 673 |
"answered_count": 0,
|
| 674 |
"served_count": 1,
|
| 675 |
-
"generated_count":
|
| 676 |
-
"max_questions":
|
| 677 |
-
"current_difficulty":
|
| 678 |
"timer_enabled": str(timer_enabled),
|
| 679 |
"timer_seconds": str(timer_seconds or ""),
|
| 680 |
"status": "in_progress",
|
| 681 |
"speech_voice_gender": speech_voice_gender,
|
| 682 |
"metrics_gemini_calls": 0,
|
| 683 |
"metrics_gemini_questions": 0,
|
| 684 |
-
"metrics_bank_questions":
|
| 685 |
-
"metrics_bank_shortfall": 0,
|
| 686 |
-
"metrics_generation_batches":
|
|
|
|
| 687 |
}
|
| 688 |
await redis.hset(f"session:{session_id}", mapping=session_state)
|
| 689 |
await redis.expire(f"session:{session_id}", SESSION_TTL)
|
| 690 |
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
created_ids.append(qid)
|
| 695 |
-
await redis.hset(
|
| 696 |
-
f"session:{session_id}:q:{qid}",
|
| 697 |
-
mapping={
|
| 698 |
-
"question_id": qid,
|
| 699 |
-
"question": q.get("question", "Can you explain this concept?"),
|
| 700 |
-
"difficulty": q.get("difficulty", "medium"),
|
| 701 |
-
"category": q.get("category", topic.get("name", "topic")),
|
| 702 |
-
},
|
| 703 |
-
)
|
| 704 |
-
await redis.rpush(f"session:{session_id}:questions", qid)
|
| 705 |
-
await redis.expire(f"session:{session_id}:q:{qid}", SESSION_TTL)
|
| 706 |
-
await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
|
| 707 |
-
|
| 708 |
-
first_id = created_ids[0]
|
| 709 |
-
pending_ids = created_ids[1:]
|
| 710 |
-
if pending_ids:
|
| 711 |
-
await redis.rpush(f"session:{session_id}:pending_questions", *pending_ids)
|
| 712 |
-
await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
|
| 713 |
-
|
| 714 |
-
first_q_data = await redis.hgetall(f"session:{session_id}:q:{first_id}")
|
| 715 |
-
prefetch_targets = []
|
| 716 |
-
if len(selected) > 1:
|
| 717 |
-
prefetch_targets.append(selected[1].get("question", ""))
|
| 718 |
-
_schedule_question_audio_prefetch(
|
| 719 |
-
prefetch_targets,
|
| 720 |
-
speech_voice_gender,
|
| 721 |
-
)
|
| 722 |
|
| 723 |
return {
|
| 724 |
"session_id": session_id,
|
|
@@ -737,10 +1151,10 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
|
| 737 |
},
|
| 738 |
"question": {
|
| 739 |
"question_id": first_id,
|
| 740 |
-
"question":
|
| 741 |
-
"difficulty":
|
| 742 |
"question_number": 1,
|
| 743 |
-
"total_questions":
|
| 744 |
},
|
| 745 |
"timer": {
|
| 746 |
"enabled": timer_enabled,
|
|
@@ -839,7 +1253,7 @@ async def start_interview(
|
|
| 839 |
topic_id: str = None,
|
| 840 |
job_description_id: str = None,
|
| 841 |
) -> dict:
|
| 842 |
-
"""Start a new interview session."""
|
| 843 |
interview_type = (interview_type or "resume").strip().lower()
|
| 844 |
if interview_type == "topic":
|
| 845 |
if not topic_id:
|
|
@@ -856,163 +1270,133 @@ async def start_interview(
|
|
| 856 |
user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
|
| 857 |
speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
|
| 858 |
|
| 859 |
-
# Get user skills
|
| 860 |
skills_doc = await db[SKILLS].find_one({"user_id": user_id})
|
| 861 |
-
user_skills = skills_doc.get("skills", [
|
| 862 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
|
| 864 |
if not job_description_id:
|
| 865 |
raise ValueError("Please select a Job Description before starting Resume Interview")
|
| 866 |
|
| 867 |
-
# Get role
|
| 868 |
role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
|
| 869 |
-
|
| 870 |
-
selected_jd = None
|
| 871 |
-
if job_description_id:
|
| 872 |
-
selected_jd = await get_job_description_for_user(user_id, job_description_id)
|
| 873 |
|
| 874 |
jd_required_skills = normalize_skill_list((selected_jd or {}).get("required_skills", []))
|
| 875 |
if not jd_required_skills:
|
| 876 |
-
raise ValueError(
|
| 877 |
-
"Selected Job Description has no required skills. Add required skills in Settings first."
|
| 878 |
-
)
|
| 879 |
|
| 880 |
user_skill_set = {s.lower() for s in user_skills}
|
| 881 |
matched_role_skills = [s for s in jd_required_skills if s.lower() in user_skill_set]
|
| 882 |
missing_role_skills = [s for s in jd_required_skills if s.lower() not in user_skill_set]
|
| 883 |
-
required_skills = list(jd_required_skills)
|
| 884 |
-
|
| 885 |
-
# Resume interview scope is strictly JD-required skills.
|
| 886 |
base_skills_for_interview = matched_role_skills + [s for s in missing_role_skills if s not in matched_role_skills]
|
| 887 |
-
skills_for_interview = build_interview_focus_skills(base_skills_for_interview)
|
| 888 |
-
if not skills_for_interview:
|
| 889 |
-
skills_for_interview = required_skills
|
| 890 |
|
| 891 |
-
|
| 892 |
-
initial_bank = await _fetch_question_bank_batch(
|
| 893 |
-
db=db,
|
| 894 |
-
role_id=role_id,
|
| 895 |
-
excluded_questions=[],
|
| 896 |
-
limit=RESUME_INITIAL_BATCH_SIZE,
|
| 897 |
-
skill_hints=required_skills,
|
| 898 |
-
)
|
| 899 |
-
|
| 900 |
-
resume_source_mode = "db" if len(initial_bank) >= RESUME_INITIAL_BATCH_SIZE else "ai"
|
| 901 |
-
|
| 902 |
-
if resume_source_mode == "db":
|
| 903 |
-
initial_batch = list(initial_bank[:RESUME_INITIAL_BATCH_SIZE])
|
| 904 |
-
initial_gemini_calls = 0
|
| 905 |
-
initial_gemini_questions = 0
|
| 906 |
-
initial_bank_questions = len(initial_batch)
|
| 907 |
-
initial_bank_shortfall = 0
|
| 908 |
-
else:
|
| 909 |
-
initial_batch, _ = await _generate_question_batch(
|
| 910 |
-
role_title=role_title,
|
| 911 |
-
skills=skills_for_interview,
|
| 912 |
-
previous_questions=[],
|
| 913 |
-
generated_count=0,
|
| 914 |
-
max_questions=RESUME_MAX_QUESTIONS,
|
| 915 |
-
current_difficulty="medium",
|
| 916 |
-
local_summary=None,
|
| 917 |
-
batch_size=RESUME_INITIAL_BATCH_SIZE,
|
| 918 |
-
)
|
| 919 |
-
if not initial_batch:
|
| 920 |
-
raise ValueError("Failed to generate initial resume interview questions")
|
| 921 |
-
initial_gemini_calls = 1
|
| 922 |
-
initial_gemini_questions = len(initial_batch)
|
| 923 |
-
initial_bank_questions = 0
|
| 924 |
-
initial_bank_shortfall = RESUME_INITIAL_BATCH_SIZE
|
| 925 |
-
|
| 926 |
-
last_difficulty = initial_batch[-1].get("difficulty", "medium") if initial_batch else "medium"
|
| 927 |
-
if not initial_batch:
|
| 928 |
-
raise ValueError("Failed to generate initial interview questions")
|
| 929 |
|
| 930 |
session_id = generate_id()
|
| 931 |
_LOCAL_SUMMARIES[session_id] = ""
|
| 932 |
|
| 933 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 934 |
session_doc = {
|
| 935 |
"session_id": session_id,
|
| 936 |
"user_id": user_id,
|
| 937 |
"role_id": role_id,
|
| 938 |
"role_title": role_title,
|
| 939 |
-
"job_description_id": selected_jd.get("id")
|
| 940 |
-
"job_description_title": selected_jd.get("title")
|
| 941 |
"status": "in_progress",
|
| 942 |
"interview_type": "resume",
|
| 943 |
"question_count": 1,
|
| 944 |
"max_questions": RESUME_MAX_QUESTIONS,
|
| 945 |
-
"current_difficulty":
|
| 946 |
-
"metrics_gemini_calls":
|
| 947 |
-
"metrics_gemini_questions":
|
| 948 |
-
"metrics_bank_questions":
|
| 949 |
-
"metrics_bank_shortfall":
|
| 950 |
-
"metrics_generation_batches":
|
| 951 |
"speech_voice_gender": speech_voice_gender,
|
| 952 |
"started_at": utc_now(),
|
|
|
|
| 953 |
}
|
| 954 |
await db[SESSIONS].insert_one(session_doc)
|
| 955 |
|
| 956 |
-
# Store session state in Redis
|
| 957 |
session_state = {
|
| 958 |
"user_id": user_id,
|
| 959 |
"role_id": role_id or "",
|
| 960 |
"role_title": role_title,
|
| 961 |
"skills": json.dumps(skills_for_interview),
|
| 962 |
"user_skills": json.dumps(user_skills),
|
| 963 |
-
"required_skills": json.dumps(
|
| 964 |
"matched_skills": json.dumps(matched_role_skills),
|
| 965 |
"missing_skills": json.dumps(missing_role_skills),
|
| 966 |
"question_count": 1,
|
| 967 |
"answered_count": 0,
|
| 968 |
"served_count": 1,
|
| 969 |
-
"generated_count":
|
| 970 |
"max_questions": RESUME_MAX_QUESTIONS,
|
| 971 |
-
"current_difficulty":
|
| 972 |
"interview_type": "resume",
|
| 973 |
"status": "in_progress",
|
| 974 |
"speech_voice_gender": speech_voice_gender,
|
| 975 |
-
"
|
| 976 |
-
"
|
| 977 |
-
"
|
| 978 |
-
"
|
| 979 |
-
"
|
| 980 |
-
"
|
| 981 |
-
"
|
|
|
|
|
|
|
|
|
|
| 982 |
}
|
| 983 |
await redis.hset(f"session:{session_id}", mapping=session_state)
|
| 984 |
await redis.expire(f"session:{session_id}", SESSION_TTL)
|
| 985 |
|
| 986 |
-
#
|
| 987 |
-
|
| 988 |
-
first_id = new_ids[0]
|
| 989 |
-
pending_ids = new_ids[1:]
|
| 990 |
-
if pending_ids:
|
| 991 |
-
await redis.rpush(f"session:{session_id}:pending_questions", *pending_ids)
|
| 992 |
-
await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
|
| 993 |
-
|
| 994 |
-
first_q_data = await redis.hgetall(f"session:{session_id}:q:{first_id}")
|
| 995 |
-
prefetch_targets = []
|
| 996 |
-
if len(initial_batch) > 1:
|
| 997 |
-
prefetch_targets.append(initial_batch[1].get("question", ""))
|
| 998 |
-
_schedule_question_audio_prefetch(
|
| 999 |
-
prefetch_targets,
|
| 1000 |
-
speech_voice_gender,
|
| 1001 |
-
)
|
| 1002 |
|
| 1003 |
return {
|
| 1004 |
"session_id": session_id,
|
| 1005 |
"skill_alignment": {
|
| 1006 |
"user_skills": user_skills,
|
| 1007 |
-
"required_skills":
|
| 1008 |
"matched_skills": matched_role_skills,
|
| 1009 |
"missing_skills": missing_role_skills,
|
| 1010 |
"interview_focus": skills_for_interview,
|
| 1011 |
},
|
| 1012 |
"question": {
|
| 1013 |
"question_id": first_id,
|
| 1014 |
-
"question":
|
| 1015 |
-
"difficulty":
|
| 1016 |
"question_number": 1,
|
| 1017 |
"total_questions": RESUME_MAX_QUESTIONS,
|
| 1018 |
},
|
|
@@ -1026,177 +1410,481 @@ async def start_interview(
|
|
| 1026 |
}
|
| 1027 |
|
| 1028 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1029 |
async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
|
| 1030 |
-
"""Submit
|
|
|
|
| 1031 |
db = get_db()
|
| 1032 |
redis = get_redis()
|
| 1033 |
|
| 1034 |
-
# Get session state from Redis
|
| 1035 |
session = await redis.hgetall(f"session:{session_id}")
|
| 1036 |
if not session:
|
| 1037 |
raise ValueError("Interview session not found or expired")
|
| 1038 |
-
|
| 1039 |
if session.get("status") != "in_progress":
|
| 1040 |
raise ValueError("Interview is not in progress")
|
| 1041 |
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
"
|
| 1047 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1048 |
await redis.rpush(f"session:{session_id}:answers", question_id)
|
| 1049 |
await redis.expire(f"session:{session_id}:a:{question_id}", SESSION_TTL)
|
| 1050 |
await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
|
| 1051 |
|
| 1052 |
-
question_count =
|
| 1053 |
-
answered_count =
|
| 1054 |
-
served_count =
|
| 1055 |
-
generated_count =
|
| 1056 |
-
max_questions =
|
| 1057 |
interview_type = session.get("interview_type", "resume")
|
|
|
|
| 1058 |
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1062 |
|
| 1063 |
-
# Check if interview is complete
|
| 1064 |
if answered_count >= max_questions:
|
| 1065 |
-
# Mark session as completed
|
| 1066 |
await redis.hset(
|
| 1067 |
f"session:{session_id}",
|
| 1068 |
-
mapping={
|
|
|
|
|
|
|
|
|
|
| 1069 |
)
|
| 1070 |
await db[SESSIONS].update_one(
|
| 1071 |
{"session_id": session_id},
|
| 1072 |
{"$set": {"status": "completed", "completed_at": utc_now()}},
|
| 1073 |
)
|
|
|
|
|
|
|
| 1074 |
return {
|
| 1075 |
"session_id": session_id,
|
| 1076 |
"next_question": None,
|
| 1077 |
"is_complete": True,
|
| 1078 |
"message": "Interview complete! Generating your report...",
|
|
|
|
| 1079 |
}
|
| 1080 |
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
"generation_batches": 0,
|
| 1089 |
-
}
|
| 1090 |
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1101 |
)
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
"message": "Interview complete! Generating your report...",
|
| 1107 |
-
}
|
| 1108 |
|
| 1109 |
-
|
| 1110 |
db=db,
|
| 1111 |
redis=redis,
|
| 1112 |
session_id=session_id,
|
| 1113 |
session=session,
|
|
|
|
| 1114 |
generated_count=generated_count,
|
| 1115 |
-
max_questions=max_questions,
|
| 1116 |
)
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
| 1120 |
-
|
| 1121 |
-
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
metrics_delta = {
|
| 1149 |
-
"gemini_calls": batch_metrics.get("gemini_calls", 0),
|
| 1150 |
-
"gemini_questions": batch_metrics.get("gemini_questions", 0),
|
| 1151 |
-
"bank_questions": batch_metrics.get("bank_questions", 0),
|
| 1152 |
-
"bank_shortfall": batch_metrics.get("bank_shortfall", 0),
|
| 1153 |
-
"generation_batches": 1,
|
| 1154 |
}
|
| 1155 |
-
|
| 1156 |
-
f"[interview-metrics] session={session_id} "
|
| 1157 |
-
f"batch_size={len(new_ids)} gemini_calls+={batch_metrics.get('gemini_calls', 0)} "
|
| 1158 |
-
f"gemini_questions+={batch_metrics.get('gemini_questions', 0)} "
|
| 1159 |
-
f"bank_questions+={batch_metrics.get('bank_questions', 0)} "
|
| 1160 |
-
f"bank_shortfall+={batch_metrics.get('bank_shortfall', 0)}"
|
| 1161 |
-
)
|
| 1162 |
|
| 1163 |
-
|
| 1164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1165 |
|
| 1166 |
-
|
| 1167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1168 |
|
| 1169 |
-
# Prefetch one-ahead question only. Current question is synthesized by active playback path.
|
| 1170 |
-
prefetch_texts = []
|
| 1171 |
-
peek_next_id = await redis.lindex(f"session:{session_id}:pending_questions", 0)
|
| 1172 |
-
if peek_next_id:
|
| 1173 |
-
peek_q = await redis.hgetall(f"session:{session_id}:q:{peek_next_id}")
|
| 1174 |
-
prefetch_texts.append(peek_q.get("question", ""))
|
| 1175 |
-
_schedule_question_audio_prefetch(prefetch_texts, speech_voice_gender)
|
| 1176 |
next_difficulty = q_data.get("difficulty", session.get("current_difficulty", "medium"))
|
| 1177 |
-
|
| 1178 |
new_served_count = served_count + 1
|
| 1179 |
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
-
|
| 1191 |
-
effective_stats = {
|
| 1192 |
-
"gemini_calls": _safe_int(session.get("metrics_gemini_calls", 0)) + metrics_delta["gemini_calls"],
|
| 1193 |
-
"gemini_questions": _safe_int(session.get("metrics_gemini_questions", 0)) + metrics_delta["gemini_questions"],
|
| 1194 |
-
"bank_questions": _safe_int(session.get("metrics_bank_questions", 0)) + metrics_delta["bank_questions"],
|
| 1195 |
-
"bank_shortfall": _safe_int(session.get("metrics_bank_shortfall", 0)) + metrics_delta["bank_shortfall"],
|
| 1196 |
-
"generation_batches": _safe_int(session.get("metrics_generation_batches", 0)) + metrics_delta["generation_batches"],
|
| 1197 |
-
}
|
| 1198 |
|
| 1199 |
-
|
| 1200 |
"session_id": session_id,
|
| 1201 |
"next_question": {
|
| 1202 |
"question_id": next_question_id,
|
|
@@ -1210,6 +1898,83 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
|
|
| 1210 |
"generation_stats": effective_stats,
|
| 1211 |
}
|
| 1212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1213 |
|
| 1214 |
async def quit_interview(session_id: str, user_id: str) -> dict:
|
| 1215 |
"""Mark an interview as quit and indicate whether a partial report can be generated."""
|
|
@@ -1292,3 +2057,4 @@ def cleanup_interview_local_state(session_id: str) -> None:
|
|
| 1292 |
"""Cleanup process-local state for a completed session."""
|
| 1293 |
_LOCAL_SUMMARIES.pop(session_id, None)
|
| 1294 |
_PREGEN_IN_FLIGHT.discard(session_id)
|
|
|
|
|
|
| 2 |
import asyncio
|
| 3 |
import random
|
| 4 |
import re
|
| 5 |
+
from time import perf_counter
|
| 6 |
from bson import ObjectId
|
| 7 |
from database import get_db, get_redis
|
| 8 |
+
from models.collections import SESSIONS, USERS, JOB_ROLES, SKILLS, QUESTIONS, TOPICS, TOPIC_QUESTIONS, RESUMES, JD_VERIFICATIONS, ANSWERS
|
| 9 |
from utils.helpers import generate_id, utc_now, str_objectid
|
| 10 |
from utils.skills import normalize_skill_list, build_interview_focus_skills
|
| 11 |
from services.interview_graph import run_interview_graph
|
| 12 |
from utils.gemini import generate_interview_question_batch, analyze_resume_vs_job_description
|
| 13 |
from services.job_description_service import get_job_description_for_user
|
| 14 |
+
from services.gemini_service import (
|
| 15 |
+
evaluate_and_generate_followup,
|
| 16 |
+
generate_resume_seed_questions,
|
| 17 |
+
generate_topic_followup_batch,
|
| 18 |
+
)
|
| 19 |
+
from services.queue_service import (
|
| 20 |
+
enqueue_question,
|
| 21 |
+
flush_backlog_to_queue,
|
| 22 |
+
get_recent_context_items,
|
| 23 |
+
mark_question_asked,
|
| 24 |
+
peek_next_question,
|
| 25 |
+
pop_next_question,
|
| 26 |
+
push_context_item,
|
| 27 |
+
queue_size,
|
| 28 |
+
)
|
| 29 |
from services.tts_service import prefetch_wav
|
| 30 |
+
from services.latency_service import record_latency
|
| 31 |
|
| 32 |
MAX_QUESTIONS = 20
|
| 33 |
RESUME_MAX_QUESTIONS = 10
|
|
|
|
| 37 |
PREGEN_MIN_PENDING = 2
|
| 38 |
FOLLOWUP_AI_COUNT = 2
|
| 39 |
FOLLOWUP_BANK_COUNT = 3
|
| 40 |
+
MAX_QUEUE_SIZE = 3
|
| 41 |
+
CONTEXT_CACHE_ITEMS = 3
|
| 42 |
+
|
| 43 |
+
TOPIC_INITIAL_DB_QUESTIONS = 5
|
| 44 |
+
TOPIC_INITIAL_ASK_COUNT = 4
|
| 45 |
+
TOPIC_AI_FOLLOWUPS = 3
|
| 46 |
+
TOPIC_DB_FOLLOWUPS = 2
|
| 47 |
+
TOPIC_TOTAL_QUESTIONS = TOPIC_INITIAL_ASK_COUNT + TOPIC_AI_FOLLOWUPS + TOPIC_DB_FOLLOWUPS
|
| 48 |
|
| 49 |
# Local process memory summary requested in workflow.
|
| 50 |
_LOCAL_SUMMARIES: dict[str, str] = {}
|
| 51 |
_PREGEN_IN_FLIGHT: set[str] = set()
|
| 52 |
+
_POST_SUBMIT_LOCKS: dict[str, asyncio.Lock] = {}
|
| 53 |
|
| 54 |
|
| 55 |
def _safe_json_list(value: str) -> list:
|
|
|
|
| 60 |
return []
|
| 61 |
|
| 62 |
|
| 63 |
+
def _question_fingerprint(text: str) -> str:
|
| 64 |
+
base = (text or "").strip().lower()
|
| 65 |
+
base = re.sub(r"[^a-z0-9\s]", " ", base)
|
| 66 |
+
base = re.sub(r"\s+", " ", base).strip()
|
| 67 |
+
return base
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _unique_question_items(items: list[dict], *, excluded_questions: list[str], limit: int) -> list[dict]:
    """Return up to ``limit`` distinct question items that are not excluded.

    Items are compared by ``_question_fingerprint`` of their ``"question"``
    text, both against ``excluded_questions`` and against items already
    accepted in this call. Input order is preserved.

    Args:
        items: Candidate dicts; only ``question``, ``difficulty`` and
            ``category`` are read. ``None`` is treated as empty.
        excluded_questions: Question texts that must not be returned.
            ``None`` is treated as empty.
        limit: Maximum number of items to return; values ``<= 0`` yield ``[]``.

    Returns:
        New dicts with keys ``question`` (stripped text), ``difficulty``
        (default ``"medium"``) and ``category`` (default ``"general"``).
    """
    # Without this guard the first valid item was appended before the limit
    # was checked, so a non-positive limit still returned one item.
    if limit <= 0:
        return []

    excluded = {_question_fingerprint(q) for q in (excluded_questions or []) if q}
    unique: list[dict] = []
    for item in items or []:
        text = (item.get("question") or "").strip()
        if not text:
            continue
        key = _question_fingerprint(text)
        if not key or key in excluded:
            continue
        # Record the key so later duplicates inside ``items`` are skipped too.
        excluded.add(key)
        unique.append(
            {
                "question": text,
                "difficulty": item.get("difficulty", "medium"),
                "category": item.get("category", "general"),
            }
        )
        if len(unique) >= limit:
            break
    return unique
|
| 91 |
+
|
| 92 |
+
|
| 93 |
def _update_local_summary(session_id: str, question: str, answer: str) -> None:
|
| 94 |
existing = _LOCAL_SUMMARIES.get(session_id, "")
|
| 95 |
combined = f"{existing}\nQ: {question}\nA: {answer}".strip()
|
|
|
|
| 129 |
pass
|
| 130 |
|
| 131 |
|
| 132 |
+
def _get_post_submit_lock(session_id: str) -> asyncio.Lock:
    """Return the lock stored for ``session_id``, creating it on first use.

    Locks are kept in the module-level ``_POST_SUBMIT_LOCKS`` dict, so the
    same ``asyncio.Lock`` instance is returned for repeated calls with the
    same session id within this process.
    """
    lock = _POST_SUBMIT_LOCKS.get(session_id)
    if lock is None:
        lock = asyncio.Lock()
        _POST_SUBMIT_LOCKS[session_id] = lock
    return lock
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def _consume_post_submit_task_result(task: asyncio.Task) -> None:
|
| 141 |
+
try:
|
| 142 |
+
task.result()
|
| 143 |
+
except Exception:
|
| 144 |
+
# Background processing is best-effort; ignore task-level failures.
|
| 145 |
+
pass
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _current_generation_stats(session: dict) -> dict:
    """Return the session's generation counters as a dict of integers.

    Each ``metrics_<name>`` field of ``session`` is read (defaulting to 0),
    passed through ``_safe_int`` and returned under ``<name>``.

    Args:
        session: Session mapping holding the ``metrics_*`` fields.

    Returns:
        Dict with keys ``gemini_calls``, ``gemini_questions``,
        ``bank_questions``, ``bank_shortfall`` and ``generation_batches``.
    """
    stat_names = (
        "gemini_calls",
        "gemini_questions",
        "bank_questions",
        "bank_shortfall",
        "generation_batches",
    )
    return {name: _safe_int(session.get(f"metrics_{name}", 0)) for name in stat_names}
|
| 156 |
+
|
| 157 |
+
|
| 158 |
def _normalize_bank_difficulty(value: str) -> str:
|
| 159 |
difficulty = (value or "medium").strip().lower()
|
| 160 |
if difficulty not in {"easy", "medium", "hard"}:
|
|
|
|
| 218 |
return "Software Developer"
|
| 219 |
|
| 220 |
|
| 221 |
+
async def _get_recent_user_questions(db, user_id: str, limit: int = 40) -> list[str]:
    """Return the user's most recently stored question texts, de-duplicated.

    Reads up to ``limit`` documents from the ``ANSWERS`` collection for
    ``user_id``, newest ``stored_at`` first, and keeps the first occurrence
    of each distinct question (compared by ``_question_fingerprint``).

    Args:
        db: Database handle indexed by collection name.
        user_id: Owner of the answers to scan.
        limit: Maximum number of answer documents to scan; values ``<= 0``
            yield ``[]``.

    Returns:
        Stripped question texts in newest-first order. May be shorter than
        ``limit`` because blank and duplicate questions are dropped.
    """
    # A cursor limit of 0 means "no limit", which would scan the user's
    # whole answer history instead of returning nothing.
    if limit <= 0:
        return []

    recent: list[str] = []
    seen: set[str] = set()

    cursor = db[ANSWERS].find({"user_id": user_id}, {"question": 1}).sort("stored_at", -1).limit(limit)
    async for doc in cursor:
        text = (doc.get("question") or "").strip()
        key = _question_fingerprint(text)
        if not text or not key or key in seen:
            continue
        seen.add(key)
        recent.append(text)

    return recent
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def _build_resume_intro_question(role_title: str, jd_title: str) -> str:
|
| 238 |
+
title = (jd_title or "the selected job description").strip()
|
| 239 |
+
role = (role_title or "this role").strip()
|
| 240 |
+
return (
|
| 241 |
+
f"Introduce yourself and explain how your background aligns with {role} "
|
| 242 |
+
f"for {title}."
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def _build_resume_resilient_followup_question(session: dict, question_number: int, variant: int = 0) -> str:
    """Build a deterministic, template-based follow-up question.

    A skill and a template are both selected by
    ``max(0, question_number - 1) + max(0, variant)``, taken modulo the size
    of the skill pool and of the template list respectively.

    Args:
        session: Session mapping; reads ``role_title`` and the JSON-encoded
            lists ``jd_required_skills`` and ``skills``.
        question_number: Number shown in the question's "Question N:" prefix.
        variant: Extra offset used to obtain a different skill/template.

    Returns:
        The formatted question text.
    """
    # Strip before defaulting so a whitespace-only title does not leave an
    # empty role in the sentence.
    role_title = (session.get("role_title") or "").strip() or "this role"
    jd_skills = _safe_json_list(session.get("jd_required_skills", "[]"))
    focus_skills = _safe_json_list(session.get("skills", "[]"))
    # Prefer job-description skills, then interview focus skills, then a
    # generic placeholder so the pool is never empty.
    skill_pool = jd_skills or focus_skills or ["core technical concepts"]

    index = max(0, question_number - 1) + max(0, variant)
    skill = skill_pool[index % len(skill_pool)]

    templates = [
        "Question {n}: Describe a real project where you applied {skill} for {role}. What constraints and trade-offs shaped your design?",
        "Question {n}: If {skill} failed in production for a {role} workflow, how would you debug it step by step?",
        "Question {n}: Explain how you would test and validate a solution using {skill} before shipping it for {role}.",
        "Question {n}: Compare two approaches for {skill} in a {role} context and justify the final choice.",
        "Question {n}: Design an improvement plan to make your {skill} implementation more scalable and reliable for {role}.",
    ]
    template = templates[index % len(templates)]
    return template.format(n=question_number, skill=skill, role=role_title)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
async def _enqueue_resume_followup_with_fallback(
    *,
    redis,
    session_id: str,
    session: dict,
    answered_count: int,
    suggested_text: str,
    suggested_difficulty: str,
    suggested_category: str,
) -> tuple[str | None, bool]:
    """Enqueue a follow-up question, falling back to local templates.

    The suggested question (if non-blank) is tried first, followed by six
    template questions from ``_build_resume_resilient_followup_question``.
    Candidates are offered to ``enqueue_question`` in order until one call
    returns a truthy id.

    Args:
        redis: Redis client passed through to ``enqueue_question``.
        session_id: Interview session identifier.
        session: Session mapping used to build the fallback questions.
        answered_count: Number of answers so far; fallback numbering starts
            at ``max(2, answered_count + 1)``.
        suggested_text: Preferred follow-up text; may be empty.
        suggested_difficulty: Difficulty for the suggestion (default ``"medium"``).
        suggested_category: Category for the suggestion (default ``"follow-up"``).

    Returns:
        ``(question_id, is_primary)`` where ``is_primary`` is ``True`` only
        when the suggested text was the one enqueued, or ``(None, False)``
        when no candidate was accepted.
    """
    # Each candidate is (text, difficulty, category, is_primary).
    candidates: list[tuple[str, str, str, bool]] = []

    primary = (suggested_text or "").strip()
    if primary:
        candidates.append((primary, suggested_difficulty or "medium", suggested_category or "follow-up", True))

    # Deterministic local fallback prevents early completion when model output is empty/duplicate.
    base_question_number = max(2, answered_count + 1)
    for variant in range(6):
        question_number = base_question_number + variant
        fallback_text = _build_resume_resilient_followup_question(
            session=session,
            question_number=question_number,
            variant=variant,
        )
        candidates.append((fallback_text, "medium", "resume-fallback", False))

    seen: set[str] = set()
    for text, difficulty, category, is_primary in candidates:
        key = _question_fingerprint(text)
        # Do not offer the same normalized text twice in one call.
        if not key or key in seen:
            continue
        seen.add(key)

        # NOTE(review): presumably a falsy return means the queue rejected the
        # question (e.g. duplicate or full) - confirm in queue_service.
        qid = await enqueue_question(
            redis=redis,
            session_id=session_id,
            question=text,
            difficulty=difficulty,
            category=category,
            ttl_seconds=SESSION_TTL,
            max_queue_size=MAX_QUEUE_SIZE,
        )
        if qid:
            return qid, is_primary

    return None, False
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
async def _get_session_question_texts(redis, session_id: str) -> list[str]:
    """Return the non-blank question texts recorded for a session.

    Question ids are read from the ``session:{session_id}:questions`` list
    and each question hash ``session:{session_id}:q:{qid}`` is fetched in
    list order.

    Args:
        redis: Async Redis client.
        session_id: Interview session identifier.

    Returns:
        Stripped ``question`` fields, skipping hashes with no question text.
    """
    question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
    output: list[str] = []
    for qid in question_ids:
        q = await redis.hgetall(f"session:{session_id}:q:{qid}")
        text = (q.get("question") or "").strip()
        if text:
            output.append(text)
    return output
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
async def _sample_topic_questions(
    db,
    topic_id: str,
    excluded_questions: list[str],
    limit: int,
) -> list[dict]:
    """Pick up to ``limit`` random, de-duplicated bank questions for a topic.

    Loads at most 500 documents for ``topic_id`` from the topic-question
    collection and shuffles them.  A document is kept when its text is
    non-empty and its fingerprint is neither among ``excluded_questions``
    nor already selected in this call.  Each kept item is a dict with
    ``question``, ``difficulty`` (normalized, defaulting to "medium") and
    ``category`` (defaulting to "topic").

    Returns an empty list without touching the database when ``limit`` is
    not positive.
    """
    if limit <= 0:
        return []

    bank_docs = await db[TOPIC_QUESTIONS].find({"topic_id": topic_id}).to_list(length=500)
    random.shuffle(bank_docs)

    # Grows as questions are picked, so duplicates inside the bank are skipped too.
    seen_fingerprints = {_question_fingerprint(q) for q in excluded_questions if q}

    picked: list[dict] = []
    for bank_doc in bank_docs:
        question_text = (bank_doc.get("question") or "").strip()
        fingerprint = _question_fingerprint(question_text) if question_text else None
        if not fingerprint or fingerprint in seen_fingerprints:
            continue

        seen_fingerprints.add(fingerprint)
        picked.append(
            {
                "question": question_text,
                "difficulty": _normalize_bank_difficulty(bank_doc.get("difficulty") or "medium"),
                "category": bank_doc.get("category") or "topic",
            }
        )
        if len(picked) >= limit:
            break

    return picked
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
async def _seed_resume_questions_task(session_id: str) -> None:
    """Pre-load follow-up questions for a resume interview in the background.

    Tops the session queue up to ``RESUME_INITIAL_BATCH_SIZE`` with questions
    from ``generate_resume_seed_questions`` and schedules audio prefetch for
    the next queued question.  Does nothing unless the session exists, is
    ``in_progress`` and has interview type ``resume``.

    Counters are bumped with atomic increments (Redis ``HINCRBY`` / Mongo
    ``$inc``) instead of being rewritten from the session snapshot read at
    the top: question generation awaits a model call, and an answer
    submitted in the meantime updates the same counters, so a
    read-modify-write from the stale snapshot would lose one of the updates.

    Any failure is logged and swallowed; this task is best-effort and must
    never break interview startup.
    """
    import logging  # local import keeps this block self-contained

    db = get_db()
    redis = get_redis()
    session_key = f"session:{session_id}"

    session = await redis.hgetall(session_key)
    if not session or session.get("status") != "in_progress" or session.get("interview_type") != "resume":
        return

    try:
        await flush_backlog_to_queue(
            redis=redis,
            session_id=session_id,
            ttl_seconds=SESSION_TTL,
            max_queue_size=MAX_QUEUE_SIZE,
        )

        current_q_size = await queue_size(redis, session_id)
        needed = max(0, RESUME_INITIAL_BATCH_SIZE - current_q_size)

        if needed > 0:
            excluded_questions = await _get_session_question_texts(redis, session_id)
            seed_items = await generate_resume_seed_questions(
                role_title=session.get("role_title", "Software Developer"),
                resume_summary=session.get("resume_summary", "No summary available"),
                resume_skills=_safe_json_list(session.get("skills", "[]")),
                jd_title=session.get("job_description_title", ""),
                jd_description=session.get("job_description_text", ""),
                jd_required_skills=_safe_json_list(session.get("jd_required_skills", "[]")),
                excluded_questions=excluded_questions,
                count=needed,
            )

            # Only questions the queue actually accepted are counted.
            appended = 0
            for item in seed_items:
                qid = await enqueue_question(
                    redis=redis,
                    session_id=session_id,
                    question=item.get("question", ""),
                    difficulty=item.get("difficulty", "medium"),
                    category=item.get("category", "resume-seed"),
                    ttl_seconds=SESSION_TTL,
                    max_queue_size=MAX_QUEUE_SIZE,
                )
                if qid:
                    appended += 1

            # Atomic increments: safe against concurrent post-submit updates.
            await redis.hincrby(session_key, "generated_count", appended)
            await redis.hincrby(session_key, "metrics_gemini_calls", 1)
            await redis.hincrby(session_key, "metrics_gemini_questions", appended)
            await redis.hincrby(session_key, "metrics_generation_batches", 1)

            await db[SESSIONS].update_one(
                {"session_id": session_id},
                {
                    "$inc": {
                        "metrics_gemini_calls": 1,
                        "metrics_gemini_questions": appended,
                        "metrics_generation_batches": 1,
                    }
                },
            )

        await flush_backlog_to_queue(
            redis=redis,
            session_id=session_id,
            ttl_seconds=SESSION_TTL,
            max_queue_size=MAX_QUEUE_SIZE,
        )

        next_qid, next_q = await peek_next_question(redis, session_id)
        if next_qid and next_q:
            _schedule_question_audio_prefetch(
                [next_q.get("question", "")],
                _normalize_voice_gender(session.get("speech_voice_gender")),
            )
    except Exception:
        # Non-blocking pre-seed path should never fail interview startup,
        # but the failure is recorded so it can be diagnosed.
        logging.getLogger(__name__).exception(
            "Resume question pre-seed failed for session %s", session_id
        )
        return
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
def _normalize_role_key(role_title: str) -> str:
    """Return a lowercase, whitespace-collapsed key for a role title.

    Leading/trailing whitespace is removed and every internal whitespace run
    becomes a single space.  Falls back to "software developer" when the
    title is ``None``, empty or whitespace only.
    """
    lowered = (role_title or "").strip().lower()
    collapsed = re.sub(r"\s+", " ", lowered)
    if collapsed:
        return collapsed
    return "software developer"
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def _build_verification_cache_key(
    role_key: str,
    jd_id: str,
    jd_updated_at: str,
    resume_uploaded_at: str,
) -> str:
    """Build the ``||``-joined cache key identifying one verification input set.

    The key is made of the role key, job-description id, job-description
    update timestamp and resume upload timestamp, in that order.  A falsy
    role key is replaced by "software developer"; any other falsy part is
    replaced by "-".
    """
    parts_with_fallback = (
        (role_key, "software developer"),
        (jd_id, "-"),
        (jd_updated_at, "-"),
        (resume_uploaded_at, "-"),
    )
    return "||".join([value or fallback for value, fallback in parts_with_fallback])
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
def _verification_doc_to_response(doc: dict, *, message: str, cached: bool) -> dict:
    """Convert a stored verification document into the API response payload.

    ``saved_at`` falls back to the document's ``created_at`` and then to the
    current time from ``utc_now()``.  The ``job_description``,
    ``resume_snapshot`` and ``jd_alignment`` sections default to empty dicts
    when missing or falsy.  ``message`` and ``cached`` are passed through
    unchanged.
    """
    response = {"verification_id": doc.get("verification_id")}
    response["saved_at"] = doc.get("saved_at") or doc.get("created_at") or utc_now()
    response["role_title"] = doc.get("role_title")
    for section in ("job_description", "resume_snapshot", "jd_alignment"):
        response[section] = doc.get(section) or {}
    response["message"] = message
    response["cached"] = cached
    return response
|
| 477 |
+
|
| 478 |
+
|
| 479 |
async def verify_resume_job_description(
|
| 480 |
user_id: str,
|
| 481 |
role_id: str = None,
|
| 482 |
custom_role: str = None,
|
| 483 |
job_description_id: str = None,
|
| 484 |
) -> dict:
|
| 485 |
+
"""Run resume-vs-job-description verification without starting an interview.
|
| 486 |
+
|
| 487 |
+
Reuses a saved verification while the selected role, JD version, and resume
|
| 488 |
+
upload timestamp are unchanged.
|
| 489 |
+
"""
|
| 490 |
if not job_description_id:
|
| 491 |
raise ValueError("job_description_id is required for verification")
|
| 492 |
|
|
|
|
| 507 |
resume_summary = "\n".join([part for part in summary_parts if part]).strip() or "No summary available"
|
| 508 |
|
| 509 |
role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
|
| 510 |
+
role_key = _normalize_role_key(role_title)
|
| 511 |
selected_jd = await get_job_description_for_user(user_id, job_description_id)
|
| 512 |
|
| 513 |
+
resume_uploaded_at = resume_doc.get("uploaded_at") or ""
|
| 514 |
+
jd_updated_at = selected_jd.get("updated_at") or ""
|
| 515 |
+
cache_key = _build_verification_cache_key(
|
| 516 |
+
role_key=role_key,
|
| 517 |
+
jd_id=selected_jd.get("id") or job_description_id,
|
| 518 |
+
jd_updated_at=jd_updated_at,
|
| 519 |
+
resume_uploaded_at=resume_uploaded_at,
|
| 520 |
+
)
|
| 521 |
+
|
| 522 |
+
existing_verification = await db[JD_VERIFICATIONS].find_one(
|
| 523 |
+
{"user_id": user_id, "cache_key": cache_key},
|
| 524 |
+
sort=[("created_at", -1)],
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
if not existing_verification:
|
| 528 |
+
compatibility_query = {
|
| 529 |
+
"user_id": user_id,
|
| 530 |
+
"role_title": role_title,
|
| 531 |
+
"job_description.id": selected_jd.get("id"),
|
| 532 |
+
"resume_snapshot.uploaded_at": resume_uploaded_at,
|
| 533 |
+
}
|
| 534 |
+
if jd_updated_at:
|
| 535 |
+
compatibility_query["job_description.updated_at"] = jd_updated_at
|
| 536 |
+
|
| 537 |
+
existing_verification = await db[JD_VERIFICATIONS].find_one(
|
| 538 |
+
compatibility_query,
|
| 539 |
+
sort=[("created_at", -1)],
|
| 540 |
+
)
|
| 541 |
+
|
| 542 |
+
if existing_verification:
|
| 543 |
+
await db[JD_VERIFICATIONS].update_one(
|
| 544 |
+
{"_id": existing_verification["_id"]},
|
| 545 |
+
{
|
| 546 |
+
"$set": {
|
| 547 |
+
"cache_key": cache_key,
|
| 548 |
+
"role_key": role_key,
|
| 549 |
+
"saved_at": existing_verification.get("saved_at")
|
| 550 |
+
or existing_verification.get("created_at")
|
| 551 |
+
or utc_now(),
|
| 552 |
+
}
|
| 553 |
+
},
|
| 554 |
+
)
|
| 555 |
+
|
| 556 |
+
if existing_verification:
|
| 557 |
+
return _verification_doc_to_response(
|
| 558 |
+
existing_verification,
|
| 559 |
+
message="Loaded saved verification",
|
| 560 |
+
cached=True,
|
| 561 |
+
)
|
| 562 |
+
|
| 563 |
jd_alignment = await analyze_resume_vs_job_description(
|
| 564 |
role_title=role_title,
|
| 565 |
resume_skills=resume_skills if resume_skills else ["general"],
|
|
|
|
| 571 |
|
| 572 |
resume_snapshot = {
|
| 573 |
"filename": resume_doc.get("original_filename") or resume_doc.get("filename") or "",
|
| 574 |
+
"uploaded_at": resume_uploaded_at,
|
| 575 |
"skills": resume_skills,
|
| 576 |
"parsed_data": {
|
| 577 |
"name": parsed_data.get("name"),
|
|
|
|
| 585 |
|
| 586 |
verification_id = generate_id()
|
| 587 |
saved_at = utc_now()
|
| 588 |
+
verification_doc = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
"verification_id": verification_id,
|
| 590 |
+
"user_id": user_id,
|
| 591 |
+
"role_id": role_id,
|
| 592 |
+
"custom_role": custom_role,
|
| 593 |
"role_title": role_title,
|
| 594 |
+
"role_key": role_key,
|
| 595 |
+
"cache_key": cache_key,
|
| 596 |
+
"job_description": {
|
| 597 |
+
"id": selected_jd.get("id"),
|
| 598 |
+
"title": selected_jd.get("title"),
|
| 599 |
+
"company": selected_jd.get("company"),
|
| 600 |
+
"description": selected_jd.get("description"),
|
| 601 |
+
"required_skills": selected_jd.get("required_skills", []) or [],
|
| 602 |
+
"updated_at": jd_updated_at,
|
| 603 |
+
},
|
| 604 |
"resume_snapshot": resume_snapshot,
|
| 605 |
"jd_alignment": jd_alignment,
|
| 606 |
+
"saved_at": saved_at,
|
| 607 |
+
"created_at": saved_at,
|
| 608 |
}
|
| 609 |
|
| 610 |
+
await db[JD_VERIFICATIONS].insert_one(verification_doc)
|
| 611 |
+
|
| 612 |
+
return _verification_doc_to_response(
|
| 613 |
+
verification_doc,
|
| 614 |
+
message="Verification complete",
|
| 615 |
+
cached=False,
|
| 616 |
+
)
|
| 617 |
+
|
| 618 |
|
| 619 |
async def _get_generated_question_texts(redis, session_id: str) -> list[str]:
|
| 620 |
qids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
|
|
|
|
| 999 |
|
| 1000 |
|
| 1001 |
async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
|
| 1002 |
+
"""Start topic interview with low-cost DB-first flow and staged AI follow-ups."""
|
| 1003 |
db = get_db()
|
| 1004 |
redis = get_redis()
|
| 1005 |
|
|
|
|
| 1009 |
if not topic.get("is_published", False):
|
| 1010 |
raise ValueError("This topic interview is not published yet")
|
| 1011 |
|
| 1012 |
+
initial_items = await _sample_topic_questions(
|
| 1013 |
+
db=db,
|
| 1014 |
+
topic_id=topic_id,
|
| 1015 |
+
excluded_questions=[],
|
| 1016 |
+
limit=TOPIC_INITIAL_DB_QUESTIONS,
|
| 1017 |
+
)
|
| 1018 |
+
if len(initial_items) < TOPIC_INITIAL_ASK_COUNT:
|
| 1019 |
+
raise ValueError("Not enough topic questions to start interview")
|
| 1020 |
+
|
| 1021 |
+
first_question = initial_items[0]
|
| 1022 |
+
queued_initial = initial_items[1:TOPIC_INITIAL_ASK_COUNT]
|
| 1023 |
|
| 1024 |
timer_enabled = bool(topic.get("timer_enabled", False))
|
| 1025 |
timer_seconds = topic.get("timer_seconds") if timer_enabled else None
|
| 1026 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
session_id = generate_id()
|
| 1028 |
_LOCAL_SUMMARIES[session_id] = ""
|
| 1029 |
|
|
|
|
| 1034 |
user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
|
| 1035 |
speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
|
| 1036 |
|
| 1037 |
+
first_id = generate_id()
|
| 1038 |
+
await redis.hset(
|
| 1039 |
+
f"session:{session_id}:q:{first_id}",
|
| 1040 |
+
mapping={
|
| 1041 |
+
"question_id": first_id,
|
| 1042 |
+
"question": first_question.get("question", "Can you explain this topic?"),
|
| 1043 |
+
"difficulty": first_question.get("difficulty", "medium"),
|
| 1044 |
+
"category": first_question.get("category", topic.get("name", "topic")),
|
| 1045 |
+
},
|
| 1046 |
+
)
|
| 1047 |
+
await redis.expire(f"session:{session_id}:q:{first_id}", SESSION_TTL)
|
| 1048 |
+
await redis.rpush(f"session:{session_id}:questions", first_id)
|
| 1049 |
+
await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
|
| 1050 |
+
|
| 1051 |
+
await mark_question_asked(
|
| 1052 |
+
redis=redis,
|
| 1053 |
+
session_id=session_id,
|
| 1054 |
+
question_text=first_question.get("question", ""),
|
| 1055 |
+
ttl_seconds=SESSION_TTL,
|
| 1056 |
+
)
|
| 1057 |
+
|
| 1058 |
+
queued_count = 0
|
| 1059 |
+
for item in queued_initial:
|
| 1060 |
+
qid = await enqueue_question(
|
| 1061 |
+
redis=redis,
|
| 1062 |
+
session_id=session_id,
|
| 1063 |
+
question=item.get("question", ""),
|
| 1064 |
+
difficulty=item.get("difficulty", "medium"),
|
| 1065 |
+
category=item.get("category", topic.get("name", "topic")),
|
| 1066 |
+
ttl_seconds=SESSION_TTL,
|
| 1067 |
+
max_queue_size=MAX_QUEUE_SIZE,
|
| 1068 |
+
)
|
| 1069 |
+
if qid:
|
| 1070 |
+
queued_count += 1
|
| 1071 |
+
|
| 1072 |
+
await flush_backlog_to_queue(
|
| 1073 |
+
redis=redis,
|
| 1074 |
+
session_id=session_id,
|
| 1075 |
+
ttl_seconds=SESSION_TTL,
|
| 1076 |
+
max_queue_size=MAX_QUEUE_SIZE,
|
| 1077 |
+
)
|
| 1078 |
+
|
| 1079 |
session_doc = {
|
| 1080 |
"session_id": session_id,
|
| 1081 |
"user_id": user_id,
|
|
|
|
| 1085 |
"interview_type": "topic",
|
| 1086 |
"status": "in_progress",
|
| 1087 |
"question_count": 1,
|
| 1088 |
+
"max_questions": TOPIC_TOTAL_QUESTIONS,
|
| 1089 |
+
"current_difficulty": first_question.get("difficulty", "medium"),
|
| 1090 |
"metrics_gemini_calls": 0,
|
| 1091 |
"metrics_gemini_questions": 0,
|
| 1092 |
+
"metrics_bank_questions": queued_count + 1,
|
| 1093 |
+
"metrics_bank_shortfall": max(0, TOPIC_INITIAL_ASK_COUNT - (queued_count + 1)),
|
| 1094 |
+
"metrics_generation_batches": 1,
|
| 1095 |
"speech_voice_gender": speech_voice_gender,
|
| 1096 |
"timer_enabled": timer_enabled,
|
| 1097 |
"timer_seconds": timer_seconds,
|
| 1098 |
+
"topic_followups_generated": False,
|
| 1099 |
"started_at": utc_now(),
|
| 1100 |
}
|
| 1101 |
await db[SESSIONS].insert_one(session_doc)
|
|
|
|
| 1113 |
"question_count": 1,
|
| 1114 |
"answered_count": 0,
|
| 1115 |
"served_count": 1,
|
| 1116 |
+
"generated_count": queued_count + 1,
|
| 1117 |
+
"max_questions": TOPIC_TOTAL_QUESTIONS,
|
| 1118 |
+
"current_difficulty": first_question.get("difficulty", "medium"),
|
| 1119 |
"timer_enabled": str(timer_enabled),
|
| 1120 |
"timer_seconds": str(timer_seconds or ""),
|
| 1121 |
"status": "in_progress",
|
| 1122 |
"speech_voice_gender": speech_voice_gender,
|
| 1123 |
"metrics_gemini_calls": 0,
|
| 1124 |
"metrics_gemini_questions": 0,
|
| 1125 |
+
"metrics_bank_questions": queued_count + 1,
|
| 1126 |
+
"metrics_bank_shortfall": max(0, TOPIC_INITIAL_ASK_COUNT - (queued_count + 1)),
|
| 1127 |
+
"metrics_generation_batches": 1,
|
| 1128 |
+
"topic_followups_generated": "0",
|
| 1129 |
}
|
| 1130 |
await redis.hset(f"session:{session_id}", mapping=session_state)
|
| 1131 |
await redis.expire(f"session:{session_id}", SESSION_TTL)
|
| 1132 |
|
| 1133 |
+
next_qid, next_q = await peek_next_question(redis, session_id)
|
| 1134 |
+
prefetch_targets = [next_q.get("question", "")] if next_qid and next_q else []
|
| 1135 |
+
_schedule_question_audio_prefetch(prefetch_targets, speech_voice_gender)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1136 |
|
| 1137 |
return {
|
| 1138 |
"session_id": session_id,
|
|
|
|
| 1151 |
},
|
| 1152 |
"question": {
|
| 1153 |
"question_id": first_id,
|
| 1154 |
+
"question": first_question.get("question", "Can you explain this topic?"),
|
| 1155 |
+
"difficulty": first_question.get("difficulty", "medium"),
|
| 1156 |
"question_number": 1,
|
| 1157 |
+
"total_questions": TOPIC_TOTAL_QUESTIONS,
|
| 1158 |
},
|
| 1159 |
"timer": {
|
| 1160 |
"enabled": timer_enabled,
|
|
|
|
| 1253 |
topic_id: str = None,
|
| 1254 |
job_description_id: str = None,
|
| 1255 |
) -> dict:
|
| 1256 |
+
"""Start a new interview session with low-cost queue-first orchestration."""
|
| 1257 |
interview_type = (interview_type or "resume").strip().lower()
|
| 1258 |
if interview_type == "topic":
|
| 1259 |
if not topic_id:
|
|
|
|
| 1270 |
user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
|
| 1271 |
speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
|
| 1272 |
|
|
|
|
| 1273 |
skills_doc = await db[SKILLS].find_one({"user_id": user_id})
|
| 1274 |
+
user_skills = normalize_skill_list(skills_doc.get("skills", [])) if skills_doc else []
|
| 1275 |
+
|
| 1276 |
+
resume_doc = await db[RESUMES].find_one({"user_id": user_id})
|
| 1277 |
+
if not resume_doc:
|
| 1278 |
+
raise ValueError("Please upload your resume before starting a resume interview")
|
| 1279 |
+
|
| 1280 |
+
parsed_resume = (resume_doc or {}).get("parsed_data", {}) or {}
|
| 1281 |
+
resume_summary_parts = [
|
| 1282 |
+
parsed_resume.get("experience_summary") or "",
|
| 1283 |
+
" ".join(parsed_resume.get("recommended_roles", []) or []),
|
| 1284 |
+
]
|
| 1285 |
+
resume_summary = "\n".join([part for part in resume_summary_parts if part]).strip() or "No summary available"
|
| 1286 |
|
| 1287 |
if not job_description_id:
|
| 1288 |
raise ValueError("Please select a Job Description before starting Resume Interview")
|
| 1289 |
|
|
|
|
| 1290 |
role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
|
| 1291 |
+
selected_jd = await get_job_description_for_user(user_id, job_description_id)
|
|
|
|
|
|
|
|
|
|
| 1292 |
|
| 1293 |
jd_required_skills = normalize_skill_list((selected_jd or {}).get("required_skills", []))
|
| 1294 |
if not jd_required_skills:
|
| 1295 |
+
raise ValueError("Selected Job Description has no required skills. Add required skills first.")
|
|
|
|
|
|
|
| 1296 |
|
| 1297 |
user_skill_set = {s.lower() for s in user_skills}
|
| 1298 |
matched_role_skills = [s for s in jd_required_skills if s.lower() in user_skill_set]
|
| 1299 |
missing_role_skills = [s for s in jd_required_skills if s.lower() not in user_skill_set]
|
|
|
|
|
|
|
|
|
|
| 1300 |
base_skills_for_interview = matched_role_skills + [s for s in missing_role_skills if s not in matched_role_skills]
|
| 1301 |
+
skills_for_interview = build_interview_focus_skills(base_skills_for_interview) or list(jd_required_skills)
|
|
|
|
|
|
|
| 1302 |
|
| 1303 |
+
intro_question = _build_resume_intro_question(role_title=role_title, jd_title=selected_jd.get("title", ""))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1304 |
|
| 1305 |
session_id = generate_id()
|
| 1306 |
_LOCAL_SUMMARIES[session_id] = ""
|
| 1307 |
|
| 1308 |
+
first_id = generate_id()
|
| 1309 |
+
await redis.hset(
|
| 1310 |
+
f"session:{session_id}:q:{first_id}",
|
| 1311 |
+
mapping={
|
| 1312 |
+
"question_id": first_id,
|
| 1313 |
+
"question": intro_question,
|
| 1314 |
+
"difficulty": "easy",
|
| 1315 |
+
"category": "intro",
|
| 1316 |
+
},
|
| 1317 |
+
)
|
| 1318 |
+
await redis.expire(f"session:{session_id}:q:{first_id}", SESSION_TTL)
|
| 1319 |
+
await redis.rpush(f"session:{session_id}:questions", first_id)
|
| 1320 |
+
await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
|
| 1321 |
+
|
| 1322 |
+
await mark_question_asked(
|
| 1323 |
+
redis=redis,
|
| 1324 |
+
session_id=session_id,
|
| 1325 |
+
question_text=intro_question,
|
| 1326 |
+
ttl_seconds=SESSION_TTL,
|
| 1327 |
+
)
|
| 1328 |
+
|
| 1329 |
session_doc = {
|
| 1330 |
"session_id": session_id,
|
| 1331 |
"user_id": user_id,
|
| 1332 |
"role_id": role_id,
|
| 1333 |
"role_title": role_title,
|
| 1334 |
+
"job_description_id": selected_jd.get("id"),
|
| 1335 |
+
"job_description_title": selected_jd.get("title"),
|
| 1336 |
"status": "in_progress",
|
| 1337 |
"interview_type": "resume",
|
| 1338 |
"question_count": 1,
|
| 1339 |
"max_questions": RESUME_MAX_QUESTIONS,
|
| 1340 |
+
"current_difficulty": "easy",
|
| 1341 |
+
"metrics_gemini_calls": 0,
|
| 1342 |
+
"metrics_gemini_questions": 0,
|
| 1343 |
+
"metrics_bank_questions": 1,
|
| 1344 |
+
"metrics_bank_shortfall": 0,
|
| 1345 |
+
"metrics_generation_batches": 0,
|
| 1346 |
"speech_voice_gender": speech_voice_gender,
|
| 1347 |
"started_at": utc_now(),
|
| 1348 |
+
"interview_generation_mode": "queue_followup",
|
| 1349 |
}
|
| 1350 |
await db[SESSIONS].insert_one(session_doc)
|
| 1351 |
|
|
|
|
| 1352 |
session_state = {
|
| 1353 |
"user_id": user_id,
|
| 1354 |
"role_id": role_id or "",
|
| 1355 |
"role_title": role_title,
|
| 1356 |
"skills": json.dumps(skills_for_interview),
|
| 1357 |
"user_skills": json.dumps(user_skills),
|
| 1358 |
+
"required_skills": json.dumps(jd_required_skills),
|
| 1359 |
"matched_skills": json.dumps(matched_role_skills),
|
| 1360 |
"missing_skills": json.dumps(missing_role_skills),
|
| 1361 |
"question_count": 1,
|
| 1362 |
"answered_count": 0,
|
| 1363 |
"served_count": 1,
|
| 1364 |
+
"generated_count": 1,
|
| 1365 |
"max_questions": RESUME_MAX_QUESTIONS,
|
| 1366 |
+
"current_difficulty": "easy",
|
| 1367 |
"interview_type": "resume",
|
| 1368 |
"status": "in_progress",
|
| 1369 |
"speech_voice_gender": speech_voice_gender,
|
| 1370 |
+
"jd_required_skills": json.dumps(jd_required_skills),
|
| 1371 |
+
"job_description_title": selected_jd.get("title", ""),
|
| 1372 |
+
"job_description_text": selected_jd.get("description", ""),
|
| 1373 |
+
"resume_summary": resume_summary,
|
| 1374 |
+
"metrics_gemini_calls": 0,
|
| 1375 |
+
"metrics_gemini_questions": 0,
|
| 1376 |
+
"metrics_bank_questions": 1,
|
| 1377 |
+
"metrics_bank_shortfall": 0,
|
| 1378 |
+
"metrics_generation_batches": 0,
|
| 1379 |
+
"interview_generation_mode": "queue_followup",
|
| 1380 |
}
|
| 1381 |
await redis.hset(f"session:{session_id}", mapping=session_state)
|
| 1382 |
await redis.expire(f"session:{session_id}", SESSION_TTL)
|
| 1383 |
|
| 1384 |
+
# Preload initial queue in background (2 questions) without blocking first question delivery.
|
| 1385 |
+
asyncio.create_task(_seed_resume_questions_task(session_id))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1386 |
|
| 1387 |
return {
|
| 1388 |
"session_id": session_id,
|
| 1389 |
"skill_alignment": {
|
| 1390 |
"user_skills": user_skills,
|
| 1391 |
+
"required_skills": jd_required_skills,
|
| 1392 |
"matched_skills": matched_role_skills,
|
| 1393 |
"missing_skills": missing_role_skills,
|
| 1394 |
"interview_focus": skills_for_interview,
|
| 1395 |
},
|
| 1396 |
"question": {
|
| 1397 |
"question_id": first_id,
|
| 1398 |
+
"question": intro_question,
|
| 1399 |
+
"difficulty": "easy",
|
| 1400 |
"question_number": 1,
|
| 1401 |
"total_questions": RESUME_MAX_QUESTIONS,
|
| 1402 |
},
|
|
|
|
| 1410 |
}
|
| 1411 |
|
| 1412 |
|
| 1413 |
+
async def _record_submit_latency(started_at: float) -> float:
    """Record the elapsed submit time and return it in milliseconds.

    ``started_at`` is an earlier ``perf_counter()`` reading.  The unrounded
    duration is reported under the "submit_ms" latency metric; the returned
    value is rounded to two decimals.
    """
    duration_ms = 1000.0 * (perf_counter() - started_at)
    await record_latency("submit_ms", duration_ms)
    return round(duration_ms, 2)
|
| 1417 |
+
|
| 1418 |
+
|
| 1419 |
+
async def _apply_generation_metric_delta(
    *,
    db,
    redis,
    session_id: str,
    session: dict,
    metrics_delta: dict,
    generated_count: int | None = None,
    extra_redis_fields: dict | None = None,
    extra_db_fields: dict | None = None,
) -> dict:
    """Add ``metrics_delta`` to the session's generation stats and persist them.

    New totals are computed from the stats of the supplied ``session``
    snapshot (not re-read from storage), then written to the Redis session
    hash as strings and to the Mongo session document as numbers, each under
    a ``metrics_``-prefixed field.

    ``generated_count``, when not ``None``, is written to both stores.
    ``extra_redis_fields`` / ``extra_db_fields`` are merged into the
    respective write.  Returns the new totals keyed by the unprefixed stat
    name.
    """
    stat_names = (
        "gemini_calls",
        "gemini_questions",
        "bank_questions",
        "bank_shortfall",
        "generation_batches",
    )
    current = _current_generation_stats(session)
    updated_stats = {
        name: current[name] + _safe_int(metrics_delta.get(name, 0))
        for name in stat_names
    }

    # Redis stores every value as a string; Mongo keeps the native numbers.
    redis_fields = {f"metrics_{name}": str(total) for name, total in updated_stats.items()}
    mongo_fields = {f"metrics_{name}": total for name, total in updated_stats.items()}

    if generated_count is not None:
        redis_fields["generated_count"] = str(generated_count)
        mongo_fields["generated_count"] = generated_count

    redis_fields.update(extra_redis_fields or {})
    mongo_fields.update(extra_db_fields or {})

    await redis.hset(f"session:{session_id}", mapping=redis_fields)
    await db[SESSIONS].update_one({"session_id": session_id}, {"$set": mongo_fields})
    return updated_stats
|
| 1467 |
+
|
| 1468 |
+
|
| 1469 |
+
async def _post_submit_resume_processing(
    session_id: str,
    question_id: str,
    question_text: str,
    answer: str,
    answered_count: int,
    max_questions: int,
) -> None:
    """Evaluate a submitted resume-interview answer and queue a follow-up.

    Runs under the per-session post-submit lock.  Steps:

    1. Evaluate the answer with ``evaluate_and_generate_followup`` and store
       the score and feedback on the answer hash.
    2. If the interview is in progress and ``answered_count`` is below
       ``max_questions``, enqueue a follow-up question.
    3. Persist the generation metrics, flush the backlog into the queue and
       schedule audio prefetch for the next queued question.

    Returns silently when the session hash no longer exists.
    """
    db = get_db()
    redis = get_redis()
    session_key = f"session:{session_id}"

    async with _get_post_submit_lock(session_id):
        session = await redis.hgetall(session_key)
        if not session:
            return

        # Taken from the snapshot above; it is not re-read after the evaluation call.
        interview_active = session.get("status") == "in_progress"

        recent_context = await get_recent_context_items(
            redis=redis,
            session_id=session_id,
            max_items=CONTEXT_CACHE_ITEMS,
        )
        already_asked = await _get_session_question_texts(redis, session_id)
        evaluation = await evaluate_and_generate_followup(
            role_title=session.get("role_title", "Software Developer"),
            required_skills=_safe_json_list(session.get("jd_required_skills", "[]")),
            recent_context=recent_context,
            current_question=question_text,
            current_answer=answer,
            excluded_questions=already_asked,
        )

        answer_fields = {
            "score": str(_safe_int(evaluation.get("score", 0))),
            "feedback": evaluation.get("feedback", ""),
        }
        await redis.hset(f"{session_key}:a:{question_id}", mapping=answer_fields)

        total_generated = _safe_int(session.get("generated_count", 0))
        model_questions = 0
        suggested = (evaluation.get("followup_question") or "").strip()

        if interview_active and answered_count < max_questions:
            enqueued_id, from_model = await _enqueue_resume_followup_with_fallback(
                redis=redis,
                session_id=session_id,
                session=session,
                answered_count=answered_count,
                suggested_text=suggested,
                suggested_difficulty=evaluation.get("difficulty", "medium"),
                suggested_category=evaluation.get("category", "follow-up"),
            )
            if enqueued_id:
                total_generated += 1
                # Only a follow-up that came from the model counts as a model question.
                if from_model:
                    model_questions = 1

        await _apply_generation_metric_delta(
            db=db,
            redis=redis,
            session_id=session_id,
            session=session,
            metrics_delta={
                "gemini_calls": 1,
                "gemini_questions": model_questions,
                "bank_questions": 0,
                "bank_shortfall": 0,
                "generation_batches": 1,
            },
            generated_count=total_generated,
        )

        await flush_backlog_to_queue(
            redis=redis,
            session_id=session_id,
            ttl_seconds=SESSION_TTL,
            max_queue_size=MAX_QUEUE_SIZE,
        )

        if not interview_active:
            return

        upcoming_id, upcoming = await peek_next_question(redis, session_id)
        if upcoming_id and upcoming:
            _schedule_question_audio_prefetch(
                [upcoming.get("question", "")],
                _normalize_voice_gender(session.get("speech_voice_gender")),
            )
|
| 1556 |
+
|
| 1557 |
+
|
| 1558 |
+
async def _post_submit_topic_processing(
    session_id: str,
    answered_count: int,
) -> None:
    """Queue the second-stage questions of a topic interview, once per session.

    Does nothing until ``answered_count`` reaches ``TOPIC_INITIAL_ASK_COUNT``.
    After that, under the per-session post-submit lock, it requests
    ``TOPIC_AI_FOLLOWUPS`` model follow-ups and samples ``TOPIC_DB_FOLLOWUPS``
    bank questions, enqueues both, records the metrics and marks the session
    with ``topic_followups_generated`` so later submissions skip this work.

    The ``gemini_questions`` and ``bank_questions`` metrics count only the
    questions the queue actually accepted.  This keeps them in step with
    ``generated_count`` and with the resume-interview paths, which also
    count enqueued questions rather than candidates.
    """
    db = get_db()
    redis = get_redis()

    if answered_count < TOPIC_INITIAL_ASK_COUNT:
        return

    async with _get_post_submit_lock(session_id):
        session = await redis.hgetall(f"session:{session_id}")
        if not session:
            return
        if session.get("topic_followups_generated", "0") == "1":
            return

        qa_pairs = await get_session_qa(session_id)
        excluded_questions = await _get_session_question_texts(redis, session_id)

        ai_items = await generate_topic_followup_batch(
            topic_name=session.get("role_title", "Topic Interview"),
            qa_pairs=qa_pairs,
            excluded_questions=excluded_questions,
            count=TOPIC_AI_FOLLOWUPS,
        )
        # Bank sampling also excludes the model's questions to avoid duplicates.
        db_items = await _sample_topic_questions(
            db=db,
            topic_id=session.get("topic_id", ""),
            excluded_questions=excluded_questions + [i.get("question", "") for i in ai_items],
            limit=TOPIC_DB_FOLLOWUPS,
        )

        # Track accepted questions per source; enqueue_question may reject an item.
        enqueued = {"ai": 0, "bank": 0}
        for source, items in (("ai", ai_items), ("bank", db_items)):
            for item in items:
                qid = await enqueue_question(
                    redis=redis,
                    session_id=session_id,
                    question=item.get("question", ""),
                    difficulty=item.get("difficulty", "medium"),
                    category=item.get("category", session.get("role_title", "topic")),
                    ttl_seconds=SESSION_TTL,
                    max_queue_size=MAX_QUEUE_SIZE,
                )
                if qid:
                    enqueued[source] += 1

        topic_added = enqueued["ai"] + enqueued["bank"]
        generated_count = _safe_int(session.get("generated_count", 0)) + topic_added
        await _apply_generation_metric_delta(
            db=db,
            redis=redis,
            session_id=session_id,
            session=session,
            metrics_delta={
                "gemini_calls": 1,
                "gemini_questions": enqueued["ai"],
                "bank_questions": enqueued["bank"],
                "bank_shortfall": 0,
                "generation_batches": 1,
            },
            generated_count=generated_count,
            extra_redis_fields={"topic_followups_generated": "1"},
            extra_db_fields={"topic_followups_generated": True},
        )

        await flush_backlog_to_queue(
            redis=redis,
            session_id=session_id,
            ttl_seconds=SESSION_TTL,
            max_queue_size=MAX_QUEUE_SIZE,
        )

        if session.get("status") == "in_progress":
            qid, q = await peek_next_question(redis, session_id)
            if qid and q:
                _schedule_question_audio_prefetch(
                    [q.get("question", "")],
                    _normalize_voice_gender(session.get("speech_voice_gender")),
                )
|
| 1637 |
+
|
| 1638 |
+
|
| 1639 |
+
def _schedule_post_submit_processing(
    *,
    session_id: str,
    question_id: str,
    question_text: str,
    answer: str,
    answered_count: int,
    max_questions: int,
    interview_type: str,
) -> None:
    """Schedule the background work that follows an answer, without awaiting it.

    For ``interview_type == "resume"`` a task running
    ``_post_submit_resume_processing`` is created; for ``"topic"`` a task
    running ``_post_submit_topic_processing`` is created. Any other interview
    type is a no-op. The created task gets ``_consume_post_submit_task_result``
    attached as its done-callback.

    This function never raises: a scheduling failure must not block the
    request response, so it is logged and swallowed.

    Args:
        session_id: Interview session identifier.
        question_id: Identifier of the question that was just answered.
        question_text: Text of the answered question.
        answer: The submitted answer.
        answered_count: Number of answers submitted so far (including this one).
        max_questions: Question cap for the session.
        interview_type: ``"resume"`` or ``"topic"``; selects the processor.
    """
    # Local import keeps this block self-contained regardless of what the
    # module imports at the top of the file.
    import logging

    # Holds the coroutine until a task has taken ownership of it, so it can be
    # closed if scheduling fails (avoids "coroutine was never awaited").
    pending = None
    try:
        if interview_type == "resume":
            pending = _post_submit_resume_processing(
                session_id=session_id,
                question_id=question_id,
                question_text=question_text,
                answer=answer,
                answered_count=answered_count,
                max_questions=max_questions,
            )
        elif interview_type == "topic":
            pending = _post_submit_topic_processing(
                session_id=session_id,
                answered_count=answered_count,
            )
        else:
            return

        task = asyncio.create_task(pending)
        pending = None  # the task owns the coroutine from here on
        task.add_done_callback(_consume_post_submit_task_result)
    except Exception:
        # Never block request response on scheduler errors.
        close = getattr(pending, "close", None)
        if callable(close):
            close()
        logging.getLogger(__name__).exception(
            "Failed to schedule post-submit processing for session %s",
            session_id,
        )
        return
|
| 1675 |
+
|
| 1676 |
+
|
| 1677 |
async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
|
| 1678 |
+
"""Submit answer and return next queued question immediately."""
|
| 1679 |
+
started_at = perf_counter()
|
| 1680 |
db = get_db()
|
| 1681 |
redis = get_redis()
|
| 1682 |
|
|
|
|
| 1683 |
session = await redis.hgetall(f"session:{session_id}")
|
| 1684 |
if not session:
|
| 1685 |
raise ValueError("Interview session not found or expired")
|
|
|
|
| 1686 |
if session.get("status") != "in_progress":
|
| 1687 |
raise ValueError("Interview is not in progress")
|
| 1688 |
|
| 1689 |
+
current_q = await redis.hgetall(f"session:{session_id}:q:{question_id}")
|
| 1690 |
+
current_question_text = current_q.get("question", "")
|
| 1691 |
+
|
| 1692 |
+
await redis.hset(
|
| 1693 |
+
f"session:{session_id}:a:{question_id}",
|
| 1694 |
+
mapping={
|
| 1695 |
+
"question_id": question_id,
|
| 1696 |
+
"answer": answer,
|
| 1697 |
+
"submitted_at": utc_now(),
|
| 1698 |
+
},
|
| 1699 |
+
)
|
| 1700 |
await redis.rpush(f"session:{session_id}:answers", question_id)
|
| 1701 |
await redis.expire(f"session:{session_id}:a:{question_id}", SESSION_TTL)
|
| 1702 |
await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
|
| 1703 |
|
| 1704 |
+
question_count = _safe_int(session.get("question_count", 1))
|
| 1705 |
+
answered_count = _safe_int(session.get("answered_count", 0)) + 1
|
| 1706 |
+
served_count = _safe_int(session.get("served_count", 1))
|
| 1707 |
+
generated_count = _safe_int(session.get("generated_count", 0))
|
| 1708 |
+
max_questions = _safe_int(session.get("max_questions", MAX_QUESTIONS))
|
| 1709 |
interview_type = session.get("interview_type", "resume")
|
| 1710 |
+
speech_voice_gender = _normalize_voice_gender(session.get("speech_voice_gender"))
|
| 1711 |
|
| 1712 |
+
if interview_type == "resume" and max_questions < RESUME_MAX_QUESTIONS:
|
| 1713 |
+
max_questions = RESUME_MAX_QUESTIONS
|
| 1714 |
+
await redis.hset(f"session:{session_id}", mapping={"max_questions": str(max_questions)})
|
| 1715 |
+
await db[SESSIONS].update_one(
|
| 1716 |
+
{"session_id": session_id},
|
| 1717 |
+
{"$set": {"max_questions": max_questions}},
|
| 1718 |
+
)
|
| 1719 |
+
|
| 1720 |
+
_update_local_summary(session_id, current_question_text, answer)
|
| 1721 |
+
await push_context_item(
|
| 1722 |
+
redis=redis,
|
| 1723 |
+
session_id=session_id,
|
| 1724 |
+
item={
|
| 1725 |
+
"question": current_question_text,
|
| 1726 |
+
"answer": answer,
|
| 1727 |
+
},
|
| 1728 |
+
ttl_seconds=SESSION_TTL,
|
| 1729 |
+
max_items=CONTEXT_CACHE_ITEMS,
|
| 1730 |
+
)
|
| 1731 |
|
|
|
|
| 1732 |
if answered_count >= max_questions:
|
|
|
|
| 1733 |
await redis.hset(
|
| 1734 |
f"session:{session_id}",
|
| 1735 |
+
mapping={
|
| 1736 |
+
"status": "completed",
|
| 1737 |
+
"answered_count": str(answered_count),
|
| 1738 |
+
},
|
| 1739 |
)
|
| 1740 |
await db[SESSIONS].update_one(
|
| 1741 |
{"session_id": session_id},
|
| 1742 |
{"$set": {"status": "completed", "completed_at": utc_now()}},
|
| 1743 |
)
|
| 1744 |
+
|
| 1745 |
+
submit_ms = await _record_submit_latency(started_at)
|
| 1746 |
return {
|
| 1747 |
"session_id": session_id,
|
| 1748 |
"next_question": None,
|
| 1749 |
"is_complete": True,
|
| 1750 |
"message": "Interview complete! Generating your report...",
|
| 1751 |
+
"submit_ms": submit_ms,
|
| 1752 |
}
|
| 1753 |
|
| 1754 |
+
await flush_backlog_to_queue(
|
| 1755 |
+
redis=redis,
|
| 1756 |
+
session_id=session_id,
|
| 1757 |
+
ttl_seconds=SESSION_TTL,
|
| 1758 |
+
max_queue_size=MAX_QUEUE_SIZE,
|
| 1759 |
+
)
|
| 1760 |
+
next_question_id, q_data = await pop_next_question(redis, session_id)
|
|
|
|
|
|
|
| 1761 |
|
| 1762 |
+
effective_stats = _current_generation_stats(session)
|
| 1763 |
+
fallback_evaluation = None
|
| 1764 |
+
|
| 1765 |
+
# Emergency fallback for rare queue-empty cases.
|
| 1766 |
+
if not next_question_id and interview_type == "resume":
|
| 1767 |
+
recent_context = await get_recent_context_items(
|
| 1768 |
+
redis=redis,
|
| 1769 |
+
session_id=session_id,
|
| 1770 |
+
max_items=CONTEXT_CACHE_ITEMS,
|
| 1771 |
+
)
|
| 1772 |
+
excluded_questions = await _get_session_question_texts(redis, session_id)
|
| 1773 |
+
fallback_evaluation = await evaluate_and_generate_followup(
|
| 1774 |
+
role_title=session.get("role_title", "Software Developer"),
|
| 1775 |
+
required_skills=_safe_json_list(session.get("jd_required_skills", "[]")),
|
| 1776 |
+
recent_context=recent_context,
|
| 1777 |
+
current_question=current_question_text,
|
| 1778 |
+
current_answer=answer,
|
| 1779 |
+
excluded_questions=excluded_questions,
|
| 1780 |
+
)
|
| 1781 |
+
|
| 1782 |
+
await redis.hset(
|
| 1783 |
+
f"session:{session_id}:a:{question_id}",
|
| 1784 |
+
mapping={
|
| 1785 |
+
"score": str(_safe_int(fallback_evaluation.get("score", 0))),
|
| 1786 |
+
"feedback": fallback_evaluation.get("feedback", ""),
|
| 1787 |
+
},
|
| 1788 |
+
)
|
| 1789 |
+
|
| 1790 |
+
fallback_delta = {
|
| 1791 |
+
"gemini_calls": 1,
|
| 1792 |
+
"gemini_questions": 0,
|
| 1793 |
+
"bank_questions": 0,
|
| 1794 |
+
"bank_shortfall": 0,
|
| 1795 |
+
"generation_batches": 1,
|
| 1796 |
+
}
|
| 1797 |
+
follow_text = (fallback_evaluation.get("followup_question") or "").strip()
|
| 1798 |
+
if answered_count < max_questions:
|
| 1799 |
+
qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
|
| 1800 |
+
redis=redis,
|
| 1801 |
+
session_id=session_id,
|
| 1802 |
+
session=session,
|
| 1803 |
+
answered_count=answered_count,
|
| 1804 |
+
suggested_text=follow_text,
|
| 1805 |
+
suggested_difficulty=fallback_evaluation.get("difficulty", "medium"),
|
| 1806 |
+
suggested_category=fallback_evaluation.get("category", "follow-up"),
|
| 1807 |
)
|
| 1808 |
+
if qid:
|
| 1809 |
+
generated_count += 1
|
| 1810 |
+
if used_model_followup:
|
| 1811 |
+
fallback_delta["gemini_questions"] = 1
|
|
|
|
|
|
|
| 1812 |
|
| 1813 |
+
effective_stats = await _apply_generation_metric_delta(
|
| 1814 |
db=db,
|
| 1815 |
redis=redis,
|
| 1816 |
session_id=session_id,
|
| 1817 |
session=session,
|
| 1818 |
+
metrics_delta=fallback_delta,
|
| 1819 |
generated_count=generated_count,
|
|
|
|
| 1820 |
)
|
| 1821 |
+
|
| 1822 |
+
await flush_backlog_to_queue(
|
| 1823 |
+
redis=redis,
|
| 1824 |
+
session_id=session_id,
|
| 1825 |
+
ttl_seconds=SESSION_TTL,
|
| 1826 |
+
max_queue_size=MAX_QUEUE_SIZE,
|
| 1827 |
+
)
|
| 1828 |
+
next_question_id, q_data = await pop_next_question(redis, session_id)
|
| 1829 |
+
|
| 1830 |
+
if not next_question_id or not q_data:
|
| 1831 |
+
await redis.hset(
|
| 1832 |
+
f"session:{session_id}",
|
| 1833 |
+
mapping={"status": "completed", "answered_count": str(answered_count)},
|
| 1834 |
+
)
|
| 1835 |
+
await db[SESSIONS].update_one(
|
| 1836 |
+
{"session_id": session_id},
|
| 1837 |
+
{"$set": {"status": "completed", "completed_at": utc_now()}},
|
| 1838 |
+
)
|
| 1839 |
+
|
| 1840 |
+
submit_ms = await _record_submit_latency(started_at)
|
| 1841 |
+
payload = {
|
| 1842 |
+
"session_id": session_id,
|
| 1843 |
+
"next_question": None,
|
| 1844 |
+
"is_complete": True,
|
| 1845 |
+
"message": "Interview complete! Generating your report...",
|
| 1846 |
+
"submit_ms": submit_ms,
|
| 1847 |
+
}
|
| 1848 |
+
if fallback_evaluation:
|
| 1849 |
+
payload["answer_evaluation"] = {
|
| 1850 |
+
"score": _safe_int(fallback_evaluation.get("score", 0)),
|
| 1851 |
+
"feedback": fallback_evaluation.get("feedback", ""),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1852 |
}
|
| 1853 |
+
return payload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1854 |
|
| 1855 |
+
await mark_question_asked(
|
| 1856 |
+
redis=redis,
|
| 1857 |
+
session_id=session_id,
|
| 1858 |
+
question_text=q_data.get("question", ""),
|
| 1859 |
+
ttl_seconds=SESSION_TTL,
|
| 1860 |
+
)
|
| 1861 |
|
| 1862 |
+
await flush_backlog_to_queue(
|
| 1863 |
+
redis=redis,
|
| 1864 |
+
session_id=session_id,
|
| 1865 |
+
ttl_seconds=SESSION_TTL,
|
| 1866 |
+
max_queue_size=MAX_QUEUE_SIZE,
|
| 1867 |
+
)
|
| 1868 |
+
peek_next_id, peek_q = await peek_next_question(redis, session_id)
|
| 1869 |
+
if peek_next_id and peek_q:
|
| 1870 |
+
_schedule_question_audio_prefetch([peek_q.get("question", "")], speech_voice_gender)
|
| 1871 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1872 |
next_difficulty = q_data.get("difficulty", session.get("current_difficulty", "medium"))
|
| 1873 |
+
new_question_count = question_count + 1
|
| 1874 |
new_served_count = served_count + 1
|
| 1875 |
|
| 1876 |
+
await redis.hset(
|
| 1877 |
+
f"session:{session_id}",
|
| 1878 |
+
mapping={
|
| 1879 |
+
"question_count": str(new_question_count),
|
| 1880 |
+
"answered_count": str(answered_count),
|
| 1881 |
+
"served_count": str(new_served_count),
|
| 1882 |
+
"generated_count": str(generated_count),
|
| 1883 |
+
"current_difficulty": next_difficulty,
|
| 1884 |
+
},
|
| 1885 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1886 |
|
| 1887 |
+
response = {
|
| 1888 |
"session_id": session_id,
|
| 1889 |
"next_question": {
|
| 1890 |
"question_id": next_question_id,
|
|
|
|
| 1898 |
"generation_stats": effective_stats,
|
| 1899 |
}
|
| 1900 |
|
| 1901 |
+
if fallback_evaluation:
|
| 1902 |
+
response["answer_evaluation"] = {
|
| 1903 |
+
"score": _safe_int(fallback_evaluation.get("score", 0)),
|
| 1904 |
+
"feedback": fallback_evaluation.get("feedback", ""),
|
| 1905 |
+
}
|
| 1906 |
+
elif interview_type == "resume":
|
| 1907 |
+
response["answer_evaluation"] = {
|
| 1908 |
+
"status": "processing",
|
| 1909 |
+
}
|
| 1910 |
+
|
| 1911 |
+
_schedule_post_submit_processing(
|
| 1912 |
+
session_id=session_id,
|
| 1913 |
+
question_id=question_id,
|
| 1914 |
+
question_text=current_question_text,
|
| 1915 |
+
answer=answer,
|
| 1916 |
+
answered_count=answered_count,
|
| 1917 |
+
max_questions=max_questions,
|
| 1918 |
+
interview_type=interview_type,
|
| 1919 |
+
)
|
| 1920 |
+
|
| 1921 |
+
submit_ms = await _record_submit_latency(started_at)
|
| 1922 |
+
response["submit_ms"] = submit_ms
|
| 1923 |
+
return response
|
| 1924 |
+
|
| 1925 |
+
|
| 1926 |
+
async def get_next_question(session_id: str, user_id: str) -> dict:
    """Preview the next queued question without submitting a new answer.

    The head of the queue is only peeked, never popped.

    Args:
        session_id: Interview session identifier.
        user_id: Caller's user id; must match the stored session's ``user_id``.

    Returns:
        A dict with ``session_id``, ``next_question`` (or ``None``),
        ``is_complete`` and ``message``; ``queue_size`` is included whenever
        the session is in progress.

    Raises:
        ValueError: If the session document is missing, belongs to another
            user, or the Redis session hash is missing/expired.
    """
    db = get_db()
    redis = get_redis()

    stored_session = await db[SESSIONS].find_one({"session_id": session_id})
    if not stored_session:
        raise ValueError("Session not found")
    if stored_session.get("user_id") != user_id:
        raise ValueError("Unauthorized access to session")

    live_session = await redis.hgetall(f"session:{session_id}")
    if not live_session:
        raise ValueError("Interview session not found or expired")

    if live_session.get("status") != "in_progress":
        return {
            "session_id": session_id,
            "next_question": None,
            "is_complete": True,
            "message": "Interview is not in progress",
        }

    # Top the queue up from the backlog before looking at its head.
    await flush_backlog_to_queue(
        redis=redis,
        session_id=session_id,
        ttl_seconds=SESSION_TTL,
        max_queue_size=MAX_QUEUE_SIZE,
    )

    head_id, head = await peek_next_question(redis, session_id)
    pending = await queue_size(redis, session_id)

    if head_id and head:
        return {
            "session_id": session_id,
            "next_question": {
                "question_id": head_id,
                "question": head.get("question", ""),
                "difficulty": head.get("difficulty", "medium"),
                "category": head.get("category", "general"),
            },
            "is_complete": False,
            "queue_size": pending,
            "message": "Next question ready",
        }

    return {
        "session_id": session_id,
        "next_question": None,
        "is_complete": False,
        "message": "No queued question yet",
        "queue_size": pending,
    }
|
| 1977 |
+
|
| 1978 |
|
| 1979 |
async def quit_interview(session_id: str, user_id: str) -> dict:
|
| 1980 |
"""Mark an interview as quit and indicate whether a partial report can be generated."""
|
|
|
|
| 2057 |
"""Cleanup process-local state for a completed session."""
|
| 2058 |
_LOCAL_SUMMARIES.pop(session_id, None)
|
| 2059 |
_PREGEN_IN_FLIGHT.discard(session_id)
|
| 2060 |
+
_POST_SUBMIT_LOCKS.pop(session_id, None)
|
backend/services/latency_service.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
from typing import Iterable
|
| 3 |
+
|
| 4 |
+
from database import get_redis
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
LATENCY_METRICS = ("stt_ms", "submit_ms", "gemini_ms")
|
| 8 |
+
_LATENCY_PREFIX = "metrics:latency"
|
| 9 |
+
_DEFAULT_SAMPLE_SIZE = 500
|
| 10 |
+
_MAX_SAMPLE_SIZE = 5000
|
| 11 |
+
_MAX_STORED_ITEMS = 5000
|
| 12 |
+
_METRICS_TTL_SECONDS = 7 * 24 * 60 * 60
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _metric_key(metric_name: str) -> str:
    """Return the Redis key under which samples for *metric_name* are stored."""
    return "{}:{}".format(_LATENCY_PREFIX, metric_name)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _normalize_metric_names(metric_names: Iterable[str] | None) -> list[str]:
    """Return the known metric names requested, lower-cased and de-duplicated.

    A falsy argument (``None`` or an empty collection) selects every metric in
    ``LATENCY_METRICS``. Names outside ``LATENCY_METRICS`` are dropped; the
    first-seen order of the remaining names is kept.
    """
    if not metric_names:
        return list(LATENCY_METRICS)

    accepted: list[str] = []
    for raw_name in metric_names:
        candidate = (raw_name or "").strip().lower()
        if candidate not in LATENCY_METRICS:
            continue
        if candidate in accepted:
            continue
        accepted.append(candidate)
    return accepted
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _normalize_sample_size(sample_size: int) -> int:
    """Coerce *sample_size* to an int clamped to ``[1, _MAX_SAMPLE_SIZE]``.

    Values that cannot be converted with ``int()`` fall back to
    ``_DEFAULT_SAMPLE_SIZE`` before clamping.
    """
    try:
        requested = int(sample_size)
    except Exception:
        requested = _DEFAULT_SAMPLE_SIZE

    if requested > _MAX_SAMPLE_SIZE:
        requested = _MAX_SAMPLE_SIZE
    if requested < 1:
        requested = 1
    return requested
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _safe_float(value) -> float | None:
|
| 40 |
+
try:
|
| 41 |
+
parsed = float(value)
|
| 42 |
+
except Exception:
|
| 43 |
+
return None
|
| 44 |
+
if math.isnan(parsed) or math.isinf(parsed) or parsed < 0:
|
| 45 |
+
return None
|
| 46 |
+
return parsed
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _percentile(sorted_values: list[float], percentile: float) -> float | None:
|
| 50 |
+
if not sorted_values:
|
| 51 |
+
return None
|
| 52 |
+
|
| 53 |
+
if len(sorted_values) == 1:
|
| 54 |
+
return sorted_values[0]
|
| 55 |
+
|
| 56 |
+
position = ((len(sorted_values) - 1) * percentile) / 100.0
|
| 57 |
+
lower = int(math.floor(position))
|
| 58 |
+
upper = int(math.ceil(position))
|
| 59 |
+
|
| 60 |
+
if lower == upper:
|
| 61 |
+
return sorted_values[lower]
|
| 62 |
+
|
| 63 |
+
weight = position - lower
|
| 64 |
+
return sorted_values[lower] + (sorted_values[upper] - sorted_values[lower]) * weight
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _round(value: float | None) -> float | None:
|
| 68 |
+
if value is None:
|
| 69 |
+
return None
|
| 70 |
+
return round(value, 2)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
async def record_latency(
    metric_name: str,
    duration_ms: float,
    *,
    ttl_seconds: int = _METRICS_TTL_SECONDS,
    max_items: int = _MAX_STORED_ITEMS,
) -> None:
    """Prepend one latency sample to the Redis list for *metric_name*.

    Does nothing when the metric is not in ``LATENCY_METRICS``, when
    *duration_ms* is rejected by ``_safe_float``, or when ``get_redis()``
    returns a falsy client.

    Args:
        metric_name: Metric to record; compared after strip/lower-casing.
        duration_ms: Sample value, stored with three decimal places.
        ttl_seconds: Expiry applied to the list on every write.
        max_items: The list is trimmed to at most this many newest samples.
    """
    normalized = (metric_name or "").strip().lower()
    if normalized not in LATENCY_METRICS:
        return

    sample = _safe_float(duration_ms)
    if sample is None:
        return

    client = get_redis()
    if not client:
        return

    list_key = _metric_key(normalized)
    # Newest sample goes to the head, then the tail is trimmed off.
    await client.lpush(list_key, f"{sample:.3f}")
    last_index = max(0, int(max_items) - 1)
    await client.ltrim(list_key, 0, last_index)
    await client.expire(list_key, int(ttl_seconds))
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
async def get_latency_metrics(
    *,
    metric_names: Iterable[str] | None = None,
    sample_size: int = _DEFAULT_SAMPLE_SIZE,
) -> dict:
    """Summarise the most recent latency samples for the requested metrics.

    Args:
        metric_names: Metrics to report; normalised by
            ``_normalize_metric_names``.
        sample_size: Maximum number of newest samples read per metric;
            normalised by ``_normalize_sample_size``.

    Returns:
        ``{"sample_size": ..., "metrics": {name: summary}}``. When
        ``get_redis()`` returns a falsy client every summary is empty and a
        ``"message"`` entry is added.
    """
    selected = _normalize_metric_names(metric_names)
    limit = _normalize_sample_size(sample_size)

    client = get_redis()
    if not client:
        return {
            "sample_size": limit,
            "metrics": {name: _empty_summary() for name in selected},
            "message": "Redis is not available",
        }

    summaries: dict[str, dict] = {}
    for name in selected:
        stored = await client.lrange(_metric_key(name), 0, limit - 1)
        samples: list[float] = [
            parsed for parsed in map(_safe_float, stored) if parsed is not None
        ]

        # Stored newest-first in Redis; reverse to chronological for last_ms.
        samples.reverse()
        summaries[name] = _build_summary(samples)

    return {
        "sample_size": limit,
        "metrics": summaries,
    }
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
async def reset_latency_metrics(metric_names: Iterable[str] | None = None) -> dict:
    """Delete the stored samples for the requested metrics.

    Returns:
        ``{"cleared": [names]}`` listing the normalised metric names, or
        ``{"cleared": [], "message": ...}`` when ``get_redis()`` returns a
        falsy client.
    """
    selected = _normalize_metric_names(metric_names)

    client = get_redis()
    if not client:
        return {
            "cleared": [],
            "message": "Redis is not available",
        }

    redis_keys = list(map(_metric_key, selected))
    if redis_keys:
        await client.delete(*redis_keys)

    return {
        "cleared": selected,
    }
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def _empty_summary() -> dict:
    """Return a fresh summary dict for a metric that has no samples."""
    statistic_fields = ("min_ms", "avg_ms", "p50_ms", "p95_ms", "max_ms", "last_ms")
    return {"count": 0, **dict.fromkeys(statistic_fields)}
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def _build_summary(values: list[float]) -> dict:
    """Compute count/min/avg/p50/p95/max/last statistics for *values*.

    Args:
        values: Samples in chronological order; the final element is reported
            as ``last_ms``.

    Returns:
        A summary dict with every statistic rounded via ``_round``, or the
        empty summary when *values* is empty.
    """
    if not values:
        return _empty_summary()

    ordered = sorted(values)
    total = len(ordered)
    mean = sum(ordered) / total

    summary: dict = {"count": total}
    summary["min_ms"] = _round(ordered[0])
    summary["avg_ms"] = _round(mean)
    summary["p50_ms"] = _round(_percentile(ordered, 50))
    summary["p95_ms"] = _round(_percentile(ordered, 95))
    summary["max_ms"] = _round(ordered[-1])
    # Taken from the unsorted input so it reflects the most recent sample.
    summary["last_ms"] = _round(values[-1])
    return summary
|
backend/services/queue_service.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
from typing import Optional, Tuple
|
| 4 |
+
|
| 5 |
+
from utils.helpers import generate_id
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
QUESTION_QUEUE_SUFFIX = "question_queue"
|
| 9 |
+
QUESTION_BACKLOG_SUFFIX = "question_backlog"
|
| 10 |
+
CONTEXT_CACHE_SUFFIX = "context_cache"
|
| 11 |
+
ASKED_SET_SUFFIX = "asked_questions_set"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _key(session_id: str, suffix: str) -> str:
    """Return the Redis key ``session:<session_id>:<suffix>``."""
    return "session:{}:{}".format(session_id, suffix)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def question_fingerprint(text: str) -> str:
    """Return a normalised form of *text* used to detect duplicate questions.

    The text is stripped and lower-cased, every character that is neither a
    word character nor whitespace (and every underscore) becomes a space, and
    runs of whitespace collapse to a single space.

    The character class is Unicode-aware: accented and non-Latin letters are
    kept, so questions written in other languages get a non-empty fingerprint
    instead of being reduced to ``""``. For pure-ASCII input the result is
    the same as with an ASCII-only ``[^a-z0-9\\s]`` filter.

    Args:
        text: Question text; ``None`` or empty yields ``""``.

    Returns:
        The normalised fingerprint, possibly empty.
    """
    value = (text or "").strip().lower()
    # ``\w`` also matches "_", which is treated as a separator here.
    value = re.sub(r"[^\w\s]|_", " ", value)
    value = re.sub(r"\s+", " ", value).strip()
    return value
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
async def mark_question_asked(redis, session_id: str, question_text: str, ttl_seconds: int) -> None:
    """Add the question's fingerprint to the session's asked-questions set.

    Questions whose fingerprint is empty are ignored. The set's expiry is
    refreshed to *ttl_seconds* on every call that records a fingerprint.
    """
    fingerprint = question_fingerprint(question_text)
    if fingerprint:
        asked_key = _key(session_id, ASKED_SET_SUFFIX)
        await redis.sadd(asked_key, fingerprint)
        await redis.expire(asked_key, ttl_seconds)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
async def is_question_asked(redis, session_id: str, question_text: str) -> bool:
    """Return whether the question's fingerprint is in the asked-questions set.

    A question with an empty fingerprint is reported as not asked.
    """
    fingerprint = question_fingerprint(question_text)
    if not fingerprint:
        return False

    asked_key = _key(session_id, ASKED_SET_SUFFIX)
    member = await redis.sismember(asked_key, fingerprint)
    return bool(member)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
async def _has_in_list(redis, session_id: str, list_key: str, question_text: str) -> bool:
    """Return whether a question with the same fingerprint is in *list_key*.

    *list_key* is read as a Redis list of question ids; each id's question
    hash is loaded and its ``question`` field fingerprinted for comparison.
    A question with an empty fingerprint never matches.
    """
    target = question_fingerprint(question_text)
    if not target:
        return False

    for queued_id in await redis.lrange(list_key, 0, -1):
        stored = await redis.hgetall(f"session:{session_id}:q:{queued_id}")
        candidate = question_fingerprint(stored.get("question", ""))
        if candidate == target:
            return True
    return False
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
async def _append_question_object(
    redis,
    session_id: str,
    question: str,
    difficulty: str,
    category: str,
    ttl_seconds: int,
) -> str:
    """Store a new question hash and append its id to the session's question list.

    Falsy *difficulty* / *category* are stored as ``"medium"`` / ``"general"``.
    Both the hash and the ``session:<id>:questions`` list get *ttl_seconds*
    as their expiry.

    Returns:
        The newly generated question id.
    """
    new_id = generate_id()
    hash_key = f"session:{session_id}:q:{new_id}"
    fields = {
        "question_id": new_id,
        "question": question,
        "difficulty": difficulty or "medium",
        "category": category or "general",
    }

    await redis.hset(hash_key, mapping=fields)
    await redis.expire(hash_key, ttl_seconds)

    index_key = f"session:{session_id}:questions"
    await redis.rpush(index_key, new_id)
    await redis.expire(index_key, ttl_seconds)
    return new_id
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
async def enqueue_question(
    redis,
    session_id: str,
    question: str,
    difficulty: str = "medium",
    category: str = "general",
    ttl_seconds: int = 7200,
    max_queue_size: int = 3,
) -> Optional[str]:
    """Store a question and place it on the session queue, or its backlog.

    The question is rejected (``None`` returned) when its text is blank, when
    it was already asked, or when a question with the same fingerprint is
    already waiting in the queue or backlog. Otherwise it is stored, then
    pushed to the queue if the queue held fewer than *max_queue_size* entries
    when checked, and to the backlog if not.

    Returns:
        The new question id, or ``None`` if the question was rejected.
    """
    text = (question or "").strip()
    if not text:
        return None

    queue_key = _key(session_id, QUESTION_QUEUE_SUFFIX)
    backlog_key = _key(session_id, QUESTION_BACKLOG_SUFFIX)

    if await is_question_asked(redis, session_id, text):
        return None
    for pending_key in (queue_key, backlog_key):
        if await _has_in_list(redis, session_id, pending_key, text):
            return None

    # Queue length is sampled before the question object is written.
    queued_now = await redis.llen(queue_key)
    new_id = await _append_question_object(
        redis=redis,
        session_id=session_id,
        question=text,
        difficulty=difficulty,
        category=category,
        ttl_seconds=ttl_seconds,
    )

    destination = queue_key if queued_now < max_queue_size else backlog_key
    await redis.rpush(destination, new_id)
    await redis.expire(destination, ttl_seconds)
    return new_id
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
async def flush_backlog_to_queue(
    redis,
    session_id: str,
    ttl_seconds: int = 7200,
    max_queue_size: int = 3,
) -> None:
    """Move question ids from the backlog to the queue until the queue is full.

    Ids are taken from the head of the backlog and appended to the tail of the
    queue while the queue holds fewer than *max_queue_size* entries and the
    backlog is not empty. Both lists then get *ttl_seconds* as their expiry.
    """
    queue_key = _key(session_id, QUESTION_QUEUE_SUFFIX)
    backlog_key = _key(session_id, QUESTION_BACKLOG_SUFFIX)

    while True:
        if await redis.llen(queue_key) >= max_queue_size:
            break
        moved_id = await redis.lpop(backlog_key)
        if not moved_id:
            break
        await redis.rpush(queue_key, moved_id)

    for list_key in (queue_key, backlog_key):
        await redis.expire(list_key, ttl_seconds)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
async def queue_size(redis, session_id: str) -> int:
    """Return the number of question ids currently in the session's queue."""
    length = await redis.llen(_key(session_id, QUESTION_QUEUE_SUFFIX))
    return int(length)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
async def pop_next_question(redis, session_id: str) -> Tuple[Optional[str], Optional[dict]]:
    """Remove the head of the session's question queue and load its hash.

    Returns:
        ``(question_id, question_hash)``, or ``(None, None)`` when the queue
        is empty.
    """
    popped_id = await redis.lpop(_key(session_id, QUESTION_QUEUE_SUFFIX))
    if popped_id:
        details = await redis.hgetall(f"session:{session_id}:q:{popped_id}")
        return popped_id, details
    return None, None
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
async def peek_next_question(redis, session_id: str) -> Tuple[Optional[str], Optional[dict]]:
    """Load the head of the session's question queue without removing it.

    Returns:
        ``(question_id, question_hash)``, or ``(None, None)`` when the queue
        is empty.
    """
    head_id = await redis.lindex(_key(session_id, QUESTION_QUEUE_SUFFIX), 0)
    if head_id:
        details = await redis.hgetall(f"session:{session_id}:q:{head_id}")
        return head_id, details
    return None, None
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
async def push_context_item(
    redis,
    session_id: str,
    item: dict,
    ttl_seconds: int = 7200,
    max_items: int = 3,
) -> None:
    """Prepend *item* (JSON-encoded) to the session's context cache list.

    After the push the list is trimmed to indexes ``0 .. max(0, max_items - 1)``
    and its expiry is set to *ttl_seconds*.
    """
    cache_key = _key(session_id, CONTEXT_CACHE_SUFFIX)
    encoded = json.dumps(item, ensure_ascii=True)

    await redis.lpush(cache_key, encoded)
    last_index = max(0, max_items - 1)
    await redis.ltrim(cache_key, 0, last_index)
    await redis.expire(cache_key, ttl_seconds)
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
async def get_recent_context_items(redis, session_id: str, max_items: int = 3) -> list[dict]:
    """Return the newest context-cache entries in chronological order.

    Entries at indexes ``0 .. max(0, max_items - 1)`` are read from the
    session's context cache list; entries that fail JSON decoding are skipped.
    """
    cache_key = _key(session_id, CONTEXT_CACHE_SUFFIX)
    last_index = max(0, max_items - 1)
    stored = await redis.lrange(cache_key, 0, last_index)

    decoded: list[dict] = []
    for payload in stored:
        try:
            entry = json.loads(payload)
        except Exception:
            continue
        decoded.append(entry)

    # Convert newest-first storage into chronological order for prompting.
    return decoded[::-1]
|
backend/services/stt_service.py
CHANGED
|
@@ -8,13 +8,60 @@ os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
|
|
| 8 |
|
| 9 |
_WHISPER_MODEL_CACHE = {}
|
| 10 |
_WHISPER_MODEL_LOCK = asyncio.Lock()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def _resolve_device() -> str:
|
|
|
|
|
|
|
|
|
|
| 14 |
pref = os.getenv("WHISPER_DEVICE", "auto").strip().lower()
|
| 15 |
if pref in {"cpu", "cuda"}:
|
| 16 |
return pref
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
try:
|
| 19 |
import torch
|
| 20 |
|
|
@@ -31,8 +78,27 @@ def _resolve_compute_type(device: str) -> str:
|
|
| 31 |
|
| 32 |
|
| 33 |
def _resolve_model_size() -> str:
|
| 34 |
-
#
|
| 35 |
-
return os.getenv("WHISPER_MODEL_SIZE", "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
async def _get_whisper_model():
|
|
@@ -55,7 +121,9 @@ async def _get_whisper_model():
|
|
| 55 |
|
| 56 |
try:
|
| 57 |
return WhisperModel(model_size, device=device, compute_type=compute_type)
|
| 58 |
-
except Exception:
|
|
|
|
|
|
|
| 59 |
# Keep service resilient if GPU config mismatches runtime.
|
| 60 |
return WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 61 |
|
|
@@ -79,6 +147,9 @@ async def transcribe_audio_bytes(audio_bytes: bytes, filename: str = "speech.web
|
|
| 79 |
model = await _get_whisper_model()
|
| 80 |
ext = os.path.splitext(filename or "speech.webm")[1] or ".webm"
|
| 81 |
target_language = (language or "en").strip().lower() or "en"
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
fd, tmp_path = tempfile.mkstemp(suffix=ext)
|
| 84 |
os.close(fd)
|
|
@@ -87,15 +158,16 @@ async def transcribe_audio_bytes(audio_bytes: bytes, filename: str = "speech.web
|
|
| 87 |
with open(tmp_path, "wb") as f:
|
| 88 |
f.write(audio_bytes)
|
| 89 |
|
| 90 |
-
def _transcribe() -> str:
|
| 91 |
-
segments, _ =
|
| 92 |
tmp_path,
|
| 93 |
language=target_language,
|
| 94 |
-
beam_size=
|
| 95 |
-
best_of=
|
| 96 |
-
vad_filter=
|
| 97 |
condition_on_previous_text=False,
|
| 98 |
temperature=0.0,
|
|
|
|
| 99 |
)
|
| 100 |
parts = []
|
| 101 |
for seg in segments:
|
|
@@ -104,7 +176,22 @@ async def transcribe_audio_bytes(audio_bytes: bytes, filename: str = "speech.web
|
|
| 104 |
parts.append(text)
|
| 105 |
return " ".join(parts).strip()
|
| 106 |
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
return text
|
| 109 |
finally:
|
| 110 |
if os.path.exists(tmp_path):
|
|
|
|
| 8 |
|
| 9 |
_WHISPER_MODEL_CACHE = {}
|
| 10 |
_WHISPER_MODEL_LOCK = asyncio.Lock()
|
| 11 |
+
_WHISPER_RUNTIME_FORCE_CPU = False
|
| 12 |
+
_WHISPER_LAST_ERROR: str | None = None
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _is_cuda_runtime_error(error: Exception) -> bool:
|
| 16 |
+
message = str(error or "").strip().lower()
|
| 17 |
+
if not message:
|
| 18 |
+
return False
|
| 19 |
+
markers = (
|
| 20 |
+
"cublas64_12.dll",
|
| 21 |
+
"cublas",
|
| 22 |
+
"cudnn",
|
| 23 |
+
"libcudart",
|
| 24 |
+
"cuda",
|
| 25 |
+
"ctranslate2",
|
| 26 |
+
"failed to load library",
|
| 27 |
+
"cannot be loaded",
|
| 28 |
+
)
|
| 29 |
+
return any(marker in message for marker in markers)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _force_whisper_cpu_mode(reason: Exception | None = None) -> None:
    """Switch the Whisper service to CPU-only operation.

    Sets the module-level force-CPU flag, stores ``str(reason)`` as the last
    error when a reason is supplied, and removes every cached model whose
    cache key contains ``"|cuda|"``.
    """
    global _WHISPER_RUNTIME_FORCE_CPU, _WHISPER_LAST_ERROR

    _WHISPER_RUNTIME_FORCE_CPU = True
    if reason is not None:
        _WHISPER_LAST_ERROR = str(reason)

    # Evict CUDA-backed models so later lookups build a CPU model instead.
    # NOTE(review): relies on cache keys embedding the device as "|cuda|" --
    # confirm against the key format used where the cache is populated.
    stale_keys = [
        cache_key for cache_key in _WHISPER_MODEL_CACHE if "|cuda|" in cache_key
    ]
    for cache_key in stale_keys:
        _WHISPER_MODEL_CACHE.pop(cache_key, None)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _has_cuda_device_via_ctranslate2() -> bool:
|
| 45 |
+
try:
|
| 46 |
+
import ctranslate2
|
| 47 |
+
|
| 48 |
+
return ctranslate2.get_cuda_device_count() > 0
|
| 49 |
+
except Exception:
|
| 50 |
+
return False
|
| 51 |
|
| 52 |
|
| 53 |
def _resolve_device() -> str:
|
| 54 |
+
if _WHISPER_RUNTIME_FORCE_CPU:
|
| 55 |
+
return "cpu"
|
| 56 |
+
|
| 57 |
pref = os.getenv("WHISPER_DEVICE", "auto").strip().lower()
|
| 58 |
if pref in {"cpu", "cuda"}:
|
| 59 |
return pref
|
| 60 |
|
| 61 |
+
# Prefer ctranslate2 probe first because faster-whisper relies on it.
|
| 62 |
+
if _has_cuda_device_via_ctranslate2():
|
| 63 |
+
return "cuda"
|
| 64 |
+
|
| 65 |
try:
|
| 66 |
import torch
|
| 67 |
|
|
|
|
| 78 |
|
| 79 |
|
| 80 |
def _resolve_model_size() -> str:
|
| 81 |
+
# Fast default for real-time interview UX; can be overridden in env.
|
| 82 |
+
return os.getenv("WHISPER_MODEL_SIZE", "small.en").strip() or "small.en"
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _resolve_beam_size() -> int:
|
| 86 |
+
try:
|
| 87 |
+
return max(1, int(os.getenv("WHISPER_BEAM_SIZE", "1")))
|
| 88 |
+
except Exception:
|
| 89 |
+
return 1
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _resolve_best_of() -> int:
|
| 93 |
+
try:
|
| 94 |
+
return max(1, int(os.getenv("WHISPER_BEST_OF", "1")))
|
| 95 |
+
except Exception:
|
| 96 |
+
return 1
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _resolve_vad_filter() -> bool:
|
| 100 |
+
value = os.getenv("WHISPER_VAD_FILTER", "0").strip().lower()
|
| 101 |
+
return value in {"1", "true", "yes", "on"}
|
| 102 |
|
| 103 |
|
| 104 |
async def _get_whisper_model():
|
|
|
|
| 121 |
|
| 122 |
try:
|
| 123 |
return WhisperModel(model_size, device=device, compute_type=compute_type)
|
| 124 |
+
except Exception as exc:
|
| 125 |
+
if device == "cuda" and _is_cuda_runtime_error(exc):
|
| 126 |
+
_force_whisper_cpu_mode(exc)
|
| 127 |
# Keep service resilient if GPU config mismatches runtime.
|
| 128 |
return WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 129 |
|
|
|
|
| 147 |
model = await _get_whisper_model()
|
| 148 |
ext = os.path.splitext(filename or "speech.webm")[1] or ".webm"
|
| 149 |
target_language = (language or "en").strip().lower() or "en"
|
| 150 |
+
beam_size = _resolve_beam_size()
|
| 151 |
+
best_of = _resolve_best_of()
|
| 152 |
+
vad_filter = _resolve_vad_filter()
|
| 153 |
|
| 154 |
fd, tmp_path = tempfile.mkstemp(suffix=ext)
|
| 155 |
os.close(fd)
|
|
|
|
| 158 |
with open(tmp_path, "wb") as f:
|
| 159 |
f.write(audio_bytes)
|
| 160 |
|
| 161 |
+
def _transcribe(model_instance) -> str:
|
| 162 |
+
segments, _ = model_instance.transcribe(
|
| 163 |
tmp_path,
|
| 164 |
language=target_language,
|
| 165 |
+
beam_size=beam_size,
|
| 166 |
+
best_of=best_of,
|
| 167 |
+
vad_filter=vad_filter,
|
| 168 |
condition_on_previous_text=False,
|
| 169 |
temperature=0.0,
|
| 170 |
+
without_timestamps=True,
|
| 171 |
)
|
| 172 |
parts = []
|
| 173 |
for seg in segments:
|
|
|
|
| 176 |
parts.append(text)
|
| 177 |
return " ".join(parts).strip()
|
| 178 |
|
| 179 |
+
try:
|
| 180 |
+
text = await asyncio.to_thread(_transcribe, model)
|
| 181 |
+
except Exception as exc:
|
| 182 |
+
if not _is_cuda_runtime_error(exc):
|
| 183 |
+
raise RuntimeError(f"Whisper transcription failed: {str(exc)}") from exc
|
| 184 |
+
|
| 185 |
+
# Runtime CUDA failures can occur even after successful model construction.
|
| 186 |
+
_force_whisper_cpu_mode(exc)
|
| 187 |
+
cpu_model = await _get_whisper_model()
|
| 188 |
+
try:
|
| 189 |
+
text = await asyncio.to_thread(_transcribe, cpu_model)
|
| 190 |
+
except Exception as retry_exc:
|
| 191 |
+
raise RuntimeError(
|
| 192 |
+
f"Whisper transcription failed after CPU fallback: {str(retry_exc)}"
|
| 193 |
+
) from retry_exc
|
| 194 |
+
|
| 195 |
return text
|
| 196 |
finally:
|
| 197 |
if os.path.exists(tmp_path):
|
backend/services/tts_service.py
CHANGED
|
@@ -3,17 +3,20 @@ import os
|
|
| 3 |
import tempfile
|
| 4 |
from typing import Tuple
|
| 5 |
from collections import OrderedDict
|
|
|
|
| 6 |
|
| 7 |
_MODEL_CACHE = {}
|
| 8 |
_MODEL_LOCK = asyncio.Lock()
|
| 9 |
_AUDIO_CACHE = OrderedDict()
|
| 10 |
_AUDIO_CACHE_LOCK = asyncio.Lock()
|
| 11 |
_SYNTHESIZE_LOCK = asyncio.Lock()
|
|
|
|
| 12 |
|
| 13 |
XTTS_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
|
| 14 |
XTTS_LANGUAGE = "en"
|
| 15 |
XTTS_SPEED = 1.2
|
| 16 |
_XTTS_WARM = False
|
|
|
|
| 17 |
AUDIO_CACHE_MAX_ITEMS = 300
|
| 18 |
|
| 19 |
|
|
@@ -37,6 +40,37 @@ XTTS_SPEAKER_BY_GENDER = {
|
|
| 37 |
}
|
| 38 |
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def _select_model(voice_gender: str) -> Tuple[str, str | None]:
|
| 41 |
gender = (voice_gender or "female").strip().lower()
|
| 42 |
if gender == "male":
|
|
@@ -52,6 +86,8 @@ async def _get_tts_model(model_name: str):
|
|
| 52 |
return _MODEL_CACHE[model_name]
|
| 53 |
|
| 54 |
def _load_model():
|
|
|
|
|
|
|
| 55 |
try:
|
| 56 |
from TTS.api import TTS
|
| 57 |
except Exception as exc:
|
|
@@ -73,14 +109,26 @@ async def _get_tts_model(model_name: str):
|
|
| 73 |
except Exception:
|
| 74 |
use_gpu = False
|
| 75 |
|
|
|
|
|
|
|
|
|
|
| 76 |
if use_gpu:
|
| 77 |
try:
|
| 78 |
-
|
|
|
|
| 79 |
except Exception:
|
| 80 |
# Graceful CPU fallback when CUDA runtime is unavailable/mismatched.
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
model = await asyncio.to_thread(_load_model)
|
| 86 |
_MODEL_CACHE[model_name] = model
|
|
@@ -110,17 +158,27 @@ def _normalize_text_for_speech(value: str, max_length: int = XTTS_MAX_TEXT_LENGT
|
|
| 110 |
return trimmed
|
| 111 |
|
| 112 |
|
| 113 |
-
async def warmup_xtts_model() ->
|
| 114 |
"""Preload XTTS to avoid long cold-start on first interview question."""
|
| 115 |
-
global _XTTS_WARM
|
| 116 |
if _XTTS_WARM:
|
| 117 |
-
return
|
| 118 |
try:
|
| 119 |
await _get_tts_model(XTTS_MODEL)
|
| 120 |
_XTTS_WARM = True
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
def _synthesize_xtts_to_file(tts, text: str, speaker: str, file_path: str) -> None:
|
|
|
|
| 3 |
import tempfile
|
| 4 |
from typing import Tuple
|
| 5 |
from collections import OrderedDict
|
| 6 |
+
from functools import wraps
|
| 7 |
|
| 8 |
_MODEL_CACHE = {}
|
| 9 |
_MODEL_LOCK = asyncio.Lock()
|
| 10 |
_AUDIO_CACHE = OrderedDict()
|
| 11 |
_AUDIO_CACHE_LOCK = asyncio.Lock()
|
| 12 |
_SYNTHESIZE_LOCK = asyncio.Lock()
|
| 13 |
+
_TORCH_LOAD_PATCHED = False
|
| 14 |
|
| 15 |
XTTS_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
|
| 16 |
XTTS_LANGUAGE = "en"
|
| 17 |
XTTS_SPEED = 1.2
|
| 18 |
_XTTS_WARM = False
|
| 19 |
+
_XTTS_LAST_ERROR: str | None = None
|
| 20 |
AUDIO_CACHE_MAX_ITEMS = 300
|
| 21 |
|
| 22 |
|
|
|
|
| 40 |
}
|
| 41 |
|
| 42 |
|
| 43 |
+
def _resolve_xtts_checkpoint_trust() -> bool:
|
| 44 |
+
"""Enable trusted local checkpoint loading compatibility by default."""
|
| 45 |
+
value = os.getenv("XTTS_TRUSTED_CHECKPOINTS", "1").strip().lower()
|
| 46 |
+
return value in {"1", "true", "yes", "on"}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _ensure_torch_load_compat_for_xtts() -> None:
    """Make ``torch.load`` default to ``weights_only=False`` for XTTS checkpoints.

    The patch is applied at most once per process and only when checkpoint
    trust is enabled. It is skipped silently when torch cannot be imported
    or exposes no callable ``load``.

    NOTE(review): the replacement is process-wide, so every later
    ``torch.load`` call that does not pass ``weights_only`` explicitly gets
    full unpickling. That is only safe for checkpoints from trusted sources.
    """
    global _TORCH_LOAD_PATCHED

    if _TORCH_LOAD_PATCHED:
        return
    if not _resolve_xtts_checkpoint_trust():
        return

    try:
        import torch
    except Exception:
        return

    stock_load = getattr(torch, "load", None)
    if not callable(stock_load):
        return

    @wraps(stock_load)
    def _load_with_full_unpickle(*args, **kwargs):
        # Coqui XTTS checkpoints require full object unpickling on newer
        # PyTorch; an explicit caller-supplied value is left untouched.
        if "weights_only" not in kwargs:
            kwargs["weights_only"] = False
        return stock_load(*args, **kwargs)

    torch.load = _load_with_full_unpickle
    _TORCH_LOAD_PATCHED = True
|
| 72 |
+
|
| 73 |
+
|
| 74 |
def _select_model(voice_gender: str) -> Tuple[str, str | None]:
|
| 75 |
gender = (voice_gender or "female").strip().lower()
|
| 76 |
if gender == "male":
|
|
|
|
| 86 |
return _MODEL_CACHE[model_name]
|
| 87 |
|
| 88 |
def _load_model():
|
| 89 |
+
_ensure_torch_load_compat_for_xtts()
|
| 90 |
+
|
| 91 |
try:
|
| 92 |
from TTS.api import TTS
|
| 93 |
except Exception as exc:
|
|
|
|
| 109 |
except Exception:
|
| 110 |
use_gpu = False
|
| 111 |
|
| 112 |
+
# TTS(..., gpu=...) is deprecated upstream. Load once, then move model.
|
| 113 |
+
tts = TTS(model_name=model_name, progress_bar=False)
|
| 114 |
+
|
| 115 |
if use_gpu:
|
| 116 |
try:
|
| 117 |
+
tts.to("cuda")
|
| 118 |
+
return tts
|
| 119 |
except Exception:
|
| 120 |
# Graceful CPU fallback when CUDA runtime is unavailable/mismatched.
|
| 121 |
+
try:
|
| 122 |
+
tts.to("cpu")
|
| 123 |
+
except Exception:
|
| 124 |
+
pass
|
| 125 |
+
return tts
|
| 126 |
|
| 127 |
+
try:
|
| 128 |
+
tts.to("cpu")
|
| 129 |
+
except Exception:
|
| 130 |
+
pass
|
| 131 |
+
return tts
|
| 132 |
|
| 133 |
model = await asyncio.to_thread(_load_model)
|
| 134 |
_MODEL_CACHE[model_name] = model
|
|
|
|
| 158 |
return trimmed
|
| 159 |
|
| 160 |
|
| 161 |
+
async def warmup_xtts_model() -> bool:
    """Preload XTTS to avoid long cold-start on first interview question.

    Returns:
        True when the model was already warm or has just been loaded;
        False when loading raised, in which case the error text is kept in
        ``_XTTS_LAST_ERROR`` instead of being propagated.
    """
    global _XTTS_WARM, _XTTS_LAST_ERROR

    if not _XTTS_WARM:
        try:
            await _get_tts_model(XTTS_MODEL)
        except Exception as exc:
            # Keep API startup resilient; routes decide whether to surface this.
            _XTTS_LAST_ERROR = str(exc)
            return False
        _XTTS_WARM = True
        _XTTS_LAST_ERROR = None
    return True
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def get_xtts_warmup_state() -> dict:
    """Return the current XTTS warm-up flag and the last recorded warm-up error."""
    state = {}
    state["is_warm"] = _XTTS_WARM
    state["last_error"] = _XTTS_LAST_ERROR
    return state
|
| 182 |
|
| 183 |
|
| 184 |
def _synthesize_xtts_to_file(tts, text: str, speaker: str, file_path: str) -> None:
|
backend/utils/gemini.py
CHANGED
|
@@ -3,8 +3,11 @@ from config import get_settings
|
|
| 3 |
from utils.skills import normalize_skill_list
|
| 4 |
import asyncio
|
| 5 |
import json
|
|
|
|
| 6 |
import re
|
|
|
|
| 7 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
| 8 |
|
| 9 |
settings = get_settings()
|
| 10 |
|
|
@@ -25,30 +28,52 @@ def _is_transient_gemini_error(error: Exception) -> bool:
|
|
| 25 |
return any(marker in message for marker in transient_markers)
|
| 26 |
|
| 27 |
|
| 28 |
-
async def call_gemini(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"""Call Gemini API with a prompt and optional system instruction."""
|
|
|
|
| 30 |
config = {}
|
| 31 |
if system_instruction:
|
| 32 |
config["system_instruction"] = system_instruction
|
| 33 |
config["response_mime_type"] = "application/json"
|
| 34 |
|
| 35 |
last_error = None
|
| 36 |
-
|
| 37 |
-
|
|
|
|
| 38 |
try:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
return (response.text or "").strip()
|
| 45 |
except Exception as exc:
|
| 46 |
last_error = exc
|
| 47 |
-
if _is_transient_gemini_error(exc) and attempt <
|
| 48 |
await asyncio.sleep(0.8 * (attempt + 1))
|
| 49 |
continue
|
| 50 |
break
|
| 51 |
|
|
|
|
|
|
|
| 52 |
raise RuntimeError(f"Gemini request failed: {last_error}")
|
| 53 |
|
| 54 |
|
|
@@ -72,6 +97,25 @@ def _extract_json_object(text: str) -> str:
|
|
| 72 |
return value
|
| 73 |
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
def _fallback_skill_scan(resume_text: str) -> list:
|
| 76 |
common = [
|
| 77 |
"python", "java", "javascript", "typescript", "react", "next.js", "node.js",
|
|
@@ -386,7 +430,7 @@ Return ONLY JSON, no markdown."""
|
|
| 386 |
prompt = prompt_template.format(context=context, count=count)
|
| 387 |
|
| 388 |
try:
|
| 389 |
-
result = (await call_gemini(prompt)).strip()
|
| 390 |
data = json.loads(result)
|
| 391 |
if not isinstance(data, list):
|
| 392 |
raise ValueError("Batch response is not a list")
|
|
@@ -426,6 +470,140 @@ Return ONLY JSON, no markdown."""
|
|
| 426 |
return fallback
|
| 427 |
|
| 428 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
async def generate_followup_question_batch_from_qa(
|
| 430 |
role_title: str,
|
| 431 |
skills: list,
|
|
|
|
| 3 |
from utils.skills import normalize_skill_list
|
| 4 |
import asyncio
|
| 5 |
import json
|
| 6 |
+
import random
|
| 7 |
import re
|
| 8 |
+
from time import perf_counter
|
| 9 |
from langchain_core.prompts import PromptTemplate
|
| 10 |
+
from services.latency_service import record_latency
|
| 11 |
|
| 12 |
settings = get_settings()
|
| 13 |
|
|
|
|
| 28 |
return any(marker in message for marker in transient_markers)
|
| 29 |
|
| 30 |
|
| 31 |
+
async def call_gemini(
    prompt: str,
    system_instruction: str | None = None,
    *,
    max_attempts: int = 3,
    request_timeout_seconds: float | None = None,
) -> str:
    """Call Gemini API with a prompt and optional system instruction.

    The blocking client call runs in a worker thread. Errors classified as
    transient are retried with a linearly growing pause; the total elapsed
    time is recorded under ``"gemini_ms"`` on success and on failure.

    Args:
        prompt: Text sent as the request contents.
        system_instruction: Optional system instruction. When given, a JSON
            response MIME type is requested as well.
        max_attempts: Upper bound on attempts; falsy values or values below 1
            are treated as 1.
        request_timeout_seconds: Optional per-attempt timeout. Falsy or
            non-positive values disable the timeout.

    Returns:
        The response text with surrounding whitespace removed (``""`` when
        the response carries no text).

    Raises:
        RuntimeError: When the request ultimately failed. The underlying
            exception is attached as ``__cause__``.
    """
    started_at = perf_counter()
    config = {}
    if system_instruction:
        config["system_instruction"] = system_instruction
        # NOTE(review): nesting inferred from ``config if config else None``
        # below -- confirm the JSON MIME type is meant to be requested only
        # together with a system instruction.
        config["response_mime_type"] = "application/json"

    last_error = None

    attempts = max(1, int(max_attempts or 1))
    for attempt in range(attempts):
        try:
            def _invoke():
                return client.models.generate_content(
                    model=settings.GEMINI_MODEL,
                    contents=prompt,
                    config=config if config else None,
                )

            if request_timeout_seconds and request_timeout_seconds > 0:
                # wait_for only stops awaiting; a worker thread that is
                # already running cannot be interrupted and finishes on its own.
                response = await asyncio.wait_for(
                    asyncio.to_thread(_invoke),
                    timeout=request_timeout_seconds,
                )
            else:
                response = await asyncio.to_thread(_invoke)

            elapsed_ms = (perf_counter() - started_at) * 1000.0
            await record_latency("gemini_ms", elapsed_ms)
            return (response.text or "").strip()
        except Exception as exc:
            last_error = exc
            if _is_transient_gemini_error(exc) and attempt < attempts - 1:
                await asyncio.sleep(0.8 * (attempt + 1))
                continue
            break

    elapsed_ms = (perf_counter() - started_at) * 1000.0
    await record_latency("gemini_ms", elapsed_ms)
    # Timeout errors stringify to "", so fall back to the exception type name
    # to keep the message informative; chain the cause for debuggability.
    detail = str(last_error) or type(last_error).__name__
    raise RuntimeError(f"Gemini request failed: {detail}") from last_error
|
| 78 |
|
| 79 |
|
|
|
|
| 97 |
return value
|
| 98 |
|
| 99 |
|
| 100 |
+
def _extract_json_array(text: str) -> str:
|
| 101 |
+
value = (text or "").strip()
|
| 102 |
+
if value.startswith("```"):
|
| 103 |
+
value = value.split("\n", 1)[1]
|
| 104 |
+
if value.endswith("```"):
|
| 105 |
+
value = value.rsplit("```", 1)[0]
|
| 106 |
+
value = value.strip()
|
| 107 |
+
|
| 108 |
+
if value.startswith("[") and value.endswith("]"):
|
| 109 |
+
return value
|
| 110 |
+
|
| 111 |
+
start = value.find("[")
|
| 112 |
+
end = value.rfind("]")
|
| 113 |
+
if start != -1 and end != -1 and end > start:
|
| 114 |
+
return value[start:end + 1]
|
| 115 |
+
|
| 116 |
+
return value
|
| 117 |
+
|
| 118 |
+
|
| 119 |
def _fallback_skill_scan(resume_text: str) -> list:
|
| 120 |
common = [
|
| 121 |
"python", "java", "javascript", "typescript", "react", "next.js", "node.js",
|
|
|
|
| 430 |
prompt = prompt_template.format(context=context, count=count)
|
| 431 |
|
| 432 |
try:
|
| 433 |
+
result = _extract_json_array((await call_gemini(prompt)).strip())
|
| 434 |
data = json.loads(result)
|
| 435 |
if not isinstance(data, list):
|
| 436 |
raise ValueError("Batch response is not a list")
|
|
|
|
| 470 |
return fallback
|
| 471 |
|
| 472 |
|
| 473 |
+
async def generate_realtime_technical_round(
    role_title: str,
    resume_skills: list,
    resume_summary: str,
    jd_title: str,
    jd_description: str,
    jd_required_skills: list,
    previous_questions: list,
    count: int = 10,
) -> list:
    """Build an ordered interview round (opening through closing) from resume and JD context.

    Gemini is asked for exactly ``count`` questions. Its reply is normalized
    into dicts with ``question``, ``difficulty`` and ``category`` keys, and
    padded with generic questions when it is too short. If the request or the
    parsing fails for any reason, a fully templated round is returned instead.

    Returns:
        A list of ``count`` question dicts (``count`` is at least 1).
    """
    count = max(1, int(count or 10))
    skills = normalize_skill_list(resume_skills or [])
    jd_skills = normalize_skill_list(jd_required_skills or [])

    def _difficulty_for(position: int) -> str:
        # Positions 0-1 are warm-ups, 2-6 the technical core, the rest hard.
        if position <= 1:
            return "easy"
        if position <= 6:
            return "medium"
        return "hard"

    # Use small randomness to avoid deterministic opening phrasing across attempts.
    variation_seed = random.randint(1000, 9999)

    # Only the 30 most recent previous questions are sent along.
    recent_questions = previous_questions[-30:] if previous_questions else []
    payload = {
        "role_title": role_title,
        "resume_skills": skills,
        "resume_summary": resume_summary,
        "jd_title": jd_title,
        "jd_description": jd_description,
        "jd_required_skills": jd_skills,
        "previous_questions": recent_questions,
        "count": count,
        "variation_seed": variation_seed,
    }

    prompt_template = PromptTemplate.from_template(
        """You are an expert interviewer creating a realistic technical interview round.

Input JSON:
{payload}

Task:
Generate exactly {count} questions in sequence, simulating one real-time technical round from opening to wrap-up.

Required flow:
1) Opening/warm-up that is specific to the candidate profile and role.
2) Resume-linked experience probe.
3-7) Deep technical questions grounded in JD-required skills.
8) Debugging/failure-mode question.
9) Design/trade-off/decision-making question.
10) Final reflective closing question.

Strict rules:
1. Ask ONLY within JD required skills and role scope.
2. Use resume context to personalize wording and sequencing.
3. Do NOT repeat or closely paraphrase any question in previous_questions.
4. If previous_questions already include a generic "introduce yourself" opener, do not use that opener again.
5. Keep wording concise and interview-ready.

Return ONLY valid JSON array with objects:
- "question": string
- "difficulty": "easy" | "medium" | "hard"
- "category": string

No markdown, no extra text."""
    )

    serialized_payload = json.dumps(payload, ensure_ascii=True)
    prompt = prompt_template.format(payload=serialized_payload, count=count)

    try:
        raw_reply = (await call_gemini(prompt)).strip()
        data = json.loads(_extract_json_array(raw_reply))
        if not isinstance(data, list):
            raise ValueError("Realtime round response is not a list")

        allowed_levels = {"easy", "medium", "hard"}
        normalized = []
        for position, entry in enumerate(data[:count]):
            # Malformed entries are treated as empty so defaults fill every field.
            if not isinstance(entry, dict):
                entry = {}

            question_text = entry.get("question")
            if not question_text:
                if jd_skills:
                    topic = jd_skills[0]
                elif skills:
                    topic = skills[0]
                else:
                    topic = 'this role expectation'
                question_text = f"Explain your approach to {topic}"

            level = entry.get("difficulty")
            if level not in allowed_levels:
                level = _difficulty_for(position)

            normalized.append(
                {
                    "question": question_text,
                    "difficulty": level,
                    "category": entry.get("category") or "technical-round",
                }
            )

        # Pad a short reply with generic questions up to the requested count.
        for idx in range(len(normalized), count):
            if idx == 0:
                fallback_q = "Walk me through your background and the projects most relevant to this role."
            elif idx == count - 1:
                fallback_q = "If you had one week to improve your readiness for this role, what would you focus on and why?"
            else:
                if jd_skills:
                    target_skill = jd_skills[idx % len(jd_skills)]
                elif skills:
                    target_skill = skills[idx % len(skills)]
                else:
                    target_skill = "this requirement"
                fallback_q = f"How would you handle a practical scenario involving {target_skill}?"

            normalized.append(
                {
                    "question": fallback_q,
                    "difficulty": _difficulty_for(idx),
                    "category": "technical-round",
                }
            )

        return normalized[:count]
    except Exception:
        # Best-effort path: any failure above yields a fully templated round.
        skill_pool = jd_skills or skills or ["core technical concepts"]

        def _templated_question(idx: int) -> str:
            if idx == 0:
                return "Walk me through your background and the most role-relevant work you have done."
            if idx == 1:
                return "Pick one project from your resume and explain your exact responsibilities and impact."
            if idx == count - 2:
                return "Describe a difficult production issue you would debug for this role and your step-by-step approach."
            if idx == count - 1:
                return "What is one technical area you would improve next for this job, and what is your plan?"
            return f"How would you solve a realistic problem involving {skill_pool[idx % len(skill_pool)]}?"

        fallback = []
        for idx in range(count):
            fallback.append(
                {
                    "question": _templated_question(idx),
                    "difficulty": _difficulty_for(idx),
                    "category": "technical-round",
                }
            )
        return fallback
|
| 605 |
+
|
| 606 |
+
|
| 607 |
async def generate_followup_question_batch_from_qa(
|
| 608 |
role_title: str,
|
| 609 |
skills: list,
|
resume-jd-verification-2026-04-10T05-15-44-248Z.pdf
ADDED
|
@@ -0,0 +1,646 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%PDF-1.3
|
| 2 |
+
%�߬�
|
| 3 |
+
3 0 obj
|
| 4 |
+
<</Type /Page
|
| 5 |
+
/Parent 1 0 R
|
| 6 |
+
/Resources 2 0 R
|
| 7 |
+
/MediaBox [0 0 595.2799999999999727 841.8899999999999864]
|
| 8 |
+
/Contents 4 0 R
|
| 9 |
+
>>
|
| 10 |
+
endobj
|
| 11 |
+
4 0 obj
|
| 12 |
+
<<
|
| 13 |
+
/Length 5607
|
| 14 |
+
>>
|
| 15 |
+
stream
|
| 16 |
+
0.200025 w
|
| 17 |
+
0 G
|
| 18 |
+
BT
|
| 19 |
+
/F2 16 Tf
|
| 20 |
+
18.3999999999999986 TL
|
| 21 |
+
0 g
|
| 22 |
+
40. 795.8899999999999864 Td
|
| 23 |
+
(Resume vs Job Description Verification) Tj
|
| 24 |
+
ET
|
| 25 |
+
BT
|
| 26 |
+
/F2 11 Tf
|
| 27 |
+
12.6499999999999986 TL
|
| 28 |
+
0 g
|
| 29 |
+
40. 771.8899999999999864 Td
|
| 30 |
+
(Verification ID:) Tj
|
| 31 |
+
ET
|
| 32 |
+
BT
|
| 33 |
+
/F1 11 Tf
|
| 34 |
+
12.6499999999999986 TL
|
| 35 |
+
0 g
|
| 36 |
+
130. 771.8899999999999864 Td
|
| 37 |
+
(b5519c4e-2ab1-4bbd-a5ca-ff133b558b5b) Tj
|
| 38 |
+
ET
|
| 39 |
+
BT
|
| 40 |
+
/F2 11 Tf
|
| 41 |
+
12.6499999999999986 TL
|
| 42 |
+
0 g
|
| 43 |
+
40. 753.8899999999999864 Td
|
| 44 |
+
(Saved At:) Tj
|
| 45 |
+
ET
|
| 46 |
+
BT
|
| 47 |
+
/F1 11 Tf
|
| 48 |
+
12.6499999999999986 TL
|
| 49 |
+
0 g
|
| 50 |
+
130. 753.8899999999999864 Td
|
| 51 |
+
(4/10/2026, 10:45:29 AM) Tj
|
| 52 |
+
ET
|
| 53 |
+
BT
|
| 54 |
+
/F2 11 Tf
|
| 55 |
+
12.6499999999999986 TL
|
| 56 |
+
0 g
|
| 57 |
+
40. 735.8899999999999864 Td
|
| 58 |
+
(Role:) Tj
|
| 59 |
+
ET
|
| 60 |
+
BT
|
| 61 |
+
/F1 11 Tf
|
| 62 |
+
12.6499999999999986 TL
|
| 63 |
+
0 g
|
| 64 |
+
130. 735.8899999999999864 Td
|
| 65 |
+
(Generative AI Engineer) Tj
|
| 66 |
+
ET
|
| 67 |
+
BT
|
| 68 |
+
/F2 16 Tf
|
| 69 |
+
18.3999999999999986 TL
|
| 70 |
+
0 g
|
| 71 |
+
40. 711.8899999999999864 Td
|
| 72 |
+
(Job Description Snapshot) Tj
|
| 73 |
+
ET
|
| 74 |
+
BT
|
| 75 |
+
/F2 11 Tf
|
| 76 |
+
12.6499999999999986 TL
|
| 77 |
+
0 g
|
| 78 |
+
40. 687.8899999999999864 Td
|
| 79 |
+
(JD Title:) Tj
|
| 80 |
+
ET
|
| 81 |
+
BT
|
| 82 |
+
/F1 11 Tf
|
| 83 |
+
12.6499999999999986 TL
|
| 84 |
+
0 g
|
| 85 |
+
130. 687.8899999999999864 Td
|
| 86 |
+
(AI Engineering Intern) Tj
|
| 87 |
+
ET
|
| 88 |
+
BT
|
| 89 |
+
/F2 11 Tf
|
| 90 |
+
12.6499999999999986 TL
|
| 91 |
+
0 g
|
| 92 |
+
40. 669.8899999999999864 Td
|
| 93 |
+
(Company:) Tj
|
| 94 |
+
ET
|
| 95 |
+
BT
|
| 96 |
+
/F1 11 Tf
|
| 97 |
+
12.6499999999999986 TL
|
| 98 |
+
0 g
|
| 99 |
+
130. 669.8899999999999864 Td
|
| 100 |
+
(-) Tj
|
| 101 |
+
ET
|
| 102 |
+
BT
|
| 103 |
+
/F2 11 Tf
|
| 104 |
+
12.6499999999999986 TL
|
| 105 |
+
0 g
|
| 106 |
+
40. 651.8899999999999864 Td
|
| 107 |
+
(Required Skills:) Tj
|
| 108 |
+
ET
|
| 109 |
+
BT
|
| 110 |
+
/F1 11 Tf
|
| 111 |
+
12.6499999999999986 TL
|
| 112 |
+
0 g
|
| 113 |
+
130. 651.8899999999999864 Td
|
| 114 |
+
(Basic understanding of Machine Learning concepts \(supervised/unsupervised learning\),) Tj
|
| 115 |
+
T* (Familiarity with Python and libraries like NumPy, Pandas, Scikit-learn, Knowledge of) Tj
|
| 116 |
+
T* (deep learning frameworks \(e.g., TensorFlow or PyTorch\) is a plus Strong analytical and) Tj
|
| 117 |
+
T* (problem-solving skills) Tj
|
| 118 |
+
ET
|
| 119 |
+
BT
|
| 120 |
+
/F2 11 Tf
|
| 121 |
+
12.6499999999999986 TL
|
| 122 |
+
0 g
|
| 123 |
+
40. 595.8899999999999864 Td
|
| 124 |
+
(JD Description:) Tj
|
| 125 |
+
ET
|
| 126 |
+
BT
|
| 127 |
+
/F1 11 Tf
|
| 128 |
+
12.6499999999999986 TL
|
| 129 |
+
0 g
|
| 130 |
+
130. 595.8899999999999864 Td
|
| 131 |
+
(Key Responsibilities) Tj
|
| 132 |
+
T* () Tj
|
| 133 |
+
T* (Assist in developing and implementing AI/ML models and algorithms) Tj
|
| 134 |
+
T* (Work on data preprocessing, cleaning, and analysis) Tj
|
| 135 |
+
T* (Support model training, evaluation, and optimization) Tj
|
| 136 |
+
T* (Conduct research on the latest AI trends and technologies) Tj
|
| 137 |
+
T* (Collaborate with engineers and product teams to integrate AI solutions) Tj
|
| 138 |
+
T* (Document experiments, processes, and results) Tj
|
| 139 |
+
T* (Participate in brainstorming and problem-solving sessions) Tj
|
| 140 |
+
ET
|
| 141 |
+
BT
|
| 142 |
+
/F2 16 Tf
|
| 143 |
+
18.3999999999999986 TL
|
| 144 |
+
0 g
|
| 145 |
+
40. 463.8899999999999864 Td
|
| 146 |
+
(Resume Snapshot) Tj
|
| 147 |
+
ET
|
| 148 |
+
BT
|
| 149 |
+
/F2 11 Tf
|
| 150 |
+
12.6499999999999986 TL
|
| 151 |
+
0 g
|
| 152 |
+
40. 439.8899999999999864 Td
|
| 153 |
+
(Resume File:) Tj
|
| 154 |
+
ET
|
| 155 |
+
BT
|
| 156 |
+
/F1 11 Tf
|
| 157 |
+
12.6499999999999986 TL
|
| 158 |
+
0 g
|
| 159 |
+
130. 439.8899999999999864 Td
|
| 160 |
+
(Resume.pdf) Tj
|
| 161 |
+
ET
|
| 162 |
+
BT
|
| 163 |
+
/F2 11 Tf
|
| 164 |
+
12.6499999999999986 TL
|
| 165 |
+
0 g
|
| 166 |
+
40. 421.8899999999999864 Td
|
| 167 |
+
(Candidate:) Tj
|
| 168 |
+
ET
|
| 169 |
+
BT
|
| 170 |
+
/F1 11 Tf
|
| 171 |
+
12.6499999999999986 TL
|
| 172 |
+
0 g
|
| 173 |
+
130. 421.8899999999999864 Td
|
| 174 |
+
(SAJITH J) Tj
|
| 175 |
+
ET
|
| 176 |
+
BT
|
| 177 |
+
/F2 11 Tf
|
| 178 |
+
12.6499999999999986 TL
|
| 179 |
+
0 g
|
| 180 |
+
40. 403.8899999999999864 Td
|
| 181 |
+
(Email:) Tj
|
| 182 |
+
ET
|
| 183 |
+
BT
|
| 184 |
+
/F1 11 Tf
|
| 185 |
+
12.6499999999999986 TL
|
| 186 |
+
0 g
|
| 187 |
+
130. 403.8899999999999864 Td
|
| 188 |
+
(jsajith76@gmail.com) Tj
|
| 189 |
+
ET
|
| 190 |
+
BT
|
| 191 |
+
/F2 11 Tf
|
| 192 |
+
12.6499999999999986 TL
|
| 193 |
+
0 g
|
| 194 |
+
40. 385.8899999999999864 Td
|
| 195 |
+
(Phone:) Tj
|
| 196 |
+
ET
|
| 197 |
+
BT
|
| 198 |
+
/F1 11 Tf
|
| 199 |
+
12.6499999999999986 TL
|
| 200 |
+
0 g
|
| 201 |
+
130. 385.8899999999999864 Td
|
| 202 |
+
(+91 8637440071) Tj
|
| 203 |
+
ET
|
| 204 |
+
BT
|
| 205 |
+
/F2 11 Tf
|
| 206 |
+
12.6499999999999986 TL
|
| 207 |
+
0 g
|
| 208 |
+
40. 367.8899999999999864 Td
|
| 209 |
+
(Location:) Tj
|
| 210 |
+
ET
|
| 211 |
+
BT
|
| 212 |
+
/F1 11 Tf
|
| 213 |
+
12.6499999999999986 TL
|
| 214 |
+
0 g
|
| 215 |
+
130. 367.8899999999999864 Td
|
| 216 |
+
(Coimbatore, India) Tj
|
| 217 |
+
ET
|
| 218 |
+
BT
|
| 219 |
+
/F2 11 Tf
|
| 220 |
+
12.6499999999999986 TL
|
| 221 |
+
0 g
|
| 222 |
+
40. 349.8899999999999864 Td
|
| 223 |
+
(Extracted Skills:) Tj
|
| 224 |
+
ET
|
| 225 |
+
BT
|
| 226 |
+
/F1 11 Tf
|
| 227 |
+
12.6499999999999986 TL
|
| 228 |
+
0 g
|
| 229 |
+
130. 349.8899999999999864 Td
|
| 230 |
+
(Python, SQL, RAG Pipelines, Semantic Search, Embedding Models, Vector Similarity) Tj
|
| 231 |
+
T* (Search, Prompt Engineering, LangChain, LangGraph, LangSmith, CNN, Transformers,) Tj
|
| 232 |
+
T* (BERT Fine-tuning, RNN, LSTM, GRU, Encoder Decoder, GAN, Pinecone, ChromaDB,) Tj
|
| 233 |
+
T* (MySQL, FastAPI, Docker, Git, Github, Sentence Transformers, Scikit-learn, Llama 4,) Tj
|
| 234 |
+
T* (Gemini API, E5 Multilingual Embeddings, OCR Based Extraction, PyTorch, BERT) Tj
|
| 235 |
+
ET
|
| 236 |
+
BT
|
| 237 |
+
/F2 11 Tf
|
| 238 |
+
12.6499999999999986 TL
|
| 239 |
+
0 g
|
| 240 |
+
40. 279.8899999999999864 Td
|
| 241 |
+
(Experience Summary:) Tj
|
| 242 |
+
ET
|
| 243 |
+
BT
|
| 244 |
+
/F1 11 Tf
|
| 245 |
+
12.6499999999999986 TL
|
| 246 |
+
0 g
|
| 247 |
+
130. 279.8899999999999864 Td
|
| 248 |
+
(AI & Data Science undergraduate with practical experience in architecting and) Tj
|
| 249 |
+
T* (deploying end-to-end AI systems, specializing in Deep Learning, RAG pipelines, and) Tj
|
| 250 |
+
T* (multimodal modeling.) Tj
|
| 251 |
+
ET
|
| 252 |
+
BT
|
| 253 |
+
/F2 16 Tf
|
| 254 |
+
18.3999999999999986 TL
|
| 255 |
+
0 g
|
| 256 |
+
40. 231.8899999999999864 Td
|
| 257 |
+
(Alignment Result) Tj
|
| 258 |
+
ET
|
| 259 |
+
BT
|
| 260 |
+
/F2 11 Tf
|
| 261 |
+
12.6499999999999986 TL
|
| 262 |
+
0 g
|
| 263 |
+
40. 207.8899999999999864 Td
|
| 264 |
+
(Fit Summary:) Tj
|
| 265 |
+
ET
|
| 266 |
+
BT
|
| 267 |
+
/F1 11 Tf
|
| 268 |
+
12.6499999999999986 TL
|
| 269 |
+
0 g
|
| 270 |
+
130. 207.8899999999999864 Td
|
| 271 |
+
(The student presents an exceptional fit for the Generative AI Engineer Intern role,) Tj
|
| 272 |
+
T* (showcasing a strong academic foundation in AI/Data Science, practical deployment) Tj
|
| 273 |
+
T* (experience, and highly specialized skills in Generative AI, RAG pipelines, and LLM) Tj
|
| 274 |
+
T* (development. Their demonstrated proficiency in PyTorch and MLOps tools directly) Tj
|
| 275 |
+
T* (aligns with the job's core responsibilities and 'plus' qualifications.) Tj
|
| 276 |
+
ET
|
| 277 |
+
BT
|
| 278 |
+
/F2 12 Tf
|
| 279 |
+
13.7999999999999989 TL
|
| 280 |
+
0 g
|
| 281 |
+
40. 137.8899999999999864 Td
|
| 282 |
+
(Meeting Expectations) Tj
|
| 283 |
+
ET
|
| 284 |
+
BT
|
| 285 |
+
/F1 11 Tf
|
| 286 |
+
12.6499999999999986 TL
|
| 287 |
+
0 g
|
| 288 |
+
46. 121.8899999999999864 Td
|
| 289 |
+
(- Strong foundation in Python and deep learning frameworks, specifically PyTorch, aligning with the) Tj
|
| 290 |
+
T* ('Knowledge of deep learning frameworks \(e.g., TensorFlow or PyTorch\) is a plus' requirement.) Tj
|
| 291 |
+
ET
|
| 292 |
+
BT
|
| 293 |
+
/F1 11 Tf
|
| 294 |
+
12.6499999999999986 TL
|
| 295 |
+
0 g
|
| 296 |
+
46. 93.8899999999999864 Td
|
| 297 |
+
(- Extensive experience with Machine Learning concepts and models, including CNN, Transformers,) Tj
|
| 298 |
+
T* (BERT, RNN, LSTM, GRU, Encoder Decoder, and GANs, demonstrating a robust understanding of AI/ML) Tj
|
| 299 |
+
T* (models and algorithms.) Tj
|
| 300 |
+
ET
|
| 301 |
+
endstream
|
| 302 |
+
endobj
|
| 303 |
+
5 0 obj
|
| 304 |
+
<</Type /Page
|
| 305 |
+
/Parent 1 0 R
|
| 306 |
+
/Resources 2 0 R
|
| 307 |
+
/MediaBox [0 0 595.2799999999999727 841.8899999999999864]
|
| 308 |
+
/Contents 6 0 R
|
| 309 |
+
>>
|
| 310 |
+
endobj
|
| 311 |
+
6 0 obj
|
| 312 |
+
<<
|
| 313 |
+
/Length 3330
|
| 314 |
+
>>
|
| 315 |
+
stream
|
| 316 |
+
0.200025 w
|
| 317 |
+
0 G
|
| 318 |
+
BT
|
| 319 |
+
/F1 11 Tf
|
| 320 |
+
12.6499999999999986 TL
|
| 321 |
+
0 g
|
| 322 |
+
46. 795.8899999999999864 Td
|
| 323 |
+
(- Direct and highly relevant skills in Generative AI, RAG Pipelines, Semantic Search, Embedding) Tj
|
| 324 |
+
T* (Models, Vector Similarity Search, and Prompt Engineering, which directly supports 'developing and) Tj
|
| 325 |
+
T* (implementing AI/ML models and algorithms' for a Generative AI role.) Tj
|
| 326 |
+
ET
|
| 327 |
+
BT
|
| 328 |
+
/F1 11 Tf
|
| 329 |
+
12.6499999999999986 TL
|
| 330 |
+
0 g
|
| 331 |
+
46. 753.8899999999999864 Td
|
| 332 |
+
(- Familiarity with key ML/DL libraries like Scikit-learn, which is explicitly mentioned as a required skill.) Tj
|
| 333 |
+
ET
|
| 334 |
+
BT
|
| 335 |
+
/F1 11 Tf
|
| 336 |
+
12.6499999999999986 TL
|
| 337 |
+
0 g
|
| 338 |
+
46. 739.8899999999999864 Td
|
| 339 |
+
(- Practical experience with MLOps and deployment tools such as FastAPI, Docker, Git, and Github,) Tj
|
| 340 |
+
T* (indicating the ability to 'integrate AI solutions'.) Tj
|
| 341 |
+
ET
|
| 342 |
+
BT
|
| 343 |
+
/F1 11 Tf
|
| 344 |
+
12.6499999999999986 TL
|
| 345 |
+
0 g
|
| 346 |
+
46. 711.8899999999999864 Td
|
| 347 |
+
(- Experience with LLMs like Llama 4, Gemini API, and fine-tuning BERT, showing proactive 'research on) Tj
|
| 348 |
+
T* (the latest AI trends and technologies'.) Tj
|
| 349 |
+
ET
|
| 350 |
+
BT
|
| 351 |
+
/F1 11 Tf
|
| 352 |
+
12.6499999999999986 TL
|
| 353 |
+
0 g
|
| 354 |
+
46. 683.8899999999999864 Td
|
| 355 |
+
(- Skills in managing data for AI, including Pinecone, ChromaDB, MySQL, and OCR Based Extraction,) Tj
|
| 356 |
+
T* (relevant to 'data preprocessing, cleaning, and analysis' and 'model training, evaluation, and optimization'.) Tj
|
| 357 |
+
ET
|
| 358 |
+
BT
|
| 359 |
+
/F1 11 Tf
|
| 360 |
+
12.6499999999999986 TL
|
| 361 |
+
0 g
|
| 362 |
+
46. 655.8899999999999864 Td
|
| 363 |
+
(- The resume summary highlights 'architecting and deploying end-to-end AI systems', which implies) Tj
|
| 364 |
+
T* (strong analytical and problem-solving skills, as well as the ability to 'collaborate with engineers and) Tj
|
| 365 |
+
T* (product teams'.) Tj
|
| 366 |
+
ET
|
| 367 |
+
BT
|
| 368 |
+
/F2 12 Tf
|
| 369 |
+
13.7999999999999989 TL
|
| 370 |
+
0 g
|
| 371 |
+
40. 607.8899999999999864 Td
|
| 372 |
+
(Missing Expectations) Tj
|
| 373 |
+
ET
|
| 374 |
+
BT
|
| 375 |
+
/F1 11 Tf
|
| 376 |
+
12.6499999999999986 TL
|
| 377 |
+
0 g
|
| 378 |
+
46. 591.8899999999999864 Td
|
| 379 |
+
(- While likely used, specific mention of 'NumPy' and 'Pandas' as explicit skills is absent from the resume.) Tj
|
| 380 |
+
ET
|
| 381 |
+
BT
|
| 382 |
+
/F1 11 Tf
|
| 383 |
+
12.6499999999999986 TL
|
| 384 |
+
0 g
|
| 385 |
+
46. 577.8899999999999864 Td
|
| 386 |
+
(- The resume could more explicitly detail experience in 'data preprocessing, cleaning, and analysis' for) Tj
|
| 387 |
+
T* (diverse datasets, beyond what's implied by 'RAG Pipelines' and 'OCR Based Extraction'.) Tj
|
| 388 |
+
ET
|
| 389 |
+
BT
|
| 390 |
+
/F2 12 Tf
|
| 391 |
+
13.7999999999999989 TL
|
| 392 |
+
0 g
|
| 393 |
+
40. 543.8899999999999864 Td
|
| 394 |
+
(Improvement Suggestions) Tj
|
| 395 |
+
ET
|
| 396 |
+
BT
|
| 397 |
+
/F1 11 Tf
|
| 398 |
+
12.6499999999999986 TL
|
| 399 |
+
0 g
|
| 400 |
+
46. 527.8899999999999864 Td
|
| 401 |
+
(- Add 'NumPy' and 'Pandas' to your skills list if you have experience with them, as they are foundational) Tj
|
| 402 |
+
T* (for data manipulation in Python.) Tj
|
| 403 |
+
ET
|
| 404 |
+
BT
|
| 405 |
+
/F1 11 Tf
|
| 406 |
+
12.6499999999999986 TL
|
| 407 |
+
0 g
|
| 408 |
+
46. 499.8899999999999864 Td
|
| 409 |
+
(- Prepare specific examples from past projects where you handled significant 'data preprocessing,) Tj
|
| 410 |
+
T* (cleaning, and analysis' challenges, detailing the techniques used and the impact.) Tj
|
| 411 |
+
ET
|
| 412 |
+
BT
|
| 413 |
+
/F1 11 Tf
|
| 414 |
+
12.6499999999999986 TL
|
| 415 |
+
0 g
|
| 416 |
+
46. 471.8899999999999864 Td
|
| 417 |
+
(- When discussing projects, explicitly highlight your contributions to 'documenting experiments,) Tj
|
| 418 |
+
T* (processes, and results' and examples of 'collaborating with engineers and product teams' to showcase) Tj
|
| 419 |
+
T* (teamwork and communication skills.) Tj
|
| 420 |
+
ET
|
| 421 |
+
BT
|
| 422 |
+
/F1 11 Tf
|
| 423 |
+
12.6499999999999986 TL
|
| 424 |
+
0 g
|
| 425 |
+
46. 429.8899999999999864 Td
|
| 426 |
+
(- Quantify your experience where possible \(e.g., 'deployed X RAG pipelines serving Y users', 'improved) Tj
|
| 427 |
+
T* (model performance by Z%'\), to demonstrate impact and scale.) Tj
|
| 428 |
+
ET
|
| 429 |
+
endstream
|
| 430 |
+
endobj
|
| 431 |
+
1 0 obj
|
| 432 |
+
<</Type /Pages
|
| 433 |
+
/Kids [3 0 R 5 0 R ]
|
| 434 |
+
/Count 2
|
| 435 |
+
>>
|
| 436 |
+
endobj
|
| 437 |
+
7 0 obj
|
| 438 |
+
<<
|
| 439 |
+
/Type /Font
|
| 440 |
+
/BaseFont /Helvetica
|
| 441 |
+
/Subtype /Type1
|
| 442 |
+
/Encoding /WinAnsiEncoding
|
| 443 |
+
/FirstChar 32
|
| 444 |
+
/LastChar 255
|
| 445 |
+
>>
|
| 446 |
+
endobj
|
| 447 |
+
8 0 obj
|
| 448 |
+
<<
|
| 449 |
+
/Type /Font
|
| 450 |
+
/BaseFont /Helvetica-Bold
|
| 451 |
+
/Subtype /Type1
|
| 452 |
+
/Encoding /WinAnsiEncoding
|
| 453 |
+
/FirstChar 32
|
| 454 |
+
/LastChar 255
|
| 455 |
+
>>
|
| 456 |
+
endobj
|
| 457 |
+
9 0 obj
|
| 458 |
+
<<
|
| 459 |
+
/Type /Font
|
| 460 |
+
/BaseFont /Helvetica-Oblique
|
| 461 |
+
/Subtype /Type1
|
| 462 |
+
/Encoding /WinAnsiEncoding
|
| 463 |
+
/FirstChar 32
|
| 464 |
+
/LastChar 255
|
| 465 |
+
>>
|
| 466 |
+
endobj
|
| 467 |
+
10 0 obj
|
| 468 |
+
<<
|
| 469 |
+
/Type /Font
|
| 470 |
+
/BaseFont /Helvetica-BoldOblique
|
| 471 |
+
/Subtype /Type1
|
| 472 |
+
/Encoding /WinAnsiEncoding
|
| 473 |
+
/FirstChar 32
|
| 474 |
+
/LastChar 255
|
| 475 |
+
>>
|
| 476 |
+
endobj
|
| 477 |
+
11 0 obj
|
| 478 |
+
<<
|
| 479 |
+
/Type /Font
|
| 480 |
+
/BaseFont /Courier
|
| 481 |
+
/Subtype /Type1
|
| 482 |
+
/Encoding /WinAnsiEncoding
|
| 483 |
+
/FirstChar 32
|
| 484 |
+
/LastChar 255
|
| 485 |
+
>>
|
| 486 |
+
endobj
|
| 487 |
+
12 0 obj
|
| 488 |
+
<<
|
| 489 |
+
/Type /Font
|
| 490 |
+
/BaseFont /Courier-Bold
|
| 491 |
+
/Subtype /Type1
|
| 492 |
+
/Encoding /WinAnsiEncoding
|
| 493 |
+
/FirstChar 32
|
| 494 |
+
/LastChar 255
|
| 495 |
+
>>
|
| 496 |
+
endobj
|
| 497 |
+
13 0 obj
|
| 498 |
+
<<
|
| 499 |
+
/Type /Font
|
| 500 |
+
/BaseFont /Courier-Oblique
|
| 501 |
+
/Subtype /Type1
|
| 502 |
+
/Encoding /WinAnsiEncoding
|
| 503 |
+
/FirstChar 32
|
| 504 |
+
/LastChar 255
|
| 505 |
+
>>
|
| 506 |
+
endobj
|
| 507 |
+
14 0 obj
|
| 508 |
+
<<
|
| 509 |
+
/Type /Font
|
| 510 |
+
/BaseFont /Courier-BoldOblique
|
| 511 |
+
/Subtype /Type1
|
| 512 |
+
/Encoding /WinAnsiEncoding
|
| 513 |
+
/FirstChar 32
|
| 514 |
+
/LastChar 255
|
| 515 |
+
>>
|
| 516 |
+
endobj
|
| 517 |
+
15 0 obj
|
| 518 |
+
<<
|
| 519 |
+
/Type /Font
|
| 520 |
+
/BaseFont /Times-Roman
|
| 521 |
+
/Subtype /Type1
|
| 522 |
+
/Encoding /WinAnsiEncoding
|
| 523 |
+
/FirstChar 32
|
| 524 |
+
/LastChar 255
|
| 525 |
+
>>
|
| 526 |
+
endobj
|
| 527 |
+
16 0 obj
|
| 528 |
+
<<
|
| 529 |
+
/Type /Font
|
| 530 |
+
/BaseFont /Times-Bold
|
| 531 |
+
/Subtype /Type1
|
| 532 |
+
/Encoding /WinAnsiEncoding
|
| 533 |
+
/FirstChar 32
|
| 534 |
+
/LastChar 255
|
| 535 |
+
>>
|
| 536 |
+
endobj
|
| 537 |
+
17 0 obj
|
| 538 |
+
<<
|
| 539 |
+
/Type /Font
|
| 540 |
+
/BaseFont /Times-Italic
|
| 541 |
+
/Subtype /Type1
|
| 542 |
+
/Encoding /WinAnsiEncoding
|
| 543 |
+
/FirstChar 32
|
| 544 |
+
/LastChar 255
|
| 545 |
+
>>
|
| 546 |
+
endobj
|
| 547 |
+
18 0 obj
|
| 548 |
+
<<
|
| 549 |
+
/Type /Font
|
| 550 |
+
/BaseFont /Times-BoldItalic
|
| 551 |
+
/Subtype /Type1
|
| 552 |
+
/Encoding /WinAnsiEncoding
|
| 553 |
+
/FirstChar 32
|
| 554 |
+
/LastChar 255
|
| 555 |
+
>>
|
| 556 |
+
endobj
|
| 557 |
+
19 0 obj
|
| 558 |
+
<<
|
| 559 |
+
/Type /Font
|
| 560 |
+
/BaseFont /ZapfDingbats
|
| 561 |
+
/Subtype /Type1
|
| 562 |
+
/FirstChar 32
|
| 563 |
+
/LastChar 255
|
| 564 |
+
>>
|
| 565 |
+
endobj
|
| 566 |
+
20 0 obj
|
| 567 |
+
<<
|
| 568 |
+
/Type /Font
|
| 569 |
+
/BaseFont /Symbol
|
| 570 |
+
/Subtype /Type1
|
| 571 |
+
/FirstChar 32
|
| 572 |
+
/LastChar 255
|
| 573 |
+
>>
|
| 574 |
+
endobj
|
| 575 |
+
2 0 obj
|
| 576 |
+
<<
|
| 577 |
+
/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
|
| 578 |
+
/Font <<
|
| 579 |
+
/F1 7 0 R
|
| 580 |
+
/F2 8 0 R
|
| 581 |
+
/F3 9 0 R
|
| 582 |
+
/F4 10 0 R
|
| 583 |
+
/F5 11 0 R
|
| 584 |
+
/F6 12 0 R
|
| 585 |
+
/F7 13 0 R
|
| 586 |
+
/F8 14 0 R
|
| 587 |
+
/F9 15 0 R
|
| 588 |
+
/F10 16 0 R
|
| 589 |
+
/F11 17 0 R
|
| 590 |
+
/F12 18 0 R
|
| 591 |
+
/F13 19 0 R
|
| 592 |
+
/F14 20 0 R
|
| 593 |
+
>>
|
| 594 |
+
/XObject <<
|
| 595 |
+
>>
|
| 596 |
+
>>
|
| 597 |
+
endobj
|
| 598 |
+
21 0 obj
|
| 599 |
+
<<
|
| 600 |
+
/Producer (jsPDF 4.2.1)
|
| 601 |
+
/CreationDate (D:20260410104544+05'30')
|
| 602 |
+
>>
|
| 603 |
+
endobj
|
| 604 |
+
22 0 obj
|
| 605 |
+
<<
|
| 606 |
+
/Type /Catalog
|
| 607 |
+
/Pages 1 0 R
|
| 608 |
+
/OpenAction [3 0 R /FitH null]
|
| 609 |
+
/PageLayout /OneColumn
|
| 610 |
+
>>
|
| 611 |
+
endobj
|
| 612 |
+
xref
|
| 613 |
+
0 23
|
| 614 |
+
0000000000 65535 f
|
| 615 |
+
0000009330 00000 n
|
| 616 |
+
0000011155 00000 n
|
| 617 |
+
0000000015 00000 n
|
| 618 |
+
0000000152 00000 n
|
| 619 |
+
0000005811 00000 n
|
| 620 |
+
0000005948 00000 n
|
| 621 |
+
0000009393 00000 n
|
| 622 |
+
0000009518 00000 n
|
| 623 |
+
0000009648 00000 n
|
| 624 |
+
0000009781 00000 n
|
| 625 |
+
0000009919 00000 n
|
| 626 |
+
0000010043 00000 n
|
| 627 |
+
0000010172 00000 n
|
| 628 |
+
0000010304 00000 n
|
| 629 |
+
0000010440 00000 n
|
| 630 |
+
0000010568 00000 n
|
| 631 |
+
0000010695 00000 n
|
| 632 |
+
0000010824 00000 n
|
| 633 |
+
0000010957 00000 n
|
| 634 |
+
0000011059 00000 n
|
| 635 |
+
0000011405 00000 n
|
| 636 |
+
0000011491 00000 n
|
| 637 |
+
trailer
|
| 638 |
+
<<
|
| 639 |
+
/Size 23
|
| 640 |
+
/Root 22 0 R
|
| 641 |
+
/Info 21 0 R
|
| 642 |
+
/ID [ <95A654D90B03BE650BD8733007BC1C07> <95A654D90B03BE650BD8733007BC1C07> ]
|
| 643 |
+
>>
|
| 644 |
+
startxref
|
| 645 |
+
11595
|
| 646 |
+
%%EOF
|