Spaces:

sajith-0701
/

interviewbot

Sleeping

App Files Files Community

sajith-0701 committed on Apr 10

Commit

5837391

1 Parent(s): be9a4dd

v3.1

Browse files

Updated the interview flow so that it works, and implemented GPU support for TTS and STT.

Files changed (12) hide show

backend/database.py +3 -0
backend/routers/interview.py +80 -0
backend/routers/speech.py +53 -8
backend/services/evaluation_service.py +4 -0
backend/services/gemini_service.py +281 -0
backend/services/interview_service.py +1064 -298
backend/services/latency_service.py +179 -0
backend/services/queue_service.py +194 -0
backend/services/stt_service.py +96 -9
backend/services/tts_service.py +67 -9
backend/utils/gemini.py +188 -10
resume-jd-verification-2026-04-10T05-15-44-248Z.pdf +646 -0

backend/database.py CHANGED Viewed

@@ -27,7 +27,10 @@ async def connect_db():
     await db.sessions.create_index("user_id")
     await db.results.create_index("session_id")
     await db.results.create_index("user_id")
     await db.questions.create_index("role_id")
     # Redis
     redis_client = aioredis.from_url(

     await db.sessions.create_index("user_id")
     await db.results.create_index("session_id")
     await db.results.create_index("user_id")
+    await db.answers.create_index("user_id")
+    await db.answers.create_index("session_id")
     await db.questions.create_index("role_id")
+    await db.jd_verifications.create_index([("user_id", 1), ("cache_key", 1)])
     # Redis
     redis_client = aioredis.from_url(

backend/routers/interview.py CHANGED Viewed

@@ -12,9 +12,11 @@ from services.interview_service import (
     start_interview,
     verify_resume_job_description,
     submit_answer,
     quit_interview,
 )
 from services.evaluation_service import generate_report
 router = APIRouter()
@@ -41,6 +43,28 @@ async def start_interview_endpoint(
         raise HTTPException(status_code=500, detail=str(e))
 @router.post("/verify")
 async def verify_resume_job_description_endpoint(
     request: VerifyResumeJdRequest,
@@ -80,6 +104,43 @@ async def submit_answer_endpoint(
         raise HTTPException(status_code=500, detail=str(e))
 @router.post("/quit")
 async def quit_interview_endpoint(
     request: QuitInterviewRequest,
@@ -111,6 +172,25 @@ async def quit_interview_endpoint(
         raise HTTPException(status_code=500, detail=str(e))
 @router.get("/report")
 async def get_interview_report(
     session_id: str,

     start_interview,
     verify_resume_job_description,
     submit_answer,
+    get_next_question,
     quit_interview,
 )
 from services.evaluation_service import generate_report
+from services.latency_service import get_latency_metrics, reset_latency_metrics
 router = APIRouter()
         raise HTTPException(status_code=500, detail=str(e))
+@router.post("/start_interview")
+async def start_interview_compat_endpoint(
+    request: StartInterviewRequest,
+    current_user: dict = Depends(get_current_user),
+):
+    """Compatibility endpoint aligned with alternate API naming."""
+    # Thin alias over start_interview(): forwards the request fields plus the
+    # authenticated user's id and maps service errors onto HTTP status codes
+    # (ValueError -> 400, any other exception -> 500).
+    try:
+        # Assembled inside the try so a malformed user/request object is
+        # reported through the same 500 path as any other unexpected failure.
+        service_args = dict(
+            user_id=current_user["user_id"],
+            role_id=request.role_id,
+            custom_role=request.custom_role,
+            interview_type=request.interview_type,
+            topic_id=request.topic_id,
+            job_description_id=request.job_description_id,
+        )
+        return await start_interview(**service_args)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
 @router.post("/verify")
 async def verify_resume_job_description_endpoint(
     request: VerifyResumeJdRequest,
         raise HTTPException(status_code=500, detail=str(e))
+@router.post("/submit_answer")
+async def submit_answer_compat_endpoint(
+    request: SubmitAnswerRequest,
+    current_user: dict = Depends(get_current_user),
+):
+    """Compatibility endpoint aligned with alternate API naming."""
+    # Thin alias over submit_answer(). current_user is resolved only through
+    # the dependency; no user id is passed on to the service call.
+    try:
+        # Assembled inside the try so attribute errors on the request surface
+        # as a 500 response like any other unexpected failure.
+        service_args = dict(
+            session_id=request.session_id,
+            question_id=request.question_id,
+            answer=request.answer,
+        )
+        return await submit_answer(**service_args)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.get("/next_question")
+async def get_next_question_endpoint(
+    session_id: str,
+    current_user: dict = Depends(get_current_user),
+):
+    """Preview next queued question without modifying answer state."""
+    # Delegates to get_next_question() with the session id from the query
+    # string and the authenticated user's id; ValueError -> 400, else 500.
+    try:
+        # The user id lookup stays inside the try so a missing key is reported
+        # through the generic 500 handler below.
+        owner_id = current_user["user_id"]
+        return await get_next_question(session_id=session_id, user_id=owner_id)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc))
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
 @router.post("/quit")
 async def quit_interview_endpoint(
     request: QuitInterviewRequest,
         raise HTTPException(status_code=500, detail=str(e))
+@router.get("/latency")
+async def interview_latency_metrics(
+    sample_size: int = 500,
+    current_user: dict = Depends(get_current_user),
+):
+    """Get STT/submit/Gemini latency metrics with p50 and p95."""
+    # The dependency is declared only so the route requires an authenticated
+    # caller; the resolved user is deliberately unused.
+    _ = current_user
+    metrics = await get_latency_metrics(sample_size=sample_size)
+    return metrics
+@router.post("/latency/reset")
+async def reset_interview_latency_metrics(
+    current_user: dict = Depends(get_current_user),
+):
+    """Reset latency metric samples to start a fresh before/after comparison."""
+    # The dependency is declared only so the route requires an authenticated
+    # caller; the resolved user is deliberately unused.
+    _ = current_user
+    reset_result = await reset_latency_metrics()
+    return reset_result
 @router.get("/report")
 async def get_interview_report(
     session_id: str,

backend/routers/speech.py CHANGED Viewed

@@ -1,10 +1,12 @@
 from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
 from fastapi.responses import Response
 from pydantic import BaseModel
 from auth.jwt import get_current_user
-from services.tts_service import synthesize_wav, warmup_xtts_model
 from services.stt_service import transcribe_audio_bytes, warmup_whisper_model
 router = APIRouter()
@@ -17,15 +19,34 @@ class SpeechSynthesisRequest(BaseModel):
 @router.get("/health")
 async def speech_health(current_user: dict = Depends(get_current_user)):
     """Check whether speech route is available for authenticated users."""
-    return {"status": "ok", "service": "speech"}
 @router.post("/warmup")
 async def speech_warmup(current_user: dict = Depends(get_current_user)):
     """Warm XTTS model so first interview playback does not hit cold-start delay."""
-    await warmup_xtts_model()
     await warmup_whisper_model()
-    return {"status": "ok", "message": "speech model warmed"}
 @router.post("/synthesize")
@@ -42,13 +63,34 @@ async def synthesize_speech(
     except RuntimeError as e:
         # XTTS may be in cold-start transition; warm once and retry before failing.
         try:
-            await warmup_xtts_model()
             wav_bytes = await synthesize_wav(request.text, request.voice_gender)
             return Response(content=wav_bytes, media_type="audio/wav")
-        except RuntimeError:
             raise HTTPException(status_code=503, detail=str(e))
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Speech synthesis failed: {str(e)}")
 @router.post("/transcribe")
@@ -58,6 +100,7 @@ async def transcribe_speech(
     current_user: dict = Depends(get_current_user),
 ):
     """Transcribe uploaded interview audio using Whisper model."""
     try:
         payload = await audio.read()
         text = await transcribe_audio_bytes(
@@ -65,7 +108,9 @@ async def transcribe_speech(
             filename=audio.filename or "speech.webm",
             language=language,
         )
-        return {"text": text}
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
     except RuntimeError as e:

 from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
 from fastapi.responses import Response
 from pydantic import BaseModel
+from time import perf_counter
 from auth.jwt import get_current_user
+from services.tts_service import synthesize_wav, warmup_xtts_model, get_xtts_warmup_state
 from services.stt_service import transcribe_audio_bytes, warmup_whisper_model
+from services.latency_service import record_latency
 router = APIRouter()
 @router.get("/health")
 async def speech_health(current_user: dict = Depends(get_current_user)):
     """Check whether speech route is available for authenticated users."""
+    # current_user is resolved only through the dependency; its value is
+    # intentionally unused.
+    _ = current_user
+    # Report the XTTS warm-up flag alongside the static "ok" status; a missing
+    # or falsy "is_warm" entry is reported as False.
+    state = get_xtts_warmup_state()
+    return {
+        "status": "ok",
+        "service": "speech",
+        "xtts_ready": bool(state.get("is_warm")),
+    }
 @router.post("/warmup")
 async def speech_warmup(current_user: dict = Depends(get_current_user)):
     """Warm XTTS model so first interview playback does not hit cold-start delay."""
+    # current_user is resolved only through the dependency; its value is
+    # intentionally unused.
+    _ = current_user
+    # warmup_xtts_model() returns a truthy value when XTTS is ready.
+    xtts_ready = await warmup_xtts_model()
     await warmup_whisper_model()
+    # Read the state after both warm-ups so last_error can be included in the
+    # 503 detail when XTTS did not come up.
+    state = get_xtts_warmup_state()
+    if not xtts_ready:
+        raise HTTPException(
+            status_code=503,
+            detail=f"XTTS warmup failed: {state.get('last_error') or 'unknown error'}",
+        )
+    return {
+        "status": "ok",
+        "message": "speech model warmed",
+        "xtts_ready": True,
+    }
 @router.post("/synthesize")
     except RuntimeError as e:
         # XTTS may be in cold-start transition; warm once and retry before failing.
         try:
+            xtts_ready = await warmup_xtts_model()
+            if not xtts_ready:
+                state = get_xtts_warmup_state()
+                raise HTTPException(
+                    status_code=503,
+                    detail=f"XTTS warmup failed: {state.get('last_error') or str(e)}",
+                )
             wav_bytes = await synthesize_wav(request.text, request.voice_gender)
             return Response(content=wav_bytes, media_type="audio/wav")
+        except HTTPException:
+            raise
+        except Exception:
             raise HTTPException(status_code=503, detail=str(e))
     except Exception as e:
+        # Retry once after explicit warmup even for non-RuntimeError failures.
+        try:
+            xtts_ready = await warmup_xtts_model()
+            if xtts_ready:
+                wav_bytes = await synthesize_wav(request.text, request.voice_gender)
+                return Response(content=wav_bytes, media_type="audio/wav")
+        except Exception:
+            pass
+        state = get_xtts_warmup_state()
+        raise HTTPException(
+            status_code=503,
+            detail=f"Speech synthesis backend unavailable: {state.get('last_error') or str(e)}",
+        )
 @router.post("/transcribe")
     current_user: dict = Depends(get_current_user),
 ):
     """Transcribe uploaded interview audio using Whisper model."""
+    started_at = perf_counter()
     try:
         payload = await audio.read()
         text = await transcribe_audio_bytes(
             filename=audio.filename or "speech.webm",
             language=language,
         )
+        elapsed_ms = (perf_counter() - started_at) * 1000.0
+        await record_latency("stt_ms", elapsed_ms)
+        return {"text": text, "stt_ms": round(elapsed_ms, 2)}
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
     except RuntimeError as e:

backend/services/evaluation_service.py CHANGED Viewed

@@ -102,6 +102,10 @@ async def generate_report(session_id: str, user_id: str) -> dict:
         f"session:{session_id}",
         f"session:{session_id}:questions",
         f"session:{session_id}:pending_questions",
         f"session:{session_id}:answers",
     ]
     for qid in question_ids:

         f"session:{session_id}",
         f"session:{session_id}:questions",
         f"session:{session_id}:pending_questions",
+        f"session:{session_id}:question_queue",
+        f"session:{session_id}:question_backlog",
+        f"session:{session_id}:context_cache",
+        f"session:{session_id}:asked_questions_set",
         f"session:{session_id}:answers",
     ]
     for qid in question_ids:

backend/services/gemini_service.py ADDED Viewed

	@@ -0,0 +1,281 @@

+import json
+import re
+from utils.gemini import call_gemini
+def _extract_json_object(text: str) -> str:
+    """Return the JSON-object portion of a model reply.
+
+    Strips a surrounding Markdown code fence (with or without a language tag)
+    and, when other text surrounds the object, narrows the result to the span
+    from the first "{" to the last "}". If no such span exists the trimmed
+    text is returned unchanged. ``None`` and empty input yield "".
+    """
+    value = (text or "").strip()
+    if value.startswith("```"):
+        # Drop the opening fence line (e.g. "```json"). partition() keeps this
+        # safe for single-line replies such as "```{...}```" or a bare "```",
+        # where indexing split("\n", 1)[1] would raise IndexError.
+        _, newline, remainder = value.partition("\n")
+        value = remainder if newline else value[3:]
+    if value.endswith("```"):
+        value = value.rsplit("```", 1)[0]
+    value = value.strip()
+    if value.startswith("{") and value.endswith("}"):
+        return value
+    start = value.find("{")
+    end = value.rfind("}")
+    # end > start already implies end != -1 once start has been found.
+    if start != -1 and end > start:
+        return value[start:end + 1]
+    return value
+def _extract_json_array(text: str) -> str:
+    """Return the JSON-array portion of a model reply.
+
+    Strips a surrounding Markdown code fence (with or without a language tag)
+    and, when other text surrounds the array, narrows the result to the span
+    from the first "[" to the last "]". If no such span exists the trimmed
+    text is returned unchanged. ``None`` and empty input yield "".
+    """
+    value = (text or "").strip()
+    if value.startswith("```"):
+        # Drop the opening fence line (e.g. "```json"). partition() keeps this
+        # safe for single-line replies such as "```[...]```" or a bare "```",
+        # where indexing split("\n", 1)[1] would raise IndexError.
+        _, newline, remainder = value.partition("\n")
+        value = remainder if newline else value[3:]
+    if value.endswith("```"):
+        value = value.rsplit("```", 1)[0]
+    value = value.strip()
+    if value.startswith("[") and value.endswith("]"):
+        return value
+    start = value.find("[")
+    end = value.rfind("]")
+    # end > start already implies end != -1 once start has been found.
+    if start != -1 and end > start:
+        return value[start:end + 1]
+    return value
+def _fallback_score(answer: str) -> int:
+    """Heuristic score used when no model evaluation is available.
+
+    Very short answers score lowest (35 under 10 words, 55 under 25 words).
+    Longer answers containing a hedging phrase score 50; otherwise answers of
+    more than 80 words score 75 and everything else 65.
+    """
+    normalized = (answer or "").strip().lower()
+    word_count = len(normalized.split())
+    # Length is checked first, so hedging only matters from 25 words upward.
+    if word_count < 10:
+        return 35
+    if word_count < 25:
+        return 55
+    hedging_phrases = ("not sure", "maybe", "i think", "dont know", "don't know")
+    for phrase in hedging_phrases:
+        if phrase in normalized:
+            return 50
+    return 75 if word_count > 80 else 65
+async def generate_resume_seed_questions(
+    role_title: str,
+    resume_summary: str,
+    resume_skills: list[str],
+    jd_title: str,
+    jd_description: str,
+    jd_required_skills: list[str],
+    excluded_questions: list[str],
+    count: int = 2,
+) -> list[dict]:
+    """Ask Gemini for opening resume-interview questions.
+
+    Sends the role, resume and job-description context (plus the last 25
+    excluded questions) in a single-attempt request and returns up to
+    ``count`` dicts with ``question``, ``difficulty`` and ``category`` keys.
+    Items without question text are dropped. If the call or the parsing
+    raises, ``count`` templated questions about the first available skill
+    are returned instead.
+    """
+    count = max(1, int(count or 2))
+    recent_exclusions = excluded_questions[-25:] if excluded_questions else []
+    payload = {
+        "role_title": role_title,
+        "resume_summary": resume_summary,
+        "resume_skills": resume_skills,
+        "jd_title": jd_title,
+        "jd_description": jd_description,
+        "jd_required_skills": jd_required_skills,
+        "excluded_questions": recent_exclusions,
+        "count": count,
+    }
+    prompt = f"""Generate exactly {count} resume interview questions.
+Input JSON:
+{json.dumps(payload, ensure_ascii=True)}
+Rules:
+1) Questions must be strictly from JD required skills and role context.
+2) Use resume context for relevance.
+3) Do not repeat or paraphrase excluded_questions.
+4) Keep questions concise and practical.
+Return ONLY valid JSON array with objects:
+- question (string)
+- difficulty (easy|medium|hard)
+- category (string)
+"""
+    try:
+        parsed = json.loads(
+            _extract_json_array(
+                await call_gemini(
+                    prompt,
+                    max_attempts=1,
+                    request_timeout_seconds=3.5,
+                )
+            )
+        )
+        if not isinstance(parsed, list):
+            raise ValueError("seed output is not a list")
+        seeds = []
+        for entry in parsed[:count]:
+            # Non-dict items are treated as empty and therefore filtered out.
+            fields = entry if isinstance(entry, dict) else {}
+            level = fields.get("difficulty")
+            candidate = {
+                "question": (fields.get("question") or "").strip(),
+                "difficulty": level if level in {"easy", "medium", "hard"} else "medium",
+                "category": fields.get("category") or "resume-seed",
+            }
+            if candidate["question"]:
+                seeds.append(candidate)
+        return seeds
+    except Exception:
+        # Best-effort fallback: prefer a JD skill, then a resume skill.
+        if jd_required_skills:
+            base_skill = jd_required_skills[0]
+        elif resume_skills:
+            base_skill = resume_skills[0]
+        else:
+            base_skill = "this role"
+        opening = f"Explain your hands-on experience with {base_skill} in a project relevant to {role_title}."
+        later = f"What trade-offs did you consider when working with {base_skill}?"
+        return [
+            {
+                "question": opening if position == 0 else later,
+                "difficulty": "medium",
+                "category": "resume-seed",
+            }
+            for position in range(count)
+        ]
+async def evaluate_and_generate_followup(
+    role_title: str,
+    required_skills: list[str],
+    recent_context: list[dict],
+    current_question: str,
+    current_answer: str,
+    excluded_questions: list[str],
+) -> dict:
+    """Score the current answer and obtain one follow-up question from Gemini.
+
+    Sends the role, required skills, the last three context entries, the
+    current question/answer and the last 25 excluded questions in a
+    single-attempt request. Returns a dict with ``score`` (int, clamped to
+    0-100), ``feedback``, ``followup_question``, ``difficulty`` and
+    ``category``. If the call or the parsing raises, a heuristic score and a
+    templated follow-up about the first required skill are returned instead.
+    """
+    payload = {
+        "role_title": role_title,
+        "required_skills": required_skills,
+        "recent_context": recent_context[-3:] if recent_context else [],
+        "current_question": current_question,
+        "current_answer": current_answer,
+        "excluded_questions": excluded_questions[-25:] if excluded_questions else [],
+    }
+    prompt = f"""You are a strict technical interviewer.
+Input JSON:
+{json.dumps(payload, ensure_ascii=True)}
+Task:
+1) Evaluate current_answer for current_question.
+2) Generate one non-duplicate follow-up question.
+Rules:
+1) Follow-up must stay within required_skills only.
+2) Use recent_context for continuity.
+3) Do not repeat/paraphrase excluded_questions.
+4) Score should reflect conceptual correctness, not verbosity.
+Return ONLY valid JSON object:
+{{
+  "score": 0-100,
+  "feedback": "short technical feedback",
+  "followup_question": "...",
+  "difficulty": "easy|medium|hard",
+  "category": "..."
+}}
+"""
+    try:
+        result = _extract_json_object(
+            await call_gemini(
+                prompt,
+                max_attempts=1,
+                request_timeout_seconds=2.8,
+            )
+        )
+        data = json.loads(result)
+        raw_score = data.get("score", 0)
+        # Accept numeric strings such as "82.5" rather than discarding an
+        # otherwise usable evaluation; genuinely non-numeric values still
+        # raise and fall through to the heuristic below.
+        score = int(raw_score) if isinstance(raw_score, int) else int(float(raw_score))
+        followup = (data.get("followup_question") or "").strip()
+        return {
+            # The prompt promises 0-100; enforce it on the model's output.
+            "score": max(0, min(100, score)),
+            "feedback": (data.get("feedback") or "").strip() or "Answer reviewed.",
+            "followup_question": followup,
+            "difficulty": data.get("difficulty") if data.get("difficulty") in {"easy", "medium", "hard"} else "medium",
+            "category": data.get("category") or "follow-up",
+        }
+    except Exception:
+        # Best-effort fallback so the interview can continue without Gemini.
+        fallback_skill = required_skills[0] if required_skills else "the selected role requirement"
+        return {
+            "score": _fallback_score(current_answer),
+            "feedback": "Try to explain the mechanism, trade-offs, and one concrete example.",
+            "followup_question": f"Can you walk me through a real scenario where you applied {fallback_skill} and what trade-offs you handled?",
+            "difficulty": "medium",
+            "category": "follow-up",
+        }
+async def generate_topic_followup_batch(
+    topic_name: str,
+    qa_pairs: list[dict],
+    excluded_questions: list[str],
+    count: int = 3,
+) -> list[dict]:
+    """Ask Gemini for a batch of topic-scoped follow-up questions.
+
+    Sends the topic, the question/answer pairs and the last 30 excluded
+    questions in a single-attempt request and returns up to ``count`` dicts
+    with ``question``, ``difficulty`` and ``category`` keys; items without
+    question text are skipped. If the call or the parsing raises, ``count``
+    templated questions are returned instead, rotating through distinct
+    prompts and preferring ones not listed in ``excluded_questions``.
+    """
+    count = max(1, int(count or 3))
+    payload = {
+        "topic": topic_name,
+        "qa_pairs": qa_pairs,
+        "excluded_questions": excluded_questions[-30:] if excluded_questions else [],
+        "count": count,
+    }
+    prompt = f"""Generate exactly {count} topic-focused technical follow-up questions.
+Input JSON:
+{json.dumps(payload, ensure_ascii=True)}
+Rules:
+1) Stay in topic scope only.
+2) Build on candidate weak points from qa_pairs.
+3) Do not repeat/paraphrase excluded_questions.
+Return ONLY valid JSON array with objects:
+- question (string)
+- difficulty (easy|medium|hard)
+- category (string)
+"""
+    try:
+        result = _extract_json_array(
+            await call_gemini(
+                prompt,
+                max_attempts=1,
+                request_timeout_seconds=3.5,
+            )
+        )
+        data = json.loads(result)
+        if not isinstance(data, list):
+            raise ValueError("topic output is not a list")
+        out = []
+        for item in data[:count]:
+            if not isinstance(item, dict):
+                item = {}
+            text = (item.get("question") or "").strip()
+            if not text:
+                continue
+            out.append(
+                {
+                    "question": text,
+                    "difficulty": item.get("difficulty") if item.get("difficulty") in {"easy", "medium", "hard"} else "medium",
+                    "category": item.get("category") or topic_name,
+                }
+            )
+        return out
+    except Exception:
+        # Best-effort fallback. Rotate through distinct prompts so a batch does
+        # not contain the same question repeated, and prefer prompts that are
+        # not already in excluded_questions.
+        candidates = [
+            f"In {topic_name}, explain how you would solve a real production issue and why.",
+            f"In {topic_name}, what are the most common mistakes you have seen, and how do you avoid them?",
+            f"In {topic_name}, describe a design trade-off you had to make and how you justified it.",
+            f"In {topic_name}, how would you test and debug a failure that only appears under load?",
+        ]
+        already_asked = {str(q).strip().lower() for q in (excluded_questions or [])}
+        # Fall back to the full list if every template was already asked.
+        fresh = [q for q in candidates if q.lower() not in already_asked] or candidates
+        return [
+            {
+                "question": fresh[i % len(fresh)],
+                "difficulty": "medium" if i < 2 else "hard",
+                "category": topic_name,
+            }
+            for i in range(count)
+        ]

backend/services/interview_service.py CHANGED Viewed

@@ -2,15 +2,32 @@ import json
 import asyncio
 import random
 import re
 from bson import ObjectId
 from database import get_db, get_redis
-from models.collections import SESSIONS, USERS, JOB_ROLES, SKILLS, QUESTIONS, TOPICS, TOPIC_QUESTIONS, RESUMES, JD_VERIFICATIONS
 from utils.helpers import generate_id, utc_now, str_objectid
 from utils.skills import normalize_skill_list, build_interview_focus_skills
 from services.interview_graph import run_interview_graph
 from utils.gemini import generate_interview_question_batch, analyze_resume_vs_job_description
 from services.job_description_service import get_job_description_for_user
 from services.tts_service import prefetch_wav
 MAX_QUESTIONS = 20
 RESUME_MAX_QUESTIONS = 10
@@ -20,10 +37,19 @@ BATCH_SIZE = 5
 PREGEN_MIN_PENDING = 2
 FOLLOWUP_AI_COUNT = 2
 FOLLOWUP_BANK_COUNT = 3
 # Local process memory summary requested in workflow.
 _LOCAL_SUMMARIES: dict[str, str] = {}
 _PREGEN_IN_FLIGHT: set[str] = set()
 def _safe_json_list(value: str) -> list:
@@ -34,6 +60,36 @@ def _safe_json_list(value: str) -> list:
         return []
 def _update_local_summary(session_id: str, question: str, answer: str) -> None:
     existing = _LOCAL_SUMMARIES.get(session_id, "")
     combined = f"{existing}\nQ: {question}\nA: {answer}".strip()
@@ -73,6 +129,32 @@ def _schedule_question_audio_prefetch(questions: list[str], voice_gender: str) -
             pass
 def _normalize_bank_difficulty(value: str) -> str:
     difficulty = (value or "medium").strip().lower()
     if difficulty not in {"easy", "medium", "hard"}:
@@ -136,13 +218,275 @@ async def _resolve_role_title(db, role_id: str | None, custom_role: str | None)
     return "Software Developer"
 async def verify_resume_job_description(
     user_id: str,
     role_id: str = None,
     custom_role: str = None,
     job_description_id: str = None,
 ) -> dict:
-    """Run resume-vs-job-description verification without starting an interview."""
     if not job_description_id:
         raise ValueError("job_description_id is required for verification")
@@ -163,8 +507,59 @@ async def verify_resume_job_description(
     resume_summary = "\n".join([part for part in summary_parts if part]).strip() or "No summary available"
     role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
     selected_jd = await get_job_description_for_user(user_id, job_description_id)
     jd_alignment = await analyze_resume_vs_job_description(
         role_title=role_title,
         resume_skills=resume_skills if resume_skills else ["general"],
@@ -176,7 +571,7 @@ async def verify_resume_job_description(
     resume_snapshot = {
         "filename": resume_doc.get("original_filename") or resume_doc.get("filename") or "",
-        "uploaded_at": resume_doc.get("uploaded_at"),
         "skills": resume_skills,
         "parsed_data": {
             "name": parsed_data.get("name"),
@@ -190,36 +585,36 @@ async def verify_resume_job_description(
     verification_id = generate_id()
     saved_at = utc_now()
-    await db[JD_VERIFICATIONS].insert_one(
-        {
-            "verification_id": verification_id,
-            "user_id": user_id,
-            "role_id": role_id,
-            "custom_role": custom_role,
-            "role_title": role_title,
-            "job_description": {
-                "id": selected_jd.get("id"),
-                "title": selected_jd.get("title"),
-                "company": selected_jd.get("company"),
-                "description": selected_jd.get("description"),
-                "required_skills": selected_jd.get("required_skills", []) or [],
-            },
-            "resume_snapshot": resume_snapshot,
-            "jd_alignment": jd_alignment,
-            "created_at": saved_at,
-        }
-    )
-    return {
         "verification_id": verification_id,
-        "saved_at": saved_at,
         "role_title": role_title,
-        "job_description": selected_jd,
         "resume_snapshot": resume_snapshot,
         "jd_alignment": jd_alignment,
-        "message": "Verification complete",
     }
 async def _get_generated_question_texts(redis, session_id: str) -> list[str]:
     qids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
@@ -604,7 +999,7 @@ async def _generate_mixed_followup_batch(
 async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
-    """Start a topic-wise interview with admin-created questions."""
     db = get_db()
     redis = get_redis()
@@ -614,18 +1009,21 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
     if not topic.get("is_published", False):
         raise ValueError("This topic interview is not published yet")
-    topic_questions = await db[TOPIC_QUESTIONS].find({"topic_id": topic_id}).to_list(length=200)
-    if not topic_questions:
-        raise ValueError("No questions found for selected topic")
     timer_enabled = bool(topic.get("timer_enabled", False))
     timer_seconds = topic.get("timer_seconds") if timer_enabled else None
-    total_questions = min(MAX_QUESTIONS, len(topic_questions))
-    # Randomize question selection for each interview session
-    random.shuffle(topic_questions)
-    selected = topic_questions[:total_questions]
     session_id = generate_id()
     _LOCAL_SUMMARIES[session_id] = ""
@@ -636,6 +1034,48 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
         user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
     speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
     session_doc = {
         "session_id": session_id,
         "user_id": user_id,
@@ -645,16 +1085,17 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
         "interview_type": "topic",
         "status": "in_progress",
         "question_count": 1,
-        "max_questions": total_questions,
-        "current_difficulty": selected[0].get("difficulty", "medium"),
         "metrics_gemini_calls": 0,
         "metrics_gemini_questions": 0,
-        "metrics_bank_questions": 0,
-        "metrics_bank_shortfall": 0,
-        "metrics_generation_batches": 0,
         "speech_voice_gender": speech_voice_gender,
         "timer_enabled": timer_enabled,
         "timer_seconds": timer_seconds,
         "started_at": utc_now(),
     }
     await db[SESSIONS].insert_one(session_doc)
@@ -672,53 +1113,26 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
         "question_count": 1,
         "answered_count": 0,
         "served_count": 1,
-        "generated_count": total_questions,
-        "max_questions": total_questions,
-        "current_difficulty": selected[0].get("difficulty", "medium"),
         "timer_enabled": str(timer_enabled),
         "timer_seconds": str(timer_seconds or ""),
         "status": "in_progress",
         "speech_voice_gender": speech_voice_gender,
         "metrics_gemini_calls": 0,
         "metrics_gemini_questions": 0,
-        "metrics_bank_questions": 0,
-        "metrics_bank_shortfall": 0,
-        "metrics_generation_batches": 0,
     }
     await redis.hset(f"session:{session_id}", mapping=session_state)
     await redis.expire(f"session:{session_id}", SESSION_TTL)
-    created_ids = []
-    for q in selected:
-        qid = generate_id()
-        created_ids.append(qid)
-        await redis.hset(
-            f"session:{session_id}:q:{qid}",
-            mapping={
-                "question_id": qid,
-                "question": q.get("question", "Can you explain this concept?"),
-                "difficulty": q.get("difficulty", "medium"),
-                "category": q.get("category", topic.get("name", "topic")),
-            },
-        )
-        await redis.rpush(f"session:{session_id}:questions", qid)
-        await redis.expire(f"session:{session_id}:q:{qid}", SESSION_TTL)
-    await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
-    first_id = created_ids[0]
-    pending_ids = created_ids[1:]
-    if pending_ids:
-        await redis.rpush(f"session:{session_id}:pending_questions", *pending_ids)
-        await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
-    first_q_data = await redis.hgetall(f"session:{session_id}:q:{first_id}")
-    prefetch_targets = []
-    if len(selected) > 1:
-        prefetch_targets.append(selected[1].get("question", ""))
-    _schedule_question_audio_prefetch(
-        prefetch_targets,
-        speech_voice_gender,
-    )
     return {
         "session_id": session_id,
@@ -737,10 +1151,10 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
         },
         "question": {
             "question_id": first_id,
-            "question": first_q_data.get("question", "Can you explain this concept?"),
-            "difficulty": first_q_data.get("difficulty", "medium"),
             "question_number": 1,
-            "total_questions": total_questions,
         },
         "timer": {
             "enabled": timer_enabled,
@@ -839,7 +1253,7 @@ async def start_interview(
     topic_id: str = None,
     job_description_id: str = None,
 ) -> dict:
-    """Start a new interview session."""
     interview_type = (interview_type or "resume").strip().lower()
     if interview_type == "topic":
         if not topic_id:
@@ -856,163 +1270,133 @@ async def start_interview(
         user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
     speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
-    # Get user skills
     skills_doc = await db[SKILLS].find_one({"user_id": user_id})
-    user_skills = skills_doc.get("skills", ["general"]) if skills_doc else ["general"]
-    user_skills = normalize_skill_list(user_skills)
     if not job_description_id:
         raise ValueError("Please select a Job Description before starting Resume Interview")
-    # Get role
     role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
-    selected_jd = None
-    if job_description_id:
-        selected_jd = await get_job_description_for_user(user_id, job_description_id)
     jd_required_skills = normalize_skill_list((selected_jd or {}).get("required_skills", []))
     if not jd_required_skills:
-        raise ValueError(
-            "Selected Job Description has no required skills. Add required skills in Settings first."
-        )
     user_skill_set = {s.lower() for s in user_skills}
     matched_role_skills = [s for s in jd_required_skills if s.lower() in user_skill_set]
     missing_role_skills = [s for s in jd_required_skills if s.lower() not in user_skill_set]
-    required_skills = list(jd_required_skills)
-    # Resume interview scope is strictly JD-required skills.
     base_skills_for_interview = matched_role_skills + [s for s in missing_role_skills if s not in matched_role_skills]
-    skills_for_interview = build_interview_focus_skills(base_skills_for_interview)
-    if not skills_for_interview:
-        skills_for_interview = required_skills
-    # Start with two questions ready so Q1 is asked immediately and Q2 is already queued.
-    initial_bank = await _fetch_question_bank_batch(
-        db=db,
-        role_id=role_id,
-        excluded_questions=[],
-        limit=RESUME_INITIAL_BATCH_SIZE,
-        skill_hints=required_skills,
-    )
-    resume_source_mode = "db" if len(initial_bank) >= RESUME_INITIAL_BATCH_SIZE else "ai"
-    if resume_source_mode == "db":
-        initial_batch = list(initial_bank[:RESUME_INITIAL_BATCH_SIZE])
-        initial_gemini_calls = 0
-        initial_gemini_questions = 0
-        initial_bank_questions = len(initial_batch)
-        initial_bank_shortfall = 0
-    else:
-        initial_batch, _ = await _generate_question_batch(
-            role_title=role_title,
-            skills=skills_for_interview,
-            previous_questions=[],
-            generated_count=0,
-            max_questions=RESUME_MAX_QUESTIONS,
-            current_difficulty="medium",
-            local_summary=None,
-            batch_size=RESUME_INITIAL_BATCH_SIZE,
-        )
-        if not initial_batch:
-            raise ValueError("Failed to generate initial resume interview questions")
-        initial_gemini_calls = 1
-        initial_gemini_questions = len(initial_batch)
-        initial_bank_questions = 0
-        initial_bank_shortfall = RESUME_INITIAL_BATCH_SIZE
-    last_difficulty = initial_batch[-1].get("difficulty", "medium") if initial_batch else "medium"
-    if not initial_batch:
-        raise ValueError("Failed to generate initial interview questions")
     session_id = generate_id()
     _LOCAL_SUMMARIES[session_id] = ""
-    # Create session in MongoDB
     session_doc = {
         "session_id": session_id,
         "user_id": user_id,
         "role_id": role_id,
         "role_title": role_title,
-        "job_description_id": selected_jd.get("id") if selected_jd else None,
-        "job_description_title": selected_jd.get("title") if selected_jd else None,
         "status": "in_progress",
         "interview_type": "resume",
         "question_count": 1,
         "max_questions": RESUME_MAX_QUESTIONS,
-        "current_difficulty": initial_batch[0].get("difficulty", "medium"),
-        "metrics_gemini_calls": initial_gemini_calls,
-        "metrics_gemini_questions": initial_gemini_questions,
-        "metrics_bank_questions": initial_bank_questions,
-        "metrics_bank_shortfall": initial_bank_shortfall,
-        "metrics_generation_batches": 1,
         "speech_voice_gender": speech_voice_gender,
         "started_at": utc_now(),
     }
     await db[SESSIONS].insert_one(session_doc)
-    # Store session state in Redis
     session_state = {
         "user_id": user_id,
         "role_id": role_id or "",
         "role_title": role_title,
         "skills": json.dumps(skills_for_interview),
         "user_skills": json.dumps(user_skills),
-        "required_skills": json.dumps(normalize_skill_list(required_skills)),
         "matched_skills": json.dumps(matched_role_skills),
         "missing_skills": json.dumps(missing_role_skills),
         "question_count": 1,
         "answered_count": 0,
         "served_count": 1,
-        "generated_count": len(initial_batch),
         "max_questions": RESUME_MAX_QUESTIONS,
-        "current_difficulty": last_difficulty,
         "interview_type": "resume",
         "status": "in_progress",
         "speech_voice_gender": speech_voice_gender,
-        "resume_source_mode": resume_source_mode,
-        "jd_required_skills": json.dumps(required_skills),
-        "metrics_gemini_calls": initial_gemini_calls,
-        "metrics_gemini_questions": initial_gemini_questions,
-        "metrics_bank_questions": initial_bank_questions,
-        "metrics_bank_shortfall": initial_bank_shortfall,
-        "metrics_generation_batches": 1,
     }
     await redis.hset(f"session:{session_id}", mapping=session_state)
     await redis.expire(f"session:{session_id}", SESSION_TTL)
-    # Store batch in Redis and queue remaining for later serving.
-    new_ids = await _append_batch_to_redis(redis, session_id, initial_batch)
-    first_id = new_ids[0]
-    pending_ids = new_ids[1:]
-    if pending_ids:
-        await redis.rpush(f"session:{session_id}:pending_questions", *pending_ids)
-        await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
-    first_q_data = await redis.hgetall(f"session:{session_id}:q:{first_id}")
-    prefetch_targets = []
-    if len(initial_batch) > 1:
-        prefetch_targets.append(initial_batch[1].get("question", ""))
-    _schedule_question_audio_prefetch(
-        prefetch_targets,
-        speech_voice_gender,
-    )
     return {
         "session_id": session_id,
         "skill_alignment": {
             "user_skills": user_skills,
-            "required_skills": normalize_skill_list(required_skills),
             "matched_skills": matched_role_skills,
             "missing_skills": missing_role_skills,
             "interview_focus": skills_for_interview,
         },
         "question": {
             "question_id": first_id,
-            "question": first_q_data.get("question", "Tell me about yourself."),
-            "difficulty": first_q_data.get("difficulty", "medium"),
             "question_number": 1,
             "total_questions": RESUME_MAX_QUESTIONS,
         },
@@ -1026,177 +1410,481 @@ async def start_interview(
     }
 async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
-    """Submit an answer and generate next question."""
     db = get_db()
     redis = get_redis()
-    # Get session state from Redis
     session = await redis.hgetall(f"session:{session_id}")
     if not session:
         raise ValueError("Interview session not found or expired")
     if session.get("status") != "in_progress":
         raise ValueError("Interview is not in progress")
-    # Store answer in Redis
-    await redis.hset(f"session:{session_id}:a:{question_id}", mapping={
-        "question_id": question_id,
-        "answer": answer,
-        "submitted_at": utc_now(),
-    })
     await redis.rpush(f"session:{session_id}:answers", question_id)
     await redis.expire(f"session:{session_id}:a:{question_id}", SESSION_TTL)
     await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
-    question_count = int(session.get("question_count", 1))
-    answered_count = int(session.get("answered_count", 0)) + 1
-    served_count = int(session.get("served_count", 1))
-    generated_count = int(session.get("generated_count", 0))
-    max_questions = int(session.get("max_questions", MAX_QUESTIONS))
     interview_type = session.get("interview_type", "resume")
-    # Update local summary in-memory (requested local summary step).
-    current_q = await redis.hgetall(f"session:{session_id}:q:{question_id}")
-    _update_local_summary(session_id, current_q.get("question", ""), answer)
-    # Check if interview is complete
     if answered_count >= max_questions:
-        # Mark session as completed
         await redis.hset(
             f"session:{session_id}",
-            mapping={"status": "completed", "answered_count": str(answered_count)},
         )
         await db[SESSIONS].update_one(
             {"session_id": session_id},
             {"$set": {"status": "completed", "completed_at": utc_now()}},
         )
         return {
             "session_id": session_id,
             "next_question": None,
             "is_complete": True,
             "message": "Interview complete! Generating your report...",
         }
-    # Serve from pending queue first.
-    next_question_id = await redis.lpop(f"session:{session_id}:pending_questions")
-    metrics_delta = {
-        "gemini_calls": 0,
-        "gemini_questions": 0,
-        "bank_questions": 0,
-        "bank_shortfall": 0,
-        "generation_batches": 0,
-    }
-    # If queue is empty, generate only for resume interviews.
-    if not next_question_id:
-        if interview_type == "topic":
-            await redis.hset(
-                f"session:{session_id}",
-                mapping={"status": "completed", "answered_count": str(answered_count)},
-            )
-            await db[SESSIONS].update_one(
-                {"session_id": session_id},
-                {"$set": {"status": "completed", "completed_at": utc_now()}},
             )
-            return {
-                "session_id": session_id,
-                "next_question": None,
-                "is_complete": True,
-                "message": "Interview complete! Generating your report...",
-            }
-        sync_batch, last_difficulty, batch_metrics = await _generate_mixed_followup_batch(
             db=db,
             redis=redis,
             session_id=session_id,
             session=session,
             generated_count=generated_count,
-            max_questions=max_questions,
         )
-        new_ids = await _append_batch_to_redis(redis, session_id, sync_batch)
-        generated_count += len(new_ids)
-        if new_ids:
-            next_question_id = new_ids[0]
-            if len(new_ids) > 1:
-                await redis.rpush(f"session:{session_id}:pending_questions", *new_ids[1:])
-                await redis.expire(f"session:{session_id}:pending_questions", SESSION_TTL)
-            await redis.hset(
-                f"session:{session_id}",
-                mapping={
-                    "generated_count": str(generated_count),
-                    "current_difficulty": last_difficulty,
-                    "metrics_gemini_calls": str(_safe_int(session.get("metrics_gemini_calls", 0)) + batch_metrics.get("gemini_calls", 0)),
-                    "metrics_gemini_questions": str(_safe_int(session.get("metrics_gemini_questions", 0)) + batch_metrics.get("gemini_questions", 0)),
-                    "metrics_bank_questions": str(_safe_int(session.get("metrics_bank_questions", 0)) + batch_metrics.get("bank_questions", 0)),
-                    "metrics_bank_shortfall": str(_safe_int(session.get("metrics_bank_shortfall", 0)) + batch_metrics.get("bank_shortfall", 0)),
-                    "metrics_generation_batches": str(_safe_int(session.get("metrics_generation_batches", 0)) + 1),
-                },
-            )
-            await db[SESSIONS].update_one(
-                {"session_id": session_id},
-                {
-                    "$set": {
-                        "metrics_gemini_calls": _safe_int(session.get("metrics_gemini_calls", 0)) + batch_metrics.get("gemini_calls", 0),
-                        "metrics_gemini_questions": _safe_int(session.get("metrics_gemini_questions", 0)) + batch_metrics.get("gemini_questions", 0),
-                        "metrics_bank_questions": _safe_int(session.get("metrics_bank_questions", 0)) + batch_metrics.get("bank_questions", 0),
-                        "metrics_bank_shortfall": _safe_int(session.get("metrics_bank_shortfall", 0)) + batch_metrics.get("bank_shortfall", 0),
-                        "metrics_generation_batches": _safe_int(session.get("metrics_generation_batches", 0)) + 1,
-                    }
-                },
-            )
-            metrics_delta = {
-                "gemini_calls": batch_metrics.get("gemini_calls", 0),
-                "gemini_questions": batch_metrics.get("gemini_questions", 0),
-                "bank_questions": batch_metrics.get("bank_questions", 0),
-                "bank_shortfall": batch_metrics.get("bank_shortfall", 0),
-                "generation_batches": 1,
             }
-            print(
-                f"[interview-metrics] session={session_id} "
-                f"batch_size={len(new_ids)} gemini_calls+={batch_metrics.get('gemini_calls', 0)} "
-                f"gemini_questions+={batch_metrics.get('gemini_questions', 0)} "
-                f"bank_questions+={batch_metrics.get('bank_questions', 0)} "
-                f"bank_shortfall+={batch_metrics.get('bank_shortfall', 0)}"
-            )
-    if not next_question_id:
-        raise ValueError("Unable to fetch or generate next question")
-    q_data = await redis.hgetall(f"session:{session_id}:q:{next_question_id}")
-    speech_voice_gender = _normalize_voice_gender(session.get("speech_voice_gender"))
-    # Prefetch one-ahead question only. Current question is synthesized by active playback path.
-    prefetch_texts = []
-    peek_next_id = await redis.lindex(f"session:{session_id}:pending_questions", 0)
-    if peek_next_id:
-        peek_q = await redis.hgetall(f"session:{session_id}:q:{peek_next_id}")
-        prefetch_texts.append(peek_q.get("question", ""))
-    _schedule_question_audio_prefetch(prefetch_texts, speech_voice_gender)
     next_difficulty = q_data.get("difficulty", session.get("current_difficulty", "medium"))
-    new_count = question_count + 1
     new_served_count = served_count + 1
-    # Update session state
-    await redis.hset(f"session:{session_id}", mapping={
-        "question_count": str(new_count),
-        "answered_count": str(answered_count),
-        "served_count": str(new_served_count),
-        "current_difficulty": next_difficulty,
-    })
-    if interview_type == "resume":
-        _schedule_pregen(session_id, answered_count)
-    effective_stats = {
-        "gemini_calls": _safe_int(session.get("metrics_gemini_calls", 0)) + metrics_delta["gemini_calls"],
-        "gemini_questions": _safe_int(session.get("metrics_gemini_questions", 0)) + metrics_delta["gemini_questions"],
-        "bank_questions": _safe_int(session.get("metrics_bank_questions", 0)) + metrics_delta["bank_questions"],
-        "bank_shortfall": _safe_int(session.get("metrics_bank_shortfall", 0)) + metrics_delta["bank_shortfall"],
-        "generation_batches": _safe_int(session.get("metrics_generation_batches", 0)) + metrics_delta["generation_batches"],
-    }
-    return {
         "session_id": session_id,
         "next_question": {
             "question_id": next_question_id,
@@ -1210,6 +1898,83 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
         "generation_stats": effective_stats,
     }
 async def quit_interview(session_id: str, user_id: str) -> dict:
     """Mark an interview as quit and indicate whether a partial report can be generated."""
@@ -1292,3 +2057,4 @@ def cleanup_interview_local_state(session_id: str) -> None:
     """Cleanup process-local state for a completed session."""
     _LOCAL_SUMMARIES.pop(session_id, None)
     _PREGEN_IN_FLIGHT.discard(session_id)

 import asyncio
 import random
 import re
+from time import perf_counter
 from bson import ObjectId
 from database import get_db, get_redis
+from models.collections import SESSIONS, USERS, JOB_ROLES, SKILLS, QUESTIONS, TOPICS, TOPIC_QUESTIONS, RESUMES, JD_VERIFICATIONS, ANSWERS
 from utils.helpers import generate_id, utc_now, str_objectid
 from utils.skills import normalize_skill_list, build_interview_focus_skills
 from services.interview_graph import run_interview_graph
 from utils.gemini import generate_interview_question_batch, analyze_resume_vs_job_description
 from services.job_description_service import get_job_description_for_user
+from services.gemini_service import (
+    evaluate_and_generate_followup,
+    generate_resume_seed_questions,
+    generate_topic_followup_batch,
+)
+from services.queue_service import (
+    enqueue_question,
+    flush_backlog_to_queue,
+    get_recent_context_items,
+    mark_question_asked,
+    peek_next_question,
+    pop_next_question,
+    push_context_item,
+    queue_size,
+)
 from services.tts_service import prefetch_wav
+from services.latency_service import record_latency
 MAX_QUESTIONS = 20
 RESUME_MAX_QUESTIONS = 10
 PREGEN_MIN_PENDING = 2
 FOLLOWUP_AI_COUNT = 2
 FOLLOWUP_BANK_COUNT = 3
+MAX_QUEUE_SIZE = 3
+CONTEXT_CACHE_ITEMS = 3
+TOPIC_INITIAL_DB_QUESTIONS = 5
+TOPIC_INITIAL_ASK_COUNT = 4
+TOPIC_AI_FOLLOWUPS = 3
+TOPIC_DB_FOLLOWUPS = 2
+TOPIC_TOTAL_QUESTIONS = TOPIC_INITIAL_ASK_COUNT + TOPIC_AI_FOLLOWUPS + TOPIC_DB_FOLLOWUPS
 # Local process memory summary requested in workflow.
 _LOCAL_SUMMARIES: dict[str, str] = {}
 _PREGEN_IN_FLIGHT: set[str] = set()
+_POST_SUBMIT_LOCKS: dict[str, asyncio.Lock] = {}
 def _safe_json_list(value: str) -> list:
         return []
+def _question_fingerprint(text: str) -> str:
+    base = (text or "").strip().lower()
+    base = re.sub(r"[^a-z0-9\s]", " ", base)
+    base = re.sub(r"\s+", " ", base).strip()
+    return base
+def _unique_question_items(items: list[dict], *, excluded_questions: list[str], limit: int) -> list[dict]:
+    excluded = {_question_fingerprint(q) for q in excluded_questions if q}
+    unique: list[dict] = []
+    for item in items or []:
+        text = (item.get("question") or "").strip()
+        if not text:
+            continue
+        key = _question_fingerprint(text)
+        if not key or key in excluded:
+            continue
+        excluded.add(key)
+        unique.append(
+            {
+                "question": text,
+                "difficulty": item.get("difficulty", "medium"),
+                "category": item.get("category", "general"),
+            }
+        )
+        if len(unique) >= limit:
+            break
+    return unique
 def _update_local_summary(session_id: str, question: str, answer: str) -> None:
     existing = _LOCAL_SUMMARIES.get(session_id, "")
     combined = f"{existing}\nQ: {question}\nA: {answer}".strip()
             pass
+def _get_post_submit_lock(session_id: str) -> asyncio.Lock:
+    lock = _POST_SUBMIT_LOCKS.get(session_id)
+    if lock is None:
+        lock = asyncio.Lock()
+        _POST_SUBMIT_LOCKS[session_id] = lock
+    return lock
+def _consume_post_submit_task_result(task: asyncio.Task) -> None:
+    try:
+        task.result()
+    except Exception:
+        # Background processing is best-effort; ignore task-level failures.
+        pass
+def _current_generation_stats(session: dict) -> dict:
+    return {
+        "gemini_calls": _safe_int(session.get("metrics_gemini_calls", 0)),
+        "gemini_questions": _safe_int(session.get("metrics_gemini_questions", 0)),
+        "bank_questions": _safe_int(session.get("metrics_bank_questions", 0)),
+        "bank_shortfall": _safe_int(session.get("metrics_bank_shortfall", 0)),
+        "generation_batches": _safe_int(session.get("metrics_generation_batches", 0)),
+    }
 def _normalize_bank_difficulty(value: str) -> str:
     difficulty = (value or "medium").strip().lower()
     if difficulty not in {"easy", "medium", "hard"}:
     return "Software Developer"
+async def _get_recent_user_questions(db, user_id: str, limit: int = 40) -> list[str]:
+    recent: list[str] = []
+    seen: set[str] = set()
+    cursor = db[ANSWERS].find({"user_id": user_id}, {"question": 1}).sort("stored_at", -1).limit(limit)
+    async for doc in cursor:
+        text = (doc.get("question") or "").strip()
+        key = _question_fingerprint(text)
+        if not text or not key or key in seen:
+            continue
+        seen.add(key)
+        recent.append(text)
+    return recent
+def _build_resume_intro_question(role_title: str, jd_title: str) -> str:
+    title = (jd_title or "the selected job description").strip()
+    role = (role_title or "this role").strip()
+    return (
+        f"Introduce yourself and explain how your background aligns with {role} "
+        f"for {title}."
+    )
+def _build_resume_resilient_followup_question(session: dict, question_number: int, variant: int = 0) -> str:
+    role_title = (session.get("role_title") or "this role").strip()
+    jd_skills = _safe_json_list(session.get("jd_required_skills", "[]"))
+    focus_skills = _safe_json_list(session.get("skills", "[]"))
+    skill_pool = jd_skills or focus_skills or ["core technical concepts"]
+    index = max(0, question_number - 1) + max(0, variant)
+    skill = skill_pool[index % len(skill_pool)]
+    templates = [
+        "Question {n}: Describe a real project where you applied {skill} for {role}. What constraints and trade-offs shaped your design?",
+        "Question {n}: If {skill} failed in production for a {role} workflow, how would you debug it step by step?",
+        "Question {n}: Explain how you would test and validate a solution using {skill} before shipping it for {role}.",
+        "Question {n}: Compare two approaches for {skill} in a {role} context and justify the final choice.",
+        "Question {n}: Design an improvement plan to make your {skill} implementation more scalable and reliable for {role}.",
+    ]
+    template = templates[index % len(templates)]
+    return template.format(n=question_number, skill=skill, role=role_title)
+async def _enqueue_resume_followup_with_fallback(
+    *,
+    redis,
+    session_id: str,
+    session: dict,
+    answered_count: int,
+    suggested_text: str,
+    suggested_difficulty: str,
+    suggested_category: str,
+) -> tuple[str | None, bool]:
+    candidates: list[tuple[str, str, str, bool]] = []
+    primary = (suggested_text or "").strip()
+    if primary:
+        candidates.append((primary, suggested_difficulty or "medium", suggested_category or "follow-up", True))
+    # Deterministic local fallback prevents early completion when model output is empty/duplicate.
+    base_question_number = max(2, answered_count + 1)
+    for variant in range(6):
+        question_number = base_question_number + variant
+        fallback_text = _build_resume_resilient_followup_question(
+            session=session,
+            question_number=question_number,
+            variant=variant,
+        )
+        candidates.append((fallback_text, "medium", "resume-fallback", False))
+    seen: set[str] = set()
+    for text, difficulty, category, is_primary in candidates:
+        key = _question_fingerprint(text)
+        if not key or key in seen:
+            continue
+        seen.add(key)
+        qid = await enqueue_question(
+            redis=redis,
+            session_id=session_id,
+            question=text,
+            difficulty=difficulty,
+            category=category,
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        if qid:
+            return qid, is_primary
+    return None, False
+async def _get_session_question_texts(redis, session_id: str) -> list[str]:
+    question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
+    output: list[str] = []
+    for qid in question_ids:
+        q = await redis.hgetall(f"session:{session_id}:q:{qid}")
+        text = (q.get("question") or "").strip()
+        if text:
+            output.append(text)
+    return output
+async def _sample_topic_questions(
+    db,
+    topic_id: str,
+    excluded_questions: list[str],
+    limit: int,
+) -> list[dict]:
+    if limit <= 0:
+        return []
+    docs = await db[TOPIC_QUESTIONS].find({"topic_id": topic_id}).to_list(length=500)
+    random.shuffle(docs)
+    excluded = {_question_fingerprint(q) for q in excluded_questions if q}
+    selected: list[dict] = []
+    for doc in docs:
+        text = (doc.get("question") or "").strip()
+        if not text:
+            continue
+        fp = _question_fingerprint(text)
+        if not fp or fp in excluded:
+            continue
+        excluded.add(fp)
+        selected.append(
+            {
+                "question": text,
+                "difficulty": _normalize_bank_difficulty(doc.get("difficulty") or "medium"),
+                "category": doc.get("category") or "topic",
+            }
+        )
+        if len(selected) >= limit:
+            break
+    return selected
+async def _seed_resume_questions_task(session_id: str) -> None:
+    """Top up the question queue of an in-progress resume interview.
+
+    Does nothing unless the Redis session hash exists, has status
+    ``in_progress`` and interview type ``resume``. Otherwise it flushes the
+    backlog into the queue, generates seed questions when the queue holds
+    fewer than ``RESUME_INITIAL_BATCH_SIZE`` entries, updates the generation
+    counters in Redis and MongoDB, and schedules audio prefetch for the next
+    queued question. Any exception is swallowed and the function returns.
+    """
+    db = get_db()
+    redis = get_redis()
+    # Snapshot of the session hash; every counter written below is derived from it.
+    session = await redis.hgetall(f"session:{session_id}")
+    if not session or session.get("status") != "in_progress" or session.get("interview_type") != "resume":
+        return
+    try:
+        await flush_backlog_to_queue(
+            redis=redis,
+            session_id=session_id,
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        current_q_size = await queue_size(redis, session_id)
+        # Only generate as many seed questions as are missing from the queue.
+        needed = max(0, RESUME_INITIAL_BATCH_SIZE - current_q_size)
+        if needed > 0:
+            # Questions already recorded for this session are passed as exclusions.
+            excluded_questions = await _get_session_question_texts(redis, session_id)
+            seed_items = await generate_resume_seed_questions(
+                role_title=session.get("role_title", "Software Developer"),
+                resume_summary=session.get("resume_summary", "No summary available"),
+                resume_skills=_safe_json_list(session.get("skills", "[]")),
+                jd_title=session.get("job_description_title", ""),
+                jd_description=session.get("job_description_text", ""),
+                jd_required_skills=_safe_json_list(session.get("jd_required_skills", "[]")),
+                excluded_questions=excluded_questions,
+                count=needed,
+            )
+            # Count only the items for which enqueue_question returned a truthy id.
+            appended = 0
+            for item in seed_items:
+                qid = await enqueue_question(
+                    redis=redis,
+                    session_id=session_id,
+                    question=item.get("question", ""),
+                    difficulty=item.get("difficulty", "medium"),
+                    category=item.get("category", "resume-seed"),
+                    ttl_seconds=SESSION_TTL,
+                    max_queue_size=MAX_QUEUE_SIZE,
+                )
+                if qid:
+                    appended += 1
+            # NOTE(review): counters are computed from the snapshot read at the
+            # top of this function, not incremented atomically; an update made
+            # by another coroutine in between would be overwritten - confirm.
+            # The call/batch counters go up by one even when nothing was appended.
+            await redis.hset(
+                f"session:{session_id}",
+                mapping={
+                    "generated_count": str(_safe_int(session.get("generated_count", 0)) + appended),
+                    "metrics_gemini_calls": str(_safe_int(session.get("metrics_gemini_calls", 0)) + 1),
+                    "metrics_gemini_questions": str(_safe_int(session.get("metrics_gemini_questions", 0)) + appended),
+                    "metrics_generation_batches": str(_safe_int(session.get("metrics_generation_batches", 0)) + 1),
+                },
+            )
+            # Mirror the metrics into the MongoDB session document (generated_count is not mirrored).
+            await db[SESSIONS].update_one(
+                {"session_id": session_id},
+                {
+                    "$set": {
+                        "metrics_gemini_calls": _safe_int(session.get("metrics_gemini_calls", 0)) + 1,
+                        "metrics_gemini_questions": _safe_int(session.get("metrics_gemini_questions", 0)) + appended,
+                        "metrics_generation_batches": _safe_int(session.get("metrics_generation_batches", 0)) + 1,
+                    }
+                },
+            )
+        # Second flush after the optional generation step.
+        await flush_backlog_to_queue(
+            redis=redis,
+            session_id=session_id,
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        # Prefetch audio for whichever question is now at the head of the queue.
+        next_qid, next_q = await peek_next_question(redis, session_id)
+        if next_qid and next_q:
+            _schedule_question_audio_prefetch(
+                [next_q.get("question", "")],
+                _normalize_voice_gender(session.get("speech_voice_gender")),
+            )
+    except Exception:
+        # Non-blocking pre-seed path should never fail interview startup.
+        # NOTE(review): the failure is dropped without logging - consider recording it.
+        return
+def _normalize_role_key(role_title: str) -> str:
+    normalized = re.sub(r"\s+", " ", (role_title or "").strip().lower())
+    return normalized or "software developer"
+def _build_verification_cache_key(
+    role_key: str,
+    jd_id: str,
+    jd_updated_at: str,
+    resume_uploaded_at: str,
+) -> str:
+    return "||".join([
+        role_key or "software developer",
+        jd_id or "-",
+        jd_updated_at or "-",
+        resume_uploaded_at or "-",
+    ])
+def _verification_doc_to_response(doc: dict, *, message: str, cached: bool) -> dict:
+    return {
+        "verification_id": doc.get("verification_id"),
+        "saved_at": doc.get("saved_at") or doc.get("created_at") or utc_now(),
+        "role_title": doc.get("role_title"),
+        "job_description": doc.get("job_description") or {},
+        "resume_snapshot": doc.get("resume_snapshot") or {},
+        "jd_alignment": doc.get("jd_alignment") or {},
+        "message": message,
+        "cached": cached,
+    }
 async def verify_resume_job_description(
     user_id: str,
     role_id: str = None,
     custom_role: str = None,
     job_description_id: str = None,
 ) -> dict:
+    """Run resume-vs-job-description verification without starting an interview.
+    Reuses a saved verification while the selected role, JD version, and resume
+    upload timestamp are unchanged.
+    """
     if not job_description_id:
         raise ValueError("job_description_id is required for verification")
     resume_summary = "\n".join([part for part in summary_parts if part]).strip() or "No summary available"
     role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
+    role_key = _normalize_role_key(role_title)
     selected_jd = await get_job_description_for_user(user_id, job_description_id)
+    resume_uploaded_at = resume_doc.get("uploaded_at") or ""
+    jd_updated_at = selected_jd.get("updated_at") or ""
+    cache_key = _build_verification_cache_key(
+        role_key=role_key,
+        jd_id=selected_jd.get("id") or job_description_id,
+        jd_updated_at=jd_updated_at,
+        resume_uploaded_at=resume_uploaded_at,
+    )
+    existing_verification = await db[JD_VERIFICATIONS].find_one(
+        {"user_id": user_id, "cache_key": cache_key},
+        sort=[("created_at", -1)],
+    )
+    if not existing_verification:
+        compatibility_query = {
+            "user_id": user_id,
+            "role_title": role_title,
+            "job_description.id": selected_jd.get("id"),
+            "resume_snapshot.uploaded_at": resume_uploaded_at,
+        }
+        if jd_updated_at:
+            compatibility_query["job_description.updated_at"] = jd_updated_at
+        existing_verification = await db[JD_VERIFICATIONS].find_one(
+            compatibility_query,
+            sort=[("created_at", -1)],
+        )
+        if existing_verification:
+            await db[JD_VERIFICATIONS].update_one(
+                {"_id": existing_verification["_id"]},
+                {
+                    "$set": {
+                        "cache_key": cache_key,
+                        "role_key": role_key,
+                        "saved_at": existing_verification.get("saved_at")
+                        or existing_verification.get("created_at")
+                        or utc_now(),
+                    }
+                },
+            )
+    if existing_verification:
+        return _verification_doc_to_response(
+            existing_verification,
+            message="Loaded saved verification",
+            cached=True,
+        )
     jd_alignment = await analyze_resume_vs_job_description(
         role_title=role_title,
         resume_skills=resume_skills if resume_skills else ["general"],
     resume_snapshot = {
         "filename": resume_doc.get("original_filename") or resume_doc.get("filename") or "",
+        "uploaded_at": resume_uploaded_at,
         "skills": resume_skills,
         "parsed_data": {
             "name": parsed_data.get("name"),
     verification_id = generate_id()
     saved_at = utc_now()
+    verification_doc = {
         "verification_id": verification_id,
+        "user_id": user_id,
+        "role_id": role_id,
+        "custom_role": custom_role,
         "role_title": role_title,
+        "role_key": role_key,
+        "cache_key": cache_key,
+        "job_description": {
+            "id": selected_jd.get("id"),
+            "title": selected_jd.get("title"),
+            "company": selected_jd.get("company"),
+            "description": selected_jd.get("description"),
+            "required_skills": selected_jd.get("required_skills", []) or [],
+            "updated_at": jd_updated_at,
+        },
         "resume_snapshot": resume_snapshot,
         "jd_alignment": jd_alignment,
+        "saved_at": saved_at,
+        "created_at": saved_at,
     }
+    await db[JD_VERIFICATIONS].insert_one(verification_doc)
+    return _verification_doc_to_response(
+        verification_doc,
+        message="Verification complete",
+        cached=False,
+    )
 async def _get_generated_question_texts(redis, session_id: str) -> list[str]:
     qids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
 async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
+    """Start topic interview with low-cost DB-first flow and staged AI follow-ups."""
     db = get_db()
     redis = get_redis()
     if not topic.get("is_published", False):
         raise ValueError("This topic interview is not published yet")
+    initial_items = await _sample_topic_questions(
+        db=db,
+        topic_id=topic_id,
+        excluded_questions=[],
+        limit=TOPIC_INITIAL_DB_QUESTIONS,
+    )
+    if len(initial_items) < TOPIC_INITIAL_ASK_COUNT:
+        raise ValueError("Not enough topic questions to start interview")
+    first_question = initial_items[0]
+    queued_initial = initial_items[1:TOPIC_INITIAL_ASK_COUNT]
     timer_enabled = bool(topic.get("timer_enabled", False))
     timer_seconds = topic.get("timer_seconds") if timer_enabled else None
     session_id = generate_id()
     _LOCAL_SUMMARIES[session_id] = ""
         user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
     speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
+    first_id = generate_id()
+    await redis.hset(
+        f"session:{session_id}:q:{first_id}",
+        mapping={
+            "question_id": first_id,
+            "question": first_question.get("question", "Can you explain this topic?"),
+            "difficulty": first_question.get("difficulty", "medium"),
+            "category": first_question.get("category", topic.get("name", "topic")),
+        },
+    )
+    await redis.expire(f"session:{session_id}:q:{first_id}", SESSION_TTL)
+    await redis.rpush(f"session:{session_id}:questions", first_id)
+    await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
+    await mark_question_asked(
+        redis=redis,
+        session_id=session_id,
+        question_text=first_question.get("question", ""),
+        ttl_seconds=SESSION_TTL,
+    )
+    queued_count = 0
+    for item in queued_initial:
+        qid = await enqueue_question(
+            redis=redis,
+            session_id=session_id,
+            question=item.get("question", ""),
+            difficulty=item.get("difficulty", "medium"),
+            category=item.get("category", topic.get("name", "topic")),
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        if qid:
+            queued_count += 1
+    await flush_backlog_to_queue(
+        redis=redis,
+        session_id=session_id,
+        ttl_seconds=SESSION_TTL,
+        max_queue_size=MAX_QUEUE_SIZE,
+    )
     session_doc = {
         "session_id": session_id,
         "user_id": user_id,
         "interview_type": "topic",
         "status": "in_progress",
         "question_count": 1,
+        "max_questions": TOPIC_TOTAL_QUESTIONS,
+        "current_difficulty": first_question.get("difficulty", "medium"),
         "metrics_gemini_calls": 0,
         "metrics_gemini_questions": 0,
+        "metrics_bank_questions": queued_count + 1,
+        "metrics_bank_shortfall": max(0, TOPIC_INITIAL_ASK_COUNT - (queued_count + 1)),
+        "metrics_generation_batches": 1,
         "speech_voice_gender": speech_voice_gender,
         "timer_enabled": timer_enabled,
         "timer_seconds": timer_seconds,
+        "topic_followups_generated": False,
         "started_at": utc_now(),
     }
     await db[SESSIONS].insert_one(session_doc)
         "question_count": 1,
         "answered_count": 0,
         "served_count": 1,
+        "generated_count": queued_count + 1,
+        "max_questions": TOPIC_TOTAL_QUESTIONS,
+        "current_difficulty": first_question.get("difficulty", "medium"),
         "timer_enabled": str(timer_enabled),
         "timer_seconds": str(timer_seconds or ""),
         "status": "in_progress",
         "speech_voice_gender": speech_voice_gender,
         "metrics_gemini_calls": 0,
         "metrics_gemini_questions": 0,
+        "metrics_bank_questions": queued_count + 1,
+        "metrics_bank_shortfall": max(0, TOPIC_INITIAL_ASK_COUNT - (queued_count + 1)),
+        "metrics_generation_batches": 1,
+        "topic_followups_generated": "0",
     }
     await redis.hset(f"session:{session_id}", mapping=session_state)
     await redis.expire(f"session:{session_id}", SESSION_TTL)
+    next_qid, next_q = await peek_next_question(redis, session_id)
+    prefetch_targets = [next_q.get("question", "")] if next_qid and next_q else []
+    _schedule_question_audio_prefetch(prefetch_targets, speech_voice_gender)
     return {
         "session_id": session_id,
         },
         "question": {
             "question_id": first_id,
+            "question": first_question.get("question", "Can you explain this topic?"),
+            "difficulty": first_question.get("difficulty", "medium"),
             "question_number": 1,
+            "total_questions": TOPIC_TOTAL_QUESTIONS,
         },
         "timer": {
             "enabled": timer_enabled,
     topic_id: str = None,
     job_description_id: str = None,
 ) -> dict:
+    """Start a new interview session with low-cost queue-first orchestration."""
     interview_type = (interview_type or "resume").strip().lower()
     if interview_type == "topic":
         if not topic_id:
         user_doc = await db[USERS].find_one({"user_id": user_id}, {"speech_settings": 1})
     speech_voice_gender = _normalize_voice_gender(((user_doc or {}).get("speech_settings") or {}).get("voice_gender"))
     skills_doc = await db[SKILLS].find_one({"user_id": user_id})
+    user_skills = normalize_skill_list(skills_doc.get("skills", [])) if skills_doc else []
+    resume_doc = await db[RESUMES].find_one({"user_id": user_id})
+    if not resume_doc:
+        raise ValueError("Please upload your resume before starting a resume interview")
+    parsed_resume = (resume_doc or {}).get("parsed_data", {}) or {}
+    resume_summary_parts = [
+        parsed_resume.get("experience_summary") or "",
+        " ".join(parsed_resume.get("recommended_roles", []) or []),
+    ]
+    resume_summary = "\n".join([part for part in resume_summary_parts if part]).strip() or "No summary available"
     if not job_description_id:
         raise ValueError("Please select a Job Description before starting Resume Interview")
     role_title = await _resolve_role_title(db, role_id=role_id, custom_role=custom_role)
+    selected_jd = await get_job_description_for_user(user_id, job_description_id)
     jd_required_skills = normalize_skill_list((selected_jd or {}).get("required_skills", []))
     if not jd_required_skills:
+        raise ValueError("Selected Job Description has no required skills. Add required skills first.")
     user_skill_set = {s.lower() for s in user_skills}
     matched_role_skills = [s for s in jd_required_skills if s.lower() in user_skill_set]
     missing_role_skills = [s for s in jd_required_skills if s.lower() not in user_skill_set]
     base_skills_for_interview = matched_role_skills + [s for s in missing_role_skills if s not in matched_role_skills]
+    skills_for_interview = build_interview_focus_skills(base_skills_for_interview) or list(jd_required_skills)
+    intro_question = _build_resume_intro_question(role_title=role_title, jd_title=selected_jd.get("title", ""))
     session_id = generate_id()
     _LOCAL_SUMMARIES[session_id] = ""
+    first_id = generate_id()
+    await redis.hset(
+        f"session:{session_id}:q:{first_id}",
+        mapping={
+            "question_id": first_id,
+            "question": intro_question,
+            "difficulty": "easy",
+            "category": "intro",
+        },
+    )
+    await redis.expire(f"session:{session_id}:q:{first_id}", SESSION_TTL)
+    await redis.rpush(f"session:{session_id}:questions", first_id)
+    await redis.expire(f"session:{session_id}:questions", SESSION_TTL)
+    await mark_question_asked(
+        redis=redis,
+        session_id=session_id,
+        question_text=intro_question,
+        ttl_seconds=SESSION_TTL,
+    )
     session_doc = {
         "session_id": session_id,
         "user_id": user_id,
         "role_id": role_id,
         "role_title": role_title,
+        "job_description_id": selected_jd.get("id"),
+        "job_description_title": selected_jd.get("title"),
         "status": "in_progress",
         "interview_type": "resume",
         "question_count": 1,
         "max_questions": RESUME_MAX_QUESTIONS,
+        "current_difficulty": "easy",
+        "metrics_gemini_calls": 0,
+        "metrics_gemini_questions": 0,
+        "metrics_bank_questions": 1,
+        "metrics_bank_shortfall": 0,
+        "metrics_generation_batches": 0,
         "speech_voice_gender": speech_voice_gender,
         "started_at": utc_now(),
+        "interview_generation_mode": "queue_followup",
     }
     await db[SESSIONS].insert_one(session_doc)
     session_state = {
         "user_id": user_id,
         "role_id": role_id or "",
         "role_title": role_title,
         "skills": json.dumps(skills_for_interview),
         "user_skills": json.dumps(user_skills),
+        "required_skills": json.dumps(jd_required_skills),
         "matched_skills": json.dumps(matched_role_skills),
         "missing_skills": json.dumps(missing_role_skills),
         "question_count": 1,
         "answered_count": 0,
         "served_count": 1,
+        "generated_count": 1,
         "max_questions": RESUME_MAX_QUESTIONS,
+        "current_difficulty": "easy",
         "interview_type": "resume",
         "status": "in_progress",
         "speech_voice_gender": speech_voice_gender,
+        "jd_required_skills": json.dumps(jd_required_skills),
+        "job_description_title": selected_jd.get("title", ""),
+        "job_description_text": selected_jd.get("description", ""),
+        "resume_summary": resume_summary,
+        "metrics_gemini_calls": 0,
+        "metrics_gemini_questions": 0,
+        "metrics_bank_questions": 1,
+        "metrics_bank_shortfall": 0,
+        "metrics_generation_batches": 0,
+        "interview_generation_mode": "queue_followup",
     }
     await redis.hset(f"session:{session_id}", mapping=session_state)
     await redis.expire(f"session:{session_id}", SESSION_TTL)
+    # Preload initial queue in background (2 questions) without blocking first question delivery.
+    asyncio.create_task(_seed_resume_questions_task(session_id))
     return {
         "session_id": session_id,
         "skill_alignment": {
             "user_skills": user_skills,
+            "required_skills": jd_required_skills,
             "matched_skills": matched_role_skills,
             "missing_skills": missing_role_skills,
             "interview_focus": skills_for_interview,
         },
         "question": {
             "question_id": first_id,
+            "question": intro_question,
+            "difficulty": "easy",
             "question_number": 1,
             "total_questions": RESUME_MAX_QUESTIONS,
         },
     }
+async def _record_submit_latency(started_at: float) -> float:
+    elapsed_ms = (perf_counter() - started_at) * 1000.0
+    await record_latency("submit_ms", elapsed_ms)
+    return round(elapsed_ms, 2)
+async def _apply_generation_metric_delta(
+    *,
+    db,
+    redis,
+    session_id: str,
+    session: dict,
+    metrics_delta: dict,
+    generated_count: int | None = None,
+    extra_redis_fields: dict | None = None,
+    extra_db_fields: dict | None = None,
+) -> dict:
+    base_stats = _current_generation_stats(session)
+    effective_stats = {
+        "gemini_calls": base_stats["gemini_calls"] + _safe_int(metrics_delta.get("gemini_calls", 0)),
+        "gemini_questions": base_stats["gemini_questions"] + _safe_int(metrics_delta.get("gemini_questions", 0)),
+        "bank_questions": base_stats["bank_questions"] + _safe_int(metrics_delta.get("bank_questions", 0)),
+        "bank_shortfall": base_stats["bank_shortfall"] + _safe_int(metrics_delta.get("bank_shortfall", 0)),
+        "generation_batches": base_stats["generation_batches"] + _safe_int(metrics_delta.get("generation_batches", 0)),
+    }
+    redis_mapping = {
+        "metrics_gemini_calls": str(effective_stats["gemini_calls"]),
+        "metrics_gemini_questions": str(effective_stats["gemini_questions"]),
+        "metrics_bank_questions": str(effective_stats["bank_questions"]),
+        "metrics_bank_shortfall": str(effective_stats["bank_shortfall"]),
+        "metrics_generation_batches": str(effective_stats["generation_batches"]),
+    }
+    if generated_count is not None:
+        redis_mapping["generated_count"] = str(generated_count)
+    if extra_redis_fields:
+        redis_mapping.update(extra_redis_fields)
+    await redis.hset(f"session:{session_id}", mapping=redis_mapping)
+    db_set = {
+        "metrics_gemini_calls": effective_stats["gemini_calls"],
+        "metrics_gemini_questions": effective_stats["gemini_questions"],
+        "metrics_bank_questions": effective_stats["bank_questions"],
+        "metrics_bank_shortfall": effective_stats["bank_shortfall"],
+        "metrics_generation_batches": effective_stats["generation_batches"],
+    }
+    if generated_count is not None:
+        db_set["generated_count"] = generated_count
+    if extra_db_fields:
+        db_set.update(extra_db_fields)
+    await db[SESSIONS].update_one({"session_id": session_id}, {"$set": db_set})
+    return effective_stats
+async def _post_submit_resume_processing(
+    session_id: str,
+    question_id: str,
+    question_text: str,
+    answer: str,
+    answered_count: int,
+    max_questions: int,
+) -> None:
+    db = get_db()
+    redis = get_redis()
+    async with _get_post_submit_lock(session_id):
+        session = await redis.hgetall(f"session:{session_id}")
+        if not session:
+            return
+        recent_context = await get_recent_context_items(
+            redis=redis,
+            session_id=session_id,
+            max_items=CONTEXT_CACHE_ITEMS,
+        )
+        excluded_questions = await _get_session_question_texts(redis, session_id)
+        evaluation = await evaluate_and_generate_followup(
+            role_title=session.get("role_title", "Software Developer"),
+            required_skills=_safe_json_list(session.get("jd_required_skills", "[]")),
+            recent_context=recent_context,
+            current_question=question_text,
+            current_answer=answer,
+            excluded_questions=excluded_questions,
+        )
+        await redis.hset(
+            f"session:{session_id}:a:{question_id}",
+            mapping={
+                "score": str(_safe_int(evaluation.get("score", 0))),
+                "feedback": evaluation.get("feedback", ""),
+            },
+        )
+        metrics_delta = {
+            "gemini_calls": 1,
+            "gemini_questions": 0,
+            "bank_questions": 0,
+            "bank_shortfall": 0,
+            "generation_batches": 1,
+        }
+        generated_count = _safe_int(session.get("generated_count", 0))
+        follow_text = (evaluation.get("followup_question") or "").strip()
+        if answered_count < max_questions and session.get("status") == "in_progress":
+            qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
+                redis=redis,
+                session_id=session_id,
+                session=session,
+                answered_count=answered_count,
+                suggested_text=follow_text,
+                suggested_difficulty=evaluation.get("difficulty", "medium"),
+                suggested_category=evaluation.get("category", "follow-up"),
+            )
+            if qid:
+                generated_count += 1
+                if used_model_followup:
+                    metrics_delta["gemini_questions"] += 1
+        await _apply_generation_metric_delta(
+            db=db,
+            redis=redis,
+            session_id=session_id,
+            session=session,
+            metrics_delta=metrics_delta,
+            generated_count=generated_count,
+        )
+        await flush_backlog_to_queue(
+            redis=redis,
+            session_id=session_id,
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        if session.get("status") == "in_progress":
+            qid, q = await peek_next_question(redis, session_id)
+            if qid and q:
+                _schedule_question_audio_prefetch(
+                    [q.get("question", "")],
+                    _normalize_voice_gender(session.get("speech_voice_gender")),
+                )
+async def _post_submit_topic_processing(
+    session_id: str,
+    answered_count: int,
+) -> None:
+    db = get_db()
+    redis = get_redis()
+    if answered_count < TOPIC_INITIAL_ASK_COUNT:
+        return
+    async with _get_post_submit_lock(session_id):
+        session = await redis.hgetall(f"session:{session_id}")
+        if not session:
+            return
+        if session.get("topic_followups_generated", "0") == "1":
+            return
+        qa_pairs = await get_session_qa(session_id)
+        excluded_questions = await _get_session_question_texts(redis, session_id)
+        ai_items = await generate_topic_followup_batch(
+            topic_name=session.get("role_title", "Topic Interview"),
+            qa_pairs=qa_pairs,
+            excluded_questions=excluded_questions,
+            count=TOPIC_AI_FOLLOWUPS,
+        )
+        db_items = await _sample_topic_questions(
+            db=db,
+            topic_id=session.get("topic_id", ""),
+            excluded_questions=excluded_questions + [i.get("question", "") for i in ai_items],
+            limit=TOPIC_DB_FOLLOWUPS,
+        )
+        topic_added = 0
+        for item in ai_items + db_items:
+            qid = await enqueue_question(
+                redis=redis,
+                session_id=session_id,
+                question=item.get("question", ""),
+                difficulty=item.get("difficulty", "medium"),
+                category=item.get("category", session.get("role_title", "topic")),
+                ttl_seconds=SESSION_TTL,
+                max_queue_size=MAX_QUEUE_SIZE,
+            )
+            if qid:
+                topic_added += 1
+        generated_count = _safe_int(session.get("generated_count", 0)) + topic_added
+        await _apply_generation_metric_delta(
+            db=db,
+            redis=redis,
+            session_id=session_id,
+            session=session,
+            metrics_delta={
+                "gemini_calls": 1,
+                "gemini_questions": len(ai_items),
+                "bank_questions": len(db_items),
+                "bank_shortfall": 0,
+                "generation_batches": 1,
+            },
+            generated_count=generated_count,
+            extra_redis_fields={"topic_followups_generated": "1"},
+            extra_db_fields={"topic_followups_generated": True},
+        )
+        await flush_backlog_to_queue(
+            redis=redis,
+            session_id=session_id,
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        if session.get("status") == "in_progress":
+            qid, q = await peek_next_question(redis, session_id)
+            if qid and q:
+                _schedule_question_audio_prefetch(
+                    [q.get("question", "")],
+                    _normalize_voice_gender(session.get("speech_voice_gender")),
+                )
+def _schedule_post_submit_processing(
+    *,
+    session_id: str,
+    question_id: str,
+    question_text: str,
+    answer: str,
+    answered_count: int,
+    max_questions: int,
+    interview_type: str,
+) -> None:
+    try:
+        if interview_type == "resume":
+            task = asyncio.create_task(
+                _post_submit_resume_processing(
+                    session_id=session_id,
+                    question_id=question_id,
+                    question_text=question_text,
+                    answer=answer,
+                    answered_count=answered_count,
+                    max_questions=max_questions,
+                )
+            )
+            task.add_done_callback(_consume_post_submit_task_result)
+            return
+        if interview_type == "topic":
+            task = asyncio.create_task(
+                _post_submit_topic_processing(
+                    session_id=session_id,
+                    answered_count=answered_count,
+                )
+            )
+            task.add_done_callback(_consume_post_submit_task_result)
+    except Exception:
+        # Never block request response on scheduler errors.
+        return
 async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
+    """Submit answer and return next queued question immediately."""
+    started_at = perf_counter()
     db = get_db()
     redis = get_redis()
     session = await redis.hgetall(f"session:{session_id}")
     if not session:
         raise ValueError("Interview session not found or expired")
     if session.get("status") != "in_progress":
         raise ValueError("Interview is not in progress")
+    current_q = await redis.hgetall(f"session:{session_id}:q:{question_id}")
+    current_question_text = current_q.get("question", "")
+    await redis.hset(
+        f"session:{session_id}:a:{question_id}",
+        mapping={
+            "question_id": question_id,
+            "answer": answer,
+            "submitted_at": utc_now(),
+        },
+    )
     await redis.rpush(f"session:{session_id}:answers", question_id)
     await redis.expire(f"session:{session_id}:a:{question_id}", SESSION_TTL)
     await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
+    question_count = _safe_int(session.get("question_count", 1))
+    answered_count = _safe_int(session.get("answered_count", 0)) + 1
+    served_count = _safe_int(session.get("served_count", 1))
+    generated_count = _safe_int(session.get("generated_count", 0))
+    max_questions = _safe_int(session.get("max_questions", MAX_QUESTIONS))
     interview_type = session.get("interview_type", "resume")
+    speech_voice_gender = _normalize_voice_gender(session.get("speech_voice_gender"))
+    if interview_type == "resume" and max_questions < RESUME_MAX_QUESTIONS:
+        max_questions = RESUME_MAX_QUESTIONS
+        await redis.hset(f"session:{session_id}", mapping={"max_questions": str(max_questions)})
+        await db[SESSIONS].update_one(
+            {"session_id": session_id},
+            {"$set": {"max_questions": max_questions}},
+        )
+    _update_local_summary(session_id, current_question_text, answer)
+    await push_context_item(
+        redis=redis,
+        session_id=session_id,
+        item={
+            "question": current_question_text,
+            "answer": answer,
+        },
+        ttl_seconds=SESSION_TTL,
+        max_items=CONTEXT_CACHE_ITEMS,
+    )
     if answered_count >= max_questions:
         await redis.hset(
             f"session:{session_id}",
+            mapping={
+                "status": "completed",
+                "answered_count": str(answered_count),
+            },
         )
         await db[SESSIONS].update_one(
             {"session_id": session_id},
             {"$set": {"status": "completed", "completed_at": utc_now()}},
         )
+        submit_ms = await _record_submit_latency(started_at)
         return {
             "session_id": session_id,
             "next_question": None,
             "is_complete": True,
             "message": "Interview complete! Generating your report...",
+            "submit_ms": submit_ms,
         }
+    await flush_backlog_to_queue(
+        redis=redis,
+        session_id=session_id,
+        ttl_seconds=SESSION_TTL,
+        max_queue_size=MAX_QUEUE_SIZE,
+    )
+    next_question_id, q_data = await pop_next_question(redis, session_id)
+    effective_stats = _current_generation_stats(session)
+    fallback_evaluation = None
+    # Emergency fallback for rare queue-empty cases.
+    if not next_question_id and interview_type == "resume":
+        recent_context = await get_recent_context_items(
+            redis=redis,
+            session_id=session_id,
+            max_items=CONTEXT_CACHE_ITEMS,
+        )
+        excluded_questions = await _get_session_question_texts(redis, session_id)
+        fallback_evaluation = await evaluate_and_generate_followup(
+            role_title=session.get("role_title", "Software Developer"),
+            required_skills=_safe_json_list(session.get("jd_required_skills", "[]")),
+            recent_context=recent_context,
+            current_question=current_question_text,
+            current_answer=answer,
+            excluded_questions=excluded_questions,
+        )
+        await redis.hset(
+            f"session:{session_id}:a:{question_id}",
+            mapping={
+                "score": str(_safe_int(fallback_evaluation.get("score", 0))),
+                "feedback": fallback_evaluation.get("feedback", ""),
+            },
+        )
+        fallback_delta = {
+            "gemini_calls": 1,
+            "gemini_questions": 0,
+            "bank_questions": 0,
+            "bank_shortfall": 0,
+            "generation_batches": 1,
+        }
+        follow_text = (fallback_evaluation.get("followup_question") or "").strip()
+        if answered_count < max_questions:
+            qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
+                redis=redis,
+                session_id=session_id,
+                session=session,
+                answered_count=answered_count,
+                suggested_text=follow_text,
+                suggested_difficulty=fallback_evaluation.get("difficulty", "medium"),
+                suggested_category=fallback_evaluation.get("category", "follow-up"),
             )
+            if qid:
+                generated_count += 1
+                if used_model_followup:
+                    fallback_delta["gemini_questions"] = 1
+        effective_stats = await _apply_generation_metric_delta(
             db=db,
             redis=redis,
             session_id=session_id,
             session=session,
+            metrics_delta=fallback_delta,
             generated_count=generated_count,
         )
+        await flush_backlog_to_queue(
+            redis=redis,
+            session_id=session_id,
+            ttl_seconds=SESSION_TTL,
+            max_queue_size=MAX_QUEUE_SIZE,
+        )
+        next_question_id, q_data = await pop_next_question(redis, session_id)
+    if not next_question_id or not q_data:
+        await redis.hset(
+            f"session:{session_id}",
+            mapping={"status": "completed", "answered_count": str(answered_count)},
+        )
+        await db[SESSIONS].update_one(
+            {"session_id": session_id},
+            {"$set": {"status": "completed", "completed_at": utc_now()}},
+        )
+        submit_ms = await _record_submit_latency(started_at)
+        payload = {
+            "session_id": session_id,
+            "next_question": None,
+            "is_complete": True,
+            "message": "Interview complete! Generating your report...",
+            "submit_ms": submit_ms,
+        }
+        if fallback_evaluation:
+            payload["answer_evaluation"] = {
+                "score": _safe_int(fallback_evaluation.get("score", 0)),
+                "feedback": fallback_evaluation.get("feedback", ""),
             }
+        return payload
+    await mark_question_asked(
+        redis=redis,
+        session_id=session_id,
+        question_text=q_data.get("question", ""),
+        ttl_seconds=SESSION_TTL,
+    )
+    await flush_backlog_to_queue(
+        redis=redis,
+        session_id=session_id,
+        ttl_seconds=SESSION_TTL,
+        max_queue_size=MAX_QUEUE_SIZE,
+    )
+    peek_next_id, peek_q = await peek_next_question(redis, session_id)
+    if peek_next_id and peek_q:
+        _schedule_question_audio_prefetch([peek_q.get("question", "")], speech_voice_gender)
     next_difficulty = q_data.get("difficulty", session.get("current_difficulty", "medium"))
+    new_question_count = question_count + 1
     new_served_count = served_count + 1
+    await redis.hset(
+        f"session:{session_id}",
+        mapping={
+            "question_count": str(new_question_count),
+            "answered_count": str(answered_count),
+            "served_count": str(new_served_count),
+            "generated_count": str(generated_count),
+            "current_difficulty": next_difficulty,
+        },
+    )
+    response = {
         "session_id": session_id,
         "next_question": {
             "question_id": next_question_id,
         "generation_stats": effective_stats,
     }
+    if fallback_evaluation:
+        response["answer_evaluation"] = {
+            "score": _safe_int(fallback_evaluation.get("score", 0)),
+            "feedback": fallback_evaluation.get("feedback", ""),
+        }
+    elif interview_type == "resume":
+        response["answer_evaluation"] = {
+            "status": "processing",
+        }
+    _schedule_post_submit_processing(
+        session_id=session_id,
+        question_id=question_id,
+        question_text=current_question_text,
+        answer=answer,
+        answered_count=answered_count,
+        max_questions=max_questions,
+        interview_type=interview_type,
+    )
+    submit_ms = await _record_submit_latency(started_at)
+    response["submit_ms"] = submit_ms
+    return response
+async def get_next_question(session_id: str, user_id: str) -> dict:
+    """Return the question at the head of the session queue without consuming it.
+    Raises ValueError when the session document is missing, belongs to another
+    user, or has no live Redis state. When the session is not in progress the
+    payload reports ``is_complete=True`` and carries no question.
+    """
+    db = get_db()
+    redis = get_redis()
+    owner_record = await db[SESSIONS].find_one({"session_id": session_id})
+    if not owner_record:
+        raise ValueError("Session not found")
+    if owner_record.get("user_id") != user_id:
+        raise ValueError("Unauthorized access to session")
+    live_state = await redis.hgetall(f"session:{session_id}")
+    if not live_state:
+        raise ValueError("Interview session not found or expired")
+    if live_state.get("status") != "in_progress":
+        return {
+            "session_id": session_id,
+            "next_question": None,
+            "is_complete": True,
+            "message": "Interview is not in progress",
+        }
+    # Top up the active queue from the backlog before looking at its head.
+    await flush_backlog_to_queue(
+        redis=redis,
+        session_id=session_id,
+        ttl_seconds=SESSION_TTL,
+        max_queue_size=MAX_QUEUE_SIZE,
+    )
+    head_id, head_question = await peek_next_question(redis, session_id)
+    pending = await queue_size(redis, session_id)
+    if not head_id or not head_question:
+        return {
+            "session_id": session_id,
+            "next_question": None,
+            "is_complete": False,
+            "message": "No queued question yet",
+            "queue_size": pending,
+        }
+    preview = {
+        "question_id": head_id,
+        "question": head_question.get("question", ""),
+        "difficulty": head_question.get("difficulty", "medium"),
+        "category": head_question.get("category", "general"),
+    }
+    return {
+        "session_id": session_id,
+        "next_question": preview,
+        "is_complete": False,
+        "queue_size": pending,
+        "message": "Next question ready",
+    }
 async def quit_interview(session_id: str, user_id: str) -> dict:
     """Mark an interview as quit and indicate whether a partial report can be generated."""
     """Cleanup process-local state for a completed session."""
     _LOCAL_SUMMARIES.pop(session_id, None)
     _PREGEN_IN_FLIGHT.discard(session_id)
+    _POST_SUBMIT_LOCKS.pop(session_id, None)

backend/services/latency_service.py ADDED Viewed

	@@ -0,0 +1,179 @@

+import math
+from typing import Iterable
+from database import get_redis
+# Metric names this module accepts; other names are ignored on write and filtered out on read.
+LATENCY_METRICS = ("stt_ms", "submit_ms", "gemini_ms")
+# Prefix of the per-metric Redis list keys built by _metric_key.
+_LATENCY_PREFIX = "metrics:latency"
+# Default number of most recent samples summarized by get_latency_metrics.
+_DEFAULT_SAMPLE_SIZE = 500
+# Upper bound applied to any requested sample size.
+_MAX_SAMPLE_SIZE = 5000
+# Default cap on samples kept per metric list (record_latency trims to this).
+_MAX_STORED_ITEMS = 5000
+# Default expiry refreshed on every write: 7 days, in seconds.
+_METRICS_TTL_SECONDS = 7 * 24 * 60 * 60
+def _metric_key(metric_name: str) -> str:
+    """Return the Redis list key that holds samples for ``metric_name``."""
+    return "{}:{}".format(_LATENCY_PREFIX, metric_name)
+def _normalize_metric_names(metric_names: Iterable[str] | None) -> list[str]:
+    """Return the supported metric names requested, lower-cased and de-duplicated.
+    Falsy input selects every metric in ``LATENCY_METRICS``. Names that are not
+    in ``LATENCY_METRICS`` are dropped; first-seen order is preserved. A bare
+    string is read as a comma-separated list of names.
+    """
+    if not metric_names:
+        return list(LATENCY_METRICS)
+    if isinstance(metric_names, str):
+        # Iterating a str yields single characters, which would match no metric.
+        metric_names = metric_names.split(",")
+    normalized: list[str] = []
+    for metric in metric_names:
+        # str() keeps a stray non-string entry from raising on .strip().
+        name = str(metric or "").strip().lower()
+        if name in LATENCY_METRICS and name not in normalized:
+            normalized.append(name)
+    return normalized
+def _normalize_sample_size(sample_size: int) -> int:
+    """Coerce ``sample_size`` to an int within ``[1, _MAX_SAMPLE_SIZE]``.
+    Values that cannot be converted fall back to ``_DEFAULT_SAMPLE_SIZE``.
+    """
+    try:
+        requested = int(sample_size)
+    except Exception:
+        requested = _DEFAULT_SAMPLE_SIZE
+    capped = min(_MAX_SAMPLE_SIZE, requested)
+    return capped if capped > 1 else 1
+def _safe_float(value) -> float | None:
+    """Parse ``value`` as a finite, non-negative float; return ``None`` otherwise."""
+    try:
+        number = float(value)
+    except Exception:
+        return None
+    usable = math.isfinite(number) and number >= 0
+    return number if usable else None
+def _percentile(sorted_values: list[float], percentile: float) -> float | None:
+    """Return the linearly interpolated ``percentile`` of an ascending list.
+    ``sorted_values`` must already be sorted ascending. ``percentile`` is on a
+    0-100 scale and is clamped to that range. Returns ``None`` for an empty list.
+    """
+    if not sorted_values:
+        return None
+    if len(sorted_values) == 1:
+        return sorted_values[0]
+    # Clamp: values above 100 would index past the end, values below 0 would
+    # wrap around through negative indices and return a wrong sample.
+    bounded = min(100.0, max(0.0, percentile))
+    position = ((len(sorted_values) - 1) * bounded) / 100.0
+    lower = int(math.floor(position))
+    upper = int(math.ceil(position))
+    if lower == upper:
+        return sorted_values[lower]
+    weight = position - lower
+    return sorted_values[lower] + (sorted_values[upper] - sorted_values[lower]) * weight
+def _round(value: float | None) -> float | None:
+    """Round ``value`` to two decimal places, passing ``None`` through unchanged."""
+    return None if value is None else round(value, 2)
+async def record_latency(
+    metric_name: str,
+    duration_ms: float,
+    *,
+    ttl_seconds: int = _METRICS_TTL_SECONDS,
+    max_items: int = _MAX_STORED_ITEMS,
+) -> None:
+    """Push one latency sample (milliseconds) onto the metric's Redis list.
+    The write is best-effort: unknown metric names, values rejected by
+    ``_safe_float``, a missing Redis client and Redis errors never raise.
+    Args:
+        metric_name: One of ``LATENCY_METRICS`` (case and padding are ignored).
+        duration_ms: Sample value; must parse as a finite, non-negative number.
+        ttl_seconds: Expiry set on the list after each write.
+        max_items: Number of newest samples kept in the list.
+    """
+    import logging
+    name = (metric_name or "").strip().lower()
+    if name not in LATENCY_METRICS:
+        return
+    value = _safe_float(duration_ms)
+    if value is None:
+        return
+    redis = get_redis()
+    if not redis:
+        return
+    key = _metric_key(name)
+    try:
+        # Newest sample goes to the head; the trim keeps the list bounded.
+        await redis.lpush(key, f"{value:.3f}")
+        await redis.ltrim(key, 0, max(0, int(max_items) - 1))
+        await redis.expire(key, int(ttl_seconds))
+    except Exception:
+        # Callers await this on request paths (e.g. right after a successful
+        # model call); a metrics write failure must not fail the request.
+        logging.getLogger(__name__).warning(
+            "Failed to record %s latency sample", name, exc_info=True
+        )
+async def get_latency_metrics(
+    *,
+    metric_names: Iterable[str] | None = None,
+    sample_size: int = _DEFAULT_SAMPLE_SIZE,
+) -> dict:
+    """Summarize the most recent samples of each requested latency metric.
+    Returns ``{"sample_size": ..., "metrics": {name: summary}}``. When Redis is
+    unavailable every summary is empty and a ``message`` key is added.
+    """
+    selected = _normalize_metric_names(metric_names)
+    limit = _normalize_sample_size(sample_size)
+    client = get_redis()
+    if not client:
+        return {
+            "sample_size": limit,
+            "metrics": {name: _empty_summary() for name in selected},
+            "message": "Redis is not available",
+        }
+    summaries: dict[str, dict] = {}
+    for name in selected:
+        stored = await client.lrange(_metric_key(name), 0, limit - 1)
+        parsed = (_safe_float(item) for item in stored)
+        samples = [sample for sample in parsed if sample is not None]
+        # Redis holds newest first; flip to oldest-first so the final element is the latest sample.
+        samples.reverse()
+        summaries[name] = _build_summary(samples)
+    return {
+        "sample_size": limit,
+        "metrics": summaries,
+    }
+async def reset_latency_metrics(metric_names: Iterable[str] | None = None) -> dict:
+    """Delete the stored samples for the requested metrics (all metrics when omitted).
+    Returns ``{"cleared": [names]}``; when Redis is unavailable the list is
+    empty and a ``message`` key explains why.
+    """
+    selected = _normalize_metric_names(metric_names)
+    client = get_redis()
+    if not client:
+        return {
+            "cleared": [],
+            "message": "Redis is not available",
+        }
+    redis_keys = list(map(_metric_key, selected))
+    if redis_keys:
+        await client.delete(*redis_keys)
+    return {"cleared": selected}
+def _empty_summary() -> dict:
+    """Return a fresh summary for a metric with no samples: count 0, all stats ``None``."""
+    stat_fields = ("min_ms", "avg_ms", "p50_ms", "p95_ms", "max_ms", "last_ms")
+    return {"count": 0, **dict.fromkeys(stat_fields)}
+def _build_summary(values: list[float]) -> dict:
+    """Build count/min/avg/p50/p95/max/last statistics from samples in chronological order.
+    ``last_ms`` is taken from the final element of ``values`` as given; the
+    other statistics are computed on a sorted copy. All figures are rounded
+    to two decimals. An empty list yields the empty summary.
+    """
+    if not values:
+        return _empty_summary()
+    ordered = sorted(values)
+    total = len(ordered)
+    mean = sum(ordered) / total
+    summary = {"count": total}
+    summary["min_ms"] = _round(ordered[0])
+    summary["avg_ms"] = _round(mean)
+    summary["p50_ms"] = _round(_percentile(ordered, 50))
+    summary["p95_ms"] = _round(_percentile(ordered, 95))
+    summary["max_ms"] = _round(ordered[-1])
+    summary["last_ms"] = _round(values[-1])
+    return summary

backend/services/queue_service.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import json
+import re
+from typing import Optional, Tuple
+from utils.helpers import generate_id
+# Suffixes of per-session Redis keys; _key() expands them to "session:<id>:<suffix>".
+# List of question ids ready to be served next.
+QUESTION_QUEUE_SUFFIX = "question_queue"
+# List of question ids that overflowed the queue and wait to be flushed into it.
+QUESTION_BACKLOG_SUFFIX = "question_backlog"
+# List of JSON-encoded context items, newest first.
+CONTEXT_CACHE_SUFFIX = "context_cache"
+# Set of fingerprints of questions marked as asked.
+ASKED_SET_SUFFIX = "asked_questions_set"
+def _key(session_id: str, suffix: str) -> str:
+    """Return the session-scoped Redis key ``session:<session_id>:<suffix>``."""
+    return "session:{}:{}".format(session_id, suffix)
+def question_fingerprint(text: str) -> str:
+    """Return a comparison key for a question: lower-cased ASCII letters/digits, single-spaced.
+    Every character outside ``a-z``, ``0-9`` and whitespace becomes a space
+    before whitespace runs are collapsed. ``None`` or empty text yields ``""``.
+    """
+    lowered = (text or "").strip().lower()
+    without_symbols = re.sub(r"[^a-z0-9\s]", " ", lowered)
+    return re.sub(r"\s+", " ", without_symbols).strip()
+async def mark_question_asked(redis, session_id: str, question_text: str, ttl_seconds: int) -> None:
+    """Add the question's fingerprint to the session's asked-set and refresh the set's expiry.
+    Text whose fingerprint is empty is ignored.
+    """
+    fingerprint = question_fingerprint(question_text)
+    if fingerprint:
+        asked_key = _key(session_id, ASKED_SET_SUFFIX)
+        await redis.sadd(asked_key, fingerprint)
+        await redis.expire(asked_key, ttl_seconds)
+async def is_question_asked(redis, session_id: str, question_text: str) -> bool:
+    """Return True when the question's fingerprint is in the session's asked-set.
+    Text whose fingerprint is empty is reported as not asked, without querying Redis.
+    """
+    fingerprint = question_fingerprint(question_text)
+    if fingerprint:
+        asked_key = _key(session_id, ASKED_SET_SUFFIX)
+        return bool(await redis.sismember(asked_key, fingerprint))
+    return False
+async def _has_in_list(redis, session_id: str, list_key: str, question_text: str) -> bool:
+    """Return True when a question id stored in ``list_key`` has the same fingerprint as ``question_text``.
+    Each id in the list is resolved through its ``session:<id>:q:<qid>`` hash,
+    one lookup per id.
+    """
+    target = question_fingerprint(question_text)
+    if not target:
+        return False
+    for stored_id in await redis.lrange(list_key, 0, -1):
+        record = await redis.hgetall(f"session:{session_id}:q:{stored_id}")
+        candidate = question_fingerprint(record.get("question", ""))
+        if candidate == target:
+            return True
+    return False
+async def _append_question_object(
+    redis,
+    session_id: str,
+    question: str,
+    difficulty: str,
+    category: str,
+    ttl_seconds: int,
+) -> str:
+    """Store a new question hash and append its id to the session's ``questions`` list.
+    Empty ``difficulty``/``category`` default to ``"medium"``/``"general"``.
+    Both keys get ``ttl_seconds`` as expiry. Returns the generated question id.
+    """
+    new_id = generate_id()
+    record = {
+        "question_id": new_id,
+        "question": question,
+        "difficulty": difficulty or "medium",
+        "category": category or "general",
+    }
+    record_key = f"session:{session_id}:q:{new_id}"
+    await redis.hset(record_key, mapping=record)
+    await redis.expire(record_key, ttl_seconds)
+    index_key = f"session:{session_id}:questions"
+    await redis.rpush(index_key, new_id)
+    await redis.expire(index_key, ttl_seconds)
+    return new_id
+async def enqueue_question(
+    redis,
+    session_id: str,
+    question: str,
+    difficulty: str = "medium",
+    category: str = "general",
+    ttl_seconds: int = 7200,
+    max_queue_size: int = 3,
+) -> Optional[str]:
+    """Store a question and place it in the active queue, or in the backlog when the queue is full.
+    Returns the new question id, or ``None`` when the text is blank or its
+    fingerprint is already asked, queued or backlogged.
+    """
+    text = (question or "").strip()
+    if not text:
+        return None
+    queue_key = _key(session_id, QUESTION_QUEUE_SUFFIX)
+    backlog_key = _key(session_id, QUESTION_BACKLOG_SUFFIX)
+    if await is_question_asked(redis, session_id, text):
+        return None
+    for pending_key in (queue_key, backlog_key):
+        if await _has_in_list(redis, session_id, pending_key, text):
+            return None
+    # Length is read before the question object is written, as the placement decision input.
+    current_len = await redis.llen(queue_key)
+    qid = await _append_question_object(
+        redis=redis,
+        session_id=session_id,
+        question=text,
+        difficulty=difficulty,
+        category=category,
+        ttl_seconds=ttl_seconds,
+    )
+    target_key = queue_key if current_len < max_queue_size else backlog_key
+    await redis.rpush(target_key, qid)
+    await redis.expire(target_key, ttl_seconds)
+    return qid
+async def flush_backlog_to_queue(
+    redis,
+    session_id: str,
+    ttl_seconds: int = 7200,
+    max_queue_size: int = 3,
+) -> None:
+    """Move question ids from the backlog to the active queue until it holds ``max_queue_size`` ids.
+    Stops early when the backlog is empty. The expiry of both lists is
+    refreshed afterwards.
+    """
+    queue_key = _key(session_id, QUESTION_QUEUE_SUFFIX)
+    backlog_key = _key(session_id, QUESTION_BACKLOG_SUFFIX)
+    while True:
+        if not (await redis.llen(queue_key) < max_queue_size):
+            break
+        moved_id = await redis.lpop(backlog_key)
+        if not moved_id:
+            break
+        await redis.rpush(queue_key, moved_id)
+    for list_key in (queue_key, backlog_key):
+        await redis.expire(list_key, ttl_seconds)
+async def queue_size(redis, session_id: str) -> int:
+    """Return the number of question ids currently in the session's active queue."""
+    length = await redis.llen(_key(session_id, QUESTION_QUEUE_SUFFIX))
+    return int(length)
+async def pop_next_question(redis, session_id: str) -> Tuple[Optional[str], Optional[dict]]:
+    """Remove the head of the active queue and return ``(question_id, question_hash)``.
+    Returns ``(None, None)`` when the queue is empty.
+    """
+    next_id = await redis.lpop(_key(session_id, QUESTION_QUEUE_SUFFIX))
+    if next_id:
+        record = await redis.hgetall(f"session:{session_id}:q:{next_id}")
+        return next_id, record
+    return None, None
+async def peek_next_question(redis, session_id: str) -> Tuple[Optional[str], Optional[dict]]:
+    """Return ``(question_id, question_hash)`` for the head of the active queue without removing it.
+    Returns ``(None, None)`` when the queue is empty.
+    """
+    head_id = await redis.lindex(_key(session_id, QUESTION_QUEUE_SUFFIX), 0)
+    if head_id:
+        record = await redis.hgetall(f"session:{session_id}:q:{head_id}")
+        return head_id, record
+    return None, None
+async def push_context_item(
+    redis,
+    session_id: str,
+    item: dict,
+    ttl_seconds: int = 7200,
+    max_items: int = 3,
+) -> None:
+    """Push ``item`` (JSON-encoded) onto the head of the session's context cache.
+    The list is then trimmed to its newest entries and its expiry refreshed.
+    """
+    cache_key = _key(session_id, CONTEXT_CACHE_SUFFIX)
+    payload = json.dumps(item, ensure_ascii=True)
+    await redis.lpush(cache_key, payload)
+    last_index = max(0, max_items - 1)
+    await redis.ltrim(cache_key, 0, last_index)
+    await redis.expire(cache_key, ttl_seconds)
+async def get_recent_context_items(redis, session_id: str, max_items: int = 3) -> list[dict]:
+    """Return up to ``max_items`` of the newest cached context items, oldest first.
+    Entries that fail JSON decoding are skipped. ``max_items <= 0`` returns an
+    empty list without querying Redis.
+    """
+    if max_items <= 0:
+        # LRANGE bounds are inclusive, so a 0..0 range would still return one item.
+        return []
+    key = _key(session_id, CONTEXT_CACHE_SUFFIX)
+    raw_items = await redis.lrange(key, 0, max_items - 1)
+    parsed: list[dict] = []
+    for raw in raw_items:
+        try:
+            parsed.append(json.loads(raw))
+        except Exception:
+            # Best-effort cache: a corrupt entry must not hide the valid ones.
+            continue
+    # Convert newest-first storage into chronological order for prompting.
+    parsed.reverse()
+    return parsed

backend/services/stt_service.py CHANGED Viewed

@@ -8,13 +8,60 @@ os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
 _WHISPER_MODEL_CACHE = {}
 _WHISPER_MODEL_LOCK = asyncio.Lock()
 def _resolve_device() -> str:
     pref = os.getenv("WHISPER_DEVICE", "auto").strip().lower()
     if pref in {"cpu", "cuda"}:
         return pref
     try:
         import torch
@@ -31,8 +78,27 @@ def _resolve_compute_type(device: str) -> str:
 def _resolve_model_size() -> str:
-    # Prefer medium for better interview transcription quality.
-    return os.getenv("WHISPER_MODEL_SIZE", "medium").strip() or "medium"
 async def _get_whisper_model():
@@ -55,7 +121,9 @@ async def _get_whisper_model():
             try:
                 return WhisperModel(model_size, device=device, compute_type=compute_type)
-            except Exception:
                 # Keep service resilient if GPU config mismatches runtime.
                 return WhisperModel(model_size, device="cpu", compute_type="int8")
@@ -79,6 +147,9 @@ async def transcribe_audio_bytes(audio_bytes: bytes, filename: str = "speech.web
     model = await _get_whisper_model()
     ext = os.path.splitext(filename or "speech.webm")[1] or ".webm"
     target_language = (language or "en").strip().lower() or "en"
     fd, tmp_path = tempfile.mkstemp(suffix=ext)
     os.close(fd)
@@ -87,15 +158,16 @@ async def transcribe_audio_bytes(audio_bytes: bytes, filename: str = "speech.web
         with open(tmp_path, "wb") as f:
             f.write(audio_bytes)
-        def _transcribe() -> str:
-            segments, _ = model.transcribe(
                 tmp_path,
                 language=target_language,
-                beam_size=1,
-                best_of=1,
-                vad_filter=True,
                 condition_on_previous_text=False,
                 temperature=0.0,
             )
             parts = []
             for seg in segments:
@@ -104,7 +176,22 @@ async def transcribe_audio_bytes(audio_bytes: bytes, filename: str = "speech.web
                     parts.append(text)
             return " ".join(parts).strip()
-        text = await asyncio.to_thread(_transcribe)
         return text
     finally:
         if os.path.exists(tmp_path):

 _WHISPER_MODEL_CACHE = {}
 _WHISPER_MODEL_LOCK = asyncio.Lock()
+_WHISPER_RUNTIME_FORCE_CPU = False
+_WHISPER_LAST_ERROR: str | None = None
+def _is_cuda_runtime_error(error: Exception) -> bool:
+    """Return True when the error's message contains one of the known CUDA/ctranslate2 failure markers.
+    Matching is a case-insensitive substring test; an empty message never matches.
+    """
+    message = str(error or "").strip().lower()
+    if not message:
+        return False
+    for marker in (
+        "cublas64_12.dll",
+        "cublas",
+        "cudnn",
+        "libcudart",
+        "cuda",
+        "ctranslate2",
+        "failed to load library",
+        "cannot be loaded",
+    ):
+        if marker in message:
+            return True
+    return False
+def _force_whisper_cpu_mode(reason: Exception | None = None) -> None:
+    """Switch Whisper to CPU for the rest of the process and evict cached CUDA models.
+    When ``reason`` is given its text is stored in ``_WHISPER_LAST_ERROR``.
+    """
+    global _WHISPER_RUNTIME_FORCE_CPU, _WHISPER_LAST_ERROR
+    _WHISPER_RUNTIME_FORCE_CPU = True
+    if reason is not None:
+        _WHISPER_LAST_ERROR = str(reason)
+    # Drop cached CUDA models so all future requests resolve to CPU safely.
+    cuda_keys = [cache_key for cache_key in _WHISPER_MODEL_CACHE if "|cuda|" in cache_key]
+    for cache_key in cuda_keys:
+        _WHISPER_MODEL_CACHE.pop(cache_key, None)
+def _has_cuda_device_via_ctranslate2() -> bool:
+    """Return True when ctranslate2 reports at least one CUDA device; False on any failure."""
+    try:
+        import ctranslate2
+        device_count = ctranslate2.get_cuda_device_count()
+        return device_count > 0
+    except Exception:
+        # Missing package or a broken CUDA runtime both mean "no usable GPU".
+        return False
 def _resolve_device() -> str:
+    if _WHISPER_RUNTIME_FORCE_CPU:
+        return "cpu"
     pref = os.getenv("WHISPER_DEVICE", "auto").strip().lower()
     if pref in {"cpu", "cuda"}:
         return pref
+    # Prefer ctranslate2 probe first because faster-whisper relies on it.
+    if _has_cuda_device_via_ctranslate2():
+        return "cuda"
     try:
         import torch
 def _resolve_model_size() -> str:
+    """Return the Whisper model size from ``WHISPER_MODEL_SIZE``, defaulting to ``small.en``."""
+    # Fast default for real-time interview UX; can be overridden in env.
+    configured = os.getenv("WHISPER_MODEL_SIZE", "small.en").strip()
+    return configured if configured else "small.en"
+def _resolve_beam_size() -> int:
+    """Return the beam size from ``WHISPER_BEAM_SIZE``: at least 1, and 1 when unset or not an integer."""
+    raw = os.getenv("WHISPER_BEAM_SIZE", "1")
+    try:
+        parsed = int(raw)
+    except Exception:
+        return 1
+    return parsed if parsed > 1 else 1
+def _resolve_best_of() -> int:
+    """Return the ``best_of`` value from ``WHISPER_BEST_OF``: at least 1, and 1 when unset or not an integer."""
+    raw = os.getenv("WHISPER_BEST_OF", "1")
+    try:
+        parsed = int(raw)
+    except Exception:
+        return 1
+    return parsed if parsed > 1 else 1
+def _resolve_vad_filter() -> bool:
+    """Return True when ``WHISPER_VAD_FILTER`` is set to 1/true/yes/on (case-insensitive); off by default."""
+    flag = os.getenv("WHISPER_VAD_FILTER", "0").strip().lower()
+    return flag in ("1", "true", "yes", "on")
 async def _get_whisper_model():
             try:
                 return WhisperModel(model_size, device=device, compute_type=compute_type)
+            except Exception as exc:
+                if device == "cuda" and _is_cuda_runtime_error(exc):
+                    _force_whisper_cpu_mode(exc)
                 # Keep service resilient if GPU config mismatches runtime.
                 return WhisperModel(model_size, device="cpu", compute_type="int8")
     model = await _get_whisper_model()
     ext = os.path.splitext(filename or "speech.webm")[1] or ".webm"
     target_language = (language or "en").strip().lower() or "en"
+    beam_size = _resolve_beam_size()
+    best_of = _resolve_best_of()
+    vad_filter = _resolve_vad_filter()
     fd, tmp_path = tempfile.mkstemp(suffix=ext)
     os.close(fd)
         with open(tmp_path, "wb") as f:
             f.write(audio_bytes)
+        def _transcribe(model_instance) -> str:
+            segments, _ = model_instance.transcribe(
                 tmp_path,
                 language=target_language,
+                beam_size=beam_size,
+                best_of=best_of,
+                vad_filter=vad_filter,
                 condition_on_previous_text=False,
                 temperature=0.0,
+                without_timestamps=True,
             )
             parts = []
             for seg in segments:
                     parts.append(text)
             return " ".join(parts).strip()
+        try:
+            text = await asyncio.to_thread(_transcribe, model)
+        except Exception as exc:
+            if not _is_cuda_runtime_error(exc):
+                raise RuntimeError(f"Whisper transcription failed: {str(exc)}") from exc
+            # Runtime CUDA failures can occur even after successful model construction.
+            _force_whisper_cpu_mode(exc)
+            cpu_model = await _get_whisper_model()
+            try:
+                text = await asyncio.to_thread(_transcribe, cpu_model)
+            except Exception as retry_exc:
+                raise RuntimeError(
+                    f"Whisper transcription failed after CPU fallback: {str(retry_exc)}"
+                ) from retry_exc
         return text
     finally:
         if os.path.exists(tmp_path):

backend/services/tts_service.py CHANGED Viewed

@@ -3,17 +3,20 @@ import os
 import tempfile
 from typing import Tuple
 from collections import OrderedDict
 _MODEL_CACHE = {}
 _MODEL_LOCK = asyncio.Lock()
 _AUDIO_CACHE = OrderedDict()
 _AUDIO_CACHE_LOCK = asyncio.Lock()
 _SYNTHESIZE_LOCK = asyncio.Lock()
 XTTS_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
 XTTS_LANGUAGE = "en"
 XTTS_SPEED = 1.2
 _XTTS_WARM = False
 AUDIO_CACHE_MAX_ITEMS = 300
@@ -37,6 +40,37 @@ XTTS_SPEAKER_BY_GENDER = {
 }
 def _select_model(voice_gender: str) -> Tuple[str, str | None]:
     gender = (voice_gender or "female").strip().lower()
     if gender == "male":
@@ -52,6 +86,8 @@ async def _get_tts_model(model_name: str):
             return _MODEL_CACHE[model_name]
         def _load_model():
             try:
                 from TTS.api import TTS
             except Exception as exc:
@@ -73,14 +109,26 @@ async def _get_tts_model(model_name: str):
                 except Exception:
                     use_gpu = False
             if use_gpu:
                 try:
-                    return TTS(model_name=model_name, progress_bar=False, gpu=True)
                 except Exception:
                     # Graceful CPU fallback when CUDA runtime is unavailable/mismatched.
-                    return TTS(model_name=model_name, progress_bar=False, gpu=False)
-            return TTS(model_name=model_name, progress_bar=False, gpu=False)
         model = await asyncio.to_thread(_load_model)
         _MODEL_CACHE[model_name] = model
@@ -110,17 +158,27 @@ def _normalize_text_for_speech(value: str, max_length: int = XTTS_MAX_TEXT_LENGT
     return trimmed
-async def warmup_xtts_model() -> None:
     """Preload XTTS to avoid long cold-start on first interview question."""
-    global _XTTS_WARM
     if _XTTS_WARM:
-        return
     try:
         await _get_tts_model(XTTS_MODEL)
         _XTTS_WARM = True
-    except Exception:
-        # Keep API startup resilient; synthesis route still has fallbacks.
-        pass
 def _synthesize_xtts_to_file(tts, text: str, speaker: str, file_path: str) -> None:

 import tempfile
 from typing import Tuple
 from collections import OrderedDict
+from functools import wraps
 _MODEL_CACHE = {}
 _MODEL_LOCK = asyncio.Lock()
 _AUDIO_CACHE = OrderedDict()
 _AUDIO_CACHE_LOCK = asyncio.Lock()
 _SYNTHESIZE_LOCK = asyncio.Lock()
+_TORCH_LOAD_PATCHED = False
 XTTS_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
 XTTS_LANGUAGE = "en"
 XTTS_SPEED = 1.2
 _XTTS_WARM = False
+_XTTS_LAST_ERROR: str | None = None
 AUDIO_CACHE_MAX_ITEMS = 300
 }
+def _resolve_xtts_checkpoint_trust() -> bool:
+    """Return True when ``XTTS_TRUSTED_CHECKPOINTS`` is 1/true/yes/on (case-insensitive); on by default."""
+    flag = os.getenv("XTTS_TRUSTED_CHECKPOINTS", "1").strip().lower()
+    return flag in ("1", "true", "yes", "on")
+def _ensure_torch_load_compat_for_xtts() -> None:
+    """Patch torch.load default for PyTorch 2.6+ when loading trusted XTTS checkpoints."""
+    # NOTE(review): the wrapper replaces ``torch.load`` on the module itself, so every
+    # later call in this process that omits ``weights_only`` gets ``False``, not only
+    # XTTS loads. That presumably permits full unpickling of any checkpoint loaded
+    # afterwards; confirm all such files are trusted.
+    global _TORCH_LOAD_PATCHED
+    # Run at most once, and only when the env setting marks checkpoints as trusted.
+    if _TORCH_LOAD_PATCHED or not _resolve_xtts_checkpoint_trust():
+        return
+    try:
+        import torch
+    except Exception:
+        # torch cannot be imported, so there is nothing to patch.
+        return
+    original_load = getattr(torch, "load", None)
+    if not callable(original_load):
+        return
+    @wraps(original_load)
+    def _torch_load_compat(*args, **kwargs):
+        # Coqui XTTS checkpoints require full object unpickling on newer PyTorch.
+        # setdefault leaves an explicit weights_only passed by the caller untouched.
+        kwargs.setdefault("weights_only", False)
+        return original_load(*args, **kwargs)
+    torch.load = _torch_load_compat
+    _TORCH_LOAD_PATCHED = True
 def _select_model(voice_gender: str) -> Tuple[str, str | None]:
     gender = (voice_gender or "female").strip().lower()
     if gender == "male":
             return _MODEL_CACHE[model_name]
         def _load_model():
+            _ensure_torch_load_compat_for_xtts()
             try:
                 from TTS.api import TTS
             except Exception as exc:
                 except Exception:
                     use_gpu = False
+            # TTS(..., gpu=...) is deprecated upstream. Load once, then move model.
+            tts = TTS(model_name=model_name, progress_bar=False)
             if use_gpu:
                 try:
+                    tts.to("cuda")
+                    return tts
                 except Exception:
                     # Graceful CPU fallback when CUDA runtime is unavailable/mismatched.
+                    try:
+                        tts.to("cpu")
+                    except Exception:
+                        pass
+                    return tts
+            try:
+                tts.to("cpu")
+            except Exception:
+                pass
+            return tts
         model = await asyncio.to_thread(_load_model)
         _MODEL_CACHE[model_name] = model
     return trimmed
+async def warmup_xtts_model() -> bool:
     """Preload XTTS to avoid long cold-start on first interview question."""
+    # Returns True when the model is loaded (now or on an earlier call) and False
+    # when loading raised; the error text is then kept in _XTTS_LAST_ERROR.
+    global _XTTS_WARM, _XTTS_LAST_ERROR
     if _XTTS_WARM:
+        return True
     try:
         await _get_tts_model(XTTS_MODEL)
         _XTTS_WARM = True
+        # A successful load clears any error recorded by a previous attempt.
+        _XTTS_LAST_ERROR = None
+        return True
+    except Exception as exc:
+        # Keep API startup resilient; routes decide whether to surface this.
+        _XTTS_LAST_ERROR = str(exc)
+        return False
+def get_xtts_warmup_state() -> dict:
+    """Return the current XTTS warm-up flag and the last recorded warm-up error text."""
+    return dict(is_warm=_XTTS_WARM, last_error=_XTTS_LAST_ERROR)
 def _synthesize_xtts_to_file(tts, text: str, speaker: str, file_path: str) -> None:

backend/utils/gemini.py CHANGED Viewed

@@ -3,8 +3,11 @@ from config import get_settings
 from utils.skills import normalize_skill_list
 import asyncio
 import json
 import re
 from langchain_core.prompts import PromptTemplate
 settings = get_settings()
@@ -25,30 +28,52 @@ def _is_transient_gemini_error(error: Exception) -> bool:
     return any(marker in message for marker in transient_markers)
-async def call_gemini(prompt: str, system_instruction: str = None) -> str:
     """Call Gemini API with a prompt and optional system instruction."""
     config = {}
     if system_instruction:
         config["system_instruction"] = system_instruction
     config["response_mime_type"] = "application/json"
     last_error = None
-    max_attempts = 3
-    for attempt in range(max_attempts):
         try:
-            response = client.models.generate_content(
-                model=settings.GEMINI_MODEL,
-                contents=prompt,
-                config=config if config else None,
-            )
             return (response.text or "").strip()
         except Exception as exc:
             last_error = exc
-            if _is_transient_gemini_error(exc) and attempt < max_attempts - 1:
                 await asyncio.sleep(0.8 * (attempt + 1))
                 continue
             break
     raise RuntimeError(f"Gemini request failed: {last_error}")
@@ -72,6 +97,25 @@ def _extract_json_object(text: str) -> str:
     return value
 def _fallback_skill_scan(resume_text: str) -> list:
     common = [
         "python", "java", "javascript", "typescript", "react", "next.js", "node.js",
@@ -386,7 +430,7 @@ Return ONLY JSON, no markdown."""
     prompt = prompt_template.format(context=context, count=count)
     try:
-        result = (await call_gemini(prompt)).strip()
         data = json.loads(result)
         if not isinstance(data, list):
             raise ValueError("Batch response is not a list")
@@ -426,6 +470,140 @@ Return ONLY JSON, no markdown."""
         return fallback
 async def generate_followup_question_batch_from_qa(
     role_title: str,
     skills: list,

 from utils.skills import normalize_skill_list
 import asyncio
 import json
+import random
 import re
+from time import perf_counter
 from langchain_core.prompts import PromptTemplate
+from services.latency_service import record_latency
 settings = get_settings()
     return any(marker in message for marker in transient_markers)
async def call_gemini(
    prompt: str,
    system_instruction: str = None,
    *,
    max_attempts: int = 3,
    request_timeout_seconds: float | None = None,
) -> str:
    """Call the Gemini API and return the stripped response text.

    The request always asks for ``application/json`` output. The client call
    runs in a worker thread via ``asyncio.to_thread`` so it does not block the
    event loop. Elapsed time (including retries and back-off) is reported once
    under the ``"gemini_ms"`` latency key, on success and on failure alike.

    Args:
        prompt: Prompt text sent as the request contents.
        system_instruction: Optional system instruction added to the config.
        max_attempts: Upper bound on attempts; values below 1 (or falsy
            values) are treated as 1.
        request_timeout_seconds: Per-attempt timeout. ``None`` or a
            non-positive value disables the timeout.

    Returns:
        The response text with surrounding whitespace removed, or ``""`` when
        the response carries no text.

    Raises:
        RuntimeError: When the request fails with a non-transient error or the
            attempts are exhausted. The original exception is attached as
            ``__cause__``.
    """
    import logging

    started_at = perf_counter()
    config = {}
    if system_instruction:
        config["system_instruction"] = system_instruction
    config["response_mime_type"] = "application/json"

    def _invoke():
        return client.models.generate_content(
            model=settings.GEMINI_MODEL,
            contents=prompt,
            config=config,
        )

    async def _record_elapsed() -> None:
        # Telemetry is best-effort: a failure here must neither discard a
        # successful response nor mask the real Gemini error.
        try:
            elapsed_ms = (perf_counter() - started_at) * 1000.0
            await record_latency("gemini_ms", elapsed_ms)
        except Exception:
            logging.getLogger(__name__).warning(
                "Failed to record gemini_ms latency", exc_info=True
            )

    use_timeout = bool(request_timeout_seconds and request_timeout_seconds > 0)
    last_error = None
    attempts = max(1, int(max_attempts or 1))
    for attempt in range(attempts):
        try:
            if use_timeout:
                # NOTE: on timeout only the await is abandoned; the worker
                # thread cannot be cancelled and runs to completion.
                response = await asyncio.wait_for(
                    asyncio.to_thread(_invoke),
                    timeout=request_timeout_seconds,
                )
            else:
                response = await asyncio.to_thread(_invoke)
            text = (response.text or "").strip()
        except Exception as exc:
            last_error = exc
            if _is_transient_gemini_error(exc) and attempt < attempts - 1:
                # Linear back-off: 0.8s, 1.6s, ...
                await asyncio.sleep(0.8 * (attempt + 1))
                continue
            break
        await _record_elapsed()
        return text

    await _record_elapsed()
    # Some exceptions (e.g. a timeout) stringify to "", so fall back to repr.
    detail = str(last_error) or repr(last_error)
    raise RuntimeError(f"Gemini request failed: {detail}") from last_error
     return value
+def _extract_json_array(text: str) -> str:
+    value = (text or "").strip()
+    if value.startswith("```"):
+        value = value.split("\n", 1)[1]
+    if value.endswith("```"):
+        value = value.rsplit("```", 1)[0]
+    value = value.strip()
+    if value.startswith("[") and value.endswith("]"):
+        return value
+    start = value.find("[")
+    end = value.rfind("]")
+    if start != -1 and end != -1 and end > start:
+        return value[start:end + 1]
+    return value
 def _fallback_skill_scan(resume_text: str) -> list:
     common = [
         "python", "java", "javascript", "typescript", "react", "next.js", "node.js",
     prompt = prompt_template.format(context=context, count=count)
     try:
+        result = _extract_json_array((await call_gemini(prompt)).strip())
         data = json.loads(result)
         if not isinstance(data, list):
             raise ValueError("Batch response is not a list")
         return fallback
def _round_difficulty_for_index(index: int) -> str:
    """Return the default difficulty for the 0-based question position."""
    if index <= 1:
        return "easy"
    if index <= 6:
        return "medium"
    return "hard"


async def generate_realtime_technical_round(
    role_title: str,
    resume_skills: list,
    resume_summary: str,
    jd_title: str,
    jd_description: str,
    jd_required_skills: list,
    previous_questions: list,
    count: int = 10,
) -> list:
    """Generate a full interview round plan from opening to closing using resume + JD context.

    Builds a JSON payload from the resume and job-description inputs, asks
    Gemini for exactly ``count`` questions, and normalizes the reply. Items
    with a missing or malformed field get a per-field default; a short reply
    is padded with templated questions. If the request or parsing fails, a
    fully templated round is returned instead.

    Args:
        role_title: Role the candidate is interviewing for.
        resume_skills: Skills from the resume (normalized before use).
        resume_summary: Summary text from the resume.
        jd_title: Job-description title.
        jd_description: Job-description body text.
        jd_required_skills: Skills required by the JD (normalized before use).
        previous_questions: Earlier questions to avoid; only the last 30 are
            sent to the model.
        count: Number of questions to return; falsy values become 10 and the
            result is clamped to at least 1.

    Returns:
        A list of exactly ``count`` dicts with the keys ``"question"``,
        ``"difficulty"`` (``"easy"``, ``"medium"`` or ``"hard"``) and
        ``"category"``.
    """
    import logging

    count = max(1, int(count or 10))
    skills = normalize_skill_list(resume_skills or [])
    jd_skills = normalize_skill_list(jd_required_skills or [])
    # Use small randomness to avoid deterministic opening phrasing across attempts.
    variation_seed = random.randint(1000, 9999)
    payload = {
        "role_title": role_title,
        "resume_skills": skills,
        "resume_summary": resume_summary,
        "jd_title": jd_title,
        "jd_description": jd_description,
        "jd_required_skills": jd_skills,
        "previous_questions": previous_questions[-30:] if previous_questions else [],
        "count": count,
        "variation_seed": variation_seed,
    }
    prompt_template = PromptTemplate.from_template(
        """You are an expert interviewer creating a realistic technical interview round.
Input JSON:
{payload}
Task:
Generate exactly {count} questions in sequence, simulating one real-time technical round from opening to wrap-up.
Required flow:
1) Opening/warm-up that is specific to the candidate profile and role.
2) Resume-linked experience probe.
3-7) Deep technical questions grounded in JD-required skills.
8) Debugging/failure-mode question.
9) Design/trade-off/decision-making question.
10) Final reflective closing question.
Strict rules:
1. Ask ONLY within JD required skills and role scope.
2. Use resume context to personalize wording and sequencing.
3. Do NOT repeat or closely paraphrase any question in previous_questions.
4. If previous_questions already include a generic "introduce yourself" opener, do not use that opener again.
5. Keep wording concise and interview-ready.
Return ONLY valid JSON array with objects:
- "question": string
- "difficulty": "easy" | "medium" | "hard"
- "category": string
No markdown, no extra text."""
    )
    prompt = prompt_template.format(payload=json.dumps(payload, ensure_ascii=True), count=count)
    valid_difficulties = ("easy", "medium", "hard")
    try:
        result = _extract_json_array((await call_gemini(prompt)).strip())
        data = json.loads(result)
        if not isinstance(data, list):
            raise ValueError("Realtime round response is not a list")

        default_topic = jd_skills[0] if jd_skills else (skills[0] if skills else "this role expectation")
        normalized = []
        for i, item in enumerate(data[:count]):
            if not isinstance(item, dict):
                item = {}

            # Validate each field's type so one malformed item only loses that
            # field rather than raising (e.g. an unhashable "difficulty") and
            # discarding the whole model response.
            question = item.get("question")
            if isinstance(question, str) and question.strip():
                question = question.strip()
            else:
                question = f"Explain your approach to {default_topic}"

            difficulty = item.get("difficulty")
            if not (isinstance(difficulty, str) and difficulty in valid_difficulties):
                difficulty = _round_difficulty_for_index(i)

            category = item.get("category")
            if not (isinstance(category, str) and category.strip()):
                category = "technical-round"

            normalized.append(
                {
                    "question": question,
                    "difficulty": difficulty,
                    "category": category,
                }
            )

        # Pad a short reply so callers always receive `count` questions.
        while len(normalized) < count:
            idx = len(normalized)
            if idx == 0:
                fallback_q = "Walk me through your background and the projects most relevant to this role."
            elif idx == count - 1:
                fallback_q = "If you had one week to improve your readiness for this role, what would you focus on and why?"
            else:
                target_skill = jd_skills[idx % len(jd_skills)] if jd_skills else (skills[idx % len(skills)] if skills else "this requirement")
                fallback_q = f"How would you handle a practical scenario involving {target_skill}?"
            normalized.append(
                {
                    "question": fallback_q,
                    "difficulty": _round_difficulty_for_index(idx),
                    "category": "technical-round",
                }
            )
        return normalized[:count]
    except Exception:
        # Best-effort boundary: the interview must still proceed, but record
        # why the model-generated round was abandoned.
        logging.getLogger(__name__).warning(
            "Realtime technical round generation failed; using templated fallback",
            exc_info=True,
        )
        fallback = []
        skill_pool = jd_skills or skills or ["core technical concepts"]
        for idx in range(count):
            if idx == 0:
                text = "Walk me through your background and the most role-relevant work you have done."
            elif idx == 1:
                text = "Pick one project from your resume and explain your exact responsibilities and impact."
            elif idx == count - 2:
                text = "Describe a difficult production issue you would debug for this role and your step-by-step approach."
            elif idx == count - 1:
                text = "What is one technical area you would improve next for this job, and what is your plan?"
            else:
                text = f"How would you solve a realistic problem involving {skill_pool[idx % len(skill_pool)]}?"
            fallback.append(
                {
                    "question": text,
                    "difficulty": _round_difficulty_for_index(idx),
                    "category": "technical-round",
                }
            )
        return fallback
 async def generate_followup_question_batch_from_qa(
     role_title: str,
     skills: list,

resume-jd-verification-2026-04-10T05-15-44-248Z.pdf ADDED Viewed

	@@ -0,0 +1,646 @@

+%PDF-1.3
+%�߬�
+3 0 obj
+<</Type /Page
+/Parent 1 0 R
+/Resources 2 0 R
+/MediaBox [0 0 595.2799999999999727 841.8899999999999864]
+/Contents 4 0 R
+>>
+endobj
+4 0 obj
+<<
+/Length 5607
+>>
+stream
+0.200025 w
+0 G
+BT
+/F2 16 Tf
+18.3999999999999986 TL
+0 g
+40. 795.8899999999999864 Td
+(Resume vs Job Description Verification) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 771.8899999999999864 Td
+(Verification ID:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 771.8899999999999864 Td
+(b5519c4e-2ab1-4bbd-a5ca-ff133b558b5b) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 753.8899999999999864 Td
+(Saved At:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 753.8899999999999864 Td
+(4/10/2026, 10:45:29 AM) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 735.8899999999999864 Td
+(Role:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 735.8899999999999864 Td
+(Generative AI Engineer) Tj
+ET
+BT
+/F2 16 Tf
+18.3999999999999986 TL
+0 g
+40. 711.8899999999999864 Td
+(Job Description Snapshot) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 687.8899999999999864 Td
+(JD Title:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 687.8899999999999864 Td
+(AI Engineering Intern) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 669.8899999999999864 Td
+(Company:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 669.8899999999999864 Td
+(-) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 651.8899999999999864 Td
+(Required Skills:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 651.8899999999999864 Td
+(Basic understanding of Machine Learning concepts \(supervised/unsupervised learning\),) Tj
+T* (Familiarity with Python and libraries like NumPy, Pandas, Scikit-learn, Knowledge of) Tj
+T* (deep learning frameworks \(e.g., TensorFlow or PyTorch\) is a plus Strong analytical and) Tj
+T* (problem-solving skills) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 595.8899999999999864 Td
+(JD Description:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 595.8899999999999864 Td
+(Key Responsibilities) Tj
+T* () Tj
+T* (Assist in developing and implementing AI/ML models and algorithms) Tj
+T* (Work on data preprocessing, cleaning, and analysis) Tj
+T* (Support model training, evaluation, and optimization) Tj
+T* (Conduct research on the latest AI trends and technologies) Tj
+T* (Collaborate with engineers and product teams to integrate AI solutions) Tj
+T* (Document experiments, processes, and results) Tj
+T* (Participate in brainstorming and problem-solving sessions) Tj
+ET
+BT
+/F2 16 Tf
+18.3999999999999986 TL
+0 g
+40. 463.8899999999999864 Td
+(Resume Snapshot) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 439.8899999999999864 Td
+(Resume File:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 439.8899999999999864 Td
+(Resume.pdf) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 421.8899999999999864 Td
+(Candidate:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 421.8899999999999864 Td
+(SAJITH J) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 403.8899999999999864 Td
+(Email:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 403.8899999999999864 Td
+(jsajith76@gmail.com) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 385.8899999999999864 Td
+(Phone:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 385.8899999999999864 Td
+(+91 8637440071) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 367.8899999999999864 Td
+(Location:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 367.8899999999999864 Td
+(Coimbatore, India) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 349.8899999999999864 Td
+(Extracted Skills:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 349.8899999999999864 Td
+(Python, SQL, RAG Pipelines, Semantic Search, Embedding Models, Vector Similarity) Tj
+T* (Search, Prompt Engineering, LangChain, LangGraph, LangSmith, CNN, Transformers,) Tj
+T* (BERT Fine-tuning, RNN, LSTM, GRU, Encoder Decoder, GAN, Pinecone, ChromaDB,) Tj
+T* (MySQL, FastAPI, Docker, Git, Github, Sentence Transformers, Scikit-learn, Llama 4,) Tj
+T* (Gemini API, E5 Multilingual Embeddings, OCR Based Extraction, PyTorch, BERT) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 279.8899999999999864 Td
+(Experience Summary:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 279.8899999999999864 Td
+(AI & Data Science undergraduate with practical experience in architecting and) Tj
+T* (deploying end-to-end AI systems, specializing in Deep Learning, RAG pipelines, and) Tj
+T* (multimodal modeling.) Tj
+ET
+BT
+/F2 16 Tf
+18.3999999999999986 TL
+0 g
+40. 231.8899999999999864 Td
+(Alignment Result) Tj
+ET
+BT
+/F2 11 Tf
+12.6499999999999986 TL
+0 g
+40. 207.8899999999999864 Td
+(Fit Summary:) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+130. 207.8899999999999864 Td
+(The student presents an exceptional fit for the Generative AI Engineer Intern role,) Tj
+T* (showcasing a strong academic foundation in AI/Data Science, practical deployment) Tj
+T* (experience, and highly specialized skills in Generative AI, RAG pipelines, and LLM) Tj
+T* (development. Their demonstrated proficiency in PyTorch and MLOps tools directly) Tj
+T* (aligns with the job's core responsibilities and 'plus' qualifications.) Tj
+ET
+BT
+/F2 12 Tf
+13.7999999999999989 TL
+0 g
+40. 137.8899999999999864 Td
+(Meeting Expectations) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 121.8899999999999864 Td
+(- Strong foundation in Python and deep learning frameworks, specifically PyTorch, aligning with the) Tj
+T* ('Knowledge of deep learning frameworks \(e.g., TensorFlow or PyTorch\) is a plus' requirement.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 93.8899999999999864 Td
+(- Extensive experience with Machine Learning concepts and models, including CNN, Transformers,) Tj
+T* (BERT, RNN, LSTM, GRU, Encoder Decoder, and GANs, demonstrating a robust understanding of AI/ML) Tj
+T* (models and algorithms.) Tj
+ET
+endstream
+endobj
+5 0 obj
+<</Type /Page
+/Parent 1 0 R
+/Resources 2 0 R
+/MediaBox [0 0 595.2799999999999727 841.8899999999999864]
+/Contents 6 0 R
+>>
+endobj
+6 0 obj
+<<
+/Length 3330
+>>
+stream
+0.200025 w
+0 G
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 795.8899999999999864 Td
+(- Direct and highly relevant skills in Generative AI, RAG Pipelines, Semantic Search, Embedding) Tj
+T* (Models, Vector Similarity Search, and Prompt Engineering, which directly supports 'developing and) Tj
+T* (implementing AI/ML models and algorithms' for a Generative AI role.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 753.8899999999999864 Td
+(- Familiarity with key ML/DL libraries like Scikit-learn, which is explicitly mentioned as a required skill.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 739.8899999999999864 Td
+(- Practical experience with MLOps and deployment tools such as FastAPI, Docker, Git, and Github,) Tj
+T* (indicating the ability to 'integrate AI solutions'.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 711.8899999999999864 Td
+(- Experience with LLMs like Llama 4, Gemini API, and fine-tuning BERT, showing proactive 'research on) Tj
+T* (the latest AI trends and technologies'.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 683.8899999999999864 Td
+(- Skills in managing data for AI, including Pinecone, ChromaDB, MySQL, and OCR Based Extraction,) Tj
+T* (relevant to 'data preprocessing, cleaning, and analysis' and 'model training, evaluation, and optimization'.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 655.8899999999999864 Td
+(- The resume summary highlights 'architecting and deploying end-to-end AI systems', which implies) Tj
+T* (strong analytical and problem-solving skills, as well as the ability to 'collaborate with engineers and) Tj
+T* (product teams'.) Tj
+ET
+BT
+/F2 12 Tf
+13.7999999999999989 TL
+0 g
+40. 607.8899999999999864 Td
+(Missing Expectations) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 591.8899999999999864 Td
+(- While likely used, specific mention of 'NumPy' and 'Pandas' as explicit skills is absent from the resume.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 577.8899999999999864 Td
+(- The resume could more explicitly detail experience in 'data preprocessing, cleaning, and analysis' for) Tj
+T* (diverse datasets, beyond what's implied by 'RAG Pipelines' and 'OCR Based Extraction'.) Tj
+ET
+BT
+/F2 12 Tf
+13.7999999999999989 TL
+0 g
+40. 543.8899999999999864 Td
+(Improvement Suggestions) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 527.8899999999999864 Td
+(- Add 'NumPy' and 'Pandas' to your skills list if you have experience with them, as they are foundational) Tj
+T* (for data manipulation in Python.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 499.8899999999999864 Td
+(- Prepare specific examples from past projects where you handled significant 'data preprocessing,) Tj
+T* (cleaning, and analysis' challenges, detailing the techniques used and the impact.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 471.8899999999999864 Td
+(- When discussing projects, explicitly highlight your contributions to 'documenting experiments,) Tj
+T* (processes, and results' and examples of 'collaborating with engineers and product teams' to showcase) Tj
+T* (teamwork and communication skills.) Tj
+ET
+BT
+/F1 11 Tf
+12.6499999999999986 TL
+0 g
+46. 429.8899999999999864 Td
+(- Quantify your experience where possible \(e.g., 'deployed X RAG pipelines serving Y users', 'improved) Tj
+T* (model performance by Z%'\), to demonstrate impact and scale.) Tj
+ET
+endstream
+endobj
+1 0 obj
+<</Type /Pages
+/Kids [3 0 R 5 0 R ]
+/Count 2
+>>
+endobj
+7 0 obj
+<<
+/Type /Font
+/BaseFont /Helvetica
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+8 0 obj
+<<
+/Type /Font
+/BaseFont /Helvetica-Bold
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+9 0 obj
+<<
+/Type /Font
+/BaseFont /Helvetica-Oblique
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+10 0 obj
+<<
+/Type /Font
+/BaseFont /Helvetica-BoldOblique
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+11 0 obj
+<<
+/Type /Font
+/BaseFont /Courier
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+12 0 obj
+<<
+/Type /Font
+/BaseFont /Courier-Bold
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+13 0 obj
+<<
+/Type /Font
+/BaseFont /Courier-Oblique
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+14 0 obj
+<<
+/Type /Font
+/BaseFont /Courier-BoldOblique
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+15 0 obj
+<<
+/Type /Font
+/BaseFont /Times-Roman
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+16 0 obj
+<<
+/Type /Font
+/BaseFont /Times-Bold
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+17 0 obj
+<<
+/Type /Font
+/BaseFont /Times-Italic
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+18 0 obj
+<<
+/Type /Font
+/BaseFont /Times-BoldItalic
+/Subtype /Type1
+/Encoding /WinAnsiEncoding
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+19 0 obj
+<<
+/Type /Font
+/BaseFont /ZapfDingbats
+/Subtype /Type1
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+20 0 obj
+<<
+/Type /Font
+/BaseFont /Symbol
+/Subtype /Type1
+/FirstChar 32
+/LastChar 255
+>>
+endobj
+2 0 obj
+<<
+/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
+/Font <<
+/F1 7 0 R
+/F2 8 0 R
+/F3 9 0 R
+/F4 10 0 R
+/F5 11 0 R
+/F6 12 0 R
+/F7 13 0 R
+/F8 14 0 R
+/F9 15 0 R
+/F10 16 0 R
+/F11 17 0 R
+/F12 18 0 R
+/F13 19 0 R
+/F14 20 0 R
+>>
+/XObject <<
+>>
+>>
+endobj
+21 0 obj
+<<
+/Producer (jsPDF 4.2.1)
+/CreationDate (D:20260410104544+05'30')
+>>
+endobj
+22 0 obj
+<<
+/Type /Catalog
+/Pages 1 0 R
+/OpenAction [3 0 R /FitH null]
+/PageLayout /OneColumn
+>>
+endobj
+xref
+0 23
+0000000000 65535 f
+0000009330 00000 n
+0000011155 00000 n
+0000000015 00000 n
+0000000152 00000 n
+0000005811 00000 n
+0000005948 00000 n
+0000009393 00000 n
+0000009518 00000 n
+0000009648 00000 n
+0000009781 00000 n
+0000009919 00000 n
+0000010043 00000 n
+0000010172 00000 n
+0000010304 00000 n
+0000010440 00000 n
+0000010568 00000 n
+0000010695 00000 n
+0000010824 00000 n
+0000010957 00000 n
+0000011059 00000 n
+0000011405 00000 n
+0000011491 00000 n
+trailer
+<<
+/Size 23
+/Root 22 0 R
+/Info 21 0 R
+/ID [ <95A654D90B03BE650BD8733007BC1C07> <95A654D90B03BE650BD8733007BC1C07> ]
+>>
+startxref
+11595
+%%EOF