Spaces:

sajith-0701
/

interviewbot

Sleeping

App Files Files Community

sajith-0701 committed on Apr 13

Commit

e39cad1

1 Parent(s): efa0074

v4.1

Browse files

Files changed (7) hide show

backend/config.py +2 -0
backend/main.py +10 -0
backend/services/evaluation_service.py +65 -11
backend/services/gemini_service.py +79 -31
backend/services/interview_service.py +368 -28
backend/services/queue_service.py +22 -3
backend/utils/gemini.py +231 -66

backend/config.py CHANGED Viewed

@@ -11,11 +11,13 @@ load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
 class Settings(BaseSettings):
     # App
     APP_ENV: str = "production"
     APP_PORT: int = 8000
     # Gemini
     GEMINI_API_KEY: str
     GEMINI_MODEL: str = "gemini-2.5-flash"
     # MongoDB Atlas
     MONGO_URI: str

 class Settings(BaseSettings):
     # App
     APP_ENV: str = "production"
+    APP_HOST: str = "0.0.0.0"
     APP_PORT: int = 8000
     # Gemini
     GEMINI_API_KEY: str
     GEMINI_MODEL: str = "gemini-2.5-flash"
+    GEMINI_FALLBACK_MODELS: str = ""
     # MongoDB Atlas
     MONGO_URI: str

backend/main.py CHANGED Viewed

@@ -4,6 +4,7 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 import os
@@ -68,3 +69,12 @@ app.include_router(speech.router, prefix="/speech", tags=["Speech"])
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "version": "1.0.0"}

 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 import os
+import uvicorn
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "version": "1.0.0"}
+# Start the server when this module is executed as a script.
+if __name__ == "__main__":
+    uvicorn.run(
+        "main:app",
+        host=settings.APP_HOST,
+        port=settings.APP_PORT,
+        # Auto-reload is switched on for every APP_ENV other than "production".
+        reload=settings.APP_ENV != "production",
+    )

backend/services/evaluation_service.py CHANGED Viewed

@@ -23,6 +23,20 @@ def _safe_int(value, default: int = 0) -> int:
         return default
 async def generate_report(session_id: str, user_id: str) -> dict:
     """Generate final evaluation report from Redis Q&A data using Gemini."""
     db = get_db()
@@ -30,7 +44,7 @@ async def generate_report(session_id: str, user_id: str) -> dict:
     # Check if report already exists
     existing = await db[RESULTS].find_one({"session_id": session_id})
-    if existing:
         existing["id"] = str(existing["_id"])
         del existing["_id"]
         return _json_safe(existing)
@@ -51,6 +65,25 @@ async def generate_report(session_id: str, user_id: str) -> dict:
     # Get all Q&A from Redis
     qa_pairs = await get_session_qa(session_id)
     if not qa_pairs:
         raise ValueError("No Q&A data found for this session")
@@ -80,21 +113,42 @@ async def generate_report(session_id: str, user_id: str) -> dict:
         },
         "completed_at": utc_now(),
     }
-    inserted = await db[RESULTS].insert_one(result_doc)
     # Store final answers in MongoDB
     for qa in qa_pairs:
-        answer_doc = {
             "session_id": session_id,
             "user_id": user_id,
-            "question_id": qa["question_id"],
-            "question": qa["question"],
-            "answer": qa["answer"],
-            "difficulty": qa["difficulty"],
-            "category": qa["category"],
-            "stored_at": utc_now(),
         }
-        await db[ANSWERS].insert_one(answer_doc)
     # Clean up Redis session data
     question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
@@ -133,5 +187,5 @@ async def generate_report(session_id: str, user_id: str) -> dict:
     cleanup_interview_local_state(session_id)
-    result_doc["id"] = str(inserted.inserted_id)
     return _json_safe(result_doc)

         return default
+def _is_placeholder_report(report: dict) -> bool:
+    """Return True when a stored report looks like a failed-evaluation stub.
+
+    A report counts as a placeholder when any strength or weakness contains
+    "unable to evaluate", any recommendation contains "please retry the
+    interview", or ``detailed_scores`` is missing or empty. Matching is
+    case-insensitive and ignores surrounding whitespace.
+
+    Args:
+        report: Report document; each text field may be a list of items, a
+            single string, or absent/None.
+
+    Returns:
+        True if the report should be regenerated, False otherwise.
+    """
+    def _normalized(field: str) -> list[str]:
+        # Lower-cased, stripped, non-empty entries of one report field.
+        raw = report.get(field) or []
+        # A bare string would otherwise be iterated character by character,
+        # so the marker phrases below could never match it.
+        if isinstance(raw, str):
+            raw = [raw]
+        cleaned = (str(item).strip().lower() for item in raw)
+        return [entry for entry in cleaned if entry]
+    strengths = _normalized("strengths")
+    weaknesses = _normalized("weaknesses")
+    recommendations = _normalized("recommendations")
+    if any("unable to evaluate" in entry for entry in strengths + weaknesses):
+        return True
+    if any("please retry the interview" in entry for entry in recommendations):
+        return True
+    # A report without per-question scores carries no usable evaluation.
+    return not (report.get("detailed_scores") or [])
 async def generate_report(session_id: str, user_id: str) -> dict:
     """Generate final evaluation report from Redis Q&A data using Gemini."""
     db = get_db()
     # Check if report already exists
     existing = await db[RESULTS].find_one({"session_id": session_id})
+    if existing and not _is_placeholder_report(existing):
         existing["id"] = str(existing["_id"])
         del existing["_id"]
         return _json_safe(existing)
     # Get all Q&A from Redis
     qa_pairs = await get_session_qa(session_id)
+    if not qa_pairs:
+        archived_answers = await db[ANSWERS].find(
+            {"session_id": session_id, "user_id": user_id}
+        ).sort("stored_at", 1).to_list(length=200)
+        for item in archived_answers:
+            question = (item.get("question") or "").strip()
+            answer = (item.get("answer") or "").strip()
+            if not question or not answer:
+                continue
+            qa_pairs.append(
+                {
+                    "question_id": item.get("question_id") or "",
+                    "question": question,
+                    "answer": answer,
+                    "difficulty": item.get("difficulty", "medium"),
+                    "category": item.get("category", "general"),
+                }
+            )
     if not qa_pairs:
         raise ValueError("No Q&A data found for this session")
         },
         "completed_at": utc_now(),
     }
+    if existing:
+        await db[RESULTS].update_one(
+            {"_id": existing["_id"]},
+            {"$set": result_doc},
+        )
+        result_doc_id = str(existing["_id"])
+    else:
+        inserted = await db[RESULTS].insert_one(result_doc)
+        result_doc_id = str(inserted.inserted_id)
     # Store final answers in MongoDB
     for qa in qa_pairs:
+        question_id = (qa.get("question_id") or "").strip()
+        upsert_filter = {
             "session_id": session_id,
             "user_id": user_id,
         }
+        if question_id:
+            upsert_filter["question_id"] = question_id
+        else:
+            upsert_filter["question"] = qa.get("question", "")
+        await db[ANSWERS].update_one(
+            upsert_filter,
+            {
+                "$set": {
+                    "question_id": question_id,
+                    "question": qa.get("question", ""),
+                    "answer": qa.get("answer", ""),
+                    "difficulty": qa.get("difficulty", "medium"),
+                    "category": qa.get("category", "general"),
+                    "stored_at": utc_now(),
+                }
+            },
+            upsert=True,
+        )
     # Clean up Redis session data
     question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
     cleanup_interview_local_state(session_id)
+    result_doc["id"] = result_doc_id
     return _json_safe(result_doc)

backend/services/gemini_service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import re
 from utils.gemini import call_gemini
@@ -42,6 +43,30 @@ def _extract_json_array(text: str) -> str:
     return value
 def _fallback_score(answer: str) -> int:
     text = (answer or "").strip().lower()
     words = len(text.split())
@@ -91,6 +116,9 @@ Rules:
 2) Use resume context for relevance.
 3) Do not repeat or paraphrase excluded_questions.
 4) Keep questions concise and practical.
 Return ONLY valid JSON array with objects:
 - question (string)
@@ -99,16 +127,12 @@ Return ONLY valid JSON array with objects:
 """
     try:
-        result = _extract_json_array(
-            await call_gemini(
-                prompt,
-                max_attempts=1,
-                request_timeout_seconds=3.5,
-            )
         )
-        data = json.loads(result)
-        if not isinstance(data, list):
-            raise ValueError("seed output is not a list")
         output = []
         for item in data[:count]:
@@ -124,15 +148,19 @@ Return ONLY valid JSON array with objects:
         return [q for q in output if q.get("question")]
     except Exception:
         base_skill = jd_required_skills[0] if jd_required_skills else (resume_skills[0] if resume_skills else "this role")
         fallback = []
         for i in range(count):
             fallback.append(
                 {
-                    "question": (
-                        f"Explain your hands-on experience with {base_skill} in a project relevant to {role_title}."
-                        if i == 0
-                        else f"What trade-offs did you consider when working with {base_skill}?"
-                    ),
                     "difficulty": "medium",
                     "category": "resume-seed",
                 }
@@ -147,6 +175,8 @@ async def evaluate_and_generate_followup(
     current_question: str,
     current_answer: str,
     excluded_questions: list[str],
 ) -> dict:
     payload = {
         "role_title": role_title,
@@ -155,6 +185,8 @@ async def evaluate_and_generate_followup(
         "current_question": current_question,
         "current_answer": current_answer,
         "excluded_questions": excluded_questions[-25:] if excluded_questions else [],
     }
     prompt = f"""You are a strict technical interviewer.
@@ -171,40 +203,60 @@ Rules:
 2) Use recent_context for continuity.
 3) Do not repeat/paraphrase excluded_questions.
 4) Score should reflect conceptual correctness, not verbosity.
 Return ONLY valid JSON object:
 {{
   "score": 0-100,
   "feedback": "short technical feedback",
   "followup_question": "...",
   "difficulty": "easy|medium|hard",
   "category": "..."
 }}
 """
     try:
-        result = _extract_json_object(
-            await call_gemini(
-                prompt,
-                max_attempts=1,
-                request_timeout_seconds=2.8,
-            )
         )
-        data = json.loads(result)
         followup = (data.get("followup_question") or "").strip()
         return {
             "score": int(data.get("score", 0)),
             "feedback": (data.get("feedback") or "").strip() or "Answer reviewed.",
             "followup_question": followup,
             "difficulty": data.get("difficulty") if data.get("difficulty") in {"easy", "medium", "hard"} else "medium",
             "category": data.get("category") or "follow-up",
         }
     except Exception:
         fallback_skill = required_skills[0] if required_skills else "the selected role requirement"
         return {
             "score": _fallback_score(current_answer),
             "feedback": "Try to explain the mechanism, trade-offs, and one concrete example.",
-            "followup_question": f"Can you walk me through a real scenario where you applied {fallback_skill} and what trade-offs you handled?",
             "difficulty": "medium",
             "category": "follow-up",
         }
@@ -242,16 +294,12 @@ Return ONLY valid JSON array with objects:
 """
     try:
-        result = _extract_json_array(
-            await call_gemini(
-                prompt,
-                max_attempts=1,
-                request_timeout_seconds=3.5,
-            )
         )
-        data = json.loads(result)
-        if not isinstance(data, list):
-            raise ValueError("topic output is not a list")
         out = []
         for item in data[:count]:

 import json
 import re
+import random
 from utils.gemini import call_gemini
     return value
+def _parse_json_object_loose(text: str) -> dict:
+    """Parse a JSON object from model output, tolerating trailing commas.
+
+    The text is passed through ``_extract_json_object`` and parsed strictly.
+    If that fails, commas directly before a closing ``}`` or ``]`` are
+    removed and parsing is retried once.
+
+    Raises:
+        json.JSONDecodeError: If the payload is still invalid after the retry.
+        ValueError: If the decoded payload is not a JSON object.
+    """
+    value = _extract_json_object(text)
+    try:
+        parsed = json.loads(value)
+    except json.JSONDecodeError:
+        # Retry only on genuine parse failures; other errors surface as-is.
+        # NOTE: the pattern is not string-aware, so a ",}" inside a string
+        # value is rewritten too. It only runs once strict parsing has failed.
+        cleaned = re.sub(r",\s*([}\]])", r"\1", value)
+        parsed = json.loads(cleaned)
+    if not isinstance(parsed, dict):
+        raise ValueError("Parsed payload is not a JSON object")
+    return parsed
+def _parse_json_array_loose(text: str) -> list:
+    """Parse a JSON array from model output, tolerating trailing commas.
+
+    The text is passed through ``_extract_json_array`` and parsed strictly.
+    If that fails, commas directly before a closing ``}`` or ``]`` are
+    removed and parsing is retried once.
+
+    Raises:
+        json.JSONDecodeError: If the payload is still invalid after the retry.
+        ValueError: If the decoded payload is not a JSON array.
+    """
+    value = _extract_json_array(text)
+    try:
+        parsed = json.loads(value)
+    except json.JSONDecodeError:
+        # Retry only on genuine parse failures; other errors surface as-is.
+        # NOTE: the pattern is not string-aware, so a ",]" inside a string
+        # value is rewritten too. It only runs once strict parsing has failed.
+        cleaned = re.sub(r",\s*([}\]])", r"\1", value)
+        parsed = json.loads(cleaned)
+    if not isinstance(parsed, list):
+        raise ValueError("Parsed payload is not a JSON array")
+    return parsed
 def _fallback_score(answer: str) -> int:
     text = (answer or "").strip().lower()
     words = len(text.split())
 2) Use resume context for relevance.
 3) Do not repeat or paraphrase excluded_questions.
 4) Keep questions concise and practical.
+5) Make the set diverse: use different styles (scenario, debugging, trade-off, implementation, testing).
+6) Do not prefix with numbering like "Question 1:".
+7) Avoid generic repeats like "Explain your hands-on experience" for every question.
 Return ONLY valid JSON array with objects:
 - question (string)
 """
     try:
+        result = await call_gemini(
+            prompt,
+            max_attempts=3,
+            request_timeout_seconds=20,
         )
+        data = _parse_json_array_loose(result)
         output = []
         for item in data[:count]:
         return [q for q in output if q.get("question")]
     except Exception:
         base_skill = jd_required_skills[0] if jd_required_skills else (resume_skills[0] if resume_skills else "this role")
+        fallback_templates = [
+            "In a project aligned with {role}, where did {skill} materially change your design decisions?",
+            "If your {skill} implementation regressed after deployment for {role}, how would you triage it?",
+            "What trade-offs did you make while using {skill} under real delivery constraints in {role}?",
+            "How did you test and validate a {skill}-based feature before production in {role}?",
+            "Describe one architecture decision around {skill} that improved reliability or scale for {role}.",
+        ]
         fallback = []
         for i in range(count):
+            template = fallback_templates[i % len(fallback_templates)]
             fallback.append(
                 {
+                    "question": template.format(skill=base_skill, role=role_title),
                     "difficulty": "medium",
                     "category": "resume-seed",
                 }
     current_question: str,
     current_answer: str,
     excluded_questions: list[str],
+    focus_topic: str = "",
+    same_topic_streak: int = 0,
 ) -> dict:
     payload = {
         "role_title": role_title,
         "current_question": current_question,
         "current_answer": current_answer,
         "excluded_questions": excluded_questions[-25:] if excluded_questions else [],
+        "focus_topic": focus_topic,
+        "same_topic_streak": int(same_topic_streak or 0),
     }
     prompt = f"""You are a strict technical interviewer.
 2) Use recent_context for continuity.
 3) Do not repeat/paraphrase excluded_questions.
 4) Score should reflect conceptual correctness, not verbosity.
+5) If same_topic_streak is 2 or more, avoid another same-topic follow-up unless truly critical.
+6) Ask in realistic live-interview style (specific scenario, debugging, trade-off, design decision), not generic textbook phrasing.
+7) Do not prefix numbering like "Question 4:".
+8) Avoid repeating the previous follow-up wording pattern.
 Return ONLY valid JSON object:
 {{
   "score": 0-100,
   "feedback": "short technical feedback",
   "followup_question": "...",
+    "followup_topic": "specific required skill/topic for the follow-up",
+    "followup_need_score": 0-100,
   "difficulty": "easy|medium|hard",
   "category": "..."
 }}
 """
     try:
+        result = await call_gemini(
+            prompt,
+            max_attempts=3,
+            request_timeout_seconds=18,
         )
+        data = _parse_json_object_loose(result)
         followup = (data.get("followup_question") or "").strip()
+        try:
+            followup_need_score = int(data.get("followup_need_score", 70))
+        except Exception:
+            followup_need_score = 70
+        followup_need_score = max(0, min(100, followup_need_score))
         return {
             "score": int(data.get("score", 0)),
             "feedback": (data.get("feedback") or "").strip() or "Answer reviewed.",
             "followup_question": followup,
+            "followup_topic": (data.get("followup_topic") or "").strip(),
+            "followup_need_score": followup_need_score,
             "difficulty": data.get("difficulty") if data.get("difficulty") in {"easy", "medium", "hard"} else "medium",
             "category": data.get("category") or "follow-up",
         }
     except Exception:
         fallback_skill = required_skills[0] if required_skills else "the selected role requirement"
+        fallback_templates = [
+            "In a production system for {role}, describe a failure you would expect around {skill} and how you would debug it end-to-end.",
+            "Given a feature built with {skill}, what trade-offs would you make between speed, reliability, and maintainability in {role}?",
+            "How would you test and validate a {skill}-based implementation before release for {role}?",
+            "Walk through one real incident where {skill} decisions changed the final architecture for {role}.",
+        ]
+        template = random.choice(fallback_templates)
         return {
             "score": _fallback_score(current_answer),
             "feedback": "Try to explain the mechanism, trade-offs, and one concrete example.",
+            "followup_question": template.format(skill=fallback_skill, role=role_title),
+            "followup_topic": fallback_skill,
+            "followup_need_score": 70,
             "difficulty": "medium",
             "category": "follow-up",
         }
 """
     try:
+        result = await call_gemini(
+            prompt,
+            max_attempts=3,
+            request_timeout_seconds=20,
         )
+        data = _parse_json_array_loose(result)
         out = []
         for item in data[:count]:

backend/services/interview_service.py CHANGED Viewed

@@ -21,6 +21,7 @@ from services.queue_service import (
     flush_backlog_to_queue,
     get_recent_context_items,
     mark_question_asked,
     peek_next_question,
     pop_next_question,
     push_context_item,
@@ -45,11 +46,28 @@ TOPIC_INITIAL_ASK_COUNT = 4
 TOPIC_AI_FOLLOWUPS = 3
 TOPIC_DB_FOLLOWUPS = 2
 TOPIC_TOTAL_QUESTIONS = 10
 # Local process memory summary requested in workflow.
 _LOCAL_SUMMARIES: dict[str, str] = {}
 _PREGEN_IN_FLIGHT: set[str] = set()
 _POST_SUBMIT_LOCKS: dict[str, asyncio.Lock] = {}
 def _safe_json_list(value: str) -> list:
@@ -67,6 +85,47 @@ def _question_fingerprint(text: str) -> str:
     return base
 def _unique_question_items(items: list[dict], *, excluded_questions: list[str], limit: int) -> list[dict]:
     excluded = {_question_fingerprint(q) for q in excluded_questions if q}
     unique: list[dict] = []
@@ -104,6 +163,15 @@ def _safe_int(value, default: int = 0) -> int:
         return default
 def _normalize_voice_gender(value: str | None) -> str:
     return "male" if (value or "").strip().lower() == "male" else "female"
@@ -164,6 +232,123 @@ def _normalize_bank_difficulty(value: str) -> str:
     return difficulty
 def _avg_recent_answer_words(qa_pairs: list, window: int = 3) -> int:
     if not qa_pairs:
         return 0
@@ -235,32 +420,68 @@ async def _get_recent_user_questions(db, user_id: str, limit: int = 40) -> list[
 def _build_resume_intro_question(role_title: str, jd_title: str) -> str:
-    title = (jd_title or "the selected job description").strip()
     role = (role_title or "this role").strip()
     return (
-        f"Introduce yourself and explain how your background aligns with {role} "
-        f"for {title}."
     )
-def _build_resume_resilient_followup_question(session: dict, question_number: int, variant: int = 0) -> str:
     role_title = (session.get("role_title") or "this role").strip()
-    jd_skills = _safe_json_list(session.get("jd_required_skills", "[]"))
-    focus_skills = _safe_json_list(session.get("skills", "[]"))
-    skill_pool = jd_skills or focus_skills or ["core technical concepts"]
     index = max(0, question_number - 1) + max(0, variant)
-    skill = skill_pool[index % len(skill_pool)]
     templates = [
-        "Question {n}: Describe a real project where you applied {skill} for {role}. What constraints and trade-offs shaped your design?",
-        "Question {n}: If {skill} failed in production for a {role} workflow, how would you debug it step by step?",
-        "Question {n}: Explain how you would test and validate a solution using {skill} before shipping it for {role}.",
-        "Question {n}: Compare two approaches for {skill} in a {role} context and justify the final choice.",
-        "Question {n}: Design an improvement plan to make your {skill} implementation more scalable and reliable for {role}.",
     ]
     template = templates[index % len(templates)]
-    return template.format(n=question_number, skill=skill, role=role_title)
 def _build_topic_resilient_followup_question(session: dict, question_number: int, variant: int = 0) -> str:
@@ -268,14 +489,14 @@ def _build_topic_resilient_followup_question(session: dict, question_number: int
     index = max(0, question_number - 1) + max(0, variant)
     templates = [
-        "Question {n}: Explain {topic} with a practical example from a production-like scenario.",
-        "Question {n}: What are the most common failure patterns in {topic}, and how would you detect them early?",
-        "Question {n}: Design a step-by-step implementation plan for {topic} with measurable checkpoints.",
-        "Question {n}: Compare two approaches in {topic}, including trade-offs in scalability, latency, and maintainability.",
-        "Question {n}: If a {topic} solution regressed after deployment, how would you triage and recover safely?",
     ]
     template = templates[index % len(templates)]
-    return template.format(n=question_number, topic=topic_name)
 async def _enqueue_resume_followup_with_fallback(
@@ -287,8 +508,10 @@ async def _enqueue_resume_followup_with_fallback(
     suggested_text: str,
     suggested_difficulty: str,
     suggested_category: str,
 ) -> tuple[str | None, bool]:
     candidates: list[tuple[str, str, str, bool]] = []
     primary = (suggested_text or "").strip()
     if primary:
@@ -302,12 +525,17 @@ async def _enqueue_resume_followup_with_fallback(
             session=session,
             question_number=question_number,
             variant=variant,
         )
         candidates.append((fallback_text, "medium", "resume-fallback", False))
     seen: set[str] = set()
     for text, difficulty, category, is_primary in candidates:
-        key = _question_fingerprint(text)
         if not key or key in seen:
             continue
         seen.add(key)
@@ -315,13 +543,14 @@ async def _enqueue_resume_followup_with_fallback(
         qid = await enqueue_question(
             redis=redis,
             session_id=session_id,
-            question=text,
             difficulty=difficulty,
             category=category,
             ttl_seconds=SESSION_TTL,
             max_queue_size=MAX_QUEUE_SIZE,
         )
         if qid:
             return qid, is_primary
     return None, False
@@ -338,6 +567,22 @@ async def _get_session_question_texts(redis, session_id: str) -> list[str]:
     return output
 async def _sample_topic_questions(
     db,
     topic_id: str,
@@ -706,13 +951,16 @@ async def _generate_question_batch(
 async def _append_batch_to_redis(redis, session_id: str, batch: list[dict]) -> list[str]:
     created_ids: list[str] = []
     for item in batch:
         qid = generate_id()
         created_ids.append(qid)
         await redis.hset(
             f"session:{session_id}:q:{qid}",
             mapping={
                 "question_id": qid,
-                "question": item.get("question", "Can you explain your approach?"),
                 "difficulty": item.get("difficulty", "medium"),
                 "category": item.get("category", "general"),
             },
@@ -1054,7 +1302,7 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
         f"session:{session_id}:q:{first_id}",
         mapping={
             "question_id": first_id,
-            "question": first_question.get("question", "Can you explain this topic?"),
             "difficulty": first_question.get("difficulty", "medium"),
             "category": first_question.get("category", topic.get("name", "topic")),
         },
@@ -1166,7 +1414,7 @@ async def _start_topic_interview(user_id: str, topic_id: str) -> dict:
         },
         "question": {
             "question_id": first_id,
-            "question": first_question.get("question", "Can you explain this topic?"),
             "difficulty": first_question.get("difficulty", "medium"),
             "question_number": 1,
             "total_questions": TOPIC_TOTAL_QUESTIONS,
@@ -1316,6 +1564,7 @@ async def start_interview(
     skills_for_interview = build_interview_focus_skills(base_skills_for_interview) or list(jd_required_skills)
     intro_question = _build_resume_intro_question(role_title=role_title, jd_title=selected_jd.get("title", ""))
     session_id = generate_id()
     _LOCAL_SUMMARIES[session_id] = ""
@@ -1497,6 +1746,17 @@ async def _post_submit_resume_processing(
         if not session:
             return
         recent_context = await get_recent_context_items(
             redis=redis,
             session_id=session_id,
@@ -1510,6 +1770,8 @@ async def _post_submit_resume_processing(
             current_question=question_text,
             current_answer=answer,
             excluded_questions=excluded_questions,
         )
         await redis.hset(
@@ -1529,7 +1791,16 @@ async def _post_submit_resume_processing(
         }
         generated_count = _safe_int(session.get("generated_count", 0))
-        follow_text = (evaluation.get("followup_question") or "").strip()
         if answered_count < max_questions and session.get("status") == "in_progress":
             qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
                 redis=redis,
@@ -1539,6 +1810,7 @@ async def _post_submit_resume_processing(
                 suggested_text=follow_text,
                 suggested_difficulty=evaluation.get("difficulty", "medium"),
                 suggested_category=evaluation.get("category", "follow-up"),
             )
             if qid:
                 generated_count += 1
@@ -1775,6 +2047,9 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
         mapping={
             "question_id": question_id,
             "answer": answer,
             "submitted_at": utc_now(),
         },
     )
@@ -1782,6 +2057,24 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
     await redis.expire(f"session:{session_id}:a:{question_id}", SESSION_TTL)
     await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
     question_count = _safe_int(session.get("question_count", 1))
     answered_count = _safe_int(session.get("answered_count", 0)) + 1
     served_count = _safe_int(session.get("served_count", 1))
@@ -1853,6 +2146,17 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
     # Emergency fallback for rare queue-empty cases.
     if not next_question_id and interview_type == "resume":
         recent_context = await get_recent_context_items(
             redis=redis,
             session_id=session_id,
@@ -1866,6 +2170,8 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
             current_question=current_question_text,
             current_answer=answer,
             excluded_questions=excluded_questions,
         )
         await redis.hset(
@@ -1883,7 +2189,16 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
             "bank_shortfall": 0,
             "generation_batches": 1,
         }
-        follow_text = (fallback_evaluation.get("followup_question") or "").strip()
         if answered_count < max_questions:
             qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
                 redis=redis,
@@ -1893,6 +2208,7 @@ async def submit_answer(session_id: str, question_id: str, answer: str) -> dict:
                 suggested_text=follow_text,
                 suggested_difficulty=fallback_evaluation.get("difficulty", "medium"),
                 suggested_category=fallback_evaluation.get("category", "follow-up"),
             )
             if qid:
                 generated_count += 1
@@ -2143,9 +2459,33 @@ async def get_session_qa(session_id: str) -> list:
     """Get all Q&A pairs from Redis for a session."""
     redis = get_redis()
-    question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
     qa_pairs = []
     for qid in question_ids:
         q = await redis.hgetall(f"session:{session_id}:q:{qid}")
         a = await redis.hgetall(f"session:{session_id}:a:{qid}")

     flush_backlog_to_queue,
     get_recent_context_items,
     mark_question_asked,
+    normalize_question_text,
     peek_next_question,
     pop_next_question,
     push_context_item,
 TOPIC_AI_FOLLOWUPS = 3
 TOPIC_DB_FOLLOWUPS = 2
 TOPIC_TOTAL_QUESTIONS = 10
+# Same-topic streak length at which _apply_resume_followup_policy starts
+# considering a switch to a different focus skill.
+MAX_SAME_TOPIC_FOLLOWUPS = 2
+# Once that streak is reached, a further same-topic follow-up is only kept when
+# the follow-up need score (clamped to 0-100) is at least this value.
+THIRD_FOLLOWUP_NEED_SCORE = 95
 # Local process memory summary requested in workflow.
 _LOCAL_SUMMARIES: dict[str, str] = {}
 _PREGEN_IN_FLIGHT: set[str] = set()
 _POST_SUBMIT_LOCKS: dict[str, asyncio.Lock] = {}
+# Filler words that _question_token_set drops before question token sets are
+# compared for similarity.
+_QUESTION_STOPWORDS = {
+    "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "how", "if", "in", "into",
+    "is", "it", "of", "on", "or", "that", "the", "this", "to", "using", "what", "when", "with", "would",
+}
+# Fingerprinted names of generic soft skills. _resume_skill_pool compares each
+# skill's fingerprint against this set and leaves these out when at least two
+# other skills remain.
+_GENERIC_SOFT_SKILL_KEYS = {
+    "problem solving",
+    "analytical skills",
+    "communication",
+    "communication skills",
+    "teamwork",
+    "leadership",
+    "adaptability",
+    "time management",
+    "critical thinking",
+}
 def _safe_json_list(value: str) -> list:
     return base
+def _question_token_set(text: str) -> set[str]:
+    """Return the distinct non-stopword words of the fingerprinted ``text``."""
+    fingerprint = _question_fingerprint(text)
+    return {
+        word
+        for word in fingerprint.split()
+        if word and word not in _QUESTION_STOPWORDS
+    }
+def _is_question_too_similar(candidate: str, recent_questions: list[str]) -> bool:
+    """Tell whether ``candidate`` near-duplicates one of the latest questions.
+
+    Only the last five entries of ``recent_questions`` are inspected. The
+    candidate counts as too similar when, compared on fingerprints, it
+    is identical to an earlier question, shares the same first six words, or
+    its non-stopword token set has a Jaccard overlap of at least 0.72.
+    A candidate whose fingerprint is empty is always rejected (``True``).
+    """
+    candidate_key = _question_fingerprint(candidate)
+    if not candidate_key:
+        return True
+    candidate_tokens = _question_token_set(candidate)
+    candidate_opening = " ".join(candidate_key.split()[:6])
+    for previous in (recent_questions or [])[-5:]:
+        previous_key = _question_fingerprint(previous)
+        if not previous_key:
+            continue
+        if previous_key == candidate_key:
+            return True
+        previous_opening = " ".join(previous_key.split()[:6])
+        if candidate_opening and previous_opening == candidate_opening:
+            return True
+        previous_tokens = _question_token_set(previous)
+        if not (candidate_tokens and previous_tokens):
+            # Jaccard overlap is undefined when either side has no tokens left.
+            continue
+        shared = candidate_tokens & previous_tokens
+        combined = candidate_tokens | previous_tokens
+        if len(shared) / len(combined) >= 0.72:
+            return True
+    return False
 def _unique_question_items(items: list[dict], *, excluded_questions: list[str], limit: int) -> list[dict]:
     excluded = {_question_fingerprint(q) for q in excluded_questions if q}
     unique: list[dict] = []
         return default
+def _safe_score_0_100(value, default: int = 0) -> int:
+    """Convert ``value`` with ``_safe_int`` and clamp the result to 0..100."""
+    score = _safe_int(value, default)
+    if score > 100:
+        return 100
+    return 0 if score < 0 else score
 def _normalize_voice_gender(value: str | None) -> str:
     return "male" if (value or "").strip().lower() == "male" else "female"
     return difficulty
+def _resume_skill_pool(session: dict) -> list[str]:
+    """Collect the ordered, de-duplicated skills a resume interview can focus on.
+
+    Skills from the session's ``jd_required_skills`` come first, followed by
+    its ``skills``; entries with the same fingerprint are kept once, in their
+    first spelling. Generic soft skills are left out when at least two
+    other skills remain. The result is never empty: with no usable skills it
+    falls back to ``["core technical concepts"]``.
+    """
+    jd_skills = normalize_skill_list(_safe_json_list(session.get("jd_required_skills", "[]")))
+    focus_skills = normalize_skill_list(_safe_json_list(session.get("skills", "[]")))
+    # dict keeps insertion order, so setdefault retains the first spelling per fingerprint.
+    first_by_fingerprint: dict[str, str] = {}
+    for name in jd_skills + focus_skills:
+        fingerprint = _question_fingerprint(name)
+        if fingerprint:
+            first_by_fingerprint.setdefault(fingerprint, name)
+    concrete = [
+        name
+        for fingerprint, name in first_by_fingerprint.items()
+        if fingerprint not in _GENERIC_SOFT_SKILL_KEYS
+    ]
+    if len(concrete) >= 2:
+        return concrete
+    return list(first_by_fingerprint.values()) or ["core technical concepts"]
+def _infer_focus_skill_from_question(question_text: str, skill_pool: list[str]) -> str | None:
+    """Guess which skill in ``skill_pool`` a question is mainly about.
+
+    Both the question and every skill are compared on their fingerprints.
+    A skill scores one point per matching word, and a whole-phrase match
+    (on word boundaries) scores one more than the number of words considered,
+    so it outranks any partial match.
+
+    Words of three or more characters match as substrings, so plural or
+    suffixed forms in the question still count. Shorter words are only used
+    when the skill has no longer word (e.g. "C", "Go") and must then match a
+    whole word of the question; as substrings they would match nearly every
+    question and hijack the focus topic.
+
+    Returns the highest-scoring skill (the first one wins ties), or ``None``
+    when the question fingerprint is empty or no skill matches at all.
+    """
+    normalized_question = _question_fingerprint(question_text)
+    if not normalized_question:
+        return None
+    question_words = normalized_question.split()
+    question_word_set = set(question_words)
+    # Space-padded so a skill phrase can only match on word boundaries.
+    padded_question = " " + " ".join(question_words) + " "
+    best_skill = None
+    best_score = 0
+    for skill in skill_pool:
+        normalized_skill = _question_fingerprint(skill)
+        if not normalized_skill:
+            continue
+        skill_words = normalized_skill.split()
+        tokens = [token for token in skill_words if len(token) >= 3] or skill_words
+        score = 0
+        for token in tokens:
+            if len(token) >= 3:
+                matched = token in normalized_question
+            else:
+                # Short fallback tokens: whole-word match only.
+                matched = token in question_word_set
+            if matched:
+                score += 1
+        padded_skill = " " + " ".join(skill_words) + " "
+        if padded_skill in padded_question:
+            score = max(score, len(tokens) + 1)
+        if score > best_score:
+            best_score = score
+            best_skill = skill
+    return best_skill if best_score > 0 else None
+def _recent_focus_streak(question_texts: list[str], skill_pool: list[str]) -> tuple[str | None, int]:
+    """Measure how many of the latest questions stayed on one skill.
+
+    Walks ``question_texts`` from newest to oldest, inferring each question's
+    skill, and stops at the first question with no inferable skill or with a
+    skill whose fingerprint differs from the newest one. Returns
+    ``(skill, streak_length)``, or ``(None, 0)`` when the newest question has
+    no inferable skill or the list is empty.
+    """
+    leading_skill: str | None = None
+    run_length = 0
+    for question in reversed(question_texts):
+        inferred = _infer_focus_skill_from_question(question, skill_pool)
+        if not inferred:
+            break
+        if leading_skill is None:
+            leading_skill, run_length = inferred, 1
+        elif _question_fingerprint(inferred) == _question_fingerprint(leading_skill):
+            run_length += 1
+        else:
+            break
+    return leading_skill, run_length
+def _pick_alternate_focus_skill(skill_pool: list[str], current_skill: str | None, seed: int) -> str | None:
+    """Pick a skill from ``skill_pool``, preferring one other than ``current_skill``.
+
+    ``seed`` (floored at 0) selects the entry by modulo, so the same seed
+    always yields the same choice. Skills whose fingerprint equals that of
+    ``current_skill`` are excluded whenever any alternative exists; otherwise
+    the full pool is used. Returns ``None`` for an empty pool.
+    """
+    if not skill_pool:
+        return None
+    offset = max(0, seed)
+    candidates = skill_pool
+    if current_skill:
+        avoid_key = _question_fingerprint(current_skill)
+        others = [name for name in skill_pool if _question_fingerprint(name) != avoid_key]
+        if others:
+            candidates = others
+    return candidates[offset % len(candidates)]
+def _apply_resume_followup_policy(
+    *,
+    skill_pool: list[str],
+    recent_focus_topic: str | None,
+    same_topic_streak: int,
+    suggested_question: str,
+    suggested_topic: str | None,
+    followup_need_score: int,
+    answered_count: int,
+) -> tuple[str, str | None]:
+    """Decide whether a suggested follow-up may stay on the current topic.
+
+    Returns ``(follow_up_text, focus_skill_override)``:
+
+    * ``(stripped suggestion, None)`` when the suggestion is accepted.
+    * ``("", alternate_skill)`` when the suggestion is dropped. That happens
+      only if the same-topic streak has reached ``MAX_SAME_TOPIC_FOLLOWUPS``,
+      the suggestion's topic has the same fingerprint as
+      ``recent_focus_topic``, and the clamped need score is below
+      ``THIRD_FOLLOWUP_NEED_SCORE``. The alternate skill is picked from
+      ``skill_pool`` using ``answered_count`` as the seed.
+
+    When ``suggested_topic`` is blank, the topic is inferred from the
+    suggested question text; if none can be inferred, the suggestion passes.
+    """
+    question = (suggested_question or "").strip()
+    topic = (suggested_topic or "").strip()
+    if question and not topic:
+        topic = _infer_focus_skill_from_question(question, skill_pool) or topic
+    topic_key = _question_fingerprint(topic)
+    recent_key = _question_fingerprint(recent_focus_topic or "")
+    keep_suggestion = (question, None)
+    if same_topic_streak < MAX_SAME_TOPIC_FOLLOWUPS:
+        return keep_suggestion
+    if not topic_key or not recent_key or topic_key != recent_key:
+        return keep_suggestion
+    if _safe_score_0_100(followup_need_score) >= THIRD_FOLLOWUP_NEED_SCORE:
+        # Strong enough need to allow one more question on the same topic.
+        return keep_suggestion
+    return "", _pick_alternate_focus_skill(skill_pool, recent_focus_topic, answered_count)
 def _avg_recent_answer_words(qa_pairs: list, window: int = 3) -> int:
     if not qa_pairs:
         return 0
 def _build_resume_intro_question(role_title: str, jd_title: str) -> str:
+    """Build the opening "introduce yourself" question for a resume interview.
+
+    The role is phrased as "the <role> role" (a leading "the" and a trailing
+    "role" in ``role_title`` are not duplicated). A missing or blank
+    ``role_title`` is phrased as "this role". The job-description title is
+    mentioned only when it adds information: it is skipped when blank, when
+    it is "selected job description", or when it normalizes to the role
+    itself (with or without a trailing "role").
+    """
     role = (role_title or "this role").strip()
+    title = (jd_title or "").strip()
+    def _normalized_key(value: str) -> str:
+        # Lowercase, turn punctuation into spaces, collapse whitespace, drop one leading article.
+        key = re.sub(r"[^a-z0-9\s]", " ", (value or "").lower())
+        key = re.sub(r"\s+", " ", key).strip()
+        for prefix in ("the ", "an ", "a "):
+            if key.startswith(prefix):
+                key = key[len(prefix):].strip()
+                break
+        return key
+    # A whitespace-only role title strips to "", so fall back to the placeholder again.
+    role_clean = re.sub(r"\s+", " ", role).strip() or "this role"
+    if role_clean.lower().startswith("the "):
+        role_clean = role_clean[4:].strip()
+    if role_clean.lower() == "this role":
+        # The placeholder already has a determiner; adding "the" would read "the this role".
+        role_phrase = role_clean
+    elif role_clean.lower().endswith(" role"):
+        role_phrase = f"the {role_clean}"
+    else:
+        role_phrase = f"the {role_clean} role"
+    role_key = _normalized_key(role_clean)
+    title_key = _normalized_key(title)
+    is_generic_title = title_key in {
+        "",
+        "selected job description",
+        role_key,
+        f"{role_key} role",
+    }
+    if is_generic_title:
+        return f"Introduce yourself and explain how your background aligns with {role_phrase}."
+    title_phrase = title if title.lower().startswith(("the ", "an ", "a ")) else f"the {title}"
     return (
+        f"Introduce yourself and explain how your background aligns with {role_phrase} "
+        f"in {title_phrase} job description."
     )
+def _build_resume_resilient_followup_question(
+    session: dict,
+    question_number: int,
+    variant: int = 0,
+    focus_skill: str | None = None,
+) -> str:
+    """Build a templated follow-up question that needs no model call.
+
+    The template is chosen by ``(question_number - 1) + variant`` (each part
+    floored at 0) modulo the number of templates. The skill is ``focus_skill``
+    when it is non-blank; otherwise it is taken from the session's skill pool
+    at the same index. The role comes from the session's ``role_title``,
+    defaulting to "this role".
+    """
     role_title = (session.get("role_title") or "this role").strip()
+    # _resume_skill_pool falls back to a one-item list, so the modulo below never divides by zero.
+    skill_pool = _resume_skill_pool(session)
     index = max(0, question_number - 1) + max(0, variant)
+    skill = (focus_skill or "").strip() or skill_pool[index % len(skill_pool)]
     templates = [
+        "Describe a real project where you applied {skill} for {role}. What constraints and trade-offs shaped your design?",
+        "If {skill} failed in production for a {role} workflow, how would you debug it step by step?",
+        "Explain how you would test and validate a solution using {skill} before shipping it for {role}.",
+        "Compare two approaches for {skill} in a {role} context and justify the final choice.",
+        "Design an improvement plan to make your {skill} implementation more scalable and reliable for {role}.",
+        "Your {role} service using {skill} has intermittent latency spikes. How would you investigate and stabilize it?",
+        "During code review, what risks would you look for in a {skill} implementation for {role}, and why?",
+        "How would you design rollback and observability for a feature centered on {skill} in {role}?",
+        "Assume two engineers propose different {skill} strategies for {role}. How would you evaluate and choose between them?",
+        "What failure modes around {skill} are easiest to miss in {role}, and how would you proactively test them?",
     ]
     template = templates[index % len(templates)]
+    return template.format(skill=skill, role=role_title)
 def _build_topic_resilient_followup_question(session: dict, question_number: int, variant: int = 0) -> str:
     index = max(0, question_number - 1) + max(0, variant)
     templates = [
+        "Explain {topic} with a practical example from a production-like scenario.",
+        "What are the most common failure patterns in {topic}, and how would you detect them early?",
+        "Design a step-by-step implementation plan for {topic} with measurable checkpoints.",
+        "Compare two approaches in {topic}, including trade-offs in scalability, latency, and maintainability.",
+        "If a {topic} solution regressed after deployment, how would you triage and recover safely?",
     ]
     template = templates[index % len(templates)]
+    return template.format(topic=topic_name)
 async def _enqueue_resume_followup_with_fallback(
     suggested_text: str,
     suggested_difficulty: str,
     suggested_category: str,
+    focus_skill_override: str | None = None,
 ) -> tuple[str | None, bool]:
     candidates: list[tuple[str, str, str, bool]] = []
+    existing_questions = await _get_session_question_texts(redis, session_id)
     primary = (suggested_text or "").strip()
     if primary:
             session=session,
             question_number=question_number,
             variant=variant,
+            focus_skill=focus_skill_override,
         )
         candidates.append((fallback_text, "medium", "resume-fallback", False))
     seen: set[str] = set()
     for text, difficulty, category, is_primary in candidates:
+        normalized_text = normalize_question_text(text)
+        if _is_question_too_similar(normalized_text, existing_questions):
+            continue
+        key = _question_fingerprint(normalized_text)
         if not key or key in seen:
             continue
         seen.add(key)
         qid = await enqueue_question(
             redis=redis,
             session_id=session_id,
+            question=normalized_text,
             difficulty=difficulty,
             category=category,
             ttl_seconds=SESSION_TTL,
             max_queue_size=MAX_QUEUE_SIZE,
         )
         if qid:
+            existing_questions.append(normalized_text)
             return qid, is_primary
     return None, False
     return output
+async def _get_answered_question_texts(redis, session_id: str, limit: int = 4) -> list[str]:
+    """Return the question texts of the most recently answered questions.
+
+    Reads the last ``limit`` ids (at least one) from the session's
+    ``answers`` list, oldest first. For each id the text comes from the
+    ``question`` field of the answer hash, falling back to the question hash
+    when that field is empty. Ids with no text in either place are skipped.
+    """
+    window = max(1, limit)
+    recent_ids = await redis.lrange(f"session:{session_id}:answers", -window, -1)
+    texts: list[str] = []
+    for qid in recent_ids:
+        stored_answer = await redis.hgetall(f"session:{session_id}:a:{qid}")
+        question = (stored_answer.get("question") or "").strip()
+        if not question:
+            stored_question = await redis.hgetall(f"session:{session_id}:q:{qid}")
+            question = (stored_question.get("question") or "").strip()
+        if question:
+            texts.append(question)
+    return texts
 async def _sample_topic_questions(
     db,
     topic_id: str,
 async def _append_batch_to_redis(redis, session_id: str, batch: list[dict]) -> list[str]:
     created_ids: list[str] = []
     for item in batch:
+        normalized_question = normalize_question_text(item.get("question", "Can you explain your approach?"))
+        if not normalized_question:
+            continue
         qid = generate_id()
         created_ids.append(qid)
         await redis.hset(
             f"session:{session_id}:q:{qid}",
             mapping={
                 "question_id": qid,
+                "question": normalized_question,
                 "difficulty": item.get("difficulty", "medium"),
                 "category": item.get("category", "general"),
             },
         f"session:{session_id}:q:{first_id}",
         mapping={
             "question_id": first_id,
+            "question": normalize_question_text(first_question.get("question", "Can you explain this topic?")),
             "difficulty": first_question.get("difficulty", "medium"),
             "category": first_question.get("category", topic.get("name", "topic")),
         },
         },
         "question": {
             "question_id": first_id,
+            "question": normalize_question_text(first_question.get("question", "Can you explain this topic?")),
             "difficulty": first_question.get("difficulty", "medium"),
             "question_number": 1,
             "total_questions": TOPIC_TOTAL_QUESTIONS,
     skills_for_interview = build_interview_focus_skills(base_skills_for_interview) or list(jd_required_skills)
     intro_question = _build_resume_intro_question(role_title=role_title, jd_title=selected_jd.get("title", ""))
+    intro_question = normalize_question_text(intro_question)
     session_id = generate_id()
     _LOCAL_SUMMARIES[session_id] = ""
         if not session:
             return
+        skill_pool = _resume_skill_pool(session)
+        recent_answered_questions = await _get_answered_question_texts(
+            redis=redis,
+            session_id=session_id,
+            limit=4,
+        )
+        recent_focus_topic, same_topic_streak = _recent_focus_streak(
+            recent_answered_questions,
+            skill_pool,
+        )
         recent_context = await get_recent_context_items(
             redis=redis,
             session_id=session_id,
             current_question=question_text,
             current_answer=answer,
             excluded_questions=excluded_questions,
+            focus_topic=recent_focus_topic or "",
+            same_topic_streak=same_topic_streak,
         )
         await redis.hset(
         }
         generated_count = _safe_int(session.get("generated_count", 0))
+        follow_text, focus_skill_override = _apply_resume_followup_policy(
+            skill_pool=skill_pool,
+            recent_focus_topic=recent_focus_topic,
+            same_topic_streak=same_topic_streak,
+            suggested_question=(evaluation.get("followup_question") or "").strip(),
+            suggested_topic=(evaluation.get("followup_topic") or "").strip(),
+            followup_need_score=_safe_score_0_100(evaluation.get("followup_need_score", 0)),
+            answered_count=answered_count,
+        )
         if answered_count < max_questions and session.get("status") == "in_progress":
             qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
                 redis=redis,
                 suggested_text=follow_text,
                 suggested_difficulty=evaluation.get("difficulty", "medium"),
                 suggested_category=evaluation.get("category", "follow-up"),
+                focus_skill_override=focus_skill_override,
             )
             if qid:
                 generated_count += 1
         mapping={
             "question_id": question_id,
             "answer": answer,
+            "question": current_question_text,
+            "difficulty": current_q.get("difficulty", "medium"),
+            "category": current_q.get("category", "general"),
             "submitted_at": utc_now(),
         },
     )
     await redis.expire(f"session:{session_id}:a:{question_id}", SESSION_TTL)
     await redis.expire(f"session:{session_id}:answers", SESSION_TTL)
+    await db[ANSWERS].update_one(
+        {
+            "session_id": session_id,
+            "question_id": question_id,
+            "user_id": session.get("user_id"),
+        },
+        {
+            "$set": {
+                "question": current_question_text,
+                "answer": answer,
+                "difficulty": current_q.get("difficulty", "medium"),
+                "category": current_q.get("category", "general"),
+                "stored_at": utc_now(),
+            }
+        },
+        upsert=True,
+    )
     question_count = _safe_int(session.get("question_count", 1))
     answered_count = _safe_int(session.get("answered_count", 0)) + 1
     served_count = _safe_int(session.get("served_count", 1))
     # Emergency fallback for rare queue-empty cases.
     if not next_question_id and interview_type == "resume":
+        skill_pool = _resume_skill_pool(session)
+        recent_answered_questions = await _get_answered_question_texts(
+            redis=redis,
+            session_id=session_id,
+            limit=4,
+        )
+        recent_focus_topic, same_topic_streak = _recent_focus_streak(
+            recent_answered_questions,
+            skill_pool,
+        )
         recent_context = await get_recent_context_items(
             redis=redis,
             session_id=session_id,
             current_question=current_question_text,
             current_answer=answer,
             excluded_questions=excluded_questions,
+            focus_topic=recent_focus_topic or "",
+            same_topic_streak=same_topic_streak,
         )
         await redis.hset(
             "bank_shortfall": 0,
             "generation_batches": 1,
         }
+        follow_text, focus_skill_override = _apply_resume_followup_policy(
+            skill_pool=skill_pool,
+            recent_focus_topic=recent_focus_topic,
+            same_topic_streak=same_topic_streak,
+            suggested_question=(fallback_evaluation.get("followup_question") or "").strip(),
+            suggested_topic=(fallback_evaluation.get("followup_topic") or "").strip(),
+            followup_need_score=_safe_score_0_100(fallback_evaluation.get("followup_need_score", 0)),
+            answered_count=answered_count,
+        )
         if answered_count < max_questions:
             qid, used_model_followup = await _enqueue_resume_followup_with_fallback(
                 redis=redis,
                 suggested_text=follow_text,
                 suggested_difficulty=fallback_evaluation.get("difficulty", "medium"),
                 suggested_category=fallback_evaluation.get("category", "follow-up"),
+                focus_skill_override=focus_skill_override,
             )
             if qid:
                 generated_count += 1
     """Get all Q&A pairs from Redis for a session."""
     redis = get_redis()
+    answer_ids = await redis.lrange(f"session:{session_id}:answers", 0, -1)
     qa_pairs = []
+    if answer_ids:
+        for qid in answer_ids:
+            q = await redis.hgetall(f"session:{session_id}:q:{qid}")
+            a = await redis.hgetall(f"session:{session_id}:a:{qid}")
+            if not a:
+                continue
+            question_text = (a.get("question") or q.get("question") or "").strip()
+            answer_text = (a.get("answer") or "").strip()
+            if not question_text or not answer_text:
+                continue
+            qa_pairs.append({
+                "question_id": qid,
+                "question": question_text,
+                "answer": answer_text,
+                "difficulty": a.get("difficulty") or q.get("difficulty", "medium"),
+                "category": a.get("category") or q.get("category", "general"),
+            })
+        if qa_pairs:
+            return qa_pairs
+    question_ids = await redis.lrange(f"session:{session_id}:questions", 0, -1)
     for qid in question_ids:
         q = await redis.hgetall(f"session:{session_id}:q:{qid}")
         a = await redis.hgetall(f"session:{session_id}:a:{qid}")

backend/services/queue_service.py CHANGED Viewed

@@ -9,14 +9,32 @@ QUESTION_QUEUE_SUFFIX = "question_queue"
 QUESTION_BACKLOG_SUFFIX = "question_backlog"
 CONTEXT_CACHE_SUFFIX = "context_cache"
 ASKED_SET_SUFFIX = "asked_questions_set"
 def _key(session_id: str, suffix: str) -> str:
     return f"session:{session_id}:{suffix}"
 def question_fingerprint(text: str) -> str:
-    value = (text or "").strip().lower()
     value = re.sub(r"[^a-z0-9\s]", " ", value)
     value = re.sub(r"\s+", " ", value).strip()
     return value
@@ -61,6 +79,7 @@ async def _append_question_object(
     category: str,
     ttl_seconds: int,
 ) -> str:
     qid = generate_id()
     q_key = f"session:{session_id}:q:{qid}"
@@ -68,7 +87,7 @@ async def _append_question_object(
         q_key,
         mapping={
             "question_id": qid,
-            "question": question,
             "difficulty": difficulty or "medium",
             "category": category or "general",
         },
@@ -90,7 +109,7 @@ async def enqueue_question(
     ttl_seconds: int = 7200,
     max_queue_size: int = 3,
 ) -> Optional[str]:
-    text = (question or "").strip()
     if not text:
         return None

 QUESTION_BACKLOG_SUFFIX = "question_backlog"
 CONTEXT_CACHE_SUFFIX = "context_cache"
 ASKED_SET_SUFFIX = "asked_questions_set"
+# Matches a leading numbering label such as "Question 3:", "Q2 of 5 -" or
+# "q#4)": the word "question" or "q", an optional "#", a number, an optional
+# "of <number>", then one of ":", "-", ")" or ".", case-insensitively.
+QUESTION_PREFIX_RE = re.compile(
+    r"^\s*(?:question|q)\s*#?\s*\d+(?:\s*of\s*\d+)?\s*[\:\-\)\.]\s*",
+    re.IGNORECASE,
+)
 def _key(session_id: str, suffix: str) -> str:
+    """Return the session-scoped key string ``session:<session_id>:<suffix>``."""
     return f"session:{session_id}:{suffix}"
+def normalize_question_text(text: str) -> str:
+    """Trim ``text`` and strip any leading question-number labels.
+
+    Labels matched by ``QUESTION_PREFIX_RE`` are removed repeatedly, so
+    stacked prefixes such as "Q1: Question 2 - ..." are all dropped. ``None``
+    or blank input yields an empty string.
+    """
+    current = (text or "").strip()
+    while current:
+        stripped = QUESTION_PREFIX_RE.sub("", current).strip()
+        if stripped == current:
+            break
+        current = stripped
+    return current
 def question_fingerprint(text: str) -> str:
+    """Return a normalized key used to compare question texts.
+
+    The text is passed through ``normalize_question_text`` and lowercased,
+    every character other than ``a-z``, ``0-9`` and whitespace is replaced
+    by a space, and whitespace runs are collapsed to single spaces.
+    """
+    value = normalize_question_text(text).lower()
     value = re.sub(r"[^a-z0-9\s]", " ", value)
     value = re.sub(r"\s+", " ", value).strip()
     return value
     category: str,
     ttl_seconds: int,
 ) -> str:
+    normalized_question = normalize_question_text(question)
     qid = generate_id()
     q_key = f"session:{session_id}:q:{qid}"
         q_key,
         mapping={
             "question_id": qid,
+            "question": normalized_question,
             "difficulty": difficulty or "medium",
             "category": category or "general",
         },
     ttl_seconds: int = 7200,
     max_queue_size: int = 3,
 ) -> Optional[str]:
+    text = normalize_question_text(question)
     if not text:
         return None

backend/utils/gemini.py CHANGED Viewed

@@ -14,6 +14,29 @@ settings = get_settings()
 client = genai.Client(api_key=settings.GEMINI_API_KEY)
 def _is_transient_gemini_error(error: Exception) -> bool:
     message = str(error or "").lower()
     transient_markers = [
@@ -28,6 +51,26 @@ def _is_transient_gemini_error(error: Exception) -> bool:
     return any(marker in message for marker in transient_markers)
 async def call_gemini(
     prompt: str,
     system_instruction: str = None,
@@ -43,34 +86,51 @@ async def call_gemini(
     config["response_mime_type"] = "application/json"
     last_error = None
     attempts = max(1, int(max_attempts or 1))
     for attempt in range(attempts):
-        try:
-            def _invoke():
-                return client.models.generate_content(
-                    model=settings.GEMINI_MODEL,
-                    contents=prompt,
-                    config=config if config else None,
-                )
-            if request_timeout_seconds and request_timeout_seconds > 0:
-                response = await asyncio.wait_for(
-                    asyncio.to_thread(_invoke),
-                    timeout=request_timeout_seconds,
-                )
-            else:
-                response = await asyncio.to_thread(_invoke)
-            elapsed_ms = (perf_counter() - started_at) * 1000.0
-            await record_latency("gemini_ms", elapsed_ms)
-            return (response.text or "").strip()
-        except Exception as exc:
-            last_error = exc
-            if _is_transient_gemini_error(exc) and attempt < attempts - 1:
-                await asyncio.sleep(0.8 * (attempt + 1))
-                continue
-            break
     elapsed_ms = (perf_counter() - started_at) * 1000.0
     await record_latency("gemini_ms", elapsed_ms)
@@ -705,53 +765,158 @@ No markdown, no extra text."""
 async def evaluate_interview(questions_and_answers: list, role_title: str) -> dict:
     """Batch evaluate all interview Q&A pairs using Gemini."""
-    qa_text = ""
     for i, qa in enumerate(questions_and_answers, 1):
-        qa_text += f"\nQ{i}: {qa['question']}\nA{i}: {qa['answer']}\n"
     prompt_template = PromptTemplate.from_template(
-                """You are a strict technical interviewer evaluating a candidate for the role: {role_title}.
-Here are the interview questions and the candidate's answers:
-{qa_text}
-Scoring policy (concept-first, strict):
-1. Score primarily on conceptual correctness, depth, and reasoning quality.
-2. Do NOT reward answer length, confidence, or communication style when concepts are wrong.
-3. Penalize vague, hand-wavy, or uncertain answers.
-4. Penalize technically incorrect claims even if explanation sounds fluent.
-5. Reward precise mechanisms, trade-offs, edge cases, and debugging logic.
-Score rubric per answer:
-- 90-100: conceptually correct, deep, and accurate with strong reasoning
-- 70-89: mostly correct with minor conceptual gaps
-- 50-69: partially correct but misses key mechanisms
-- 30-49: shallow/vague with major conceptual gaps
-- 0-29: incorrect or off-topic
-Return a JSON object with:
-- "overall_score": integer from 0-100
-- "detailed_scores": list of objects, each with:
-    - "question": the question text
-    - "answer": the answer text
-    - "score": integer 0-100
-    - "feedback": concise concept-focused feedback for this answer
-- "strengths": list of 3-5 strength areas
-- "weaknesses": list of 3-5 concept gaps
-- "recommendations": list of 3-5 actionable concept-improvement recommendations
-Return ONLY valid JSON, no markdown formatting."""
-        )
-    prompt = prompt_template.format(role_title=role_title, qa_text=qa_text)
     try:
-        result = _extract_json_object(await call_gemini(prompt))
-        return json.loads(result)
     except Exception:
         return {
-            "overall_score": 50,
-            "detailed_scores": [],
-            "strengths": ["Unable to evaluate"],
-            "weaknesses": ["Unable to evaluate"],
-            "recommendations": ["Please retry the interview"],
         }

 client = genai.Client(api_key=settings.GEMINI_API_KEY)
def _extract_response_text(response) -> str:
    """Return the usable text of a Gemini response, or ``""`` if there is none.

    The ``text`` attribute is preferred. When it is missing, empty, not a
    string, or raises on access, the text parts of the first candidate that
    has any are joined with newlines instead.

    Args:
        response: Object returned by the Gemini client. Only its ``text`` and
            ``candidates[*].content.parts[*].text`` attributes are read.

    Returns:
        The stripped response text, or an empty string when nothing could be
        extracted. Never raises.
    """
    try:
        direct = getattr(response, "text", None)
    except Exception:
        # ``getattr`` with a default only absorbs AttributeError; an accessor
        # that raises anything else must not skip the candidate scan below.
        # NOTE(review): whether the installed SDK's ``text`` accessor can
        # raise is not visible from here - confirm against the SDK version.
        direct = None
    if isinstance(direct, str):
        direct = direct.strip()
        if direct:
            return direct
    try:
        for candidate in getattr(response, "candidates", None) or []:
            content = getattr(candidate, "content", None)
            part_texts = (
                getattr(part, "text", None)
                for part in getattr(content, "parts", None) or []
            )
            pieces = [
                piece.strip()
                for piece in part_texts
                if isinstance(piece, str) and piece.strip()
            ]
            # Stop at the first candidate that yields any text.
            if pieces:
                return "\n".join(pieces).strip()
    except Exception:
        # Best-effort extraction: an oddly shaped response counts as empty.
        return ""
    return ""
 def _is_transient_gemini_error(error: Exception) -> bool:
     message = str(error or "").lower()
     transient_markers = [
     return any(marker in message for marker in transient_markers)
def _candidate_gemini_models() -> list[str]:
    """List the Gemini model names to try, in priority order.

    The order is the primary ``settings.GEMINI_MODEL``, then the
    comma-separated names from ``settings.GEMINI_FALLBACK_MODELS`` (when that
    setting exists and is non-empty), then three built-in defaults. Names are
    stripped, blank entries are dropped, and only the first occurrence of each
    name is kept.
    """
    raw_fallbacks = getattr(settings, "GEMINI_FALLBACK_MODELS", "") or ""
    from_settings = [
        chunk.strip()
        for chunk in raw_fallbacks.split(",")
        if chunk and chunk.strip()
    ]
    built_in = ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-flash-latest"]
    cleaned = (
        (name or "").strip()
        for name in (settings.GEMINI_MODEL, *from_settings, *built_in)
    )
    # dict.fromkeys keeps insertion order, so duplicates vanish without reordering.
    return list(dict.fromkeys(name for name in cleaned if name))
 async def call_gemini(
     prompt: str,
     system_instruction: str = None,
     config["response_mime_type"] = "application/json"
     last_error = None
+    model_candidates = _candidate_gemini_models()
     attempts = max(1, int(max_attempts or 1))
     for attempt in range(attempts):
+        for model_name in model_candidates:
+            try:
+                def _invoke():
+                    return client.models.generate_content(
+                        model=model_name,
+                        contents=prompt,
+                        config=config if config else None,
+                    )
+                if request_timeout_seconds and request_timeout_seconds > 0:
+                    response = await asyncio.wait_for(
+                        asyncio.to_thread(_invoke),
+                        timeout=request_timeout_seconds,
+                    )
+                else:
+                    response = await asyncio.to_thread(_invoke)
+                response_text = _extract_response_text(response)
+                if not response_text:
+                    raise RuntimeError("Gemini returned an empty response")
+                elapsed_ms = (perf_counter() - started_at) * 1000.0
+                await record_latency("gemini_ms", elapsed_ms)
+                return response_text
+            except Exception as exc:
+                last_error = exc
+                # Try next model candidate immediately on transient/unavailable errors.
+                if _is_transient_gemini_error(exc):
+                    continue
+                # Model-not-found style errors should try the next candidate too.
+                message = str(exc or "").lower()
+                if "not found" in message or "unsupported" in message:
+                    continue
+                break
+        if _is_transient_gemini_error(last_error) and attempt < attempts - 1:
+            await asyncio.sleep(0.8 * (attempt + 1))
+            continue
+        break
     elapsed_ms = (perf_counter() - started_at) * 1000.0
     await record_latency("gemini_ms", elapsed_ms)
 async def evaluate_interview(questions_and_answers: list, role_title: str) -> dict:
     """Batch evaluate all interview Q&A pairs using Gemini.

     Sends a compact JSON payload (questions cut to 260 characters, answers to
     520) to ``call_gemini`` and merges the per-question scores it returns with
     the original question/answer text. Any question the model did not score,
     and the whole report when the call or JSON parsing fails, falls back to a
     word-count heuristic so a report is always produced.

     Args:
         questions_and_answers: List of dicts with "question" and "answer" keys.
         role_title: Role the candidate is interviewing for; inserted into the prompt.

     Returns:
         Dict with "overall_score" (int 0-100), "detailed_scores" (one dict per
         input item with "question", "answer", "score" and "feedback"), and the
         "strengths", "weaknesses" and "recommendations" lists of strings.
     """
     def _as_text(value) -> str:
         """Return ``value`` as stripped text; ``None`` and other falsy values become ""."""
         return str(value or "").strip()
     def _clamp_score(value, default: int = 50) -> int:
         """Coerce ``value`` to an int in 0..100, using ``default`` when it is not numeric."""
         try:
             score = int(value)
         except (TypeError, ValueError, OverflowError):
             score = default
         return max(0, min(100, score))
     def _fallback_item_score(answer) -> int:
         """Heuristic score from answer length and hedging phrases, used when no AI score exists."""
         text = _as_text(answer).lower()
         words = len(text.split())
         if words < 10:
             return 35
         if words < 25:
             return 52
         if any(marker in text for marker in ("not sure", "maybe", "i think", "dont know", "don't know")):
             return 50
         if words > 90:
             return 74
         return 64
     def _clean_points(value) -> list:
         """Return up to five non-empty strings from a model-supplied list field."""
         if isinstance(value, str):
             # A bare string would otherwise be iterated character by character.
             value = [value]
         elif not isinstance(value, (list, tuple)):
             return []
         cleaned = [str(entry).strip() for entry in value if entry is not None]
         return [entry for entry in cleaned if entry][:5]
     if not questions_and_answers:
         return {
             "overall_score": 50,
             "detailed_scores": [],
             "strengths": ["No answers were available for evaluation"],
             "weaknesses": ["No answers were available for evaluation"],
             "recommendations": ["Complete the interview and generate report again"],
         }
     # Truncate what is sent to the model; the untruncated text is re-attached below.
     compact_qa = [
         {
             "index": position,
             "question": _as_text(qa.get("question"))[:260],
             "answer": _as_text(qa.get("answer"))[:520],
         }
         for position, qa in enumerate(questions_and_answers, 1)
     ]
     payload = {
         "role_title": role_title,
         "question_count": len(compact_qa),
         "qa": compact_qa,
     }
     prompt_template = PromptTemplate.from_template(
         """You are a strict technical interviewer evaluating a candidate for role: {role_title}.
 Input JSON:
 {payload}
 Scoring policy:
 1) Score conceptual correctness and depth, not verbosity.
 2) Penalize vague, uncertain, or incorrect technical claims.
 3) Reward concrete reasoning, trade-offs, and debugging clarity.
 Return ONLY valid JSON object with this exact schema:
 {{
   "overall_score": 0-100 integer,
   "per_question": [
     {{"index": 1-based integer, "score": 0-100 integer, "feedback": "short concept-focused feedback"}}
   ],
   "strengths": ["3 to 5 concise points"],
   "weaknesses": ["3 to 5 concise points"],
   "recommendations": ["3 to 5 actionable points"]
 }}
 Rules:
 - per_question must include every question index from 1..question_count exactly once.
 - Do NOT echo full question or answer text in output.
 - Keep each feedback under 220 characters.
 """
     )
     prompt = prompt_template.format(
         role_title=role_title,
         payload=json.dumps(payload, ensure_ascii=True),
     )
     parsed = None
     try:
         raw_reply = await call_gemini(
             prompt,
             max_attempts=3,
             request_timeout_seconds=45,
         )
         parsed = json.loads(_extract_json_object(raw_reply))
     except Exception:
         # Deliberate best effort: any model or parsing failure yields the heuristic report.
         parsed = None
     if not isinstance(parsed, dict):
         parsed = None
     # Map of 1-based question index -> (score, feedback) taken from the model output.
     score_map: dict[int, tuple[int, str]] = {}
     per_question = parsed.get("per_question") if parsed is not None else None
     for item in per_question if isinstance(per_question, list) else []:
         if not isinstance(item, dict):
             continue
         try:
             index = int(item.get("index"))
         except (TypeError, ValueError, OverflowError):
             continue
         if not 1 <= index <= len(questions_and_answers):
             continue
         score = _clamp_score(
             item.get("score"),
             _fallback_item_score(questions_and_answers[index - 1].get("answer", "")),
         )
         raw_feedback = item.get("feedback")
         # Non-string feedback is treated as missing instead of crashing on .strip().
         feedback = raw_feedback.strip() if isinstance(raw_feedback, str) else ""
         score_map[index] = (
             score,
             feedback or "Answer reviewed with focus on conceptual correctness.",
         )
     detailed_scores = []
     for index, qa in enumerate(questions_and_answers, 1):
         score, feedback = score_map.get(
             index,
             (
                 _fallback_item_score(qa.get("answer", "")),
                 "Could not derive detailed AI feedback for this answer; score based on response quality signals.",
             ),
         )
         detailed_scores.append(
             {
                 "question": qa.get("question", ""),
                 "answer": qa.get("answer", ""),
                 "score": score,
                 "feedback": feedback,
             }
         )
     average_score = int(round(sum(entry["score"] for entry in detailed_scores) / max(1, len(detailed_scores))))
     if parsed is None:
         return {
             "overall_score": _clamp_score(average_score, 50),
             "detailed_scores": detailed_scores,
             "strengths": ["Attempted responses for all interview prompts"],
             "weaknesses": ["Detailed AI evaluation was unavailable for this run"],
             "recommendations": ["Retry report generation to get full AI feedback"],
         }
     return {
         # The per-question average stands in when the model's overall score is unusable.
         "overall_score": _clamp_score(parsed.get("overall_score"), average_score),
         "detailed_scores": detailed_scores,
         "strengths": _clean_points(parsed.get("strengths"))
         or ["Shows baseline understanding in parts of the discussion"],
         "weaknesses": _clean_points(parsed.get("weaknesses"))
         or ["Needs deeper concept-level reasoning and sharper technical precision"],
         "recommendations": _clean_points(parsed.get("recommendations"))
         or ["Practice answering with mechanisms, trade-offs, and one concrete production example per question"],
     }