Spaces:

sajith-0701
/

interviewbot

Sleeping

File size: 20,804 Bytes

from google import genai
from config import get_settings
from utils.skills import normalize_skill_list
import asyncio
import json
import re
from langchain_core.prompts import PromptTemplate

# Application settings, resolved once when this module is imported.
settings = get_settings()

# Module-level Gemini client, built with the API key from settings; used by call_gemini.
client = genai.Client(api_key=settings.GEMINI_API_KEY)


def _is_transient_gemini_error(error: Exception) -> bool:
    """Return True when the error's text contains a marker of a temporary failure.

    The check is a case-insensitive substring search over ``str(error)``; a
    falsy ``error`` is treated as an empty message.
    """
    lowered = str(error or "").lower()
    for needle in (
        "503",
        "unavailable",
        "resource_exhausted",
        "high demand",
        "deadline",
        "timed out",
        "timeout",
    ):
        if needle in lowered:
            return True
    return False


async def call_gemini(prompt: str, system_instruction: str | None = None) -> str:
    """Call the Gemini API and return the stripped response text.

    Every request asks for a JSON response (``response_mime_type``) and may
    carry a system instruction. Errors whose message looks transient (see
    ``_is_transient_gemini_error``) are retried, for at most three attempts in
    total, with a pause of 0.8s, then 1.6s between attempts.

    Args:
        prompt: Prompt text sent as the request contents.
        system_instruction: Optional system instruction added to the request config.

    Returns:
        The response text with surrounding whitespace removed ("" when the
        response carries no text).

    Raises:
        RuntimeError: When the request fails with a non-transient error or the
            retries are exhausted. The underlying exception is chained as
            ``__cause__``.
    """
    config = {"response_mime_type": "application/json"}
    if system_instruction:
        config["system_instruction"] = system_instruction

    max_attempts = 3
    last_error = None
    for attempt in range(max_attempts):
        try:
            # generate_content on the synchronous client blocks until the HTTP
            # round-trip finishes; run it in a worker thread so the event loop
            # keeps serving other coroutines in the meantime.
            response = await asyncio.to_thread(
                client.models.generate_content,
                model=settings.GEMINI_MODEL,
                contents=prompt,
                config=config,
            )
            return (response.text or "").strip()
        except Exception as exc:
            last_error = exc
            is_last_attempt = attempt == max_attempts - 1
            if is_last_attempt or not _is_transient_gemini_error(exc):
                break
            # Linear backoff before the next attempt.
            await asyncio.sleep(0.8 * (attempt + 1))

    raise RuntimeError(f"Gemini request failed: {last_error}") from last_error


def _extract_json_object(text: str) -> str:
    """Return the JSON object embedded in a model response, as a string.

    Strips an optional Markdown code fence, then returns the text unchanged
    when it is already a ``{...}`` object. Otherwise it falls back to the span
    from the first ``{`` to the last ``}``. When no such span exists the
    cleaned text is returned as-is, so the caller's ``json.loads`` reports the
    problem.

    Args:
        text: Raw model output; ``None`` or empty is treated as "".

    Returns:
        The best-effort JSON object substring (not validated).
    """
    value = (text or "").strip()
    if value.startswith("```"):
        # Drop the opening fence line, which may carry a language tag ("```json").
        # A fence with no newline after it (e.g. "```{...}```") has no line to
        # drop, so only the backticks themselves are removed.
        _, newline, remainder = value.partition("\n")
        value = remainder if newline else value[3:]
    if value.endswith("```"):
        value = value.rsplit("```", 1)[0]
    value = value.strip()

    if value.startswith("{") and value.endswith("}"):
        return value

    # Fallback when the model wraps the JSON object with extra text.
    start = value.find("{")
    end = value.rfind("}")
    if start != -1 and end > start:
        return value[start:end + 1]

    return value


def _fallback_skill_scan(resume_text: str) -> list:
    """Keyword-scan the resume text for a fixed list of well-known skills.

    Matching is case-insensitive and anchored on word boundaries; the hits are
    passed through ``normalize_skill_list`` before being returned.
    """
    known_skills = (
        "python", "java", "javascript", "typescript", "react", "next.js", "node.js",
        "fastapi", "django", "flask", "spring", "mongodb", "postgresql", "mysql",
        "redis", "docker", "kubernetes", "aws", "gcp", "azure", "git", "linux",
        "rest api", "graphql", "machine learning", "data analysis", "sql",
    )
    haystack = (resume_text or "").lower()
    hits = [
        name
        for name in known_skills
        if re.search(r"\b" + re.escape(name.lower()) + r"\b", haystack)
    ]
    return normalize_skill_list(hits)


def _is_loose_answer(answer: str) -> bool:
    """Return True when an answer looks vague, very short, or uncertain.

    An answer is "loose" when it is empty, has fewer than 18 words, or contains
    one of the hedging phrases below as a whole word or phrase.

    Args:
        answer: The candidate's answer; ``None`` is treated as empty.

    Returns:
        True for a loose answer, False otherwise.
    """
    text = (answer or "").strip().lower()
    if not text:
        return True

    if len(text.split()) < 18:
        return True

    weak_markers = [
        "i think",
        "maybe",
        "not sure",
        "dont know",
        "don't know",
        "don\u2019t know",  # typographic apostrophe
        "something like",
        "etc",
        "kind of",
        "sort of",
    ]
    # Match on word boundaries: a plain substring test flags "etc" inside
    # ordinary words such as "fetch" or "sketch" and mislabels solid answers.
    return any(
        re.search(r"\b" + re.escape(marker) + r"\b", text)
        for marker in weak_markers
    )


def _collect_loose_qa(qa_pairs: list, limit: int = 4) -> list:
    """Pick the most recent Q&A pairs whose answers look loose.

    Walks the pairs newest-first, skipping entries without both a question and
    an answer, and stops once ``limit`` loose pairs are gathered. The result is
    returned in the original (oldest-first) order.
    """
    picked = []
    for entry in reversed(qa_pairs or []):
        record = entry or {}
        question_text = record.get("question", "")
        answer_text = record.get("answer", "")
        if question_text and answer_text:
            if _is_loose_answer(answer_text):
                picked.append({"question": question_text, "answer": answer_text})
            if len(picked) >= limit:
                break
    return picked[::-1]


def _resume_parse_fallback(resume_text: str, experience_summary: str) -> dict:
    """Build the placeholder resume profile used when the AI output is unusable."""
    return {
        "name": None,
        "email": None,
        "phone": None,
        "location": None,
        "skills": _fallback_skill_scan(resume_text),
        "recommended_roles": [],
        "experience_summary": experience_summary,
        "experience": [],
        "education": [],
        "projects": [],
    }


async def parse_resume_with_gemini(resume_text: str) -> dict:
    """Parse resume text into structured data using Gemini.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A dict with the keys "name", "email", "phone", "location", "skills",
        "recommended_roles", "experience_summary", "experience", "education"
        and "projects". When the Gemini call fails, or its reply is not a JSON
        object, a placeholder profile is returned whose "skills" come from a
        local keyword scan of the resume. In the second case
        "experience_summary" carries the raw model reply.
    """
    prompt = f"""Analyze the following resume and extract structured information.
CRITICAL INSTRUCTION FOR SKILLS:
1) Extract concrete tools/technologies/frameworks/languages from the resume text.
2) Exclude vague traits such as "hardworking", "leadership", "problem solving", "communication".
3) If a line contains multiple skills (comma-separated), split them into separate list items.
4) Do NOT add skills that are not present in the resume.

Return a JSON object with these exact fields:
- "name": full name of the candidate (string or null)
- "email": candidate's email address (string or null)
- "phone": candidate's phone number (string or null)
- "location": candidate's location/address (string or null)
- "skills": list of technical and soft skills verbatim from the text (array of strings)
- "recommended_roles": list of 3-5 recommended job role titles the user is qualified for based on these skills (array of strings)
- "experience_summary": brief summary of work experience (string)
- "experience": list of dictionaries, each with "company", "role", "duration", and "description"
- "education": list of dictionaries, each with "institution", "degree", "graduation_year"
- "projects": list of dictionaries, each with "name" and "description"

Resume text:
---
{resume_text}
---

Return ONLY valid JSON, no markdown formatting."""

    try:
        result = _extract_json_object(await call_gemini(prompt))
    except Exception:
        return _resume_parse_fallback(
            resume_text, "Unable to parse with AI right now. Please retry."
        )

    try:
        parsed = json.loads(result)
    except json.JSONDecodeError:
        parsed = None
    # Valid JSON that is not an object (a list, a bare string, ...) is as
    # unusable as malformed JSON, so both take the same fallback path.
    if not isinstance(parsed, dict):
        return _resume_parse_fallback(resume_text, result)

    parsed.setdefault("name", None)
    parsed.setdefault("email", None)
    parsed.setdefault("phone", None)
    parsed.setdefault("location", None)
    parsed.setdefault("recommended_roles", [])
    parsed.setdefault("experience_summary", "")
    parsed.setdefault("experience", [])
    parsed.setdefault("education", [])
    parsed.setdefault("projects", [])

    # "skills" may come back as null; hand the normalizer an empty list instead.
    parsed["skills"] = normalize_skill_list(parsed.get("skills") or [])
    if not parsed["skills"]:
        parsed["skills"] = _fallback_skill_scan(resume_text)
    return parsed


async def analyze_resume_vs_job_description(
    role_title: str,
    resume_skills: list,
    resume_summary: str,
    jd_title: str,
    jd_description: str,
    jd_required_skills: list | None = None,
) -> dict:
    """Ask Gemini how a resume measures up against a job description.

    Returns a dict with "meeting_expectations", "missing_expectations",
    "improvement_suggestions" (each cut to 10 entries) and "fit_summary".
    If the AI call or its parsing fails, the result is derived locally by
    comparing normalized resume skills with the JD's required skills.
    """
    jd_required_skills = jd_required_skills or []
    prompt = f"""You are an interview coach helping a student prepare for a job.

Role title: {role_title}
Job Description Title: {jd_title}
Job Description Text:
---
{jd_description}
---

Job Description Required Skills (if provided): {json.dumps(jd_required_skills)}

Student Resume Skills: {json.dumps(resume_skills)}
Student Resume Summary:
---
{resume_summary}
---

Return ONLY valid JSON with this structure:
{{
  "meeting_expectations": ["..."],
  "missing_expectations": ["..."],
  "improvement_suggestions": ["..."],
  "fit_summary": "short summary"
}}

Rules:
1) Be practical and concise.
2) Mention what already matches first.
3) Missing expectations should be specific and skill/experience-oriented.
4) Suggestions should be actionable and student-friendly.
5) Avoid harsh wording.
"""

    try:
        raw_reply = await call_gemini(prompt)
        payload = json.loads(_extract_json_object(raw_reply))
        matched_items = payload.get("meeting_expectations", [])[:10]
        gap_items = payload.get("missing_expectations", [])[:10]
        advice_items = payload.get("improvement_suggestions", [])[:10]
        return {
            "meeting_expectations": matched_items,
            "missing_expectations": gap_items,
            "improvement_suggestions": advice_items,
            "fit_summary": payload.get("fit_summary", ""),
        }
    except Exception:
        # Local fallback: split the required skills into owned vs. not owned.
        owned = {skill.lower() for skill in normalize_skill_list(resume_skills)}
        met, missing = [], []
        for skill in normalize_skill_list(jd_required_skills):
            bucket = met if skill.lower() in owned else missing
            bucket.append(skill)
        return {
            "meeting_expectations": met[:6],
            "missing_expectations": missing[:6],
            "improvement_suggestions": [
                "Build 1-2 focused projects aligned with missing JD skills.",
                "Use STAR-style examples for your strongest matching skills.",
                "Revise resume bullets to highlight measurable impact.",
            ],
            "fit_summary": "You match some expectations and can improve fit by addressing the missing skills.",
        }


async def generate_interview_question(
    skills: list,
    role_title: str,
    previous_questions: list | None = None,
    previous_answer: str | None = None,
    difficulty: str = "medium",
    question_stage: str = "deep",
    foundation_limit: int = 3,
) -> dict:
    """Generate one interview question using Gemini.

    Args:
        skills: Candidate skill focus areas; ``None`` or empty is allowed.
        role_title: Role the candidate is interviewing for.
        previous_questions: Questions already asked, listed in the prompt so
            the model does not repeat them.
        previous_answer: The candidate's last answer; when given, the prompt
            asks for a follow-up that probes deeper.
        difficulty: Requested difficulty label.
        question_stage: "foundation" or "deep", forwarded to the prompt.
        foundation_limit: Foundation-question cap, forwarded to the prompt.

    Returns:
        A dict with at least "question", "difficulty" and "category". If the
        Gemini call fails, or its reply is not a JSON object with a non-empty
        "question", a generic experience question is returned instead.
    """
    skills = skills or []
    # str() guards against non-string entries, which would make join() raise.
    skill_line = ", ".join(str(skill) for skill in skills)
    context = f"Role: {role_title}\nCandidate Skill Focus Areas: {skill_line}\nDifficulty: {difficulty}"
    context += f"\nCurrent Stage: {question_stage}"
    context += f"\nFoundation Question Limit: {foundation_limit}"

    if previous_questions:
        context += "\n\nPrevious questions asked (do NOT repeat these):\n"
        context += "".join(f"{i}. {q}\n" for i, q in enumerate(previous_questions, 1))

    if previous_answer:
        context += f"\nCandidate's last answer: {previous_answer}"
        context += "\nGenerate a follow-up question based on this answer to probe deeper."

    prompt_template = PromptTemplate.from_template(
        """{context}

Generate ONE interview question for this candidate. The question should:
1. Be relevant to the role and candidate's skills
2. Match the {difficulty} difficulty level
3. Be clear and specific
4. Test practical knowledge
5. If a skill is a cluster label like "Deep Learning (CNN, LSTM)", pick one member skill from that cluster and ask a concrete question on it
6. Rotate topics to avoid repeatedly asking from the same cluster
7. If Current Stage is "foundation": ask only core/fundamental basics
8. If Current Stage is "deep": DO NOT ask basic definition/foundation questions; ask applied, scenario-based, debugging, optimization, or trade-off questions only
9. Treat Foundation Question Limit as a strict cap: once foundation stage is done, never return to foundation-style prompts

Return ONLY a JSON object with:
- "question": the interview question text
- "difficulty": "{difficulty}"
- "category": the skill category this tests

Return ONLY valid JSON, no markdown formatting."""
    )
    prompt = prompt_template.format(context=context, difficulty=difficulty)

    try:
        result = _extract_json_object(await call_gemini(prompt))
        parsed = json.loads(result)
        # A list, or an object without a usable question, would break callers
        # that read result["question"]; treat it as a failed generation.
        if not isinstance(parsed, dict) or not parsed.get("question"):
            raise ValueError("Question response is not an object with a question")
        parsed.setdefault("difficulty", difficulty)
        parsed.setdefault("category", "general")
        return parsed
    except Exception:
        return {
            "question": f"Tell me about your experience with {skills[0] if skills else 'software development'}.",
            "difficulty": difficulty,
            "category": "general",
        }


async def generate_interview_question_batch(
    skills: list,
    role_title: str,
    count: int,
    start_question_number: int = 1,
    previous_questions: list = None,
    foundation_limit: int = 3,
) -> list:
    """Produce ``count`` interview questions with a single Gemini request.

    Each question number gets a planned difficulty and stage: numbers up to
    ``foundation_limit`` are "easy"/"foundation", the next three are
    "medium"/"deep", the rest "hard"/"deep". The model reply is normalized to
    exactly ``count`` dicts with "question", "difficulty" and "category";
    missing or invalid parts, and any failure, are filled with generic
    questions.
    """
    previous_questions = previous_questions or []
    count = max(0, int(count or 0))
    if not count:
        return []

    def _planned(number: int) -> dict:
        # Difficulty/stage schedule for a single question number.
        if number <= foundation_limit:
            return {"question_number": number, "difficulty": "easy", "stage": "foundation"}
        level = "medium" if number <= foundation_limit + 3 else "hard"
        return {"question_number": number, "difficulty": level, "stage": "deep"}

    plan = [_planned(start_question_number + offset) for offset in range(count)]

    def _generic(slot: int) -> dict:
        # Placeholder question for plan position ``slot``.
        return {
            "question": f"Tell me about your experience with {skills[0] if skills else 'software development'}.",
            "difficulty": plan[slot]["difficulty"],
            "category": "general",
        }

    context = "\n".join(
        (
            f"Role: {role_title}",
            f"Candidate Skill Focus Areas: {', '.join(skills)}",
            f"Question Plan: {json.dumps(plan)}",
            f"Foundation Question Limit: {foundation_limit}",
        )
    )

    if previous_questions:
        numbered = "".join(
            f"{idx}. {asked}\n" for idx, asked in enumerate(previous_questions, 1)
        )
        context += "\n\nPrevious questions asked (do NOT repeat these):\n" + numbered

    prompt_template = PromptTemplate.from_template(
        """{context}

Generate exactly {count} interview questions as a JSON array where each item follows the corresponding Question Plan entry.

Rules:
1. Questions must be relevant to the role and listed skills.
2. Do not repeat or rephrase previous questions.
3. If stage is "foundation": ask only core fundamentals.
4. If stage is "deep": ask applied/scenario/debugging/trade-off questions only.
5. Rotate topics across skills to avoid repetitive focus.
6. If a skill is a cluster label like "Deep Learning (CNN, LSTM)", ask about one concrete member skill.

Return ONLY valid JSON array with objects of shape:
- "question": string
- "difficulty": one of "easy" | "medium" | "hard"
- "category": string

Return ONLY JSON, no markdown."""
    )
    prompt = prompt_template.format(context=context, count=count)

    try:
        data = json.loads((await call_gemini(prompt)).strip())
        if not isinstance(data, list):
            raise ValueError("Batch response is not a list")

        allowed_levels = {"easy", "medium", "hard"}
        normalized = []
        # zip() stops at the shorter input, i.e. at most ``count`` items.
        for spec, item in zip(plan, data):
            entry = item if isinstance(item, dict) else {}
            level = entry.get("difficulty")
            normalized.append(
                {
                    "question": entry.get("question") or f"Explain your approach for {skills[0] if skills else 'this topic'}.",
                    "difficulty": level if level in allowed_levels else spec["difficulty"],
                    "category": entry.get("category") or "general",
                }
            )

        # Pad with generic questions when the model returned too few items.
        normalized.extend(_generic(slot) for slot in range(len(normalized), count))
        return normalized
    except Exception:
        return [_generic(slot) for slot in range(count)]


async def generate_followup_question_batch_from_qa(
    role_title: str,
    skills: list,
    qa_pairs: list,
    previous_questions: list,
    count: int,
    difficulty: str = "medium",
) -> list:
    """Generate follow-up questions from interview Q&A context in one Gemini call.

    Args:
        role_title: Role the candidate is interviewing for.
        skills: Candidate skill focus areas; may be empty.
        qa_pairs: Dicts with "question" and "answer"; ``None`` is treated as
            empty. Only the last 8 complete pairs are sent as context, plus the
            pairs flagged by ``_collect_loose_qa``.
        previous_questions: Questions the model must not repeat.
        count: Number of follow-ups wanted; zero or less yields ``[]``.
        difficulty: Difficulty label requested in the prompt and used whenever
            the model omits or garbles it.

    Returns:
        Exactly ``count`` dicts with "question", "difficulty" and "category".
        Missing items, and any failure of the call or its parsing, are filled
        with a generic follow-up question.
    """
    count = max(0, int(count or 0))
    if count == 0:
        return []

    # Single source for the placeholder question text.
    default_question = f"Can you explain your approach for {skills[0] if skills else 'this scenario'}?"

    def _generic() -> dict:
        return {
            "question": default_question,
            "difficulty": difficulty,
            "category": "follow-up",
        }

    # Tolerate a missing Q&A list here, as _collect_loose_qa does below.
    compact_qa = []
    for qa in (qa_pairs or [])[-8:]:
        q = (qa or {}).get("question", "")
        a = (qa or {}).get("answer", "")
        if q and a:
            compact_qa.append({"question": q, "answer": a})

    payload = {
        "role_title": role_title,
        "skills": skills,
        "difficulty": difficulty,
        "count": count,
        "answered_qa": compact_qa,
        "loose_qa": _collect_loose_qa(qa_pairs),
        "previous_questions": previous_questions,
    }

    prompt_template = PromptTemplate.from_template(
        """You are generating strict, concept-focused technical interview follow-up questions.

Input JSON:
{payload}

Instructions:
1. Generate exactly {count} follow-up questions using answered_qa context.
    2. Questions must continue naturally from candidate's previous answers.
3. Do not repeat or paraphrase any question in previous_questions.
    4. Prioritize loose_qa first: if any answer is vague/short/uncertain, ask a direct follow-up that probes missing concept depth.
    5. Focus on concept validation (why, how, trade-offs, failure modes), not memorized definitions.
    6. Keep questions practical and role-relevant.
    7. Use difficulty {difficulty}. Do not output easy/basic-level questions.

Return ONLY valid JSON array with objects:
- "question": string
- "difficulty": "easy" | "medium" | "hard"
- "category": string

No markdown, no extra text."""
    )
    prompt = prompt_template.format(
        payload=json.dumps(payload, ensure_ascii=True),
        count=count,
        difficulty=difficulty,
    )

    try:
        result = (await call_gemini(prompt)).strip()
        data = json.loads(result)
        if not isinstance(data, list):
            raise ValueError("Follow-up batch response is not a list")

        normalized = []
        for item in data[:count]:
            if not isinstance(item, dict):
                item = {}
            level = item.get("difficulty")
            normalized.append(
                {
                    "question": item.get("question") or default_question,
                    "difficulty": level if level in {"easy", "medium", "hard"} else difficulty,
                    "category": item.get("category") or "follow-up",
                }
            )

        # Pad when the model returned fewer items than requested.
        normalized.extend(_generic() for _ in range(count - len(normalized)))
        return normalized
    except Exception:
        return [_generic() for _ in range(count)]


async def evaluate_interview(questions_and_answers: list, role_title: str) -> dict:
    """Batch-evaluate all interview Q&A pairs using Gemini.

    Args:
        questions_and_answers: Dicts with "question" and "answer". ``None``,
            ``None`` entries and missing keys are tolerated and rendered as
            empty text, so a partially answered interview can still be scored.
        role_title: Role the candidate interviewed for.

    Returns:
        The model's evaluation as a dict. The prompt asks for "overall_score",
        "detailed_scores", "strengths", "weaknesses" and "recommendations".
        If the call fails or the reply is not a JSON object, a placeholder
        evaluation with an overall score of 50 is returned.
    """
    qa_lines = []
    for i, qa in enumerate(questions_and_answers or [], 1):
        entry = qa or {}
        question = entry.get("question", "")
        answer = entry.get("answer", "")
        qa_lines.append(f"\nQ{i}: {question}\nA{i}: {answer}\n")
    # Join once instead of growing a string with += on every iteration.
    qa_text = "".join(qa_lines)

    prompt_template = PromptTemplate.from_template(
        """You are a strict technical interviewer evaluating a candidate for the role: {role_title}.

Here are the interview questions and the candidate's answers:
{qa_text}

Scoring policy (concept-first, strict):
1. Score primarily on conceptual correctness, depth, and reasoning quality.
2. Do NOT reward answer length, confidence, or communication style when concepts are wrong.
3. Penalize vague, hand-wavy, or uncertain answers.
4. Penalize technically incorrect claims even if explanation sounds fluent.
5. Reward precise mechanisms, trade-offs, edge cases, and debugging logic.

Score rubric per answer:
- 90-100: conceptually correct, deep, and accurate with strong reasoning
- 70-89: mostly correct with minor conceptual gaps
- 50-69: partially correct but misses key mechanisms
- 30-49: shallow/vague with major conceptual gaps
- 0-29: incorrect or off-topic

Return a JSON object with:
- "overall_score": integer from 0-100
- "detailed_scores": list of objects, each with:
    - "question": the question text
    - "answer": the answer text
    - "score": integer 0-100
    - "feedback": concise concept-focused feedback for this answer
- "strengths": list of 3-5 strength areas
- "weaknesses": list of 3-5 concept gaps
- "recommendations": list of 3-5 actionable concept-improvement recommendations

Return ONLY valid JSON, no markdown formatting."""
    )
    prompt = prompt_template.format(role_title=role_title, qa_text=qa_text)

    try:
        result = _extract_json_object(await call_gemini(prompt))
        parsed = json.loads(result)
        # The contract is a dict; a list or scalar reply counts as a failure.
        if not isinstance(parsed, dict):
            raise ValueError("Evaluation response is not a JSON object")
        return parsed
    except Exception:
        return {
            "overall_score": 50,
            "detailed_scores": [],
            "strengths": ["Unable to evaluate"],
            "weaknesses": ["Unable to evaluate"],
            "recommendations": ["Please retry the interview"],
        }