Spaces:

sajith-0701
/

interviewbot

Running

App Files Files Community

interviewbot / backend /utils /gemini.py

sajith-0701

v2.1

d50ee26 about 2 months ago

raw

history blame

20.8 kB

	from google import genai
	from config import get_settings
	from utils.skills import normalize_skill_list
	import asyncio
	import json
	import re
	from langchain_core.prompts import PromptTemplate

	# Application settings, loaded once when this module is imported.
	settings = get_settings()

	# Module-wide Gemini client, authenticated with the API key from settings;
	# call_gemini() sends every request through it.
	client = genai.Client(api_key=settings.GEMINI_API_KEY)


	def _is_transient_gemini_error(error: Exception) -> bool:
	message = str(error or "").lower()
	transient_markers = [
	"503",
	"unavailable",
	"resource_exhausted",
	"high demand",
	"deadline",
	"timed out",
	"timeout",
	]
	return any(marker in message for marker in transient_markers)


	async def call_gemini(prompt: str, system_instruction: str | None = None) -> str:
	    """Send a prompt to Gemini and return the stripped response text.

	    Args:
	        prompt: Prompt text passed to the model as ``contents``.
	        system_instruction: Optional system instruction added to the
	            request config.

	    Returns:
	        The response text with surrounding whitespace removed, or "" when
	        the response carries no text.

	    Raises:
	        RuntimeError: If the request fails with a non-transient error or
	            still fails after the last retry. The underlying exception is
	            chained as the cause.
	    """
	    # JSON output is always requested, so the config is never empty.
	    config = {"response_mime_type": "application/json"}
	    if system_instruction:
	        config["system_instruction"] = system_instruction

	    max_attempts = 3
	    last_error = None
	    for attempt in range(max_attempts):
	        try:
	            # generate_content is a synchronous call; run it in a worker
	            # thread so the event loop is not blocked while waiting.
	            response = await asyncio.to_thread(
	                client.models.generate_content,
	                model=settings.GEMINI_MODEL,
	                contents=prompt,
	                config=config,
	            )
	            return (response.text or "").strip()
	        except Exception as exc:
	            last_error = exc
	            is_last_attempt = attempt == max_attempts - 1
	            if is_last_attempt or not _is_transient_gemini_error(exc):
	                break
	            # Linear back-off between retries: 0.8s, then 1.6s.
	            await asyncio.sleep(0.8 * (attempt + 1))

	    raise RuntimeError(f"Gemini request failed: {last_error}") from last_error


	def _extract_json_object(text: str) -> str:
	value = (text or "").strip()
	if value.startswith("```"):
	value = value.split("\n", 1)[1]
	if value.endswith("```"):
	value = value.rsplit("```", 1)[0]
	value = value.strip()

	if value.startswith("{") and value.endswith("}"):
	return value

	# Fallback when model wraps JSON with extra text.
	start = value.find("{")
	end = value.rfind("}")
	if start != -1 and end != -1 and end > start:
	return value[start:end + 1]

	return value


	def _fallback_skill_scan(resume_text: str) -> list:
	    """Find well-known technology keywords in resume text without the model.

	    Matching is case-insensitive and anchored on word boundaries. The hits
	    are passed through ``normalize_skill_list`` before being returned.
	    """
	    known_skills = (
	        "python", "java", "javascript", "typescript", "react", "next.js", "node.js",
	        "fastapi", "django", "flask", "spring", "mongodb", "postgresql", "mysql",
	        "redis", "docker", "kubernetes", "aws", "gcp", "azure", "git", "linux",
	        "rest api", "graphql", "machine learning", "data analysis", "sql",
	    )
	    haystack = (resume_text or "").lower()
	    matches = [
	        name
	        for name in known_skills
	        if re.search(r"\b" + re.escape(name.lower()) + r"\b", haystack)
	    ]
	    return normalize_skill_list(matches)


	def _is_loose_answer(answer: str) -> bool:
	text = (answer or "").strip().lower()
	if not text:
	return True

	word_count = len(text.split())
	if word_count < 18:
	return True

	weak_markers = [
	"i think",
	"maybe",
	"not sure",
	"dont know",
	"don't know",
	"something like",
	"etc",
	"kind of",
	"sort of",
	]
	return any(marker in text for marker in weak_markers)


	def _collect_loose_qa(qa_pairs: list, limit: int = 4) -> list:
	loose = []
	for qa in reversed(qa_pairs or []):
	question = (qa or {}).get("question", "")
	answer = (qa or {}).get("answer", "")
	if not question or not answer:
	continue
	if _is_loose_answer(answer):
	loose.append({"question": question, "answer": answer})
	if len(loose) >= limit:
	break
	loose.reverse()
	return loose


	async def parse_resume_with_gemini(resume_text: str) -> dict:
	    """Parse resume text into structured data using Gemini.

	    Args:
	        resume_text: Raw text of the resume.

	    Returns:
	        A dict with the keys "name", "email", "phone", "location",
	        "skills", "recommended_roles", "experience_summary", "experience",
	        "education" and "projects". When the model call fails or its reply
	        is not a JSON object, a minimal profile is returned whose skills
	        come from a keyword scan of the resume text.
	    """
	    prompt = f"""Analyze the following resume and extract structured information.
	CRITICAL INSTRUCTION FOR SKILLS:
	1) Extract concrete tools/technologies/frameworks/languages from the resume text.
	2) Exclude vague traits such as "hardworking", "leadership", "problem solving", "communication".
	3) If a line contains multiple skills (comma-separated), split them into separate list items.
	4) Do NOT add skills that are not present in the resume.

	Return a JSON object with these exact fields:
	- "name": full name of the candidate (string or null)
	- "email": candidate's email address (string or null)
	- "phone": candidate's phone number (string or null)
	- "location": candidate's location/address (string or null)
	- "skills": list of technical and soft skills verbatim from the text (array of strings)
	- "recommended_roles": list of 3-5 recommended job role titles the user is qualified for based on these skills (array of strings)
	- "experience_summary": brief summary of work experience (string)
	- "experience": list of dictionaries, each with "company", "role", "duration", and "description"
	- "education": list of dictionaries, each with "institution", "degree", "graduation_year"
	- "projects": list of dictionaries, each with "name" and "description"

	Resume text:
	---
	{resume_text}
	---

	Return ONLY valid JSON, no markdown formatting."""

	    try:
	        result = _extract_json_object(await call_gemini(prompt))
	    except Exception:
	        # Best-effort: a failed model call degrades to the keyword scan
	        # instead of failing the whole resume upload.
	        return _resume_parse_fallback(
	            resume_text, "Unable to parse with AI right now. Please retry."
	        )

	    try:
	        parsed = json.loads(result)
	    except json.JSONDecodeError:
	        return _resume_parse_fallback(resume_text, result)
	    # Valid JSON that is not an object (list, string, number) has no fields
	    # to read; treat it like unparseable output.
	    if not isinstance(parsed, dict):
	        return _resume_parse_fallback(resume_text, result)

	    for key in ("name", "email", "phone", "location"):
	        parsed.setdefault(key, None)
	    parsed.setdefault("experience_summary", "")
	    # List fields may be missing or null in the reply; always expose lists.
	    for key in ("recommended_roles", "experience", "education", "projects"):
	        if not isinstance(parsed.get(key), list):
	            parsed[key] = []

	    parsed["skills"] = normalize_skill_list(parsed.get("skills") or [])
	    if not parsed["skills"]:
	        parsed["skills"] = _fallback_skill_scan(resume_text)
	    return parsed


	def _resume_parse_fallback(resume_text: str, experience_summary: str) -> dict:
	    """Build the minimal resume profile returned when AI parsing is unavailable."""
	    return {
	        "name": None,
	        "email": None,
	        "phone": None,
	        "location": None,
	        "skills": _fallback_skill_scan(resume_text),
	        "recommended_roles": [],
	        "experience_summary": experience_summary,
	        "experience": [],
	        "education": [],
	        "projects": [],
	    }


	async def analyze_resume_vs_job_description(
	    role_title: str,
	    resume_skills: list,
	    resume_summary: str,
	    jd_title: str,
	    jd_description: str,
	    jd_required_skills: list | None = None,
	) -> dict:
	    """Compare a resume with a job description to produce interview guidance.

	    Args:
	        role_title: Title of the role being prepared for.
	        resume_skills: Skills extracted from the resume.
	        resume_summary: Summary text of the resume.
	        jd_title: Title of the job description.
	        jd_description: Full job description text.
	        jd_required_skills: Optional explicit list of required skills.

	    Returns:
	        A dict with "meeting_expectations", "missing_expectations" and
	        "improvement_suggestions" (lists) plus "fit_summary" (string).
	        If the model call or parsing fails, the lists are derived from a
	        direct comparison of required skills against resume skills.
	    """
	    jd_required_skills = jd_required_skills or []
	    prompt = f"""You are an interview coach helping a student prepare for a job.

	Role title: {role_title}
	Job Description Title: {jd_title}
	Job Description Text:
	---
	{jd_description}
	---

	Job Description Required Skills (if provided): {json.dumps(jd_required_skills)}

	Student Resume Skills: {json.dumps(resume_skills)}
	Student Resume Summary:
	---
	{resume_summary}
	---

	Return ONLY valid JSON with this structure:
	{{
	"meeting_expectations": ["..."],
	"missing_expectations": ["..."],
	"improvement_suggestions": ["..."],
	"fit_summary": "short summary"
	}}

	Rules:
	1) Be practical and concise.
	2) Mention what already matches first.
	3) Missing expectations should be specific and skill/experience-oriented.
	4) Suggestions should be actionable and student-friendly.
	5) Avoid harsh wording.
	"""

	    try:
	        parsed = json.loads(_extract_json_object(await call_gemini(prompt)))
	        if not isinstance(parsed, dict):
	            raise ValueError("Analysis response is not a JSON object")

	        def _first_ten(key: str) -> list:
	            # Only slice real lists; slicing a string would return its
	            # first ten characters rather than ten entries.
	            value = parsed.get(key)
	            return value[:10] if isinstance(value, list) else []

	        return {
	            "meeting_expectations": _first_ten("meeting_expectations"),
	            "missing_expectations": _first_ten("missing_expectations"),
	            "improvement_suggestions": _first_ten("improvement_suggestions"),
	            "fit_summary": parsed.get("fit_summary") or "",
	        }
	    except Exception:
	        # Model call or parsing failed: fall back to a plain comparison of
	        # the required skills against the resume skills.
	        resume_set = {s.lower() for s in normalize_skill_list(resume_skills or [])}
	        required = normalize_skill_list(jd_required_skills)
	        missing = [s for s in required if s.lower() not in resume_set]
	        met = [s for s in required if s.lower() in resume_set]
	        return {
	            "meeting_expectations": met[:6],
	            "missing_expectations": missing[:6],
	            "improvement_suggestions": [
	                "Build 1-2 focused projects aligned with missing JD skills.",
	                "Use STAR-style examples for your strongest matching skills.",
	                "Revise resume bullets to highlight measurable impact.",
	            ],
	            "fit_summary": "You match some expectations and can improve fit by addressing the missing skills.",
	        }


	async def generate_interview_question(
	    skills: list,
	    role_title: str,
	    previous_questions: list | None = None,
	    previous_answer: str | None = None,
	    difficulty: str = "medium",
	    question_stage: str = "deep",
	    foundation_limit: int = 3,
	) -> dict:
	    """Generate a single interview question using Gemini.

	    Args:
	        skills: Candidate skill focus areas; None is treated as empty.
	        role_title: Role the candidate is interviewing for.
	        previous_questions: Questions already asked, which must not be repeated.
	        previous_answer: Candidate's last answer; when given, a follow-up
	            question is requested.
	        difficulty: Difficulty label placed in the prompt and used as the
	            default for the result.
	        question_stage: Stage label placed in the prompt ("foundation" or "deep").
	        foundation_limit: Foundation question cap placed in the prompt.

	    Returns:
	        A dict with "question", "difficulty" and "category". A generic
	        question about the first skill is returned when the model call
	        fails or its reply has no usable question.
	    """
	    # str() guards the join against non-string skill entries.
	    skill_text = ", ".join(str(skill) for skill in skills or [])
	    parts = [
	        f"Role: {role_title}\n",
	        f"Candidate Skill Focus Areas: {skill_text}\n",
	        f"Difficulty: {difficulty}\n",
	        f"Current Stage: {question_stage}\n",
	        f"Foundation Question Limit: {foundation_limit}",
	    ]
	    if previous_questions:
	        parts.append("\n\nPrevious questions asked (do NOT repeat these):\n")
	        parts.extend(f"{i}. {q}\n" for i, q in enumerate(previous_questions, 1))
	    if previous_answer:
	        parts.append(f"\nCandidate's last answer: {previous_answer}")
	        parts.append("\nGenerate a follow-up question based on this answer to probe deeper.")
	    context = "".join(parts)

	    prompt_template = PromptTemplate.from_template(
	        """{context}

	Generate ONE interview question for this candidate. The question should:
	1. Be relevant to the role and candidate's skills
	2. Match the {difficulty} difficulty level
	3. Be clear and specific
	4. Test practical knowledge
	5. If a skill is a cluster label like "Deep Learning (CNN, LSTM)", pick one member skill from that cluster and ask a concrete question on it
	6. Rotate topics to avoid repeatedly asking from the same cluster
	7. If Current Stage is "foundation": ask only core/fundamental basics
	8. If Current Stage is "deep": DO NOT ask basic definition/foundation questions; ask applied, scenario-based, debugging, optimization, or trade-off questions only
	9. Treat Foundation Question Limit as a strict cap: once foundation stage is done, never return to foundation-style prompts

	Return ONLY a JSON object with:
	- "question": the interview question text
	- "difficulty": "{difficulty}"
	- "category": the skill category this tests

	Return ONLY valid JSON, no markdown formatting."""
	    )
	    prompt = prompt_template.format(context=context, difficulty=difficulty)

	    try:
	        parsed = json.loads(_extract_json_object(await call_gemini(prompt)))
	        # A reply without a question is unusable; use the fallback instead
	        # of handing callers an incomplete dict.
	        if not isinstance(parsed, dict) or not parsed.get("question"):
	            raise ValueError("Question response has no usable question")
	        parsed.setdefault("difficulty", difficulty)
	        parsed.setdefault("category", "general")
	        return parsed
	    except Exception:
	        return {
	            "question": f"Tell me about your experience with {skills[0] if skills else 'software development'}.",
	            "difficulty": difficulty,
	            "category": "general",
	        }


	async def generate_interview_question_batch(
	skills: list,
	role_title: str,
	count: int,
	start_question_number: int = 1,
	previous_questions: list = None,
	foundation_limit: int = 3,
	) -> list:
	"""Generate a batch of interview questions in a single Gemini call."""
	previous_questions = previous_questions or []
	count = max(0, int(count or 0))
	if count == 0:
	return []

	plan = []
	for i in range(count):
	qn = start_question_number + i
	difficulty = "easy" if qn <= foundation_limit else ("medium" if qn <= foundation_limit + 3 else "hard")
	stage = "foundation" if qn <= foundation_limit else "deep"
	plan.append({"question_number": qn, "difficulty": difficulty, "stage": stage})

	context = (
	f"Role: {role_title}\n"
	f"Candidate Skill Focus Areas: {', '.join(skills)}\n"
	f"Question Plan: {json.dumps(plan)}\n"
	f"Foundation Question Limit: {foundation_limit}"
	)

	if previous_questions:
	context += "\n\nPrevious questions asked (do NOT repeat these):\n"
	for i, q in enumerate(previous_questions, 1):
	context += f"{i}. {q}\n"

	prompt_template = PromptTemplate.from_template(
	"""{context}

	Generate exactly {count} interview questions as a JSON array where each item follows the corresponding Question Plan entry.

	Rules:
	1. Questions must be relevant to the role and listed skills.
	2. Do not repeat or rephrase previous questions.
	3. If stage is "foundation": ask only core fundamentals.
	4. If stage is "deep": ask applied/scenario/debugging/trade-off questions only.
	5. Rotate topics across skills to avoid repetitive focus.
	6. If a skill is a cluster label like "Deep Learning (CNN, LSTM)", ask about one concrete member skill.

	Return ONLY valid JSON array with objects of shape:
	- "question": string
	- "difficulty": one of "easy" \| "medium" \| "hard"
	- "category": string

	Return ONLY JSON, no markdown."""
	)
	prompt = prompt_template.format(context=context, count=count)

	try:
	result = (await call_gemini(prompt)).strip()
	data = json.loads(result)
	if not isinstance(data, list):
	raise ValueError("Batch response is not a list")
	normalized = []
	for i, item in enumerate(data[:count]):
	spec = plan[i]
	if not isinstance(item, dict):
	item = {}
	normalized.append(
	{
	"question": item.get("question") or f"Explain your approach for {skills[0] if skills else 'this topic'}.",
	"difficulty": item.get("difficulty") if item.get("difficulty") in {"easy", "medium", "hard"} else spec["difficulty"],
	"category": item.get("category") or "general",
	}
	)
	while len(normalized) < count:
	spec = plan[len(normalized)]
	normalized.append(
	{
	"question": f"Tell me about your experience with {skills[0] if skills else 'software development'}.",
	"difficulty": spec["difficulty"],
	"category": "general",
	}
	)
	return normalized
	except Exception:
	fallback = []
	for i in range(count):
	spec = plan[i]
	fallback.append(
	{
	"question": f"Tell me about your experience with {skills[0] if skills else 'software development'}.",
	"difficulty": spec["difficulty"],
	"category": "general",
	}
	)
	return fallback


	async def generate_followup_question_batch_from_qa(
	    role_title: str,
	    skills: list,
	    qa_pairs: list,
	    previous_questions: list,
	    count: int,
	    difficulty: str = "medium",
	) -> list:
	    """Generate follow-up questions from interview Q&A context in one Gemini call.

	    Args:
	        role_title: Role the candidate is interviewing for.
	        skills: Candidate skill focus areas; None is treated as empty.
	        qa_pairs: Dicts with "question" and "answer" keys. Only the last
	            eight complete pairs are sent as context; None is treated as empty.
	        previous_questions: Questions already asked, which must not be repeated.
	        count: Number of questions to return; zero, negative or None yields [].
	        difficulty: Requested difficulty, also used when an item has no
	            valid difficulty of its own.

	    Returns:
	        Exactly ``count`` dicts with "question", "difficulty" and
	        "category". Missing or unusable items are replaced by generic
	        follow-up questions that rotate through the skills.
	    """
	    count = max(0, int(count or 0))
	    if count == 0:
	        return []

	    qa_pairs = qa_pairs or []
	    skill_names = [str(skill) for skill in skills or []]

	    def _generic_question(index: int) -> str:
	        # Rotate through the skills so generic questions are not identical.
	        topic = skill_names[index % len(skill_names)] if skill_names else "this scenario"
	        return f"Can you explain your approach for {topic}?"

	    def _fallback_item(index: int) -> dict:
	        return {
	            "question": _generic_question(index),
	            "difficulty": difficulty,
	            "category": "follow-up",
	        }

	    compact_qa = []
	    for qa in qa_pairs[-8:]:
	        if not isinstance(qa, dict):
	            continue
	        q = qa.get("question", "")
	        a = qa.get("answer", "")
	        if q and a:
	            compact_qa.append({"question": q, "answer": a})

	    payload = {
	        "role_title": role_title,
	        "skills": skills,
	        "difficulty": difficulty,
	        "count": count,
	        "answered_qa": compact_qa,
	        "loose_qa": _collect_loose_qa(qa_pairs),
	        "previous_questions": previous_questions or [],
	    }

	    prompt_template = PromptTemplate.from_template(
	        """You are generating strict, concept-focused technical interview follow-up questions.

	Input JSON:
	{payload}

	Instructions:
	1. Generate exactly {count} follow-up questions using answered_qa context.
	2. Questions must continue naturally from candidate's previous answers.
	3. Do not repeat or paraphrase any question in previous_questions.
	4. Prioritize loose_qa first: if any answer is vague/short/uncertain, ask a direct follow-up that probes missing concept depth.
	5. Focus on concept validation (why, how, trade-offs, failure modes), not memorized definitions.
	6. Keep questions practical and role-relevant.
	7. Use difficulty {difficulty}. Do not output easy/basic-level questions.

	Return ONLY valid JSON array with objects:
	- "question": string
	- "difficulty": "easy" | "medium" | "hard"
	- "category": string

	No markdown, no extra text."""
	    )
	    prompt = prompt_template.format(
	        payload=json.dumps(payload, ensure_ascii=True),
	        count=count,
	        difficulty=difficulty,
	    )

	    try:
	        raw = (await call_gemini(prompt)).strip()
	        # Keep only the outermost [...] span so code fences, stray prose, or
	        # a wrapper object around the array do not discard the whole batch.
	        start, end = raw.find("["), raw.rfind("]")
	        if start != -1 and end > start:
	            raw = raw[start:end + 1]
	        data = json.loads(raw)
	        if not isinstance(data, list):
	            raise ValueError("Follow-up batch response is not a list")
	    except Exception:
	        return [_fallback_item(i) for i in range(count)]

	    normalized = []
	    for i, item in enumerate(data[:count]):
	        if not isinstance(item, dict):
	            item = {}
	        question = item.get("question")
	        if not isinstance(question, str) or not question.strip():
	            question = _generic_question(i)
	        item_difficulty = item.get("difficulty")
	        # Tuple membership compares by equality, so an unhashable value
	        # (e.g. a list) cannot raise here.
	        if item_difficulty not in ("easy", "medium", "hard"):
	            item_difficulty = difficulty
	        normalized.append(
	            {
	                "question": question,
	                "difficulty": item_difficulty,
	                "category": item.get("category") or "follow-up",
	            }
	        )

	    # Pad with generic questions when the model returned too few items.
	    normalized.extend(_fallback_item(i) for i in range(len(normalized), count))
	    return normalized


	async def evaluate_interview(questions_and_answers: list, role_title: str) -> dict:
	    """Batch-evaluate all interview Q&A pairs using Gemini.

	    Args:
	        questions_and_answers: Dicts with "question" and "answer" keys.
	            Entries that are not dicts are skipped and missing keys are
	            sent as empty text.
	        role_title: Role the candidate is being evaluated for.

	    Returns:
	        A dict with "overall_score" (int clamped to 0-100),
	        "detailed_scores", "strengths", "weaknesses" and "recommendations"
	        (lists). A placeholder evaluation with score 50 is returned when
	        the model call fails or its reply is not a JSON object.
	    """
	    entries = [qa for qa in questions_and_answers or [] if isinstance(qa, dict)]
	    qa_text = "".join(
	        f"\nQ{i}: {qa.get('question', '')}\nA{i}: {qa.get('answer', '')}\n"
	        for i, qa in enumerate(entries, 1)
	    )

	    prompt_template = PromptTemplate.from_template(
	        """You are a strict technical interviewer evaluating a candidate for the role: {role_title}.

	Here are the interview questions and the candidate's answers:
	{qa_text}

	Scoring policy (concept-first, strict):
	1. Score primarily on conceptual correctness, depth, and reasoning quality.
	2. Do NOT reward answer length, confidence, or communication style when concepts are wrong.
	3. Penalize vague, hand-wavy, or uncertain answers.
	4. Penalize technically incorrect claims even if explanation sounds fluent.
	5. Reward precise mechanisms, trade-offs, edge cases, and debugging logic.

	Score rubric per answer:
	- 90-100: conceptually correct, deep, and accurate with strong reasoning
	- 70-89: mostly correct with minor conceptual gaps
	- 50-69: partially correct but misses key mechanisms
	- 30-49: shallow/vague with major conceptual gaps
	- 0-29: incorrect or off-topic

	Return a JSON object with:
	- "overall_score": integer from 0-100
	- "detailed_scores": list of objects, each with:
	- "question": the question text
	- "answer": the answer text
	- "score": integer 0-100
	- "feedback": concise concept-focused feedback for this answer
	- "strengths": list of 3-5 strength areas
	- "weaknesses": list of 3-5 concept gaps
	- "recommendations": list of 3-5 actionable concept-improvement recommendations

	Return ONLY valid JSON, no markdown formatting."""
	    )
	    prompt = prompt_template.format(role_title=role_title, qa_text=qa_text)

	    fallback = {
	        "overall_score": 50,
	        "detailed_scores": [],
	        "strengths": ["Unable to evaluate"],
	        "weaknesses": ["Unable to evaluate"],
	        "recommendations": ["Please retry the interview"],
	    }

	    try:
	        parsed = json.loads(_extract_json_object(await call_gemini(prompt)))
	    except Exception:
	        return fallback
	    if not isinstance(parsed, dict):
	        return fallback

	    # List fields may be missing or null in the reply; always expose lists.
	    for key in ("detailed_scores", "strengths", "weaknesses", "recommendations"):
	        if not isinstance(parsed.get(key), list):
	            parsed[key] = []

	    # The model may return the score as a string or float, or omit it.
	    try:
	        score = int(round(float(parsed.get("overall_score"))))
	    except (TypeError, ValueError, OverflowError):
	        score = 50
	    parsed["overall_score"] = max(0, min(100, score))
	    return parsed