Spaces:

sajith-0701
/

interviewbot

Sleeping

App Files Files Community

interviewbot / backend /services /gemini_service.py

sajith-0701

v4.1

e39cad1 about 1 month ago

raw

history blame contribute delete

11.2 kB

	import json
	import logging
	import random
	import re

	from utils.gemini import call_gemini


	def _extract_json_object(text: str) -> str:
	value = (text or "").strip()
	if value.startswith("```"):
	value = value.split("\n", 1)[1]
	if value.endswith("```"):
	value = value.rsplit("```", 1)[0]
	value = value.strip()

	if value.startswith("{") and value.endswith("}"):
	return value

	start = value.find("{")
	end = value.rfind("}")
	if start != -1 and end != -1 and end > start:
	return value[start:end + 1]

	return value


	def _extract_json_array(text: str) -> str:
	value = (text or "").strip()
	if value.startswith("```"):
	value = value.split("\n", 1)[1]
	if value.endswith("```"):
	value = value.rsplit("```", 1)[0]
	value = value.strip()

	if value.startswith("[") and value.endswith("]"):
	return value

	start = value.find("[")
	end = value.rfind("]")
	if start != -1 and end != -1 and end > start:
	return value[start:end + 1]

	return value


	def _parse_json_object_loose(text: str) -> dict:
	    """Parse reply text into a dict, tolerating trailing commas.

	    The text is first narrowed with ``_extract_json_object``. If strict
	    parsing fails, commas that directly precede a closing ``}`` or ``]``
	    are removed and parsing is retried once.

	    Args:
	        text: Raw reply text expected to contain one JSON object.

	    Returns:
	        The parsed object.

	    Raises:
	        json.JSONDecodeError: If the text still does not parse after the
	            trailing-comma repair.
	        ValueError: If the parsed value is not a JSON object.
	    """
	    value = _extract_json_object(text)
	    try:
	        parsed = json.loads(value)
	    except json.JSONDecodeError:
	        # The first alternative consumes whole string literals and puts them
	        # back unchanged, so a ", }" sequence inside a string value is not
	        # mistaken for a trailing comma; only commas outside strings that
	        # are followed by a closing bracket are dropped.
	        cleaned = re.sub(
	            r'("(?:\\.|[^"\\])*")|,\s*(?=[}\]])',
	            lambda match: match.group(1) or "",
	            value,
	        )
	        parsed = json.loads(cleaned)
	    if not isinstance(parsed, dict):
	        raise ValueError("Parsed payload is not a JSON object")
	    return parsed


	def _parse_json_array_loose(text: str) -> list:
	    """Parse reply text into a list, tolerating trailing commas.

	    The text is first narrowed with ``_extract_json_array``. If strict
	    parsing fails, commas that directly precede a closing ``}`` or ``]``
	    are removed and parsing is retried once.

	    Args:
	        text: Raw reply text expected to contain one JSON array.

	    Returns:
	        The parsed array.

	    Raises:
	        json.JSONDecodeError: If the text still does not parse after the
	            trailing-comma repair.
	        ValueError: If the parsed value is not a JSON array.
	    """
	    value = _extract_json_array(text)
	    try:
	        parsed = json.loads(value)
	    except json.JSONDecodeError:
	        # The first alternative consumes whole string literals and puts them
	        # back unchanged, so a ", ]" sequence inside a string value is not
	        # mistaken for a trailing comma; only commas outside strings that
	        # are followed by a closing bracket are dropped.
	        cleaned = re.sub(
	            r'("(?:\\.|[^"\\])*")|,\s*(?=[}\]])',
	            lambda match: match.group(1) or "",
	            value,
	        )
	        parsed = json.loads(cleaned)
	    if not isinstance(parsed, list):
	        raise ValueError("Parsed payload is not a JSON array")
	    return parsed


	def _fallback_score(answer: str) -> int:
	text = (answer or "").strip().lower()
	words = len(text.split())
	weak = any(marker in text for marker in ["not sure", "maybe", "i think", "dont know", "don't know"])

	if words < 10:
	return 35
	if words < 25:
	return 55
	if weak:
	return 50
	if words > 80:
	return 75
	return 65


	async def generate_resume_seed_questions(
	    role_title: str,
	    resume_summary: str,
	    resume_skills: list[str],
	    jd_title: str,
	    jd_description: str,
	    jd_required_skills: list[str],
	    excluded_questions: list[str],
	    count: int = 2,
	) -> list[dict]:
	    """Ask the model for opening interview questions based on a resume and JD.

	    All arguments are serialised into a JSON payload embedded in the prompt
	    sent through ``call_gemini``. The reply is parsed as a JSON array and
	    each usable item is normalised. If the call or the parsing raises,
	    template-based questions are returned instead.

	    Args:
	        role_title: Role being interviewed for; also used in fallback text.
	        resume_summary: Resume summary passed to the model.
	        resume_skills: Resume skills; the first one is the fallback skill
	            when ``jd_required_skills`` is empty.
	        jd_title: Job description title passed to the model.
	        jd_description: Job description text passed to the model.
	        jd_required_skills: Required skills; the first one is the preferred
	            fallback skill.
	        excluded_questions: Questions to avoid; only the last 25 are sent.
	        count: Number of questions wanted. Falsy values become 2 and the
	            result is floored at 1.

	    Returns:
	        A list of dicts with ``question``, ``difficulty`` (``easy``,
	        ``medium`` or ``hard``) and ``category`` keys. On the model path it
	        holds at most ``count`` items and may be shorter or empty when the
	        reply has too few usable items; on the fallback path it holds
	        exactly ``count`` items.
	    """
	    count = max(1, int(count or 2))

	    payload = {
	        "role_title": role_title,
	        "resume_summary": resume_summary,
	        "resume_skills": resume_skills,
	        "jd_title": jd_title,
	        "jd_description": jd_description,
	        "jd_required_skills": jd_required_skills,
	        "excluded_questions": excluded_questions[-25:] if excluded_questions else [],
	        "count": count,
	    }

	    prompt = f"""Generate exactly {count} resume interview questions.

	Input JSON:
	{json.dumps(payload, ensure_ascii=True)}

	Rules:
	1) Questions must be strictly from JD required skills and role context.
	2) Use resume context for relevance.
	3) Do not repeat or paraphrase excluded_questions.
	4) Keep questions concise and practical.
	5) Make the set diverse: use different styles (scenario, debugging, trade-off, implementation, testing).
	6) Do not prefix with numbering like "Question 1:".
	7) Avoid generic repeats like "Explain your hands-on experience" for every question.

	Return ONLY valid JSON array with objects:
	- question (string)
	- difficulty (easy|medium|hard)
	- category (string)
	"""

	    try:
	        # NOTE(review): call_gemini presumably returns the reply text;
	        # its contract lives in utils.gemini - confirm.
	        result = await call_gemini(
	            prompt,
	            max_attempts=3,
	            request_timeout_seconds=20,
	        )
	        data = _parse_json_array_loose(result)

	        questions = []
	        for item in data:
	            if len(questions) == count:
	                break
	            # Skip unusable items one by one so a single malformed entry
	            # does not discard the rest of the batch, and filter before
	            # truncating so later valid items can still fill the quota.
	            if not isinstance(item, dict):
	                continue
	            question = item.get("question")
	            if not isinstance(question, str) or not question.strip():
	                continue
	            difficulty = item.get("difficulty")
	            difficulty = difficulty.strip().lower() if isinstance(difficulty, str) else ""
	            questions.append(
	                {
	                    "question": question.strip(),
	                    "difficulty": difficulty if difficulty in ("easy", "medium", "hard") else "medium",
	                    "category": item.get("category") or "resume-seed",
	                }
	            )
	        return questions
	    except Exception:
	        # Deliberate best-effort boundary: any failure degrades to template
	        # questions, but it is logged so outages remain visible.
	        logging.getLogger(__name__).warning(
	            "Resume seed question generation failed; using template questions",
	            exc_info=True,
	        )
	        base_skill = jd_required_skills[0] if jd_required_skills else (resume_skills[0] if resume_skills else "this role")
	        fallback_templates = [
	            "In a project aligned with {role}, where did {skill} materially change your design decisions?",
	            "If your {skill} implementation regressed after deployment for {role}, how would you triage it?",
	            "What trade-offs did you make while using {skill} under real delivery constraints in {role}?",
	            "How did you test and validate a {skill}-based feature before production in {role}?",
	            "Describe one architecture decision around {skill} that improved reliability or scale for {role}.",
	        ]
	        # Templates are reused in order when count exceeds their number.
	        return [
	            {
	                "question": fallback_templates[i % len(fallback_templates)].format(skill=base_skill, role=role_title),
	                "difficulty": "medium",
	                "category": "resume-seed",
	            }
	            for i in range(count)
	        ]


	def _clamp_percent(value, default: int) -> int:
	    """Coerce ``value`` to an int in 0..100, using ``default`` if it is not numeric."""
	    try:
	        number = int(value)
	    except (TypeError, ValueError, OverflowError):
	        # Accept decimal strings such as "72.5" before giving up.
	        try:
	            number = int(float(value))
	        except (TypeError, ValueError, OverflowError):
	            number = default
	    return max(0, min(100, number))


	async def evaluate_and_generate_followup(
	    role_title: str,
	    required_skills: list[str],
	    recent_context: list[dict],
	    current_question: str,
	    current_answer: str,
	    excluded_questions: list[str],
	    focus_topic: str = "",
	    same_topic_streak: int = 0,
	) -> dict:
	    """Score the current answer and obtain one follow-up question.

	    The arguments are serialised into a JSON payload embedded in the prompt
	    sent through ``call_gemini``. The reply is parsed as a JSON object and
	    each field is normalised individually. If the call or the parsing
	    raises, a heuristic score and a randomly chosen template follow-up are
	    returned instead.

	    Args:
	        role_title: Role being interviewed for; also used in fallback text.
	        required_skills: Skills the follow-up must stay within; the first
	            one is the fallback topic.
	        recent_context: Earlier exchanges; only the last 3 are sent.
	        current_question: The question that was just asked.
	        current_answer: The candidate's answer to evaluate.
	        excluded_questions: Questions to avoid; only the last 25 are sent.
	        focus_topic: Current topic, passed through to the model.
	        same_topic_streak: Consecutive same-topic count, coerced with
	            ``int`` (falsy values become 0).

	    Returns:
	        A dict with ``score`` and ``followup_need_score`` (ints in 0..100),
	        ``feedback``, ``followup_question`` and ``followup_topic``
	        (stripped strings; ``followup_question`` may be empty on the model
	        path), ``difficulty`` (``easy``, ``medium`` or ``hard``) and
	        ``category``.
	    """
	    payload = {
	        "role_title": role_title,
	        "required_skills": required_skills,
	        "recent_context": recent_context[-3:] if recent_context else [],
	        "current_question": current_question,
	        "current_answer": current_answer,
	        "excluded_questions": excluded_questions[-25:] if excluded_questions else [],
	        "focus_topic": focus_topic,
	        "same_topic_streak": int(same_topic_streak or 0),
	    }

	    prompt = f"""You are a strict technical interviewer.

	Input JSON:
	{json.dumps(payload, ensure_ascii=True)}

	Task:
	1) Evaluate current_answer for current_question.
	2) Generate one non-duplicate follow-up question.

	Rules:
	1) Follow-up must stay within required_skills only.
	2) Use recent_context for continuity.
	3) Do not repeat/paraphrase excluded_questions.
	4) Score should reflect conceptual correctness, not verbosity.
	5) If same_topic_streak is 2 or more, avoid another same-topic follow-up unless truly critical.
	6) Ask in realistic live-interview style (specific scenario, debugging, trade-off, design decision), not generic textbook phrasing.
	7) Do not prefix numbering like "Question 4:".
	8) Avoid repeating the previous follow-up wording pattern.

	Return ONLY valid JSON object:
	{{
	"score": 0-100,
	"feedback": "short technical feedback",
	"followup_question": "...",
	"followup_topic": "specific required skill/topic for the follow-up",
	"followup_need_score": 0-100,
	"difficulty": "easy|medium|hard",
	"category": "..."
	}}
	"""

	    try:
	        # NOTE(review): call_gemini presumably returns the reply text;
	        # its contract lives in utils.gemini - confirm.
	        result = await call_gemini(
	            prompt,
	            max_attempts=3,
	            request_timeout_seconds=18,
	        )
	        data = _parse_json_object_loose(result)

	        def text_field(key: str) -> str:
	            # Non-string values count as missing instead of raising, which
	            # would throw away the rest of an otherwise usable reply.
	            raw = data.get(key)
	            return raw.strip() if isinstance(raw, str) else ""

	        difficulty = text_field("difficulty").lower()
	        return {
	            # A missing score stays 0; a present but unusable one (null,
	            # "n/a") falls back to the length heuristic so the model's
	            # feedback and follow-up are still kept.
	            "score": _clamp_percent(data.get("score", 0), _fallback_score(current_answer)),
	            "feedback": text_field("feedback") or "Answer reviewed.",
	            "followup_question": text_field("followup_question"),
	            "followup_topic": text_field("followup_topic"),
	            "followup_need_score": _clamp_percent(data.get("followup_need_score", 70), 70),
	            "difficulty": difficulty if difficulty in ("easy", "medium", "hard") else "medium",
	            "category": data.get("category") or "follow-up",
	        }
	    except Exception:
	        # Deliberate best-effort boundary: any failure degrades to a
	        # heuristic evaluation, but it is logged so outages remain visible.
	        logging.getLogger(__name__).warning(
	            "Answer evaluation failed; using heuristic score and template follow-up",
	            exc_info=True,
	        )
	        fallback_skill = required_skills[0] if required_skills else "the selected role requirement"
	        fallback_templates = [
	            "In a production system for {role}, describe a failure you would expect around {skill} and how you would debug it end-to-end.",
	            "Given a feature built with {skill}, what trade-offs would you make between speed, reliability, and maintainability in {role}?",
	            "How would you test and validate a {skill}-based implementation before release for {role}?",
	            "Walk through one real incident where {skill} decisions changed the final architecture for {role}.",
	        ]
	        template = random.choice(fallback_templates)
	        return {
	            "score": _fallback_score(current_answer),
	            "feedback": "Try to explain the mechanism, trade-offs, and one concrete example.",
	            "followup_question": template.format(skill=fallback_skill, role=role_title),
	            "followup_topic": fallback_skill,
	            "followup_need_score": 70,
	            "difficulty": "medium",
	            "category": "follow-up",
	        }


	async def generate_topic_followup_batch(
	    topic_name: str,
	    qa_pairs: list[dict],
	    excluded_questions: list[str],
	    count: int = 3,
	) -> list[dict]:
	    """Ask the model for several follow-up questions on a single topic.

	    The arguments are serialised into a JSON payload embedded in the prompt
	    sent through ``call_gemini``. The reply is parsed as a JSON array and
	    each usable item is normalised. If the call or the parsing raises,
	    template-based questions are returned instead.

	    Args:
	        topic_name: Topic to stay within; also the default category.
	        qa_pairs: Earlier question/answer records passed to the model.
	        excluded_questions: Questions to avoid; only the last 30 are sent
	            to the model, and fallback templates matching any of them are
	            skipped when alternatives remain.
	        count: Number of questions wanted. Falsy values become 3 and the
	            result is floored at 1.

	    Returns:
	        A list of dicts with ``question``, ``difficulty`` (``easy``,
	        ``medium`` or ``hard``) and ``category`` keys. On the model path it
	        holds at most ``count`` items and may be shorter or empty when the
	        reply has too few usable items; on the fallback path it holds
	        exactly ``count`` items.
	    """
	    count = max(1, int(count or 3))

	    payload = {
	        "topic": topic_name,
	        "qa_pairs": qa_pairs,
	        "excluded_questions": excluded_questions[-30:] if excluded_questions else [],
	        "count": count,
	    }

	    prompt = f"""Generate exactly {count} topic-focused technical follow-up questions.

	Input JSON:
	{json.dumps(payload, ensure_ascii=True)}

	Rules:
	1) Stay in topic scope only.
	2) Build on candidate weak points from qa_pairs.
	3) Do not repeat/paraphrase excluded_questions.

	Return ONLY valid JSON array with objects:
	- question (string)
	- difficulty (easy|medium|hard)
	- category (string)
	"""

	    try:
	        # NOTE(review): call_gemini presumably returns the reply text;
	        # its contract lives in utils.gemini - confirm.
	        result = await call_gemini(
	            prompt,
	            max_attempts=3,
	            request_timeout_seconds=20,
	        )
	        data = _parse_json_array_loose(result)

	        questions = []
	        for item in data:
	            if len(questions) == count:
	                break
	            # Skip unusable items one by one so a single malformed entry
	            # does not discard the rest of the batch, and filter before
	            # truncating so later valid items can still fill the quota.
	            if not isinstance(item, dict):
	                continue
	            question = item.get("question")
	            if not isinstance(question, str) or not question.strip():
	                continue
	            difficulty = item.get("difficulty")
	            difficulty = difficulty.strip().lower() if isinstance(difficulty, str) else ""
	            questions.append(
	                {
	                    "question": question.strip(),
	                    "difficulty": difficulty if difficulty in ("easy", "medium", "hard") else "medium",
	                    "category": item.get("category") or topic_name,
	                }
	            )
	        return questions
	    except Exception:
	        # Deliberate best-effort boundary: any failure degrades to template
	        # questions, but it is logged so outages remain visible.
	        logging.getLogger(__name__).warning(
	            "Topic follow-up generation failed; using template questions",
	            exc_info=True,
	        )
	        templates = (
	            "In {topic}, explain how you would solve a real production issue and why.",
	            "In {topic}, what trade-offs would you weigh between speed, reliability, and maintainability, and why?",
	            "In {topic}, how would you test and validate a change before releasing it to production?",
	            "In {topic}, describe a failure you would expect and how you would debug it end-to-end.",
	        )
	        candidates = [template.format(topic=topic_name) for template in templates]
	        # Rotate through distinct templates instead of repeating one
	        # question, preferring those not already asked.
	        already_asked = excluded_questions or []
	        fresh = [q for q in candidates if q not in already_asked] or candidates
	        return [
	            {
	                "question": fresh[i % len(fresh)],
	                "difficulty": "medium" if i < 2 else "hard",
	                "category": topic_name,
	            }
	            for i in range(count)
	        ]