import json import re import random from utils.gemini import call_gemini def _extract_json_object(text: str) -> str: value = (text or "").strip() if value.startswith("```"): value = value.split("\n", 1)[1] if value.endswith("```"): value = value.rsplit("```", 1)[0] value = value.strip() if value.startswith("{") and value.endswith("}"): return value start = value.find("{") end = value.rfind("}") if start != -1 and end != -1 and end > start: return value[start:end + 1] return value def _extract_json_array(text: str) -> str: value = (text or "").strip() if value.startswith("```"): value = value.split("\n", 1)[1] if value.endswith("```"): value = value.rsplit("```", 1)[0] value = value.strip() if value.startswith("[") and value.endswith("]"): return value start = value.find("[") end = value.rfind("]") if start != -1 and end != -1 and end > start: return value[start:end + 1] return value def _parse_json_object_loose(text: str) -> dict: value = _extract_json_object(text) try: parsed = json.loads(value) except Exception: cleaned = re.sub(r",\s*([}\]])", r"\1", value) parsed = json.loads(cleaned) if not isinstance(parsed, dict): raise ValueError("Parsed payload is not a JSON object") return parsed def _parse_json_array_loose(text: str) -> list: value = _extract_json_array(text) try: parsed = json.loads(value) except Exception: cleaned = re.sub(r",\s*([}\]])", r"\1", value) parsed = json.loads(cleaned) if not isinstance(parsed, list): raise ValueError("Parsed payload is not a JSON array") return parsed def _fallback_score(answer: str) -> int: text = (answer or "").strip().lower() words = len(text.split()) weak = any(marker in text for marker in ["not sure", "maybe", "i think", "dont know", "don't know"]) if words < 10: return 35 if words < 25: return 55 if weak: return 50 if words > 80: return 75 return 65 async def generate_resume_seed_questions( role_title: str, resume_summary: str, resume_skills: list[str], jd_title: str, jd_description: str, jd_required_skills: list[str], excluded_questions: list[str], count: int = 2, ) -> list[dict]: count = max(1, int(count or 2)) payload = { "role_title": role_title, "resume_summary": resume_summary, "resume_skills": resume_skills, "jd_title": jd_title, "jd_description": jd_description, "jd_required_skills": jd_required_skills, "excluded_questions": excluded_questions[-25:] if excluded_questions else [], "count": count, } prompt = f"""Generate exactly {count} resume interview questions. Input JSON: {json.dumps(payload, ensure_ascii=True)} Rules: 1) Questions must be strictly from JD required skills and role context. 2) Use resume context for relevance. 3) Do not repeat or paraphrase excluded_questions. 4) Keep questions concise and practical. 5) Make the set diverse: use different styles (scenario, debugging, trade-off, implementation, testing). 6) Do not prefix with numbering like "Question 1:". 7) Avoid generic repeats like "Explain your hands-on experience" for every question. Return ONLY valid JSON array with objects: - question (string) - difficulty (easy|medium|hard) - category (string) """ try: result = await call_gemini( prompt, max_attempts=3, request_timeout_seconds=20, ) data = _parse_json_array_loose(result) output = [] for item in data[:count]: if not isinstance(item, dict): item = {} output.append( { "question": (item.get("question") or "").strip(), "difficulty": item.get("difficulty") if item.get("difficulty") in {"easy", "medium", "hard"} else "medium", "category": item.get("category") or "resume-seed", } ) return [q for q in output if q.get("question")] except Exception: base_skill = jd_required_skills[0] if jd_required_skills else (resume_skills[0] if resume_skills else "this role") fallback_templates = [ "In a project aligned with {role}, where did {skill} materially change your design decisions?", "If your {skill} implementation regressed after deployment for {role}, how would you triage it?", "What trade-offs did you make while using {skill} under real delivery constraints in {role}?", "How did you test and validate a {skill}-based feature before production in {role}?", "Describe one architecture decision around {skill} that improved reliability or scale for {role}.", ] fallback = [] for i in range(count): template = fallback_templates[i % len(fallback_templates)] fallback.append( { "question": template.format(skill=base_skill, role=role_title), "difficulty": "medium", "category": "resume-seed", } ) return fallback async def evaluate_and_generate_followup( role_title: str, required_skills: list[str], recent_context: list[dict], current_question: str, current_answer: str, excluded_questions: list[str], focus_topic: str = "", same_topic_streak: int = 0, ) -> dict: payload = { "role_title": role_title, "required_skills": required_skills, "recent_context": recent_context[-3:] if recent_context else [], "current_question": current_question, "current_answer": current_answer, "excluded_questions": excluded_questions[-25:] if excluded_questions else [], "focus_topic": focus_topic, "same_topic_streak": int(same_topic_streak or 0), } prompt = f"""You are a strict technical interviewer. Input JSON: {json.dumps(payload, ensure_ascii=True)} Task: 1) Evaluate current_answer for current_question. 2) Generate one non-duplicate follow-up question. Rules: 1) Follow-up must stay within required_skills only. 2) Use recent_context for continuity. 3) Do not repeat/paraphrase excluded_questions. 4) Score should reflect conceptual correctness, not verbosity. 5) If same_topic_streak is 2 or more, avoid another same-topic follow-up unless truly critical. 6) Ask in realistic live-interview style (specific scenario, debugging, trade-off, design decision), not generic textbook phrasing. 7) Do not prefix numbering like "Question 4:". 8) Avoid repeating the previous follow-up wording pattern. Return ONLY valid JSON object: {{ "score": 0-100, "feedback": "short technical feedback", "followup_question": "...", "followup_topic": "specific required skill/topic for the follow-up", "followup_need_score": 0-100, "difficulty": "easy|medium|hard", "category": "..." }} """ try: result = await call_gemini( prompt, max_attempts=3, request_timeout_seconds=18, ) data = _parse_json_object_loose(result) followup = (data.get("followup_question") or "").strip() try: followup_need_score = int(data.get("followup_need_score", 70)) except Exception: followup_need_score = 70 followup_need_score = max(0, min(100, followup_need_score)) return { "score": int(data.get("score", 0)), "feedback": (data.get("feedback") or "").strip() or "Answer reviewed.", "followup_question": followup, "followup_topic": (data.get("followup_topic") or "").strip(), "followup_need_score": followup_need_score, "difficulty": data.get("difficulty") if data.get("difficulty") in {"easy", "medium", "hard"} else "medium", "category": data.get("category") or "follow-up", } except Exception: fallback_skill = required_skills[0] if required_skills else "the selected role requirement" fallback_templates = [ "In a production system for {role}, describe a failure you would expect around {skill} and how you would debug it end-to-end.", "Given a feature built with {skill}, what trade-offs would you make between speed, reliability, and maintainability in {role}?", "How would you test and validate a {skill}-based implementation before release for {role}?", "Walk through one real incident where {skill} decisions changed the final architecture for {role}.", ] template = random.choice(fallback_templates) return { "score": _fallback_score(current_answer), "feedback": "Try to explain the mechanism, trade-offs, and one concrete example.", "followup_question": template.format(skill=fallback_skill, role=role_title), "followup_topic": fallback_skill, "followup_need_score": 70, "difficulty": "medium", "category": "follow-up", } async def generate_topic_followup_batch( topic_name: str, qa_pairs: list[dict], excluded_questions: list[str], count: int = 3, ) -> list[dict]: count = max(1, int(count or 3)) payload = { "topic": topic_name, "qa_pairs": qa_pairs, "excluded_questions": excluded_questions[-30:] if excluded_questions else [], "count": count, } prompt = f"""Generate exactly {count} topic-focused technical follow-up questions. Input JSON: {json.dumps(payload, ensure_ascii=True)} Rules: 1) Stay in topic scope only. 2) Build on candidate weak points from qa_pairs. 3) Do not repeat/paraphrase excluded_questions. Return ONLY valid JSON array with objects: - question (string) - difficulty (easy|medium|hard) - category (string) """ try: result = await call_gemini( prompt, max_attempts=3, request_timeout_seconds=20, ) data = _parse_json_array_loose(result) out = [] for item in data[:count]: if not isinstance(item, dict): item = {} text = (item.get("question") or "").strip() if not text: continue out.append( { "question": text, "difficulty": item.get("difficulty") if item.get("difficulty") in {"easy", "medium", "hard"} else "medium", "category": item.get("category") or topic_name, } ) return out except Exception: fallback = [] for i in range(count): fallback.append( { "question": f"In {topic_name}, explain how you would solve a real production issue and why.", "difficulty": "medium" if i < 2 else "hard", "category": topic_name, } ) return fallback