File size: 4,290 Bytes
1cff1e5
15145f6
1cff1e5
 
15145f6
1cff1e5
 
03faf26
 
1cff1e5
15145f6
1cff1e5
 
 
15145f6
 
 
 
 
 
 
 
 
 
1cff1e5
 
15145f6
 
 
 
 
1cff1e5
 
15145f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1cff1e5
 
 
 
 
 
 
03faf26
 
1cff1e5
 
 
03faf26
 
 
1cff1e5
 
 
 
 
 
 
 
 
15145f6
1cff1e5
 
 
 
 
 
 
 
 
15145f6
 
 
 
 
 
1cff1e5
 
 
 
 
 
03faf26
1cff1e5
 
 
 
 
 
 
 
 
 
 
 
15145f6
1cff1e5
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import re
import aiofiles
from database import get_db
from models.collections import RESUMES, SKILLS, USERS
from utils.helpers import utc_now, str_objectid
from utils.gemini import parse_resume_with_gemini
from utils.resume_text import extract_resume_text
from utils.skills import normalize_skill_list
from config import get_settings
from bson import ObjectId

# Application settings, resolved once when this module is imported;
# settings.UPLOAD_DIR is the directory uploaded resume files are written to.
settings = get_settings()

# Expected filename format: {12 digits}_{name}.{ext}
# Example: 714023243122_Sajith J.pdf
_RESUME_FILENAME_RE = re.compile(r'^(\d{12})_(.+)\.(pdf|doc|docx|txt)$', re.IGNORECASE)


def extract_reg_no_from_filename(filename: str) -> str | None:
    """Return the 12-digit register number from the filename, or None if format is invalid."""
    m = _RESUME_FILENAME_RE.match(filename or "")
    return m.group(1) if m else None


async def upload_and_parse_resume(user_id: str, filename: str, file_content: bytes) -> dict:
    """Upload resume file, parse with Gemini, extract skills.

    Filename must match: {12_digit_reg_no}_{name}.{ext}
    Example: 714023243122_Sajith J.pdf

    Side effects: writes the file under ``settings.UPLOAD_DIR``, upserts the
    user's document in RESUMES and SKILLS, and sets ``reg_no`` on the USERS
    document.

    Args:
        user_id: String form of the user's ObjectId; also stored as the
            ``user_id`` key of the resume and skills documents.
        filename: Client-supplied file name. Must be a bare file name (no
            directory components) in the format described above.
        file_content: Raw bytes of the uploaded file.

    Returns:
        A dict with ``id``, ``user_id``, ``filename`` (the original name),
        ``parsed_text``, ``skills``, ``reg_no`` and ``uploaded_at``.

    Raises:
        ValueError: If the filename does not follow the required format,
            contains path separators or NUL bytes, or the register number is
            already held by a different user.
    """
    db = get_db()

    # ── Validate filename format ──────────────────────────────────────────────
    reg_no = extract_reg_no_from_filename(filename)
    if reg_no is None:
        raise ValueError(
            "Invalid filename format. Resume filename must start with your 12-digit register number "
            "followed by an underscore and your name. "
            "Example: 714023243122_Sajith J.pdf"
        )

    # The filename is untrusted and becomes part of a filesystem path below.
    # Refuse anything that could escape (or break) the upload directory rather
    # than relying solely on the format check above.
    if any(ch in filename for ch in ("/", "\\", "\x00")):
        raise ValueError(
            "Invalid filename. Resume filename must not contain path separators."
        )

    user_oid = ObjectId(user_id)

    # ── Check reg_no uniqueness ───────────────────────────────────────────────
    # Allow the same user to re-upload (same reg_no), but block if another user already holds it.
    existing_owner = await db[USERS].find_one(
        {"reg_no": reg_no, "_id": {"$ne": user_oid}}
    )
    if existing_owner:
        raise ValueError(
            "This register number is already associated with another account. "
            "Each student must use their own unique register number."
        )

    # Save file locally; the user_id prefix keeps different users' files apart.
    safe_filename = f"{user_id}_{filename}"
    file_path = os.path.join(settings.UPLOAD_DIR, safe_filename)

    async with aiofiles.open(file_path, "wb") as f:
        await f.write(file_content)

    # Extract readable text by file type before sending to Gemini.
    resume_text = extract_resume_text(filename, file_content)

    # Parse with Gemini; keep the raw skill list and store the normalized one
    # back into the parsed payload.
    parsed_data = await parse_resume_with_gemini(resume_text)
    raw_skills = parsed_data.get("skills", [])
    skills = normalize_skill_list(raw_skills)
    parsed_data["skills"] = skills

    # Upsert resume document (one per user, keyed by user_id).
    # Note: "parsed_text" holds Gemini's experience summary, not the raw text.
    resume_doc = {
        "user_id": user_id,
        "filename": safe_filename,
        "original_filename": filename,
        "file_path": file_path,
        "parsed_text": parsed_data.get("experience_summary", ""),
        "parsed_data": parsed_data,
        "reg_no": reg_no,
        "uploaded_at": utc_now(),
    }

    await db[RESUMES].update_one(
        {"user_id": user_id},
        {"$set": resume_doc},
        upsert=True,
    )

    # Persist reg_no on the user document for quick lookup and admin display
    await db[USERS].update_one(
        {"_id": user_oid},
        {"$set": {"reg_no": reg_no}},
    )

    # Upsert skills
    await db[SKILLS].update_one(
        {"user_id": user_id},
        {"$set": {
            "user_id": user_id,
            "skills": skills,
            "raw_skills": raw_skills,
            "updated_at": utc_now(),
        }},
        upsert=True,
    )

    # Re-read the stored document to obtain its _id (needed after an upsert).
    result = await db[RESUMES].find_one({"user_id": user_id})
    return {
        "id": str(result["_id"]),
        "user_id": user_id,
        "filename": filename,
        "parsed_text": resume_doc["parsed_text"],
        "skills": skills,
        "reg_no": reg_no,
        "uploaded_at": resume_doc["uploaded_at"],
    }


async def get_user_skills(user_id: str) -> list:
    """Return the skills stored for ``user_id``, or an empty list if none exist.

    Looks up the user's document in the SKILLS collection by its ``user_id``
    field and returns its ``skills`` entry.
    """
    record = await get_db()[SKILLS].find_one({"user_id": user_id})
    return record.get("skills", []) if record else []