import re
from datetime import datetime
from collections import defaultdict
from fuzzywuzzy import process, fuzz
from parse_job_description import extract_job_details
from data import resumes_data
import pandas as pd
# import multiprocessing as mp
# from functools import partial
def extract_experience(text):
    """Estimate years of experience from free-form resume text.

    Heuristic: find the year closest to a degree mention (Bachelor's checked
    before Master's) and return the number of years elapsed since then.
    If no degree/year pairing is found, fall back to the most recent 4-digit
    year anywhere in the text.

    Parameters:
    - text (str): Raw resume text.

    Returns:
    - int: Estimated years of experience; 0 if no 4-digit year is present.
    """
    bachelors_patterns = [
        'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
        'bachelor of engineering', 'graduation'
    ]
    masters_patterns = [
        'master', 'm.e.', 'me', 'master of engineering'
    ]
    # All 4-digit years, most recent first (duplicates are harmless here).
    all_years = sorted(map(int, re.findall(r'\b(\d{4})\b', text)), reverse=True)
    if not all_years:
        return 0
    current_year = datetime.now().year  # hoisted: identical for every match
    # Bachelor's patterns are tried before Master's, preserving the original
    # priority. re.escape prevents '.' in 'b.e.' matching arbitrary chars, the
    # (?<!\w)/(?!\w) guards stop 'be' matching inside words like "September",
    # and DOTALL lets the degree and its year sit on different lines.
    for pattern in bachelors_patterns + masters_patterns:
        escaped = re.escape(pattern)
        for year in all_years:
            if re.search(rf'(?<!\w){escaped}(?!\w).*?(?<!\w){year}(?!\w)',
                         text, re.IGNORECASE | re.DOTALL):
                return current_year - year
    # Fallback: assume the most recent year mentioned is the graduation year.
    return current_year - all_years[0]
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_experience(df['Resume'][10])
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def extract_skills(text, job_details):
    """Return the job skills that fuzzy-match a token of the resume text.

    Parameters:
    - text (str): Raw resume text.
    - job_details (dict): Must contain a 'Skills' key holding a list of
      skill strings to look for.

    Returns:
    - list: Subset of job_details['Skills'] whose best token match in the
      resume scores >= 95 (fuzzywuzzy ratio).
    """
    job_skills = job_details['Skills']
    found_skills = []
    # Hoisted out of the loop: the original re-lowered and re-split the whole
    # resume once per skill.
    tokens = text.lower().split()
    if not tokens:
        # process.extractOne returns None for an empty choice list; the
        # original would then crash on best_match[1].
        return found_skills
    for skill in job_skills:
        best_match = process.extractOne(skill.lower(), tokens)
        if best_match is not None and best_match[1] >= 95:
            found_skills.append(skill)
    return found_skills
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_skills(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def extract_education(text, job_details):
    """Score how well the resume text matches the required education.

    Parameters:
    - text (str): Raw resume text.
    - job_details (dict): Must contain an 'Education' key holding a list of
      degree/qualification strings.

    Returns:
    - int: Highest fuzzy partial-match ratio (0-100) across all required
      degrees; 0 if the list is empty.
    """
    lowered_text = text.lower()
    return max(
        (fuzz.partial_ratio(degree.lower(), lowered_text)
         for degree in job_details['Education']),
        default=0,
    )
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_education(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def match_personality_traits(resume_traits, job_details, threshold=70):
    """
    Matches personality traits from a job description with those in a
    candidate's resume using fuzzy matching.

    Parameters:
    - resume_traits (iterable of str): Personality traits from the
      candidate's resume. NOTE(review): callers in this file pass the raw
      resume *string* here, which fuzzywuzzy treats as a sequence of
      characters — confirm the intended input type with the caller.
    - job_details (dict): Must contain a 'Personality Traits' key holding
      the traits required by the job.
    - threshold (int): Minimum similarity score (0-100) for a valid match.

    Returns:
    - dict: Mapping of job traits to best-matched resume traits with
      similarity scores; unmatched traits map to "No suitable match".
    """
    matches = {}
    job_traits = job_details['Personality Traits']
    for job_trait in job_traits:
        result = process.extractOne(job_trait, resume_traits,
                                    scorer=fuzz.token_sort_ratio)
        if result is None:
            # extractOne returns None for an empty choices iterable; the
            # original crashed unpacking it.
            matches[job_trait] = {'Matched Trait': "No suitable match",
                                  'Score': 0}
            continue
        best_match, score = result
        # Only record a real match if the score meets the threshold.
        if score >= threshold:
            matches[job_trait] = {'Matched Trait': best_match, 'Score': score}
        else:
            matches[job_trait] = {'Matched Trait': "No suitable match",
                                  'Score': score}
    return matches
# current_time=datetime.now()
# df=resumes_data()
# exp=match_personality_traits(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def scoring(resume_text, job_description):
    """Compute the per-resume match metrics against one job description.

    Parameters:
    - resume_text (str): Raw resume text.
    - job_description (dict): Parsed job details with 'Skills', 'Education'
      and 'Personality Traits' keys (see extract_job_details).

    Returns:
    - dict: {'matched_skills': int, 'experience': int,
             'education_relevance': float in [0, 1], 'trait_flag': 0 or 1}
    """
    matched_skills = len(extract_skills(resume_text, job_description))
    traits = match_personality_traits(resume_text, job_description)
    experience = extract_experience(resume_text)
    # Normalise the 0-100 fuzzy ratio to [0, 1].
    education_relevance = extract_education(resume_text, job_description) / 100
    # The original built a flag for every trait and then kept only the first
    # one ([0]); that behavior is preserved here, but guarded so an empty
    # traits dict no longer raises IndexError.
    # NOTE(review): using only the first trait's flag looks unintentional —
    # confirm whether an aggregate (e.g. fraction matched) was meant.
    if traits:
        first_trait = next(iter(traits))
        trait_flag = 0 if traits[first_trait]['Matched Trait'] == 'No suitable match' else 1
    else:
        trait_flag = 0
    return {
        'matched_skills': matched_skills,
        'experience': experience,
        'education_relevance': education_relevance,
        'trait_flag': trait_flag
    }
# current_time=datetime.now()
# df=resumes_data()
# exp=scoring(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def get_scores_optimized(df, job_description):
    """Score every resume in *df* against one job description.

    Parameters:
    - df (pd.DataFrame): Must contain a 'Resume' column of raw resume text.
    - job_description (dict): Parsed job details passed through to scoring().

    Returns:
    - pd.DataFrame: *df* with the scoring() metric columns appended.
    """
    # One scoring() dict per resume, in row order.
    score_records = [scoring(resume_text, job_description)
                     for resume_text in df['Resume']]
    # Re-use the original index so the metric columns line up row-for-row.
    scores_df = pd.DataFrame(score_records, index=df.index)
    return pd.concat([df, scores_df], axis=1)
# import pandas as pd
# import multiprocessing as mp
# from functools import partial
# import time
# First, ensure all the helper functions are defined at the module level
# These are the functions called by scoring(): extract_skills, match_personality_traits,
# extract_experience, and extract_education
# def get_scores_optimized(df, job_description):
# print('inside scores optimized..............')
# start_time = time.time()
# # Method 1: Use chunking with the original apply method
# chunk_size = 32
# results = []
# for i in range(0, len(df), chunk_size):
# chunk = df.iloc[i:i+chunk_size]
# chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description))
# results.extend(chunk_results.tolist())
# # Convert the list of dictionaries into a DataFrame and join with original
# scores_df = pd.DataFrame(results, index=df.index)
# end_time = time.time()
# print(f"Processing took {end_time - start_time:.2f} seconds")
# # Return the original dataframe with the new columns
# return pd.concat([df, scores_df], axis=1)
# current_time=datetime.now()
# df=resumes_data()
# exp=get_scores_optimized(df, job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)