import re
from datetime import datetime
from collections import defaultdict
from fuzzywuzzy import process, fuzz
from parse_job_description import extract_job_details
from data import resumes_data
import pandas as pd
# import multiprocessing as mp
# from functools import partial
def extract_experience(text):
    """Estimate years of experience from free-form resume text.

    Heuristic: find the year closest to a degree mention (Bachelor's checked
    before Master's) and return the number of years elapsed since then.
    If no degree/year pairing is found, fall back to the most recent 4-digit
    year anywhere in the text.

    Parameters:
    - text (str): Raw resume text.

    Returns:
    - int: Estimated years of experience; 0 if no 4-digit year is present.
    """
    bachelors_patterns = [
        'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
        'bachelor of engineering', 'graduation'
    ]
    masters_patterns = [
        'master', 'm.e.', 'me', 'master of engineering'
    ]
    # All 4-digit years, most recent first (duplicates are harmless here).
    all_years = sorted(map(int, re.findall(r'\b(\d{4})\b', text)), reverse=True)
    if not all_years:
        return 0
    current_year = datetime.now().year  # hoisted: identical for every match
    # Bachelor's patterns are tried before Master's, preserving the original
    # priority. re.escape prevents '.' in 'b.e.' matching arbitrary chars, the
    # (?<!\w)/(?!\w) guards stop 'be' matching inside words like "September",
    # and DOTALL lets the degree and its year sit on different lines.
    for pattern in bachelors_patterns + masters_patterns:
        escaped = re.escape(pattern)
        for year in all_years:
            if re.search(rf'(?<!\w){escaped}(?!\w).*?(?<!\w){year}(?!\w)',
                         text, re.IGNORECASE | re.DOTALL):
                return current_year - year
    # Fallback: assume the most recent year mentioned is the graduation year.
    return current_year - all_years[0]
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_experience(df['Resume'][10])
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def extract_skills(text, job_details):
    """Return the job skills that fuzzy-match a token of the resume text.

    Parameters:
    - text (str): Raw resume text.
    - job_details (dict): Must contain a 'Skills' key holding a list of
      skill strings to look for.

    Returns:
    - list: Subset of job_details['Skills'] whose best token match in the
      resume scores >= 95 (fuzzywuzzy ratio).
    """
    job_skills = job_details['Skills']
    found_skills = []
    # Hoisted out of the loop: the original re-lowered and re-split the whole
    # resume once per skill.
    tokens = text.lower().split()
    if not tokens:
        # process.extractOne returns None for an empty choice list; the
        # original would then crash on best_match[1].
        return found_skills
    for skill in job_skills:
        best_match = process.extractOne(skill.lower(), tokens)
        if best_match is not None and best_match[1] >= 95:
            found_skills.append(skill)
    return found_skills
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_skills(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def extract_education(text, job_details):
    """Score how well the resume text matches the required education.

    Parameters:
    - text (str): Raw resume text.
    - job_details (dict): Must contain an 'Education' key holding a list of
      degree/qualification strings.

    Returns:
    - int: Highest fuzzy partial-match ratio (0-100) across all required
      degrees; 0 if the list is empty.
    """
    lowered_text = text.lower()
    return max(
        (fuzz.partial_ratio(degree.lower(), lowered_text)
         for degree in job_details['Education']),
        default=0,
    )
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_education(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def match_personality_traits(resume_traits, job_details, threshold=70):
    """
    Matches personality traits from a job description with those in a
    candidate's resume using fuzzy matching.

    Parameters:
    - resume_traits (iterable of str): Personality traits from the
      candidate's resume. NOTE(review): callers in this file pass the raw
      resume *string* here, which fuzzywuzzy treats as a sequence of
      characters — confirm the intended input type with the caller.
    - job_details (dict): Must contain a 'Personality Traits' key holding
      the traits required by the job.
    - threshold (int): Minimum similarity score (0-100) for a valid match.

    Returns:
    - dict: Mapping of job traits to best-matched resume traits with
      similarity scores; unmatched traits map to "No suitable match".
    """
    matches = {}
    job_traits = job_details['Personality Traits']
    for job_trait in job_traits:
        result = process.extractOne(job_trait, resume_traits,
                                    scorer=fuzz.token_sort_ratio)
        if result is None:
            # extractOne returns None for an empty choices iterable; the
            # original crashed unpacking it.
            matches[job_trait] = {'Matched Trait': "No suitable match",
                                  'Score': 0}
            continue
        best_match, score = result
        # Only record a real match if the score meets the threshold.
        if score >= threshold:
            matches[job_trait] = {'Matched Trait': best_match, 'Score': score}
        else:
            matches[job_trait] = {'Matched Trait': "No suitable match",
                                  'Score': score}
    return matches
# current_time=datetime.now()
# df=resumes_data()
# exp=match_personality_traits(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def scoring(resume_text, job_description):
    """Compute the per-resume match metrics against one job description.

    Parameters:
    - resume_text (str): Raw resume text.
    - job_description (dict): Parsed job details with 'Skills', 'Education'
      and 'Personality Traits' keys (see extract_job_details).

    Returns:
    - dict: {'matched_skills': int, 'experience': int,
             'education_relevance': float in [0, 1], 'trait_flag': 0 or 1}
    """
    matched_skills = len(extract_skills(resume_text, job_description))
    traits = match_personality_traits(resume_text, job_description)
    experience = extract_experience(resume_text)
    # Normalise the 0-100 fuzzy ratio to [0, 1].
    education_relevance = extract_education(resume_text, job_description) / 100
    # The original built a flag for every trait and then kept only the first
    # one ([0]); that behavior is preserved here, but guarded so an empty
    # traits dict no longer raises IndexError.
    # NOTE(review): using only the first trait's flag looks unintentional —
    # confirm whether an aggregate (e.g. fraction matched) was meant.
    if traits:
        first_trait = next(iter(traits))
        trait_flag = 0 if traits[first_trait]['Matched Trait'] == 'No suitable match' else 1
    else:
        trait_flag = 0
    return {
        'matched_skills': matched_skills,
        'experience': experience,
        'education_relevance': education_relevance,
        'trait_flag': trait_flag
    }
# current_time=datetime.now()
# df=resumes_data()
# exp=scoring(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def get_scores_optimized(df, job_description):
    """Score every resume in *df* against one job description.

    Parameters:
    - df (pd.DataFrame): Must contain a 'Resume' column of raw resume text.
    - job_description (dict): Parsed job details passed through to scoring().

    Returns:
    - pd.DataFrame: *df* with the scoring() metric columns appended.
    """
    # One scoring() dict per resume, in row order.
    score_records = [scoring(resume_text, job_description)
                     for resume_text in df['Resume']]
    # Re-use the original index so the metric columns line up row-for-row.
    scores_df = pd.DataFrame(score_records, index=df.index)
    return pd.concat([df, scores_df], axis=1)
# import pandas as pd
# import multiprocessing as mp
# from functools import partial
# import time
# First, ensure all the helper functions are defined at the module level
# These are the functions called by scoring(): extract_skills, match_personality_traits,
# extract_experience, and extract_education
# def get_scores_optimized(df, job_description):
# print('inside scores optimized..............')
# start_time = time.time()
# # Method 1: Use chunking with the original apply method
# chunk_size = 32
# results = []
# for i in range(0, len(df), chunk_size):
# chunk = df.iloc[i:i+chunk_size]
# chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description))
# results.extend(chunk_results.tolist())
# # Convert the list of dictionaries into a DataFrame and join with original
# scores_df = pd.DataFrame(results, index=df.index)
# end_time = time.time()
# print(f"Processing took {end_time - start_time:.2f} seconds")
# # Return the original dataframe with the new columns
# return pd.concat([df, scores_df], axis=1)
# current_time=datetime.now()
# df=resumes_data()
# exp=get_scores_optimized(df, job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)