File size: 6,980 Bytes
09d7a91
 
 
 
 
 
 
6c5d253
 
09d7a91
 
 
a60a6e6
09d7a91
 
 
 
 
 
 
 
 
 
 
 
a60a6e6
09d7a91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a60a6e6
 
 
 
 
 
 
 
 
09d7a91
 
a60a6e6
09d7a91
 
 
 
 
 
 
 
a60a6e6
 
 
 
 
 
 
 
09d7a91
a60a6e6
09d7a91
 
 
 
 
 
 
 
 
 
 
a60a6e6
 
 
 
 
 
 
 
 
09d7a91
 
a60a6e6
09d7a91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a60a6e6
 
 
 
 
 
 
09d7a91
a60a6e6
09d7a91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a60a6e6
 
 
 
 
 
 
 
09d7a91
 
a60a6e6
09d7a91
a60a6e6
09d7a91
 
 
 
 
 
 
 
a60a6e6
 
 
6c5d253
 
 
 
a60a6e6
 
 
 
 
ccb6a47
 
 
a60a6e6
ccb6a47
 
 
a60a6e6
ccb6a47
 
 
 
a60a6e6
ccb6a47
 
a60a6e6
ccb6a47
 
a60a6e6
ccb6a47
 
a60a6e6
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import re
from  datetime import datetime
from collections import defaultdict
from fuzzywuzzy import process, fuzz
from parse_job_description import extract_job_details
from data import resumes_data
import pandas as pd
# import multiprocessing as mp
# from functools import partial

def extract_experience(text):
    """Estimate years of experience from the graduation year found in *text*.

    The text is scanned for a degree keyword that is followed, on the same
    line, by a four-digit year. Bachelor's keywords are tried before Master's
    keywords, and for a given keyword the most recent matching year wins. When
    no keyword/year pair is found, the most recent plausible year anywhere in
    the text is used instead.

    Args:
        text: Raw resume text.

    Returns:
        ``current_year - year`` as an int, or 0 when the text contains no
        plausible year.
    """
    current_year = datetime.now().year
    # Four-digit numbers outside this window (IDs, future expiry dates, ...)
    # are not treated as years, so the result can never be negative.
    earliest_year = 1950

    # Regex fragments, tried in order. Dots are escaped/optional so that
    # "B.E.", "BE" and "B.Tech"/"BTech" are matched literally.
    bachelors_patterns = [
        r"bachelor(?:['’]?s)?",
        r"b\.?e\.?",
        r"b\.?tech",
        r"graduation",
    ]
    masters_patterns = [
        r"master(?:['’]?s)?",
        r"m\.?e\.?",
    ]

    # Most recent year first, duplicates removed.
    years = sorted(
        {
            int(found)
            for found in re.findall(r'\b(\d{4})\b', text)
            if earliest_year <= int(found) <= current_year
        },
        reverse=True,
    )
    if not years:
        return 0

    for keyword in bachelors_patterns + masters_patterns:
        # Letter boundaries stop short abbreviations from matching inside
        # ordinary words (e.g. "be" in "member", "me" in "some").
        bounded_keyword = rf'(?<![A-Za-z]){keyword}(?![A-Za-z])'
        for year in years:
            if re.search(rf'{bounded_keyword}.*?\b{year}\b', text, re.IGNORECASE):
                return current_year - year

    # No degree keyword precedes a year: fall back to the latest year seen.
    return current_year - years[0]

# current_time=datetime.now()
# df=resumes_data()
# exp=extract_experience(df['Resume'][10])
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)



def extract_skills(text, job_details):
    """Return the job skills that fuzzily match a word of the resume text.

    Each entry of ``job_details['Skills']`` is compared (lower-cased) against
    the whitespace-separated, lower-cased words of *text* using
    ``process.extractOne``; a skill is kept when its best score is at least 95.

    Args:
        text: Raw resume text.
        job_details: Mapping that provides the required skills under the
            ``'Skills'`` key.

    Returns:
        List of the matched skills, in the order they appear in
        ``job_details['Skills']``. Empty when *text* contains no words.
    """
    job_skills = job_details['Skills']

    # Tokenise once instead of once per skill; dict.fromkeys drops repeated
    # words while keeping their order, which does not change the best score.
    tokens = list(dict.fromkeys(text.lower().split()))
    if not tokens:
        # extractOne returns None when there are no choices, so bail out early.
        return []

    found_skills = []
    for skill in job_skills:
        best_match = process.extractOne(skill.lower(), tokens)
        if best_match is not None and best_match[1] >= 95:
            found_skills.append(skill)
    return found_skills


# current_time=datetime.now()
# df=resumes_data()
# exp=extract_skills(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)


def extract_education(text, job_details):
    """Return the best fuzzy education match between the job and *text*.

    Every entry of ``job_details['Education']`` is compared, lower-cased,
    against the lower-cased text with ``fuzz.partial_ratio``.

    Returns:
        The highest ratio obtained, or 0 when no education entry is listed.
    """
    required_degrees = job_details['Education']
    ratios = (
        fuzz.partial_ratio(degree.lower(), text.lower())
        for degree in required_degrees
    )
    return max(ratios, default=0)


# current_time=datetime.now()
# df=resumes_data()
# exp=extract_education(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)



def _word_windows(words, size):
    """Return every run of *size* consecutive words, joined by single spaces."""
    if len(words) <= size:
        return [" ".join(words)] if words else []
    return [" ".join(words[i:i + size]) for i in range(len(words) - size + 1)]


def match_personality_traits(resume_traits, job_details, threshold=70):
    """
    Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.

    Parameters:
    - resume_traits (list of str, or str): Personality traits from the candidate's resume.
      A plain string is treated as free text and split into word windows of the
      same length as each job trait, instead of being compared character by character.
    - job_details (dict): Job description details; the traits are read from
      its 'Personality Traits' entry.
    - threshold (int): Minimum similarity score (0-100) for a valid match.

    Returns:
    - dict: Mapping of each job trait to {'Matched Trait', 'Score'}. 'Matched Trait'
      is "No suitable match" when the best score is below the threshold or when
      there is nothing to compare against (score 0 in that case).
    """
    matches = {}
    job_traits = job_details['Personality Traits']

    # Iterating a str yields single characters, which can never resemble a
    # trait; split raw text into words so phrases can be compared instead.
    resume_words = resume_traits.split() if isinstance(resume_traits, str) else None
    windows_by_size = {}

    for job_trait in job_traits:
        if resume_words is None:
            candidates = resume_traits
        else:
            size = max(len(job_trait.split()), 1)
            if size not in windows_by_size:
                windows_by_size[size] = _word_windows(resume_words, size)
            candidates = windows_by_size[size]

        try:
            has_candidates = len(candidates) > 0
        except TypeError:
            # Unsized iterable: let extractOne consume it.
            has_candidates = True

        best = None
        if has_candidates:
            best = process.extractOne(job_trait, candidates, scorer=fuzz.token_sort_ratio)

        if best is None:
            # Nothing to compare against; record an explicit non-match.
            matches[job_trait] = {'Matched Trait': "No suitable match", 'Score': 0}
            continue

        best_match, score = best[0], best[1]

        # Only return a match if the score meets the threshold
        if score >= threshold:
            matches[job_trait] = {'Matched Trait': best_match, 'Score': score}
        else:
            matches[job_trait] = {'Matched Trait': "No suitable match", 'Score': score}

    return matches


# current_time=datetime.now()
# df=resumes_data()
# exp=match_personality_traits(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)

def scoring(resume_text, job_description):
    """Compute the scoring features of one resume against a job description.

    Args:
        resume_text: Raw resume text.
        job_description: Job details mapping passed through to
            ``extract_skills``, ``match_personality_traits`` and
            ``extract_education``.

    Returns:
        dict with the keys:
        - 'matched_skills': number of skills returned by ``extract_skills``.
        - 'experience': value returned by ``extract_experience``.
        - 'education_relevance': ``extract_education`` result divided by 100.
        - 'trait_flag': 1 when the first job trait found a match, otherwise 0
          (also 0 when the job lists no traits).
    """
    matched_skills = len(extract_skills(resume_text, job_description))
    traits = match_personality_traits(resume_text, job_description)
    experience = extract_experience(resume_text)
    education_relevance = extract_education(resume_text, job_description) / 100

    # NOTE(review): only the first job trait decides the flag, as before;
    # confirm whether "any trait matched" was intended.
    # next(..., None) avoids the IndexError raised when no traits are listed.
    first_trait = next(iter(traits.values()), None)
    if first_trait is None or first_trait['Matched Trait'] == 'No suitable match':
        trait_flag = 0
    else:
        trait_flag = 1

    return {
        'matched_skills': matched_skills,
        'experience': experience,
        'education_relevance': education_relevance,
        'trait_flag': trait_flag,
    }


# current_time=datetime.now()
# df=resumes_data()
# exp=scoring(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)



def get_scores_optimized(df, job_description):
    """Append the per-resume scoring columns to *df*.

    ``scoring`` is applied to every value of the 'Resume' column; the
    resulting dictionaries are expanded into columns that share the index of
    *df* and are concatenated to the right of the original columns.

    Returns:
        A new DataFrame made of the original columns followed by the score
        columns.
    """
    def score_resume(resume):
        return scoring(resume, job_description)

    per_resume_scores = df['Resume'].apply(score_resume)

    # One row per resume, aligned on the original index.
    score_columns = pd.DataFrame(per_resume_scores.tolist(), index=df.index)

    return pd.concat([df, score_columns], axis=1)


# import pandas as pd
# import multiprocessing as mp
# from functools import partial
# import time

# First, ensure all the helper functions are defined at the module level
# These are the functions called by scoring(): extract_skills, match_personality_traits, 
# extract_experience, and extract_education

# def get_scores_optimized(df, job_description):
#     print('inside scores optimized..............')
#     start_time = time.time()
    
#     # Method 1: Use chunking with the original apply method
#     chunk_size = 32
#     results = []
    
#     for i in range(0, len(df), chunk_size):
#         chunk = df.iloc[i:i+chunk_size]
#         chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description))
#         results.extend(chunk_results.tolist())
    
#     # Convert the list of dictionaries into a DataFrame and join with original
#     scores_df = pd.DataFrame(results, index=df.index)
    
#     end_time = time.time()
#     print(f"Processing took {end_time - start_time:.2f} seconds")
    
#     # Return the original dataframe with the new columns
#     return pd.concat([df, scores_df], axis=1)


# current_time=datetime.now()
# df=resumes_data()
# exp=get_scores_optimized(df, job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)