| import pandas as pd |
| import chromadb |
| import re |
| from chatbot_functionalities.llms import llm_inference |
|
|
|
|
def _parse_llm_questions(raw_llm_output: str) -> list:
    """Split raw LLM output into a clean list of question strings.

    The LLM typically returns one question per line, often prefixed with a
    number such as ``"1. "``. Blank lines are dropped and numeric prefixes
    are stripped.

    Args:
        raw_llm_output (str): Raw text returned by the LLM.

    Returns:
        list: Cleaned question strings, one per non-empty output line.
    """
    numbered_prefix = re.compile(r"^\d+\.\s")
    return [
        numbered_prefix.sub("", line)
        for line in raw_llm_output.split("\n")
        if line != ""
    ]


def generate_questions(
    position: str,
    candidate_profile: str,
    question_collection: "chromadb.Collection",
) -> pd.DataFrame:
    """Generate a set of relevant interview questions for a position and candidate profile.

    Under the hood, semantic search extracts relevant questions from a vector
    database containing the embeddings of the project's question bank
    (2 questions for the introduction phase, 4 for the core phase). If the
    search returns fewer matches than the target for a phase, an LLM generates
    the remainder.

    Args:
        position (str): Position of the candidate for which the interview is taking place.
        candidate_profile (str): Description of the profile of the candidate.
        question_collection (chromadb.Collection): Vector collection holding the
            embedded question bank.

    Returns:
        pd.DataFrame: One row per generated question with "question",
            "interview_phase" and "position" populated; the "answer", "ratings"
            and "feedback" columns are left empty for later interview stages.
    """
    questions_list = []
    interview_phase_list = []

    # ---------------- Introduction phase ----------------
    print("Generating questions for introduction phase...\n")

    intro_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$eq": "Introduction"}},
            ]
        },
        n_results=2,
    )
    intro_sem_docs = intro_ques_semantic_search["documents"][0]

    intro_llm_questions = []
    if len(intro_sem_docs) < 2:
        # Semantic search came up short: ask the LLM for the remainder.
        num_ques_to_gen = 2 - len(intro_sem_docs)
        intro_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} introductory question/questions which can help start off the interview. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        intro_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=intro_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        intro_llm_questions = _parse_llm_questions(intro_ques_llm)

    # LLM-generated questions first, then the semantic matches (original order).
    # Phase labels are counted from the ACTUAL number of questions collected so
    # the two parallel lists stay aligned even when the LLM returns more or
    # fewer lines than requested (the old hard-coded "* 2" broke in that case).
    questions_list.extend(intro_llm_questions)
    questions_list.extend(intro_sem_docs)
    interview_phase_list.extend(
        ["Introduction"] * (len(intro_llm_questions) + len(intro_sem_docs))
    )

    print("Introduction phase question generation complete...\n")

    # ---------------- Core phase ----------------
    print("Generating questions for core phase...\n")

    core_ques_semantic_search = question_collection.query(
        query_texts=[candidate_profile],
        where={
            "$and": [
                {"position": {"$eq": position}},
                {"interview_phase": {"$nin": ["Introduction", "Conclusion"]}},
            ]
        },
        n_results=4,
    )
    core_sem_docs = core_ques_semantic_search["documents"][0]

    core_llm_questions = []
    if len(core_sem_docs) < 4:
        num_ques_to_gen = 4 - len(core_sem_docs)
        # NOTE: prompt now asks for position-specific questions; the previous
        # wording ("help start off the interview") was copy-pasted from the
        # introduction template.
        core_template = """Assume you are an expert interviewer, interviewing a candidate. You have the following information:
        Position applying for : {position}
        Candidate profile summary : {candidate_profile}.
        Using the above information, generate {num_ques_to_gen} position specific question/questions which can assess the candidate's suitability for the role. Please provide questions that are highly relevant for the job position only. Don't ask irrelevant questions."""

        core_ques_llm = llm_inference(
            model_type="huggingface",
            input_variables_list=[position, candidate_profile, num_ques_to_gen],
            prompt_template=core_template,
            hf_repo_id="tiiuae/falcon-7b-instruct",
            temperature=0.1,
            max_length=64,
        )
        core_llm_questions = _parse_llm_questions(core_ques_llm)

    # LLM questions are labelled "Core"; semantic matches keep the phase stored
    # in their metadata. As above, label counts come from actual list lengths.
    questions_list.extend(core_llm_questions)
    interview_phase_list.extend(["Core"] * len(core_llm_questions))
    questions_list.extend(core_sem_docs)
    interview_phase_list.extend(
        [d["interview_phase"] for d in core_ques_semantic_search["metadatas"][0]]
    )

    print("Core phase question generation complete...\n")

    # Assemble the result frame; answer/ratings/feedback are filled in later
    # stages of the interview flow, so they remain empty (NaN) here.
    question_df = pd.DataFrame(
        columns=["question", "interview_phase", "position", "answer", "ratings", "feedback"]
    )
    question_df["question"] = questions_list
    question_df["interview_phase"] = interview_phase_list
    question_df["position"] = [position] * len(questions_list)

    return question_df
|
|