| def get_question_text(problem): |
| question = problem['question'] |
| return question |
|
|
|
|
| def get_context_text(problem, use_caption): |
| txt_context = problem['hint'] |
| img_context = problem['caption'] if use_caption else "" |
| context = " ".join([txt_context, img_context]).strip() |
| if context == "": |
| context = "N/A" |
| return context |
|
|
|
|
| def get_choice_text(probelm, options): |
| choices = probelm['choices'] |
| choice_list = [] |
| for i, c in enumerate(choices): |
| choice_list.append("({}) {}".format(options[i], c)) |
| choice_txt = " ".join(choice_list) |
| |
| return choice_txt |
|
|
|
|
| def get_answer(problem, options): |
| return options[problem['answer']] |
|
|
|
|
| def get_lecture_text(problem): |
| |
| lecture = problem['lecture'].replace("\n", "\\n") |
| return lecture |
|
|
|
|
| def get_solution_text(problem): |
| |
| solution = problem['solution'].replace("\n", "\\n") |
| return solution |
|
|
|
|
| def create_one_example_chatbot(format, question, context, choice, answer, lecture, solution, test_example=True): |
|
|
| input_format, output_format = format.split("-") |
|
|
| |
| if input_format == "CQM": |
| input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n" |
| elif input_format == "QCM": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n" |
| |
| elif input_format == "QCML": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n" |
| elif input_format == "QCME": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n" |
| elif input_format == "QCMLE": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n" |
|
|
| elif input_format == "QCLM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n" |
| elif input_format == "QCEM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n" |
| elif input_format == "QCLEM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n" |
|
|
| |
| if test_example: |
| output = "Answer:" |
| elif output_format == 'A': |
| output = f"Answer: The answer is {answer}." |
|
|
| elif output_format == 'AL': |
| output = f"Answer: The answer is {answer}. BECAUSE: {solution}" |
| elif output_format == 'AE': |
| output = f"Answer: The answer is {answer}. BECAUSE: {lecture}" |
| elif output_format == 'ALE': |
| output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}" |
| elif output_format == 'AEL': |
| output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}" |
|
|
| elif output_format == 'LA': |
| output = f"Answer: {lecture} The answer is {answer}." |
| elif output_format == 'EA': |
| output = f"Answer: {solution} The answer is {answer}." |
| elif output_format == 'LEA': |
| output = f"Answer: {lecture} {solution} The answer is {answer}." |
| elif output_format == 'ELA': |
| output = f"Answer: {solution} {lecture} The answer is {answer}." |
| elif output_format == 'LEPA': |
| output = '' |
| if len(lecture.strip()) > 0: |
| output += f"LECTURE: {lecture}\n" |
| if len(solution.strip()) > 0: |
| output += f"SOLUTION: {solution}\n" |
| output += '###\n' |
| output += f"ANSWER: {answer}." |
|
|
| input = input.replace(" ", " ").strip() |
| output = output.replace(" ", " ").strip() |
| if input.endswith("BECAUSE:"): |
| input = input.replace("BECAUSE:", "").strip() |
| if output.endswith("BECAUSE:"): |
| output = output.replace("BECAUSE:", "").strip() |
| return input, output |
|
|
|
|
| def create_one_example(format, question, context, choice, answer, lecture, solution, test_example=True): |
|
|
| input_format, output_format = format.split("-") |
|
|
| |
| if input_format == "CQM": |
| input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n" |
| elif input_format == "QCM": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n" |
| |
| elif input_format == "QCML": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n" |
| elif input_format == "QCME": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n" |
| elif input_format == "QCMLE": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n" |
|
|
| elif input_format == "QCLM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n" |
| elif input_format == "QCEM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n" |
| elif input_format == "QCLEM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n" |
|
|
| |
| if test_example: |
| output = "Answer:" |
| elif output_format == 'A': |
| output = f"Answer: The answer is {answer}." |
|
|
| elif output_format == 'AL': |
| output = f"Answer: The answer is {answer}. BECAUSE: {solution}" |
| elif output_format == 'AE': |
| output = f"Answer: The answer is {answer}. BECAUSE: {lecture}" |
| elif output_format == 'ALE': |
| output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}" |
| elif output_format == 'AEL': |
| output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}" |
|
|
| elif output_format == 'LA': |
| output = f"Answer: {lecture} The answer is {answer}." |
| elif output_format == 'EA': |
| output = f"Answer: {solution} The answer is {answer}." |
| elif output_format == 'LEA': |
| output = f"Answer: {lecture} {solution} The answer is {answer}." |
| elif output_format == 'ELA': |
| output = f"Answer: {solution} {lecture} The answer is {answer}." |
|
|
| text = input + output |
| text = text.replace(" ", " ").strip() |
| if text.endswith("BECAUSE:"): |
| text = text.replace("BECAUSE:", "").strip() |
| return text |
|
|
|
|
|
|
| def create_one_example_gpt4(format, question, context, choice, answer, lecture, solution, test_example=True): |
|
|
| input_format, output_format = format.split("-") |
|
|
| |
| if input_format == "CQM": |
| input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n" |
| elif input_format == "QCM": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n" |
| |
| elif input_format == "QCML": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n" |
| elif input_format == "QCME": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n" |
| elif input_format == "QCMLE": |
| input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n" |
|
|
| elif input_format == "QCLM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n" |
| elif input_format == "QCEM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n" |
| elif input_format == "QCLEM": |
| input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n" |
|
|
| |
| if test_example: |
| output = "Answer:" |
| elif output_format == 'A': |
| output = f"Answer: The answer is {answer}." |
|
|
| elif output_format == 'AL': |
| output = f"Answer: The answer is {answer}. BECAUSE: {solution}" |
| elif output_format == 'AE': |
| output = f"Answer: The answer is {answer}. BECAUSE: {lecture}" |
| elif output_format == 'ALE': |
| output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}" |
| elif output_format == 'AEL': |
| output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}" |
|
|
| elif output_format == 'LA': |
| output = f"Answer: {lecture} The answer is {answer}." |
| elif output_format == 'EA': |
| output = f"Answer: {solution} The answer is {answer}." |
| elif output_format == 'LEA': |
| output = f"Answer: {lecture} {solution} The answer is {answer}." |
| elif output_format == 'ELA': |
| output = f"Answer: {solution} {lecture} The answer is {answer}." |
|
|
| input = input.replace(" ", " ").strip() |
| output = output.replace(" ", " ").strip() |
| if output.endswith("BECAUSE:"): |
| output = output.replace("BECAUSE:", "").strip() |
|
|
| user_prompt = {"role": "user", "content": f"Can you explain {input}?"} |
| assistant_prompt = {"role": "assistant", "content": f"{output}"} |
|
|
| return user_prompt, assistant_prompt |
|
|
|
|
| def build_prompt_chatbot(problems, shot_qids, prompt_format, use_caption=False, options=["A", "B", "C", "D", "E"], is_test=False): |
| examples = {} |
|
|
| for qid in shot_qids: |
| question = get_question_text(problems[qid]) |
| context = get_context_text(problems[qid], use_caption) |
| choice = get_choice_text(problems[qid], options) |
| answer = get_answer(problems[qid], options) |
| lecture = get_lecture_text(problems[qid]).replace('\\n', '\n') |
| solution = get_solution_text(problems[qid]).replace('\\n', '\n') |
|
|
| train_example = create_one_example_chatbot(prompt_format, |
| question, |
| context, |
| choice, |
| answer, |
| lecture, |
| solution, |
| test_example=is_test) |
| examples[qid] = train_example |
| return examples |
|
|
|
|
| def build_prompt(problems, shot_qids, test_qid, args): |
|
|
| examples = [] |
|
|
| |
| for qid in shot_qids: |
| question = get_question_text(problems[qid]) |
| context = get_context_text(problems[qid], args.use_caption) |
| choice = get_choice_text(problems[qid], args.options) |
| answer = get_answer(problems[qid], args.options) |
| lecture = get_lecture_text(problems[qid]) |
| solution = get_solution_text(problems[qid]) |
|
|
| train_example = create_one_example(args.prompt_format, |
| question, |
| context, |
| choice, |
| answer, |
| lecture, |
| solution, |
| test_example=False) |
| examples.append(train_example) |
|
|
| |
| question = get_question_text(problems[test_qid]) |
| context = get_context_text(problems[test_qid], args.use_caption) |
| choice = get_choice_text(problems[test_qid], args.options) |
| answer = get_answer(problems[test_qid], args.options) |
| lecture = get_lecture_text(problems[test_qid]) |
| solution = get_solution_text(problems[test_qid]) |
|
|
| test_example = create_one_example(args.prompt_format, |
| question, |
| context, |
| choice, |
| answer, |
| lecture, |
| solution, |
| test_example=True) |
| examples.append(test_example) |
|
|
| |
| prompt_input = '\n\n'.join(examples) |
|
|
| return prompt_input |
|
|
|
|
| def build_prompt_gpt4(problems, shot_qids, test_qid, args): |
|
|
| prompt_array = [{"role": "system", "content": "You are a helpful assistant."}] |
|
|
| |
| for qid in shot_qids: |
| question = get_question_text(problems[qid]) |
| context = get_context_text(problems[qid], args.use_caption) |
| choice = get_choice_text(problems[qid], args.options) |
| answer = get_answer(problems[qid], args.options) |
| lecture = get_lecture_text(problems[qid]) |
| solution = get_solution_text(problems[qid]) |
|
|
| user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format, |
| question, |
| context, |
| choice, |
| answer, |
| lecture, |
| solution, |
| test_example=False) |
| prompt_array.append(user_prompt) |
| prompt_array.append(assistant_prompt) |
|
|
| |
| question = get_question_text(problems[test_qid]) |
| context = get_context_text(problems[test_qid], args.use_caption) |
| choice = get_choice_text(problems[test_qid], args.options) |
| answer = get_answer(problems[test_qid], args.options) |
| lecture = get_lecture_text(problems[test_qid]) |
| solution = get_solution_text(problems[test_qid]) |
|
|
| user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format, |
| question, |
| context, |
| choice, |
| answer, |
| lecture, |
| solution, |
| test_example=True) |
| prompt_array.append(user_prompt) |
| prompt_array.append(assistant_prompt) |
|
|
| return prompt_array |