| import re |
| import json |
| from typing import List, Union, Optional |
|
|
|
|
def extract_final_answer(output: str) -> str:
    """
    Extract the text that follows the last 'FINAL ANSWER:' marker in a model's output.

    The marker is matched case-insensitively (ASCII letters only). The text
    after its last occurrence is stripped of surrounding whitespace, and every
    comma is normalised to be followed by exactly one space
    ("a,b,  c" -> "a, b, c"; note that "1,000" therefore becomes "1, 000").

    Parameters:
    - output: the raw model output; non-string values are converted with str().

    Returns:
    The cleaned answer, or the stringified output unchanged when the marker
    is not present.
    """
    output = str(output)
    marker = "FINAL ANSWER:"

    # Search the original text directly instead of a lower-cased copy:
    # str.lower() can change the length of a string (U+0130 lowers to two
    # code points), so an index found in the copy may point at the wrong
    # position in the original and cut the answer in the wrong place.
    last_match = None
    for last_match in re.finditer(
        re.escape(marker), output, re.IGNORECASE | re.ASCII
    ):
        pass  # only the final occurrence is of interest

    if last_match is None:
        return output

    raw_answer = output[last_match.end():].strip()
    return re.sub(r",\s*", ", ", raw_answer)
|
|
|
|
def replace_tool_mentions(prompt: str) -> str:
    """
    Rewrite short tool names in a prompt to their full tool names.

    `search` becomes `web_search` and `wiki` becomes `wikipedia_search`, both
    for backtick-quoted mentions and for call-style mentions such as
    "search(". Mentions directly preceded by a word character (for example
    "web_search(" or "research(") are left untouched.
    """
    rewrites = (
        # Backtick-quoted mentions that are not glued to a word character.
        (r"(?<!\w)`search`(?!\w)", "`web_search`"),
        (r"(?<!\w)`wiki`(?!\w)", "`wikipedia_search`"),
        # Call-style mentions, i.e. the name immediately followed by "(".
        (r"(?<!\w)(?<!_)search\(", "web_search("),
        (r"(?<!\w)(?<!_)wiki\(", "wikipedia_search("),
    )
    for pattern, replacement in rewrites:
        prompt = re.sub(pattern, replacement, prompt)
    return prompt
|
|
| def _question_matches(question: str, filters: Union[str, List[str]]) -> bool: |
| """Helper: check if question matches any string in filters.""" |
| if isinstance(filters, str): |
| filters = [filters] |
| return any(f.lower() in question.lower() for f in filters) |
|
|
def load_online_qas(
    qa_type: Union[str, List[str]] = "all",
    has_file: Optional[bool] = None,
    file_path: str = "Final_Assignment_Template/allqas.jsonl",
) -> List[dict]:
    """
    Load QA entries from a JSON Lines file (one JSON object per line).

    Parameters:
    - qa_type: str or List[str], used to match substrings in the Question.
      Use "all" for no filtering.
    - has_file: bool or None, filters QAs by presence of 'file_name':
        - True: only include QAs with a non-blank file_name
        - False: only include QAs whose file_name is missing, null or blank
        - None: no file_name filtering
    - file_path: path of the JSONL file to read.

    Returns:
    The matching entries, as parsed from the file, in file order.
    """
    # JSON text is UTF-8, so do not depend on the platform default encoding.
    # Blank lines are skipped rather than handed to json.loads().
    with open(file_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    def _has_attachment(qa: dict) -> bool:
        # `or ""` makes a JSON null file_name count as "no file" instead of
        # failing on None.strip().
        return bool((qa.get("file_name") or "").strip())

    if has_file is True:
        data = [qa for qa in data if _has_attachment(qa)]
    elif has_file is False:
        data = [qa for qa in data if not _has_attachment(qa)]

    if qa_type == "all":
        return data

    return [qa for qa in data if _question_matches(qa.get("Question", ""), qa_type)]
|
|
|
|
def load_test_qas(
    qa_type: Union[str, List[str]] = "all",
    file_path: str = "Final_Assignment_Template/gaia_val.jsonl",
) -> List[dict]:
    """
    Load test QAs with no attached files, optionally filtered by question keywords.

    Parameters:
    - qa_type: str or List[str], used to match substrings in the Question.
      Use "all" for no filtering.
    - file_path: path of the JSONL file to read (one JSON object per line).

    Returns:
    A list of dicts with the keys "Question", "Final answer", "task_id",
    "tools" (read from "Annotator Metadata" -> "Tools") and "file_name",
    one per entry whose file_name is missing, null or blank.
    """
    # NOTE(review): the default path is the same gaia_val.jsonl file that
    # load_val_qas reads - confirm whether a separate test split file was
    # intended here.

    # JSON text is UTF-8, so do not depend on the platform default encoding.
    # Blank lines are skipped rather than handed to json.loads().
    with open(file_path, "r", encoding="utf-8") as f:
        entries = [json.loads(line) for line in f if line.strip()]

    # `or ""` makes a JSON null file_name count as "no file" instead of
    # failing on None.strip().
    test_docs = [e for e in entries if not (e.get("file_name") or "").strip()]

    if qa_type != "all":
        test_docs = [e for e in test_docs if _question_matches(e["Question"], qa_type)]

    return [
        {
            "Question": e["Question"],
            "Final answer": e.get("Final answer"),
            "task_id": e["task_id"],
            # Tolerate a missing or null "Annotator Metadata" object.
            "tools": (e.get("Annotator Metadata") or {}).get("Tools"),
            "file_name": e.get("file_name", ""),
        }
        for e in test_docs
    ]
|
|
|
|
def load_val_qas(
    qa_type: Union[str, List[str]] = "all",
    file_path: str = "Final_Assignment_Template/gaia_val.jsonl",
) -> List[dict]:
    """
    Load validation QAs with no attached files, optionally filtered by question keywords.

    Parameters:
    - qa_type: str or List[str], used to match substrings in the Question.
      Use "all" for no filtering.
    - file_path: path of the JSONL file to read (one JSON object per line).

    Returns:
    A list of dicts with the keys "Question", "Final answer", "task_id",
    "tools" (read from "Annotator Metadata" -> "Tools") and "file_name",
    one per entry whose file_name is missing, null or blank.
    """
    # JSON text is UTF-8, so do not depend on the platform default encoding.
    # Blank lines are skipped rather than handed to json.loads().
    with open(file_path, "r", encoding="utf-8") as f:
        entries = [json.loads(line) for line in f if line.strip()]

    # `or ""` makes a JSON null file_name count as "no file" instead of
    # failing on None.strip().
    val_docs = [e for e in entries if not (e.get("file_name") or "").strip()]

    if qa_type != "all":
        val_docs = [e for e in val_docs if _question_matches(e["Question"], qa_type)]

    return [
        {
            "Question": e["Question"],
            "Final answer": e.get("Final answer"),
            "task_id": e["task_id"],
            # Tolerate a missing or null "Annotator Metadata" object.
            "tools": (e.get("Annotator Metadata") or {}).get("Tools"),
            "file_name": e.get("file_name", ""),
        }
        for e in val_docs
    ]
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |