# Repository page header captured with the file (not part of the program):
# la-leaderboard-v2 / src/tasks.py
# pauvanbr's picture
# Upload src/tasks.py
# 251a9d1 verified
from dataclasses import dataclass
from enum import Enum, auto
from typing import List
import pandas as pd
# Task metadata table, read once when this module is imported. The path is
# relative, so it is resolved against the process' current working directory.
# The columns read further down in this file are "Harness", "Metric", "Name",
# "Language" and "Domain".
tasks_df = pd.read_csv("tasks/tasks.csv")
# NOTE(review): presumably the number of few-shot examples used for the
# evaluations; it is not referenced anywhere else in this part of the file.
NUM_FEWSHOT = 0
class Language(Enum):
    """Language tags that can be attached to a leaderboard task or category.

    The values are generated by ``auto()`` and carry no meaning of their own;
    members are intended to be addressed by name, e.g. ``Language["ES"]``.
    """

    ALL = auto()
    ES = auto()
    CA = auto()
    EU = auto()
    GL = auto()
    VA = auto()
    PT = auto()
    EN = auto()
class Domain(Enum):
    """Domain tags that can be attached to a leaderboard task.

    The values are generated by ``auto()`` and carry no meaning of their own;
    members are intended to be addressed by name, e.g. ``Domain["LEGAL"]``.
    """

    MISCELLANEOUS = auto()
    LANGUAGE = auto()
    LEGAL = auto()
    CLINICAL = auto()
    AVERAGE = auto()
@dataclass
class Category:
    """
    Leaderboard category: a display column tied to one language.

    Attributes:
        col_name (str): Column name to display in the leaderboard.
        language (Language): Language the category belongs to.
        domains (List[Domain]): Domains associated with the category.
    """

    col_name: str
    language: Language
    domains: List[Domain]
class Categories(Enum):
    """Per-language leaderboard categories.

    Each member's value is a ``Category`` whose column label has the form
    ``"Avg <LANG>"`` (presumably an average-score column; how it is computed
    is not shown in this part of the file). Every category lists the same
    four domains, and each member owns its own list instance.
    """

    es = Category(
        col_name="Avg ES",
        language=Language.ES,
        domains=[Domain.MISCELLANEOUS, Domain.LANGUAGE, Domain.CLINICAL, Domain.LEGAL],
    )
    ca = Category(
        col_name="Avg CA",
        language=Language.CA,
        domains=[Domain.MISCELLANEOUS, Domain.LANGUAGE, Domain.CLINICAL, Domain.LEGAL],
    )
    eu = Category(
        col_name="Avg EU",
        language=Language.EU,
        domains=[Domain.MISCELLANEOUS, Domain.LANGUAGE, Domain.CLINICAL, Domain.LEGAL],
    )
    gl = Category(
        col_name="Avg GL",
        language=Language.GL,
        domains=[Domain.MISCELLANEOUS, Domain.LANGUAGE, Domain.CLINICAL, Domain.LEGAL],
    )
    va = Category(
        col_name="Avg VA",
        language=Language.VA,
        domains=[Domain.MISCELLANEOUS, Domain.LANGUAGE, Domain.CLINICAL, Domain.LEGAL],
    )
    pt = Category(
        col_name="Avg PT",
        language=Language.PT,
        domains=[Domain.MISCELLANEOUS, Domain.LANGUAGE, Domain.CLINICAL, Domain.LEGAL],
    )
@dataclass
class Task:
    """
    A single task shown on the leaderboard.

    Attributes:
        benchmark (str): Benchmark dataset name; this is the task_key used in
            the results JSON file.
        metric (str): Evaluation metric for the task; this is the metric_key
            used in the results JSON file.
        col_name (str): Column name to display in the leaderboard.
        language (Language): Language in which the task is conducted.
        domain (Domain): Domain category of the task.
    """

    benchmark: str
    metric: str
    col_name: str
    language: Language
    domain: Domain
def _task_from_row(row: pd.Series) -> Task:
    """Build the ``Task`` described by one row of ``tasks_df``.

    Args:
        row: A row of the tasks table providing the "Harness", "Metric",
            "Name", "Language" and "Domain" columns.

    Returns:
        The corresponding ``Task``. Its ``metric`` is the row's metric name
        followed by a comma and a filter suffix.

    Raises:
        KeyError: If a column is missing, or if the upper-cased "Language" /
            "Domain" value is not a member name of the respective enum.
    """
    benchmark = row["Harness"]
    metric_name = row["Metric"]
    # To match the harness' post-normalization naming: "exact_match" results
    # are keyed with the "remove_whitespace" filter, everything else with "none".
    filter_name = "remove_whitespace" if metric_name == "exact_match" else "none"
    return Task(
        benchmark=benchmark,
        metric=f"{metric_name},{filter_name}",
        col_name=row["Name"],
        language=Language[row["Language"].upper()],
        domain=Domain[row["Domain"].upper()],
    )


# Maps each harness task name to its Task, in the row order of tasks_df.
# If a harness name appears more than once, the last row wins.
tasks_dict = {row["Harness"]: _task_from_row(row) for _, row in tasks_df.iterrows()}
# Enum with one member per entry of tasks_dict: the member name is the harness
# task name and the member value is the matching Task. The functional Enum API
# accepts a name -> value mapping directly.
Tasks = Enum("Tasks", tasks_dict)