| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import os |
|
|
| import datasets |
| import pandas as pd |
|
|
|
|
# BibTeX entry for the CMMLU paper; passed to DatasetInfo(citation=...).
_CITATION = (
    "@article{li2023cmmlu,\n"
    "title={CMMLU: Measuring massive multitask language understanding in Chinese},\n"
    "author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},\n"
    "journal={arXiv preprint arXiv:2306.09212},\n"
    "year={2023}\n"
    "}\n"
)

# One-paragraph summary; passed to DatasetInfo(description=...).
_DESCRIPTION = (
    "CMMLU is a comprehensive Chinese assessment suite specifically designed "
    "to evaluate the advanced knowledge and reasoning abilities of LLMs "
    "within the Chinese language and cultural context.\n"
)

# Upstream project page; passed to DatasetInfo(homepage=...).
_HOMEPAGE = "https://github.com/haonan-li/CMMLU"

# Licence name; passed to DatasetInfo(license=...).
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Archive handed to dl_manager.download_and_extract(); the builder reads
# test/<task>.csv and dev/<task>.csv from the extracted directory.
_URL = "cmmlu.zip"
|
|
# Names of the CMMLU subject tasks. Each name becomes one builder
# configuration and is also the stem of the per-task CSV file
# (test/<name>.csv, dev/<name>.csv). Kept in alphabetical order.
task_list = [
    "agronomy",
    "anatomy",
    "ancient_chinese",
    "arts",
    "astronomy",
    "business_ethics",
    "chinese_civil_service_exam",
    "chinese_driving_rule",
    "chinese_food_culture",
    "chinese_foreign_policy",
    "chinese_history",
    "chinese_literature",
    "chinese_teacher_qualification",
    "clinical_knowledge",
    "college_actuarial_science",
    "college_education",
    "college_engineering_hydrology",
    "college_law",
    "college_mathematics",
    "college_medical_statistics",
    "college_medicine",
    "computer_science",
    "computer_security",
    "conceptual_physics",
    "construction_project_management",
    "economics",
    "education",
    "electrical_engineering",
    "elementary_chinese",
    "elementary_commonsense",
    "elementary_information_and_technology",
    "elementary_mathematics",
    "ethnology",
    "food_science",
    "genetics",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_geography",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_politics",
    "human_sexuality",
    "international_law",
    "journalism",
    "jurisprudence",
    "legal_and_moral_basis",
    "logical",
    "machine_learning",
    "management",
    "marketing",
    "marxist_theory",
    "modern_chinese",
    "nutrition",
    "philosophy",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_study",
    "sociology",
    "sports_science",
    "traditional_chinese_medicine",
    "virology",
    "world_history",
    "world_religions",
]
|
|
|
|
class CMMLUConfig(datasets.BuilderConfig):
    """Builder configuration for a single CMMLU task, pinned to version 1.0.1."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``datasets.BuilderConfig`` with a fixed version.

        Args:
            **kwargs: Keyword arguments accepted by ``datasets.BuilderConfig``
                (for example ``name``). ``version`` is supplied here, so
                passing it as well raises ``TypeError``.
        """
        pinned_version = datasets.Version("1.0.1")
        super().__init__(version=pinned_version, **kwargs)
|
|
|
|
class CMMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for CMMLU with one configuration per name in ``task_list``.

    For the selected configuration the builder reads ``test/<name>.csv`` and
    ``dev/<name>.csv`` from the extracted ``_URL`` archive and exposes them as
    the TEST and TRAIN splits respectively.
    """

    BUILDER_CONFIGS = [CMMLUConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the dataset metadata together with the all-string schema."""
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then describe the two splits.

        Args:
            dl_manager: Download manager supplied by ``datasets``; only its
                ``download_and_extract`` method is used.

        Returns:
            A list of two ``datasets.SplitGenerator`` objects: TEST backed by
            ``test/<task>.csv`` and TRAIN backed by ``dev/<task>.csv``.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        csv_name = f"{task_name}.csv"
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "test", csv_name),
                },
            ),
            # The "dev" directory is published under the TRAIN split name.
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "dev", csv_name),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs from one task CSV file.

        The first CSV column is consumed as the row index and is not part of
        the example. The ``Question`` and ``Answer`` columns are renamed to
        ``question`` and ``answer`` (an empty string is used when a column is
        absent); every other column is passed through under its own name
        (presumably ``A``-``D``, matching ``_info`` - verify against the data).

        Args:
            filepath: Path of the CSV file to read (UTF-8, header on row 0).

        Yields:
            Tuples of the zero-based row number and a dict of string values.
        """
        # Read every cell as literal text: without dtype=str and
        # keep_default_na=False pandas turns cells such as "NA", "null" or an
        # empty field into NaN and numeric-looking options into int/float,
        # which does not match the all-string schema declared in _info.
        df = pd.read_csv(
            filepath,
            header=0,
            index_col=0,
            encoding="utf-8",
            dtype=str,
            keep_default_na=False,
        )
        for i, instance in enumerate(df.to_dict(orient="records")):
            instance["question"] = instance.pop("Question", "")
            instance["answer"] = instance.pop("Answer", "")
            yield i, instance
|
|