from mmengine.config import read_base

# Dataset and model configs are pulled in through mmengine's read_base()
# context manager; every import in this block must stay indented under it.
with read_base():
    from opencompass.configs.datasets.subjective.compass_arena_subjective_bench.singleturn.pairwise_judge import compassarena_subjectivebench_singleturn_datasets
    from opencompass.configs.datasets.subjective.compass_arena_subjective_bench.multiturn.pairwise_judge import compassarena_subjectivebench_multiturn_datasets

    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import models as lmdeploy_internlm2_5_7b_chat
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import models as lmdeploy_internlm2_5_20b_chat
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import models as lmdeploy_llama3_1_8b_instruct
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_70b_instruct import models as lmdeploy_llama3_1_70b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_0_5b_instruct import models as lmdeploy_qwen2_5_0_5b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_1_5b_instruct import models as lmdeploy_qwen2_5_1_5b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_3b_instruct import models as lmdeploy_qwen2_5_3b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_14b_instruct import models as lmdeploy_qwen2_5_14b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_32b_instruct import models as lmdeploy_qwen2_5_32b_instruct
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as lmdeploy_qwen2_5_72b_instruct
    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import models as lmdeploy_qwen2_7b_instruct

from opencompass.models import (HuggingFace, HuggingFaceCausalLM,
                                HuggingFaceChatGLM3, OpenAI,
                                TurboMindModelwithChatTemplate)
from opencompass.partitioners import NaivePartitioner, SizePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.partitioners.sub_num_worker import \
    SubjectiveNumWorkerPartitioner
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
from opencompass.runners import LocalRunner, SlurmSequentialRunner
from opencompass.summarizers import DefaultSubjectiveSummarizer
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask

# Role layout for API-style chat models: a single round made of one HUMAN
# entry followed by one BOT entry; only the BOT entry carries generate=True.
# NOTE(review): nothing else in the visible part of this config references
# this template -- confirm whether it is still needed.
api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

# Models whose answers are evaluated. Each imported name is itself a list of
# model configs, so the lists are flattened with `*` unpacking. Only these
# four of the imported model lists are enabled in this config.
models = [
    *lmdeploy_qwen2_5_14b_instruct,
    *lmdeploy_qwen2_5_32b_instruct,
    *lmdeploy_qwen2_5_7b_instruct,
    *lmdeploy_qwen2_7b_instruct,
]

# Benchmark datasets: the single-turn and multi-turn CompassArena
# SubjectiveBench pairwise-judge dataset lists, flattened into one list.
datasets = [
    *compassarena_subjectivebench_singleturn_datasets,
    *compassarena_subjectivebench_multiturn_datasets,
]

# Inference stage: NaivePartitioner splits the work and a LocalRunner executes
# OpenICLInferTask jobs with at most 16 workers.
infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        task=dict(type=OpenICLInferTask),
    ),
)

# Judge model(s) used in the evaluation stage: a single CompassJudger-1-32B
# instance served through TurboMindModelwithChatTemplate.
judge_models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='CompassJudger-1-32B-Instruct',
        path='opencompass/CompassJudger-1-32B-Instruct',
        # session_len / max_batch_size mirror max_seq_len / batch_size below,
        # and tp=4 mirrors run_cfg num_gpus=4 -- keep each pair in sync.
        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
        # top_k=1 with a near-zero temperature; presumably chosen to make the
        # judge's output effectively deterministic -- TODO confirm.
        gen_config=dict(
            top_k=1,
            temperature=1e-6,
            top_p=0.9,
            max_new_tokens=2048,
        ),
        max_seq_len=16384,
        max_out_len=2048,
        batch_size=16,
        run_cfg=dict(num_gpus=4),
    ),
]

# Evaluation stage: SubjectiveNaivePartitioner is handed the evaluated models
# and the judge models, and a LocalRunner executes SubjectiveEvalTask jobs
# with at most 16 workers.
# NOTE(review): the name shadows the builtin `eval`; it is kept because it is
# presumably the key the OpenCompass config loader looks up -- do not rename
# without confirming.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        task=dict(type=SubjectiveEvalTask),
    ),
)

# Summarizer config: DefaultSubjectiveSummarizer with no extra options.
summarizer = dict(type=DefaultSubjectiveSummarizer)
# Output directory setting for this config (relative path).
work_dir = 'outputs/subjective/'