# Config prelude: pulls in the dataset definitions and the framework classes
# referenced by the settings in this file.
from mmengine.config import read_base

# The dataset list is imported inside the `read_base()` context; it is later
# picked up by name when the final `datasets` list is assembled.
with read_base():
    from opencompass.configs.datasets.chinese_simpleqa.chinese_simpleqa_gen import csimpleqa_datasets

from opencompass.models import HuggingFacewithChatTemplate
from opencompass.models.openai_api import OpenAI
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.summarizers import DefaultSubjectiveSummarizer
from opencompass.tasks.subjective_eval import SubjectiveEvalTask


# Model(s) to be evaluated. Each entry is a plain config dict whose `type`
# names the wrapper class and whose remaining keys are that wrapper's settings.
models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='Qwen2.5-1.5B-Instruct',
        path='Qwen/Qwen2.5-1.5B-Instruct',
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True,
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        # Sampling is switched on, so generations are not expected to be
        # reproducible run-to-run.
        generation_kwargs=dict(do_sample=True),
        max_out_len=200,
        max_seq_len=4096,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]


# Flatten every dataset list visible in this config's namespace into a single
# list: any variable whose name contains 'datasets' is concatenated in.
# `datasets` itself is not yet bound when `locals()` is read, so it cannot
# feed into its own definition.
datasets = sum(
    [entries for name, entries in locals().items() if 'datasets' in name],
    [],
)

# Summarizer selection for this run.
summarizer = dict(type=DefaultSubjectiveSummarizer)


# Role table handed to the judge model as its `meta_template`. Each entry maps
# a prompt role to the role name used on the API side; the BOT entry is the
# one flagged with `generate=True`.
api_meta_template = dict(
    round=[
        dict(role='SYSTEM', api_role='SYSTEM'),
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)
# Judge model(s) used to grade the evaluated models' answers; consumed by the
# `eval` partitioner via `judge_models=judge_models`.
judge_models = [
    dict(
        abbr='gpt-4o-0513-global',
        type=OpenAI,
        path='gpt-4o-0513-global',
        # Placeholder credential: must be replaced with a real key before the
        # judge can be called. Do not commit a real secret here.
        # NOTE(review): the OpenAI wrapper may support reading the key from
        # the environment instead of a literal -- confirm against its docs.
        key='xxx',
        meta_template=api_meta_template,
        query_per_second=16,
        max_out_len=1000,
        batch_size=8,
        retry=3,
    ),
]


# Evaluation-stage settings: how judging work is partitioned and how the
# resulting tasks are run.
# NOTE(review): this name shadows the builtin `eval`; it is presumably the
# config key the framework looks up, so it is intentionally left unchanged.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        task=dict(type=SubjectiveEvalTask),
    ),
)


# Output directory setting for this run (relative path).
work_dir = 'outputs/chinese_simpleqa/'