| { |
| "config_general": { |
| "lighteval_sha": "?", |
| "num_fewshot_seeds": 1, |
| "max_samples": null, |
| "job_id": 0, |
| "start_time": 7391.813776305, |
| "end_time": 7504.824690543, |
| "total_evaluation_time_secondes": "113.01091423799971", |
| "model_name": "Qwen/Qwen3-0.6B", |
| "model_sha": "", |
| "model_dtype": null, |
| "model_size": null, |
| "generation_parameters": { |
| "early_stopping": null, |
| "repetition_penalty": null, |
| "frequency_penalty": null, |
| "length_penalty": null, |
| "presence_penalty": null, |
| "max_new_tokens": null, |
| "min_new_tokens": null, |
| "seed": null, |
| "stop_tokens": null, |
| "temperature": null, |
| "top_k": null, |
| "min_p": null, |
| "top_p": null, |
| "truncate_prompt": null, |
| "response_format": null |
| } |
| }, |
| "results": { |
| "original|mmlu:anatomy|0": { |
| "acc": 0.1925925925925926, |
| "acc_stderr": 0.03406542058502653 |
| }, |
| "all": { |
| "acc": 0.1925925925925926, |
| "acc_stderr": 0.03406542058502653 |
| } |
| }, |
| "versions": { |
| "original|mmlu:anatomy|0": 0 |
| }, |
| "config_tasks": { |
| "original|mmlu:anatomy": { |
| "name": "mmlu:anatomy", |
| "prompt_function": "mmlu_anatomy", |
| "hf_repo": "cais/mmlu", |
| "hf_subset": "anatomy", |
| "metric": [ |
| { |
| "metric_name": "acc", |
| "higher_is_better": true, |
| "category": "8", |
| "use_case": "1", |
| "sample_level_fn": "compute", |
| "corpus_level_fn": "mean" |
| } |
| ], |
| "hf_revision": null, |
| "hf_filter": null, |
| "hf_avail_splits": [ |
| "auxiliary_train", |
| "test", |
| "validation", |
| "dev" |
| ], |
| "trust_dataset": true, |
| "evaluation_splits": [ |
| "test" |
| ], |
| "few_shots_split": "dev", |
| "few_shots_select": "sequential", |
| "generation_size": 1, |
| "generation_grammar": null, |
| "stop_sequence": [ |
| "\n" |
| ], |
| "num_samples": null, |
| "suite": [ |
| "original", |
| "mmlu" |
| ], |
| "original_num_docs": 135, |
| "effective_num_docs": 135, |
| "must_remove_duplicate_docs": false, |
| "version": 0 |
| } |
| }, |
| "summary_tasks": { |
| "original|mmlu:anatomy|0": { |
| "hashes": { |
| "hash_examples": "2ace6ded4afc2a5e", |
| "hash_full_prompts": "2ace6ded4afc2a5e", |
| "hash_input_tokens": "2208f6c9b1418fb3", |
| "hash_cont_tokens": "b6f29f2efe9d60e5" |
| }, |
| "truncated": 0, |
| "non_truncated": 135, |
| "padded": 0, |
| "non_padded": 540, |
| "effective_few_shots": 0.0, |
| "num_truncated_few_shots": 0 |
| } |
| }, |
| "summary_general": { |
| "hashes": { |
| "hash_examples": "b4397c7c7be2716f", |
| "hash_full_prompts": "b4397c7c7be2716f", |
| "hash_input_tokens": "b6d70b8b90cdad6b", |
| "hash_cont_tokens": "d0f0c11bb15a1909" |
| }, |
| "truncated": 0, |
| "non_truncated": 135, |
| "padded": 0, |
| "non_padded": 540, |
| "num_truncated_few_shots": 0 |
| } |
| } |