| { |
| "config_general": { |
| "lighteval_sha": "?", |
| "num_fewshot_seeds": 1, |
| "max_samples": null, |
| "job_id": 0, |
| "start_time": 4012.162903884, |
| "end_time": 4201.836127132, |
    "total_evaluation_time_secondes": 189.67322324799989,
| "model_name": "HuggingFaceTB/SmolLM2-360M", |
| "model_sha": "f8027fd0eaeea54caa13c31d31b9fdc459c38b49", |
| "model_dtype": null, |
| "model_size": "1.35 GB", |
| "generation_parameters": { |
| "early_stopping": null, |
| "repetition_penalty": null, |
| "frequency_penalty": null, |
| "length_penalty": null, |
| "presence_penalty": null, |
| "max_new_tokens": null, |
| "min_new_tokens": null, |
| "seed": null, |
| "stop_tokens": null, |
| "temperature": null, |
| "top_k": null, |
| "min_p": null, |
| "top_p": null, |
| "truncate_prompt": null, |
| "response_format": null |
| } |
| }, |
| "results": { |
| "leaderboard|truthfulqa:mc|0": { |
| "truthfulqa_mc1": 0.211750305997552, |
| "truthfulqa_mc1_stderr": 0.014302068353925612, |
| "truthfulqa_mc2": 0.33432310924938496, |
| "truthfulqa_mc2_stderr": 0.013335337173043119 |
| }, |
| "all": { |
| "truthfulqa_mc1": 0.211750305997552, |
| "truthfulqa_mc1_stderr": 0.014302068353925612, |
| "truthfulqa_mc2": 0.33432310924938496, |
| "truthfulqa_mc2_stderr": 0.013335337173043119 |
| } |
| }, |
| "versions": { |
| "leaderboard|truthfulqa:mc|0": 0 |
| }, |
| "config_tasks": { |
| "leaderboard|truthfulqa:mc": { |
| "name": "truthfulqa:mc", |
| "prompt_function": "truthful_qa_multiple_choice", |
| "hf_repo": "truthful_qa", |
| "hf_subset": "multiple_choice", |
| "metric": [ |
| { |
| "metric_name": [ |
| "truthfulqa_mc1", |
| "truthfulqa_mc2" |
| ], |
| "higher_is_better": { |
| "truthfulqa_mc1": true, |
| "truthfulqa_mc2": true |
| }, |
| "category": "8", |
| "use_case": "1", |
| "sample_level_fn": "truthfulqa_mc_metrics", |
| "corpus_level_fn": { |
| "truthfulqa_mc1": "mean", |
| "truthfulqa_mc2": "mean" |
| } |
| } |
| ], |
| "hf_revision": null, |
| "hf_filter": null, |
| "hf_avail_splits": [ |
| "validation" |
| ], |
| "trust_dataset": true, |
| "evaluation_splits": [ |
| "validation" |
| ], |
| "few_shots_split": null, |
| "few_shots_select": null, |
| "generation_size": -1, |
| "generation_grammar": null, |
| "stop_sequence": [ |
| "\n" |
| ], |
| "num_samples": null, |
| "suite": [ |
| "leaderboard" |
| ], |
| "original_num_docs": 817, |
| "effective_num_docs": 817, |
| "must_remove_duplicate_docs": false, |
| "version": 0 |
| } |
| }, |
| "summary_tasks": { |
| "leaderboard|truthfulqa:mc|0": { |
| "hashes": { |
| "hash_examples": "36a6d90e75d92d4a", |
| "hash_full_prompts": "36a6d90e75d92d4a", |
| "hash_input_tokens": "54863e78b01fe794", |
| "hash_cont_tokens": "7d6f5aee3c38a72f" |
| }, |
| "truncated": 0, |
| "non_truncated": 817, |
| "padded": 9216, |
| "non_padded": 780, |
| "effective_few_shots": 0.0, |
| "num_truncated_few_shots": 0 |
| } |
| }, |
| "summary_general": { |
| "hashes": { |
| "hash_examples": "aed1dfc67e53d0f2", |
| "hash_full_prompts": "aed1dfc67e53d0f2", |
| "hash_input_tokens": "1f841c18939d1dde", |
| "hash_cont_tokens": "3f412ba4b35e4c16" |
| }, |
| "truncated": 0, |
| "non_truncated": 817, |
| "padded": 9216, |
| "non_padded": 780, |
| "num_truncated_few_shots": 0 |
| } |
| } |