Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
mcq tasks
Browse files- __pycache__/app.cpython-313.pyc +0 -0
- backend/__pycache__/__init__.cpython-313.pyc +0 -0
- backend/__pycache__/config.cpython-313.pyc +0 -0
- backend/__pycache__/data_loader.cpython-313.pyc +0 -0
- backend/__pycache__/helpers.cpython-313.pyc +0 -0
- backend/__pycache__/submission_handler.cpython-313.pyc +0 -0
- backend/config.py +32 -21
- frontend/leaderboard.html +1 -1
__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (8.07 kB). View file
|
|
|
backend/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (172 Bytes). View file
|
|
|
backend/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (1.79 kB). View file
|
|
|
backend/__pycache__/data_loader.cpython-313.pyc
ADDED
|
Binary file (6.86 kB). View file
|
|
|
backend/__pycache__/helpers.cpython-313.pyc
ADDED
|
Binary file (4.07 kB). View file
|
|
|
backend/__pycache__/submission_handler.cpython-313.pyc
ADDED
|
Binary file (20.6 kB). View file
|
|
|
backend/config.py
CHANGED
|
@@ -1,29 +1,40 @@
|
|
| 1 |
-
# backend/config.py
|
| 2 |
import os
|
|
|
|
| 3 |
from huggingface_hub import HfApi
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
| 10 |
-
TASKS = [
|
| 11 |
-
("arc", "acc_norm", "ARC"),
|
| 12 |
-
("mmlu", "acc", "MMLU"),
|
| 13 |
-
("hellaswag", "acc_norm", "HellaSwag"),
|
| 14 |
-
]
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
"chat": "💬",
|
| 19 |
-
"instruct": "🧠",
|
| 20 |
-
}
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
API = HfApi(token=hf_api_token) if hf_api_token else HfApi()
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
from typing import Dict, List, Tuple, Optional
|
| 3 |
from huggingface_hub import HfApi
|
| 4 |
|
| 5 |
+
# --- 1. Repository & API Configuration ---
|
| 6 |
+
OWNER: str = "qimma"
|
| 7 |
+
REPO_ID: str = f"{OWNER}/Qimma-Leaderboard"
|
| 8 |
+
RESULTS_REPO_ID: str = f"{OWNER}/results_v3"
|
| 9 |
+
REQUESTS_REPO_ID: str = f"{OWNER}/requests_v3"
|
| 10 |
|
| 11 |
+
SLACK_WEBHOOK_URL = os.getenv("SLACK_WEBHOOK_URL", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
hf_api_token: Optional[str] = os.getenv("HF_API_TOKEN")
|
| 14 |
+
API = HfApi(token=hf_api_token)
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
# --- 2. Task Definitions ---
|
| 17 |
+
# Format: (dataset_key, metric_field, display_name)
|
| 18 |
+
TASKS: List[Tuple[str, str, str]] = [
|
| 19 |
+
("qimma-AraDiCE-Culture:_average|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "AraDiCE-Culture"),
|
| 20 |
+
("qimma-ArabicMMLU:_average|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "ArabicMMLU"),
|
| 21 |
+
("qimma-MedArabiQ:_average|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "MedArabiQ"),
|
| 22 |
+
("qimma-ArabCulture:_average|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "ArabCulture"),
|
| 23 |
+
("qimma-SyntheticQA:_average|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "SyntheticQA"),
|
| 24 |
+
("qimma-PALMX-2025|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "PALMX"),
|
| 25 |
+
("qimma-NativeQA|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "NativeQA"),
|
| 26 |
+
("qimma-NativeQA-RDP|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "NativeQA-RDP"),
|
| 27 |
+
("qimma-AraTrust|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "AraTrust"),
|
| 28 |
+
("qimma-MizanQA|0", "acc:logprob_normalization=LogProbCharNorm(name='norm', ignore_first_space=False)", "MizanQA"),
|
| 29 |
+
]
|
| 30 |
|
| 31 |
+
# --- 3. Model Metadata ---
|
|
|
|
| 32 |
|
| 33 |
+
MODEL_TYPE_TO_EMOJI: Dict[str, str] = {
|
| 34 |
+
"🟢 : pretrained": "🟢",
|
| 35 |
+
"🟩 : continuously pretrained": "🟩",
|
| 36 |
+
"💬 : chat models (RLHF, DPO, IFT, ...)": "💬",
|
| 37 |
+
"🔶 : fine-tuned on domain-specific datasets": "🔶",
|
| 38 |
+
"🤝 : base merges and merges": "🤝",
|
| 39 |
+
"Missing": "?",
|
| 40 |
+
}
|
frontend/leaderboard.html
CHANGED
|
@@ -746,7 +746,7 @@
|
|
| 746 |
$('#modalLinkHF').href = `https://huggingface.co/${model["Model Name"]}`;
|
| 747 |
|
| 748 |
const cleanName = model["Model Name"].replace(/\//g, '__');
|
| 749 |
-
const datasetId = `OALL/details_${cleanName}
|
| 750 |
$('#modalLinkDetails').href = `https://huggingface.co/datasets/${datasetId}`;
|
| 751 |
|
| 752 |
const chartContainer = $('#modalChart');
|
|
|
|
| 746 |
$('#modalLinkHF').href = `https://huggingface.co/${model["Model Name"]}`;
|
| 747 |
|
| 748 |
const cleanName = model["Model Name"].replace(/\//g, '__');
|
| 749 |
+
const datasetId = `OALL/details_${cleanName}`;
|
| 750 |
$('#modalLinkDetails').href = `https://huggingface.co/datasets/${datasetId}`;
|
| 751 |
|
| 752 |
const chartContainer = $('#modalChart');
|