# app.py — maximally compatible with Gradio 3.x / early 4.x
import os
import time
import pandas as pd
import gradio as gr

from src.submission.check_validity import check_submission
from src.submission.submit import evaluate_submission
from src.envs import load_jsonl, CORPUS_PATH

from huggingface_hub import hf_hub_download, HfApi

# Hugging Face Hub access token taken from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Local path of the leaderboard CSV; the same name is used as the path inside
# the Hub repo when the file is uploaded after a submission.
LEADERBOARD_PATH = "leaderboard.csv"

# Fetch the current leaderboard CSV from the Hub repo (note: repo_type="space",
# not a dataset) into the working directory. This runs at import time.

hf_hub_download(repo_id="datakomarov/RAG-LB", filename="leaderboard.csv", repo_type="space", token=HF_TOKEN, local_dir=".")

# Leaderboard columns in the order they are shown in the table.
LB_COLUMNS = [
    "username", "team", "commit",
    "Answer not accepted", "Some truth", "Accepted answer", 
    "valid_doc_ratio", "n", "timestamp", 
]

def ensure_leaderboard():
    """Print a diagnostic when the leaderboard CSV is missing from disk.

    This only checks for the file at ``LEADERBOARD_PATH``; it does not create
    or download anything.
    """
    if os.path.exists(LEADERBOARD_PATH):
        return
    print("Something's wrong with reading a LB")
  

def load_corpus_ids():
    """Return the set of corpus document ids, each converted to ``str``.

    Reads ``CORPUS_PATH`` with ``load_jsonl`` and collects the ``doc_id`` of
    every record that has one.

    Loading is best-effort: on any failure the error is printed and an empty
    set is returned, so this function never raises.
    """
    try:
        corpus = load_jsonl(CORPUS_PATH)
        return {str(rec["doc_id"]) for rec in corpus if "doc_id" in rec}
    except Exception as exc:
        # Keep the fallback, but leave a trace: with an empty id set every
        # membership check against the corpus fails, which would otherwise be
        # indistinguishable from a submission that cites unknown documents.
        print(f"Failed to load corpus ids: {exc!r}")
        return set()

def parse_submission_doc_ids(file_path: str):
    """Map each record's ``id`` in a JSONL submission to its list of doc ids.

    Keys are ``str(record.get("id"))``. A ``doc_ids`` value that is not a list
    is wrapped into a one-element list; only the first 10 entries are
    considered, each is converted to ``str``, and entries whose string form is
    blank are dropped. A later record with the same id replaces an earlier one.
    """
    def _clean(raw):
        # Normalise a raw ``doc_ids`` value into a list of non-blank strings.
        candidates = raw if isinstance(raw, list) else [raw]
        return [str(item) for item in candidates[:10] if str(item).strip()]

    return {
        str(record.get("id")): _clean(record.get("doc_ids", []))
        for record in load_jsonl(file_path)
    }

def compute_valid_doc_ratio(sub_docs, corpus_ids):
    """Return the fraction of entries whose doc ids are all present in the corpus.

    An entry counts as valid only when its id list is non-empty and every id
    is a member of ``corpus_ids``. An empty (or falsy) ``sub_docs`` gives 0.0.
    """
    if not sub_docs:
        return 0.0

    valid_count = sum(
        bool(ids) and all(doc_id in corpus_ids for doc_id in ids)
        for ids in sub_docs.values()
    )
    return valid_count / len(sub_docs)

def sort_leaderboard(df):
    """Return ``df`` ordered by leaderboard rank with a fresh 0-based index.

    Rows are ordered by fewest "Answer not accepted" first; ties are broken by
    the larger "Accepted answer", then "Some truth", then "valid_doc_ratio",
    then "n".
    """
    # (column, ascending) pairs in priority order.
    sort_spec = [
        ("Answer not accepted", True),
        ("Accepted answer", False),
        ("Some truth", False),
        ("valid_doc_ratio", False),
        ("n", False),
    ]
    columns = [name for name, _ in sort_spec]
    directions = [ascending for _, ascending in sort_spec]

    ranked = df.sort_values(by=columns, ascending=directions)
    return ranked.reset_index(drop=True)

def load_sorted_leaderboard():
    """Read the leaderboard CSV and return it ranked, with a 1-based ``Place`` column.

    An empty CSV is returned as read. Otherwise the result holds ``Place``
    followed by the ``LB_COLUMNS`` columns, in ranked order.
    """
    ensure_leaderboard()
    raw = pd.read_csv(LEADERBOARD_PATH)
    if raw.empty:
        return raw

    # After sorting, the index is the 0-based rank; expose it as a column
    # and shift it by one to get the place.
    ranked = sort_leaderboard(raw).reset_index(drop=False)
    ranked = ranked.assign(Place=ranked["index"] + 1)
    return ranked[["Place", *LB_COLUMNS]]

def submit_file(file_obj, username, team, commit):
    """Validate, score and record one submission.

    Args:
        file_obj: The uploaded submission from the file input: either an
            object exposing its path as ``.name`` or a plain path string.
            ``None`` when nothing was uploaded.
        username: Submitter name; required, blank values are rejected.
        team: Optional team name.
        commit: Optional commit/tag label.

    Returns:
        A ``(message, dataframe)`` tuple for the message and table outputs.
        The table always comes from ``load_sorted_leaderboard()``, so it has
        the same columns (including ``Place``) on success as on error paths.
    """
    ensure_leaderboard()

    username = (username or "").strip()
    team = (team or "").strip()
    commit = (commit or "").strip()

    if not username:
        return "❌ Please provide username", load_sorted_leaderboard()
    if file_obj is None:
        return "❌ Please upload JSONL file", load_sorted_leaderboard()

    # Accept either a file wrapper exposing `.name` or a bare path string, so
    # the handler does not depend on which form the file input delivers.
    file_path = getattr(file_obj, "name", file_obj)

    ok, msg = check_submission(file_path)
    if not ok:
        return f"❌ Invalid submission: {msg}", load_sorted_leaderboard()

    try:
        result = evaluate_submission(file_path)
    except Exception as e:
        return f"❌ Eval failed: {e}", load_sorted_leaderboard()

    corpus_ids = load_corpus_ids()
    sub_docs = parse_submission_doc_ids(file_path)
    valid_ratio = compute_valid_doc_ratio(sub_docs, corpus_ids)

    row = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()),
        "username": username,
        "team": team,
        "commit": commit,
        "Answer not accepted": result["zeros"],
        "Some truth": result["ones"],
        "Accepted answer": result["twos"],
        "valid_doc_ratio": valid_ratio,
        "n": result["n"],
    }

    # Append the new row and persist locally before touching the network.
    df = pd.read_csv(LEADERBOARD_PATH)
    df.loc[len(df)] = row
    df.to_csv(LEADERBOARD_PATH, index=False)

    # Push the updated CSV back to the Hub repo with the same token that was
    # used for the download. The row is already saved locally, so an upload
    # failure is reported in the message instead of failing the submission.
    upload_note = ""
    try:
        HfApi(token=HF_TOKEN).upload_file(
            path_or_fileobj=LEADERBOARD_PATH,
            path_in_repo=LEADERBOARD_PATH,
            repo_id="datakomarov/RAG-LB",
            repo_type="space",
        )
    except Exception as e:
        upload_note = f" | ⚠️ Saved locally, but upload to the Hub failed: {e}"

    summary = (
        f"✅ Submitted! "
        f"N={row['n']} | Answer not accepted:{row['Answer not accepted']} Some truth:{row['Some truth']} Accepted answer:{row['Accepted answer']} | "
        f"doc_ratio={valid_ratio:.1%}"
        f"{upload_note}"
    )
    # Re-read through the shared loader so the table matches the one shown
    # initially, on refresh and on error paths (Place column, column order).
    return summary, load_sorted_leaderboard()


def build_ui():
    """Build and return the Gradio Blocks app.

    The page shows the leaderboard table with a manual refresh button at the
    top and the submission form below it. Components are created in display
    order inside the ``gr.Blocks`` context.
    """
    ensure_leaderboard()
    with gr.Blocks(title="RAG Leaderboard") as demo:
        gr.Markdown("# 🏁 RAG Benchmark")

        # ===== 1) Leaderboard at the top =====
        gr.Markdown("## 📊 Leaderboard")
        out_df = gr.Dataframe(
            value=load_sorted_leaderboard(),
            interactive=False,
            wrap=True,
            label="",
        )

        # Small refresh button to reload the table manually.
        refresh_btn = gr.Button("🔄 Refresh leaderboard", variant="secondary")
        refresh_btn.click(
            fn=lambda: load_sorted_leaderboard(),
            inputs=[],
            outputs=[out_df],
        )

        gr.Markdown("---")

        # ===== 2) Submission form at the bottom =====
        gr.Markdown("## 📤 Submit your run")

        file_in = gr.File(label="Upload JSONL submission")
        username_in = gr.Text(label="Username (required)")
        team_in = gr.Text(label="Team (optional)")
        commit_in = gr.Text(label="Commit/tag (optional)")

        submit_btn = gr.Button("Submit", variant="primary")
        out_msg = gr.Markdown()

        submit_btn.click(
            submit_file,
            inputs=[file_in, username_in, team_in, commit_in],
            outputs=[out_msg, out_df],  # update both the message and the table
        )

    return demo


# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    app = build_ui()
    app.launch() # an explicit host/port can be passed, e.g. server_name="0.0.0.0", server_port=7860