Scores
Browse files
- app.py +14 -0
- static/app.js +16 -4
- static/index.html +21 -7
app.py
CHANGED
|
@@ -84,6 +84,7 @@ def _new_sentence(idx: int, surface_tokens: list[str], *, sentence_id: str = "",
|
|
| 84 |
"status": "pending", # pending | annotating | done | error
|
| 85 |
"error": "",
|
| 86 |
"n_disagreements": 0,
|
|
|
|
| 87 |
}
|
| 88 |
|
| 89 |
|
|
@@ -441,9 +442,21 @@ def add_sentence_to_icl(idx: int):
|
|
| 441 |
gold_annotation=ann,
|
| 442 |
source="corrected",
|
| 443 |
))
|
|
|
|
|
|
|
| 444 |
return _public_state()
|
| 445 |
|
| 446 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
@app.post("/api/icl/clear")
|
| 448 |
def clear_icl():
|
| 449 |
SESSION["icl_pool"] = ICLPool()
|
|
@@ -475,6 +488,7 @@ async def _annotate_sentence(sent: dict, client: LLMClient,
|
|
| 475 |
)
|
| 476 |
SESSION["rendered_user_cache"] = rendered_user
|
| 477 |
sent["status"] = "annotating"
|
|
|
|
| 478 |
results = await client.annotate_many(
|
| 479 |
models=models, system=sys_prompt, user=rendered_user,
|
| 480 |
schema=schema, temperature=float(temperature),
|
|
|
|
| 84 |
"status": "pending", # pending | annotating | done | error
|
| 85 |
"error": "",
|
| 86 |
"n_disagreements": 0,
|
| 87 |
+
"validated": False, # True once the user confirms this sentence as gold
|
| 88 |
}
|
| 89 |
|
| 90 |
|
|
|
|
| 442 |
gold_annotation=ann,
|
| 443 |
source="corrected",
|
| 444 |
))
|
| 445 |
+
# Adding to ICL implies the user accepts this annotation as gold → mark validated.
|
| 446 |
+
sent["validated"] = True
|
| 447 |
return _public_state()
|
| 448 |
|
| 449 |
|
| 450 |
+
@app.post("/api/sentence/{idx}/validate")
|
| 451 |
+
def set_validated(idx: int, payload: dict):
|
| 452 |
+
"""payload = {value: bool}. Sets the user-validation flag on a sentence to the given value (True when "value" is omitted)."""
|
| 453 |
+
sents = SESSION["sentences"]
|
| 454 |
+
if idx < 0 or idx >= len(sents):
|
| 455 |
+
raise HTTPException(404, "Bad sentence idx")
|
| 456 |
+
sents[idx]["validated"] = bool(payload.get("value", True))
|
| 457 |
+
return sents[idx]
|
| 458 |
+
|
| 459 |
+
|
| 460 |
@app.post("/api/icl/clear")
|
| 461 |
def clear_icl():
|
| 462 |
SESSION["icl_pool"] = ICLPool()
|
|
|
|
| 488 |
)
|
| 489 |
SESSION["rendered_user_cache"] = rendered_user
|
| 490 |
sent["status"] = "annotating"
|
| 491 |
+
sent["validated"] = False # re-annotation invalidates any prior user validation
|
| 492 |
results = await client.annotate_many(
|
| 493 |
models=models, system=sys_prompt, user=rendered_user,
|
| 494 |
schema=schema, temperature=float(temperature),
|
static/app.js
CHANGED
|
@@ -321,11 +321,11 @@ function annotator() {
|
|
| 321 |
return parts[parts.length - 1];
|
| 322 |
},
|
| 323 |
|
| 324 |
-
// Per-model accuracy on a single sentence,
|
| 325 |
-
//
|
| 326 |
-
//
|
| 327 |
modelAccuracy(sent) {
|
| 328 |
-
if (!sent || sent.status !== 'done') return [];
|
| 329 |
const perModel = sent.per_model || {};
|
| 330 |
const modelNames = Object.keys(perModel);
|
| 331 |
if (modelNames.length === 0) return [];
|
|
@@ -635,6 +635,18 @@ function annotator() {
|
|
| 635 |
this.toast(`Added to ICL pool (v${this.state.icl_pool.version}, ${this.state.icl_pool.size} entries).`, 'ok');
|
| 636 |
},
|
| 637 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
// ----------- token editor -----------
|
| 639 |
onTokenClick(ev, sidx, tidx) {
|
| 640 |
if (ev.shiftKey) {
|
|
|
|
| 321 |
return parts[parts.length - 1];
|
| 322 |
},
|
| 323 |
|
| 324 |
+
// Per-model accuracy on a single sentence, ONLY shown after the user has
|
| 325 |
+
// confirmed the annotation as gold (sent.validated === true). Skips
|
| 326 |
+
// confidence/comment (same as disagreement counting).
|
| 327 |
modelAccuracy(sent) {
|
| 328 |
+
if (!sent || sent.status !== 'done' || !sent.validated) return [];
|
| 329 |
const perModel = sent.per_model || {};
|
| 330 |
const modelNames = Object.keys(perModel);
|
| 331 |
if (modelNames.length === 0) return [];
|
|
|
|
| 635 |
this.toast(`Added to ICL pool (v${this.state.icl_pool.version}, ${this.state.icl_pool.size} entries).`, 'ok');
|
| 636 |
},
|
| 637 |
|
| 638 |
+
async setValidated(sidx, value) {
|
| 639 |
+
const r = await fetch(`/api/sentence/${sidx}/validate`, {
|
| 640 |
+
method: 'POST',
|
| 641 |
+
headers: { 'Content-Type': 'application/json' },
|
| 642 |
+
body: JSON.stringify({ value }),
|
| 643 |
+
});
|
| 644 |
+
if (!r.ok) { this.toast('Could not toggle scoring.', 'error'); return; }
|
| 645 |
+
const sent = await r.json();
|
| 646 |
+
this.replaceSentence(sidx, sent);
|
| 647 |
+
this.toast(value ? '📊 Showing per-model accuracy vs your current annotation.' : 'Scores hidden.', 'ok');
|
| 648 |
+
},
|
| 649 |
+
|
| 650 |
// ----------- token editor -----------
|
| 651 |
onTokenClick(ev, sidx, tidx) {
|
| 652 |
if (ev.shiftKey) {
|
static/index.html
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
}
|
| 25 |
}
|
| 26 |
</script>
|
| 27 |
-
<link rel="stylesheet" href="/static/styles.css?v=
|
| 28 |
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.13.10/dist/cdn.min.js"></script>
|
| 29 |
</head>
|
| 30 |
<body class="font-sans bg-ink-50 text-ink-900 min-h-screen" x-data="annotator()" x-init="init()" x-cloak>
|
|
@@ -239,21 +239,35 @@
|
|
| 239 |
<span x-show="sent.status === 'done'">↻ Re-annotate</span>
|
| 240 |
<span x-show="sent.status !== 'done'">▶ Annotate</span>
|
| 241 |
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
<button @click="addSentenceToIcl(sidx)" :disabled="sent.status !== 'done'"
|
| 243 |
class="btn btn-ghost text-xs"
|
| 244 |
:class="{'opacity-50': sent.status !== 'done'}"
|
| 245 |
-
title="Add this
|
| 246 |
📥 to ICL
|
| 247 |
</button>
|
| 248 |
</header>
|
| 249 |
|
| 250 |
-
<!-- Per-model accuracy strip
|
| 251 |
<div class="px-4 pt-2 -mb-1 flex flex-wrap items-center gap-1.5"
|
| 252 |
-
x-show="sent.
|
| 253 |
-
<span class="text-[10px] uppercase tracking-wider text-
|
|
|
|
| 254 |
<template x-for="m in modelAccuracy(sent)" :key="m.model">
|
| 255 |
<span class="accuracy-pill" :class="accuracyClass(m.pct)"
|
| 256 |
-
:title="`${m.model}: ${m.correct}/${m.total}
|
| 257 |
<span class="font-mono opacity-80" x-text="modelShort(m.model)"></span>
|
| 258 |
<strong x-text="m.pct + '%'"></strong>
|
| 259 |
</span>
|
|
@@ -888,6 +902,6 @@
|
|
| 888 |
</template>
|
| 889 |
</div>
|
| 890 |
|
| 891 |
-
<script src="/static/app.js?v=
|
| 892 |
</body>
|
| 893 |
</html>
|
|
|
|
| 24 |
}
|
| 25 |
}
|
| 26 |
</script>
|
| 27 |
+
<link rel="stylesheet" href="/static/styles.css?v=20260516e">
|
| 28 |
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.13.10/dist/cdn.min.js"></script>
|
| 29 |
</head>
|
| 30 |
<body class="font-sans bg-ink-50 text-ink-900 min-h-screen" x-data="annotator()" x-init="init()" x-cloak>
|
|
|
|
| 239 |
<span x-show="sent.status === 'done'">↻ Re-annotate</span>
|
| 240 |
<span x-show="sent.status !== 'done'">▶ Annotate</span>
|
| 241 |
</button>
|
| 242 |
+
<!-- Score toggle — reveals per-model accuracy vs current annotation. Purely display. -->
|
| 243 |
+
<button x-show="sent.status === 'done' && !sent.validated"
|
| 244 |
+
@click="setValidated(sidx, true)"
|
| 245 |
+
class="btn btn-secondary text-xs"
|
| 246 |
+
title="Compute per-model accuracy against your current annotation. Doesn't affect export.">
|
| 247 |
+
📊 Score
|
| 248 |
+
</button>
|
| 249 |
+
<span x-show="sent.validated"
|
| 250 |
+
class="inline-flex items-center gap-1 text-xs text-accent-700 font-medium px-2 py-1 rounded bg-accent-50 border border-accent-200 cursor-pointer"
|
| 251 |
+
@click="setValidated(sidx, false)"
|
| 252 |
+
title="Click to hide the scores">
|
| 253 |
+
📊 Scored
|
| 254 |
+
</span>
|
| 255 |
<button @click="addSentenceToIcl(sidx)" :disabled="sent.status !== 'done'"
|
| 256 |
class="btn btn-ghost text-xs"
|
| 257 |
:class="{'opacity-50': sent.status !== 'done'}"
|
| 258 |
+
title="Add this annotation to the ICL pool as a few-shot example (also reveals scores).">
|
| 259 |
📥 to ICL
|
| 260 |
</button>
|
| 261 |
</header>
|
| 262 |
|
| 263 |
+
<!-- Per-model accuracy strip — purely informational, gated behind the Score toggle -->
|
| 264 |
<div class="px-4 pt-2 -mb-1 flex flex-wrap items-center gap-1.5"
|
| 265 |
+
x-show="sent.validated && Object.keys(sent.per_model || {}).length > 0">
|
| 266 |
+
<span class="text-[10px] uppercase tracking-wider text-accent-700 font-semibold mr-1"
|
| 267 |
+
title="% of task-meaningful fields each model got right, compared to your current annotation">match · your version</span>
|
| 268 |
<template x-for="m in modelAccuracy(sent)" :key="m.model">
|
| 269 |
<span class="accuracy-pill" :class="accuracyClass(m.pct)"
|
| 270 |
+
:title="`${m.model}: ${m.correct}/${m.total} fields match your current annotation`">
|
| 271 |
<span class="font-mono opacity-80" x-text="modelShort(m.model)"></span>
|
| 272 |
<strong x-text="m.pct + '%'"></strong>
|
| 273 |
</span>
|
|
|
|
| 902 |
</template>
|
| 903 |
</div>
|
| 904 |
|
| 905 |
+
<script src="/static/app.js?v=20260516e"></script>
|
| 906 |
</body>
|
| 907 |
</html>
|