Spaces:

ENC-PSL
/

lrec2026-llm-annotator

Running

App Files Files Community

dhuser committed 6 days ago

Commit

80bc68c

1 Parent(s): 67cadad

Scores

Browse files

Files changed (3) hide show

app.py +14 -0
static/app.js +16 -4
static/index.html +21 -7

app.py CHANGED Viewed

@@ -84,6 +84,7 @@ def _new_sentence(idx: int, surface_tokens: list[str], *, sentence_id: str = "",
         "status": "pending",    # pending | annotating | done | error
         "error": "",
         "n_disagreements": 0,
     }
@@ -441,9 +442,21 @@ def add_sentence_to_icl(idx: int):
         gold_annotation=ann,
         source="corrected",
     ))
     return _public_state()
 @app.post("/api/icl/clear")
 def clear_icl():
     SESSION["icl_pool"] = ICLPool()
@@ -475,6 +488,7 @@ async def _annotate_sentence(sent: dict, client: LLMClient,
     )
     SESSION["rendered_user_cache"] = rendered_user
     sent["status"] = "annotating"
     results = await client.annotate_many(
         models=models, system=sys_prompt, user=rendered_user,
         schema=schema, temperature=float(temperature),

         "status": "pending",    # pending | annotating | done | error
         "error": "",
         "n_disagreements": 0,
+        "validated": False,     # True once the user confirms this sentence as gold
     }
         gold_annotation=ann,
         source="corrected",
     ))
+    # Adding to ICL implies the user accepts this annotation as gold → mark validated.
+    sent["validated"] = True
     return _public_state()
+@app.post("/api/sentence/{idx}/validate")
+def set_validated(idx: int, payload: dict):
+    """payload = {value: bool}. Toggles the user-validation flag on a sentence."""
+    sents = SESSION["sentences"]
+    if idx < 0 or idx >= len(sents):
+        raise HTTPException(404, "Bad sentence idx")
+    sents[idx]["validated"] = bool(payload.get("value", True))
+    return sents[idx]
 @app.post("/api/icl/clear")
 def clear_icl():
     SESSION["icl_pool"] = ICLPool()
     )
     SESSION["rendered_user_cache"] = rendered_user
     sent["status"] = "annotating"
+    sent["validated"] = False  # re-annotation invalidates any prior user validation
     results = await client.annotate_many(
         models=models, system=sys_prompt, user=rendered_user,
         schema=schema, temperature=float(temperature),

static/app.js CHANGED Viewed

@@ -321,11 +321,11 @@ function annotator() {
       return parts[parts.length - 1];
     },
-    // Per-model accuracy on a single sentence, computed vs the current consensus
-    // (i.e., what the user sees after MoE aggregation + their corrections).
-    // Skips confidence/comment (same `min`/`priority` aggregators ignored for disagreements).
     modelAccuracy(sent) {
-      if (!sent || sent.status !== 'done') return [];
       const perModel = sent.per_model || {};
       const modelNames = Object.keys(perModel);
       if (modelNames.length === 0) return [];
@@ -635,6 +635,18 @@ function annotator() {
       this.toast(`Added to ICL pool (v${this.state.icl_pool.version}, ${this.state.icl_pool.size} entries).`, 'ok');
     },
     // ----------- token editor -----------
     onTokenClick(ev, sidx, tidx) {
       if (ev.shiftKey) {

       return parts[parts.length - 1];
     },
+    // Per-model accuracy on a single sentence, ONLY shown after the user has
+    // confirmed the annotation as gold (sent.validated === true). Skips
+    // confidence/comment (same as disagreement counting).
     modelAccuracy(sent) {
+      if (!sent || sent.status !== 'done' || !sent.validated) return [];
       const perModel = sent.per_model || {};
       const modelNames = Object.keys(perModel);
       if (modelNames.length === 0) return [];
       this.toast(`Added to ICL pool (v${this.state.icl_pool.version}, ${this.state.icl_pool.size} entries).`, 'ok');
     },
+    async setValidated(sidx, value) {
+      const r = await fetch(`/api/sentence/${sidx}/validate`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ value }),
+      });
+      if (!r.ok) { this.toast('Could not toggle scoring.', 'error'); return; }
+      const sent = await r.json();
+      this.replaceSentence(sidx, sent);
+      this.toast(value ? '📊 Showing per-model accuracy vs your current annotation.' : 'Scores hidden.', 'ok');
+    },
     // ----------- token editor -----------
     onTokenClick(ev, sidx, tidx) {
       if (ev.shiftKey) {

static/index.html CHANGED Viewed

@@ -24,7 +24,7 @@
       }
     }
   </script>
-  <link rel="stylesheet" href="/static/styles.css?v=20260516c">
   <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.13.10/dist/cdn.min.js"></script>
 </head>
 <body class="font-sans bg-ink-50 text-ink-900 min-h-screen" x-data="annotator()" x-init="init()" x-cloak>
@@ -239,21 +239,35 @@
               <span x-show="sent.status === 'done'">↻ Re-annotate</span>
               <span x-show="sent.status !== 'done'">▶ Annotate</span>
             </button>
             <button @click="addSentenceToIcl(sidx)" :disabled="sent.status !== 'done'"
                     class="btn btn-ghost text-xs"
                     :class="{'opacity-50': sent.status !== 'done'}"
-                    title="Add this sentence's corrected annotation to the ICL pool">
               📥 to ICL
             </button>
           </header>
-          <!-- Per-model accuracy strip (vs current consensus / your corrections) -->
           <div class="px-4 pt-2 -mb-1 flex flex-wrap items-center gap-1.5"
-               x-show="sent.status === 'done' && Object.keys(sent.per_model || {}).length > 0">
-            <span class="text-[10px] uppercase tracking-wider text-ink-500 font-semibold mr-1" title="Per-model agreement with the current consensus (updates as you correct)">vs consensus</span>
             <template x-for="m in modelAccuracy(sent)" :key="m.model">
               <span class="accuracy-pill" :class="accuracyClass(m.pct)"
-                    :title="`${m.model}: ${m.correct}/${m.total} task-meaningful fields match`">
                 <span class="font-mono opacity-80" x-text="modelShort(m.model)"></span>
                 <strong x-text="m.pct + '%'"></strong>
               </span>
@@ -888,6 +902,6 @@
     </template>
   </div>
-  <script src="/static/app.js?v=20260516c"></script>
 </body>
 </html>

       }
     }
   </script>
+  <link rel="stylesheet" href="/static/styles.css?v=20260516e">
   <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.13.10/dist/cdn.min.js"></script>
 </head>
 <body class="font-sans bg-ink-50 text-ink-900 min-h-screen" x-data="annotator()" x-init="init()" x-cloak>
               <span x-show="sent.status === 'done'">↻ Re-annotate</span>
               <span x-show="sent.status !== 'done'">▶ Annotate</span>
             </button>
+            <!-- Score toggle — reveals per-model accuracy vs current annotation. Purely display. -->
+            <button x-show="sent.status === 'done' && !sent.validated"
+                    @click="setValidated(sidx, true)"
+                    class="btn btn-secondary text-xs"
+                    title="Compute per-model accuracy against your current annotation. Doesn't affect export.">
+              📊 Score
+            </button>
+            <span x-show="sent.validated"
+                  class="inline-flex items-center gap-1 text-xs text-accent-700 font-medium px-2 py-1 rounded bg-accent-50 border border-accent-200 cursor-pointer"
+                  @click="setValidated(sidx, false)"
+                  title="Click to hide the scores">
+              📊 Scored
+            </span>
             <button @click="addSentenceToIcl(sidx)" :disabled="sent.status !== 'done'"
                     class="btn btn-ghost text-xs"
                     :class="{'opacity-50': sent.status !== 'done'}"
+                    title="Add this annotation to the ICL pool as a few-shot example (also reveals scores).">
               📥 to ICL
             </button>
           </header>
+          <!-- Per-model accuracy strip — purely informational, gated behind the Score toggle -->
           <div class="px-4 pt-2 -mb-1 flex flex-wrap items-center gap-1.5"
+               x-show="sent.validated && Object.keys(sent.per_model || {}).length > 0">
+            <span class="text-[10px] uppercase tracking-wider text-accent-700 font-semibold mr-1"
+                  title="% of task-meaningful fields each model got right, compared to your current annotation">match · your version</span>
             <template x-for="m in modelAccuracy(sent)" :key="m.model">
               <span class="accuracy-pill" :class="accuracyClass(m.pct)"
+                    :title="`${m.model}: ${m.correct}/${m.total} fields match your current annotation`">
                 <span class="font-mono opacity-80" x-text="modelShort(m.model)"></span>
                 <strong x-text="m.pct + '%'"></strong>
               </span>
     </template>
   </div>
+  <script src="/static/app.js?v=20260516e"></script>
 </body>
 </html>