county committed on
Commit
fdc1e03
·
1 Parent(s): caf6918

Align cross-judge layout and remove full-set label

Browse files
Files changed (3) hide show
  1. index.html +8 -12
  2. script.js +2 -13
  3. styles.css +17 -5
index.html CHANGED
@@ -204,16 +204,16 @@
204
  robustness and expose judge-specific gaps.
205
  </p>
206
  </div>
 
 
 
 
 
 
 
 
207
  <div class="analysis-grid">
208
  <div class="analysis-main">
209
- <div class="text-content result-copy">
210
- <p>
211
- Across both judges, <strong>WildToM-Reasoner</strong> ranks first. On Gemini
212
- common-exam grading, its lead widens, while rank correlation remains high
213
- (<strong>Spearman &rho; = 0.867</strong>), indicating that the OE ranking trend is
214
- robust to judge replacement.
215
- </p>
216
- </div>
217
  <section class="cross-judge-card" aria-label="Cross-judge model ranking comparison">
218
  <div class="cross-toolbar">
219
  <div class="toolbar-row">
@@ -224,10 +224,6 @@
224
  <span class="toolbar-label">Judge</span>
225
  <div class="toggle-group" id="judge-toggle"></div>
226
  </div>
227
- <div class="toolbar-row">
228
- <span class="toolbar-label">Metric</span>
229
- <div class="toggle-group metric" id="metric-toggle"></div>
230
- </div>
231
  </div>
232
  <div class="results-caption" id="results-caption"></div>
233
  <div class="results-panel" id="results-table"></div>
 
204
  robustness and expose judge-specific gaps.
205
  </p>
206
  </div>
207
+ <div class="text-content result-copy result-copy-wide">
208
+ <p>
209
+ Across both judges, <strong>WildToM-Reasoner</strong> ranks first. On Gemini grading,
210
+ its lead widens, while rank correlation remains high
211
+ (<strong>Spearman &rho; = 0.867</strong>), indicating that the OE ranking trend is
212
+ robust to judge replacement.
213
+ </p>
214
+ </div>
215
  <div class="analysis-grid">
216
  <div class="analysis-main">
 
 
 
 
 
 
 
 
217
  <section class="cross-judge-card" aria-label="Cross-judge model ranking comparison">
218
  <div class="cross-toolbar">
219
  <div class="toolbar-row">
 
224
  <span class="toolbar-label">Judge</span>
225
  <div class="toggle-group" id="judge-toggle"></div>
226
  </div>
 
 
 
 
227
  </div>
228
  <div class="results-caption" id="results-caption"></div>
229
  <div class="results-panel" id="results-table"></div>
script.js CHANGED
@@ -161,7 +161,7 @@ const filters = ["Belief", "Desire", "Intention", "Emotion", "Knowledge"];
161
 
162
  const judgeResults = {
163
  gpt4o: {
164
- label: "GPT-4o (full set)",
165
  deltaLabel: "Gemini",
166
  rows: [
167
  { model: "WildToM-Reasoner", rank: 1, oeAcc: 32.7, oeScr: 2.6, isOurs: true },
@@ -211,7 +211,6 @@ const showcaseRoot = document.getElementById("showcase-browser");
211
  const resultsRoot = document.getElementById("results-table");
212
  const taskToggleRoot = document.getElementById("task-toggle");
213
  const judgeToggleRoot = document.getElementById("judge-toggle");
214
- const metricToggleRoot = document.getElementById("metric-toggle");
215
  const resultsCaptionRoot = document.getElementById("results-caption");
216
  const carouselState = {
217
  activeDimension: "Belief",
@@ -385,7 +384,6 @@ function renderControls() {
385
  return;
386
  }
387
  const judgeToggleRow = judgeToggleRoot.closest(".toolbar-row");
388
- const metricToggleRow = metricToggleRoot ? metricToggleRoot.closest(".toolbar-row") : null;
389
 
390
  const taskItems = [
391
  { key: "oe", label: "OE" },
@@ -404,21 +402,15 @@ function renderControls() {
404
  });
405
 
406
  judgeToggleRoot.innerHTML = "";
407
- if (metricToggleRoot) {
408
- metricToggleRoot.innerHTML = "";
409
- }
410
 
411
  if (crossJudgeState.activeTask === "oe") {
412
  judgeToggleRoot.classList.remove("is-hidden");
413
  if (judgeToggleRow) {
414
  judgeToggleRow.classList.remove("is-hidden");
415
  }
416
- if (metricToggleRow) {
417
- metricToggleRow.classList.add("is-hidden");
418
- }
419
 
420
  const judgeItems = [
421
- { key: "gpt4o", label: "GPT-4o (full set)" },
422
  { key: "gemini", label: "Gemini-2.5-Flash" }
423
  ];
424
  judgeItems.forEach((item) => {
@@ -434,9 +426,6 @@ function renderControls() {
434
  if (judgeToggleRow) {
435
  judgeToggleRow.classList.add("is-hidden");
436
  }
437
- if (metricToggleRow) {
438
- metricToggleRow.classList.add("is-hidden");
439
- }
440
  }
441
 
442
  function renderOeResults(activeJudge) {
 
161
 
162
  const judgeResults = {
163
  gpt4o: {
164
+ label: "GPT-4o",
165
  deltaLabel: "Gemini",
166
  rows: [
167
  { model: "WildToM-Reasoner", rank: 1, oeAcc: 32.7, oeScr: 2.6, isOurs: true },
 
211
  const resultsRoot = document.getElementById("results-table");
212
  const taskToggleRoot = document.getElementById("task-toggle");
213
  const judgeToggleRoot = document.getElementById("judge-toggle");
 
214
  const resultsCaptionRoot = document.getElementById("results-caption");
215
  const carouselState = {
216
  activeDimension: "Belief",
 
384
  return;
385
  }
386
  const judgeToggleRow = judgeToggleRoot.closest(".toolbar-row");
 
387
 
388
  const taskItems = [
389
  { key: "oe", label: "OE" },
 
402
  });
403
 
404
  judgeToggleRoot.innerHTML = "";
 
 
 
405
 
406
  if (crossJudgeState.activeTask === "oe") {
407
  judgeToggleRoot.classList.remove("is-hidden");
408
  if (judgeToggleRow) {
409
  judgeToggleRow.classList.remove("is-hidden");
410
  }
 
 
 
411
 
412
  const judgeItems = [
413
+ { key: "gpt4o", label: "GPT-4o" },
414
  { key: "gemini", label: "Gemini-2.5-Flash" }
415
  ];
416
  judgeItems.forEach((item) => {
 
426
  if (judgeToggleRow) {
427
  judgeToggleRow.classList.add("is-hidden");
428
  }
 
 
 
429
  }
430
 
431
  function renderOeResults(activeJudge) {
styles.css CHANGED
@@ -358,13 +358,14 @@ code {
358
  display: grid;
359
  grid-template-columns: minmax(0, 1fr) minmax(0, 1fr);
360
  gap: 14px;
361
- align-items: start;
362
  }
363
 
364
  .stats-left-stack {
365
  display: grid;
366
  gap: 10px;
367
  align-content: start;
 
368
  }
369
 
370
  .distribution-figure {
@@ -383,6 +384,7 @@ code {
383
  .table-stack {
384
  display: grid;
385
  gap: 10px;
 
386
  }
387
 
388
  .dimension-grid {
@@ -416,6 +418,8 @@ code {
416
  display: grid;
417
  grid-template-columns: repeat(2, minmax(0, 1fr));
418
  gap: 10px;
 
 
419
  }
420
 
421
  .table-card {
@@ -423,6 +427,7 @@ code {
423
  border-radius: 12px;
424
  background: #ffffff;
425
  padding: 12px 12px 8px;
 
426
  }
427
 
428
  .table-card h3 {
@@ -463,19 +468,16 @@ code {
463
  display: grid;
464
  grid-template-columns: minmax(0, 1.15fr) minmax(320px, 0.85fr);
465
  gap: 16px;
466
- align-items: stretch;
467
  }
468
 
469
  .analysis-main {
470
- display: grid;
471
- grid-template-rows: auto 1fr;
472
  min-height: 100%;
473
  }
474
 
475
  .analysis-side {
476
  display: grid;
477
  gap: 12px;
478
- margin-top: 92px;
479
  }
480
 
481
  .text-content {
@@ -490,6 +492,16 @@ code {
490
  margin-bottom: 12px;
491
  }
492
 
 
 
 
 
 
 
 
 
 
 
493
  .cross-judge-card {
494
  border: 1px solid var(--line);
495
  border-radius: 14px;
 
358
  display: grid;
359
  grid-template-columns: minmax(0, 1fr) minmax(0, 1fr);
360
  gap: 14px;
361
+ align-items: stretch;
362
  }
363
 
364
  .stats-left-stack {
365
  display: grid;
366
  gap: 10px;
367
  align-content: start;
368
+ height: 100%;
369
  }
370
 
371
  .distribution-figure {
 
384
  .table-stack {
385
  display: grid;
386
  gap: 10px;
387
+ height: 100%;
388
  }
389
 
390
  .dimension-grid {
 
418
  display: grid;
419
  grid-template-columns: repeat(2, minmax(0, 1fr));
420
  gap: 10px;
421
+ height: 100%;
422
+ align-items: stretch;
423
  }
424
 
425
  .table-card {
 
427
  border-radius: 12px;
428
  background: #ffffff;
429
  padding: 12px 12px 8px;
430
+ height: 100%;
431
  }
432
 
433
  .table-card h3 {
 
468
  display: grid;
469
  grid-template-columns: minmax(0, 1.15fr) minmax(320px, 0.85fr);
470
  gap: 16px;
471
+ align-items: start;
472
  }
473
 
474
  .analysis-main {
 
 
475
  min-height: 100%;
476
  }
477
 
478
  .analysis-side {
479
  display: grid;
480
  gap: 12px;
 
481
  }
482
 
483
  .text-content {
 
492
  margin-bottom: 12px;
493
  }
494
 
495
+ .result-copy-wide {
496
+ margin-bottom: 14px;
497
+ }
498
+
499
+ .result-copy-wide p {
500
+ text-align: justify;
501
+ text-justify: inter-word;
502
+ max-width: none;
503
+ }
504
+
505
  .cross-judge-card {
506
  border: 1px solid var(--line);
507
  border-radius: 14px;