Esvanth committed on
Commit
818583c
·
verified ·
1 Parent(s): 528b24d

Update index.html

Browse files
Files changed (1) hide show
  1. templates/index.html +656 -22
templates/index.html CHANGED
@@ -73,6 +73,32 @@ header{
73
  .sec-lead{font-size:14px;color:var(--ink2);max-width:560px;line-height:1.7;margin-bottom:36px}
74
  .section-divider{border:none;border-top:1px solid var(--border);margin:0}
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  /* ── METHODOLOGY 3-STEP ── */
77
  .method-steps{display:grid;grid-template-columns:repeat(3,1fr);gap:0;position:relative}
78
  .method-steps::before{content:'';position:absolute;top:22px;left:22px;right:22px;height:2px;background:var(--bg3);z-index:0}
@@ -82,8 +108,6 @@ header{
82
  .method-step.active .ms-dot{background:var(--blue);border-color:var(--blue)}
83
  .ms-title{font-size:14px;font-weight:500;color:var(--ink);margin-bottom:7px}
84
  .ms-body{font-size:12px;color:var(--ink2);line-height:1.65}
85
- .ms-crisp{display:flex;flex-wrap:wrap;justify-content:center;gap:4px;margin-top:10px}
86
- .ms-crisp-tag{font-size:9px;font-family:'DM Mono',monospace;padding:2px 7px;border-radius:3px;background:var(--bg2);border:1px solid var(--border);color:var(--ink3);letter-spacing:.05em}
87
  /* Detail panel */
88
  .method-detail{margin-top:32px;background:#fff;border:1px solid var(--border);border-radius:14px;padding:28px 32px;box-shadow:var(--shadow);animation:fadeUp .25s ease both}
89
  .md-panel{display:none}
@@ -220,6 +244,52 @@ textarea::placeholder{color:var(--ink3)}
220
  .ci-title{font-size:13px;font-weight:500;color:var(--amber);margin-bottom:5px;display:flex;align-items:center;gap:7px}
221
  .ci-body{font-size:12px;color:#92400e;line-height:1.65}
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3);line-height:1.9}
224
  @media(max-width:768px){
225
  header,.section,.demo-section,footer{padding-left:20px;padding-right:20px}
@@ -245,11 +315,13 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
245
  <div class="logo-txt">Mind<em>Scan</em></div>
246
  </div>
247
  <nav class="nav-links">
 
248
  <a href="#methodology">Methodology</a>
249
  <a href="#matrix">Evidence Matrix</a>
250
  <a href="#findings">Findings</a>
251
  <a href="#verdict">Conclusions</a>
252
  <a href="#demo">Live Demo</a>
 
253
  </nav>
254
  <div class="nav-badge">NCI H9DAI 2026</div>
255
  </header>
@@ -275,38 +347,78 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
275
  <div class="stats-panel">
276
  <div class="stat-box"><div class="stat-num" data-target="3" data-suffix="">0</div><div class="stat-lbl">Datasets</div></div>
277
  <div class="stat-box"><div class="stat-num" data-target="12" data-suffix="">0</div><div class="stat-lbl">Models trained</div></div>
278
- <div class="stat-box"><div class="stat-num" data-target="99.9" data-suffix="%" data-dec="1">0</div><div class="stat-lbl">Best Accuracy</div></div>
279
- <div class="stat-box"><div class="stat-num" data-target="98.1" data-suffix="%" data-dec="1">0</div><div class="stat-lbl">D3 Best Accuracy</div></div>
280
  </div>
281
  </div>
282
  </div>
283
 
284
  <hr class="section-divider">
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  <!-- METHODOLOGY -->
287
  <section class="section" id="methodology">
288
  <div class="sec-eyebrow">Methodology</div>
289
  <div class="sec-h2">Three-step <em>pipeline</em></div>
290
- <p class="sec-lead">CRISP-DM applied across all three datasets — from raw social media text to parallel ensemble predictions.</p>
291
 
292
  <div class="method-steps">
293
  <div class="method-step active" onclick="showMethodDetail(0)">
294
  <div class="ms-dot">01</div>
295
  <div class="ms-title">Data</div>
296
  <div class="ms-body">3 clinical datasets spanning Twitter and Reddit, covering depression types, binary detection, and suicide risk.</div>
297
- <div class="ms-crisp"><span class="ms-crisp-tag">CRISP-DM 1: Business Understanding</span><span class="ms-crisp-tag">CRISP-DM 2: Data Understanding</span></div>
298
  </div>
299
  <div class="method-step" onclick="showMethodDetail(1)">
300
  <div class="ms-dot">02</div>
301
  <div class="ms-title">Preprocessing</div>
302
  <div class="ms-body">6-stage text cleaning pipeline + SMOTE oversampling to address class imbalance left unresolved by the base paper.</div>
303
- <div class="ms-crisp"><span class="ms-crisp-tag">CRISP-DM 3: Data Preparation</span></div>
304
  </div>
305
  <div class="method-step" onclick="showMethodDetail(2)">
306
  <div class="ms-dot">03</div>
307
  <div class="ms-title">Modelling</div>
308
  <div class="ms-body">Parallel ensemble of 12 classifiers — all run independently on every prediction, never as a sequential cascade.</div>
309
- <div class="ms-crisp"><span class="ms-crisp-tag">CRISP-DM 4: Modelling</span><span class="ms-crisp-tag">CRISP-DM 5: Evaluation</span><span class="ms-crisp-tag">CRISP-DM 6: Deployment</span></div>
310
  </div>
311
  </div>
312
 
@@ -338,19 +450,19 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
338
  <div class="md-title"><div class="md-title-dot"></div>Preprocessing Pipeline</div>
339
  <div class="md-grid">
340
  <div class="md-block">
341
- <div class="md-block-lbl">6-Stage Text Cleaning</div>
342
  <div class="md-block-val">1. Lowercase · 2. Strip URLs &amp; http links · 3. Remove @mentions · 4. Remove # symbols · 5. Strip punctuation · 6. Collapse whitespace. Applied identically across all three datasets for consistency.</div>
343
  </div>
344
  <div class="md-block">
345
- <div class="md-block-lbl">SMOTE — Synthetic Oversampling</div>
346
  <div class="md-block-val">Applied to D1 and D2 training sets only (D3 is pre-balanced). D1: 11,986 → <em>17,982 samples</em>. D2: 8,251 → <em>12,800 samples</em>. Creates synthetic clinical neighbours in TF-IDF feature space. Directly addresses the base paper's (Tumaliuan 2024) biggest limitation — they trained on raw imbalanced data.</div>
347
  </div>
348
  <div class="md-block">
349
- <div class="md-block-lbl">Feature Extraction — Classical Models</div>
350
- <div class="md-block-val">TF-IDF vectoriser with unigrams + bigrams, fitted per-dataset on training data only. Captures frequency-weighted term co-occurrence patterns, well-suited for short Twitter text.</div>
351
  </div>
352
  <div class="md-block">
353
- <div class="md-block-lbl">Feature Extraction — Transformers</div>
354
  <div class="md-block-val">XLM-RoBERTa tokeniser (max 128 tokens D1/D2, 256 tokens D3) with padding. Pre-trained multilingual contextual embeddings capture semantic meaning and long-range dependencies — critical for Reddit's longer posts.</div>
355
  </div>
356
  </div>
@@ -361,10 +473,10 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
361
  <div class="md-grid">
362
  <div class="md-block">
363
  <div class="md-block-lbl">4 Models per Dataset (12 total)</div>
364
- <div class="md-block-val"><em>Logistic Regression</em> — L2 regularised, max_iter=1000. <em>SVM</em> — LinearSVC, C=1.0. <em>XGBoost</em> — 300 estimators, max_depth=6. <em>XLM-RoBERTa</em> — fine-tuned multilingual transformer, <em>278M parameters</em>, lr=2e-5, 3 epochs.</div>
365
  </div>
366
  <div class="md-block">
367
- <div class="md-block-lbl">Parallel Architecture — Clinical Rationale</div>
368
  <div class="md-block-val">All 12 models run simultaneously on every input. A sequential design (check depression first, then suicide risk) would <strong>miss masked suicidality</strong> — a clinically documented pre-crisis pattern where affect appears normal but intent is resolved. Parallelism is a safety requirement, not a design preference.</div>
369
  </div>
370
  <div class="md-block">
@@ -372,7 +484,7 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
372
  <div class="md-block-val">XGBoost accuracy on D3: <em>91.6% (50K sample) → 70.5% (Full 232K) → 60.1% (H1 116K)</em>. Performance degrades as training data grows. The H1/H2 results are also inconsistent (60.1% vs 71.0%) — gradient boosting is highly sensitive to data distribution shifts at this scale, making it unreliable for large Reddit corpora.</div>
373
  </div>
374
  <div class="md-block">
375
- <div class="md-block-lbl">D3 Split Study (RQ2)</div>
376
  <div class="md-block-val">D3 trained on 4 configurations: Full (232K), Half 1 (116K), Half 2 (116K), Sample (50K). XLM-RoBERTa accuracy: <em>98.1% (50K) → 97.8% (H1) → 98.0% (H2/Full)</em>. Δ = 0.3% across 4× more data. Kolmogorov-Smirnov tests confirm all splits share identical distributions (p &gt; 0.49), validating the comparison.</div>
377
  </div>
378
  </div>
@@ -386,17 +498,17 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
386
  <section class="section" id="matrix">
387
  <div class="sec-eyebrow">Core evaluation</div>
388
  <div class="sec-h2">Accuracy <em>Evidence Matrix</em></div>
389
- <p class="sec-lead">All 4 models evaluated across all dataset splits. <strong>Bold</strong> = winner per row. <span style="color:var(--red)">Red</span> = XGBoost collapse on larger training sets.</p>
390
 
391
  <div class="matrix-wrap">
392
  <table class="matrix-tbl">
393
  <thead>
394
  <tr>
395
  <th>Dataset / Split</th>
396
- <th>Logistic Regression</th>
397
- <th>SVM</th>
398
- <th>XGBoost</th>
399
- <th>XLM-RoBERTa</th>
400
  </tr>
401
  </thead>
402
  <tbody>
@@ -532,7 +644,8 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
532
  <div class="demo-section" id="demo">
533
  <div class="sec-eyebrow">Live inference</div>
534
  <div class="sec-h2" style="margin-bottom:8px">Try it — <em>winner model per task</em></div>
535
- <p class="sec-lead" style="margin-bottom:24px">Sample 3 demonstrates masked suicidality. Try typing clinical-style depressive language ("I feel exhausted, nothing feels enjoyable") to observe the Affective vs. Clinical Lexicon Gap documented in Finding 04.</p>
 
536
 
537
  <div class="disclaimer"><strong>Research prototype only.</strong> Not a clinical tool. If you or someone you know is in crisis, please contact a mental health professional or emergency services immediately.</div>
538
 
@@ -594,6 +707,107 @@ footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);fo
594
  </div>
595
  </div>
596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
  <footer>
598
  MindScan · NCI H9DAI Research Project 2026 · Academic Prototype Only<br>
599
  Datasets: Zenodo 14233292 · Kaggle albertobellardini · Kaggle nikhileswarkomati<br>
@@ -761,6 +975,426 @@ function setW(id,res){
761
  }
762
 
763
  function pct(v){return(v*100).toFixed(1)+'%'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
  </script>
765
  </body>
766
  </html>
 
73
  .sec-lead{font-size:14px;color:var(--ink2);max-width:560px;line-height:1.7;margin-bottom:36px}
74
  .section-divider{border:none;border-top:1px solid var(--border);margin:0}
75
 
76
+ /* ── BASE PAPER COMPARISON ── */
77
+ .comparison-wrap{display:grid;grid-template-columns:1fr auto 1fr;gap:16px;align-items:center}
78
+ .comp-card{border-radius:14px;padding:26px;border:1px solid;box-shadow:var(--shadow)}
79
+ .comp-card.theirs{background:var(--bg2);border-color:var(--border2)}
80
+ .comp-card.ours{background:#fff;border-color:rgba(21,128,61,.25);box-shadow:var(--shadow-md)}
81
+ .comp-label{font-size:10px;font-family:'DM Mono',monospace;letter-spacing:.1em;text-transform:uppercase;margin-bottom:12px;padding:4px 10px;border-radius:4px;display:inline-block}
82
+ .comp-card.theirs .comp-label{background:var(--bg3);color:var(--ink3)}
83
+ .comp-card.ours .comp-label{background:var(--green-bg);color:var(--green)}
84
+ .comp-title{font-family:'Instrument Serif',serif;font-size:18px;letter-spacing:-.01em;color:var(--ink);margin-bottom:4px}
85
+ .comp-sub{font-size:12px;color:var(--ink2);margin-bottom:18px}
86
+ .comp-row{display:flex;align-items:flex-start;gap:8px;margin-bottom:9px;font-size:13px}
87
+ .comp-icon{width:16px;height:16px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:9px;flex-shrink:0;margin-top:1px}
88
+ .comp-icon.bad{background:rgba(185,28,28,.1);color:var(--red)}
89
+ .comp-icon.good{background:var(--green-bg);color:var(--green)}
90
+ .comp-text{color:var(--ink2);line-height:1.45}
91
+ .comp-text strong{color:var(--ink)}
92
+ .comp-f1-row{margin-top:18px;padding-top:14px;border-top:1px solid var(--border);display:flex;align-items:center;gap:10px}
93
+ .comp-f1-label{font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3)}
94
+ .comp-f1-val{font-family:'Instrument Serif',serif;font-size:24px;letter-spacing:-.02em}
95
+ .comp-card.theirs .comp-f1-val{color:var(--ink3)}
96
+ .comp-card.ours .comp-f1-val{color:var(--green)}
97
+ .comp-middle{text-align:center;padding:16px 12px}
98
+ .comp-arrow{font-size:24px;color:var(--green);margin-bottom:6px}
99
+ .comp-delta{font-family:'Instrument Serif',serif;font-size:28px;color:var(--green);letter-spacing:-.02em}
100
+ .comp-delta-lbl{font-size:10px;font-family:'DM Mono',monospace;color:var(--ink3);margin-top:2px;text-transform:uppercase}
101
+
102
  /* ── METHODOLOGY 3-STEP ── */
103
  .method-steps{display:grid;grid-template-columns:repeat(3,1fr);gap:0;position:relative}
104
  .method-steps::before{content:'';position:absolute;top:22px;left:22px;right:22px;height:2px;background:var(--bg3);z-index:0}
 
108
  .method-step.active .ms-dot{background:var(--blue);border-color:var(--blue)}
109
  .ms-title{font-size:14px;font-weight:500;color:var(--ink);margin-bottom:7px}
110
  .ms-body{font-size:12px;color:var(--ink2);line-height:1.65}
 
 
111
  /* Detail panel */
112
  .method-detail{margin-top:32px;background:#fff;border:1px solid var(--border);border-radius:14px;padding:28px 32px;box-shadow:var(--shadow);animation:fadeUp .25s ease both}
113
  .md-panel{display:none}
 
244
  .ci-title{font-size:13px;font-weight:500;color:var(--amber);margin-bottom:5px;display:flex;align-items:center;gap:7px}
245
  .ci-body{font-size:12px;color:#92400e;line-height:1.65}
246
 
247
+ /* ── GLOSSARY TOOLTIPS ── */
248
+ .gloss{border-bottom:1px dashed var(--ink3);cursor:help;position:relative;display:inline}
249
+ .gloss::after{content:attr(data-tip);position:absolute;bottom:calc(100% + 8px);left:50%;transform:translateX(-50%);background:var(--ink);color:#fff;font-size:11.5px;padding:8px 12px;border-radius:8px;width:230px;white-space:normal;line-height:1.5;font-family:'Geist',sans-serif;letter-spacing:0;text-align:left;opacity:0;pointer-events:none;transition:opacity .15s;z-index:300;box-shadow:0 4px 16px rgba(0,0,0,.18)}
250
+ .gloss::before{content:'';position:absolute;bottom:calc(100% + 2px);left:50%;transform:translateX(-50%);border:5px solid transparent;border-top-color:var(--ink);opacity:0;transition:opacity .15s;z-index:301}
251
+ .gloss:hover::after,.gloss:hover::before{opacity:1}
252
+
253
+ /* ── CODE MODAL ── */
254
+ .cm-term{border-bottom:1px dashed var(--blue);color:var(--ink);cursor:pointer;display:inline-flex;align-items:center;gap:5px;transition:color .15s}
255
+ .cm-term:hover{color:var(--blue)}
256
+ .cm-term::after{content:'</>';font-family:'DM Mono',monospace;font-size:9px;color:var(--blue);opacity:.7;letter-spacing:-.03em}
257
+ .cm-overlay{position:fixed;inset:0;background:rgba(26,24,22,.45);z-index:500;display:none;align-items:center;justify-content:center;padding:20px;backdrop-filter:blur(3px)}
258
+ .cm-overlay.open{display:flex}
259
+ .cm-box{background:#fff;border-radius:16px;width:100%;max-width:680px;max-height:88vh;display:flex;flex-direction:column;box-shadow:0 24px 80px rgba(0,0,0,.18);overflow:hidden}
260
+ .cm-head{padding:20px 24px 0;display:flex;align-items:flex-start;justify-content:space-between;gap:16px}
261
+ .cm-title{font-family:'Instrument Serif',serif;font-size:22px;letter-spacing:-.02em;color:var(--ink)}
262
+ .cm-close{width:28px;height:28px;border-radius:50%;border:1px solid var(--border);background:var(--bg2);cursor:pointer;font-size:14px;display:flex;align-items:center;justify-content:center;flex-shrink:0;color:var(--ink2)}
263
+ .cm-close:hover{background:var(--bg3)}
264
+ .cm-tabs{display:flex;gap:2px;padding:12px 24px 0;border-bottom:1px solid var(--border)}
265
+ .cm-tab{font-size:11px;font-family:'DM Mono',monospace;padding:6px 14px;border-radius:6px 6px 0 0;cursor:pointer;border:1px solid transparent;border-bottom:none;color:var(--ink2);margin-bottom:-1px;background:none}
266
+ .cm-tab.active{background:#fff;border-color:var(--border);color:var(--ink)}
267
+ .cm-body{overflow-y:auto;padding:20px 24px 24px}
268
+ .cm-panel{display:none}
269
+ .cm-panel.active{display:block}
270
+ .cm-pre{background:var(--ink);color:#e8e4dc;font-family:'DM Mono',monospace;font-size:12px;line-height:1.7;padding:16px 18px;border-radius:10px;overflow-x:auto;white-space:pre;margin-bottom:10px}
271
+ .cm-src{font-size:11px;color:var(--ink3);font-family:'DM Mono',monospace;margin-top:6px}
272
+ .cm-why-body{font-size:13.5px;color:var(--ink2);line-height:1.8}
273
+ .cm-why-body strong{color:var(--ink)}
274
+ .cm-out-row{display:flex;align-items:flex-start;gap:12px;padding:10px 14px;background:var(--bg2);border-radius:8px;margin-bottom:8px}
275
+ .cm-out-lbl{font-size:10px;font-family:'DM Mono',monospace;color:var(--ink3);min-width:28px;text-transform:uppercase;margin-top:2px}
276
+ .cm-out-val{font-size:13px;color:var(--ink);line-height:1.5}
277
+ .cm-out-val em{font-family:'DM Mono',monospace;font-size:11.5px;color:var(--blue)}
278
+
279
+ /* ── FAQ ACCORDION ── */
280
+ .faq-section{max-width:1040px;margin:0 auto;padding:64px 48px}
281
+ .faq-group{margin-bottom:32px}
282
+ .faq-group-title{font-size:10px;font-family:'DM Mono',monospace;letter-spacing:.12em;text-transform:uppercase;color:var(--blue);margin-bottom:12px}
283
+ .faq-item{border:1px solid var(--border);border-radius:10px;margin-bottom:6px;overflow:hidden;background:#fff}
284
+ .faq-q{width:100%;text-align:left;background:none;border:none;padding:14px 18px;font-size:13px;font-family:'Geist',sans-serif;color:var(--ink);cursor:pointer;display:flex;justify-content:space-between;align-items:center;gap:16px;line-height:1.45}
285
+ .faq-q:hover{background:var(--bg2)}
286
+ .faq-q .faq-chevron{font-size:10px;color:var(--ink3);flex-shrink:0;transition:transform .2s}
287
+ .faq-item.open .faq-chevron{transform:rotate(180deg)}
288
+ .faq-a{max-height:0;overflow:hidden;transition:max-height .25s ease}
289
+ .faq-item.open .faq-a{max-height:500px}
290
+ .faq-a-inner{padding:0 18px 14px;font-size:12.5px;color:var(--ink2);line-height:1.75;border-top:1px solid var(--border)}
291
+ .faq-a-inner code{font-family:'DM Mono',monospace;font-size:11px;background:var(--bg2);padding:1px 5px;border-radius:3px;color:var(--ink)}
292
+
293
  footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3);line-height:1.9}
294
  @media(max-width:768px){
295
  header,.section,.demo-section,footer{padding-left:20px;padding-right:20px}
 
315
  <div class="logo-txt">Mind<em>Scan</em></div>
316
  </div>
317
  <nav class="nav-links">
318
+ <a href="#comparison">vs Base Paper</a>
319
  <a href="#methodology">Methodology</a>
320
  <a href="#matrix">Evidence Matrix</a>
321
  <a href="#findings">Findings</a>
322
  <a href="#verdict">Conclusions</a>
323
  <a href="#demo">Live Demo</a>
324
+ <a href="#faq">FAQ</a>
325
  </nav>
326
  <div class="nav-badge">NCI H9DAI 2026</div>
327
  </header>
 
347
  <div class="stats-panel">
348
  <div class="stat-box"><div class="stat-num" data-target="3" data-suffix="">0</div><div class="stat-lbl">Datasets</div></div>
349
  <div class="stat-box"><div class="stat-num" data-target="12" data-suffix="">0</div><div class="stat-lbl">Models trained</div></div>
350
+ <div class="stat-box"><div class="stat-num" data-target="98.1" data-suffix="%" data-dec="1">0</div><div class="stat-lbl">D3 Accuracy (4-class)</div></div>
351
+ <div class="stat-box"><div class="stat-num" data-target="12.7" data-suffix="%" data-dec="1">0</div><div class="stat-lbl">vs Base Paper ↑</div></div>
352
  </div>
353
  </div>
354
  </div>
355
 
356
  <hr class="section-divider">
357
 
358
+ <!-- BASE PAPER COMPARISON -->
359
+ <section class="section" id="comparison">
360
+ <div class="sec-eyebrow">Extending prior work</div>
361
+ <div class="sec-h2">Our work vs <em>Tumaliuan et al. (2024)</em></div>
362
+ <p class="sec-lead">Dataset 1 is structurally equivalent to the base paper's Filipino Twitter corpus — same 6-class task, same clinical annotation method — making a direct F1 comparison valid.</p>
363
+
364
+ <div class="comparison-wrap">
365
+ <div class="comp-card theirs">
366
+ <div class="comp-label">Tumaliuan et al. — 2024</div>
367
+ <div class="comp-title">Filipino Twitter Depression</div>
368
+ <div class="comp-sub">Frontiers in Computer Science · word2vec pipeline</div>
369
+ <div class="comp-row"><div class="comp-icon bad">✕</div><div class="comp-text">Used <strong><span class="gloss" data-tip="word2vec (2013): maps words to fixed vectors based on co-occurrence. Cannot understand negation ('not happy' ≈ 'happy') or context. Superseded by transformers.">word2vec</span></strong> (2013) — static embeddings, no negation handling</div></div>
370
+ <div class="comp-row"><div class="comp-icon bad">✕</div><div class="comp-text"><strong><span class="gloss" data-tip="SVM (Support Vector Machine): finds the maximum-margin hyperplane separating classes. Very effective for high-dimensional text features like TF-IDF. Gold standard for NLP before transformers.">SVM never tested</span></strong> — absent from evaluation despite being NLP gold standard</div></div>
371
+ <div class="comp-row"><div class="comp-icon bad">✕</div><div class="comp-text"><strong><span class="gloss" data-tip="XGBoost: gradient-boosted decision trees. Sequentially builds trees to correct previous errors. Handles imbalanced data well and often beats random forests on tabular/sparse features.">XGBoost never tested</span></strong> — gradient boosting entirely absent</div></div>
372
+ <div class="comp-row"><div class="comp-icon bad">✕</div><div class="comp-text">Class imbalance listed as <strong>limitation — never resolved</strong></div></div>
373
+ <div class="comp-row"><div class="comp-icon bad">✕</div><div class="comp-text"><strong>Restricted dataset</strong> — requires author permission to access</div></div>
374
+ <div class="comp-row"><div class="comp-icon bad">✕</div><div class="comp-text"><strong>Accuracy not verified</strong> — no reproducible baseline reported</div></div>
375
+ <div class="comp-f1-row"><span class="comp-f1-label">Best Accuracy</span><span class="comp-f1-val">~81%</span></div>
376
+ </div>
377
+
378
+ <div class="comp-middle">
379
+ <div class="comp-arrow">→</div>
380
+ <div class="comp-delta">+11.4%</div>
381
+ <div class="comp-delta-lbl">accuracy gain (D1)</div>
382
+ </div>
383
+
384
+ <div class="comp-card ours">
385
+ <div class="comp-label">MindScan — 2026</div>
386
+ <div class="comp-title">English Twitter + Reddit</div>
387
+ <div class="comp-sub">Zenodo (Nusrat 2024) · XLM-RoBERTa + SVM + XGBoost</div>
388
+ <div class="comp-row"><div class="comp-icon good">✓</div><div class="comp-text"><strong><span class="gloss" data-tip="XLM-RoBERTa: 278M-parameter multilingual transformer. Pre-trained on 100 languages. Produces contextual embeddings — the same word gets different vectors depending on surrounding context. Understands negation, irony, and long-range dependencies.">XLM-RoBERTa</span></strong> (2019) — contextual embeddings, understands negation</div></div>
389
+ <div class="comp-row"><div class="comp-icon good">✓</div><div class="comp-text"><strong>SVM added</strong> — best D1 accuracy 92.36%, beats transformer (90.52%)</div></div>
390
+ <div class="comp-row"><div class="comp-icon good">✓</div><div class="comp-text"><strong>XGBoost added</strong> — accuracy 91.76%, gradient boosting for imbalanced data</div></div>
391
+ <div class="comp-row"><div class="comp-icon good">✓</div><div class="comp-text"><span class="gloss" data-tip="SMOTE (Synthetic Minority Oversampling Technique): generates synthetic training samples for minority classes by interpolating between existing minority-class examples in feature space. Applied to training data only — never the test set.">SMOTE</span> applied — <strong>imbalance resolved</strong>, all 6 classes equalised to 2,997</div></div>
392
+ <div class="comp-row"><div class="comp-icon good">✓</div><div class="comp-text"><strong>Public dataset</strong> — fully reproducible, anyone can verify results</div></div>
393
+ <div class="comp-row"><div class="comp-icon good">✓</div><div class="comp-text"><strong>Accuracy verified</strong> on held-out 20% test set, same 6-class task</div></div>
394
+ <div class="comp-f1-row"><span class="comp-f1-label">Best Accuracy (D1 SVM)</span><span class="comp-f1-val">92.4%</span></div>
395
+ </div>
396
+ </div>
397
+ </section>
398
+
399
+ <hr class="section-divider">
400
+
401
  <!-- METHODOLOGY -->
402
  <section class="section" id="methodology">
403
  <div class="sec-eyebrow">Methodology</div>
404
  <div class="sec-h2">Three-step <em>pipeline</em></div>
405
+ <p class="sec-lead"><span class="gloss" data-tip="CRISP-DM: Cross-Industry Standard Process for Data Mining. 6 phases: Business Understanding → Data Understanding → Data Preparation → Modelling → Evaluation → Deployment. The de facto lifecycle framework for data science projects.">CRISP-DM</span> applied across all three datasets — from raw social media text to parallel ensemble predictions.</p>
406
 
407
  <div class="method-steps">
408
  <div class="method-step active" onclick="showMethodDetail(0)">
409
  <div class="ms-dot">01</div>
410
  <div class="ms-title">Data</div>
411
  <div class="ms-body">3 clinical datasets spanning Twitter and Reddit, covering depression types, binary detection, and suicide risk.</div>
 
412
  </div>
413
  <div class="method-step" onclick="showMethodDetail(1)">
414
  <div class="ms-dot">02</div>
415
  <div class="ms-title">Preprocessing</div>
416
  <div class="ms-body">6-stage text cleaning pipeline + SMOTE oversampling to address class imbalance left unresolved by the base paper.</div>
 
417
  </div>
418
  <div class="method-step" onclick="showMethodDetail(2)">
419
  <div class="ms-dot">03</div>
420
  <div class="ms-title">Modelling</div>
421
  <div class="ms-body">Parallel ensemble of 12 classifiers — all run independently on every prediction, never as a sequential cascade.</div>
 
422
  </div>
423
  </div>
424
 
 
450
  <div class="md-title"><div class="md-title-dot"></div>Preprocessing Pipeline</div>
451
  <div class="md-grid">
452
  <div class="md-block">
453
+ <div class="md-block-lbl"><span class="cm-term" onclick="openCM('clean_text')">6-Stage Text Cleaning</span></div>
454
  <div class="md-block-val">1. Lowercase · 2. Strip URLs &amp; http links · 3. Remove @mentions · 4. Remove # symbols · 5. Strip punctuation · 6. Collapse whitespace. Applied identically across all three datasets for consistency.</div>
455
  </div>
456
  <div class="md-block">
457
+ <div class="md-block-lbl"><span class="cm-term" onclick="openCM('smote')">SMOTE — Synthetic Oversampling</span></div>
458
  <div class="md-block-val">Applied to D1 and D2 training sets only (D3 is pre-balanced). D1: 11,986 → <em>17,982 samples</em>. D2: 8,251 → <em>12,800 samples</em>. Creates synthetic clinical neighbours in TF-IDF feature space. Directly addresses the base paper's (Tumaliuan 2024) biggest limitation — they trained on raw imbalanced data.</div>
459
  </div>
460
  <div class="md-block">
461
+ <div class="md-block-lbl"><span class="cm-term" onclick="openCM('tfidf')">Feature Extraction — TF-IDF</span></div>
462
+ <div class="md-block-val"><span class="gloss" data-tip="TF-IDF (Term Frequency–Inverse Document Frequency): scores each word by how often it appears in a document (TF) weighted by how rare it is across all documents (IDF). Settings: max_features=50,000, ngram_range=(1,2), sublinear_tf=True, min_df=2.">TF-IDF</span> vectoriser with unigrams + bigrams, fitted per-dataset on training data only. Captures frequency-weighted term co-occurrence patterns, well-suited for short Twitter text.</div>
463
  </div>
464
  <div class="md-block">
465
+ <div class="md-block-lbl"><span class="cm-term" onclick="openCM('tokeniser')">Feature Extraction — Tokeniser</span></div>
466
  <div class="md-block-val">XLM-RoBERTa tokeniser (max 128 tokens D1/D2, 256 tokens D3) with padding. Pre-trained multilingual contextual embeddings capture semantic meaning and long-range dependencies — critical for Reddit's longer posts.</div>
467
  </div>
468
  </div>
 
473
  <div class="md-grid">
474
  <div class="md-block">
475
  <div class="md-block-lbl">4 Models per Dataset (12 total)</div>
476
+ <div class="md-block-val"><span class="cm-term" onclick="openCM('lr')">Logistic Regression</span> — L2 regularised, max_iter=1000. <span class="cm-term" onclick="openCM('svm')">SVM</span> — LinearSVC, C=1.0. <span class="cm-term" onclick="openCM('xgb')">XGBoost</span> — 300 estimators, max_depth=6. <span class="cm-term" onclick="openCM('xlmr_ft')">XLM-RoBERTa</span> — fine-tuned multilingual transformer, <em>278M parameters</em>, lr=2e-5, 3 epochs.</div>
477
  </div>
478
  <div class="md-block">
479
+ <div class="md-block-lbl"><span class="cm-term" onclick="openCM('majority_vote')">Ensemble Vote — Risk Flag Logic</span></div>
480
  <div class="md-block-val">All 12 models run simultaneously on every input. A sequential design (check depression first, then suicide risk) would <strong>miss masked suicidality</strong> — a clinically documented pre-crisis pattern where affect appears normal but intent is resolved. Parallelism is a safety requirement, not a design preference.</div>
481
  </div>
482
  <div class="md-block">
 
484
  <div class="md-block-val">XGBoost accuracy on D3: <em>91.6% (50K sample) → 70.5% (Full 232K) → 60.1% (H1 116K)</em>. Performance degrades as training data grows. The H1/H2 results are also inconsistent (60.1% vs 71.0%) — gradient boosting is highly sensitive to data distribution shifts at this scale, making it unreliable for large Reddit corpora.</div>
485
  </div>
486
  <div class="md-block">
487
+ <div class="md-block-lbl"><span class="cm-term" onclick="openCM('split_study')">D3 Split Study (RQ2)</span></div>
488
  <div class="md-block-val">D3 trained on 4 configurations: Full (232K), Half 1 (116K), Half 2 (116K), Sample (50K). XLM-RoBERTa accuracy: <em>98.1% (50K) → 97.8% (H1) → 98.0% (H2/Full)</em>. Δ = 0.3% across 4× more data. Kolmogorov-Smirnov tests confirm all splits share identical distributions (p &gt; 0.49), validating the comparison.</div>
489
  </div>
490
  </div>
 
498
  <section class="section" id="matrix">
499
  <div class="sec-eyebrow">Core evaluation</div>
500
  <div class="sec-h2">Accuracy <em>Evidence Matrix</em></div>
501
+ <p class="sec-lead">All 4 models evaluated across all dataset splits. <strong>Bold</strong> = winner per row. <span style="color:var(--red)">Red</span> = XGBoost collapse on larger training sets. — <span class="cm-term" onclick="openCM('eval_metrics')">How metrics are computed</span></p>
502
 
503
  <div class="matrix-wrap">
504
  <table class="matrix-tbl">
505
  <thead>
506
  <tr>
507
  <th>Dataset / Split</th>
508
+ <th><span class="gloss" data-tip="Logistic Regression: linear model trained with L2 regularisation (max_iter=1000). Fast, interpretable baseline. Outputs class probabilities via sigmoid/softmax. Works well with TF-IDF sparse vectors.">Logistic Regression</span></th>
509
+ <th><span class="gloss" data-tip="SVM (Support Vector Machine): LinearSVC, C=1.0. Finds maximum-margin hyperplane in TF-IDF feature space. Best classical model on D1 — short tweets give TF-IDF enough signal to beat contextual embeddings.">SVM</span></th>
510
+ <th><span class="gloss" data-tip="XGBoost: gradient-boosted trees, 300 estimators, max_depth=6. Sequentially corrects previous errors. Collapses on D3 (71%) — vocabulary overlap between depressive and suicidal language confuses boosted trees.">XGBoost</span></th>
511
+ <th><span class="gloss" data-tip="XLM-RoBERTa: 278M-parameter multilingual transformer. Fine-tuned with lr=2e-5, 3 epochs. Max 128 tokens (D1/D2) or 256 tokens (D3). Best on long-form Reddit posts — contextual embeddings capture meaning beyond keyword matching.">XLM-RoBERTa</span></th>
512
  </tr>
513
  </thead>
514
  <tbody>
 
644
  <div class="demo-section" id="demo">
645
  <div class="sec-eyebrow">Live inference</div>
646
  <div class="sec-h2" style="margin-bottom:8px">Try it — <em>winner model per task</em></div>
647
+ <p class="sec-lead" style="margin-bottom:12px">Sample 3 demonstrates masked suicidality. Try typing clinical-style depressive language ("I feel exhausted, nothing feels enjoyable") to observe the Affective vs. Clinical Lexicon Gap documented in Finding 04.</p>
648
+ <p style="font-size:13px;color:var(--ink2);margin-bottom:24px">How the demo works: <span class="cm-term" onclick="openCM('flask_deploy')">Flask → HuggingFace proxy</span> · <span class="cm-term" onclick="openCM('predict_flow')">predict_all() inference flow</span></p>
649
 
650
  <div class="disclaimer"><strong>Research prototype only.</strong> Not a clinical tool. If you or someone you know is in crisis, please contact a mental health professional or emergency services immediately.</div>
651
 
 
707
  </div>
708
  </div>
709
 
710
+ <!-- CODE MODAL OVERLAY -->
711
+ <div class="cm-overlay" id="cmOverlay" onclick="closeCMOutside(event)">
712
+ <div class="cm-box" id="cmBox">
713
+ <div class="cm-head">
714
+ <div class="cm-title" id="cmTitle"></div>
715
+ <button class="cm-close" onclick="closeCM()">✕</button>
716
+ </div>
717
+ <div class="cm-tabs">
718
+ <div class="cm-tab active" onclick="switchCMTab(0)">Code</div>
719
+ <div class="cm-tab" onclick="switchCMTab(1)">Why</div>
720
+ <div class="cm-tab" onclick="switchCMTab(2)">Output</div>
721
+ </div>
722
+ <div class="cm-body">
723
+ <div class="cm-panel active" id="cmt0"></div>
724
+ <div class="cm-panel" id="cmt1"></div>
725
+ <div class="cm-panel" id="cmt2"></div>
726
+ </div>
727
+ </div>
728
+ </div>
729
+
730
+ <hr class="section-divider">
731
+
732
+ <!-- FAQ SECTION -->
733
+ <section class="faq-section" id="faq">
734
+ <div class="sec-eyebrow">Defence prep</div>
735
+ <div class="sec-h2">Frequently asked <em>questions</em></div>
736
+ <p class="sec-lead">Click any question to expand the answer. Grouped by topic for quick navigation during Q&amp;A.</p>
737
+
738
+ <div class="faq-group">
739
+ <div class="faq-group-title">Data &amp; Datasets</div>
740
+ <div class="faq-item">
741
+ <button class="faq-q" onclick="toggleFaq(this)">What are the three datasets and what makes them different? <span class="faq-chevron">▼</span></button>
742
+ <div class="faq-a"><div class="faq-a-inner">D1 — 6-class depression type classification (atypical, bipolar, major depressive, no depression, postpartum, psychotic) from Kaggle. Twitter-length text, 11,986 samples. D2 — binary suicide/non-suicide from Twitter (10,314 samples, severe 3.46× imbalance). D3 — 4-class suicide/depression/anxiety/normal from Reddit (232K samples, pre-balanced). Each dataset has a different task, different text length, and different vocabulary domain — which is precisely why running all three in parallel is informative.</div></div>
743
+ </div>
744
+ <div class="faq-item">
745
+ <button class="faq-q" onclick="toggleFaq(this)">How did you handle class imbalance? Why SMOTE and not class weighting? <span class="faq-chevron">▼</span></button>
746
+ <div class="faq-a"><div class="faq-a-inner">D1 had 1.89× imbalance (atypical class), D2 had 3.46× imbalance. We applied <code>SMOTE</code> to training data only — never the test set. SMOTE interpolates new synthetic samples in TF-IDF feature space between existing minority-class examples. Class weighting was also evaluated; SMOTE showed equal or better Macro F1 in cross-validation. D3 was pre-balanced and required no oversampling.</div></div>
747
+ </div>
748
+ <div class="faq-item">
749
+ <button class="faq-q" onclick="toggleFaq(this)">Is there any data leakage in your pipeline? <span class="faq-chevron">▼</span></button>
750
+ <div class="faq-a"><div class="faq-a-inner">No. The train/test split (stratified 80/20) is performed first. SMOTE is then applied only to the training portion. The TF-IDF vocabulary is fitted on training data only and applied as a read-only transform to the test set. XLM-RoBERTa uses a fixed pretrained tokeniser. No test sample was ever used to inform any training decision.</div></div>
751
+ </div>
752
+ </div>
753
+
754
+ <div class="faq-group">
755
+ <div class="faq-group-title">Methodology &amp; Models</div>
756
+ <div class="faq-item">
757
+ <button class="faq-q" onclick="toggleFaq(this)">Why four model types per dataset? Why not just use the best one? <span class="faq-chevron">▼</span></button>
758
+ <div class="faq-a"><div class="faq-a-inner">Each captures a different inductive bias: Logistic Regression (linear decision boundary), SVM (maximum-margin), XGBoost (non-linear boosted-tree ensemble), XLM-RoBERTa (contextual transformer). Disagreement between models is itself a signal. On D1, SVM (92.4%) beats XLM-RoBERTa (90.5%) — short tweets don't give the transformer enough context to gain advantage. On D3 (212-word Reddit posts), XLM-RoBERTa (98.1%) dominates every classical model.</div></div>
759
+ </div>
760
+ <div class="faq-item">
761
+ <button class="faq-q" onclick="toggleFaq(this)">What are your TF-IDF settings and why? <span class="faq-chevron">▼</span></button>
762
+ <div class="faq-a"><div class="faq-a-inner"><code>max_features=50,000</code> — covers the full relevant vocabulary without noise. <code>ngram_range=(1,2)</code> — unigrams + bigrams capture local phrases ("not happy", "kill myself") that unigrams miss. <code>sublinear_tf=True</code> — replaces raw tf with 1 + log(tf) to dampen high-frequency word dominance. <code>min_df=2</code> — removes terms that appear in only one document, which add noise.</div></div>
763
+ </div>
764
+ <div class="faq-item">
765
+ <button class="faq-q" onclick="toggleFaq(this)">How was XLM-RoBERTa fine-tuned? What hyperparameters? <span class="faq-chevron">▼</span></button>
766
+ <div class="faq-a"><div class="faq-a-inner">Standard sequence classification fine-tuning: Adam optimiser, <code>lr=2e-5</code>, <code>3 epochs</code>, linear warmup scheduler. Max token length: 128 for D1/D2 (Twitter-length text), 256 for D3 (Reddit posts average 212 words). Cross-entropy loss. Best checkpoint restored by lowest validation loss. 278M parameters — multilingual pretraining covers 100 languages.</div></div>
767
+ </div>
768
+ <div class="faq-item">
769
+ <button class="faq-q" onclick="toggleFaq(this)">Why did XGBoost collapse on D3 to only 71%? <span class="faq-chevron">▼</span></button>
770
+ <div class="faq-a"><div class="faq-a-inner">This is TF-IDF lexical overfitting. D3 is a 4-class task where depressive and suicidal vocabulary heavily overlap in Reddit posts — words like "exhausted", "hopeless", "nothing matters" appear in both classes. Boosted trees overfit to these majority-class token patterns and fail to distinguish fine-grained class boundaries. XLM-RoBERTa's contextual embeddings resolve this because it reads the full sentence, not just individual tokens.</div></div>
771
+ </div>
772
+ </div>
773
+
774
+ <div class="faq-group">
775
+ <div class="faq-group-title">Results &amp; Evaluation</div>
776
+ <div class="faq-item">
777
+ <button class="faq-q" onclick="toggleFaq(this)">Why is SVM accuracy 92.4% on D1 but XLM-RoBERTa (278M params) only gets 90.5%? <span class="faq-chevron">▼</span></button>
778
+ <div class="faq-a"><div class="faq-a-inner">Text length. D1 tweets average ~31 words. Transformers need rich context to outperform classical methods — contextual embeddings add little value when the sentence is 5–10 tokens. TF-IDF bigrams on short explicit text (like tweets) already capture the full signal. This is Finding 01 and one of the key research conclusions: model selection must be text-length aware.</div></div>
779
+ </div>
780
+ <div class="faq-item">
781
+ <button class="faq-q" onclick="toggleFaq(this)">Why show accuracy rather than Macro F1? Isn't accuracy misleading on imbalanced data? <span class="faq-chevron">▼</span></button>
782
+ <div class="faq-a"><div class="faq-a-inner">The dashboard shows accuracy for accessibility (non-specialist audience). After SMOTE, all training classes are equalised — so accuracy and Macro F1 are closely aligned. The full Macro F1, Cohen's Kappa, and per-class precision/recall are reported in the IEEE technical report. The evidence matrix footnote notes this explicitly.</div></div>
783
+ </div>
784
+ <div class="faq-item">
785
+ <button class="faq-q" onclick="toggleFaq(this)">Did adding more training data (50K → 232K) improve D3 results? <span class="faq-chevron">▼</span></button>
786
+ <div class="faq-a"><div class="faq-a-inner">No — only 0.1% change in XLM-RoBERTa accuracy (98.1% → 98.0%). Kolmogorov-Smirnov tests confirm all four splits (Full 232K, H1 116K, H2 116K, Sample 50K) share identical distributions (p &gt; 0.49). The 50K sample fully captures the underlying signal. This is Finding 03 and validates our choice of the 50K sample for the final model.</div></div>
787
+ </div>
788
+ </div>
789
+
790
+ <div class="faq-group">
791
+ <div class="faq-group-title">Architecture &amp; Live Demo</div>
792
+ <div class="faq-item">
793
+ <button class="faq-q" onclick="toggleFaq(this)">Is the live demo using real models or hardcoded responses? <span class="faq-chevron">▼</span></button>
794
+ <div class="faq-a"><div class="faq-a-inner">Real models. The Flask app proxies every request to a HuggingFace Space (<code>esvanth-mindscan.hf.space</code>) which runs <code>predict.py</code> with all 12 loaded models. There is no hardcoded data — every input goes through the full pipeline. If the Space is sleeping it auto-wakes within ~60 seconds.</div></div>
795
+ </div>
796
+ <div class="faq-item">
797
+ <button class="faq-q" onclick="toggleFaq(this)">What does "Ensemble Conflict" (amber banner) mean? Why not just show red? <span class="faq-chevron">▼</span></button>
798
+ <div class="faq-a"><div class="faq-a-inner">It means classical D3 models (LR/SVM/XGBoost) flagged suicide risk by majority vote, but XLM-RoBERTa — the best model at 98.1% accuracy — disagrees. A pure majority vote could trigger false alarms on metaphorical language ("I'm dying of embarrassment"). The amber state expresses uncertainty rather than forcing a binary decision, which maps directly to "escalate for human review" — the appropriate clinical-conservative response.</div></div>
799
+ </div>
800
+ <div class="faq-item">
801
+ <button class="faq-q" onclick="toggleFaq(this)">Why does D2 under-flag clinical-style text like "I feel exhausted, nothing feels enjoyable"? <span class="faq-chevron">▼</span></button>
802
+ <div class="faq-a"><div class="faq-a-inner">This is the Affective vs. Clinical Lexicon Gap (Finding 04, documented in NAACL 2024). D2 was trained on Twitter emotional language — explicit distress, slang, emotional punctuation. Clinical presentations use diagnostic vocabulary: anhedonia ("nothing feels enjoyable"), psychomotor fatigue, flat affect. These words are absent from D2's training distribution. This is not a bug — it is an empirical finding about the domain gap between social media affect and clinical language.</div></div>
803
+ </div>
804
+ <div class="faq-item">
805
+ <button class="faq-q" onclick="toggleFaq(this)">What is the single most important future direction? <span class="faq-chevron">▼</span></button>
806
+ <div class="faq-a"><div class="faq-a-inner">Replace TF-IDF classical models with <strong>MentalBERT/MentalRoBERTa</strong> (Ji et al. 2022) pretrained on mental health forum data. Combine all three tasks in a true multi-task learning setup with a shared encoder and task-specific heads — following the MTL precedent from Zogan et al. (2024). This would address both documented limitations (Affective Lexicon Gap and TF-IDF overfitting) simultaneously.</div></div>
807
+ </div>
808
+ </div>
809
+ </section>
810
+
811
  <footer>
812
  MindScan · NCI H9DAI Research Project 2026 · Academic Prototype Only<br>
813
  Datasets: Zenodo 14233292 · Kaggle albertobellardini · Kaggle nikhileswarkomati<br>
 
975
  }
976
 
977
  function pct(v){return(v*100).toFixed(1)+'%'}
978
+
979
+ /* ── CODE MODAL DATA ── */
980
+ const CM_DATA = {
981
+ clean_text: {
982
+ title: 'clean_text() β€” Text Preprocessing',
983
+ code: `def clean_text(text):
984
+ text = str(text).lower()
985
+ # remove URLs
986
+ text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)
987
+ # remove @mentions
988
+ text = re.sub(r'@\\w+', '', text)
989
+ # remove # symbol (keep hashtag word)
990
+ text = re.sub(r'#', '', text)
991
+ # strip all punctuation
992
+ text = text.translate(
993
+ str.maketrans('', '', string.punctuation)
994
+ )
995
+ # collapse whitespace
996
+ text = re.sub(r'\\s+', ' ', text).strip()
997
+ return text`,
998
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 5 Β· notebooks/DA_2_Notebook.ipynb β€” cell 6 Β· predict.py lines 106–113',
999
+ why: '<strong>Why lowercase?</strong> "Sad" and "sad" must map to the same TF-IDF token. <strong>Why remove URLs?</strong> Hundreds of unique tokens, zero semantic value β€” pure noise. <strong>Why keep hashtag words?</strong> "#depressed" β†’ "depressed" preserves the semantic signal, removes the markup. <strong>Why no stemming?</strong> Stemming degrades bigram quality β€” "kill myself" would become "kill myself" but "killing" β†’ "kill" breaks n-gram boundaries. Same function is used at both training time (notebook) and inference time (predict.py) to guarantee identical preprocessing.',
1000
+ outputs: [
1001
+ {label:'Input', val:'"I been going through #Depression after @user check https://t.co/xyz!!"'},
1002
+ {label:'Output', val:'"i been going through depression after check"'},
1003
+ {label:'Note', val:'Applied to all 3 datasets before TF-IDF and before XLM-RoBERTa tokenisation'},
1004
+ ]
1005
+ },
1006
+ smote: {
1007
+ title: 'SMOTE β€” Synthetic Minority Oversampling',
1008
+ code: `def apply_smote(X_train, y_train):
1009
+ before = Counter(y_train)
1010
+ smote = SMOTE(random_state=42)
1011
+ X_bal, y_bal = smote.fit_resample(X_train, y_train)
1012
+ after = Counter(y_bal)
1013
+ print(f'SMOTE: {sum(before.values())} β†’ {sum(after.values())}')
1014
+ return X_bal, y_bal
1015
+
1016
+ # Called AFTER TF-IDF vectorisation, AFTER train/test split
1017
+ X1_bal, y1_bal = apply_smote(X1_tr_tf, y1_tr)`,
1018
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 5 (apply_smote def) Β· cell 10 (D1 call) Β· cell 17 (D2 call) Β· D3 skipped',
1019
+ why: '<strong>Why after TF-IDF?</strong> SMOTE interpolates in feature space β€” it creates synthetic TF-IDF vectors, not synthetic text. <strong>Why not before the split?</strong> Applying SMOTE before splitting would let synthetic samples leak into the test set β€” the test set must contain only real data. <strong>Why not class_weight instead?</strong> Class weighting reweights the loss function β€” it doesn\'t add new training examples. SMOTE was chosen because it physically fills the minority-class region of feature space, giving tree-based models (RF, XGB) more to learn from. <strong>D3 skipped:</strong> D3 is pre-balanced (116K each class) β€” no intervention needed.',
1020
+ outputs: [
1021
+ {label:'D1', val:'11,986 β†’ 17,982 samples (atypical: 1,584 β†’ 2,997, each class equalised)'},
1022
+ {label:'D2', val:'8,251 β†’ 12,800 samples (Depressed: 1,851 β†’ 6,400)'},
1023
+ {label:'D3', val:'Skipped β€” pre-balanced at 116,037 per class'},
1024
+ ]
1025
+ },
1026
+ tfidf: {
1027
+ title: 'TfidfVectorizer β€” Feature Extraction',
1028
+ code: `def make_tfidf(X_train, X_test, max_features=50000):
1029
+ tfidf = TfidfVectorizer(
1030
+ max_features=50000, # top 50K tokens by corpus frequency
1031
+ ngram_range=(1, 2), # unigrams AND bigrams
1032
+ sublinear_tf=True, # log(1+tf) instead of raw tf
1033
+ min_df=2 # ignore tokens appearing < 2 times
1034
+ )
1035
+ Xtr = tfidf.fit_transform(X_train) # fit on train only
1036
+ Xte = tfidf.transform(X_test) # apply to test (no fit)
1037
+ return tfidf, Xtr, Xte`,
1038
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 5 (make_tfidf def) Β· cell 10 (D1) Β· cell 17 (D2) Β· cell 22 (D3)',
1039
+ why: '<strong>ngram_range=(1,2):</strong> Bigrams capture "kill myself", "not happy", "feeling better" β€” critical signals that unigrams miss entirely. <strong>sublinear_tf=True:</strong> Applies log(1+tf) to dampen high-frequency word dominance. Without this, common words like "i", "feel" swamp the features. <strong>min_df=2:</strong> Removes hapax legomena (words appearing only once) β€” they add 0 generalisable information. <strong>fit only on train:</strong> Vocabulary is locked on training data β€” the test set is transformed using this fixed vocabulary, preventing any data leakage.',
1040
+ outputs: [
1041
+ {label:'D1 shape', val:'11,986 Γ— 50,000 sparse matrix (tweets Γ— features)'},
1042
+ {label:'D2 shape', val:'8,251 Γ— 50,000 sparse matrix'},
1043
+ {label:'D3 shape', val:'40,000 Γ— 50,000 sparse matrix'},
1044
+ {label:'After SMOTE', val:'D1 becomes 17,982 Γ— 50,000, D2 becomes 12,800 Γ— 50,000'},
1045
+ ]
1046
+ },
1047
+ tokeniser: {
1048
+ title: 'XLM-RoBERTa Tokeniser',
1049
+ code: `tokenizer = AutoTokenizer.from_pretrained(
1050
+ 'FacebookAI/xlm-roberta-base'
1051
+ )
1052
+
1053
+ def tokenize_tweets(examples):
1054
+ return tokenizer(
1055
+ examples['text'],
1056
+ max_length=128, # 128 for D1/D2 (tweets avg ~40 tokens)
1057
+ truncation=True, # cut anything beyond max_length
1058
+ padding='max_length' # pad shorter inputs to fixed length
1059
+ )
1060
+
1061
+ # D3 uses max_length=256 β€” Reddit posts avg 212 words (~280 tokens)
1062
+ def tokenize_reddit(examples):
1063
+ return tokenizer(
1064
+ examples['text'],
1065
+ max_length=256,
1066
+ truncation=True,
1067
+ padding='max_length'
1068
+ )`,
1069
+ src: 'notebooks/DA_2_Notebook.ipynb β€” cell 9 (tokenize_tweets, max_length=128, D1/D2) Β· cell 21 (tokenize_reddit, max_length=256, D3)',
1070
+ why: '<strong>SentencePiece subword tokenisation:</strong> Splits unknown words into subword pieces β€” "suicidal" might become ["su", "ici", "dal"]. No word is truly out-of-vocabulary. <strong>max_length=128 for D1/D2:</strong> Tweets average ~31 words β‰ˆ 40 tokens. 128 is 3Γ— headroom. <strong>max_length=256 for D3:</strong> Reddit posts average 212 words β‰ˆ 280 tokens β€” 128 would truncate most of the signal. <strong>padding=\'max_length\':</strong> All batches must be identical length for GPU tensor operations β€” shorter inputs are padded with [PAD] tokens. The attention mask tells the model to ignore padding.',
1071
+ outputs: [
1072
+ {label:'D1/D2 shape', val:'Each input β†’ tensor of shape [128] (input_ids) + [128] (attention_mask)'},
1073
+ {label:'D3 shape', val:'Each input β†’ tensor of shape [256] Γ— 2'},
1074
+ {label:'Example', val:'"i feel hopeless" β†’ input_ids: [0, 444, 7809, 73542, 2, 1, 1, ...]'},
1075
+ ]
1076
+ },
1077
+ lr: {
1078
+ title: 'Logistic Regression',
1079
+ code: `LogisticRegression(
1080
+ max_iter=1000, # enough iterations to converge on 50K features
1081
+ class_weight='balanced', # backup alongside SMOTE
1082
+ random_state=42,
1083
+ n_jobs=-1 # use all CPU cores
1084
+ )`,
1085
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 11 (D1) Β· cell 18 (D2) Β· cell 23 (D3)',
1086
+ why: '<strong>Why use it?</strong> Fast, interpretable linear baseline. On 50,000 TF-IDF features, L2 regularisation prevents overfitting by shrinking large weights toward zero. Outputs calibrated probabilities via softmax β€” important for confidence scores in the UI. <strong>class_weight=\'balanced\':</strong> Secondary guard alongside SMOTE β€” the model pays proportionally more attention to minority classes during gradient updates.',
1087
+ outputs: [
1088
+ {label:'D1', val:'91.5% accuracy β€” solid baseline, beaten by SVM'},
1089
+ {label:'D2', val:'98.9% accuracy'},
1090
+ {label:'D3', val:'93.2% accuracy'},
1091
+ ]
1092
+ },
1093
+ svm: {
1094
+ title: 'SVM β€” LinearSVC',
1095
+ code: `LinearSVC(
1096
+ C=1.0, # regularisation strength (lower = more reg)
1097
+ class_weight='balanced',
1098
+ max_iter=2000,
1099
+ random_state=42
1100
+ )
1101
+
1102
+ # LinearSVC has no predict_proba β€” use decision_function + softmax
1103
+ scores = model.decision_function(vec)[0]
1104
+ e = np.exp(scores - scores.max())
1105
+ conf = float(e[pred_idx] / e.sum())`,
1106
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 11 (D1) Β· cell 18 (D2) Β· cell 23 (D3) Β· predict.py lines 147–154 (confidence fallback)',
1107
+ why: '<strong>Why SVM wins on D1?</strong> LinearSVC finds the maximum-margin hyperplane in TF-IDF feature space β€” the optimal linear decision boundary for sparse high-dimensional data. Tweets (31 words avg) produce sparse TF-IDF vectors where the margin is well-defined. Contextual embeddings (XLM-RoBERTa) add no value at this sentence length. <strong>Why LinearSVC over SVC(kernel=\'rbf\')?</strong> Linear kernel scales to 50,000 features. RBF kernel would be O(nΒ²) β€” computationally infeasible.',
1108
+ outputs: [
1109
+ {label:'D1', val:'92.4% accuracy β€” best model on D1, beats XLM-RoBERTa (90.5%)'},
1110
+ {label:'D2', val:'97.1% accuracy'},
1111
+ {label:'D3', val:'77.8% accuracy'},
1112
+ ]
1113
+ },
1114
+ xgb: {
1115
+ title: 'XGBoost β€” XGBClassifier',
1116
+ code: `XGBClassifier(
1117
+ n_estimators=300, # 300 trees built sequentially
1118
+ learning_rate=0.1, # each tree contributes 10% of its weight
1119
+ max_depth=6, # max tree depth β€” controls complexity
1120
+ eval_metric='logloss',
1121
+ random_state=42,
1122
+ n_jobs=-1
1123
+ )`,
1124
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 11 (D1) Β· cell 18 (D2) Β· cell 23 (D3)',
1125
+ why: '<strong>Gradient boosting principle:</strong> Each new tree is trained to correct the residual errors of all previous trees. 300 trees Γ— learning_rate=0.1 = strong ensemble. <strong>Why does it collapse on D3?</strong> Vocabulary overlap between depressive and suicidal language in Reddit posts β€” words like "exhausted", "hopeless" appear in both classes. Boosted trees memorise these majority-class token patterns and fail at full scale (232K). XGBoost is highly sensitive to distribution shifts at this scale, shown by inconsistent H1/H2 results (60.1% vs 71.0%).',
1126
+ outputs: [
1127
+ {label:'D1', val:'91.8% accuracy'},
1128
+ {label:'D2', val:'99.3% accuracy'},
1129
+ {label:'D3 (50K)', val:'91.6% β€” performs well on sample'},
1130
+ {label:'D3 (Full 232K)', val:'70.5% β€” collapse (lexical overfitting)'},
1131
+ ]
1132
+ },
1133
+ xlmr_ft: {
1134
+ title: 'XLM-RoBERTa Fine-Tuning',
1135
+ code: `xlmr = AutoModelForSequenceClassification.from_pretrained(
1136
+ 'FacebookAI/xlm-roberta-base',
1137
+ num_labels=NUM_LABELS # 6 for D1, 2 for D2, 2 for D3
1138
+ )
1139
+
1140
+ args = TrainingArguments(
1141
+ num_train_epochs=3,
1142
+ learning_rate=2e-5, # standard BERT fine-tuning rate
1143
+ per_device_train_batch_size=16, # 8 for D3 (longer sequences)
1144
+ gradient_accumulation_steps=2, # D3 only β€” simulates batch=16
1145
+ warmup_steps=200, # gradual LR increase at start
1146
+ weight_decay=0.01, # L2 regularisation on weights
1147
+ load_best_model_at_end=True, # save epoch with lowest val loss
1148
+ fp16=torch.cuda.is_available() # half precision β€” 2Γ— faster on GPU
1149
+ )
1150
+
1151
+ trainer = Trainer(
1152
+ model=xlmr, args=args,
1153
+ train_dataset=train_tok,
1154
+ eval_dataset=test_tok
1155
+ )
1156
+ trainer.train()`,
1157
+ src: 'notebooks/DA_2_Notebook.ipynb β€” cell 10 (model init D1) Β· cell 11 (TrainingArguments D1) Β· cell 17 (D2) Β· cell 22 (model init D3) Β· cell 23 (TrainingArguments D3)',
1158
+ why: '<strong>lr=2e-5:</strong> Standard for fine-tuning BERT-family models. Too high destroys pretrained weights (catastrophic forgetting). Too low fails to converge in 3 epochs. <strong>warmup_steps=200:</strong> LR starts at 0 and linearly ramps β€” prevents early instability when weights are far from the task optimum. <strong>load_best_model_at_end:</strong> Epoch 3 is not always best β€” we restore the checkpoint with the lowest validation loss. <strong>D3 batch=8 + accumulation=2:</strong> max_length=256 uses 2Γ— GPU memory vs 128. Accumulation simulates batch=16 without OOM.',
1159
+ outputs: [
1160
+ {label:'D1', val:'90.5% accuracy (Macro F1: 0.9117, ΞΊ=0.8852)'},
1161
+ {label:'D2', val:'99.95% accuracy (Macro F1: 0.9993)'},
1162
+ {label:'D3', val:'98.1% accuracy (Macro F1: 0.9810, ΞΊ=0.9620)'},
1163
+ ]
1164
+ },
1165
+ majority_vote: {
1166
+ title: 'Ensemble Vote β€” Risk Flag Logic',
1167
+ code: `# From predict.py β€” predict_all() function
1168
+ suicide_count = sum(
1169
+ 1 for r in d3.values()
1170
+ if 'suicide' in r['label'].lower()
1171
+ and 'non' not in r['label'].lower()
1172
+ )
1173
+ risk_flag = suicide_count >= 3 # majority = β‰₯3 of 4 models
1174
+
1175
+ # d3.values() = results from LR, SVM, XGBoost, XLM-RoBERTa
1176
+ # XLM-RoBERTa is also checked separately for banner state:
1177
+ isSuicide = d3['XLM-RoBERTa'].label includes 'suicide' (JS)
1178
+
1179
+ # Three UI states:
1180
+ # risk_flag=True AND XLM-R agrees β†’ RED (High Suicide Risk)
1181
+ # risk_flag=True AND XLM-R dissents β†’ AMBER (Ensemble Conflict)
1182
+ # risk_flag=False β†’ GREEN (Low Risk)`,
1183
+ src: 'predict.py lines 266–270 (suicide_count + risk_flag) Β· predict.py line 296 (suicide_votes string) Β· templates/index.html JS render() β€” banner state logic',
1184
+ why: '<strong>Why β‰₯3/4 threshold?</strong> 1–2 flagging models could be TF-IDF false positives (lexical overfitting). 3+ represents genuine consensus β€” meaningful signal. <strong>Why check XLM-RoBERTa separately for the banner?</strong> XLM-RoBERTa has the highest D3 accuracy (98.1%) and understands context. If XLM-R disagrees with the majority, the amber "Ensemble Conflict" state is safer than a red alert β€” it flags uncertainty rather than over-alarming on metaphorical language ("I\'m dying of embarrassment").',
1185
+ outputs: [
1186
+ {label:'Threshold', val:'β‰₯ 3/4 D3 models output "suicide" (not "non-suicide")'},
1187
+ {label:'Red banner', val:'risk_flag=True AND XLM-RoBERTa confirms suicide'},
1188
+ {label:'Amber banner', val:'risk_flag=True but XLM-RoBERTa says non-suicide'},
1189
+ {label:'Green banner', val:'risk_flag=False β€” fewer than 3 models flagged'},
1190
+ ]
1191
+ },
1192
+ eval_metrics: {
1193
+ title: 'Evaluation β€” How Metrics Are Computed',
1194
+ code: `def evaluate_transformer(name, y_true, y_pred,
1195
+ label_names, ds_key, results_store):
1196
+ acc = accuracy_score(y_true, y_pred)
1197
+ macro = f1_score(y_true, y_pred, average='macro')
1198
+ kappa = cohen_kappa_score(y_true, y_pred)
1199
+
1200
+ print(f'Accuracy : {acc*100:.2f}%')
1201
+ print(f'Macro F1 : {macro:.4f}')
1202
+ print(f"Cohen's Kappa: {kappa:.4f}")
1203
+ print(classification_report(y_true, y_pred,
1204
+ target_names=label_names))
1205
+
1206
+ results_store[name] = {
1207
+ 'accuracy': round(acc, 4),
1208
+ 'macro_f1': round(macro, 4),
1209
+ 'kappa': round(kappa, 4)
1210
+ }
1211
+
1212
+ # Same function used for classical models in Notebook 1:
1213
+ for name, model in models_d1.items():
1214
+ model.fit(X1_bal, y1_bal) # train on SMOTE-balanced data
1215
+ preds = model.predict(X1_te_tf) # test on original held-out set
1216
+ evaluate(name, y1_te, preds, le1.classes_, 'd1', d1_results)`,
1217
+ src: 'notebooks/DA_Notebook_One.ipynb β€” cell 5 (evaluate def, classical) Β· notebooks/DA_2_Notebook.ipynb β€” cell 6 (evaluate_transformer def)',
1218
+ why: '<strong>Accuracy:</strong> (correct predictions) / (total predictions). Simple but misleading on imbalanced data β€” a model predicting majority class always gets high accuracy. Valid here because SMOTE balanced the training set and D3 is pre-balanced. <strong>Macro F1:</strong> Averages F1 per class without weighting by class size β€” penalises models that ignore minority classes. This is the primary metric in the IEEE report. <strong>Cohen\'s Kappa:</strong> Measures agreement beyond what chance alone would produce. Formula: (observed βˆ’ expected) / (1 βˆ’ expected). ΞΊ > 0.8 = almost perfect agreement. Reported because the base paper (Tumaliuan 2024) did not report it β€” we added it as an improvement. <strong>classification_report:</strong> Shows per-class precision, recall, F1 β€” the full picture behind the headline number.',
1219
+ outputs: [
1220
+ {label:'D1 SVM', val:'Accuracy 92.4%, Macro F1 0.9269, ΞΊ=0.9072'},
1221
+ {label:'D2 XLM-R', val:'Accuracy 99.95%, Macro F1 0.9993, ΞΊ=0.9986'},
1222
+ {label:'D3 XLM-R', val:'Accuracy 98.1%, Macro F1 0.9810, ΞΊ=0.9620'},
1223
+ {label:'Atypical F1', val:'0.992 β€” highest per-class score in the project (D1, after SMOTE)'},
1224
+ ]
1225
+ },
1226
+ flask_deploy: {
1227
+ title: 'Flask App β€” Deployment & Proxy Mode',
1228
+ code: `# app.py β€” auto-detects LOCAL vs PROXY mode at startup
1229
+ _LOCAL_MODELS = os.path.join(BASE_DIR, 'models', 'classical')
1230
+ _use_local = os.path.isdir(_LOCAL_MODELS)
1231
+
1232
+ @app.route('/predict', methods=['POST'])
1233
+ def predict():
1234
+ data = request.get_json()
1235
+ text = data['text'].strip()
1236
+
1237
+ if len(text) > 5000:
1238
+ return jsonify({'error': 'Text too long'}), 400
1239
+
1240
+ if _use_local:
1241
+ # LOCAL mode β€” models loaded in memory
1242
+ result = predict_all(text)
1243
+ return jsonify(result)
1244
+ else:
1245
+ # PROXY mode β€” forward to HuggingFace Space
1246
+ r = requests.post(
1247
+ f'{HF_SPACE_URL}/predict',
1248
+ json={'text': text},
1249
+ timeout=120
1250
+ )
1251
+ return r.content, r.status_code
1252
+
1253
+ # HF_SPACE_URL = 'https://esvanth-mindscan.hf.space'
1254
+ # Overridable via environment variable`,
1255
+ src: 'app.py lines 25–27 (mode detection) Β· lines 61–97 (/predict endpoint) Β· line 70 (5000-char limit) Β· line 91 (timeout=120)',
1256
+ why: '<strong>Why two modes?</strong> The 12 models total ~2GB on disk. Running locally requires the models folder. The HuggingFace Space hosts the same predict.py and models β€” the proxy just forwards requests there. <strong>Why timeout=120?</strong> The HF Space sleeps after inactivity and takes ~60s to wake. 120s gives headroom. <strong>Why 5000 char limit?</strong> XLM-RoBERTa max_length=256 tokens β‰ˆ ~1500 characters. 5000 chars is a safe upper bound that prevents abuse without being restrictive. <strong>How the browser talks to Flask:</strong> JavaScript fetch() β†’ POST /predict (localhost:5001) β†’ Flask β†’ HF Space β†’ predict_all() β†’ JSON response β†’ render() updates the UI.',
1257
+ outputs: [
1258
+ {label:'LOCAL mode', val:'Triggered when models/classical/ directory exists. Loads all 12 models at startup (~30s on CPU).'},
1259
+ {label:'PROXY mode', val:'Default — no local models needed. Forwards to esvanth-mindscan.hf.space'},
1260
+ {label:'Timeout', val:'504 returned after 120s if HF Space is sleeping. Auto-wakes in ~60s.'},
1261
+ {label:'Port', val:'localhost:5001 (overridable via PORT env var)'},
1262
+ ]
1263
+ },
1264
+ predict_flow: {
1265
+ title: 'predict_all() — Full Inference Flow',
1266
+ code: `def predict_all(raw_text):
1267
+ # Step 1 — clean text (same function as training)
1268
+ clean = clean_text(raw_text)
1269
+
1270
+ # Step 2 — run all 3 classical models per dataset
1271
+ # (LR, SVM, XGBoost share the same TF-IDF vector)
1272
+ def predict_classical(text_clean, ds):
1273
+ tfidf = _models[f'tfidf_{ds}']
1274
+ vec = tfidf.transform([text_clean]) # sparse vector
1275
+ for model_name in ['logistic_regression','svm','xgboost']:
1276
+ model = _models[f'{model_name}_{ds}']
1277
+ pred_idx = model.predict(vec)[0]
1278
+ label = le.classes_[pred_idx]
1279
+ # SVM has no predict_proba — use softmax(decision_function)
1280
+ if hasattr(model, 'predict_proba'):
1281
+ conf = model.predict_proba(vec)[0][pred_idx]
1282
+ else:
1283
+ scores = model.decision_function(vec)[0]
1284
+ e = np.exp(scores - scores.max())
1285
+ conf = e[pred_idx] / e.sum()
1286
+
1287
+ # Step 3 — run XLM-RoBERTa per dataset
1288
+ def predict_transformer(text_raw, ds):
1289
+ inputs = tokenizer(text_raw, max_length=max_len,
1290
+ truncation=True, padding='max_length')
1291
+ with torch.no_grad():
1292
+ logits = model(**inputs).logits
1293
+ probs = torch.softmax(logits, dim=1)[0]
1294
+ pred_idx = probs.argmax()
1295
+
1296
+ # Step 4 — majority vote for risk_flag
1297
+ suicide_count = sum(1 for r in d3.values()
1298
+ if 'suicide' in r['label'] and 'non' not in r['label'])
1299
+ risk_flag = suicide_count >= 3`,
1300
+ src: 'predict.py — clean_text lines 106–113 · predict_classical lines 119–163 · predict_transformer lines 166–215 · predict_all lines 221–302',
1301
+ why: '<strong>Why clean the text first?</strong> The TF-IDF vocabulary was built on clean text — passing raw text would miss tokens. XLM-RoBERTa receives the raw text because its SentencePiece tokeniser handles punctuation/URLs natively. <strong>Why one TF-IDF vector for 3 classical models?</strong> All three (LR, SVM, XGBoost) use the same vectoriser — the vector is computed once and reused, saving 2 redundant transformations per dataset. <strong>Why torch.no_grad()?</strong> Inference doesn\'t need gradients — disabling them reduces memory usage and speeds up the forward pass. <strong>Why softmax on logits?</strong> The model outputs raw logit scores (unbounded). Softmax converts them to probabilities that sum to 1 — required for the confidence percentage shown in the UI.',
1302
+ outputs: [
1303
+ {label:'Input', val:'"I feel exhausted, nothing feels enjoyable"'},
1304
+ {label:'After clean', val:'"i feel exhausted nothing feels enjoyable"'},
1305
+ {label:'D1 winner', val:'SVM → Major Depressive (highest confidence)'},
1306
+ {label:'D2 winner', val:'XLM-RoBERTa → Not Depressed (Twitter Affect Bias — clinical text)'},
1307
+ {label:'D3 result', val:'risk_flag computed from 4 model votes; XLM-R checked separately for banner'},
1308
+ {label:'Response time', val:'~200ms local (GPU) · ~2–5s proxy (HF Space warm)'},
1309
+ ]
1310
+ },
1311
+ split_study: {
1312
+ title: 'D3 Split Study — RQ2',
1313
+ code: `# Sample 25K per class (50K total) for the baseline
1314
+ df3_sample = df3.groupby('label').apply(
1315
+ lambda x: x.sample(25000, random_state=42)
1316
+ ).reset_index(drop=True)
1317
+
1318
+ # Half splits — 12.5K per class each
1319
+ df3_h1 = df3.groupby('label').apply(
1320
+ lambda x: x.iloc[:12500]
1321
+ ).reset_index(drop=True)
1322
+ df3_h2 = df3.groupby('label').apply(
1323
+ lambda x: x.iloc[12500:25000]
1324
+ ).reset_index(drop=True)
1325
+
1326
+ # Full dataset — 116K per class (232K total)
1327
+ df3_full = df3 # no sampling
1328
+
1329
+ # KS test to confirm splits share same distribution
1330
+ from scipy.stats import ks_2samp
1331
+ stat, p = ks_2samp(len_sample, len_full)
1332
+ # p > 0.49 across all splits — identical distributions confirmed`,
1333
+ src: 'notebooks/DA_3_SplitStudy.ipynb — cell 28 (sampling) · cell 4 (TrainingArguments) · cell 14 (KS test)',
1334
+ why: '<strong>What is RQ2?</strong> "Does more training data improve performance?" The split study trains 4 separate XLM-RoBERTa models on 50K, 116K (×2), and 232K samples. <strong>KS test:</strong> Kolmogorov-Smirnov test verifies all splits come from the same distribution (p > 0.49) — ruling out that one split has easier examples. <strong>Finding:</strong> Accuracy varies by at most 0.3 percentage points across the four models (97.8%–98.1%), and the full 232K set scores only 0.1 points below the 50K sample (98.1% → 98.0%) despite ~4.6× more data. The 50K sample fully captures the underlying signal distribution.',
1335
+ outputs: [
1336
+ {label:'50K sample', val:'98.1% accuracy (XLM-RoBERTa)'},
1337
+ {label:'H1 (116K)', val:'97.8% accuracy'},
1338
+ {label:'H2 (116K)', val:'98.0% accuracy'},
1339
+ {label:'Full (232K)', val:'98.0% accuracy — Δ=0.1% vs 50K'},
1340
+ {label:'KS p-value', val:'p > 0.49 across all split pairs — identical distributions'},
1341
+ ]
1342
+ }
1343
+ };
1344
+
1345
/**
 * Open the code-explainer modal for one CM_DATA entry.
 *
 * Fills the modal title and the three tab panels (code, rationale, sample
 * outputs), switches back to the first tab, marks the overlay as open and
 * sets the page body's overflow to hidden. Returns without touching the DOM
 * when `key` has no entry in CM_DATA.
 *
 * @param {string} key - Property name of the CM_DATA entry to display.
 */
function openCM(key){
  const entry = CM_DATA[key];
  if(!entry) return;

  const byId = id => document.getElementById(id);

  byId('cmTitle').textContent = entry.title;

  // Code tab: snippet and source reference are escaped so they display verbatim.
  byId('cmt0').innerHTML =
    `<pre class="cm-pre">${escHTML(entry.code)}</pre>` +
    `<div class="cm-src">Source: ${escHTML(entry.src)}</div>`;

  // Why tab: NOT escaped — the `why` strings carry inline markup such as <strong>.
  byId('cmt1').innerHTML = `<div class="cm-why-body">${entry.why}</div>`;

  // Output tab: one label/value row per item in `outputs`.
  byId('cmt2').innerHTML = entry.outputs.reduce(
    (html, out) => html +
      `<div class="cm-out-row"><div class="cm-out-lbl">${escHTML(out.label)}</div>` +
      `<div class="cm-out-val"><em>${escHTML(out.val)}</em></div></div>`,
    ''
  );

  // Start on the code tab, then reveal the overlay and stop the page scrolling behind it.
  switchCMTab(0);
  byId('cmOverlay').classList.add('open');
  document.body.style.overflow = 'hidden';
}
1371
+
1372
/**
 * Hide the code-explainer modal: removes the `open` class from the overlay
 * and clears the inline overflow style on the page body.
 */
function closeCM(){
  const overlay = document.getElementById('cmOverlay');
  overlay.classList.remove('open');
  document.body.style.overflow = '';
}
1376
+
1377
/**
 * Close the modal only when the event's target is the overlay element itself,
 * i.e. not an element nested inside it.
 *
 * @param {Event} e - Event whose `target` is compared against the overlay.
 */
function closeCMOutside(e){
  const overlay = document.getElementById('cmOverlay');
  if(e.target !== overlay) return;
  closeCM();
}
1380
+
1381
/**
 * Activate the modal tab at position `idx` and its matching panel.
 *
 * Every `.cm-tab` and every `.cm-panel` gets the `active` class when its
 * position in document order equals `idx`, and loses it otherwise.
 *
 * @param {number} idx - Zero-based position of the tab/panel to show.
 */
function switchCMTab(idx){
  for(const selector of ['.cm-tab', '.cm-panel']){
    document.querySelectorAll(selector).forEach((el, pos) => {
      el.classList.toggle('active', pos === idx);
    });
  }
}
1385
+
1386
/**
 * Escape a value for safe insertion into HTML markup.
 *
 * Replaces the five HTML-significant characters (& < > " ') with entities, so
 * the result is safe both as element content and inside a quoted attribute
 * value. `null` and `undefined` become an empty string instead of the literal
 * words "null" / "undefined".
 *
 * @param {*} s - Value to escape; non-strings are converted with String().
 * @returns {string} The escaped text.
 */
function escHTML(s){
  if(s == null) return '';
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  // Single pass, so an already-produced '&' in an entity is never re-escaped.
  return String(s).replace(/[&<>"']/g, ch => entities[ch]);
}
1389
+
1390
// Close the code-explainer modal on Escape — but only while it is actually open.
// closeCM() also clears body.style.overflow, so calling it unconditionally would
// undo a scroll lock set by anything else on the page.
document.addEventListener('keydown', e => {
  if(e.key !== 'Escape') return;
  const overlay = document.getElementById('cmOverlay');
  if(overlay && overlay.classList.contains('open')) closeCM();
});
1392
+
1393
/**
 * Expand or collapse the FAQ entry that contains `btn`.
 *
 * Flips the `open` class on the nearest `.faq-item` ancestor and mirrors the
 * new state in `aria-expanded` on the trigger so assistive technology can
 * announce it. Does nothing when `btn` is not inside a `.faq-item`.
 *
 * @param {Element} btn - The control that was activated.
 */
function toggleFaq(btn){
  const item = btn.closest('.faq-item');
  if(!item) return;
  // classList.toggle returns true when the class is present after the call.
  const nowOpen = item.classList.toggle('open');
  btn.setAttribute('aria-expanded', String(nowOpen));
}
1398
  </script>
1399
  </body>
1400
  </html>