<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>MindScan — Multi-Model Framework for Depression &amp; Suicide Risk Detection</title>
<link href="https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Geist:wght@300;400;500;600&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>
:root{
--bg:#f7f5f0; --bg2:#efece8; --bg3:#e6e2da; --bg4:#dedad1;
--ink:#1a1816; --ink2:#5c5750; --ink3:#9c9790;
--border:rgba(26,24,22,0.09); --border2:rgba(26,24,22,0.16);
--blue:#1d4ed8; --blue-bg:#eff6ff; --blue-mid:#3b82f6;
--amber:#b45309; --amber-bg:#fffbeb; --amber-mid:#d97706;
--red:#b91c1c; --red-bg:#fef2f2;
--green:#15803d; --green-bg:#f0fdf4;
--purple:#6d28d9; --purple-bg:#f5f3ff;
--shadow:0 1px 3px rgba(26,24,22,0.06),0 4px 16px rgba(26,24,22,0.04);
--shadow-md:0 2px 8px rgba(26,24,22,0.08),0 8px 32px rgba(26,24,22,0.06);
}
*{box-sizing:border-box;margin:0;padding:0}
html{scroll-behavior:smooth}
body{background:var(--bg);color:var(--ink);font-family:'Geist',sans-serif;font-size:15px;line-height:1.6;overflow-x:hidden}
/* ── HEADER ── */
/* Sticky translucent top bar. The -webkit- prefixed backdrop-filter is listed
   first so Safari versions that only understand the prefixed property still
   blur the content scrolling beneath the bar. */
header{
padding:16px 48px;display:flex;align-items:center;justify-content:space-between;
border-bottom:1px solid var(--border);background:rgba(247,245,240,0.94);
position:sticky;top:0;z-index:100;-webkit-backdrop-filter:blur(10px);backdrop-filter:blur(10px);
}
.logo{display:flex;align-items:center;gap:10px}
.logo-mark{width:28px;height:28px;background:var(--ink);border-radius:7px;display:flex;align-items:center;justify-content:center}
.logo-mark svg{width:14px;height:14px}
.logo-txt{font-family:'Instrument Serif',serif;font-size:18px;letter-spacing:-.02em}
.logo-txt em{font-style:italic;color:var(--ink2)}
.nav-links{display:flex;gap:2px}
.nav-links a{font-size:12px;color:var(--ink2);padding:5px 10px;border-radius:6px;text-decoration:none;transition:all .15s;font-family:'DM Mono',monospace}
.nav-links a:hover{background:var(--bg2);color:var(--ink)}
.nav-badge{font-size:10px;font-family:'DM Mono',monospace;background:var(--amber-bg);color:var(--amber);border:1px solid rgba(180,83,9,.2);padding:4px 10px;border-radius:20px}
/* ── HERO ── */
.hero{padding:80px 48px 64px;max-width:1040px;margin:0 auto}
.hero-top{display:grid;grid-template-columns:1fr 360px;gap:48px;align-items:start;margin-bottom:52px}
.hero-eyebrow{display:flex;align-items:center;gap:8px;margin-bottom:18px}
.eyebrow-dot{width:6px;height:6px;border-radius:50%;background:var(--green);animation:blink 2.5s infinite}
@keyframes blink{0%,100%{opacity:1}50%{opacity:.2}}
.eyebrow-txt{font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3);letter-spacing:.1em;text-transform:uppercase}
.hero h1{font-family:'Instrument Serif',serif;font-size:clamp(28px,3.6vw,42px);font-weight:400;line-height:1.13;letter-spacing:-.03em;color:var(--ink);margin-bottom:18px}
.hero h1 em{font-style:italic;color:var(--ink2)}
.hero-sub{font-size:15px;color:var(--ink2);line-height:1.7;margin-bottom:24px;max-width:500px}
/* RQ Cards */
.rq-cards{display:flex;flex-direction:column;gap:10px}
.rq-card{border-radius:12px;padding:16px 18px;border:1px solid}
.rq-card.rq1{background:var(--blue-bg);border-color:rgba(29,78,216,.2)}
.rq-card.rq2{background:var(--amber-bg);border-color:rgba(180,83,9,.2)}
.rq-label{font-size:9px;font-family:'DM Mono',monospace;letter-spacing:.14em;text-transform:uppercase;font-weight:500;margin-bottom:5px}
.rq-card.rq1 .rq-label{color:var(--blue)}
.rq-card.rq2 .rq-label{color:var(--amber)}
.rq-text{font-size:13px;color:var(--ink);line-height:1.5}
/* Stats panel */
.stats-panel{background:var(--bg2);border:1px solid var(--border);border-radius:16px;padding:24px;display:grid;grid-template-columns:1fr 1fr;gap:16px;box-shadow:var(--shadow)}
.stat-box{text-align:center;padding:12px;background:var(--bg);border-radius:10px;border:1px solid var(--border)}
.stat-num{font-family:'Instrument Serif',serif;font-size:28px;letter-spacing:-.02em;color:var(--ink);line-height:1}
.stat-lbl{font-size:10px;font-family:'DM Mono',monospace;color:var(--ink3);margin-top:4px;text-transform:uppercase;letter-spacing:.08em}
/* ── SECTION SHARED ── */
.section{max-width:1040px;margin:0 auto;padding:64px 48px}
.sec-eyebrow{font-size:10px;font-family:'DM Mono',monospace;letter-spacing:.12em;text-transform:uppercase;color:var(--blue);margin-bottom:10px}
.sec-h2{font-family:'Instrument Serif',serif;font-size:clamp(24px,3.5vw,38px);font-weight:400;letter-spacing:-.02em;line-height:1.15;margin-bottom:8px}
.sec-h2 em{font-style:italic;color:var(--ink2)}
.sec-lead{font-size:14px;color:var(--ink2);max-width:560px;line-height:1.7;margin-bottom:36px}
.section-divider{border:none;border-top:1px solid var(--border);margin:0}
/* ── BASE PAPER COMPARISON ── */
.comparison-wrap{display:grid;grid-template-columns:1fr auto 1fr;gap:16px;align-items:center}
.comp-card{border-radius:14px;padding:26px;border:1px solid;box-shadow:var(--shadow)}
.comp-card.theirs{background:var(--bg2);border-color:var(--border2)}
.comp-card.ours{background:#fff;border-color:rgba(21,128,61,.25);box-shadow:var(--shadow-md)}
.comp-label{font-size:10px;font-family:'DM Mono',monospace;letter-spacing:.1em;text-transform:uppercase;margin-bottom:12px;padding:4px 10px;border-radius:4px;display:inline-block}
.comp-card.theirs .comp-label{background:var(--bg3);color:var(--ink3)}
.comp-card.ours .comp-label{background:var(--green-bg);color:var(--green)}
.comp-title{font-family:'Instrument Serif',serif;font-size:18px;letter-spacing:-.01em;color:var(--ink);margin-bottom:4px}
.comp-sub{font-size:12px;color:var(--ink2);margin-bottom:18px}
.comp-row{display:flex;align-items:flex-start;gap:8px;margin-bottom:9px;font-size:13px}
.comp-icon{width:16px;height:16px;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:9px;flex-shrink:0;margin-top:1px}
.comp-icon.bad{background:rgba(185,28,28,.1);color:var(--red)}
.comp-icon.good{background:var(--green-bg);color:var(--green)}
.comp-text{color:var(--ink2);line-height:1.45}
.comp-text strong{color:var(--ink)}
.comp-f1-row{margin-top:18px;padding-top:14px;border-top:1px solid var(--border);display:flex;align-items:center;gap:10px}
.comp-f1-label{font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3)}
.comp-f1-val{font-family:'Instrument Serif',serif;font-size:24px;letter-spacing:-.02em}
.comp-card.theirs .comp-f1-val{color:var(--ink3)}
.comp-card.ours .comp-f1-val{color:var(--green)}
.comp-middle{text-align:center;padding:16px 12px}
.comp-arrow{font-size:24px;color:var(--green);margin-bottom:6px}
.comp-delta{font-family:'Instrument Serif',serif;font-size:28px;color:var(--green);letter-spacing:-.02em}
.comp-delta-lbl{font-size:10px;font-family:'DM Mono',monospace;color:var(--ink3);margin-top:2px;text-transform:uppercase}
/* ── METHODOLOGY 3-STEP ── */
.method-steps{display:grid;grid-template-columns:repeat(3,1fr);gap:0;position:relative}
.method-steps::before{content:'';position:absolute;top:22px;left:22px;right:22px;height:2px;background:var(--bg3);z-index:0}
.method-step{text-align:center;position:relative;z-index:2;padding:0 20px;cursor:pointer;transition:opacity .15s}
.method-step:hover{opacity:.8}
.ms-dot{width:44px;height:44px;border-radius:50%;background:var(--ink);border:2px solid var(--ink);display:flex;align-items:center;justify-content:center;margin:0 auto 14px;font-size:12px;font-family:'DM Mono',monospace;color:#fff;transition:background .2s,border-color .2s}
.method-step.active .ms-dot{background:var(--blue);border-color:var(--blue)}
.ms-title{font-size:14px;font-weight:500;color:var(--ink);margin-bottom:7px}
.ms-body{font-size:12px;color:var(--ink2);line-height:1.65}
/* Detail panel */
.method-detail{margin-top:32px;background:#fff;border:1px solid var(--border);border-radius:14px;padding:28px 32px;box-shadow:var(--shadow);animation:fadeUp .25s ease both}
.md-panel{display:none}
.md-panel.active{display:block}
.md-title{font-size:13px;font-weight:600;color:var(--ink);margin-bottom:14px;display:flex;align-items:center;gap:8px}
.md-title-dot{width:8px;height:8px;border-radius:50%;background:var(--blue);flex-shrink:0}
.md-grid{display:grid;grid-template-columns:1fr 1fr;gap:16px}
.md-block{padding:14px;background:var(--bg);border-radius:8px;border:1px solid var(--border)}
.md-block-lbl{font-size:9px;font-family:'DM Mono',monospace;letter-spacing:.1em;text-transform:uppercase;color:var(--ink3);margin-bottom:5px}
.md-block-val{font-size:12px;color:var(--ink);line-height:1.6}
.md-block-val strong{color:var(--red)}
.md-block-val em{color:var(--blue);font-style:normal;font-weight:500}
/* ── EVIDENCE MATRIX ── */
.matrix-wrap{overflow-x:auto;margin-top:28px}
.matrix-tbl{width:100%;border-collapse:collapse;font-size:13px}
.matrix-tbl th{text-align:center;padding:11px 16px;background:var(--bg2);border-bottom:2px solid var(--border2);font-size:10px;font-family:'DM Mono',monospace;letter-spacing:.1em;text-transform:uppercase;color:var(--ink3);font-weight:500}
.matrix-tbl th:first-child{text-align:left;min-width:200px}
.matrix-tbl td{padding:12px 16px;border-bottom:1px solid var(--border);text-align:center;font-family:'DM Mono',monospace;font-size:13px;color:var(--ink2);vertical-align:middle}
.matrix-tbl td:first-child{text-align:left;font-family:'Geist',sans-serif;font-size:13px;color:var(--ink)}
.matrix-tbl tr:hover td{background:var(--bg2)}
.matrix-tbl tr:last-child td{border-bottom:none}
.matrix-tbl td.winner{font-weight:600;color:var(--ink)}
.matrix-tbl td.collapsed{background:rgba(185,28,28,.07);color:var(--red)}
.ds-label{display:inline-flex;align-items:center;gap:8px}
.ds-badge{font-size:9px;font-family:'DM Mono',monospace;padding:2px 7px;border-radius:3px;font-weight:500;flex-shrink:0}
/* ── MATRIX FOOTNOTE ── */
.matrix-footnote{margin-top:14px;font-size:11px;color:var(--ink3);line-height:1.6;padding:10px 14px;background:var(--bg2);border:1px solid var(--border);border-radius:7px;font-family:'DM Mono',monospace}
.matrix-footnote strong{color:var(--ink2)}
/* ── FINDINGS ── */
.findings-grid{display:grid;grid-template-columns:1fr 1fr;gap:14px}
.finding:nth-child(5){grid-column:1/-1;}
.finding{background:#fff;border:1px solid var(--border);border-radius:12px;padding:22px;box-shadow:var(--shadow)}
.finding-n{font-family:'Instrument Serif',serif;font-size:36px;color:var(--bg3);line-height:1;margin-bottom:8px}
.finding-t{font-size:13px;font-weight:500;color:var(--ink);margin-bottom:6px}
.finding-b{font-size:12px;color:var(--ink2);line-height:1.65}
.finding-chip{display:inline-block;font-family:'DM Mono',monospace;font-size:10px;background:var(--bg2);border:1px solid var(--border);padding:3px 8px;border-radius:4px;margin-top:8px;color:var(--ink2)}
/* ── VERDICT ── */
.verdict-grid{display:grid;grid-template-columns:1fr 1fr;gap:14px;margin-bottom:24px}
.verdict-card{background:#fff;border:1px solid var(--border);border-radius:12px;padding:20px;box-shadow:var(--shadow)}
.verdict-card.rq{border-left:3px solid var(--blue-mid)}
.verdict-card.lim{border-left:3px solid var(--amber-mid)}
.vc-eyebrow{font-size:9px;font-family:'DM Mono',monospace;letter-spacing:.12em;text-transform:uppercase;margin-bottom:6px;font-weight:500}
.verdict-card.rq .vc-eyebrow{color:var(--blue)}
.verdict-card.lim .vc-eyebrow{color:var(--amber)}
.vc-title{font-size:13px;font-weight:500;color:var(--ink);margin-bottom:6px}
.vc-body{font-size:12px;color:var(--ink2);line-height:1.65}
.vc-chip{display:inline-block;font-family:'DM Mono',monospace;font-size:10px;background:var(--blue-bg);border:1px solid rgba(29,78,216,.2);color:var(--blue);padding:3px 8px;border-radius:4px;margin-top:8px}
.verdict-card.lim .vc-chip{background:var(--amber-bg);border-color:rgba(180,83,9,.2);color:var(--amber)}
/* ── DEMO ── */
.demo-section{max-width:1040px;margin:0 auto;padding:64px 48px}
.input-card{background:#fff;border:1px solid var(--border);border-radius:14px;padding:24px;box-shadow:var(--shadow);margin-bottom:16px}
textarea{
width:100%;background:var(--bg);border:1px solid var(--border2);border-radius:8px;
padding:13px 15px;font-family:'Geist',sans-serif;font-size:14px;color:var(--ink);
resize:vertical;min-height:100px;outline:none;line-height:1.6;transition:border-color .15s,box-shadow .15s;
}
textarea:focus{border-color:rgba(29,78,216,.4);box-shadow:0 0 0 3px rgba(29,78,216,.07)}
textarea::placeholder{color:var(--ink3)}
.input-foot{display:flex;align-items:center;justify-content:space-between;margin-top:10px;flex-wrap:wrap;gap:8px}
.char-count{font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3)}
.samples{display:flex;gap:5px;flex-wrap:wrap}
.sbtn{font-size:10px;font-family:'DM Mono',monospace;background:var(--bg2);border:1px solid var(--border);border-radius:5px;padding:5px 10px;cursor:pointer;color:var(--ink2);transition:all .15s}
.sbtn:hover{border-color:var(--border2);color:var(--ink)}
.sbtn.danger{border-color:rgba(185,28,28,.25);color:var(--red);background:var(--red-bg);display:flex;align-items:center;gap:5px}
.sbtn-pulse{width:5px;height:5px;border-radius:50%;background:var(--red);animation:blink 1.5s infinite}
.run-btn{
width:100%;margin-top:12px;background:var(--ink);color:#fff;border:none;
border-radius:9px;padding:13px 24px;font-family:'Geist',sans-serif;font-size:14px;
font-weight:500;cursor:pointer;display:flex;align-items:center;justify-content:center;
gap:8px;transition:opacity .15s,transform .1s;letter-spacing:-.01em;
}
.run-btn:hover{opacity:.87}
.run-btn:active{transform:scale(.99)}
.run-btn:disabled{opacity:.45;cursor:not-allowed}
.spinner{width:14px;height:14px;border:2px solid rgba(255,255,255,.3);border-top-color:#fff;border-radius:50%;animation:spin .7s linear infinite;display:none}
@keyframes spin{to{transform:rotate(360deg)}}
.disclaimer{font-size:11px;color:var(--ink3);line-height:1.6;padding:10px 14px;background:var(--amber-bg);border:1px solid rgba(180,83,9,.15);border-radius:7px;margin-bottom:20px}
/* Results */
.results{display:none;animation:fadeUp .35s ease both}
@keyframes fadeUp{from{opacity:0;transform:translateY(8px)}to{opacity:1;transform:translateY(0)}}
.risk-banner{border-radius:10px;padding:14px 18px;margin-bottom:16px;border:1px solid;display:flex;align-items:flex-start;gap:12px}
.risk-banner.danger{background:var(--red-bg);border-color:rgba(185,28,28,.25)}
.risk-banner.safe{background:var(--green-bg);border-color:rgba(21,128,61,.2)}
.risk-banner.warn{background:var(--amber-bg);border-color:rgba(180,83,9,.25)}
.rb-icon{font-size:18px;flex-shrink:0;margin-top:1px}
.rb-title{font-size:13px;font-weight:500;margin-bottom:3px}
.rb-body{font-size:12px;line-height:1.55;color:var(--ink2)}
.risk-banner.danger .rb-title{color:var(--red)}
.risk-banner.safe .rb-title{color:var(--green)}
.risk-banner.warn .rb-title{color:var(--amber)}
.masked-callout{background:var(--amber-bg);border:1px solid rgba(180,83,9,.2);border-radius:10px;padding:14px 18px;margin-bottom:16px;display:none;animation:fadeUp .3s ease both}
.mc-callout-title{font-size:13px;font-weight:500;color:var(--amber);margin-bottom:4px;display:flex;align-items:center;gap:7px}
.mc-callout-body{font-size:12px;color:#92400e;line-height:1.6}
.results-hdr{display:flex;align-items:center;justify-content:space-between;margin-bottom:14px}
.results-hdr-title{font-size:14px;font-weight:500;color:var(--ink)}
.elapsed-chip{font-size:10px;font-family:'DM Mono',monospace;color:var(--ink3);background:var(--bg2);border:1px solid var(--border);padding:3px 9px;border-radius:20px}
.winner-grid{display:grid;grid-template-columns:repeat(3,1fr);gap:12px}
.win-card{border-radius:12px;padding:18px;border:1px solid;animation:fadeUp .4s ease both;background:#fff;box-shadow:var(--shadow);transition:all .3s ease}
.win-card.d1{border-color:rgba(29,78,216,.2);animation-delay:.14s}
.win-card.d2{border-color:rgba(180,83,9,.2);animation-delay:.07s}
.win-card.d3{border-color:rgba(185,28,28,.2)}
/* D3 dominant state when suicide risk is flagged */
.win-card.d3.risk-active{
border-color:var(--red);border-width:2px;
background:var(--red-bg);box-shadow:0 0 0 4px rgba(185,28,28,.08),var(--shadow-md);
}
.win-card.d3.risk-active .wc-pred{color:var(--red)}
.wc-lbl{font-size:9px;font-family:'DM Mono',monospace;letter-spacing:.1em;text-transform:uppercase;margin-bottom:8px;line-height:1.5}
.win-card.d1 .wc-lbl{color:var(--blue)}
.win-card.d2 .wc-lbl{color:var(--amber)}
.win-card.d3 .wc-lbl{color:var(--red)}
.wc-pred{font-family:'Instrument Serif',serif;font-size:20px;letter-spacing:-.02em;color:var(--ink);margin-bottom:6px;min-height:48px;display:flex;align-items:flex-end}
.conf-row{display:flex;align-items:center;gap:8px;margin-bottom:5px}
.conf-track{flex:1;height:4px;background:var(--bg2);border-radius:2px;overflow:hidden}
.conf-fill{height:100%;border-radius:2px;transition:width .8s cubic-bezier(.4,0,.2,1);width:0}
.win-card.d1 .conf-fill{background:var(--blue-mid)}
.win-card.d2 .conf-fill{background:var(--amber-mid)}
.win-card.d3 .conf-fill{background:var(--red)}
.conf-pct{font-size:11px;font-family:'DM Mono',monospace;min-width:34px;text-align:right}
.win-card.d1 .conf-pct{color:var(--blue)}
.win-card.d2 .conf-pct{color:var(--amber)}
.win-card.d3 .conf-pct{color:var(--red)}
.win-card.d3.risk-active .conf-pct{font-size:14px;font-weight:500}
.wc-meta{font-size:11px;color:var(--ink3)}
/* Clinical Insight Alert */
.clinical-insight{background:var(--amber-bg);border:1px solid rgba(180,83,9,.2);border-left:3px solid var(--amber-mid);border-radius:10px;padding:14px 18px;margin-bottom:16px;display:none;animation:fadeUp .3s ease both}
.ci-title{font-size:13px;font-weight:500;color:var(--amber);margin-bottom:5px;display:flex;align-items:center;gap:7px}
.ci-body{font-size:12px;color:#92400e;line-height:1.65}
/* ── GLOSSARY TOOLTIPS ── */
.gloss{border-bottom:1px dashed var(--ink3);cursor:help;position:relative;display:inline}
.gloss::after{content:attr(data-tip);position:absolute;bottom:calc(100% + 8px);left:50%;transform:translateX(-50%);background:var(--ink);color:#fff;font-size:11.5px;padding:8px 12px;border-radius:8px;width:230px;white-space:normal;line-height:1.5;font-family:'Geist',sans-serif;letter-spacing:0;text-align:left;opacity:0;pointer-events:none;transition:opacity .15s;z-index:300;box-shadow:0 4px 16px rgba(0,0,0,.18)}
.gloss::before{content:'';position:absolute;bottom:calc(100% + 2px);left:50%;transform:translateX(-50%);border:5px solid transparent;border-top-color:var(--ink);opacity:0;transition:opacity .15s;z-index:301}
.gloss:hover::after,.gloss:hover::before{opacity:1}
/* ── CODE MODAL ── */
.cm-term{border-bottom:1px dashed var(--blue);color:var(--ink);cursor:pointer;display:inline-flex;align-items:center;gap:5px;transition:color .15s}
.cm-term:hover{color:var(--blue)}
.cm-term::after{content:'</>';font-family:'DM Mono',monospace;font-size:9px;color:var(--blue);opacity:.7;letter-spacing:-.03em}
/* Full-viewport dimmed backdrop for the code modal; hidden until .open is added.
   The -webkit- prefixed backdrop-filter keeps the blur on Safari versions that
   only support the prefixed property. */
.cm-overlay{position:fixed;inset:0;background:rgba(26,24,22,.45);z-index:500;display:none;align-items:center;justify-content:center;padding:20px;-webkit-backdrop-filter:blur(3px);backdrop-filter:blur(3px)}
.cm-overlay.open{display:flex}
.cm-box{background:#fff;border-radius:16px;width:100%;max-width:680px;max-height:88vh;display:flex;flex-direction:column;box-shadow:0 24px 80px rgba(0,0,0,.18);overflow:hidden}
.cm-head{padding:20px 24px 0;display:flex;align-items:flex-start;justify-content:space-between;gap:16px}
.cm-title{font-family:'Instrument Serif',serif;font-size:22px;letter-spacing:-.02em;color:var(--ink)}
.cm-close{width:28px;height:28px;border-radius:50%;border:1px solid var(--border);background:var(--bg2);cursor:pointer;font-size:14px;display:flex;align-items:center;justify-content:center;flex-shrink:0;color:var(--ink2)}
.cm-close:hover{background:var(--bg3)}
.cm-tabs{display:flex;gap:2px;padding:12px 24px 0;border-bottom:1px solid var(--border)}
.cm-tab{font-size:11px;font-family:'DM Mono',monospace;padding:6px 14px;border-radius:6px 6px 0 0;cursor:pointer;border:1px solid transparent;border-bottom:none;color:var(--ink2);margin-bottom:-1px;background:none}
.cm-tab.active{background:#fff;border-color:var(--border);color:var(--ink)}
.cm-body{overflow-y:auto;padding:20px 24px 24px}
.cm-panel{display:none}
.cm-panel.active{display:block}
.cm-pre{background:var(--ink);color:#e8e4dc;font-family:'DM Mono',monospace;font-size:12px;line-height:1.7;padding:16px 18px;border-radius:10px;overflow-x:auto;white-space:pre;margin-bottom:10px}
.cm-src{font-size:11px;color:var(--ink3);font-family:'DM Mono',monospace;margin-top:6px}
.cm-why-body{font-size:13.5px;color:var(--ink2);line-height:1.8}
.cm-why-body strong{color:var(--ink)}
.cm-out-row{display:flex;align-items:flex-start;gap:12px;padding:10px 14px;background:var(--bg2);border-radius:8px;margin-bottom:8px}
.cm-out-lbl{font-size:10px;font-family:'DM Mono',monospace;color:var(--ink3);min-width:28px;text-transform:uppercase;margin-top:2px}
.cm-out-val{font-size:13px;color:var(--ink);line-height:1.5}
.cm-out-val em{font-family:'DM Mono',monospace;font-size:11.5px;color:var(--blue)}
/* ── FAQ ACCORDION ── */
.faq-section{max-width:1040px;margin:0 auto;padding:64px 48px}
.faq-group{margin-bottom:32px}
.faq-group-title{font-size:10px;font-family:'DM Mono',monospace;letter-spacing:.12em;text-transform:uppercase;color:var(--blue);margin-bottom:12px}
.faq-item{border:1px solid var(--border);border-radius:10px;margin-bottom:6px;overflow:hidden;background:#fff}
.faq-q{width:100%;text-align:left;background:none;border:none;padding:14px 18px;font-size:13px;font-family:'Geist',sans-serif;color:var(--ink);cursor:pointer;display:flex;justify-content:space-between;align-items:center;gap:16px;line-height:1.45}
.faq-q:hover{background:var(--bg2)}
.faq-q .faq-chevron{font-size:10px;color:var(--ink3);flex-shrink:0;transition:transform .2s}
.faq-item.open .faq-chevron{transform:rotate(180deg)}
.faq-a{max-height:0;overflow:hidden;transition:max-height .25s ease}
.faq-item.open .faq-a{max-height:500px}
.faq-a-inner{padding:0 18px 14px;font-size:12.5px;color:var(--ink2);line-height:1.75;border-top:1px solid var(--border)}
.faq-a-inner code{font-family:'DM Mono',monospace;font-size:11px;background:var(--bg2);padding:1px 5px;border-radius:3px;color:var(--ink)}
footer{text-align:center;padding:28px 48px;border-top:1px solid var(--border);font-size:11px;font-family:'DM Mono',monospace;color:var(--ink3);line-height:1.9}
/* Mobile layout (viewport up to 768px wide): tighten horizontal padding and
   collapse the multi-column grids into a single column. */
@media(max-width:768px){
/* .faq-section declares its own 48px side padding and does not carry the
   .section class, so it has to be listed here to receive the 20px gutter. */
header,.section,.demo-section,.faq-section,footer{padding-left:20px;padding-right:20px}
.hero{padding:48px 20px 40px}
.hero-top,.findings-grid,.winner-grid{grid-template-columns:1fr}
.method-steps{grid-template-columns:1fr;gap:24px}
/* Drop the .method-steps::before pseudo-element (defined outside this chunk)
   once the steps are stacked. */
.method-steps::before{display:none}
}
</style>
</head>
<body>
<!-- HEADER -->
<header>
<div class="logo">
<div class="logo-mark">
<svg viewBox="0 0 14 14" fill="none">
<circle cx="7" cy="7" r="5.5" stroke="white" stroke-width="1.3"/>
<path d="M5 7c0-1.2.8-2 2-2s2 .8 2 2-.8 2-2 2" stroke="white" stroke-width="1.3" stroke-linecap="round"/>
<circle cx="7" cy="7" r="1.2" fill="white"/>
</svg>
</div>
<div class="logo-txt">Mind<em>Scan</em></div>
</div>
<nav class="nav-links">
<a href="#comparison">vs Base Paper</a>
<a href="#methodology">Methodology</a>
<a href="#matrix">Evidence Matrix</a>
<a href="#findings">Findings</a>
<a href="#verdict">Conclusions</a>
<a href="#demo">Live Demo</a>
<a href="#faq">FAQ</a>
<a href="/flow" target="_blank" style="background:var(--bg3);color:var(--ink)">System Flow &rarr;</a>
</nav>
<div class="nav-badge">NCI H9DAI 2026</div>
</header>
<!-- HERO -->
<div class="hero">
<div class="hero-top">
<div>
<div class="hero-eyebrow"><div class="eyebrow-dot"></div><span class="eyebrow-txt">Mental health NLP research &middot; NCI H9DAI</span></div>
<h1>Multi-Model Framework for Depression Classification and <em>Suicide Risk Detection</em> from Social Media Text</h1>
<p class="hero-sub">A parallel ensemble of 12 classifiers across 3 clinical datasets &mdash; extending Tumaliuan et al. (2024) with modern transformers and SMOTE balancing.</p>
<div class="rq-cards">
<div class="rq-card rq1">
<div class="rq-label">RQ1</div>
<div class="rq-text">Which machine learning model provides the highest Accuracy for identifying depression and suicide risk?</div>
</div>
<div class="rq-card rq2">
<div class="rq-label">RQ2</div>
<div class="rq-text">Does training on the full dataset (232K), a half split (116K), or a sample (50K) provide a significant boost in Accuracy?</div>
</div>
</div>
</div>
<div class="stats-panel">
<div class="stat-box"><div class="stat-num" data-target="3" data-suffix="">0</div><div class="stat-lbl">Datasets</div></div>
<div class="stat-box"><div class="stat-num" data-target="12" data-suffix="">0</div><div class="stat-lbl">Models trained</div></div>
<div class="stat-box"><div class="stat-num" data-target="98.1" data-suffix="%" data-dec="1">0</div><div class="stat-lbl">D3 Accuracy (binary)</div></div>
<div class="stat-box"><div class="stat-num" data-target="11.4" data-suffix="%" data-dec="1">0</div><div class="stat-lbl">vs Base Paper &uarr;</div></div>
</div>
</div>
</div>
<hr class="section-divider">
<!-- BASE PAPER COMPARISON -->
<section class="section" id="comparison">
<div class="sec-eyebrow">Extending prior work</div>
<div class="sec-h2">Our work vs <em>Tumaliuan et al. (2024)</em></div>
<p class="sec-lead">Dataset 1 is structurally equivalent to the base paper's Filipino Twitter corpus &mdash; same 6-class task, same clinical annotation method &mdash; making a direct accuracy comparison valid.</p>
<div class="comparison-wrap">
<div class="comp-card theirs">
<div class="comp-label">Tumaliuan et al. &mdash; 2024</div>
<div class="comp-title">Filipino Twitter Depression</div>
<div class="comp-sub">Frontiers in Computer Science &middot; word2vec pipeline</div>
<div class="comp-row"><div class="comp-icon bad">&#10007;</div><div class="comp-text">Used <strong><span class="gloss" data-tip="word2vec (2013): maps words to fixed vectors based on co-occurrence. Cannot understand negation ('not happy' vs. 'happy') or context. Superseded by transformers.">word2vec</span></strong> (2013) &mdash; static embeddings, no negation handling</div></div>
<div class="comp-row"><div class="comp-icon bad">&#10007;</div><div class="comp-text"><strong><span class="gloss" data-tip="SVM (Support Vector Machine): finds the maximum-margin hyperplane separating classes. Very effective for high-dimensional text features like TF-IDF. Gold standard for NLP before transformers.">SVM never tested</span></strong> &mdash; absent from evaluation despite being NLP gold standard</div></div>
<div class="comp-row"><div class="comp-icon bad">&#10007;</div><div class="comp-text"><strong><span class="gloss" data-tip="XGBoost: gradient-boosted decision trees. Sequentially builds trees to correct previous errors. Handles imbalanced data well and often beats random forests on tabular/sparse features.">XGBoost never tested</span></strong> &mdash; gradient boosting entirely absent</div></div>
<div class="comp-row"><div class="comp-icon bad">&#10007;</div><div class="comp-text">Class imbalance listed as <strong>limitation &mdash; never resolved</strong></div></div>
<div class="comp-row"><div class="comp-icon bad">&#10007;</div><div class="comp-text"><strong>Restricted dataset</strong> &mdash; requires author permission to access</div></div>
<div class="comp-row"><div class="comp-icon bad">&#10007;</div><div class="comp-text"><strong>Accuracy not verified</strong> &mdash; no reproducible baseline reported</div></div>
<div class="comp-f1-row"><span class="comp-f1-label">Best Accuracy</span><span class="comp-f1-val">~81%</span></div>
</div>
<div class="comp-middle">
<div class="comp-arrow">&rarr;</div>
<div class="comp-delta">+11.4%</div>
<div class="comp-delta-lbl">accuracy gain (D1)</div>
</div>
<div class="comp-card ours">
<div class="comp-label">MindScan &mdash; 2026</div>
<div class="comp-title">English Twitter + Reddit</div>
<div class="comp-sub">Zenodo (Nusrat 2024) &middot; XLM-RoBERTa + SVM + XGBoost</div>
<div class="comp-row"><div class="comp-icon good">&#10003;</div><div class="comp-text"><strong><span class="gloss" data-tip="XLM-RoBERTa: 278M-parameter multilingual transformer. Pre-trained on 100 languages. Produces contextual embeddings &mdash; the same word gets different vectors depending on surrounding context. Understands negation, irony, and long-range dependencies.">XLM-RoBERTa</span></strong> (2019) &mdash; contextual embeddings, understands negation</div></div>
<div class="comp-row"><div class="comp-icon good">&#10003;</div><div class="comp-text"><strong>SVM added</strong> &mdash; best D1 accuracy 92.36%, beats transformer (90.52%)</div></div>
<div class="comp-row"><div class="comp-icon good">&#10003;</div><div class="comp-text"><strong>XGBoost added</strong> &mdash; accuracy 91.76%, gradient boosting for imbalanced data</div></div>
<div class="comp-row"><div class="comp-icon good">&#10003;</div><div class="comp-text"><span class="gloss" data-tip="SMOTE (Synthetic Minority Oversampling Technique): generates synthetic training samples for minority classes by interpolating between existing minority-class examples in feature space. Applied to training data only &mdash; never the test set.">SMOTE</span> applied &mdash; <strong>imbalance resolved</strong>, all 6 classes equalised to 2,997</div></div>
<div class="comp-row"><div class="comp-icon good">&#10003;</div><div class="comp-text"><strong>Public dataset</strong> &mdash; fully reproducible, anyone can verify results</div></div>
<div class="comp-row"><div class="comp-icon good">&#10003;</div><div class="comp-text"><strong>Accuracy verified</strong> on held-out 20% test set, same 6-class task</div></div>
<div class="comp-f1-row"><span class="comp-f1-label">Best Accuracy (D1 SVM)</span><span class="comp-f1-val">92.4%</span></div>
</div>
</div>
</section>
<hr class="section-divider">
<!-- METHODOLOGY -->
<section class="section" id="methodology">
<div class="sec-eyebrow">Methodology</div>
<div class="sec-h2">Three-step <em>pipeline</em></div>
<p class="sec-lead"><span class="gloss" data-tip="CRISP-DM: Cross-Industry Standard Process for Data Mining. 6 phases: Business Understanding &rarr; Data Understanding &rarr; Data Preparation &rarr; Modelling &rarr; Evaluation &rarr; Deployment. The de facto lifecycle framework for data science projects.">CRISP-DM</span> applied across all three datasets &mdash; from raw social media text to parallel ensemble predictions.</p>
<div class="method-steps">
<div class="method-step active" onclick="showMethodDetail(0)">
<div class="ms-dot">01</div>
<div class="ms-title">Data</div>
<div class="ms-body">3 clinical datasets spanning Twitter and Reddit, covering depression types, binary detection, and suicide risk.</div>
</div>
<div class="method-step" onclick="showMethodDetail(1)">
<div class="ms-dot">02</div>
<div class="ms-title">Preprocessing</div>
<div class="ms-body">6-stage text cleaning pipeline + SMOTE oversampling to address class imbalance left unresolved by the base paper.</div>
</div>
<div class="method-step" onclick="showMethodDetail(2)">
<div class="ms-dot">03</div>
<div class="ms-title">Modelling</div>
<div class="ms-body">Parallel ensemble of 12 classifiers &mdash; all run independently on every prediction, never as a sequential cascade.</div>
</div>
</div>
<div class="method-detail">
<!-- Panel 0: Data -->
<div class="md-panel active" id="mdp0">
<div class="md-title"><div class="md-title-dot"></div>Dataset Overview</div>
<div class="md-grid">
<div class="md-block">
<div class="md-block-lbl">D1 &mdash; Depression Types (Zenodo 14233292)</div>
<div class="md-block-val"><em>14,983 tweets &middot; 6 classes</em> &mdash; Postpartum (3,746), Major Depressive (2,517), Bipolar (2,443), Psychotic (2,312), No Depression (1,985), Atypical (1,980). Psychiatrist-verified labels. Class imbalance ratio: 1.89&times;.</div>
</div>
<div class="md-block">
<div class="md-block-lbl">D2 &mdash; Binary Depression (Kaggle: albertobellardini)</div>
<div class="md-block-val"><em>10,314 tweets &middot; 2 classes</em> &mdash; Not Depressed (8,000) / Depressed (2,314). Severe class imbalance: <strong>3.46&times;</strong>. Twitter short-form text. SMOTE applied to training set (8,251 &rarr; 12,800 samples). Trained on Twitter affect patterns &mdash; may underdetect atypical presentations.</div>
</div>
<div class="md-block">
<div class="md-block-lbl">D3 &mdash; Suicide Risk (Kaggle: nikhileswarkomati)</div>
<div class="md-block-val"><em>232,074 Reddit posts &middot; 2 classes</em> &mdash; Suicide / Non-Suicide (perfectly balanced, 116,037 each). Suicide posts average <em>200.8 words</em> (mean), non-suicide posts 63 words. We sample <em>50K posts</em> and compare against full/half splits to answer RQ2.</div>
</div>
<div class="md-block">
<div class="md-block-lbl">Business Context</div>
<div class="md-block-val">A clinically-motivated framework for social media monitoring &mdash; applicable to platform-level moderation, mental health triage, and early intervention systems. Complements rather than replaces clinical assessment.</div>
</div>
</div>
</div>
<!-- Panel 1: Preprocessing -->
<div class="md-panel" id="mdp1">
<div class="md-title"><div class="md-title-dot"></div>Preprocessing Pipeline</div>
<div class="md-grid">
<div class="md-block">
<div class="md-block-lbl"><span class="cm-term" onclick="openCM('clean_text')">6-Stage Text Cleaning</span></div>
<div class="md-block-val">1. Lowercase &middot; 2. Strip URLs & http links &middot; 3. Remove @mentions &middot; 4. Remove # symbols &middot; 5. Strip punctuation &middot; 6. Collapse whitespace. Applied identically across all three datasets for consistency.</div>
</div>
<div class="md-block">
<div class="md-block-lbl"><span class="cm-term" onclick="openCM('smote')">SMOTE &mdash; Synthetic Oversampling</span></div>
<div class="md-block-val">Applied to D1 and D2 training sets only (D3 is pre-balanced). D1: 11,986 &rarr; <em>17,982 samples</em>. D2: 8,251 &rarr; <em>12,800 samples</em>. Creates synthetic clinical neighbours in TF-IDF feature space. Directly addresses the base paper's (Tumaliuan 2024) biggest limitation &mdash; they trained on raw imbalanced data.</div>
</div>
<div class="md-block">
<div class="md-block-lbl"><span class="cm-term" onclick="openCM('tfidf')">Feature Extraction &mdash; TF-IDF</span></div>
<div class="md-block-val"><span class="gloss" data-tip="TF-IDF (Term Frequency&ndash;Inverse Document Frequency): scores each word by how often it appears in a document (TF), weighted by how rare it is across all documents (IDF). Settings: max_features=50,000, ngram_range=(1,2), sublinear_tf=True, min_df=2.">TF-IDF</span> vectoriser with unigrams + bigrams, fitted per-dataset on training data only. Captures frequency-weighted term co-occurrence patterns, well-suited for short Twitter text.</div>
</div>
<div class="md-block">
<div class="md-block-lbl"><span class="cm-term" onclick="openCM('tokeniser')">Feature Extraction &mdash; Tokeniser</span></div>
<div class="md-block-val">XLM-RoBERTa tokeniser (max 128 tokens D1/D2, 256 tokens D3) with padding. Pre-trained multilingual contextual embeddings capture semantic meaning and long-range dependencies &mdash; critical for Reddit's longer posts.</div>
</div>
</div>
</div>
<!-- Panel 2: Modelling -->
<div class="md-panel" id="mdp2">
<div class="md-title"><div class="md-title-dot"></div>Ensemble Strategy & Architecture</div>
<div class="md-grid">
<div class="md-block">
<div class="md-block-lbl">4 Models per Dataset (12 total)</div>
<div class="md-block-val"><span class="cm-term" onclick="openCM('lr')">Logistic Regression</span> &mdash; L2 regularised, max_iter=1000. <span class="cm-term" onclick="openCM('svm')">SVM</span> &mdash; LinearSVC, C=1.0. <span class="cm-term" onclick="openCM('xgb')">XGBoost</span> &mdash; 300 estimators, max_depth=6. <span class="cm-term" onclick="openCM('xlmr_ft')">XLM-RoBERTa</span> &mdash; fine-tuned multilingual transformer, <em>278M parameters</em>, lr=2e-5, 3 epochs.</div>
</div>
<div class="md-block">
<div class="md-block-lbl"><span class="cm-term" onclick="openCM('majority_vote')">Ensemble Vote &mdash; Risk Flag Logic</span></div>
<div class="md-block-val">All 12 models run simultaneously on every input. A sequential design (check depression first, then suicide risk) would <strong>miss masked suicidality</strong> &mdash; a clinically documented pre-crisis pattern where affect appears normal but intent is resolved. Parallelism is a safety requirement, not a design preference.</div>
</div>
<div class="md-block">
<div class="md-block-lbl">XGBoost Algorithm Collapse</div>
<div class="md-block-val">XGBoost accuracy on D3: <em>91.6% (50K sample) &rarr; 70.5% (Full 232K) &rarr; 60.1% (H1 116K)</em>. Performance degrades as training data grows. The H1/H2 results are also inconsistent (60.1% vs 71.0%) &mdash; gradient boosting is highly sensitive to data distribution shifts at this scale, making it unreliable for large Reddit corpora.</div>
</div>
<div class="md-block">
<div class="md-block-lbl"><span class="cm-term" onclick="openCM('split_study')">D3 Split Study (RQ2)</span></div>
<div class="md-block-val">D3 trained on 4 configurations: Full (232K), Half 1 (116K), Half 2 (116K), Sample (50K). XLM-RoBERTa accuracy: <em>98.1% (50K) &rarr; 97.8% (H1) &rarr; 98.0% (H2/Full)</em>. &Delta; = 0.3% across 4&times; more data. Kolmogorov-Smirnov tests confirm all splits share identical distributions (p > 0.49), validating the comparison.</div>
</div>
</div>
</div>
</div>
</section>
<hr class="section-divider">
<!-- ACCURACY EVIDENCE MATRIX -->
<section class="section" id="matrix">
<div class="sec-eyebrow">Core evaluation</div>
<div class="sec-h2">Accuracy <em>Evidence Matrix</em></div>
<p class="sec-lead">All 4 models evaluated across all dataset splits. <strong>Bold</strong> = winner per row. <span style="color:var(--red)">Red</span> = XGBoost collapse on larger training sets. &rarr; <span class="cm-term" onclick="openCM('eval_metrics')">How metrics are computed</span></p>
<div class="matrix-wrap">
<table class="matrix-tbl">
<thead>
<tr>
<th>Dataset / Split</th>
<th><span class="gloss" data-tip="Logistic Regression: linear model trained with L2 regularisation (max_iter=1000). Fast, interpretable baseline. Outputs class probabilities via sigmoid/softmax. Works well with TF-IDF sparse vectors.">Logistic Regression</span></th>
<th><span class="gloss" data-tip="SVM (Support Vector Machine): LinearSVC, C=1.0. Finds maximum-margin hyperplane in TF-IDF feature space. Best classical model on D1 &mdash; short tweets give TF-IDF enough signal to beat contextual embeddings.">SVM</span></th>
<th><span class="gloss" data-tip="XGBoost: gradient-boosted trees, 300 estimators, max_depth=6. Sequentially corrects previous errors. Collapses on the larger D3 splits (60.1&ndash;71.0%) &mdash; vocabulary overlap between depressive and suicidal language confuses boosted trees.">XGBoost</span></th>
<th><span class="gloss" data-tip="XLM-RoBERTa: 278M-parameter multilingual transformer. Fine-tuned with lr=2e-5, 3 epochs. Max 128 tokens (D1/D2) or 256 tokens (D3). Best on long-form Reddit posts &mdash; contextual embeddings capture meaning beyond keyword matching.">XLM-RoBERTa</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><div class="ds-label"><span class="ds-badge" style="background:var(--blue-bg);color:var(--blue)">D1</span> Depression Types</div></td>
<td>91.5%</td>
<td class="winner">92.4%</td>
<td>91.8%</td>
<td>90.5%</td>
</tr>
<tr>
<td><div class="ds-label"><span class="ds-badge" style="background:var(--amber-bg);color:var(--amber)">D2</span> Binary Depression</div></td>
<td>98.9%</td>
<td>97.1%</td>
<td>99.3%</td>
<td class="winner">99.9%</td>
</tr>
<tr>
<td><div class="ds-label"><span class="ds-badge" style="background:var(--red-bg);color:var(--red)">D3</span> Full (232K)</div></td>
<td>94.3%</td>
<td>94.6%</td>
<td class="collapsed">70.5%</td>
<td class="winner">98.0%</td>
</tr>
<tr>
<td><div class="ds-label"><span class="ds-badge" style="background:var(--blue-bg);color:var(--blue)">D3</span> Half 1 (116K)</div></td>
<td>93.8%</td>
<td>94.2%</td>
<td class="collapsed">60.1%</td>
<td class="winner">97.8%</td>
</tr>
<tr>
<td><div class="ds-label"><span class="ds-badge" style="background:var(--amber-bg);color:var(--amber)">D3</span> Half 2 (116K)</div></td>
<td>93.7%</td>
<td>94.2%</td>
<td class="collapsed">71.0%</td>
<td class="winner">98.0%</td>
</tr>
<tr>
<td><div class="ds-label"><span class="ds-badge" style="background:var(--green-bg);color:var(--green)">D3</span> Sample (50K) &#9733;</div></td>
<td>93.2%</td>
<td>93.7%</td>
<td>91.6%</td>
<td class="winner">98.1%</td>
</tr>
</tbody>
</table>
</div>
<p class="matrix-footnote"><strong>Note:</strong> Full performance evaluation including Macro F1-Score, Cohen's Kappa, and per-class metrics are documented in the Final IEEE Report. Accuracy is shown here as the primary comparative metric for cross-dataset validation.</p>
</section>
<hr class="section-divider">
<!-- FINDINGS -->
<section class="section" id="findings">
<div class="sec-eyebrow">Key findings</div>
<div class="sec-h2">What the results <em>show</em></div>
<p class="sec-lead">Five insights that directly answer the research questions.</p>
<div class="findings-grid">
<div class="finding">
<div class="finding-n">01</div>
<div class="finding-t">SVM is the best model for short-form text</div>
<div class="finding-b">On 6-class depression type classification (D1), SVM achieves the highest Accuracy of 92.4%. Tweets average 31 words &mdash; too short for transformer contextual embeddings to gain advantage over TF-IDF bigrams.</div>
<div class="finding-chip">D1 Accuracy: SVM 92.4%</div>
</div>
<div class="finding">
<div class="finding-n">02</div>
<div class="finding-t">XLM-RoBERTa is the best model for long-form text</div>
<div class="finding-b">On Reddit suicide risk posts (D3), XLM-RoBERTa achieves 98.1% Accuracy with the 50K sample. Suicide posts average 200.8 words &mdash; rich enough context for transformer embeddings to dominate every competitor. D1 (Twitter, ~31 words) tells the opposite story.</div>
<div class="finding-chip">D3 Accuracy: XLM-RoBERTa 98.1%</div>
</div>
<div class="finding">
<div class="finding-n">03</div>
<div class="finding-t">Increasing data size provided no significant gain</div>
<div class="finding-b">Scaling from 50K to 232K samples produced only a 0.1% change in XLM-RoBERTa Accuracy (98.1% &rarr; 98.0%). Adding 182,000 more training examples gave no meaningful improvement, validating the 50K sample.</div>
<div class="finding-chip">50K &rarr; 232K: &Delta; Accuracy = 0.1%</div>
</div>
<div class="finding">
<div class="finding-n">04</div>
<div class="finding-t">Social media affect &ne; clinical presentation</div>
<div class="finding-b">D2 was trained on Twitter-style emotional language (explicit distress, slang). Clinical presentations &mdash; anhedonia ("nothing feels enjoyable"), fatigue, flat affect &mdash; use a different lexicon and are systematically under-flagged. This is the documented <em>Affective vs. Clinical Lexicon Gap</em>: models trained on social media affect fail to recognise diagnostic-criteria language.</div>
<div class="finding-chip">D2 limitation &mdash; documented failure mode</div>
</div>
<div class="finding">
<div class="finding-n">05</div>
<div class="finding-t">Parallel architecture is the safety net</div>
<div class="finding-b">When D2 misses a clinical presentation, D1 and D3 can still catch it. When classical D3 models over-flag depressive vocabulary, XLM-RoBERTa's contextual understanding overrides them. No single model is sufficient &mdash; the parallel ensemble exists precisely because each model's failure mode is different and partially compensated by the others.</div>
<div class="finding-chip">Multi-task learning precedent &mdash; Zogan et al. 2024</div>
</div>
</div>
</section>
<hr class="section-divider">
<!-- CONCLUSIONS & VERDICT -->
<section class="section" id="verdict">
<div class="sec-eyebrow">Conclusions</div>
<div class="sec-h2">Research <em>verdict</em></div>
<p class="sec-lead">Direct answers to both research questions, and the key limitations of the study.</p>
<div class="verdict-grid">
<div class="verdict-card rq">
<div class="vc-eyebrow">RQ1 &mdash; Best model</div>
<div class="vc-title">No single model wins across all tasks</div>
<div class="vc-body">SVM (92.4%) wins on short-form Twitter text (D1) where TF-IDF bigrams capture enough signal. XLM-RoBERTa wins on binary Twitter depression detection (D2: 99.9%) and on long-form Reddit posts (D3: 98.1%) where contextual embeddings dominate. Model selection must be text-length aware.</div>
<div class="vc-chip">SVM for short text &middot; XLM-RoBERTa for long text</div>
</div>
<div class="verdict-card rq">
<div class="vc-eyebrow">RQ2 &mdash; Dataset size</div>
<div class="vc-title">More data gave no meaningful gain</div>
<div class="vc-body">Scaling from 50K to 232K training samples produced only a 0.1% change in XLM-RoBERTa Accuracy (98.1% &rarr; 98.0%). For this task and model, the 50K sample captures the full signal &mdash; there is no statistically significant benefit from 4&times; more data.</div>
<div class="vc-chip">50K sample is sufficient &middot; &Delta; = 0.1%</div>
</div>
<div class="verdict-card lim">
<div class="vc-eyebrow">Limitation 1 &mdash; Affective vs. Clinical Lexicon Gap</div>
<div class="vc-title">Social media affect &ne; clinical diagnostic criteria</div>
<div class="vc-body">D2 was trained on Twitter explicit emotional language. Clinical presentations using diagnostic vocabulary &mdash; anhedonia ("nothing feels enjoyable"), psychomotor fatigue, flat affect &mdash; do not match that training distribution and are systematically under-flagged. This is empirical evidence of the domain gap between self-reported social media affect and clinical language, not a model defect.</div>
<div class="vc-chip">Documented domain gap &mdash; Finding 04</div>
</div>
<div class="verdict-card lim">
<div class="vc-eyebrow">Limitation 2 &mdash; Classical model lexical overfitting</div>
<div class="vc-title">TF-IDF ignores word order and context</div>
<div class="vc-body">Classical D3 models (LR, SVM, XGBoost) use TF-IDF bag-of-words features. Vocabulary overlapping with r/SuicideWatch posts (e.g. "exhausted", "nothing feels enjoyable") triggers false-positive suicide flags &mdash; the model sees matching tokens without understanding the sentence context. XLM-RoBERTa's contextual embeddings override these false positives, demonstrating why the transformer is the reliable D3 winner.</div>
<div class="vc-chip">TF-IDF lexical overfitting &rarr; defer to XLM-RoBERTa</div>
</div>
</div>
</section>
<hr class="section-divider">
<!-- LIVE DEMO -->
<div class="demo-section" id="demo">
<div class="sec-eyebrow">Live inference</div>
<div class="sec-h2" style="margin-bottom:8px">Try it &mdash; <em>winner model per task</em></div>
<p class="sec-lead" style="margin-bottom:12px">Sample 3 demonstrates masked suicidality. Try typing clinical-style depressive language ("I feel exhausted, nothing feels enjoyable") to observe the Affective vs. Clinical Lexicon Gap documented in Finding 04.</p>
<p style="font-size:13px;color:var(--ink2);margin-bottom:24px">How the demo works: <span class="cm-term" onclick="openCM('flask_deploy')">Flask &rarr; HuggingFace proxy</span> &middot; <span class="cm-term" onclick="openCM('predict_flow')">predict_all() inference flow</span></p>
<div class="disclaimer"><strong>Research prototype only.</strong> Not a clinical tool. If you or someone you know is in crisis, please contact a mental health professional or emergency services immediately.</div>
<div class="input-card">
<textarea id="textInput" placeholder="Enter any text &mdash; tweet, Reddit post, or sentence..."></textarea>
<div class="input-foot">
<div class="char-count" id="charCount">0 characters</div>
<div class="samples">
<button class="sbtn" onclick="loadSample(0)">Sample 1 &mdash; Postpartum</button>
<button class="sbtn" onclick="loadSample(1)">Sample 2 &mdash; Psychotic</button>
<button class="sbtn danger" onclick="loadSample(2)"><div class="sbtn-pulse"></div>Sample 3 &mdash; Masked risk</button>
<button class="sbtn" onclick="loadSample(3)">Sample 4 &mdash; No issue</button>
</div>
</div>
<button class="run-btn" id="runBtn" onclick="runAnalysis()">
<div class="spinner" id="spinner"></div>
<span id="btnTxt">Run analysis</span>
</button>
</div>
<div class="results" id="results">
<div class="risk-banner" id="riskBanner">
<div class="rb-icon" id="rbIcon"></div>
<div><div class="rb-title" id="rbTitle"></div><div class="rb-body" id="rbBody"></div></div>
</div>
<!-- Clinical Insight Alert: shown when patterns warrant clinical interpretation -->
<div class="clinical-insight" id="clinicalInsight">
<div class="ci-title" id="ciTitle"></div>
<div class="ci-body" id="ciBody"></div>
</div>
<div class="results-hdr">
<div class="results-hdr-title">Analysis results</div>
<div class="elapsed-chip" id="elapsed"></div>
</div>
<!-- Cards ordered D3, then D2, then D1 (safety-first triage) -->
<div class="winner-grid" id="winnerGrid">
<div class="win-card d3" id="cardD3">
<div class="wc-lbl">D3 &mdash; Immediate Risk &middot; XLM-RoBERTa</div>
<div class="wc-pred" id="wpC">&mdash;</div>
<div class="conf-row"><div class="conf-track"><div class="conf-fill" id="wbC"></div></div><div class="conf-pct" id="wcC">&mdash;</div></div>
<div class="wc-meta">98.1% Accuracy on D3</div>
</div>
<div class="win-card d2" id="cardD2">
<div class="wc-lbl">D2 &mdash; Depressed? &middot; XLM-RoBERTa</div>
<div class="wc-pred" id="wpB">&mdash;</div>
<div class="conf-row"><div class="conf-track"><div class="conf-fill" id="wbB"></div></div><div class="conf-pct" id="wcB">&mdash;</div></div>
<div class="wc-meta">99.9% Accuracy on D2</div>
</div>
<div class="win-card d1" id="cardD1">
<div class="wc-lbl">D1 &mdash; Depression type &middot; SVM</div>
<div class="wc-pred" id="wpA">&mdash;</div>
<div class="conf-row"><div class="conf-track"><div class="conf-fill" id="wbA"></div></div><div class="conf-pct" id="wcA">&mdash;</div></div>
<div class="wc-meta">92.4% Accuracy on D1</div>
</div>
</div>
</div>
</div>
<!-- CODE MODAL OVERLAY -->
<div class="cm-overlay" id="cmOverlay" onclick="closeCMOutside(event)">
<div class="cm-box" id="cmBox">
<div class="cm-head">
<div class="cm-title" id="cmTitle"></div>
<button class="cm-close" onclick="closeCM()">&#10005;</button>
</div>
<div class="cm-tabs">
<div class="cm-tab active" onclick="switchCMTab(0)">Code</div>
<div class="cm-tab" onclick="switchCMTab(1)">Why</div>
<div class="cm-tab" onclick="switchCMTab(2)">Output</div>
</div>
<div class="cm-body">
<div class="cm-panel active" id="cmt0"></div>
<div class="cm-panel" id="cmt1"></div>
<div class="cm-panel" id="cmt2"></div>
</div>
</div>
</div>
<hr class="section-divider">
<!-- FAQ SECTION -->
<section class="faq-section" id="faq">
<div class="sec-eyebrow">Defence prep</div>
<div class="sec-h2">Frequently asked <em>questions</em></div>
<p class="sec-lead">Click any question to expand the answer. Grouped by topic for quick navigation during Q&A.</p>
<div class="faq-group">
<div class="faq-group-title">Data & Datasets</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">What are the three datasets and what makes them different? <span class="faq-chevron">&#9660;</span></button>
<div class="faq-a"><div class="faq-a-inner">D1 &mdash; 6-class depression type classification (atypical, bipolar, major depressive, no depression, postpartum, psychotic) from Zenodo (record 14233292). Twitter-length text, 14,983 samples (11,986 in the training split). D2 &mdash; binary depressed/not-depressed from Twitter (10,314 samples, severe 3.46&times; imbalance). D3 &mdash; binary suicide/non-suicide from Reddit (232K samples, perfectly balanced 116,037 each &mdash; we use a 50K sample of 25K per class). Each dataset has a different task, different text length, and different vocabulary domain &mdash; which is precisely why running all three in parallel is informative.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">How did you handle class imbalance? Why SMOTE and not class weighting? <span class="faq-chevron">&#9660;</span></button>
<div class="faq-a"><div class="faq-a-inner">D1 had 1.89&times; imbalance (atypical class), D2 had 3.46&times; imbalance. We applied <code>SMOTE</code> to training data only &mdash; never the test set. SMOTE interpolates new synthetic samples in TF-IDF feature space between existing minority-class examples. Class weighting was also evaluated; SMOTE showed equal or better Macro F1 in cross-validation. D3 was pre-balanced and required no oversampling.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Is there any data leakage in your pipeline? <span class="faq-chevron">&#9660;</span></button>
<div class="faq-a"><div class="faq-a-inner">No. The train/test split (stratified 80/20) is performed first. SMOTE is then applied only to the training portion. The TF-IDF vocabulary is fitted on training data only and applied as a read-only transform to the test set. XLM-RoBERTa uses a fixed pretrained tokeniser. No test sample was ever used to inform any training decision.</div></div>
</div>
</div>
<div class="faq-group">
<div class="faq-group-title">Methodology & Models</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Why four model types per dataset? Why not just use the best one? <span class="faq-chevron">&#9660;</span></button>
<div class="faq-a"><div class="faq-a-inner">Each captures a different inductive bias: Logistic Regression (linear decision boundary), SVM (maximum-margin), XGBoost (non-linear boosted tree ensemble), XLM-RoBERTa (contextual transformer). Disagreement between models is itself a signal. On D1, SVM (92.4%) beats XLM-RoBERTa (90.5%) &mdash; short tweets don't give the transformer enough context to gain advantage. On D3 (200.8-word Reddit posts), XLM-RoBERTa (98.1%) dominates every classical model.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">What are your TF-IDF settings and why? <span class="faq-chevron">&#9660;</span></button>
<div class="faq-a"><div class="faq-a-inner"><code>max_features=50,000</code> &mdash; covers the full relevant vocabulary without noise. <code>ngram_range=(1,2)</code> &mdash; unigrams + bigrams capture local phrases ("not happy", "kill myself") that unigrams miss. <code>sublinear_tf=True</code> &mdash; replaces tf with 1 + log(tf) to dampen high-frequency word dominance. <code>min_df=2</code> &mdash; removes terms that appear in only one document, which add noise.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">How was XLM-RoBERTa fine-tuned? What hyperparameters? <span class="faq-chevron">&#9660;</span></button>
<div class="faq-a"><div class="faq-a-inner">Standard sequence classification fine-tuning: Adam optimiser, <code>lr=2e-5</code>, <code>3 epochs</code>, linear warmup scheduler. Max token length: 128 for D1/D2 (Twitter-length text), 256 for D3 (Reddit posts average 200.8 words). Cross-entropy loss. Best checkpoint saved by validation accuracy. 278M parameters &mdash; multilingual pretraining covers 100 languages.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Why did XGBoost collapse on D3 at full scale? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">On the 50K sample, XGBoost achieves 91.6% — competitive. At full scale (232K), it collapses to 70.52% (Macro F1: 0.6998). This is TF-IDF lexical overfitting: vocabulary overlap between "suicide" and "non-suicide" Reddit posts increases with scale — words like "exhausted", "hopeless", "nothing matters" appear in both classes. Boosted trees memorise these majority-class token patterns instead of learning discriminative boundaries. H1 (116K) drops further to 60.1%, and H1 vs H2 are inconsistent (60.1% vs 70.9%), confirming XGBoost is unstable at this data scale. XLM-RoBERTa stays at 98.1% across all splits.</div></div>
</div>
</div>
<div class="faq-group">
<div class="faq-group-title">Results &amp; Evaluation</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Why is SVM accuracy 92.4% on D1 but XLM-RoBERTa (278M params) only gets 90.5%? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">Text length. D1 tweets average ~31 words. Transformers need rich context to outperform classical methods — contextual embeddings add little value on ~40-token inputs. TF-IDF bigrams on short explicit text (like tweets) already capture the full signal. This is Finding 01 and one of the key research conclusions: model selection must be text-length aware.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Why show accuracy rather than Macro F1? Isn't accuracy misleading on imbalanced data? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">The dashboard shows accuracy for accessibility (non-specialist audience). After SMOTE, all training classes are equalised — so accuracy and Macro F1 are closely aligned. The full Macro F1, Cohen's Kappa, and per-class precision/recall are reported in the IEEE technical report. The evidence matrix footnote notes this explicitly.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Did adding more training data (50K → 232K) improve D3 results? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">No — XLM-RoBERTa: 98.1% (50K, NB2) · 98.02% (Full 232K) · 97.78% (H1) · 98.02% (H2). Maximum delta = 0.32 percentage points. KS tests across the three split-study subsets (Full, H1, H2) show no significant difference between their distributions: suicide class p=0.4967 (H1 vs H2), p=0.9758 (Full vs H1); non-suicide class p=0.8125 (H1 vs H2), p=0.9992 (Full vs H1). All well above the p=0.05 threshold — distribution shift is not driving the results. This is Finding 03.</div></div>
</div>
</div>
<div class="faq-group">
<div class="faq-group-title">Architecture &amp; Live Demo</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Is the live demo using real models or hardcoded responses? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">Real models. The Flask app proxies every request to a HuggingFace Space (<code>esvanth-mindscan.hf.space</code>) which runs <code>predict.py</code> with all 12 loaded models. There is no hardcoded data — every input goes through the full pipeline. If the Space is sleeping it auto-wakes within ~60 seconds.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">What does "Ensemble Conflict" (amber banner) mean? Why not just show red? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">It means classical D3 models (LR/SVM/XGBoost) flagged suicide risk by majority vote, but XLM-RoBERTa — the best model at 98.1% accuracy — disagrees. A pure majority vote could trigger false alarms on metaphorical language ("I'm dying of embarrassment"). The amber state expresses uncertainty rather than forcing a binary decision, which maps directly to "escalate for human review" — the appropriate clinical-conservative response.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">Why does D2 under-flag clinical-style text like "I feel exhausted, nothing feels enjoyable"? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">This is the Affective vs. Clinical Lexicon Gap (Finding 04, documented in NAACL 2024). D2 was trained on Twitter emotional language — explicit distress, slang, emotional punctuation. Clinical presentations use diagnostic vocabulary: anhedonia ("nothing feels enjoyable"), psychomotor fatigue, flat affect. These words are absent from D2's training distribution. This is not a bug — it is an empirical finding about the domain gap between social media affect and clinical language.</div></div>
</div>
<div class="faq-item">
<button class="faq-q" onclick="toggleFaq(this)">What is the single most important future direction? <span class="faq-chevron">▼</span></button>
<div class="faq-a"><div class="faq-a-inner">Replace TF-IDF classical models with <strong>MentalBERT/MentalRoBERTa</strong> (Ji et al. 2022) pretrained on mental health forum data. Combine all three tasks in a true multi-task learning setup with a shared encoder and task-specific heads — following the MTL precedent from Zogan et al. (2024). This would address both documented limitations (Affective Lexicon Gap and TF-IDF overfitting) simultaneously.</div></div>
</div>
</div>
</section>
<footer>
MindScan · NCI H9DAI Research Project 2026 · Academic Prototype Only<br>
Datasets: Zenodo 14233292 · Kaggle albertobellardini · Kaggle nikhileswarkomati<br>
Not for clinical use · MSc Artificial Intelligence coursework
</footer>
<script>
// ---- METHODOLOGY PANEL SWITCH ----
/**
 * Mark the methodology step and the detail panel at position `idx` as
 * active (and every other one as inactive), then re-trigger the animation
 * on the `.method-detail` container.
 *
 * @param {number} idx - zero-based position of the step/panel to activate
 */
function showMethodDetail(idx) {
  // Steps and panels are matched purely by their position in document order.
  for (const selector of ['.method-step', '.md-panel']) {
    const nodes = document.querySelectorAll(selector);
    nodes.forEach((node, position) => {
      node.classList.toggle('active', position === idx);
    });
  }

  // Re-trigger animation: switch the inline animation off now and clear the
  // override again on the next animation frame.
  const detail = document.querySelector('.method-detail');
  detail.style.animation = 'none';
  requestAnimationFrame(() => {
    detail.style.animation = '';
  });
}
// ---- COUNTER ANIMATION ----
/**
 * Count every `.stat-num[data-target]` element up from 0 to its target
 * value over 1.4 s using a cubic ease-out curve.
 *
 * Attributes read from each element:
 *   data-target  numeric end value (required; elements whose value is not
 *                a finite number are left untouched instead of showing "NaN")
 *   data-dec     number of decimals to display (default 0 -> floored integer)
 *   data-suffix  text appended after the number (default '')
 */
function animateCounters() {
  const DURATION_MS = 1400;
  document.querySelectorAll('.stat-num[data-target]').forEach((el) => {
    const target = parseFloat(el.getAttribute('data-target'));
    // Keep the element's static text rather than animating "NaN".
    if (!Number.isFinite(target)) return;
    const dec = parseInt(el.getAttribute('data-dec') || '0', 10);
    const suffix = el.getAttribute('data-suffix') || '';
    const start = performance.now();

    function step(now) {
      // Clamp to [0, 1]: the frame timestamp handed to the callback can be
      // slightly earlier than `start`, which would otherwise yield a
      // negative progress and briefly display a negative number.
      const p = Math.min(Math.max((now - start) / DURATION_MS, 0), 1);
      const ease = 1 - Math.pow(1 - p, 3);
      const val = target * ease;
      el.textContent = (dec > 0 ? val.toFixed(dec) : Math.floor(val)) + suffix;
      if (p < 1) requestAnimationFrame(step);
    }

    requestAnimationFrame(step);
  });
}
// Once the window's load event fires, wait 300 ms and start the counters.
window.addEventListener('load', function startCountersAfterLoad() {
  setTimeout(animateCounters, 300);
});
// ---- SAMPLES ----
// Preset example inputs. loadSample(i) copies SAMPLES[i] into the text box,
// so the order of the entries is the index callers pass in.
const SAMPLES=[
"I been going through depression after having my baby. I didn't even realise it till recently. I always said I'm strong but your emotions change, your body changes. I need time.",
"The universe is sending me signals I should follow. I know it's the psychosis. I know it isn't real. But it feels so real every single day.",
"I've sorted everything out. Told my friends I love them. Finally feel at peace with my decision. I'm not sad anymore β just ready.",
"Had such a great day today! Went hiking with friends and saw the most amazing sunset. Feeling really grateful and happy to be alive."
];
// Text box holding the text to analyse; also read by loadSample() and runAnalysis().
const ta = document.getElementById('textInput');

// Keep the character counter in step with whatever is typed into the text box.
ta.addEventListener('input', () => {
  const counter = document.getElementById('charCount');
  counter.textContent = ta.value.length + ' characters';
});
/**
 * Copy preset sample `i` into the text box and refresh the character counter.
 *
 * @param {number} i - index into SAMPLES
 */
function loadSample(i) {
  ta.value = SAMPLES[i];
  const counter = document.getElementById('charCount');
  // The length is read back from the text box, not from the sample itself.
  counter.textContent = `${ta.value.length} characters`;
}
// ---- RUN ANALYSIS ----
/**
 * POST the trimmed text-box contents to `/predict` and hand the JSON reply
 * to render(). Does nothing when the text box is blank.
 *
 * While the request is in flight the run button is disabled, the spinner is
 * shown and the results panel is hidden; the button and spinner are always
 * restored afterwards, whatever the outcome.
 *
 * Failure reporting (each case raises exactly one alert):
 *   - request rejected or reply is not JSON -> "Cannot reach the inference backend…"
 *   - non-2xx reply with a JSON body        -> "Error: <body.error or 'failed'>"
 *   - render() throws on the payload        -> "Error: … unexpected response."
 * Transport and render failures are also logged with console.error so the
 * underlying exception is not lost.
 *
 * @returns {Promise<void>}
 */
async function runAnalysis() {
  const text = ta.value.trim();
  if (!text) return;

  const btn = document.getElementById('runBtn');
  const sp = document.getElementById('spinner');
  const bt = document.getElementById('btnTxt');
  btn.disabled = true;
  sp.style.display = 'block';
  bt.textContent = 'Running models...';
  document.getElementById('results').style.display = 'none';

  try {
    let r;
    let d;
    try {
      r = await fetch('/predict', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
      });
      d = await r.json();
    } catch (e) {
      console.error('runAnalysis: request to /predict failed', e);
      alert('Cannot reach the inference backend. The HuggingFace Space may be waking up \u2014 wait 30 seconds and try again.');
      return;
    }

    if (!r.ok) {
      // `d` may legitimately be null or a non-object JSON value.
      alert('Error: ' + ((d && d.error) || 'failed'));
      return;
    }

    // Kept separate from the request: a malformed payload is not a
    // connectivity problem and must not be reported as one.
    try {
      render(d);
    } catch (e) {
      console.error('runAnalysis: could not render the /predict response', e);
      alert('Error: the inference backend returned an unexpected response.');
    }
  } finally {
    btn.disabled = false;
    sp.style.display = 'none';
    bt.textContent = 'Run analysis';
  }
}
/**
 * Paint the results panel from a `/predict` response.
 *
 * Reads the headline model of each task (SVM for D1, XLM-RoBERTa for D2 and
 * D3), then updates, in order: the risk banner, the D3 card, the clinical
 * insight box, the elapsed-time label and the three winner rows, and finally
 * reveals the results panel and scrolls it into view.
 *
 * @param {object} d - response payload; the fields read here are
 *   `dataset1/2/3.models[...]` (each with `label` and `confidence`),
 *   `risk_flag`, `suicide_votes` and `processing_time_ms`
 */
function render(d) {
  const byId = (id) => document.getElementById(id);

  const disorder = d.dataset1.models['SVM'];
  const affect = d.dataset2.models['XLM-RoBERTa'];
  const crisis = d.dataset3.models['XLM-RoBERTa'];
  const disorderText = disorder.label.toLowerCase();
  const affectText = affect.label.toLowerCase();
  const crisisText = crisis.label.toLowerCase();

  const flagged = d.risk_flag;
  const notDepressed = affectText.includes('not');
  // "no depression" may come back as just "no" from some model versions;
  // per the original note, none of the disorder class names contain "no".
  const hasDisorder = !disorderText.includes('no');
  const isSuicide = crisisText.includes('suicide') && !crisisText.includes('non');
  // confirmed: flag raised and XLM-RoBERTa agrees.
  // conflict:  flag raised but XLM-RoBERTa says non-suicide.
  const confirmed = Boolean(flagged && isSuicide);
  const conflict = Boolean(flagged && !isSuicide);

  // ---- Risk banner ----
  let banner;
  if (confirmed) {
    banner = {
      cls: 'risk-banner danger',
      icon: 'β ',
      title: 'High Suicide Risk Detected',
      body: 'D3 flagged this text ('+d.suicide_votes+'). This is a research prototype β seek professional help if needed.',
    };
  } else if (conflict) {
    banner = {
      cls: 'risk-banner warn',
      icon: 'β‘',
      title: 'Ensemble Conflict β Classical Models Flagged Risk',
      body: d.suicide_votes+', but XLM-RoBERTa (best model, 98.1% accuracy) rates this as '+crisis.label+'. Classical TF-IDF models may be over-flagging depressive language.',
    };
  } else {
    banner = {
      cls: 'risk-banner safe',
      icon: 'β',
      title: 'No immediate crisis risk detected',
      body: 'D3 did not detect suicidal ideation markers. ('+d.suicide_votes+')',
    };
  }
  byId('riskBanner').className = banner.cls;
  byId('rbIcon').textContent = banner.icon;
  byId('rbTitle').textContent = banner.title;
  byId('rbBody').textContent = banner.body;

  // ---- D3 card dominant state ----
  const card = byId('cardD3');
  const cardLabel = card.querySelector('.wc-lbl');
  const defaultCardLabel = 'D3 β Immediate Risk Β· XLM-RoBERTa';
  if (!confirmed) {
    card.classList.remove('risk-active');
    cardLabel.textContent = conflict
      ? 'D3 β '+d.suicide_votes+' (classical) Β· XLM-RoBERTa dissents'
      : defaultCardLabel;
  } else {
    card.classList.add('risk-active');
    cardLabel.textContent = defaultCardLabel;
  }

  // ---- Clinical Insight Alert ----
  // Every insight requires D2 to read as "not depressed"; the first matching
  // case wins: confirmed risk, then ensemble conflict, then affect mismatch.
  let insight = null;
  if (notDepressed) {
    if (confirmed) {
      insight = {
        title: 'β‘ Clinical Insight β Masked Suicidality Pattern Detected',
        body: 'This text shows low depressive affect (D2: '+affect.label+') but high intent resolution (D3: Suicide Risk). This is a clinically documented pre-crisis pattern where a person appears calm and resolved rather than distressed. A sequential pipeline gating D3 behind D2 would have missed this entirely β demonstrating the necessity of the parallel architecture.',
      };
    } else if (conflict) {
      insight = {
        title: 'β Clinical Insight β Ensemble Disagreement',
        body: d.suicide_votes+' (classical models), but XLM-RoBERTa rates this as '+crisis.label+' ('+pct(crisis.confidence)+' confidence). XLM-RoBERTa (98.1% accuracy) likely correct here β classical TF-IDF models can over-flag depressive language as suicide risk. Human review recommended.',
      };
    } else if (hasDisorder) {
      insight = {
        title: 'β Clinical Insight β Affect Mismatch Detected',
        body: 'D1 identifies '+disorder.label+' presentation, yet D2 finds no classic depressive affect. This is expected: D2 detects Twitter-style depressive language patterns, while psychotic, atypical, and bipolar presentations often do not match that affect profile. The patient is not presenting with classic depressive symptoms but the disorder classification remains clinically valid.',
      };
    }
  }
  const insightBox = byId('clinicalInsight');
  const insightTitle = byId('ciTitle');
  const insightBody = byId('ciBody');
  if (insight === null) {
    insightBox.style.display = 'none';
  } else {
    insightTitle.innerHTML = insight.title;
    insightBody.textContent = insight.body;
    insightBox.style.display = 'block';
  }

  byId('elapsed').textContent = d.processing_time_ms+'ms';

  // Fixed winner per task: SVM for D1, XLM-RoBERTa for D2 and D3.
  setW('A', disorder);
  setW('B', affect);
  setW('C', crisis);

  const results = byId('results');
  results.style.display = 'block';
  results.scrollIntoView({behavior:'smooth',block:'start'});
}
/**
 * Fill one winner row: the prediction label into `wp<id>`, the confidence
 * text into `wc<id>`, and the confidence as a percentage width on `wb<id>`.
 *
 * @param {string} id - row suffix appended to the element-id prefixes
 * @param {{label: string, confidence: number}} res - model result to show
 */
function setW(id, res) {
  const field = (prefix) => document.getElementById(prefix + id);

  field('wp').textContent = res.label;
  field('wc').textContent = pct(res.confidence);

  // The bar width is applied on a 100 ms timer — presumably so a CSS width
  // transition has a starting state to animate from; confirm in the stylesheet.
  setTimeout(function applyBarWidth() {
    const bar = field('wb');
    bar.style.width = (res.confidence * 100).toFixed(1) + '%';
  }, 100);
}
/**
 * Format a fraction as a percentage string with one decimal place,
 * e.g. 0.981 -> "98.1%".
 *
 * @param {number} v - fraction to format (1 corresponds to "100.0%")
 * @returns {string}
 */
function pct(v) {
  const scaled = v * 100;
  return `${scaled.toFixed(1)}%`;
}
/* ββ CODE MODAL DATA ββ */
const CM_DATA = {
clean_text: {
title: 'clean_text() β Text Preprocessing',
code: `def clean_text(text):
text = str(text).lower()
# remove URLs
text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)
# remove @mentions
text = re.sub(r'@\\w+', '', text)
# remove # symbol (keep hashtag word)
text = re.sub(r'#', '', text)
# strip all punctuation
text = text.translate(
str.maketrans('', '', string.punctuation)
)
# collapse whitespace
text = re.sub(r'\\s+', ' ', text).strip()
return text`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 5 Β· notebooks/DA_2_Notebook.ipynb β cell 6 Β· predict.py lines 106β113',
why: '<strong>Why lowercase?</strong> "Sad" and "sad" must map to the same TF-IDF token. <strong>Why remove URLs?</strong> Hundreds of unique tokens, zero semantic value β pure noise. <strong>Why keep hashtag words?</strong> "#depressed" β "depressed" preserves the semantic signal, removes the markup. <strong>Why no stemming?</strong> Stemming degrades bigram quality β "kill myself" would become "kill myself" but "killing" β "kill" breaks n-gram boundaries. Same function is used at both training time (notebook) and inference time (predict.py) to guarantee identical preprocessing.',
outputs: [
{label:'Input', val:'"I been going through #Depression after @user check https://t.co/xyz!!"'},
{label:'Output', val:'"i been going through depression after check"'},
{label:'Note', val:'Applied to all 3 datasets before TF-IDF and before XLM-RoBERTa tokenisation'},
]
},
smote: {
title: 'SMOTE β Synthetic Minority Oversampling',
code: `def apply_smote(X_train, y_train):
before = Counter(y_train)
smote = SMOTE(random_state=42)
X_bal, y_bal = smote.fit_resample(X_train, y_train)
after = Counter(y_bal)
print(f'SMOTE: {sum(before.values())} β {sum(after.values())}')
return X_bal, y_bal
# Called AFTER TF-IDF vectorisation, AFTER train/test split
X1_bal, y1_bal = apply_smote(X1_tr_tf, y1_tr)`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 5 (apply_smote def) Β· cell 10 (D1 call) Β· cell 17 (D2 call) Β· D3 skipped',
why: '<strong>Why after TF-IDF?</strong> SMOTE interpolates in feature space β it creates synthetic TF-IDF vectors, not synthetic text. <strong>Why not before the split?</strong> Applying SMOTE before splitting would let synthetic samples leak into the test set β the test set must contain only real data. <strong>Why not class_weight instead?</strong> Class weighting reweights the loss function β it doesn\'t add new training examples. SMOTE was chosen because it physically fills the minority-class region of feature space, giving tree-based models (RF, XGB) more to learn from. <strong>D3 skipped:</strong> D3 is pre-balanced (116K each class) β no intervention needed.',
outputs: [
{label:'D1', val:'11,986 β 17,982 samples (atypical: 1,584 β 2,997, each class equalised)'},
{label:'D2', val:'8,251 β 12,800 samples (Depressed: 1,851 β 6,400)'},
{label:'D3', val:'Skipped β pre-balanced at 116,037 per class'},
]
},
tfidf: {
title: 'TfidfVectorizer β Feature Extraction',
code: `def make_tfidf(X_train, X_test, max_features=50000):
tfidf = TfidfVectorizer(
max_features=50000, # top 50K tokens by corpus frequency
ngram_range=(1, 2), # unigrams AND bigrams
sublinear_tf=True, # log(1+tf) instead of raw tf
min_df=2 # ignore tokens appearing < 2 times
)
Xtr = tfidf.fit_transform(X_train) # fit on train only
Xte = tfidf.transform(X_test) # apply to test (no fit)
return tfidf, Xtr, Xte`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 5 (make_tfidf def) Β· cell 10 (D1) Β· cell 17 (D2) Β· cell 22 (D3)',
why: '<strong>ngram_range=(1,2):</strong> Bigrams capture "kill myself", "not happy", "feeling better" β critical signals that unigrams miss entirely. <strong>sublinear_tf=True:</strong> Applies log(1+tf) to dampen high-frequency word dominance. Without this, common words like "i", "feel" swamp the features. <strong>min_df=2:</strong> Removes hapax legomena (words appearing only once) β they add 0 generalisable information. <strong>fit only on train:</strong> Vocabulary is locked on training data β the test set is transformed using this fixed vocabulary, preventing any data leakage.',
outputs: [
{label:'D1 shape', val:'11,986 Γ 50,000 sparse matrix (tweets Γ features)'},
{label:'D2 shape', val:'8,251 Γ 50,000 sparse matrix'},
{label:'D3 shape', val:'40,000 Γ 50,000 sparse matrix'},
{label:'After SMOTE', val:'D1 becomes 17,982 Γ 50,000, D2 becomes 12,800 Γ 50,000'},
]
},
tokeniser: {
title: 'XLM-RoBERTa Tokeniser',
code: `tokenizer = AutoTokenizer.from_pretrained(
'FacebookAI/xlm-roberta-base'
)
def tokenize_tweets(examples):
return tokenizer(
examples['text'],
max_length=128, # 128 for D1/D2 (tweets avg ~40 tokens)
truncation=True, # cut anything beyond max_length
padding='max_length' # pad shorter inputs to fixed length
)
# D3 uses max_length=256 β Reddit posts avg 200.8 words (~280 tokens)
def tokenize_reddit(examples):
return tokenizer(
examples['text'],
max_length=256,
truncation=True,
padding='max_length'
)`,
src: 'notebooks/DA_2_Notebook.ipynb β cell 9 (tokenize_tweets, max_length=128, D1/D2) Β· cell 21 (tokenize_reddit, max_length=256, D3)',
why: '<strong>SentencePiece subword tokenisation:</strong> Splits unknown words into subword pieces β "suicidal" might become ["su", "ici", "dal"]. No word is truly out-of-vocabulary. <strong>max_length=128 for D1/D2:</strong> Tweets average ~31 words β 40 tokens. 128 is 3Γ headroom. <strong>max_length=256 for D3:</strong> Reddit posts average 200.8 words β 280 tokens β 128 would truncate most of the signal. <strong>padding=\'max_length\':</strong> All batches must be identical length for GPU tensor operations β shorter inputs are padded with [PAD] tokens. The attention mask tells the model to ignore padding.',
outputs: [
{label:'D1/D2 shape', val:'Each input β tensor of shape [128] (input_ids) + [128] (attention_mask)'},
{label:'D3 shape', val:'Each input β tensor of shape [256] Γ 2'},
{label:'Example', val:'"i feel hopeless" β input_ids: [0, 444, 7809, 73542, 2, 1, 1, ...]'},
]
},
lr: {
title: 'Logistic Regression',
code: `LogisticRegression(
max_iter=1000, # enough iterations to converge on 50K features
class_weight='balanced', # backup alongside SMOTE
random_state=42,
n_jobs=-1 # use all CPU cores
)`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 11 (D1) Β· cell 18 (D2) Β· cell 23 (D3)',
why: '<strong>Why use it?</strong> Fast, interpretable linear baseline. On 50,000 TF-IDF features, L2 regularisation prevents overfitting by shrinking large weights toward zero. Outputs calibrated probabilities via softmax β important for confidence scores in the UI. <strong>class_weight=\'balanced\':</strong> Secondary guard alongside SMOTE β the model pays proportionally more attention to minority classes during gradient updates.',
outputs: [
{label:'D1', val:'91.5% accuracy β solid baseline, beaten by SVM'},
{label:'D2', val:'98.9% accuracy'},
{label:'D3', val:'93.2% accuracy'},
]
},
svm: {
title: 'SVM β LinearSVC',
code: `LinearSVC(
C=1.0, # regularisation strength (lower = more reg)
class_weight='balanced',
max_iter=2000,
random_state=42
)
# LinearSVC has no predict_proba β use decision_function + softmax
scores = model.decision_function(vec)[0]
e = np.exp(scores - scores.max())
conf = float(e[pred_idx] / e.sum())`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 11 (D1) Β· cell 18 (D2) Β· cell 23 (D3) Β· predict.py lines 147β154 (confidence fallback)',
why: '<strong>Why SVM wins on D1?</strong> LinearSVC finds the maximum-margin hyperplane in TF-IDF feature space β the optimal linear decision boundary for sparse high-dimensional data. Tweets (31 words avg) produce sparse TF-IDF vectors where the margin is well-defined. Contextual embeddings (XLM-RoBERTa) add no value at this sentence length. <strong>Why LinearSVC over SVC(kernel=\'rbf\')?</strong> Linear kernel scales to 50,000 features. RBF kernel would be O(nΒ²) β computationally infeasible.',
outputs: [
{label:'D1', val:'92.4% accuracy β best model on D1, beats XLM-RoBERTa (90.5%)'},
{label:'D2', val:'97.1% accuracy'},
{label:'D3', val:'77.8% accuracy'},
]
},
xgb: {
title: 'XGBoost β XGBClassifier',
code: `XGBClassifier(
n_estimators=300, # 300 trees built sequentially
learning_rate=0.1, # each tree contributes 10% of its weight
max_depth=6, # max tree depth β controls complexity
eval_metric='logloss',
random_state=42,
n_jobs=-1
)`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 11 (D1) Β· cell 18 (D2) Β· cell 23 (D3)',
why: '<strong>Gradient boosting principle:</strong> Each new tree is trained to correct the residual errors of all previous trees. 300 trees Γ learning_rate=0.1 = strong ensemble. <strong>Why does it collapse on D3?</strong> Vocabulary overlap between depressive and suicidal language in Reddit posts β words like "exhausted", "hopeless" appear in both classes. Boosted trees memorise these majority-class token patterns and fail at full scale (232K). XGBoost is highly sensitive to distribution shifts at this scale, shown by inconsistent H1/H2 results (60.1% vs 71.0%).',
outputs: [
{label:'D1', val:'91.8% accuracy'},
{label:'D2', val:'99.3% accuracy'},
{label:'D3 (50K)', val:'91.6% β performs well on sample'},
{label:'D3 (Full 232K)', val:'70.5% β collapse (lexical overfitting)'},
]
},
xlmr_ft: {
title: 'XLM-RoBERTa Fine-Tuning',
code: `xlmr = AutoModelForSequenceClassification.from_pretrained(
'FacebookAI/xlm-roberta-base',
num_labels=NUM_LABELS # 6 for D1, 2 for D2, 2 for D3
)
args = TrainingArguments(
num_train_epochs=3,
learning_rate=2e-5, # standard BERT fine-tuning rate
per_device_train_batch_size=16, # 8 for D3 (longer sequences)
gradient_accumulation_steps=2, # D3 only β simulates batch=16
warmup_steps=200, # gradual LR increase at start
weight_decay=0.01, # L2 regularisation on weights
load_best_model_at_end=True, # save epoch with lowest val loss
fp16=torch.cuda.is_available() # half precision β 2Γ faster on GPU
)
trainer = Trainer(
model=xlmr, args=args,
train_dataset=train_tok,
eval_dataset=test_tok
)
trainer.train()`,
src: 'notebooks/DA_2_Notebook.ipynb β cell 10 (model init D1) Β· cell 11 (TrainingArguments D1) Β· cell 17 (D2) Β· cell 22 (model init D3) Β· cell 23 (TrainingArguments D3)',
why: '<strong>lr=2e-5:</strong> Standard for fine-tuning BERT-family models. Too high destroys pretrained weights (catastrophic forgetting). Too low fails to converge in 3 epochs. <strong>warmup_steps=200:</strong> LR starts at 0 and linearly ramps β prevents early instability when weights are far from the task optimum. <strong>load_best_model_at_end:</strong> Epoch 3 is not always best β we restore the checkpoint with the lowest validation loss. <strong>D3 batch=8 + accumulation=2:</strong> max_length=256 uses 2Γ GPU memory vs 128. Accumulation simulates batch=16 without OOM.',
outputs: [
{label:'D1', val:'90.5% accuracy (Macro F1: 0.9117, ΞΊ=0.8852)'},
{label:'D2', val:'99.95% accuracy (Macro F1: 0.9993)'},
{label:'D3', val:'98.1% accuracy (Macro F1: 0.9810, ΞΊ=0.9620)'},
]
},
majority_vote: {
title: 'Ensemble Vote β Risk Flag Logic',
code: `# From predict.py β predict_all() function
suicide_count = sum(
1 for r in d3.values()
if 'suicide' in r['label'].lower()
and 'non' not in r['label'].lower()
)
risk_flag = suicide_count >= 3 # majority = β₯3 of 4 models
# d3.values() = results from LR, SVM, XGBoost, XLM-RoBERTa
# XLM-RoBERTa is also checked separately for banner state:
isSuicide = d3['XLM-RoBERTa'].label includes 'suicide' (JS)
# Three UI states:
# risk_flag=True AND XLM-R agrees β RED (High Suicide Risk)
# risk_flag=True AND XLM-R dissents β AMBER (Ensemble Conflict)
# risk_flag=False β GREEN (Low Risk)`,
src: 'predict.py lines 266β270 (suicide_count + risk_flag) Β· predict.py line 296 (suicide_votes string) Β· templates/index.html JS render() β banner state logic',
why: '<strong>Why β₯3/4 threshold?</strong> 1β2 flagging models could be TF-IDF false positives (lexical overfitting). 3+ represents genuine consensus β meaningful signal. <strong>Why check XLM-RoBERTa separately for the banner?</strong> XLM-RoBERTa has the highest D3 accuracy (98.1%) and understands context. If XLM-R disagrees with the majority, the amber "Ensemble Conflict" state is safer than a red alert β it flags uncertainty rather than over-alarming on metaphorical language ("I\'m dying of embarrassment").',
outputs: [
{label:'Threshold', val:'β₯ 3/4 D3 models output "suicide" (not "non-suicide")'},
{label:'Red banner', val:'risk_flag=True AND XLM-RoBERTa confirms suicide'},
{label:'Amber banner', val:'risk_flag=True but XLM-RoBERTa says non-suicide'},
{label:'Green banner', val:'risk_flag=False β fewer than 3 models flagged'},
]
},
eval_metrics: {
title: 'Evaluation β How Metrics Are Computed',
code: `def evaluate_transformer(name, y_true, y_pred,
label_names, ds_key, results_store):
acc = accuracy_score(y_true, y_pred)
macro = f1_score(y_true, y_pred, average='macro')
kappa = cohen_kappa_score(y_true, y_pred)
print(f'Accuracy : {acc*100:.2f}%')
print(f'Macro F1 : {macro:.4f}')
print(f"Cohen's Kappa: {kappa:.4f}")
print(classification_report(y_true, y_pred,
target_names=label_names))
results_store[name] = {
'accuracy': round(acc, 4),
'macro_f1': round(macro, 4),
'kappa': round(kappa, 4)
}
# Same function used for classical models in Notebook 1:
for name, model in models_d1.items():
model.fit(X1_bal, y1_bal) # train on SMOTE-balanced data
preds = model.predict(X1_te_tf) # test on original held-out set
evaluate(name, y1_te, preds, le1.classes_, 'd1', d1_results)`,
src: 'notebooks/DA_Notebook_One.ipynb β cell 5 (evaluate def, classical) Β· notebooks/DA_2_Notebook.ipynb β cell 6 (evaluate_transformer def)',
why: '<strong>Accuracy:</strong> (correct predictions) / (total predictions). Simple but misleading on imbalanced data β a model predicting majority class always gets high accuracy. Valid here because SMOTE balanced the training set and D3 is pre-balanced. <strong>Macro F1:</strong> Averages F1 per class without weighting by class size β penalises models that ignore minority classes. This is the primary metric in the IEEE report. <strong>Cohen\'s Kappa:</strong> Measures agreement beyond what chance alone would produce. Formula: (observed β expected) / (1 β expected). ΞΊ > 0.8 = almost perfect agreement. Reported because the base paper (Tumaliuan 2024) did not report it β we added it as an improvement. <strong>classification_report:</strong> Shows per-class precision, recall, F1 β the full picture behind the headline number.',
outputs: [
{label:'D1 SVM', val:'Accuracy 92.4%, Macro F1 0.9269, ΞΊ=0.9072'},
{label:'D2 XLM-R', val:'Accuracy 99.95%, Macro F1 0.9993, ΞΊ=0.9986'},
{label:'D3 XLM-R', val:'Accuracy 98.1%, Macro F1 0.9810, ΞΊ=0.9620'},
{label:'Atypical F1', val:'0.992 β highest per-class score in the project (D1, after SMOTE)'},
]
},
flask_deploy: {
title: 'Flask App β Deployment & Proxy Mode',
code: `# app.py β auto-detects LOCAL vs PROXY mode at startup
_LOCAL_MODELS = os.path.join(BASE_DIR, 'models', 'classical')
_use_local = os.path.isdir(_LOCAL_MODELS)
@app.route('/predict', methods=['POST'])
def predict():
data = request.get_json()
text = data['text'].strip()
if len(text) > 5000:
return jsonify({'error': 'Text too long'}), 400
if _use_local:
# LOCAL mode β models loaded in memory
result = predict_all(text)
return jsonify(result)
else:
# PROXY mode β forward to HuggingFace Space
r = requests.post(
f'{HF_SPACE_URL}/predict',
json={'text': text},
timeout=120
)
return r.content, r.status_code
# HF_SPACE_URL = 'https://esvanth-mindscan.hf.space'
# Overridable via environment variable`,
src: 'app.py lines 25β27 (mode detection) Β· lines 61β97 (/predict endpoint) Β· line 70 (5000-char limit) Β· line 91 (timeout=120)',
why: '<strong>Why two modes?</strong> The 12 models total ~2GB on disk. Running locally requires the models folder. The HuggingFace Space hosts the same predict.py and models β the proxy just forwards requests there. <strong>Why timeout=120?</strong> The HF Space sleeps after inactivity and takes ~60s to wake. 120s gives headroom. <strong>Why 5000 char limit?</strong> XLM-RoBERTa max_length=256 tokens β ~1500 characters. 5000 chars is a safe upper bound that prevents abuse without being restrictive. <strong>How the browser talks to Flask:</strong> JavaScript fetch() β POST /predict (localhost:5001) β Flask β HF Space β predict_all() β JSON response β render() updates the UI.',
outputs: [
{label:'LOCAL mode', val:'Triggered when models/classical/ directory exists. Loads all 12 models at startup (~30s on CPU).'},
{label:'PROXY mode', val:'Default β no local models needed. Forwards to esvanth-mindscan.hf.space'},
{label:'Timeout', val:'504 returned after 120s if HF Space is sleeping. Auto-wakes in ~60s.'},
{label:'Port', val:'localhost:5001 (overridable via PORT env var)'},
]
},
predict_flow: {
title: 'predict_all() β Full Inference Flow',
code: `def predict_all(raw_text):
# Step 1 β clean text (same function as training)
clean = clean_text(raw_text)
# Step 2 β run all 3 classical models per dataset
# (LR, SVM, XGBoost share the same TF-IDF vector)
def predict_classical(text_clean, ds):
tfidf = _models[f'tfidf_{ds}']
vec = tfidf.transform([text_clean]) # sparse vector
for model_name in ['logistic_regression','svm','xgboost']:
model = _models[f'{model_name}_{ds}']
pred_idx = model.predict(vec)[0]
label = le.classes_[pred_idx]
# SVM has no predict_proba β use softmax(decision_function)
if hasattr(model, 'predict_proba'):
conf = model.predict_proba(vec)[0][pred_idx]
else:
scores = model.decision_function(vec)[0]
e = np.exp(scores - scores.max())
conf = e[pred_idx] / e.sum()
# Step 3 β run XLM-RoBERTa per dataset
def predict_transformer(text_raw, ds):
inputs = tokenizer(text_raw, max_length=max_len,
truncation=True, padding='max_length')
with torch.no_grad():
logits = model(**inputs).logits
probs = torch.softmax(logits, dim=1)[0]
pred_idx = probs.argmax()
# Step 4 — majority vote for risk_flag
suicide_count = sum(1 for r in d3.values()
if 'suicide' in r['label'] and 'non' not in r['label'])
risk_flag = suicide_count >= 3`,
src: 'predict.py — clean_text lines 106–113 · predict_classical lines 119–163 · predict_transformer lines 166–215 · predict_all lines 221–302',
why: '<strong>Why clean the text first?</strong> The TF-IDF vocabulary was built on clean text — passing raw text would miss tokens. XLM-RoBERTa receives the raw text because its SentencePiece tokeniser handles punctuation/URLs natively. <strong>Why one TF-IDF vector for 3 classical models?</strong> All three (LR, SVM, XGBoost) use the same vectoriser — the vector is computed once and reused, saving 2 redundant transformations per dataset. <strong>Why torch.no_grad()?</strong> Inference doesn\'t need gradients — disabling them halves memory usage and speeds up the forward pass. <strong>Why softmax on logits?</strong> The model outputs raw logit scores (unbounded). Softmax converts them to probabilities that sum to 1 — required for the confidence percentage shown in the UI.',
outputs: [
{label:'Input', val:'"I feel exhausted, nothing feels enjoyable"'},
{label:'After clean', val:'"i feel exhausted nothing feels enjoyable"'},
{label:'D1 winner', val:'SVM → Major Depressive (highest confidence)'},
{label:'D2 winner', val:'XLM-RoBERTa → Not Depressed (Twitter Affect Bias — clinical text)'},
{label:'D3 result', val:'risk_flag computed from 4 model votes; XLM-R checked separately for banner'},
{label:'Response time', val:'~200ms local (GPU) · ~2–5s proxy (HF Space warm)'},
]
},
split_study: {
title: 'D3 Split Study β RQ2',
code: `# Sample 25K per class (50K total) for the baseline
df3_sample = df3.groupby('label').apply(
lambda x: x.sample(25000, random_state=42)
).reset_index(drop=True)
# Half splits — 12.5K per class each
df3_h1 = df3.groupby('label').apply(
lambda x: x.iloc[:12500]
).reset_index(drop=True)
df3_h2 = df3.groupby('label').apply(
lambda x: x.iloc[12500:25000]
).reset_index(drop=True)
# Full dataset — 116K per class (232K total)
df3_full = df3 # no sampling
# KS test to confirm splits share same distribution
from scipy.stats import ks_2samp
stat, p = ks_2samp(len_sample, len_full)
# p > 0.49 across all splits — identical distributions confirmed`,
src: 'notebooks/DA_3_SplitStudy.ipynb — cell 28 (sampling) · cell 4 (TrainingArguments) · cell 14 (KS test)',
why: '<strong>What is RQ2?</strong> "Does more training data improve performance?" The split study trains 4 separate XLM-RoBERTa models on 50K, 116K (×2), and 232K samples. <strong>KS test:</strong> Kolmogorov-Smirnov test verifies all splits come from the same distribution (p > 0.49) — ruling out that one split has easier examples. <strong>Finding:</strong> Accuracy changes by only 0.3% (98.1% → 98.0%) across 4× more data. The 50K sample fully captures the underlying signal distribution.',
outputs: [
{label:'50K sample', val:'98.1% accuracy (XLM-RoBERTa)'},
{label:'H1 (116K)', val:'97.8% accuracy'},
{label:'H2 (116K)', val:'98.0% accuracy'},
{label:'Full (232K)', val:'98.0% accuracy — Δ=0.1% vs 50K'},
{label:'KS p-value', val:'p > 0.49 across all split pairs — identical distributions'},
]
}
};
/**
 * Open the code modal and fill its three tabs from one CM_DATA entry.
 *
 * Does nothing when `key` has no entry in CM_DATA.
 *
 * @param {string} key - Property name of the CM_DATA entry to display.
 */
function openCM(key){
  const entry = CM_DATA[key];
  if(!entry) return;

  const byId = (id) => document.getElementById(id);

  byId('cmTitle').textContent = entry.title;

  // Code tab: the sample code and its source reference are escaped before insertion.
  byId('cmt0').innerHTML =
    `<pre class="cm-pre">${escHTML(entry.code)}</pre><div class="cm-src">Source: ${escHTML(entry.src)}</div>`;

  // Why tab: `why` is inserted as-is (not passed through escHTML), so any
  // markup it contains is rendered.
  byId('cmt1').innerHTML = `<div class="cm-why-body">${entry.why}</div>`;

  // Output tab: one label/value row per entry in `outputs`.
  const rowsHtml = entry.outputs.reduce(
    (html, out) =>
      `${html}<div class="cm-out-row"><div class="cm-out-lbl">${escHTML(out.label)}</div>` +
      `<div class="cm-out-val"><em>${escHTML(out.val)}</em></div></div>`,
    ''
  );
  byId('cmt2').innerHTML = rowsHtml;

  // Always start on the code tab, then reveal the overlay.
  switchCMTab(0);
  byId('cmOverlay').classList.add('open');
  // Stop the page behind the modal from scrolling while it is open.
  document.body.style.overflow = 'hidden';
}
/**
 * Hide the code modal: drop the `open` class from the overlay and clear the
 * inline `overflow` style on <body> (openCM sets it to 'hidden').
 */
function closeCM(){
  const overlay = document.getElementById('cmOverlay');
  overlay.classList.remove('open');

  const { style: bodyStyle } = document.body;
  bodyStyle.overflow = '';
}
/**
 * Close the modal only when the event target is the overlay element itself
 * (`#cmOverlay`); events whose target is any other element are ignored.
 *
 * @param {Event} e - Event whose `target` is compared against the overlay.
 */
function closeCMOutside(e){
  const hit = e.target;
  if(hit !== document.getElementById('cmOverlay')) return;
  closeCM();
}
/**
 * Activate the tab button and panel at position `idx`, deactivating the rest.
 *
 * The comparison is strict, so `idx` must be a number; any other value
 * (e.g. the string '1') leaves every tab and panel inactive.
 *
 * @param {number} idx - Zero-based position of the tab to show.
 */
function switchCMTab(idx){
  const markActive = (el, pos) => {
    el.classList.toggle('active', pos === idx);
  };

  // Tab buttons first, then their panels; both lists share the same ordering.
  for(const selector of ['.cm-tab', '.cm-panel']){
    document.querySelectorAll(selector).forEach(markActive);
  }
}
/**
 * Escape text so it can be inserted as HTML element content via innerHTML.
 *
 * Replaces `&`, `<` and `>` with their character entities in a single pass,
 * so an ampersand produced by one replacement is never escaped a second time.
 * Quotes are left untouched, so the result is meant for element content, not
 * for interpolation into attribute values.
 *
 * @param {*} s - Value to escape; coerced with String() first.
 * @returns {string} The escaped text.
 */
function escHTML(s){
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
  return String(s).replace(/[&<>]/g, (ch) => entities[ch]);
}
// Keyboard dismissal: any Escape key press on the document closes the code modal.
document.addEventListener('keydown', (event) => {
  if(event.key !== 'Escape') return;
  closeCM();
});
/**
 * Expand or collapse the FAQ entry that contains the given element.
 *
 * Looks up the nearest enclosing `.faq-item` and flips its `open` class.
 *
 * @param {Element} btn - Element inside a `.faq-item`; its closest
 *   `.faq-item` ancestor (or itself) is the one toggled.
 */
function toggleFaq(btn){
  const { classList } = btn.closest('.faq-item');
  const shouldOpen = !classList.contains('open');
  classList.toggle('open', shouldOpen);
}
</script>
</body>
</html> |