Upload folder using huggingface_hub
Browse files- README.md +18 -0
- config.json +30 -0
- model_arch.py +26 -0
- pytorch_model.bin +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +14 -0
README.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: en
|
| 3 |
+
tags: [text-classification, emotion-detection, mental-health, hci, distilbert, cnn]
|
| 4 |
+
datasets: [go_emotions]
|
| 5 |
+
metrics: [f1, accuracy]
|
| 6 |
+
---
|
| 7 |
+
# MoodShift — Hybrid DistilBERT + CNN Emotion Classifier
|
| 8 |
+
Novel hybrid architecture for HCI research (ICCA 2026).
|
| 9 |
+
|
| 10 |
+
## Architecture
|
| 11 |
+
- **DistilBERT** → [CLS] global context (768-dim)
|
| 12 |
+
- **CNN (kernels 2,3,4)** → local n-gram emotion patterns (384-dim)
|
| 13 |
+
- **Fusion** → 1152-dim → Linear(512) → Linear(6)
|
| 14 |
+
|
| 15 |
+
## Labels
|
| 16 |
+
sadness | joy | love | anger | fear | surprise
|
| 17 |
+
|
| 18 |
+
Best Val F1: **0.9158**
|
config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "hybrid_distilbert_cnn",
|
| 3 |
+
"num_labels": 6,
|
| 4 |
+
"id2label": {
|
| 5 |
+
"0": "sadness",
|
| 6 |
+
"1": "joy",
|
| 7 |
+
"2": "love",
|
| 8 |
+
"3": "anger",
|
| 9 |
+
"4": "fear",
|
| 10 |
+
"5": "surprise"
|
| 11 |
+
},
|
| 12 |
+
"label2id": {
|
| 13 |
+
"sadness": 0,
|
| 14 |
+
"joy": 1,
|
| 15 |
+
"love": 2,
|
| 16 |
+
"anger": 3,
|
| 17 |
+
"fear": 4,
|
| 18 |
+
"surprise": 5
|
| 19 |
+
},
|
| 20 |
+
"cnn_embed_dim": 128,
|
| 21 |
+
"cnn_filters": 128,
|
| 22 |
+
"cnn_kernels": [
|
| 23 |
+
2,
|
| 24 |
+
3,
|
| 25 |
+
4
|
| 26 |
+
],
|
| 27 |
+
"max_len": 128,
|
| 28 |
+
"bert_base": "distilbert-base-uncased",
|
| 29 |
+
"best_val_f1": 0.9158
|
| 30 |
+
}
|
model_arch.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import torch, torch.nn as nn, torch.nn.functional as F
|
| 3 |
+
from transformers import DistilBertModel
|
| 4 |
+
|
| 5 |
+
class CNNBranch(nn.Module):
    """Token-level CNN feature extractor.

    Embeds input token ids and runs parallel 1-D convolutions with
    several kernel widths, global-max-pooling each one to a fixed-size
    vector. The concatenated result has ``out_dim = nf * len(kernels)``
    features per example.
    """

    def __init__(self, vocab=30522, edim=128, nf=128, kernels=(2, 3, 4)):
        """
        Args:
            vocab: embedding vocabulary size (default matches bert-uncased).
            edim: embedding dimension fed to the convolutions.
            nf: number of filters per kernel width.
            kernels: convolution kernel widths (n-gram sizes).
        """
        super().__init__()
        # padding_idx=0 pins the [PAD] row of the embedding to zeros.
        self.emb = nn.Embedding(vocab, edim, padding_idx=0)
        self.convs = nn.ModuleList(
            [nn.Conv1d(edim, nf, k, padding=k // 2) for k in kernels]
        )
        self.drop = nn.Dropout(0.3)
        self.out_dim = nf * len(kernels)

    def forward(self, ids):
        """Return a (batch, out_dim) tensor of pooled n-gram features."""
        # (B, T, edim) -> (B, edim, T): Conv1d expects channels first.
        x = self.emb(ids).permute(0, 2, 1)
        pooled = []
        for conv in self.convs:
            feat = F.gelu(conv(x))
            # Global max over the time axis -> (B, nf).
            pooled.append(F.adaptive_max_pool1d(feat, 1).squeeze(2))
        return self.drop(torch.cat(pooled, 1))
|
| 14 |
+
|
| 15 |
+
class HybridClassifier(nn.Module):
    """Hybrid DistilBERT + CNN emotion classifier.

    Fuses the DistilBERT [CLS] representation (global context, 768-dim)
    with a CNN branch over the same token ids (local n-gram patterns),
    then classifies the concatenated vector with a two-layer head.
    """

    def __init__(self, n_labels=6, vocab=30522, edim=128, nf=128,
                 kernels=(2, 3, 4), drop=0.3):
        """
        Args:
            n_labels: number of output classes.
            vocab: vocabulary size for the CNN branch embedding.
            edim: CNN embedding dimension.
            nf: CNN filters per kernel width.
            kernels: CNN kernel widths.
            drop: dropout probability for the fusion head.
        """
        super().__init__()
        # NOTE(review): downloads pretrained weights at construction time.
        self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.cnn = CNNBranch(vocab, edim, nf, kernels)
        # 768 = DistilBERT hidden size; cnn.out_dim = nf * len(kernels).
        fused = 768 + self.cnn.out_dim
        self.head = nn.Sequential(
            nn.Dropout(drop),
            nn.Linear(fused, 512),
            nn.GELU(),
            nn.Dropout(drop * 0.7),
            nn.Linear(512, n_labels),
        )
        self.n_labels = n_labels

    def forward(self, input_ids, attention_mask):
        """Return (batch, n_labels) logits for the given token batch."""
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # First position of the last hidden state is the [CLS] token.
        cls_vec = bert_out.last_hidden_state[:, 0, :]
        local_vec = self.cnn(input_ids)
        return self.head(torch.cat([cls_vec, local_vec], 1))
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80cb48d3b4da0914f22041e29bb8d1c808e7f728cd2c4fd6c88fd6961f748a81
|
| 3 |
+
size 284087647
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": true,
|
| 5 |
+
"is_local": false,
|
| 6 |
+
"mask_token": "[MASK]",
|
| 7 |
+
"model_max_length": 512,
|
| 8 |
+
"pad_token": "[PAD]",
|
| 9 |
+
"sep_token": "[SEP]",
|
| 10 |
+
"strip_accents": null,
|
| 11 |
+
"tokenize_chinese_chars": true,
|
| 12 |
+
"tokenizer_class": "BertTokenizer",
|
| 13 |
+
"unk_token": "[UNK]"
|
| 14 |
+
}
|