Egor-3926 committed
Commit d7d2d80 · verified · 1 Parent(s): 8107fc8

Upload ToxicLord v1 model

Files changed (8)
  1. README.md +120 -3
  2. config.json +37 -0
  3. merges.txt +0 -0
  4. model.safetensors +3 -0
  5. special_tokens_map.json +51 -0
  6. tokenizer.json +0 -0
  7. tokenizer_config.json +62 -0
  8. vocab.json +0 -0
README.md CHANGED
@@ -1,3 +1,120 @@
- ---
- license: cc-by-nc-nd-4.0
- ---
+ ---
+ license: cc-by-nc-nd-4.0
+ language:
+ - ru
+ library_name: transformers
+ pipeline_tag: text-classification
+ tags:
+ - toxicity-classification
+ - russian
+ - telegram
+ - moderation
+ - deberta
+ base_model: deepvk/deberta-v1-base
+ model-index:
+ - name: ToxicLord v1
+   results:
+   - task:
+       type: text-classification
+       name: Text Classification
+     dataset:
+       name: Internal held-out toxicity test split
+       type: private
+     metrics:
+     - type: accuracy
+       value: 0.968937125748503
+       name: Accuracy
+     - type: precision
+       value: 0.9309514251304697
+       name: Toxic precision
+     - type: recall
+       value: 0.905152224824356
+       name: Toxic recall
+     - type: f1
+       value: 0.9178705719374629
+       name: Toxic F1
+     - type: f1
+       value: 0.9493585102268198
+       name: Macro F1
+ ---
+
+ # ToxicLord v1
+
+ ToxicLord v1 is a Russian text classification model for chat moderation. It classifies messages as `clean` or `toxic` and is tuned for short Russian Telegram-style messages.
+
+ The model is intended for assistive moderation workflows. It can make mistakes and should be used with logging, review, and project-specific thresholds.
+
+ ## Labels
+
+ ```text
+ 0: clean
+ 1: toxic
+ ```
+
+ ## Recommended Threshold
+
+ For conservative Telegram moderation, use the toxic probability instead of only argmax:
+
+ ```text
+ toxic if P(toxic) >= 0.90
+ ```
+
+ ## Evaluation
+
+ Internal held-out test split:
+
+ ```text
+ accuracy: 0.9689
+ precision_toxic: 0.9310
+ recall_toxic: 0.9052
+ f1_toxic: 0.9179
+ macro_f1: 0.9494
+ ```
+
+ External fixed benchmark samples at threshold `0.90`:
+
+ ```text
+ Telegram clean chat sample: 2/500 triggered, 0.4% trigger rate
+ Toxic sample: 364/500 triggered, 72.8% trigger rate
+ ```
+
+ ## Usage
+
+ ```python
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ model_id = "Egor-3926/ToxicLord"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model.eval()
+
+ text = "ты еблан"
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=192)
+
+ with torch.inference_mode():
+     probs = torch.softmax(model(**inputs).logits, dim=-1)[0]
+
+ clean_score = float(probs[0])
+ toxic_score = float(probs[1])
+ label = "toxic" if toxic_score >= 0.90 else "clean"
+
+ print(label, toxic_score)
+ ```
+
+ ## Training Data
+
+ The model was fine-tuned on a mixture of public Russian toxicity datasets and private moderation annotations/corrections. Raw training data, Telegram logs, user identifiers, and private annotations are not redistributed with this model.
+
+ ## Limitations
+
+ - The model is optimized for Russian Telegram-style moderation and may not transfer well to formal text, long documents, or other languages.
+ - Short insults and slurs may be classified as toxic even without broader context.
+ - Sarcasm, quotes, jokes, reclaimed language, and moderation discussions can be misclassified.
+ - The model should not be used as the only source of truth for irreversible moderation actions.
+
+ ## License
+
+ This model is released under `cc-by-nc-nd-4.0`.
+
+ Non-commercial use is allowed with attribution. Commercial use and derivative redistribution are not allowed under this license.
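The usage snippet in the model card scores one message at a time. Below is a minimal batched sketch under the same assumptions (repo id `Egor-3926/ToxicLord`, label order `0: clean` / `1: toxic`, the card's `0.90` threshold, 192-token truncation); it is illustrative and not part of the uploaded README, and the example messages are placeholders.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Illustrative batched-moderation sketch; not part of this commit.
MODEL_ID = "Egor-3926/ToxicLord"
THRESHOLD = 0.90  # conservative threshold recommended in the model card

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()

# Placeholder messages: a harmless greeting and an insult.
messages = [
    "привет, как дела?",
    "ты еблан",
]

# Tokenize the whole batch with the same 192-token truncation as the card's example.
inputs = tokenizer(messages, return_tensors="pt", padding=True, truncation=True, max_length=192)

with torch.inference_mode():
    probs = torch.softmax(model(**inputs).logits, dim=-1)

for text, p in zip(messages, probs):
    toxic_score = float(p[1])  # index 1 is the "toxic" class
    label = "toxic" if toxic_score >= THRESHOLD else "clean"
    print(f"{label}\t{toxic_score:.3f}\t{text}")
```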
config.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "architectures": [
+     "DebertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "clean",
+     "1": "toxic"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "clean": 0,
+     "toxic": 1
+   },
+   "layer_norm_eps": 1e-07,
+   "legacy": true,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 768,
+   "pos_att_type": null,
+   "position_biased_input": true,
+   "relative_attention": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.55.0",
+   "type_vocab_size": 0,
+   "vocab_size": 50265
+ }
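Because `config.json` ships the `id2label` / `label2id` mapping, downstream code can read class names from the loaded config instead of hardcoding indices. A minimal sketch assuming the same repo id as the model card; the example message is a placeholder.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Illustrative sketch: map probabilities to label names via the config's id2label.
model_id = "Egor-3926/ToxicLord"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
model.eval()

# Placeholder input text ("example message").
inputs = tokenizer("пример сообщения", return_tensors="pt", truncation=True, max_length=192)
with torch.inference_mode():
    probs = torch.softmax(model(**inputs).logits, dim=-1)[0]

# id2label comes straight from config.json: {"0": "clean", "1": "toxic"}.
scores = {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}
print(scores)  # e.g. {"clean": 0.98, "toxic": 0.02}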
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90c59a2d41d3a1934dd483fddca5944d04aa5e756c82f0de5d39f2af019705c9
+ size 498562144
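The `model.safetensors` entry is a Git LFS pointer: the actual weights live in LFS storage and are identified by the SHA-256 `oid` and byte `size` above. If you want to check a downloaded copy against the pointer, a hedged sketch follows (the download call assumes the `huggingface_hub` client; the verification logic is illustrative).

```python
import hashlib

from huggingface_hub import hf_hub_download

# Values copied from the LFS pointer in this commit.
EXPECTED_SHA256 = "90c59a2d41d3a1934dd483fddca5944d04aa5e756c82f0de5d39f2af019705c9"
EXPECTED_SIZE = 498562144

# Fetch the real weights file from the Hub.
path = hf_hub_download(repo_id="Egor-3926/ToxicLord", filename="model.safetensors")

# Stream the file and compare its hash and size to the pointer.
sha = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
        size += len(chunk)

print("sha256 ok:", sha.hexdigest() == EXPECTED_SHA256)
print("size ok:", size == EXPECTED_SIZE)
```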
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "max_length": 192,
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "stride": 0,
+   "tokenizer_class": "RobertaTokenizer",
+   "trim_offsets": true,
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
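`tokenizer_config.json` declares a `RobertaTokenizer`-class byte-level BPE tokenizer with `model_max_length` 512 and a saved default `max_length` of 192, matching the truncation used in the model card's usage example. A small illustrative sketch (not part of the commit) that loads the tokenizer and inspects these settings:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Egor-3926/ToxicLord")

# Class, length limit, and special tokens come from tokenizer_config.json
# and special_tokens_map.json above.
print(type(tokenizer).__name__)    # typically the fast variant, RobertaTokenizerFast
print(tokenizer.model_max_length)  # 512
print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.pad_token, tokenizer.mask_token)

# Encode a placeholder message with the same truncation as the usage example.
enc = tokenizer("пример", truncation=True, max_length=192)
print(enc["input_ids"])
```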
vocab.json ADDED
The diff for this file is too large to render. See raw diff