fzn0x commited on
Commit
2086153
·
verified ·
1 Parent(s): f88e9a7

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,9 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
4
  *.h5 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Ignore Python virtual environment
2
+ bert-env/
3
+
4
+ models/pretrained
LICENSE ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Muhammad Fauzan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10
+
README.md CHANGED
@@ -1,3 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- license: mit
3
- ---
 
 
 
1
+ # Fine-tuned BERT-base-uncased pre-trained model to classify spam SMS.
2
+
3
+ My second project in Natural Language Processing (NLP), where I fine-tuned a bert-base-uncased model to classify spam SMS. This is huge improvements from https://github.com/fzn0x/bert-indonesian-english-hate-comments.
4
+
5
+ ## ✅ Install requirements
6
+
7
+ Install required dependencies
8
+
9
+ ```sh
10
+ pip install --upgrade pip
11
+ pip install -r requirements.txt
12
+ ```
13
+
14
+ ## ✅ Add BERT virtual env
15
+
16
+ write the command below
17
+
18
+ ```sh
19
+ # ✅ Create and activate a virtual environment
20
+ python -m venv bert-env
21
+ source bert-env/bin/activate # On Windows use: bert-env\Scripts\activate
22
+ ```
23
+
24
+ ## ✅ Install CUDA
25
+
26
+ Check if your GPU supports CUDA:
27
+
28
+ ```sh
29
+ nvidia-smi
30
+ ```
31
+
32
+ Then:
33
+
34
+ ```sh
35
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
36
+ PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
37
+ ```
38
+
39
+ ## 🔧 How to use
40
+
41
+ - Check your device and CUDA availability:
42
+
43
+ ```sh
44
+ python check_device.py
45
+ ```
46
+
47
+ > :warning: Using CPU is not advisable, prefer check your CUDA availability.
48
+
49
+ - Train the model:
50
+
51
+ ```sh
52
+ python scripts/train.py
53
+ ```
54
+
55
+ > :warning: Remove unneeded checkpoint in models/pretrained to save your storage after training
56
+
57
+ - Run prediction:
58
+
59
+ ```sh
60
+ python scripts/predict.py
61
+ ```
62
+
63
+ ✅ Dataset Location: [`data/spam.csv`](./data/spam.csv), modify the dataset to enhance the model based on your needs.
64
+
65
+
66
+ ## 📚 Citations
67
+
68
+ If you use this repository or its ideas, please cite the following:
69
+
70
+ See [`citations.bib`](./citations.bib) for full BibTeX entries.
71
+
72
+ - Wolf et al., *Transformers: State-of-the-Art Natural Language Processing*, EMNLP 2020. [ACL Anthology](https://www.aclweb.org/anthology/2020.emnlp-demos.6)
73
+ - Pedregosa et al., *Scikit-learn: Machine Learning in Python*, JMLR 2011.
74
+ - Almeida & Gómez Hidalgo, *SMS Spam Collection v.1*, UCI Machine Learning Repository (2011). [Kaggle Link](https://www.kaggle.com/datasets/uciml/sms-spam-collection-dataset)
75
+
76
+ ## 🧠 Credits and Libraries Used
77
+
78
+ - [Hugging Face Transformers](https://github.com/huggingface/transformers) – model, tokenizer, and training utilities
79
+ - [scikit-learn](https://scikit-learn.org/stable/) – metrics and preprocessing
80
+ - Logging silencing inspired by Hugging Face GitHub discussions
81
+ - Dataset from [UCI SMS Spam Collection](https://www.kaggle.com/datasets/uciml/sms-spam-collection-dataset)
82
+ - Inspiration from [Kaggle Notebook by Suyash Khare](https://www.kaggle.com/code/suyashkhare/naive-bayes)
83
+
84
+ ## License and Usage
85
+
86
+ License under [MIT license](./LICENSE).
87
+
88
  ---
89
+
90
+ Leave a ⭐ if you think this project is helpful, contributions are welcome.
91
+
92
+ ---
check_device.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
import torch


def _report_device() -> torch.device:
    """Print which compute device PyTorch will use and return it.

    Prefers CUDA when a GPU is visible to PyTorch; falls back to CPU.
    """
    has_cuda = torch.cuda.is_available()
    chosen = torch.device("cuda" if has_cuda else "cpu")
    print(f"Using device: {chosen}")
    if has_cuda:
        # Index 0 is the default CUDA device.
        print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
    else:
        print("No CUDA device available")
    return chosen


device = _report_device()
citations.bib ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @inproceedings{wolf-etal-2020-transformers,
2
+ title = {Transformers: State-of-the-Art Natural Language Processing},
3
+ author = {Wolf, Thomas and Debut, Lysandre and Sanh, Victor and Chaumond, Julien and Delangue, Clement and Moi, Anthony and Cistac, Pierric and Rault, Tim and Louf, Remi and Funtowicz, Morgan and Brew, Jamie},
4
+ booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
5
+ month = oct,
6
+ year = {2020},
7
+ publisher = {Association for Computational Linguistics},
8
+ pages = {38--45},
9
+ url = {https://www.aclweb.org/anthology/2020.emnlp-demos.6}
10
+ }
11
+
12
+ @article{scikit-learn,
13
+ title = {Scikit-learn: Machine Learning in Python},
14
+ author = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, {\'E}douard},
15
+ journal = {Journal of Machine Learning Research},
16
+ volume = {12},
17
+ pages = {2825--2830},
18
+ year = {2011}
19
+ }
20
+
21
+ @misc{smsspamcollection,
22
+ author = {Tiago A. Almeida and José María Gómez Hidalgo},
23
+ title = {SMS Spam Collection v.1},
24
+ year = {2011},
25
+ howpublished = {\url{https://www.kaggle.com/datasets/uciml/sms-spam-collection-dataset}},
26
+ note = {UCI Machine Learning Repository}
27
+ }
data/spam.csv ADDED
The diff for this file is too large to render. See raw diff
 
logs/.gitkeep ADDED
File without changes
logs/training_logs_20250409_190217.csv ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loss,grad_norm,learning_rate,epoch,step,eval_loss,eval_accuracy,eval_precision,eval_recall,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss
2
+ 0.7085,14.144789695739746,1.0000000000000002e-06,0.019120458891013385,10,,,,,,,,,,,,,
3
+ 0.715,9.57529067993164,2.0000000000000003e-06,0.03824091778202677,20,,,,,,,,,,,,,
4
+ 0.6864,10.28545093536377,3e-06,0.05736137667304015,30,,,,,,,,,,,,,
5
+ 0.6486,6.435455799102783,4.000000000000001e-06,0.07648183556405354,40,,,,,,,,,,,,,
6
+ 0.6034,10.346636772155762,5e-06,0.09560229445506692,50,,,,,,,,,,,,,
7
+ 0.4819,8.369226455688477,6e-06,0.1147227533460803,60,,,,,,,,,,,,,
8
+ 0.4548,6.441623210906982,7.000000000000001e-06,0.1338432122370937,70,,,,,,,,,,,,,
9
+ 0.3809,5.808198928833008,8.000000000000001e-06,0.15296367112810708,80,,,,,,,,,,,,,
10
+ 0.2978,6.0641913414001465,9e-06,0.17208413001912046,90,,,,,,,,,,,,,
11
+ 0.2684,3.4851677417755127,1e-05,0.19120458891013384,100,,,,,,,,,,,,,
12
+ 0.2893,4.52321720123291,1.1000000000000001e-05,0.21032504780114722,110,,,,,,,,,,,,,
13
+ 0.2383,2.4263248443603516,1.2e-05,0.2294455066921606,120,,,,,,,,,,,,,
14
+ 0.0793,2.53463077545166,1.3000000000000001e-05,0.248565965583174,130,,,,,,,,,,,,,
15
+ 0.1478,0.6242015361785889,1.4000000000000001e-05,0.2676864244741874,140,,,,,,,,,,,,,
16
+ 0.1932,0.5276176333427429,1.5e-05,0.28680688336520077,150,,,,,,,,,,,,,
17
+ 0.0145,0.23376069962978363,1.6000000000000003e-05,0.30592734225621415,160,,,,,,,,,,,,,
18
+ 0.5524,0.7461658716201782,1.7000000000000003e-05,0.32504780114722753,170,,,,,,,,,,,,,
19
+ 0.2953,0.12811139225959778,1.8e-05,0.3441682600382409,180,,,,,,,,,,,,,
20
+ 0.0117,7.550753593444824,1.9e-05,0.3632887189292543,190,,,,,,,,,,,,,
21
+ 0.3207,0.09973274171352386,2e-05,0.3824091778202677,200,,,,,,,,,,,,,
22
+ 0.007,0.0616782121360302,2.1e-05,0.40152963671128106,210,,,,,,,,,,,,,
23
+ 0.0028,0.058995287865400314,2.2000000000000003e-05,0.42065009560229444,220,,,,,,,,,,,,,
24
+ 0.2869,0.05934325233101845,2.3000000000000003e-05,0.4397705544933078,230,,,,,,,,,,,,,
25
+ 0.1431,0.06777150183916092,2.4e-05,0.4588910133843212,240,,,,,,,,,,,,,
26
+ 0.1617,0.056581199169158936,2.5e-05,0.4780114722753346,250,,,,,,,,,,,,,
27
+ 0.0054,0.28948917984962463,2.6000000000000002e-05,0.497131931166348,260,,,,,,,,,,,,,
28
+ 0.2969,0.048585232347249985,2.7000000000000002e-05,0.5162523900573613,270,,,,,,,,,,,,,
29
+ 0.2765,0.1382359117269516,2.8000000000000003e-05,0.5353728489483748,280,,,,,,,,,,,,,
30
+ 0.0529,0.18732605874538422,2.9e-05,0.5544933078393881,290,,,,,,,,,,,,,
31
+ 0.2739,0.16986824572086334,3e-05,0.5736137667304015,300,,,,,,,,,,,,,
32
+ 0.0084,6.829342842102051,3.1e-05,0.5927342256214149,310,,,,,,,,,,,,,
33
+ 0.0152,0.02746426872909069,3.2000000000000005e-05,0.6118546845124283,320,,,,,,,,,,,,,
34
+ 0.0399,0.044547468423843384,3.3e-05,0.6309751434034416,330,,,,,,,,,,,,,
35
+ 0.0017,0.02045159600675106,3.4000000000000007e-05,0.6500956022944551,340,,,,,,,,,,,,,
36
+ 0.3084,0.03513335809111595,3.5e-05,0.6692160611854685,350,,,,,,,,,,,,,
37
+ 0.0218,72.34526824951172,3.6e-05,0.6883365200764818,360,,,,,,,,,,,,,
38
+ 0.0009,0.026036346331238747,3.7e-05,0.7074569789674953,370,,,,,,,,,,,,,
39
+ 0.2499,0.02853863686323166,3.8e-05,0.7265774378585086,380,,,,,,,,,,,,,
40
+ 0.1683,0.13165156543254852,3.9000000000000006e-05,0.745697896749522,390,,,,,,,,,,,,,
41
+ 0.0016,0.03334072604775429,4e-05,0.7648183556405354,400,,,,,,,,,,,,,
42
+ 0.0159,0.022308047860860825,4.1e-05,0.7839388145315488,410,,,,,,,,,,,,,
43
+ 0.0432,0.016688019037246704,4.2e-05,0.8030592734225621,420,,,,,,,,,,,,,
44
+ 0.7753,1.9672185182571411,4.3e-05,0.8221797323135756,430,,,,,,,,,,,,,
45
+ 0.0365,0.053068261593580246,4.4000000000000006e-05,0.8413001912045889,440,,,,,,,,,,,,,
46
+ 0.3924,0.27952665090560913,4.5e-05,0.8604206500956023,450,,,,,,,,,,,,,
47
+ 0.1889,0.042751893401145935,4.600000000000001e-05,0.8795411089866156,460,,,,,,,,,,,,,
48
+ 0.17,0.17158177495002747,4.7e-05,0.8986615678776291,470,,,,,,,,,,,,,
49
+ 0.0065,0.022402366623282433,4.8e-05,0.9177820267686424,480,,,,,,,,,,,,,
50
+ 0.0014,2.1868600845336914,4.9e-05,0.9369024856596558,490,,,,,,,,,,,,,
51
+ 0.2687,10.01941204071045,5e-05,0.9560229445506692,500,,,,,,,,,,,,,
52
+ 0.0014,0.03392564505338669,4.976359338061466e-05,0.9751434034416826,510,,,,,,,,,,,,,
53
+ 0.1763,0.02802887372672558,4.9527186761229313e-05,0.994263862332696,520,,,,,,,,,,,,,
54
+ ,,,1.0,523,0.11565426737070084,0.9899497487437185,0.9898943997664114,0.9899497487437185,0.98990492019591,92.1004,15.125,0.955,,,,,
55
+ 0.1147,0.07175358384847641,4.929078014184397e-05,1.0133843212237095,530,,,,,,,,,,,,,
56
+ 0.0028,0.1113436296582222,4.905437352245863e-05,1.0325047801147227,540,,,,,,,,,,,,,
57
+ 0.0009,0.01000809296965599,4.8817966903073283e-05,1.0516252390057361,550,,,,,,,,,,,,,
58
+ 0.1679,0.010713610798120499,4.858156028368794e-05,1.0707456978967496,560,,,,,,,,,,,,,
59
+ 0.0004,0.013057024218142033,4.83451536643026e-05,1.089866156787763,570,,,,,,,,,,,,,
60
+ 0.0003,0.008843371644616127,4.810874704491726e-05,1.1089866156787762,580,,,,,,,,,,,,,
61
+ 0.0003,0.009126769378781319,4.787234042553192e-05,1.1281070745697896,590,,,,,,,,,,,,,
62
+ 0.1853,0.008576230145990849,4.763593380614658e-05,1.147227533460803,600,,,,,,,,,,,,,
63
+ 0.0003,0.01004419382661581,4.739952718676123e-05,1.1663479923518165,610,,,,,,,,,,,,,
64
+ 0.3492,0.05982375890016556,4.716312056737589e-05,1.1854684512428297,620,,,,,,,,,,,,,
65
+ 0.0948,0.13171395659446716,4.692671394799055e-05,1.2045889101338432,630,,,,,,,,,,,,,
66
+ 0.0213,0.0346626341342926,4.669030732860521e-05,1.2237093690248566,640,,,,,,,,,,,,,
67
+ 0.2175,0.01486362423747778,4.645390070921986e-05,1.24282982791587,650,,,,,,,,,,,,,
68
+ 0.0917,0.040886640548706055,4.621749408983452e-05,1.2619502868068833,660,,,,,,,,,,,,,
69
+ 0.0007,0.01590738259255886,4.598108747044918e-05,1.2810707456978967,670,,,,,,,,,,,,,
70
+ 0.1779,0.016223158687353134,4.574468085106383e-05,1.3001912045889101,680,,,,,,,,,,,,,
71
+ 0.0748,0.02286047302186489,4.550827423167849e-05,1.3193116634799236,690,,,,,,,,,,,,,
72
+ 0.0007,0.014413115568459034,4.527186761229315e-05,1.338432122370937,700,,,,,,,,,,,,,
73
+ 0.0004,0.022832456976175308,4.50354609929078e-05,1.3575525812619502,710,,,,,,,,,,,,,
74
+ 0.3298,0.00590986805036664,4.479905437352246e-05,1.3766730401529637,720,,,,,,,,,,,,,
75
+ 0.0004,0.018635844811797142,4.456264775413712e-05,1.395793499043977,730,,,,,,,,,,,,,
76
+ 0.0005,0.030725516378879547,4.432624113475177e-05,1.4149139579349903,740,,,,,,,,,,,,,
77
+ 0.0004,0.005838405806571245,4.4089834515366435e-05,1.4340344168260037,750,,,,,,,,,,,,,
78
+ 0.0006,0.3502090871334076,4.3853427895981094e-05,1.4531548757170172,760,,,,,,,,,,,,,
79
+ 0.0545,0.006157968193292618,4.3617021276595746e-05,1.4722753346080306,770,,,,,,,,,,,,,
80
+ 0.3259,0.023588448762893677,4.3380614657210405e-05,1.491395793499044,780,,,,,,,,,,,,,
81
+ 0.0005,0.009129312820732594,4.3144208037825064e-05,1.5105162523900573,790,,,,,,,,,,,,,
82
+ 0.0553,0.013462325558066368,4.2907801418439716e-05,1.5296367112810707,800,,,,,,,,,,,,,
83
+ 0.2948,0.03156350925564766,4.2671394799054375e-05,1.5487571701720841,810,,,,,,,,,,,,,
84
+ 0.2854,0.24748779833316803,4.2434988179669034e-05,1.5678776290630974,820,,,,,,,,,,,,,
85
+ 0.5863,0.062411751598119736,4.219858156028369e-05,1.586998087954111,830,,,,,,,,,,,,,
86
+ 0.2991,0.04214799031615257,4.1962174940898345e-05,1.6061185468451242,840,,,,,,,,,,,,,
87
+ 0.0008,0.013838391751050949,4.1725768321513004e-05,1.6252390057361377,850,,,,,,,,,,,,,
88
+ 0.1731,0.014857972972095013,4.148936170212766e-05,1.644359464627151,860,,,,,,,,,,,,,
89
+ 0.0009,0.01684940792620182,4.1252955082742315e-05,1.6634799235181643,870,,,,,,,,,,,,,
90
+ 0.0064,0.00783159863203764,4.1016548463356974e-05,1.682600382409178,880,,,,,,,,,,,,,
91
+ 0.0003,0.008411649614572525,4.078014184397163e-05,1.7017208413001912,890,,,,,,,,,,,,,
92
+ 0.0003,0.0052153076976537704,4.0543735224586285e-05,1.7208413001912046,900,,,,,,,,,,,,,
93
+ 0.0493,0.005543852690607309,4.030732860520095e-05,1.739961759082218,910,,,,,,,,,,,,,
94
+ 0.3789,0.009990991093218327,4.007092198581561e-05,1.7590822179732313,920,,,,,,,,,,,,,
95
+ 0.0642,0.0287819541990757,3.983451536643026e-05,1.7782026768642447,930,,,,,,,,,,,,,
96
+ 0.0015,0.023501530289649963,3.959810874704492e-05,1.7973231357552581,940,,,,,,,,,,,,,
97
+ 0.0455,0.032593123614788055,3.936170212765958e-05,1.8164435946462714,950,,,,,,,,,,,,,
98
+ 0.0795,0.008558389730751514,3.912529550827423e-05,1.835564053537285,960,,,,,,,,,,,,,
99
+ 0.5774,61.61995315551758,3.888888888888889e-05,1.8546845124282982,970,,,,,,,,,,,,,
100
+ 0.0768,0.7568882703781128,3.865248226950355e-05,1.8738049713193117,980,,,,,,,,,,,,,
101
+ 0.0911,0.017800642177462578,3.84160756501182e-05,1.892925430210325,990,,,,,,,,,,,,,
102
+ 0.0005,0.027177782729268074,3.817966903073286e-05,1.9120458891013383,1000,,,,,,,,,,,,,
103
+ 0.2158,0.018468566238880157,3.794326241134752e-05,1.9311663479923518,1010,,,,,,,,,,,,,
104
+ 0.0007,0.01574050448834896,3.770685579196218e-05,1.9502868068833652,1020,,,,,,,,,,,,,
105
+ 0.1317,207.1902313232422,3.747044917257683e-05,1.9694072657743786,1030,,,,,,,,,,,,,
106
+ 0.0007,0.02454638108611107,3.723404255319149e-05,1.988527724665392,1040,,,,,,,,,,,,,
107
+ ,,,2.0,1046,0.09420914202928543,0.9870782483847811,0.9875541711169815,0.9870782483847811,0.987216078859851,97.487,14.289,0.903,,,,,
108
+ 0.0005,0.011501547880470753,3.699763593380615e-05,2.0076481835564053,1050,,,,,,,,,,,,,
109
+ 0.0466,1.3113888502120972,3.67612293144208e-05,2.026768642447419,1060,,,,,,,,,,,,,
110
+ 0.0004,0.00663415715098381,3.6524822695035466e-05,2.045889101338432,1070,,,,,,,,,,,,,
111
+ 0.0005,0.006375534925609827,3.6288416075650125e-05,2.0650095602294454,1080,,,,,,,,,,,,,
112
+ 0.0002,0.0062795463018119335,3.605200945626478e-05,2.084130019120459,1090,,,,,,,,,,,,,
113
+ 0.0002,0.005897123832255602,3.5815602836879437e-05,2.1032504780114722,1100,,,,,,,,,,,,,
114
+ 0.0002,0.007514381315559149,3.5579196217494095e-05,2.1223709369024855,1110,,,,,,,,,,,,,
115
+ 0.0216,0.003628661623224616,3.534278959810875e-05,2.141491395793499,1120,,,,,,,,,,,,,
116
+ 0.0002,0.004566493909806013,3.5106382978723407e-05,2.1606118546845123,1130,,,,,,,,,,,,,
117
+ 0.0002,0.004096467513591051,3.4869976359338065e-05,2.179732313575526,1140,,,,,,,,,,,,,
118
+ 0.0752,0.0039827860891819,3.463356973995272e-05,2.198852772466539,1150,,,,,,,,,,,,,
119
+ 0.0002,0.0038012678269296885,3.4397163120567377e-05,2.2179732313575524,1160,,,,,,,,,,,,,
120
+ 0.0002,0.014038076624274254,3.4160756501182035e-05,2.237093690248566,1170,,,,,,,,,,,,,
121
+ 0.2925,0.005012439098209143,3.392434988179669e-05,2.2562141491395793,1180,,,,,,,,,,,,,
122
+ 0.0116,0.009254666045308113,3.3687943262411347e-05,2.275334608030593,1190,,,,,,,,,,,,,
123
+ 0.0078,25.19086456298828,3.3451536643026005e-05,2.294455066921606,1200,,,,,,,,,,,,,
124
+ 0.0004,0.0065476433373987675,3.3215130023640664e-05,2.3135755258126194,1210,,,,,,,,,,,,,
125
+ 0.0221,0.006819812580943108,3.2978723404255317e-05,2.332695984703633,1220,,,,,,,,,,,,,
126
+ 0.0003,0.008379259146749973,3.2742316784869975e-05,2.3518164435946463,1230,,,,,,,,,,,,,
127
+ 0.0002,0.004561790265142918,3.2505910165484634e-05,2.3709369024856595,1240,,,,,,,,,,,,,
128
+ 0.0007,0.006553211715072393,3.226950354609929e-05,2.390057361376673,1250,,,,,,,,,,,,,
129
+ 0.0002,0.0044931466691195965,3.203309692671395e-05,2.4091778202676863,1260,,,,,,,,,,,,,
130
+ 0.0002,0.0051203942857682705,3.179669030732861e-05,2.4282982791587,1270,,,,,,,,,,,,,
131
+ 0.0002,0.0044228509068489075,3.156028368794326e-05,2.447418738049713,1280,,,,,,,,,,,,,
132
+ 0.0001,0.00331490277312696,3.132387706855792e-05,2.4665391969407264,1290,,,,,,,,,,,,,
133
+ 0.1774,0.0028139096684753895,3.108747044917258e-05,2.48565965583174,1300,,,,,,,,,,,,,
134
+ 0.0001,0.003723391331732273,3.085106382978723e-05,2.5047801147227533,1310,,,,,,,,,,,,,
135
+ 0.1172,0.20224063098430634,3.061465721040189e-05,2.5239005736137665,1320,,,,,,,,,,,,,
136
+ 0.0011,0.006403639912605286,3.0378250591016548e-05,2.54302103250478,1330,,,,,,,,,,,,,
137
+ 0.0002,0.0035964653361588717,3.0141843971631207e-05,2.5621414913957934,1340,,,,,,,,,,,,,
138
+ 0.0001,0.003212117124348879,2.9905437352245862e-05,2.581261950286807,1350,,,,,,,,,,,,,
139
+ 0.0002,0.0030910419300198555,2.966903073286052e-05,2.6003824091778203,1360,,,,,,,,,,,,,
140
+ 0.0024,0.01269309688359499,2.9432624113475177e-05,2.6195028680688335,1370,,,,,,,,,,,,,
141
+ 0.0001,0.0030236062593758106,2.9196217494089832e-05,2.638623326959847,1380,,,,,,,,,,,,,
142
+ 0.1098,2.9793853759765625,2.895981087470449e-05,2.6577437858508604,1390,,,,,,,,,,,,,
143
+ 0.0026,0.003864363767206669,2.8723404255319154e-05,2.676864244741874,1400,,,,,,,,,,,,,
144
+ 0.0015,0.0035600021947175264,2.848699763593381e-05,2.6959847036328872,1410,,,,,,,,,,,,,
145
+ 0.0002,0.005708142649382353,2.8250591016548468e-05,2.7151051625239004,1420,,,,,,,,,,,,,
146
+ 0.0002,0.005924528930336237,2.8014184397163124e-05,2.734225621414914,1430,,,,,,,,,,,,,
147
+ 0.0001,0.0030974203255027533,2.777777777777778e-05,2.7533460803059273,1440,,,,,,,,,,,,,
148
+ 0.0005,0.0032547072041779757,2.7541371158392438e-05,2.772466539196941,1450,,,,,,,,,,,,,
149
+ 0.0001,0.003584090620279312,2.7304964539007094e-05,2.791586998087954,1460,,,,,,,,,,,,,
150
+ 0.0001,0.0029679432045668364,2.7068557919621753e-05,2.8107074569789674,1470,,,,,,,,,,,,,
151
+ 0.0009,0.004424703773111105,2.6832151300236408e-05,2.8298279158699806,1480,,,,,,,,,,,,,
152
+ 0.0001,0.003592389402911067,2.6595744680851064e-05,2.8489483747609943,1490,,,,,,,,,,,,,
153
+ 0.0001,0.0038199257105588913,2.6359338061465723e-05,2.8680688336520075,1500,,,,,,,,,,,,,
154
+ 0.0001,0.004440006334334612,2.6122931442080378e-05,2.887189292543021,1510,,,,,,,,,,,,,
155
+ 0.0001,0.003449175739660859,2.5886524822695034e-05,2.9063097514340344,1520,,,,,,,,,,,,,
156
+ 0.0001,0.0021378095261752605,2.5650118203309693e-05,2.9254302103250476,1530,,,,,,,,,,,,,
157
+ 0.0001,0.0023634498938918114,2.5413711583924348e-05,2.9445506692160612,1540,,,,,,,,,,,,,
158
+ 0.0001,0.0026110291946679354,2.5177304964539007e-05,2.9636711281070744,1550,,,,,,,,,,,,,
159
+ 0.0001,0.0020405587274581194,2.4940898345153666e-05,2.982791586998088,1560,,,,,,,,,,,,,
160
+ ,,,3.0,1569,0.17737852036952972,0.9921033740129217,0.992086160965269,0.9921033740129217,0.9920411355428165,97.3084,14.315,0.904,,,,,
161
+ 0.0001,0.002212206833064556,2.470449172576832e-05,3.0019120458891013,1570,,,,,,,,,,,,,
162
+ 0.0001,0.0027835641521960497,2.446808510638298e-05,3.0210325047801145,1580,,,,,,,,,,,,,
163
+ 0.0001,0.0024758928921073675,2.4231678486997636e-05,3.040152963671128,1590,,,,,,,,,,,,,
164
+ 0.0001,0.0019498571055009961,2.3995271867612295e-05,3.0592734225621414,1600,,,,,,,,,,,,,
165
+ 0.0001,0.002075971569865942,2.3758865248226954e-05,3.078393881453155,1610,,,,,,,,,,,,,
166
+ 0.0001,0.0031053705606609583,2.352245862884161e-05,3.0975143403441683,1620,,,,,,,,,,,,,
167
+ 0.0001,0.0019424918573349714,2.3286052009456265e-05,3.1166347992351815,1630,,,,,,,,,,,,,
168
+ 0.0292,0.0023057463113218546,2.3049645390070924e-05,3.135755258126195,1640,,,,,,,,,,,,,
169
+ 0.0001,0.0022518427576869726,2.281323877068558e-05,3.1548757170172084,1650,,,,,,,,,,,,,
170
+ 0.0001,0.001956481486558914,2.2576832151300238e-05,3.173996175908222,1660,,,,,,,,,,,,,
171
+ 0.0001,0.002049932023510337,2.2340425531914894e-05,3.1931166347992352,1670,,,,,,,,,,,,,
172
+ 0.0001,0.001981406705453992,2.2104018912529553e-05,3.2122370936902485,1680,,,,,,,,,,,,,
173
+ 0.0001,0.0017047298606485128,2.186761229314421e-05,3.231357552581262,1690,,,,,,,,,,,,,
174
+ 0.1086,0.006344540510326624,2.1631205673758867e-05,3.2504780114722753,1700,,,,,,,,,,,,,
175
+ 0.0001,0.0016727660549804568,2.1394799054373523e-05,3.2695984703632885,1710,,,,,,,,,,,,,
176
+ 0.0001,0.0026623723097145557,2.115839243498818e-05,3.288718929254302,1720,,,,,,,,,,,,,
177
+ 0.0001,0.0038146893493831158,2.0921985815602837e-05,3.3078393881453154,1730,,,,,,,,,,,,,
178
+ 0.0001,0.0020843115635216236,2.0685579196217493e-05,3.3269598470363286,1740,,,,,,,,,,,,,
179
+ 0.0001,0.004382479470223188,2.0449172576832152e-05,3.3460803059273423,1750,,,,,,,,,,,,,
180
+ 0.0001,0.0020541008561849594,2.0212765957446807e-05,3.3652007648183555,1760,,,,,,,,,,,,,
181
+ 0.0001,0.001487684203311801,1.9976359338061466e-05,3.384321223709369,1770,,,,,,,,,,,,,
182
+ 0.0001,0.002201095223426819,1.9739952718676125e-05,3.4034416826003824,1780,,,,,,,,,,,,,
183
+ 0.0001,0.001844276674091816,1.950354609929078e-05,3.4225621414913956,1790,,,,,,,,,,,,,
184
+ 0.0001,0.0015598186291754246,1.926713947990544e-05,3.4416826003824093,1800,,,,,,,,,,,,,
185
+ 0.0001,0.0018409705953672528,1.9030732860520095e-05,3.4608030592734225,1810,,,,,,,,,,,,,
186
+ 0.0001,0.001676562475040555,1.879432624113475e-05,3.479923518164436,1820,,,,,,,,,,,,,
187
+ 0.0001,0.0017594838282093406,1.855791962174941e-05,3.4990439770554493,1830,,,,,,,,,,,,,
188
+ 0.0001,0.0021026760805398226,1.8321513002364065e-05,3.5181644359464626,1840,,,,,,,,,,,,,
189
+ 0.0001,0.0017727910308167338,1.8085106382978724e-05,3.537284894837476,1850,,,,,,,,,,,,,
190
+ 0.0001,0.0013543956447392702,1.7848699763593383e-05,3.5564053537284894,1860,,,,,,,,,,,,,
191
+ 0.0001,0.0012666520196944475,1.761229314420804e-05,3.575525812619503,1870,,,,,,,,,,,,,
192
+ 0.0011,0.0011121392017230392,1.7375886524822697e-05,3.5946462715105163,1880,,,,,,,,,,,,,
193
+ 0.0001,0.001808931352570653,1.7139479905437353e-05,3.6137667304015295,1890,,,,,,,,,,,,,
194
+ 0.0016,23.827749252319336,1.690307328605201e-05,3.632887189292543,1900,,,,,,,,,,,,,
195
+ 0.0001,0.0011950853513553739,1.6666666666666667e-05,3.6520076481835564,1910,,,,,,,,,,,,,
196
+ 0.0001,0.0016685306327417493,1.6430260047281323e-05,3.67112810707457,1920,,,,,,,,,,,,,
197
+ 0.0001,0.0013863686472177505,1.6193853427895982e-05,3.6902485659655833,1930,,,,,,,,,,,,,
198
+ 0.0001,0.0017680851742625237,1.595744680851064e-05,3.7093690248565965,1940,,,,,,,,,,,,,
199
+ 0.0001,0.0012093819677829742,1.5721040189125296e-05,3.7284894837476097,1950,,,,,,,,,,,,,
200
+ 0.0001,0.001656690496020019,1.5484633569739952e-05,3.7476099426386233,1960,,,,,,,,,,,,,
201
+ 0.0001,0.0016745509346947074,1.5248226950354611e-05,3.7667304015296366,1970,,,,,,,,,,,,,
202
+ 0.0,0.0012394479708746076,1.5011820330969268e-05,3.78585086042065,1980,,,,,,,,,,,,,
203
+ 0.0,0.0013073344016447663,1.4775413711583924e-05,3.8049713193116634,1990,,,,,,,,,,,,,
204
+ 0.0004,0.001538074342533946,1.4539007092198581e-05,3.8240917782026767,2000,,,,,,,,,,,,,
205
+ 0.0001,0.0012519847368821502,1.4302600472813242e-05,3.8432122370936903,2010,,,,,,,,,,,,,
206
+ 0.0001,0.0014980221167206764,1.4066193853427897e-05,3.8623326959847035,2020,,,,,,,,,,,,,
207
+ 0.0,0.0017531028715893626,1.3829787234042554e-05,3.881453154875717,2030,,,,,,,,,,,,,
208
+ 0.2722,0.0013574567856267095,1.3593380614657212e-05,3.9005736137667304,2040,,,,,,,,,,,,,
209
+ 0.0,0.001458437880501151,1.3356973995271869e-05,3.9196940726577436,2050,,,,,,,,,,,,,
210
+ 0.0001,0.0012874082894995809,1.3120567375886524e-05,3.9388145315487573,2060,,,,,,,,,,,,,
211
+ 0.0003,0.001217858400195837,1.2884160756501182e-05,3.9579349904397705,2070,,,,,,,,,,,,,
212
+ 0.0,0.0013258416438475251,1.2647754137115839e-05,3.977055449330784,2080,,,,,,,,,,,,,
213
+ 0.0001,0.0012758573284372687,1.2411347517730498e-05,3.9961759082217974,2090,,,,,,,,,,,,,
214
+ ,,,4.0,2092,0.14512090384960175,0.994256999282125,0.9942405741197771,0.994256999282125,0.9942313829690914,97.3316,14.312,0.904,,,,,
215
+ 0.0,0.0013754492392763495,1.2174940898345153e-05,4.015296367112811,2100,,,,,,,,,,,,,
216
+ 0.0,0.001169368508271873,1.1938534278959812e-05,4.034416826003824,2110,,,,,,,,,,,,,
217
+ 0.0,0.0012171813286840916,1.170212765957447e-05,4.053537284894838,2120,,,,,,,,,,,,,
218
+ 0.0,0.0014979930128902197,1.1465721040189125e-05,4.072657743785851,2130,,,,,,,,,,,,,
219
+ 0.0,0.0011387022677809,1.1229314420803782e-05,4.091778202676864,2140,,,,,,,,,,,,,
220
+ 0.0,0.0010951895965263247,1.0992907801418441e-05,4.1108986615678775,2150,,,,,,,,,,,,,
221
+ 0.0,0.0014354470185935497,1.0756501182033098e-05,4.130019120458891,2160,,,,,,,,,,,,,
222
+ 0.0,0.001228899578563869,1.0520094562647754e-05,4.149139579349905,2170,,,,,,,,,,,,,
223
+ 0.0,0.001033512526191771,1.0283687943262411e-05,4.168260038240918,2180,,,,,,,,,,,,,
224
+ 0.0,0.0012202056823298335,1.004728132387707e-05,4.187380497131931,2190,,,,,,,,,,,,,
225
+ 0.0,0.0011071061016991735,9.810874704491727e-06,4.2065009560229445,2200,,,,,,,,,,,,,
226
+ 0.0001,0.001122505054809153,9.574468085106383e-06,4.225621414913958,2210,,,,,,,,,,,,,
227
+ 0.0,0.0009751427569426596,9.33806146572104e-06,4.244741873804971,2220,,,,,,,,,,,,,
228
+ 0.0,0.0010192510671913624,9.101654846335699e-06,4.263862332695985,2230,,,,,,,,,,,,,
229
+ 0.0,0.001029942650347948,8.865248226950355e-06,4.282982791586998,2240,,,,,,,,,,,,,
230
+ 0.0,0.0011594196548685431,8.628841607565012e-06,4.3021032504780115,2250,,,,,,,,,,,,,
231
+ 0.0,0.0010501887882128358,8.392434988179669e-06,4.321223709369025,2260,,,,,,,,,,,,,
232
+ 0.0,0.0011294573778286576,8.156028368794328e-06,4.340344168260038,2270,,,,,,,,,,,,,
233
+ 0.0,0.0034297604579478502,7.919621749408983e-06,4.359464627151052,2280,,,,,,,,,,,,,
234
+ 0.0,0.0012247634585946798,7.68321513002364e-06,4.378585086042065,2290,,,,,,,,,,,,,
235
+ 0.0,0.0010506605030968785,7.446808510638298e-06,4.397705544933078,2300,,,,,,,,,,,,,
236
+ 0.0,0.0012230596039444208,7.210401891252956e-06,4.416826003824092,2310,,,,,,,,,,,,,
237
+ 0.0,0.001271169283427298,6.973995271867613e-06,4.435946462715105,2320,,,,,,,,,,,,,
238
+ 0.0,0.000902852974832058,6.73758865248227e-06,4.455066921606119,2330,,,,,,,,,,,,,
239
+ 0.0001,0.0010444560321047902,6.501182033096927e-06,4.474187380497132,2340,,,,,,,,,,,,,
240
+ 0.0,0.0008997560362331569,6.264775413711583e-06,4.493307839388145,2350,,,,,,,,,,,,,
241
+ 0.0,0.0008620694861747324,6.028368794326241e-06,4.512428298279159,2360,,,,,,,,,,,,,
242
+ 0.0,0.0012552279513329268,5.791962174940899e-06,4.531548757170172,2370,,,,,,,,,,,,,
243
+ 0.0,0.0012899633729830384,5.555555555555556e-06,4.550669216061186,2380,,,,,,,,,,,,,
244
+ 0.0,0.0010450802510604262,5.319148936170213e-06,4.569789674952199,2390,,,,,,,,,,,,,
245
+ 0.0,0.0010679000988602638,5.08274231678487e-06,4.588910133843212,2400,,,,,,,,,,,,,
246
+ 0.0,0.0010980640072375536,4.8463356973995275e-06,4.6080305927342256,2410,,,,,,,,,,,,,
247
+ 0.0,0.001058583497069776,4.609929078014184e-06,4.627151051625239,2420,,,,,,,,,,,,,
248
+ 0.0,0.0010440428741276264,4.373522458628842e-06,4.646271510516252,2430,,,,,,,,,,,,,
249
+ 0.0,0.0012324192794039845,4.137115839243498e-06,4.665391969407266,2440,,,,,,,,,,,,,
250
+ 0.0,0.0009499081061221659,3.9007092198581565e-06,4.684512428298279,2450,,,,,,,,,,,,,
251
+ 0.0,0.0009513849508948624,3.6643026004728133e-06,4.7036328871892925,2460,,,,,,,,,,,,,
252
+ 0.0,0.0011778529733419418,3.4278959810874705e-06,4.722753346080306,2470,,,,,,,,,,,,,
253
+ 0.0,0.0014292733976617455,3.1914893617021277e-06,4.741873804971319,2480,,,,,,,,,,,,,
254
+ 0.0,0.0010084384121000767,2.955082742316785e-06,4.760994263862333,2490,,,,,,,,,,,,,
255
+ 0.0,0.0009264342370443046,2.7186761229314422e-06,4.780114722753346,2500,,,,,,,,,,,,,
256
+ 0.0,0.001134512946009636,2.4822695035460995e-06,4.7992351816443595,2510,,,,,,,,,,,,,
257
+ 0.0,0.0012454226380214095,2.2458628841607567e-06,4.818355640535373,2520,,,,,,,,,,,,,
258
+ 0.0,0.0008258401066996157,2.009456264775414e-06,4.837476099426386,2530,,,,,,,,,,,,,
259
+ 0.0,0.0009193190489895642,1.7730496453900712e-06,4.8565965583174,2540,,,,,,,,,,,,,
260
+ 0.0,0.0008790619322098792,1.5366430260047282e-06,4.875717017208413,2550,,,,,,,,,,,,,
261
+ 0.0,0.0010386372450739145,1.3002364066193854e-06,4.894837476099426,2560,,,,,,,,,,,,,
262
+ 0.0,0.0009708119905553758,1.0638297872340427e-06,4.91395793499044,2570,,,,,,,,,,,,,
263
+ 0.0529,0.00130277534481138,8.274231678486998e-07,4.933078393881453,2580,,,,,,,,,,,,,
264
+ 0.0,0.0011432914761826396,5.91016548463357e-07,4.952198852772467,2590,,,,,,,,,,,,,
265
+ 0.0,0.000866331800352782,3.546099290780142e-07,4.97131931166348,2600,,,,,,,,,,,,,
266
+ 0.0,0.0013562386156991124,1.182033096926714e-07,4.990439770554493,2610,,,,,,,,,,,,,
267
+ ,,,5.0,2615,0.15207715332508087,0.9935391241923905,0.9935213453507815,0.9935391241923905,0.9935029851238194,96.7809,14.393,0.909,,,,,
268
+ ,,,5.0,2615,,,,,,,,,5717.6191,3.654,0.457,1997213326806600.0,0.07184834820294066
models/.gitkeep ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers==4.43.0
2
+ torch==2.6.0
3
+ scikit-learn==1.2.2
4
+ pandas==2.0.1
5
+ nltk==3.8.1
scripts/predict.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# scripts/predict.py — load the fine-tuned BERT spam classifier and run demo predictions.
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load the tokenizer and weights produced by scripts/train.py
# (saved via save_pretrained into ./models/pretrained).
tokenizer = BertTokenizer.from_pretrained('./models/pretrained')
model = BertForSequenceClassification.from_pretrained('./models/pretrained')

# Run on GPU when available; inference only, so switch to eval mode
# (disables dropout).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
10
+
11
def model_predict(text: str):
    """Classify one message with the module-level model; return 'SPAM' or 'HAM'.

    Label index 1 maps to SPAM, 0 to HAM (matches the mapping used in
    scripts/train.py).
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    encoded = encoded.to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
    predicted_class = int(torch.argmax(logits, dim=1).item())
    if predicted_class == 1:
        return 'SPAM'
    return 'HAM'
18
+
19
def predict():
    """Run the classifier over a few demo messages and print each result."""
    # (message, expected label) — the expectation is informational only.
    samples = [
        ("Hello, do you know with this crypto you can be rich? contact us in 88888", "SPAM"),
        ("Help me richard!", "HAM"),
        ("You can buy loopstation for 100$, try buyloopstation.com", "SPAM"),
        ("Mate, I try to contact your phone, where are you?", "HAM"),
    ]
    for index, (text, _expected) in enumerate(samples, start=1):
        predicted_label = model_predict(text)
        print(f"{index}. Predicted class: {predicted_label}")
35
+
36
# Entry point: run the demo predictions when executed directly.
if __name__ == "__main__":
    predict()
scripts/train.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from datetime import datetime
4
+
5
+ import re
6
+ from collections import Counter
7
+
8
+ import pandas as pd
9
+ import numpy as np
10
+
11
+ import torch
12
+ from torch.nn import CrossEntropyLoss
13
+ from torch.utils.data import Dataset, DataLoader
14
+
15
+ from transformers import (
16
+ BertConfig,
17
+ BertForSequenceClassification,
18
+ BertTokenizer,
19
+ Trainer,
20
+ TrainingArguments,
21
+ EarlyStoppingCallback,
22
+ )
23
+
24
+ from sklearn.model_selection import train_test_split
25
+ from sklearn.metrics import (
26
+ accuracy_score,
27
+ f1_score,
28
+ precision_score,
29
+ recall_score,
30
+ confusion_matrix,
31
+ )
32
+ from sklearn.utils.class_weight import compute_class_weight
33
+
34
# Shared tokenizer/config for bert-base-uncased with a binary (spam/ham) head.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
config = BertConfig.from_pretrained("bert-base-uncased", num_labels=2)

# Train on GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
+
39
class WeightedBertForSequenceClassification(BertForSequenceClassification):
    """BERT sequence classifier whose training loss applies per-class weights.

    ``class_weights`` is optional so the class keeps the ``cls(config)``
    constructor signature that ``from_pretrained``/``from_config`` rely on;
    with no weights the loss degenerates to plain CrossEntropyLoss.
    """

    def __init__(self, config, class_weights=None):
        super().__init__(config)
        # Expected: tensor of shape (num_labels,) on the training device,
        # or None for unweighted loss.
        self.class_weights = class_weights

    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        # Call the parent forward WITHOUT labels so it skips its own
        # (unweighted) loss; the weighted loss is computed here instead.
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask, labels=None, **kwargs)
        logits = outputs.logits
        loss = None
        if labels is not None:
            # CrossEntropyLoss accepts weight=None, so this covers both cases.
            loss_fct = CrossEntropyLoss(weight=self.class_weights)
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))
        # Plain dict with "loss"/"logits" keys — accepted by transformers.Trainer.
        return {"loss": loss, "logits": logits}
52
+
53
class SMSClassificationDataset(Dataset):
    """Torch dataset pairing tokenizer encodings with integer class labels."""

    def __init__(self, encodings, labels):
        # encodings: mapping of field name -> tensor indexed by example
        # (e.g. the BatchEncoding returned by the tokenizer).
        self.encodings = encodings
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, idx):
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample["labels"] = self.labels[idx]
        return sample
65
+
66
def compute_metrics(eval_pred):
    """Compute Trainer eval metrics from an (logits, labels) pair.

    Returns accuracy plus weighted precision/recall/F1, and prints the
    confusion matrix as a side effect.
    """
    logits, labels = eval_pred
    # Argmax directly on the array instead of round-tripping through a
    # torch tensor (logits arrive array-like from the Trainer — presumably
    # numpy; np.argmax handles either).
    predictions = np.argmax(logits, axis=1)

    acc = accuracy_score(labels, predictions)
    # zero_division=0 on both precision and recall so a degenerate eval set
    # yields 0.0 instead of a warning (the original set it only on precision).
    precision = precision_score(labels, predictions, average="weighted", zero_division=0)
    recall = recall_score(labels, predictions, average="weighted", zero_division=0)
    f1 = f1_score(labels, predictions, average="weighted")
    cm = confusion_matrix(labels, predictions)

    print("Confusion Matrix:\n", cm)

    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }
84
+
85
def train():
    """Fine-tune bert-base-uncased as a spam/ham classifier on data/spam.csv.

    Saves the tokenizer and model to ./models/pretrained and writes the
    Trainer's log history to logs/training_logs_<timestamp>.csv.
    """
    from pathlib import Path  # local: only needed to create the logs directory

    # Dataset columns: 'label' in {'spam', 'ham'}, 'text' is the message body.
    df = pd.read_csv('data/spam.csv', encoding='iso-8859-1')[['label', 'text']]

    label_mapping = {'spam': 1, 'ham': 0}
    df['label'] = df['label'].map(label_mapping)

    train_texts, val_texts, train_labels, val_labels = train_test_split(
        df['text'].tolist(), df['label'].tolist(), test_size=0.25, random_state=42)

    # Balanced class weights compensate for the skewed label distribution.
    class_weights = compute_class_weight(
        class_weight='balanced',
        classes=np.unique(train_labels),
        y=train_labels
    )
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    model = WeightedBertForSequenceClassification(config, class_weights=class_weights)

    # Silence transformers' loggers (e.g. warnings from the strict=False
    # weight load below).
    loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
    for logger in loggers:
        if "transformers" in logger.name.lower():
            logger.setLevel(logging.ERROR)

    # Copy pretrained BERT weights into the custom-loss subclass;
    # strict=False tolerates any head parameters that do not line up.
    model.load_state_dict(BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2, use_safetensors=True, return_dict=False, attn_implementation="sdpa").state_dict(), strict=False)
    model.to(device)

    train_encodings = tokenizer(train_texts, truncation=True, padding=True, return_tensors="pt")
    val_encodings = tokenizer(val_texts, truncation=True, padding=True, return_tensors="pt")

    train_dataset = SMSClassificationDataset(train_encodings, train_labels)
    val_dataset = SMSClassificationDataset(val_encodings, val_labels)

    training_args = TrainingArguments(
        output_dir='./models/pretrained',
        num_train_epochs=5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        eval_strategy="epoch",
        report_to="none",
        save_total_limit=1,
        load_best_model_at_end=True,
        save_strategy="epoch",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    trainer.train()

    # Persist the per-step/per-epoch log history for later inspection.
    logs = trainer.state.log_history
    df_logs = pd.DataFrame(logs)

    # FIX: make sure logs/ exists — DataFrame.to_csv raises OSError if the
    # target directory is missing, and nothing above guarantees it was made.
    Path("logs").mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    df_logs.to_csv(f"logs/training_logs_{timestamp}.csv", index=False)

    tokenizer.save_pretrained('./models/pretrained')
    model.save_pretrained('./models/pretrained')
152
+
153
# Entry point: fine-tune and save the model when run as a script.
if __name__ == "__main__":
    train()