AezersX commited on
Commit ·
f282658
1
Parent(s): 40f1040
Upload 8 files
Browse files- .gitattributes +2 -0
- app.py +40 -0
- flagged/log.csv +15 -0
- jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv +0 -0
- jigsaw-toxic-comment-classification-challenge/test.csv/test.csv +3 -0
- jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv +0 -0
- jigsaw-toxic-comment-classification-challenge/train.csv/train.csv +3 -0
- requirements.txt +4 -0
- toxicity.h5 +3 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
jigsaw-toxic-comment-classification-challenge/test.csv/test.csv filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
jigsaw-toxic-comment-classification-challenge/train.csv/train.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from tensorflow.keras.layers import TextVectorization
|
| 7 |
+
import gradio as gr
|
| 8 |
+
from tensorflow.keras.layers import TextVectorization
|
| 9 |
+
|
| 10 |
+
modelbaru = tf.keras.models.load_model('toxicity.h5')
|
| 11 |
+
|
| 12 |
+
MAX_FEATURES = 200000
|
| 13 |
+
data = pd.read_csv(os.path.join('jigsaw-toxic-comment-classification-challenge', 'train.csv', 'train.csv'))
|
| 14 |
+
x = data['comment_text']
|
| 15 |
+
y = data[data.columns[2:]].values
|
| 16 |
+
|
| 17 |
+
vectorizer = TextVectorization(max_tokens=MAX_FEATURES, output_sequence_length=1800, output_mode='int')
|
| 18 |
+
vectorizer.adapt(x.values)
|
| 19 |
+
vectorizer('Yo Whats up')[:3]
|
| 20 |
+
vectorized_text = vectorizer(x.values)
|
| 21 |
+
vectorized_text
|
| 22 |
+
|
| 23 |
+
input_str = vectorizer('yo i fuckin hate you')
|
| 24 |
+
res = modelbaru.predict(np.expand_dims(input_str,0))
|
| 25 |
+
res > 0.5
|
| 26 |
+
data.columns[2:]
|
| 27 |
+
data.columns[2:-1]
|
| 28 |
+
|
def score_comment(comment):
    """Classify a comment and report each toxicity label as True/False.

    Args:
        comment: raw comment text from the Gradio textbox.

    Returns:
        One ``"label: bool"`` line per toxicity label, each prediction
        thresholded at 0.5, terminated by a newline.
    """
    vectorized_comment = vectorizer([comment])
    results = modelbaru.predict(vectorized_comment)

    # BUG FIX: the model was trained on data.columns[2:] (all six labels),
    # but the original loop iterated data.columns[2:-1], silently dropping
    # the final label (identity_hate) from the report. Iterate all labels
    # so every model output is shown.
    lines = []
    for idx, col in enumerate(data.columns[2:]):
        lines.append('{}: {}'.format(col, results[0][idx] > 0.5))
    # join instead of repeated '+=' concatenation; trailing '\n' preserved.
    return '\n'.join(lines) + '\n'
# Wire the scorer into a simple Gradio UI: one multi-line textbox in,
# a plain-text label report out.
# FIX: gr.inputs.Textbox was removed in Gradio 3.x; requirements.txt does
# not pin gradio, so a fresh install gets a modern version where the old
# namespace raises AttributeError. Use the top-level gr.Textbox API.
# NOTE(review): 'placeholder' is the hint text shown inside the empty box,
# not a title — a `label=` argument may be what was intended.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Toxic Detector by: AezersX'),
    outputs='text',
)
# share=True requests a public tunnel link; it is ignored on hosted Spaces.
interface.launch(share=True)
flagged/log.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'comment','output','flag','username','timestamp'
|
| 2 |
+
'','toxic: False
|
| 3 |
+
severe_toxic: False
|
| 4 |
+
obscene: False
|
| 5 |
+
threat: False
|
| 6 |
+
insult: False
|
| 7 |
+
','','','2022-08-05 22:09:27.277332'
|
| 8 |
+
'','toxic: False
|
| 9 |
+
severe_toxic: False
|
| 10 |
+
obscene: False
|
| 11 |
+
threat: False
|
| 12 |
+
insult: False
|
| 13 |
+
','','','2022-08-05 22:09:29.066786'
|
| 14 |
+
'test','','','','2022-12-22 19:43:56.796880'
|
| 15 |
+
'test','','','','2022-12-22 19:43:57.836865'
|
jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
jigsaw-toxic-comment-classification-challenge/test.csv/test.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2513ce4abb98c4d1d216e3ca0d4377d57589a0989aa8c06a840509a16c786e8
|
| 3 |
+
size 60354593
|
jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
jigsaw-toxic-comment-classification-challenge/train.csv/train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
|
| 3 |
+
size 68802655
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
tensorflow>=2.6
|
| 3 |
+
numpy
|
| 4 |
+
gradio
|
toxicity.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:165dd163b69f9ba71c6a78193a3d2f3a196ff8d0916d4194e8f848f9ae423c7b
|
| 3 |
+
size 77581464
|