Spaces:
Build error
Build error
File size: 1,477 Bytes
import streamlit as st
import pickle
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# 🧠 Download the required NLTK resources only if they are not already installed
# Each entry is (resource path looked up locally, package id to download if missing).
_NLTK_RESOURCES = (
    ('tokenizers/punkt', 'punkt'),
    # NLTK 3.9 and later make word_tokenize load the 'punkt_tab' tables instead
    # of the pickled 'punkt' models; without this entry tokenization raises
    # LookupError there. On older releases it is just an unused extra download.
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
)

for _resource_path, _package_id in _NLTK_RESOURCES:
    try:
        # nltk.data.find raises LookupError when the resource is not installed.
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_id)
# 🤖 Initialize stemmer
ps = PorterStemmer()  # single shared instance; transform_text calls ps.stem() on every token it keeps
# Preprocessing function
def transform_text(text):
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The text is lowercased and split with ``nltk.word_tokenize``; only tokens
    for which ``str.isalnum()`` is true are kept, English stopwords are
    dropped, and each remaining token is stemmed with the module-level
    Porter stemmer ``ps``.

    Args:
        text: Raw message text.

    Returns:
        The surviving stemmed tokens joined by single spaces; an empty
        string when no token survives the filters.
    """
    # Build the stopword lookup once per call. The previous code evaluated
    # stopwords.words('english') for every token and scanned the returned
    # list each time; a set gives the same answer with one corpus read.
    english_stopwords = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # The earlier `word not in string.punctuation` test is intentionally gone:
    # it ran only on tokens that had already passed isalnum(), and no
    # alphanumeric token can be a substring of string.punctuation, so it was
    # always true. The output is unchanged.
    return " ".join(
        ps.stem(token)
        for token in tokens
        if token.isalnum() and token not in english_stopwords
    )
# 📦 Load model and vectorizer
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only deploy vectorizer.pkl / model.pkl artifacts produced by a trusted
# training run.
# The context managers close the file handles as soon as the objects are
# loaded instead of leaving them open until garbage collection.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# 💬 Streamlit UI
# Page layout: a title, a free-text box, and a button that triggers
# classification. The emoji in the user-facing strings are restored from
# their mis-decoded byte sequences; the "Not Spam" literal had been split
# across two lines by that corruption.
st.title("📩 SMS Spam Classifier")
input_sms = st.text_area("Enter the message")

if st.button('Predict'):
    if not input_sms.strip():
        # Nothing to classify: an empty or whitespace-only message would
        # otherwise be vectorized and reported as a real prediction.
        st.warning("Please enter a message to classify.")
    else:
        # 1. Preprocess with the same normalization pipeline as transform_text
        transformed_sms = transform_text(input_sms)
        # 2. Vectorize (transform expects an iterable of documents, hence the list)
        vector_input = tfidf.transform([transformed_sms])
        # 3. Predict; take the single prediction for the single input row
        result = model.predict(vector_input)[0]
        # 4. Show result: label 1 is rendered as spam, anything else as not spam
        if result == 1:
            st.error("🚫 Spam")
        else:
            st.success("✅ Not Spam")
|