Shiva7706
/

Spam_Message_detection

Model card Files Files and versions

Spam_Message_detection / app.py

Shiva7706's picture

Update app.py

3c1717d verified about 1 year ago

history blame contribute delete

2.1 kB

	import streamlit as st
	import joblib
	import nltk
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	import string
	import re


	# Make sure the NLTK data used by preprocess_text() is installed.
	# Streamlit re-executes this script on every interaction, so each resource is
	# looked up locally first and only downloaded when missing; an unconditional
	# nltk.download() would contact the download index on every rerun.
	# 'punkt_tab' is the tokenizer data that word_tokenize() looks up on recent
	# NLTK releases; 'punkt' is kept for older releases. On an NLTK release whose
	# index has no 'punkt_tab' package, the download call reports the unknown
	# package and returns False without raising.
	for _resource_path, _package in (
	    ('tokenizers/punkt', 'punkt'),
	    ('tokenizers/punkt_tab', 'punkt_tab'),
	    ('corpora/stopwords', 'stopwords'),
	):
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        nltk.download(_package, quiet=True)

	def preprocess_text(text):
	    """Normalise a raw message into a single space-joined string of tokens.

	    Steps, in order: lowercase the text, delete every character found in
	    ``string.punctuation``, delete digit runs (``\\d+``), collapse whitespace,
	    tokenize with NLTK's ``word_tokenize`` and drop English stop words.

	    Args:
	        text: The message to clean, as a ``str``.

	    Returns:
	        The remaining tokens joined by single spaces (possibly an empty string).
	    """
	    # One translate() pass deletes all ASCII punctuation characters.
	    punctuation_eraser = str.maketrans('', '', string.punctuation)
	    cleaned = text.lower().translate(punctuation_eraser)

	    # Drop digit runs, then squeeze the whitespace they leave behind.
	    cleaned = re.sub(r'\d+', '', cleaned)
	    cleaned = ' '.join(cleaned.split())

	    words = word_tokenize(cleaned)

	    # Set membership keeps the stop-word filter O(1) per token.
	    english_stop_words = set(stopwords.words('english'))
	    return ' '.join(word for word in words if word not in english_stop_words)


	@st.cache_resource
	def _load_artifacts():
	    """Load the saved classifier and vectorizer from disk.

	    Wrapped in ``st.cache_resource`` so the two joblib files are deserialised
	    once and the same objects are reused on later script reruns, instead of
	    being reloaded on every widget interaction.

	    Returns:
	        A ``(model, vectorizer)`` tuple loaded from
	        ``spam_detector_model.joblib`` and ``tfidf_vectorizer.joblib``.
	    """
	    # NOTE(security): joblib files are pickle-based and can execute code when
	    # loaded; only load artifacts that ship with this app.
	    return (
	        joblib.load('spam_detector_model.joblib'),
	        joblib.load('tfidf_vectorizer.joblib'),
	    )


	# Module-level names used by the prediction code further down the script.
	model, vectorizer = _load_artifacts()


	# ---- Main page: title, instructions, input box and the Analyze action ----
	st.title("📧 Spam Message Detector")

	st.write("""
	This app detects whether a message is spam or not.
	Enter your message below and click 'Analyze' to check!
	""")

	message = st.text_area("Enter your message:", height=100)

	if st.button("Analyze"):
	    # Treat whitespace-only input as empty: it would otherwise be classified
	    # and given a confidence score even though it contains no text.
	    if message.strip():
	        # Apply the cleaning step, then vectorize the cleaned text.
	        processed_text = preprocess_text(message)
	        text_vectorized = vectorizer.transform([processed_text])

	        # A single row goes in, so take element 0 of each result.
	        prediction = model.predict(text_vectorized)[0]
	        probability = model.predict_proba(text_vectorized)[0]

	        st.markdown("### Analysis Result")

	        # NOTE(review): assumes label 1 means spam and that predict_proba
	        # columns are ordered [legitimate, spam] - confirm against training.
	        if prediction == 1:
	            st.error("🚨 This message is likely SPAM!")
	            st.write(f"Confidence: {probability[1]:.2%}")
	        else:
	            st.success("✅ This message appears to be legitimate.")
	            st.write(f"Confidence: {probability[0]:.2%}")

	        # Show the text before and after cleaning.
	        with st.expander("See preprocessing steps"):
	            st.write("Original message:", message)
	            st.write("Processed message:", processed_text)
	    else:
	        st.warning("Please enter a message to analyze.")


	# Sidebar: static description of the model. The accuracy figures below are
	# hard-coded text; nothing in this script computes them.
	st.sidebar.header("About the Model")
	st.sidebar.write("""
	This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.

	Model Performance:
	- Training Accuracy: 99.7%
	- Testing Accuracy: 98.9%
	""")