| import gradio as gr |
| import pickle |
| import numpy as np |
| import pandas as pd |
| import nltk |
| from nltk.corpus import stopwords |
| from nltk.stem import PorterStemmer, WordNetLemmatizer |
| import re |
| import warnings |
| warnings.filterwarnings('ignore') |
|
|
| |
# Fetch the NLTK data packages at import time. 'stopwords' backs the
# stopwords.words('english') call and 'wordnet' backs WordNetLemmatizer used
# by TextPreprocessor; 'omw-1.4' and 'punkt' are downloaded as well, although
# nothing visible in this file reads them directly.
print("Downloading NLTK resources...")
for _nltk_resource in ('stopwords', 'wordnet', 'omw-1.4', 'punkt'):
    nltk.download(_nltk_resource, quiet=True)
# The status marker had been mis-encoded, which also split this string literal
# across two lines; it is restored here as the check-mark emoji.
print("✅ NLTK resources downloaded")
|
|
| |
| |
| |
|
|
class TextPreprocessor:
    """
    Text-cleaning pipeline applied to reviews before vectorization.

    Steps performed by clean_text, in order:
    - Lowercasing
    - URL, e-mail address and HTML tag removal
    - Replacement of every non-letter character with a space
    - Whitespace collapsing
    - Optional stopword filtering (negation words are kept)
    - Lemmatization (verb pass, then noun pass) or Porter stemming
    """

    # Patterns whose matches clean_text replaces with a single space. The
    # order matters: URLs and e-mail addresses must be removed before the
    # non-letter pass destroys the punctuation they are recognised by.
    # They live on the class (not the instance) so that instances restored
    # from a pickle, which skip __init__, still find them.
    _SCRUB_PATTERNS = (
        re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'),
        re.compile(r'\S+@\S+'),
        re.compile(r'<.*?>'),
        re.compile(r'[^a-zA-Z\s]'),
    )
    _WHITESPACE_RUN = re.compile(r'\s+')

    def __init__(self, use_lemmatization=True, remove_stopwords=True):
        """
        Set up the normalizers and the stopword list.

        Parameters:
            use_lemmatization (bool): Lemmatize tokens when True, otherwise
                apply Porter stemming
            remove_stopwords (bool): Drop stopwords from the cleaned text
        """
        # Both normalizers are always created; clean_text picks one per call
        # based on use_lemmatization.
        self.stemmer = PorterStemmer()
        self.lemmatizer = WordNetLemmatizer()
        self.use_lemmatization = use_lemmatization
        self.remove_stopwords = remove_stopwords

        # Words taken out of the stopword list so that they survive filtering.
        # NOTE(review): clean_text replaces apostrophes with spaces before the
        # stopword filter runs, so the contraction entries below can never
        # equal a token; only the apostrophe-free words have any effect.
        kept_negations = {
            'not', 'no', 'nor', 'neither', 'never', 'none',
            'nothing', 'nowhere', "don't", "doesn't", "didn't",
            "won't", "wouldn't", "can't", "couldn't", "shouldn't",
            "wasn't", "weren't", "hasn't", "haven't", "hadn't"
        }
        self.stop_words = set(stopwords.words('english')) - kept_negations

    def clean_text(self, text: str) -> str:
        """
        Normalize one raw text string.

        Parameters:
            text (str): Raw text

        Returns:
            str: Space-joined normalized tokens (empty string if none remain)
        """
        scrubbed = text.lower()
        for pattern in self._SCRUB_PATTERNS:
            scrubbed = pattern.sub(' ', scrubbed)

        tokens = self._WHITESPACE_RUN.sub(' ', scrubbed).strip().split()

        if self.remove_stopwords:
            tokens = [token for token in tokens if token not in self.stop_words]

        if self.use_lemmatization:
            # Verb lemma first, then the noun lemma of that result.
            lemmatize = self.lemmatizer.lemmatize
            tokens = [lemmatize(lemmatize(token, pos='v'), pos='n') for token in tokens]
        else:
            stem = self.stemmer.stem
            tokens = [stem(token) for token in tokens]

        return ' '.join(tokens)

    def fit_transform(self, texts):
        """Clean every text in an iterable and return the results as a list."""
        return list(map(self.clean_text, texts))

    def transform(self, texts):
        """Clean every text in an iterable (delegates to fit_transform)."""
        return self.fit_transform(texts)
|
|
| |
| |
| |
|
|
# Load the trained artifacts from the current working directory. The three
# names bound here (model, vectorizer, preprocessor) are module-level globals
# that predict_sentiment reads.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file,
# so these .pkl files must only ever come from a trusted source.
# The status markers in the messages below had been mis-encoded (splitting the
# string literals across lines); they are restored as check-mark / cross emoji.
print("Loading models...")
try:
    with open('best_model.pkl', 'rb') as f:
        model = pickle.load(f)
    print("✅ Model loaded")

    with open('tfidf_vectorizer.pkl', 'rb') as f:
        vectorizer = pickle.load(f)
    print("✅ Vectorizer loaded")

    with open('preprocessor.pkl', 'rb') as f:
        preprocessor = pickle.load(f)
    print("✅ Preprocessor loaded")

except Exception as e:
    # Startup boundary: report the failure, then re-raise so the app does not
    # continue without its artifacts.
    print(f"❌ Error loading models: {e}")
    raise
|
|
| |
def extract_features(texts, original_texts):
    """Build the statistical feature table for a batch of reviews.

    Parameters:
        texts: Cleaned review strings; used for the word-based features.
        original_texts: Raw review strings, index-aligned with ``texts``;
            used for the length, punctuation and capitalisation features.

    Returns:
        pd.DataFrame: One row per review with the columns, in this order,
        review_length, word_count, avg_word_length, exclamation_count,
        question_count, capital_ratio. predict_sentiment appends ``.values``
        of this frame to the TF-IDF matrix, so the column order must not
        change.
    """
    # Tokenise each cleaned text once and reuse the result for both
    # word-based features.
    tokenized = [text.split() for text in texts]

    features = {
        'review_length': [len(raw) for raw in original_texts],
        'word_count': [len(words) for words in tokenized],
        # Guard on the token list rather than on the string: whitespace-only
        # text is truthy but has no words, and np.mean([]) would produce NaN.
        'avg_word_length': [
            np.mean([len(word) for word in words]) if words else 0
            for words in tokenized
        ],
        'exclamation_count': [raw.count('!') for raw in original_texts],
        'question_count': [raw.count('?') for raw in original_texts],
        # Share of characters that are upper-case; 0 for an empty review.
        'capital_ratio': [
            sum(1 for c in raw if c.isupper()) / len(raw) if len(raw) > 0 else 0
            for raw in original_texts
        ],
    }
    return pd.DataFrame(features)
|
|
| |
def _format_probability(value):
    """Render a probability as a percentage string, or "N/A" when it is None."""
    # Compare against None explicitly: a probability of exactly 0.0 is a
    # valid value and must be shown as "0.00%", not "N/A".
    return "N/A" if value is None else f"{value:.2%}"


def predict_sentiment(review_text):
    """Predict the sentiment of one review for the Gradio UI.

    Reads the module-level ``preprocessor``, ``vectorizer`` and ``model``.

    Parameters:
        review_text: Raw review text typed by the user (may be empty or None).

    Returns:
        tuple: Five strings, in this order: sentiment label, confidence,
        negative probability, positive probability, cleaned text. On empty
        input or on any error the first element carries the message and the
        remaining four are empty strings.
    """
    # NOTE(review): the emoji in the messages below were restored from
    # mis-encoded text; the two face emoji are a best guess - confirm against
    # the original file.
    if not review_text or not review_text.strip():
        return "⚠️ Please enter a review!", "", "", "", ""

    try:
        cleaned = preprocessor.clean_text(review_text)

        # Dense TF-IDF row so it can be concatenated with the extra features.
        vectorized = vectorizer.transform([cleaned]).toarray()

        # Statistical features use both the cleaned and the raw text.
        add_features = extract_features([cleaned], [review_text])

        # Feature layout: TF-IDF columns first, statistical columns last.
        X_new = np.concatenate([vectorized, add_features.values], axis=1)

        prediction = model.predict(X_new)[0]

        if hasattr(model, 'predict_proba'):
            proba = model.predict_proba(X_new)[0]
            confidence = max(proba)
            # Assumes the probability columns are ordered [negative, positive]
            # (i.e. class labels 0 and 1) - TODO confirm against model.classes_.
            prob_neg = proba[0]
            prob_pos = proba[1]
        else:
            # Models without probability estimates show "N/A" for all three.
            confidence = None
            prob_neg = None
            prob_pos = None

        sentiment = "✅ Positive 😊" if prediction == 1 else "❌ Negative 😞"
        conf_str = _format_probability(confidence)
        neg_str = _format_probability(prob_neg)
        pos_str = _format_probability(prob_pos)

        return sentiment, conf_str, neg_str, pos_str, cleaned

    except Exception as e:
        # UI boundary: surface the failure in the sentiment box rather than
        # letting the request crash.
        return f"❌ Error: {str(e)}", "", "", "", ""
|
|
| |
# Build the Gradio UI. `demo` is the module-level Blocks app that the
# __main__ guard launches.
# NOTE(review): the emoji and bullet characters in the UI strings below were
# restored from mis-encoded text. Several of them (those that had collapsed to
# a bare Greek letter) are best guesses from context, and the nesting of the
# layout was reconstructed from the `with` statements - confirm both against
# the original file.
print("Creating Gradio interface...")


with gr.Blocks(
    theme=gr.themes.Soft(),
    title="Restaurant Review Sentiment Analyzer"
) as demo:

    # Page header.
    gr.Markdown("""
    # 🍽️ Restaurant Review Sentiment Analyzer
    ### AI-Powered Sentiment Analysis with Machine Learning

    Enter a restaurant review to analyze its sentiment in real-time!

    **Model:** Advanced ML Classification
    **Accuracy:** 85%+
    **Features:** TF-IDF + Statistical Text Analysis
    """)

    with gr.Row():
        # Left column: review input and action buttons.
        with gr.Column(scale=2):
            gr.Markdown("### 📝 Enter Your Review")
            input_text = gr.Textbox(
                label="Restaurant Review",
                placeholder="e.g., The food was amazing and the service was excellent!",
                lines=5
            )

            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                # Clears only the input box; the result fields are not in its list.
                clear_btn = gr.ClearButton([input_text], value="🗑️ Clear", size="lg")

        # Right column: read-only result fields filled by predict_sentiment.
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Analysis Results")
            sentiment_output = gr.Textbox(label="🎯 Predicted Sentiment", interactive=False)
            confidence_output = gr.Textbox(label="📊 Confidence Score", interactive=False)

            with gr.Row():
                neg_prob = gr.Textbox(label="😞 Negative Probability", interactive=False)
                pos_prob = gr.Textbox(label="😊 Positive Probability", interactive=False)

            with gr.Accordion("🔍 Preprocessing Details", open=False):
                cleaned_output = gr.Textbox(
                    label="Cleaned Review Text (After Preprocessing)",
                    interactive=False,
                    lines=3
                )
                gr.Markdown("""
                **Preprocessing Steps:**
                1. Convert to lowercase
                2. Remove URLs, emails, HTML tags
                3. Remove special characters
                4. Remove stopwords (keep negations)
                5. Apply lemmatization
                6. Extract statistical features
                """)

    gr.Markdown("---")
    gr.Markdown("### 💡 Try These Example Reviews")

    # Clicking an example copies it into the input box.
    gr.Examples(
        examples=[
            ["The food was absolutely amazing! Best restaurant I've ever been to!"],
            ["Terrible service and the food was cold. Never coming back."],
            ["Outstanding! The staff was friendly and attentive."],
            ["Worst meal ever. Complete waste of money."],
            ["Good food but portions were small. Reasonable prices."],
            ["Fantastic! Every dish was cooked to perfection!"],
        ],
        inputs=input_text,
        label="Click to try"
    )

    gr.Markdown("""
    ---
    ### 📚 About This Model

    **Machine Learning Pipeline:**
    - **Preprocessing:** Lemmatization, stopword removal, text normalization
    - **Features:** TF-IDF (1500 features, bigrams) + 6 statistical features
    - **Algorithm:** Ensemble machine learning (Random Forest / SVM / Gradient Boosting)
    - **Accuracy:** 85%+ on test data
    - **Metrics:** High precision, recall, and F1-score

    **Technologies:** Python • Scikit-learn • NLTK • Gradio • Pandas • NumPy

    **Developer:** Einstein Ellandala | Project: ML-06-BML11 | October 2025
    """)

    # The output order must match the 5-tuple returned by predict_sentiment.
    submit_btn.click(
        fn=predict_sentiment,
        inputs=input_text,
        outputs=[sentiment_output, confidence_output, neg_prob, pos_prob, cleaned_output]
    )


print("✅ Gradio interface created")
# Printed at import time, before (and regardless of) the __main__ guard.
print("🚀 Launching application...")
|
|
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # 0.0.0.0 binds every network interface, making the app reachable from
    # other hosts; show_error is passed through to Gradio's launch().
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)
|
|