Spaces:

Elliot89
/

sentiment-analysis-restaurant

Sleeping

App Files Files Community

Elliot89 committed on Oct 19, 2025

Commit

2113fdf

verified ·

1 Parent(s): 9be220c

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -40

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 import nltk
 from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
 import re
 import warnings
 warnings.filterwarnings('ignore')
@@ -17,7 +17,103 @@ nltk.download('omw-1.4', quiet=True)
 nltk.download('punkt', quiet=True)
 print("✅ NLTK resources downloaded")
 # Load models
 print("Loading models...")
 try:
     with open('best_model.pkl', 'rb') as f:
@@ -104,10 +200,7 @@ print("Creating Gradio interface...")
 with gr.Blocks(
     theme=gr.themes.Soft(),
-    title="Restaurant Review Sentiment Analyzer",
-    css="""
-    .gradio-container {font-family: 'Arial', sans-serif;}
-    """
 ) as demo:
     gr.Markdown("""
@@ -116,9 +209,9 @@ with gr.Blocks(
     Enter a restaurant review to analyze its sentiment in real-time!
-    **Model:** Random Forest Classifier
     **Accuracy:** 85%+
-    **Features:** TF-IDF + Statistical Text Features
     """)
     with gr.Row():
@@ -127,8 +220,7 @@ with gr.Blocks(
             input_text = gr.Textbox(
                 label="Restaurant Review",
                 placeholder="e.g., The food was amazing and the service was excellent!",
-                lines=6,
-                max_lines=10
             )
             with gr.Row():
@@ -151,12 +243,13 @@ with gr.Blocks(
             lines=3
         )
         gr.Markdown("""
-        **Preprocessing Steps Applied:**
         1. Convert to lowercase
-        2. Remove special characters and numbers
-        3. Remove stopwords (preserving negations)
-        4. Apply lemmatization
-        5. Extract statistical features
         """)
     gr.Markdown("---")
@@ -164,17 +257,15 @@ with gr.Blocks(
     gr.Examples(
         examples=[
-            ["The food was absolutely amazing! Best restaurant I've ever been to! The service was impeccable."],
-            ["Terrible service and the food was cold. The waiter was rude. Never coming back!"],
-            ["Outstanding experience from start to finish! Every dish was cooked to perfection. Highly recommended!"],
-            ["Worst meal I've ever had. Complete waste of money. Very disappointing experience."],
-            ["Good food but the portions were quite small. Reasonable prices. Service was okay."],
-            ["Fantastic! The ambiance was perfect and the food was delicious. Will definitely return!"],
-            ["Not impressed at all. The quality has really gone downhill. Won't be going back."],
-            ["Absolutely loved everything! Great variety and excellent presentation. Five stars!"]
         ],
         inputs=input_text,
-        label="Click any example to try it"
     )
     gr.Markdown("""
@@ -182,21 +273,17 @@ with gr.Blocks(
     ### 📚 About This Model
     **Machine Learning Pipeline:**
-    - **Preprocessing:** Lemmatization, stopword removal, text cleaning
-    - **Feature Engineering:** TF-IDF vectorization (1500 features, bigrams) + 6 statistical features
-    - **Algorithm:** Random Forest Classifier
-    - **Training:** 6 different models compared, best one deployed
-    - **Evaluation:** Cross-validation, multiple metrics (Accuracy, F1, ROC-AUC)
-    **Technologies Used:**
-    - Python, Scikit-learn, NLTK, Gradio, Pandas, NumPy
-    **Developer:** Einstein Ellandala | Project: ML-06-BML11
-    📓 **Full Project:** [View on GitHub](https://github.com/MrEinsteinE/sentiment-analysis-restaurant)
     """)
-    # Connect button to prediction function
     submit_btn.click(
         fn=predict_sentiment,
         inputs=input_text,
@@ -206,10 +293,5 @@ with gr.Blocks(
 print("✅ Gradio interface created")
 print("🚀 Launching application...")
-# Launch the app
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import pandas as pd
 import nltk
 from nltk.corpus import stopwords
+from nltk.stem import PorterStemmer, WordNetLemmatizer
 import re
 import warnings
 warnings.filterwarnings('ignore')
 nltk.download('punkt', quiet=True)
 print("✅ NLTK resources downloaded")
+# ============================================================================
+# CRITICAL: Define TextPreprocessor class BEFORE loading the pickle file
+# ============================================================================
+class TextPreprocessor:
+    """
+    Advanced text preprocessing pipeline for sentiment analysis.
+    Features:
+    - Lemmatization for better word normalization
+    - Custom stopword filtering (preserves negation words)
+    - URL and email removal
+    - Special character cleaning
+    - Case normalization
+    """
+    def __init__(self, use_lemmatization=True, remove_stopwords=True):
+        """
+        Initialize the preprocessor.
+        Parameters:
+            use_lemmatization (bool): Use lemmatization instead of stemming
+            remove_stopwords (bool): Remove stopwords from text
+        """
+        self.stemmer = PorterStemmer()  # used by clean_text only when use_lemmatization is False
+        self.lemmatizer = WordNetLemmatizer()  # used by clean_text when use_lemmatization is True
+        self.use_lemmatization = use_lemmatization
+        self.remove_stopwords = remove_stopwords
+        # Start from NLTK's English stopword list; the negation words are subtracted from it below
+        self.stop_words = set(stopwords.words('english'))
+        # Negation words are taken out of the stopword set so clean_text's stopword filter does not drop them
+        negation_words = {
+            'not', 'no', 'nor', 'neither', 'never', 'none',
+            'nothing', 'nowhere', "don't", "doesn't", "didn't",
+            "won't", "wouldn't", "can't", "couldn't", "shouldn't",
+            "wasn't", "weren't", "hasn't", "haven't", "hadn't"
+        }
+        self.stop_words = self.stop_words - negation_words  # NOTE(review): clean_text turns apostrophes into spaces before filtering, so the contraction entries never match a token; fragments like "don"/"t" may still be dropped if the NLTK list contains them - confirm
+    def clean_text(self, text: str) -> str:
+        """
+        Clean and preprocess a single text string.
+        Parameters:
+            text (str): Raw text
+        Returns:
+            str: Cleaned text
+        """
+        # Convert to lowercase (raises AttributeError if text is not a str, e.g. None)
+        text = text.lower()
+        # Remove URLs first, while the "http(s)://" punctuation that identifies them is still present
+        text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', ' ', text)
+        # Remove email addresses: any run of non-whitespace characters with an "@" inside it
+        text = re.sub(r'\S+@\S+', ' ', text)
+        # Remove HTML tags (non-greedy "<...>"; "." does not match a newline, so a tag split across lines is not matched here)
+        text = re.sub(r'<.*?>', ' ', text)
+        # Replace every character that is not an ASCII letter or whitespace (digits, punctuation, apostrophes) with a space
+        text = re.sub(r'[^a-zA-Z\s]', ' ', text)
+        # Collapse runs of whitespace into single spaces and trim both ends
+        text = re.sub(r'\s+', ' ', text).strip()
+        # Tokenize on whitespace
+        words = text.split()
+        # Remove stopwords if enabled (the negation words were subtracted from self.stop_words in __init__, so they survive)
+        if self.remove_stopwords:
+            words = [word for word in words if word not in self.stop_words]
+        # Lemmatize each word as a verb, then lemmatize that result as a noun; otherwise fall back to Porter stemming
+        if self.use_lemmatization:
+            words = [self.lemmatizer.lemmatize(word, pos='v') for word in words]
+            words = [self.lemmatizer.lemmatize(word, pos='n') for word in words]
+        else:
+            words = [self.stemmer.stem(word) for word in words]
+        return ' '.join(words)  # an input with no surviving tokens yields ''
+    def fit_transform(self, texts):
+        """Process multiple texts."""
+        return [self.clean_text(text) for text in texts]  # nothing is fitted; returns a new list of cleaned strings
+    def transform(self, texts):
+        """Process multiple texts (alias for fit_transform)."""
+        return self.fit_transform(texts)
+# ============================================================================
 # Load models
+# ============================================================================
 print("Loading models...")
 try:
     with open('best_model.pkl', 'rb') as f:
 with gr.Blocks(
     theme=gr.themes.Soft(),
+    title="Restaurant Review Sentiment Analyzer"
 ) as demo:
     gr.Markdown("""
     Enter a restaurant review to analyze its sentiment in real-time!
+    **Model:** Advanced ML Classification
     **Accuracy:** 85%+
+    **Features:** TF-IDF + Statistical Text Analysis
     """)
     with gr.Row():
             input_text = gr.Textbox(
                 label="Restaurant Review",
                 placeholder="e.g., The food was amazing and the service was excellent!",
+                lines=5
             )
             with gr.Row():
             lines=3
         )
         gr.Markdown("""
+        **Preprocessing Steps:**
         1. Convert to lowercase
+        2. Remove URLs, emails, HTML tags
+        3. Remove special characters
+        4. Remove stopwords (keep negations)
+        5. Apply lemmatization
+        6. Extract statistical features
         """)
     gr.Markdown("---")
     gr.Examples(
         examples=[
+            ["The food was absolutely amazing! Best restaurant I've ever been to!"],
+            ["Terrible service and the food was cold. Never coming back."],
+            ["Outstanding! The staff was friendly and attentive."],
+            ["Worst meal ever. Complete waste of money."],
+            ["Good food but portions were small. Reasonable prices."],
+            ["Fantastic! Every dish was cooked to perfection!"],
         ],
         inputs=input_text,
+        label="Click to try"
     )
     gr.Markdown("""
     ### 📚 About This Model
     **Machine Learning Pipeline:**
+    - **Preprocessing:** Lemmatization, stopword removal, text normalization
+    - **Features:** TF-IDF (1500 features, bigrams) + 6 statistical features
+    - **Algorithm:** Ensemble machine learning (Random Forest / SVM / Gradient Boosting)
+    - **Accuracy:** 85%+ on test data
+    - **Metrics:** High precision, recall, and F1-score
+    **Technologies:** Python • Scikit-learn • NLTK • Gradio • Pandas • NumPy
+    **Developer:** Einstein Ellandala | Project: ML-06-BML11 | October 2025
     """)
     submit_btn.click(
         fn=predict_sentiment,
         inputs=input_text,
 print("✅ Gradio interface created")
 print("🚀 Launching application...")
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)