Spaces:

Elliot89
/

sentiment-analysis-restaurant

Sleeping

App Files Files Community

Elliot89 committed on Oct 19, 2025

Commit

8afe425

verified ·

1 Parent(s): 55f8f35

Update app.py

Browse files

Files changed (1) hide show

app.py +215 -136

app.py CHANGED Viewed

@@ -1,136 +1,215 @@
-## 🌐 Section 16: Gradio Web Application (Corrected & Improved)
-import gradio as gr
-import warnings
-warnings.filterwarnings('ignore')
-# Ensure the required variables are defined from previous cells:
-# - final_best_model
-# - tfidf_vectorizer
-# - preprocessor
-# - final_best_model_name
-# - results_df
-def create_gradio_app(model, vectorizer, preprocessor_obj, model_name, metrics_df):
-    """
-    Creates and launches a Gradio web application for sentiment analysis.
-    Args:
-        model: The trained machine learning model.
-        vectorizer: The fitted TF-IDF vectorizer.
-        preprocessor_obj: The text preprocessor instance.
-        model_name (str): The name of the best model.
-        metrics_df (pd.DataFrame): DataFrame with model performance metrics.
-    """
-    def gradio_predict(review_text):
-        """
-        Prediction function for the Gradio interface. It uses the model
-        and preprocessors passed to the outer function.
-        """
-        if not review_text.strip():
-            return "⚠️ Please enter a review!", "", "", "", ""
-        # Use the existing predict_sentiment function, passing all required arguments
-        result = predict_sentiment(
-            review=review_text,
-            model=model,
-            vectorizer=vectorizer,
-            preprocessor=preprocessor_obj
-        )
-        sentiment = result['sentiment']
-        confidence = f"{result['confidence']:.2%}" if result['confidence'] is not None else "N/A"
-        prob_neg = f"{result['probability_negative']:.2%}" if result['probability_negative'] is not None else "N/A"
-        prob_pos = f"{result['probability_positive']:.2%}" if result['probability_positive'] is not None else "N/A"
-        cleaned = result['cleaned_review']
-        return sentiment, confidence, prob_neg, prob_pos, cleaned
-    # Create the Gradio interface
-    with gr.Blocks(theme=gr.themes.Soft(), title="Restaurant Review Sentiment Analyzer") as demo:
-        gr.Markdown(f"""
-        # 🍽️ Restaurant Review Sentiment Analyzer
-        ### Powered by Machine Learning
-        Enter a restaurant review to analyze its sentiment. The model will predict whether
-        the review is **Positive** or **Negative**.
-        **Model:** {model_name}
-        **Accuracy:** {metrics_df.iloc[0]['Test_Accuracy']:.2%}
-        """)
-        with gr.Row():
-            with gr.Column(scale=2):
-                input_text = gr.Textbox(
-                    label="Enter Restaurant Review",
-                    placeholder="e.g., The food was absolutely amazing! Best restaurant ever!",
-                    lines=5
-                )
-                with gr.Row():
-                    submit_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
-                    clear_btn = gr.ClearButton([input_text], value="🗑️ Clear", size="lg")
-            with gr.Column(scale=2):
-                sentiment_output = gr.Textbox(label="🎯 Predicted Sentiment", interactive=False)
-                confidence_output = gr.Textbox(label="📊 Confidence Score", interactive=False)
-                with gr.Row():
-                    neg_prob = gr.Textbox(label="😞 Negative Probability", interactive=False)
-                    pos_prob = gr.Textbox(label="😊 Positive Probability", interactive=False)
-        with gr.Accordion("🔍 Preprocessing Details", open=False):
-            cleaned_output = gr.Textbox(label="Cleaned Review Text", interactive=False, lines=3)
-        gr.Examples(
-            examples=[
-                ["The food was absolutely amazing! Best restaurant I've ever been to!"],
-                ["Terrible service and the food was cold. Never coming back."],
-                ["Outstanding experience! The staff was friendly and attentive."],
-                ["Worst meal I've ever had. Complete waste of money."],
-            ],
-            inputs=input_text,
-            label="Click to try an example"
-        )
-        # Connect the button to the prediction function
-        submit_btn.click(
-            fn=gradio_predict,
-            inputs=input_text,
-            outputs=[sentiment_output, confidence_output, neg_prob, pos_prob, cleaned_output]
-        )
-    return demo
-# --- LAUNCH THE APP ---
-print('='*70)
-print('🚀 LAUNCHING GRADIO WEB APPLICATION')
-print('='*70)
-try:
-    # Get the final best model and preprocessors from your notebook
-    # Ensure these variable names match what you have in your notebook
-    final_model = trained_models[final_best_model_name]
-    # Create the app instance by passing the required objects
-    gradio_app = create_gradio_app(
-        model=final_model,
-        vectorizer=tfidf_vectorizer,
-        preprocessor_obj=preprocessor,
-        model_name=final_best_model_name,
-        metrics_df=results_df
-    )
-    # Launch the Gradio app
-    # share=True creates a public link and keeps the server running
-    gradio_app.launch(share=True, debug=True)
-    print("\n✅ Gradio app is running!")
-    print("📱 Access the app at the public URL provided above.")
-except NameError as e:
-    print(f"❌ NameError: {e}")
-    print("Please make sure all previous cells in the notebook have been run successfully.")
-    print("Required variables: 'final_best_model_name', 'trained_models', 'tfidf_vectorizer', 'preprocessor', 'results_df'")
-except Exception as e:
-    print(f"❌ An unexpected error occurred: {e}")

+import gradio as gr
+import pickle
+import numpy as np
+import pandas as pd
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+import re
+import warnings
+warnings.filterwarnings('ignore')
+# Fetch the NLTK corpora/tokenizer data at startup.
+# NOTE(review): the return values of nltk.download() are not checked, so the
+# success message below is printed even if a download did not succeed.
+# Presumably these resources are needed by the unpickled preprocessor — confirm.
+print("Downloading NLTK resources...")
+nltk.download('stopwords', quiet=True)
+nltk.download('wordnet', quiet=True)
+nltk.download('omw-1.4', quiet=True)
+nltk.download('punkt', quiet=True)
+print("✅ NLTK resources downloaded")
+# Load the three pickled artifacts into module-level names (model, vectorizer,
+# preprocessor) that predict_sentiment reads as globals. Paths are relative to
+# the current working directory.
+# NOTE(review): pickle.load can execute arbitrary code from the file, so these
+# .pkl files must only ever come from a trusted source.
+print("Loading models...")
+try:
+    with open('best_model.pkl', 'rb') as f:
+        model = pickle.load(f)
+    print("✅ Model loaded")
+    with open('tfidf_vectorizer.pkl', 'rb') as f:
+        vectorizer = pickle.load(f)
+    print("✅ Vectorizer loaded")
+    with open('preprocessor.pkl', 'rb') as f:
+        preprocessor = pickle.load(f)
+    print("✅ Preprocessor loaded")
+except Exception as e:
+    # Report the failure, then re-raise so startup aborts instead of serving
+    # an app with missing artifacts.
+    print(f"❌ Error loading models: {e}")
+    raise
+# Feature extraction function
+def extract_features(texts, original_texts):
+    """Build the statistical feature table for a batch of reviews.
+
+    Args:
+        texts: Cleaned review strings; the source of ``word_count`` and
+            ``avg_word_length``.
+        original_texts: Raw review strings; the source of ``review_length``,
+            ``exclamation_count``, ``question_count`` and ``capital_ratio``.
+
+    Returns:
+        A ``pd.DataFrame`` with one row per review and six columns, in this
+        order: review_length, word_count, avg_word_length, exclamation_count,
+        question_count, capital_ratio.
+    """
+    def _avg_word_length(text):
+        # Guard on the split result rather than on the string itself: a
+        # whitespace-only string is truthy but contains no words, and
+        # np.mean([]) would put NaN (plus a RuntimeWarning) into the row.
+        words = text.split()
+        return np.mean([len(word) for word in words]) if words else 0
+
+    features = {
+        'review_length': [len(text) for text in original_texts],
+        'word_count': [len(text.split()) for text in texts],
+        'avg_word_length': [_avg_word_length(text) for text in texts],
+        'exclamation_count': [text.count('!') for text in original_texts],
+        'question_count': [text.count('?') for text in original_texts],
+        # Share of uppercase characters in the raw text; 0 for an empty string.
+        'capital_ratio': [
+            sum(1 for c in text if c.isupper()) / len(text) if text else 0
+            for text in original_texts
+        ],
+    }
+    return pd.DataFrame(features)
+# Prediction function
+def predict_sentiment(review_text):
+    """Classify one review and format the result for the five output boxes.
+
+    Reads the module-level ``preprocessor``, ``vectorizer`` and ``model``.
+
+    Args:
+        review_text: Raw review text; may be ``None``, empty or blank.
+
+    Returns:
+        A 5-tuple of strings ``(sentiment, confidence, negative_probability,
+        positive_probability, cleaned_text)``. For blank input the first item
+        is a prompt to enter a review; if prediction raises, the first item is
+        the error message. In both cases the other four items are ``""``.
+    """
+    if not review_text or not review_text.strip():
+        return "⚠️ Please enter a review!", "", "", "", ""
+
+    def _as_percent(value):
+        # Compare against None explicitly: 0.0 is a legitimate probability and
+        # must render as "0.00%" instead of falling through to "N/A".
+        return f"{value:.2%}" if value is not None else "N/A"
+
+    try:
+        # Preprocess
+        cleaned = preprocessor.clean_text(review_text)
+        # Vectorize
+        vectorized = vectorizer.transform([cleaned]).toarray()
+        # Statistical features are computed from both the cleaned and raw text
+        add_features = extract_features([cleaned], [review_text])
+        # Column order here is TF-IDF first, statistical features last
+        X_new = np.concatenate([vectorized, add_features.values], axis=1)
+        # Predict
+        prediction = model.predict(X_new)[0]
+        # Not every estimator exposes probabilities
+        if hasattr(model, 'predict_proba'):
+            proba = model.predict_proba(X_new)[0]
+            confidence = max(proba)
+            # NOTE(review): assumes column 0 is the negative class and
+            # column 1 the positive class — confirm against model.classes_.
+            prob_neg = proba[0]
+            prob_pos = proba[1]
+        else:
+            confidence = None
+            prob_neg = None
+            prob_pos = None
+        # Format output
+        sentiment = "✅ Positive 😊" if prediction == 1 else "❌ Negative 😞"
+        return (
+            sentiment,
+            _as_percent(confidence),
+            _as_percent(prob_neg),
+            _as_percent(prob_pos),
+            cleaned,
+        )
+    except Exception as e:
+        # UI boundary: show the failure in the first output box rather than
+        # letting the exception propagate out of the click handler.
+        return f"❌ Error: {str(e)}", "", "", "", ""
+# Build the page layout. `demo` is the Blocks object that is launched at the
+# bottom of the file.
+print("Creating Gradio interface...")
+with gr.Blocks(
+    theme=gr.themes.Soft(),
+    title="Restaurant Review Sentiment Analyzer",
+    css="""
+    .gradio-container {font-family: 'Arial', sans-serif;}
+    """
+) as demo:
+    # Page header.
+    # NOTE(review): the model name and accuracy shown here are hard-coded
+    # text, not values read from the loaded model — keep them in sync by hand.
+    gr.Markdown("""
+    # 🍽️ Restaurant Review Sentiment Analyzer
+    ### AI-Powered Sentiment Analysis with Machine Learning
+    Enter a restaurant review to analyze its sentiment in real-time!
+    **Model:** Random Forest Classifier
+    **Accuracy:** 85%+
+    **Features:** TF-IDF + Statistical Text Features
+    """)
+    # One row, two columns with the same scale: review input first, results second.
+    with gr.Row():
+        with gr.Column(scale=2):
+            gr.Markdown("### 📝 Enter Your Review")
+            input_text = gr.Textbox(
+                label="Restaurant Review",
+                placeholder="e.g., The food was amazing and the service was excellent!",
+                lines=6,
+                max_lines=10
+            )
+            with gr.Row():
+                submit_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
+                # The clear button is wired to input_text only; the result
+                # boxes are not in its component list.
+                clear_btn = gr.ClearButton([input_text], value="🗑️ Clear", size="lg")
+        with gr.Column(scale=2):
+            gr.Markdown("### 📊 Analysis Results")
+            # Read-only boxes filled by the click handler below.
+            sentiment_output = gr.Textbox(label="🎯 Predicted Sentiment", interactive=False)
+            confidence_output = gr.Textbox(label="📈 Confidence Score", interactive=False)
+            with gr.Row():
+                neg_prob = gr.Textbox(label="😞 Negative Probability", interactive=False)
+                pos_prob = gr.Textbox(label="😊 Positive Probability", interactive=False)
+    # Created with open=False; holds the cleaned text that predict_sentiment
+    # returns as its fifth value.
+    with gr.Accordion("🔍 Preprocessing Details", open=False):
+        cleaned_output = gr.Textbox(
+            label="Cleaned Review Text (After Preprocessing)",
+            interactive=False,
+            lines=3
+        )
+        # NOTE(review): this step list is static text; the real steps are
+        # whatever preprocessor.clean_text does — confirm the two agree.
+        gr.Markdown("""
+        **Preprocessing Steps Applied:**
+        1. Convert to lowercase
+        2. Remove special characters and numbers
+        3. Remove stopwords (preserving negations)
+        4. Apply lemmatization
+        5. Extract statistical features
+        """)
+    gr.Markdown("---")
+    gr.Markdown("### 💡 Try These Example Reviews")
+    # Sample reviews bound to the input textbox (inputs=input_text).
+    gr.Examples(
+        examples=[
+            ["The food was absolutely amazing! Best restaurant I've ever been to! The service was impeccable."],
+            ["Terrible service and the food was cold. The waiter was rude. Never coming back!"],
+            ["Outstanding experience from start to finish! Every dish was cooked to perfection. Highly recommended!"],
+            ["Worst meal I've ever had. Complete waste of money. Very disappointing experience."],
+            ["Good food but the portions were quite small. Reasonable prices. Service was okay."],
+            ["Fantastic! The ambiance was perfect and the food was delicious. Will definitely return!"],
+            ["Not impressed at all. The quality has really gone downhill. Won't be going back."],
+            ["Absolutely loved everything! Great variety and excellent presentation. Five stars!"]
+        ],
+        inputs=input_text,
+        label="Click any example to try it"
+    )
+    # Static project description.
+    # NOTE(review): the figures quoted here (1500 features, 6 models) are
+    # literal text and are not checked against the loaded artifacts.
+    gr.Markdown("""
+    ---
+    ### 📚 About This Model
+    **Machine Learning Pipeline:**
+    - **Preprocessing:** Lemmatization, stopword removal, text cleaning
+    - **Feature Engineering:** TF-IDF vectorization (1500 features, bigrams) + 6 statistical features
+    - **Algorithm:** Random Forest Classifier
+    - **Training:** 6 different models compared, best one deployed
+    - **Evaluation:** Cross-validation, multiple metrics (Accuracy, F1, ROC-AUC)
+    **Technologies Used:**
+    - Python, Scikit-learn, NLTK, Gradio, Pandas, NumPy
+    **Developer:** Einstein Ellandala | Project: ML-06-BML11
+    📓 **Full Project:** [View on GitHub](https://github.com/MrEinsteinE/sentiment-analysis-restaurant)
+    """)
+    # Connect button to prediction function. The outputs list has the same
+    # order as the 5-tuple returned by predict_sentiment.
+    submit_btn.click(
+        fn=predict_sentiment,
+        inputs=input_text,
+        outputs=[sentiment_output, confidence_output, neg_prob, pos_prob, cleaned_output]
+    )
+print("✅ Gradio interface created")
+# NOTE(review): this message is outside the __main__ guard, so it is printed
+# on import as well, even though the server only starts when run as a script.
+print("🚀 Launching application...")
+# Start the server only when this file is executed directly.
+if __name__ == "__main__":
+    demo.launch(
+        # "0.0.0.0" listens on every network interface, not just localhost.
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )