Spaces:

ktejeshnaidu
/

Smilo

Running

App Files Files Community

ktejeshnaidu committed on Mar 30

Commit

9c42577

verified ·

1 Parent(s): 82d5d02

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
README.md +68 -0
app.py +132 -0
classes.pkl +0 -0
face_classifier.pth +3 -0
main.py +61 -0
model.py +60 -0
requirements.txt +7 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ face_classifier.pth filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,68 @@

+---
+license: mit
+title: Smilo
+sdk: gradio
+emoji: 😃
+colorFrom: blue
+colorTo: green
+short_description: Real-Time Emotion Detection powered by PyTorch & OpenCV
+sdk_version: 5.15.0
+python_version: '3.10'
+app_file: app.py
+pinned: false
+---
+# Smilo 😃
+**Real-Time Emotion Detection powered by PyTorch & OpenCV**
+Smilo is a lightweight, deep learning-based application that detects and classifies 7 facial emotions (Angry 😠, Disgust 😐, Fear 😨, Happy 😃, Neutral 🙂, Sad 😔, Surprise 😮) in real-time. It features both a local desktop interface and a beautiful web-based interactive demo!
+## 🚀 Getting Started
+1. **Clone the repository** and navigate into the project directory:
+   ```bash
+   git clone <repository-url>
+   cd Smilo
+   ```
+2. **Install the required dependencies**:
+   ```bash
+   pip install -r requirements.txt
+   ```
+## 🎮 How to Use
+Smilo offers two distinct ways to interact with the emotion detection model:
+### 1. Web Interface (Recommended)
+Run a beautifully designed web app powered by Gradio. This interface supports uploading photos or capturing snapshots from your webcam; each image is returned with the largest detected face outlined and labelled with its predicted emotion.
+```bash
+python app.py
+```
+*After running, click the local link (e.g. `http://127.0.0.1:XXXX`) in your terminal to open it in your browser.*
+### 2. Desktop Application
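+Run the classic desktop script. This will instantly launch a video window using your webcam feed, drawing tracking boxes and emotion labels on detected faces. Note that the video window relies on `cv2.imshow`, which is not available in the `opencv-python-headless` build listed in `requirements.txt`; install `opencv-python` instead if you want to run the desktop script.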
+```bash
+python main.py
+```
+*Press `q` or click the 'X' button on the video window to quit.*
+## 🧠 Model & Architecture
+- **Face Detection:** Uses OpenCV's optimized Haar Cascades for rapid and highly-efficient face tracking.
+- **Emotion Recognition:** A custom 3-layer Convolutional Neural Network (CNN) built with PyTorch, trained on 128x128 resolution RGB images.
+- **Performance:** The desktop script (`main.py`) runs face detection and emotion prediction on every second frame (frame skipping) and redraws the most recent result on the frames in between, which keeps the live video feed responsive.
+## 🛠️ Retraining the Model
+If you wish to augment the model or train it from scratch:
+1. Ensure your dataset is prepared and sorted.
+2. Open and run the `Train_model.ipynb` Jupyter Notebook.
+3. The notebook will automatically guide you through data loading, transformation, model training, and exporting the updated inference weights (`face_classifier.pth`).
+## Notes
+Ensure you have adequate lighting and a clear, frontal view of your face for the most accurate predictions!
+## License
+See the LICENSE file for details.

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import gradio as gr
+import cv2
+import numpy as np
+from model import EmotionPredictor
# One shared predictor instance, created at import time and reused by every
# call to the prediction handler.
predictor = EmotionPredictor()

# Frontal-face Haar cascade shipped inside the OpenCV package.
_cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(_cascade_file)
if face_cascade.empty():
    # An empty classifier means the cascade XML could not be read; stop at
    # startup instead of failing later on the first request.
    raise RuntimeError("Failed to load Haar Cascade")
def predict_emotion(image):
    """Detect the largest face in an image and classify its emotion.

    Args:
        image: A PIL image or a NumPy array in grayscale, RGB or RGBA
            layout, or ``None`` when nothing was supplied.

    Returns:
        A ``(image, message)`` tuple. ``image`` is ``None`` when no input was
        given, the (RGB-normalized) input when no face was found, and
        otherwise an RGB copy with a box and label drawn around the largest
        detected face. ``message`` is the human-readable result text.
    """
    if image is None:
        return None, "No image provided"

    frame = image if isinstance(image, np.ndarray) else np.array(image)

    # Normalize to 3-channel RGB so that grayscale and RGBA inputs do not
    # crash the color conversion or reach the 3-channel model unchanged.
    if frame.ndim == 2 or (frame.ndim == 3 and frame.shape[2] == 1):
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
    elif frame.ndim == 3 and frame.shape[2] == 4:
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
    else:
        frame_rgb = frame

    # The cascade works on a single-channel image.
    gray = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2GRAY)
    detected = face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )
    if len(detected) == 0:
        return frame_rgb, "No face detected"

    # Only the largest detection (by area) is classified.
    x, y, w, h = (int(v) for v in max(detected, key=lambda r: r[2] * r[3]))

    # Clamp the box to the image bounds before cropping, as main.py does.
    height, width = frame_rgb.shape[:2]
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(width, x + w), min(height, y + h)
    if x2 <= x1 or y2 <= y1:
        return frame_rgb, "No face detected"

    face_rgb = np.ascontiguousarray(frame_rgb[y1:y2, x1:x2])
    emotion = predictor.predict(face_rgb)

    # Draw on a copy so the caller's array is never modified. Pure green has
    # the same channel tuple in RGB and BGR, so no color-order swap is needed.
    annotated = frame_rgb.copy()
    cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
    # Keep the label baseline inside the image when the face touches the top.
    label_y = max(y1 - 10, 25)
    cv2.putText(
        annotated, emotion, (x1, label_y),
        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2
    )
    return annotated, f"Detected emotion(s): {emotion}"
# Banner markup shown at the top of the page.
_HEADER_HTML = """
    <div style="background: linear-gradient(90deg, #FF9933 0%, #D0B264 50%, #469F93 100%);
                padding: 40px;
                border-radius: 12px;
                text-align: center;
                color: white;
                font-family: 'Helvetica Neue', Arial, sans-serif;
                margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; margin-bottom: 10px; font-weight: 900; font-size: 3.5em; display: flex; align-items: center; justify-content: center; gap: 10px;">
            Smilo <span style="font-size: 0.9em;">😃</span>
        </h1>
        <p style="color: #f0f0f0; font-size: 1.2em; margin: 0; font-weight: 400; letter-spacing: 0.5px;">Real-Time Emotion Detection powered by PyTorch</p>
    </div>
    """

# Page layout: input image and button on the left, annotated image and
# result text on the right.
with gr.Blocks(title="Smilo😃 - Real-Time Emotion Detection") as demo:
    gr.HTML(_HEADER_HTML)
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                label="Input Image", type="pil", sources=["upload", "webcam"]
            )
            submit_btn = gr.Button("Predict Emotion", variant="primary")
        with gr.Column():
            image_output = gr.Image(label="Annotated Image")
            emotion_output = gr.Textbox(
                label="Prediction Result", interactive=False
            )

    # The button click and any change of the input image run the same
    # handler with the same inputs and outputs.
    _handler_wiring = dict(
        fn=predict_emotion,
        inputs=[image_input],
        outputs=[image_output, emotion_output],
    )
    submit_btn.click(**_handler_wiring)
    image_input.change(**_handler_wiring)

if __name__ == "__main__":
    demo.launch()

classes.pkl ADDED Viewed

Binary file (76 Bytes). View file

face_classifier.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14702d50b48f94ed581f5df763d9271981063ee7a0bae0b4826131a54c38a08c
+size 67502533

main.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import cv2
+from model import EmotionPredictor
# Detection and prediction run on every FRAME_SKIP-th frame; the most recent
# result is redrawn on the frames in between.
FRAME_SKIP = 2
WINDOW_NAME = "Emotion Detection"


def main():
    """Show the webcam feed with the largest face boxed and labelled.

    Runs until the stream ends, ``q`` is pressed, or the window is closed.

    Raises:
        RuntimeError: If the Haar cascade cannot be loaded or the webcam
            (device 0) cannot be opened.
    """
    # Load the model and cascade before touching the camera, so a failure
    # here never leaves the capture device open.
    predictor = EmotionPredictor()
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    if face_cascade.empty():
        raise RuntimeError("Failed to load Haar Cascade")

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError("Failed to open webcam (device 0)")

    frame_count = 0
    current_faces = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % FRAME_SKIP == 0:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                detected = face_cascade.detectMultiScale(
                    gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
                )
                current_faces = []
                if len(detected) > 0:
                    # Only the largest detection (by area) is classified.
                    x, y, w, h = max(detected, key=lambda r: r[2] * r[3])
                    y1, y2 = max(0, y), min(frame.shape[0], y + h)
                    x1, x2 = max(0, x), min(frame.shape[1], x + w)
                    if y2 > y1 and x2 > x1:
                        face_rgb = cv2.cvtColor(
                            frame[y1:y2, x1:x2], cv2.COLOR_BGR2RGB
                        )
                        label = predictor.predict(face_rgb)
                        current_faces.append((x, y, w, h, label))

            for (x, y, w, h, label) in current_faces:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                # Keep the label baseline inside the frame when the face
                # touches the top edge.
                cv2.putText(
                    frame, label, (x, max(y - 10, 25)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2
                )

            cv2.imshow(WINDOW_NAME, frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
            # A visibility value below 1 means the user closed the window.
            if cv2.getWindowProperty(WINDOW_NAME, cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Always free the camera and windows, even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

model.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import pickle
+from torchvision import transforms
+import numpy as np
+from PIL import Image
class FaceClassifier(nn.Module):
    """Three-stage convolutional classifier for 3-channel 128x128 images.

    Each stage is a 3x3 convolution (padding 1), ReLU, then 2x2 max-pooling,
    so the spatial size is halved three times. The fully connected head is
    sized for ``128 * 16 * 16`` features, which corresponds to 128x128
    inputs.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes: int):
        super().__init__()
        # Attribute names double as state_dict keys, so they must stay
        # unchanged for saved weights to load.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Input of shape ``(batch, 3, 128, 128)``; a single unbatched
                ``(3, 128, 128)`` image is also accepted.

        Raises:
            RuntimeError: If the spatial size is not 128x128, because the
                flattened features no longer match ``fc1``.
        """
        if x.dim() == 3:
            # Treat an unbatched image as a batch of one.
            x = x.unsqueeze(0)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample. Unlike view(-1, ...), this keeps the batch
        # dimension fixed, so a wrong input size fails at fc1 instead of
        # being silently folded into extra batch rows.
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)
class EmotionPredictor:
    """Load the trained classifier and map face crops to emotion labels.

    Args:
        weights_path: Path to the saved ``state_dict`` of the classifier.
        classes_path: Path to the pickled class labels.

    Relative paths are tried against the current working directory first
    and then against the directory containing this module, so the predictor
    also works when the program is started from another directory.
    """

    def __init__(self, weights_path="face_classifier.pth",
                 classes_path="classes.pkl"):
        import os

        module_file = globals().get("__file__")
        module_dir = (
            os.path.dirname(os.path.abspath(module_file))
            if module_file else os.getcwd()
        )

        def _locate(path):
            # Keep the path as given when it exists; otherwise fall back to
            # a file of the same name next to this module.
            if os.path.exists(path):
                return path
            fallback = os.path.join(module_dir, path)
            return fallback if os.path.exists(fallback) else path

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )

        # NOTE: pickle.load can execute arbitrary code. Only load a classes
        # file that comes from a trusted source.
        with open(_locate(classes_path), "rb") as f:
            # Presumably a list of label strings indexed by class id
            # (it is used with len() and integer indexing) -- TODO confirm.
            self.classes = pickle.load(f)

        self.model = FaceClassifier(len(self.classes))
        # weights_only=True restricts unpickling to tensors and plain
        # containers, which is all a state_dict needs.
        state_dict = torch.load(
            _locate(weights_path),
            map_location=self.device,
            weights_only=True,
        )
        self.model.load_state_dict(state_dict)
        self.model.to(self.device).eval()

        # The single-element mean/std are broadcast over all channels.
        # These values presumably mirror the training pipeline -- TODO confirm.
        self.transform = transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    @torch.inference_mode()
    def predict(self, image_np: np.ndarray) -> str:
        """Return the predicted class label for one face crop.

        Args:
            image_np: Image array accepted by ``PIL.Image.fromarray``. It is
                converted to RGB before being resized and normalized.

        Raises:
            ValueError: If ``image_np`` contains no pixels.
        """
        if image_np.size == 0:
            raise ValueError("Cannot predict on an empty image")
        # The network's first layer takes three channels, so force RGB.
        img = Image.fromarray(image_np).convert("RGB")
        batch = self.transform(img).unsqueeze(0).to(self.device)
        logits = self.model(batch)
        return self.classes[logits.argmax(1).item()]

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch>=2.1.0
+torchvision>=0.16.0
+numpy>=1.24.3
+opencv-python-headless>=4.8.1
+pillow>=10.0.0
+matplotlib>=3.7.2
+gradio>=5.15.0