Spaces:
Running
Running
Upload 7 files
Browse files- .gitattributes +1 -0
- README.md +68 -0
- app.py +132 -0
- classes.pkl +0 -0
- face_classifier.pth +3 -0
- main.py +61 -0
- model.py +60 -0
- requirements.txt +7 -0
.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
face_classifier.pth filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
title: Smilo
|
| 4 |
+
sdk: gradio
|
| 5 |
+
emoji: 😃
|
| 6 |
+
colorFrom: blue
|
| 7 |
+
colorTo: green
|
| 8 |
+
short_description: Real-Time Emotion Detection powered by PyTorch & OpenCV
|
| 9 |
+
sdk_version: 5.15.0
|
| 10 |
+
python_version: '3.10'
|
| 11 |
+
app_file: app.py
|
| 12 |
+
pinned: false
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
# Smilo 😃
|
| 16 |
+
**Real-Time Emotion Detection powered by PyTorch & OpenCV**
|
| 17 |
+
|
| 18 |
+
Smilo is a lightweight, deep learning-based application that detects and classifies 7 facial emotions (Angry 😠, Disgust 😐, Fear 😨, Happy 😃, Neutral 🙂, Sad 😔, Surprise 😮) in real-time. It features both a local desktop interface and a beautiful web-based interactive demo!
|
| 19 |
+
|
| 20 |
+
## 🚀 Getting Started
|
| 21 |
+
|
| 22 |
+
1. **Clone the repository** and navigate into the project directory:
|
| 23 |
+
```bash
|
| 24 |
+
git clone <repository-url>
|
| 25 |
+
cd Smilo
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
2. **Install the required dependencies**:
|
| 29 |
+
```bash
|
| 30 |
+
pip install -r requirements.txt
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## 🎮 How to Use
|
| 34 |
+
|
| 35 |
+
Smilo offers two distinct ways to interact with the emotion detection model:
|
| 36 |
+
|
| 37 |
+
### 1. Web Interface (Recommended)
|
| 38 |
+
Run a beautifully designed web app powered by Gradio. This interface supports uploading photos or capturing snapshots directly from your webcam.
|
| 39 |
+
```bash
|
| 40 |
+
python app.py
|
| 41 |
+
```
|
| 42 |
+
*After running, click the local link (e.g. `http://127.0.0.1:XXXX`) in your terminal to open it in your browser.*
|
| 43 |
+
|
| 44 |
+
### 2. Desktop Application
|
| 45 |
+
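Run the classic desktop script. This will instantly launch a video window using your webcam feed, drawing a tracking box and emotion label on the largest detected face. Note: the video window needs a GUI-enabled OpenCV build (e.g. `opencv-python`); the `opencv-python-headless` package listed in `requirements.txt` cannot open display windows.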
|
| 46 |
+
```bash
|
| 47 |
+
python main.py
|
| 48 |
+
```
|
| 49 |
+
*Press `q` or click the 'X' button on the video window to quit.*
|
| 50 |
+
|
| 51 |
+
## 🧠 Model & Architecture
|
| 52 |
+
|
| 53 |
+
- **Face Detection:** Uses OpenCV's optimized Haar Cascades for rapid and highly-efficient face tracking.
|
| 54 |
+
- **Emotion Recognition:** A custom 3-layer Convolutional Neural Network (CNN) built with PyTorch, trained on 128x128 resolution RGB images.
|
| 55 |
+
- **Performance:** System logic utilizes frame-skipping and concurrent processing optimizations to ensure video feeds maintain a lag-free 30+ FPS true 'live' experience.
|
| 56 |
+
|
| 57 |
+
## 🛠️ Retraining the Model
|
| 58 |
+
|
| 59 |
+
If you wish to augment the model or train it from scratch:
|
| 60 |
+
1. Ensure your dataset is prepared and sorted.
|
| 61 |
+
2. Open and run the `Train_model.ipynb` Jupyter Notebook.
|
| 62 |
+
3. The notebook will automatically guide you through data loading, transformation, model training, and exporting the updated inference weights (`face_classifier.pth`).
|
| 63 |
+
|
| 64 |
+
## Notes
|
| 65 |
+
Ensure you have adequate lighting and a clear, frontal view of your face for the most accurate predictions!
|
| 66 |
+
|
| 67 |
+
## License
|
| 68 |
+
See the LICENSE file for details.
|
app.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import cv2
|
| 3 |
+
import numpy as np
|
| 4 |
+
from model import EmotionPredictor
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Build the shared emotion classifier once, at import time.
predictor = EmotionPredictor()

# Frontal-face Haar cascade shipped with OpenCV, used to locate faces.
_CASCADE_FILE = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(_CASCADE_FILE)

# CascadeClassifier does not raise on a bad path; it just stays empty.
if face_cascade.empty():
    raise RuntimeError("Failed to load Haar Cascade")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _to_bgr(frame):
    """Return ``frame`` as a 3-channel BGR array, or ``None`` if unsupported.

    RGB is the expected layout, but grayscale and RGBA arrays are converted
    too so that they do not crash the colour conversions further down.
    """
    if frame.ndim == 2:
        return cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
    if frame.ndim == 3:
        channels = frame.shape[2]
        if channels == 3:
            return cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if channels == 4:
            return cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
        if channels == 1:
            single = np.ascontiguousarray(frame[:, :, 0])
            return cv2.cvtColor(single, cv2.COLOR_GRAY2BGR)
    return None


def predict_emotion(image):
    """
    Predict the emotion of the largest face in an image.

    Args:
        image: PIL Image or numpy array (RGB, RGBA or grayscale), or None.

    Returns:
        A ``(image, message)`` tuple. On success the image is an RGB numpy
        array with the face box and label drawn on it. When no face is
        found the unmodified input array is returned. When no image is
        given, or its layout is unsupported, the image is ``None``.
    """
    if image is None:
        return None, "No image provided"

    # Convert PIL Image to numpy array if needed
    frame = image if isinstance(image, np.ndarray) else np.array(image)

    # OpenCV works in BGR; normalise every supported layout to that.
    frame_bgr = _to_bgr(frame)
    if frame_bgr is None:
        return None, "Unsupported image format"

    # The Haar cascade operates on a grayscale image.
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
    detected = face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    if len(detected) == 0:
        return frame, "No face detected"

    # Keep only the largest face (by area); plain ints keep OpenCV happy.
    x, y, w, h = (int(v) for v in max(detected, key=lambda r: r[2] * r[3]))

    # Clamp the box to the image so the crop can never be empty or wrap.
    height, width = frame_bgr.shape[:2]
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(width, x + w), min(height, y + h)
    if x2 <= x1 or y2 <= y1:
        return frame, "No face detected"

    # The classifier is fed an RGB crop of the face region.
    face_rgb = cv2.cvtColor(frame_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2RGB)
    emotion = predictor.predict(face_rgb)

    # Draw rectangle and label on a copy so the input is left untouched.
    output_frame = frame_bgr.copy()
    cv2.rectangle(output_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    # Keep the label on-screen when the face touches the top edge.
    label_y = max(y1 - 10, 25)
    cv2.putText(
        output_frame, emotion, (x1, label_y),
        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2
    )

    # Convert back to RGB for display
    output_frame_rgb = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)

    return output_frame_rgb, f"Detected emotion(s): {emotion}"
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# Banner shown at the top of the page.
_HEADER_HTML = """
    <div style="background: linear-gradient(90deg, #FF9933 0%, #D0B264 50%, #469F93 100%);
    padding: 40px;
    border-radius: 12px;
    text-align: center;
    color: white;
    font-family: 'Helvetica Neue', Arial, sans-serif;
    margin-bottom: 20px;">
    <h1 style="color: white; margin: 0; margin-bottom: 10px; font-weight: 900; font-size: 3.5em; display: flex; align-items: center; justify-content: center; gap: 10px;">
    Smilo <span style="font-size: 0.9em;">😃</span>
    </h1>
    <p style="color: #f0f0f0; font-size: 1.2em; margin: 0; font-weight: 400; letter-spacing: 0.5px;">Real-Time Emotion Detection powered by PyTorch</p>
    </div>
    """

# Create Gradio interface: input column on the left, results on the right.
with gr.Blocks(title="Smilo😃 - Real-Time Emotion Detection") as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "webcam"],
            )
            submit_btn = gr.Button("Predict Emotion", variant="primary")

        with gr.Column():
            image_output = gr.Image(label="Annotated Image")
            emotion_output = gr.Textbox(
                label="Prediction Result", interactive=False
            )

    # Run the prediction both on button click and whenever the image changes.
    for register in (submit_btn.click, image_input.change):
        register(
            fn=predict_emotion,
            inputs=[image_input],
            outputs=[image_output, emotion_output],
        )


if __name__ == "__main__":
    demo.launch()
|
classes.pkl
ADDED
|
Binary file (76 Bytes). View file
|
|
|
face_classifier.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14702d50b48f94ed581f5df763d9271981063ee7a0bae0b4826131a54c38a08c
|
| 3 |
+
size 67502533
|
main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
from model import EmotionPredictor
|
| 3 |
+
|
| 4 |
+
# Desktop demo: read webcam frames, detect the largest face on every
# FRAME_SKIP-th frame, classify its emotion and draw the result in a window.

# Load the model and the detector before touching the camera, so a failure
# here never leaves the capture device open.
predictor = EmotionPredictor()

face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)

if face_cascade.empty():
    raise RuntimeError("Failed to load Haar Cascade")

FRAME_SKIP = 2  # run detection + classification on every 2nd frame
WINDOW_NAME = "Emotion Detection"
frame_count = 0
current_faces = []  # (x, y, w, h, label) from the most recent detection pass

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Failed to open webcam (device 0)")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        if frame_count % FRAME_SKIP == 0:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            detected = face_cascade.detectMultiScale(
                gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
            )
            current_faces = []
            if len(detected) > 0:
                # Only the largest face (by area) is classified.
                x, y, w, h = max(detected, key=lambda r: r[2] * r[3])

                # Clamp the crop to the frame bounds.
                y1, y2 = max(0, y), min(frame.shape[0], y + h)
                x1, x2 = max(0, x), min(frame.shape[1], x + w)

                if y2 > y1 and x2 > x1:
                    face_rgb = cv2.cvtColor(
                        frame[y1:y2, x1:x2], cv2.COLOR_BGR2RGB
                    )
                    label = predictor.predict(face_rgb)
                    current_faces.append((x, y, w, h, label))

        # On skipped frames the boxes from the last detection are redrawn.
        for (x, y, w, h, label) in current_faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Keep the label on-screen when the face touches the top edge.
            label_y = max(int(y) - 10, 25)
            cv2.putText(
                frame, label, (int(x), label_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2
            )

        # NOTE(review): cv2.imshow needs a GUI-enabled OpenCV build; the
        # headless wheel cannot open windows - confirm the installed package.
        cv2.imshow(WINDOW_NAME, frame)

        # Quit on 'q' or when the window is closed via its 'X' button.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
        if cv2.getWindowProperty(WINDOW_NAME, cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Always free the camera and windows, even if an exception occurred.
    cap.release()
    cv2.destroyAllWindows()
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
model.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import pickle
|
| 5 |
+
from torchvision import transforms
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from PIL import Image
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class FaceClassifier(nn.Module):
    """Small CNN that maps a 3x128x128 image to ``num_classes`` logits.

    Three conv + ReLU + 2x2 max-pool stages halve the spatial size each
    time (128 -> 64 -> 32 -> 16), so the flattened feature vector fed to
    ``fc1`` has ``128 * 16 * 16`` elements. Attribute names are part of the
    checkpoint format (state-dict keys) and must not be renamed.
    """

    def __init__(self, num_classes):
        """Build the layers; ``num_classes`` is the size of the output layer."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``.

        ``x`` is a float tensor of shape ``(N, 3, 128, 128)``; a single
        unbatched ``(3, 128, 128)`` image is also accepted and treated as a
        batch of one. Any other spatial size raises ``RuntimeError`` in
        ``fc1`` instead of silently changing the batch size.
        """
        if x.dim() == 3:
            # Unbatched image: add the batch axis so flattening stays per-sample.
            x = x.unsqueeze(0)

        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten per sample; unlike view(-1, ...) this never folds extra
        # spatial elements into the batch dimension.
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class EmotionPredictor:
    """Load the trained ``FaceClassifier`` and label face crops with an emotion.

    The class list (``classes.pkl``) and the weights (``face_classifier.pth``)
    are read from the directory containing this module, so the predictor
    works regardless of the process's current working directory.
    """

    def __init__(self):
        """Load class names and weights, and build the preprocessing pipeline."""
        # Local import keeps the file's top-level import block untouched.
        from pathlib import Path

        base_dir = Path(__file__).resolve().parent
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # NOTE(security): pickle.load can execute arbitrary code; only ever
        # load a classes.pkl that ships with this project.
        with open(base_dir / "classes.pkl", "rb") as f:
            self.classes = pickle.load(f)

        self.model = FaceClassifier(len(self.classes))
        # weights_only=True restricts unpickling to tensors and plain containers.
        state_dict = torch.load(
            base_dir / "face_classifier.pth",
            map_location=self.device,
            weights_only=True,
        )
        self.model.load_state_dict(state_dict)
        self.model.to(self.device).eval()

        # Resize to the 128x128 input the network's fc1 layer is sized for,
        # then scale pixel values from [0, 1] to [-1, 1].
        self.transform = transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    @torch.inference_mode()
    def predict(self, image_np: np.ndarray) -> str:
        """Return the predicted class name for one face crop.

        Args:
            image_np: image array accepted by ``PIL.Image.fromarray``;
                callers in this project pass an RGB face crop.

        Returns:
            The entry of ``self.classes`` with the highest logit.
        """
        # Force three channels so grayscale/RGBA crops still match conv1.
        img = Image.fromarray(image_np).convert("RGB")
        tensor = self.transform(img).unsqueeze(0).to(self.device)
        output = self.model(tensor)
        return self.classes[output.argmax(1).item()]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.1.0
|
| 2 |
+
torchvision>=0.16.0
|
| 3 |
+
numpy>=1.24.3
|
| 4 |
+
opencv-python-headless>=4.8.1
|
| 5 |
+
pillow>=10.0.0
|
| 6 |
+
matplotlib>=3.7.2
|
| 7 |
+
gradio>=5.15.0
|