ktejeshnaidu commited on
Commit
9c42577
·
verified ·
1 Parent(s): 82d5d02

Upload 7 files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. README.md +68 -0
  3. app.py +132 -0
  4. classes.pkl +0 -0
  5. face_classifier.pth +3 -0
  6. main.py +61 -0
  7. model.py +60 -0
  8. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ face_classifier.pth filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ title: Smilo
4
+ sdk: gradio
5
+ emoji: 😃
6
+ colorFrom: blue
7
+ colorTo: green
8
+ short_description: Real-Time Emotion Detection powered by PyTorch & OpenCV
9
+ sdk_version: 5.15.0
10
+ python_version: '3.10'
11
+ app_file: app.py
12
+ pinned: false
13
+ ---
14
+
15
+ # Smilo 😃
16
+ **Real-Time Emotion Detection powered by PyTorch & OpenCV**
17
+
18
+ Smilo is a lightweight, deep learning-based application that detects and classifies 7 facial emotions (Angry 😠, Disgust 😐, Fear 😨, Happy 😃, Neutral 🙂, Sad 😔, Surprise 😮) in real-time. It features both a local desktop interface and a beautiful web-based interactive demo!
19
+
20
+ ## 🚀 Getting Started
21
+
22
+ 1. **Clone the repository** and navigate into the project directory:
23
+ ```bash
24
+ git clone <repository-url>
25
+ cd Smilo
26
+ ```
27
+
28
+ 2. **Install the required dependencies**:
29
+ ```bash
30
+ pip install -r requirements.txt
31
+ ```
32
+
33
+ ## 🎮 How to Use
34
+
35
+ Smilo offers two distinct ways to interact with the emotion detection model:
36
+
37
+ ### 1. Web Interface (Recommended)
38
+ Run a beautifully designed web app powered by Gradio. This interface supports uploading photos, capturing snapshots, or streaming live video directly from your webcam.
39
+ ```bash
40
+ python app.py
41
+ ```
42
+ *After running, click the local link (e.g. `http://127.0.0.1:XXXX`) in your terminal to open it in your browser.*
43
+
44
+ ### 2. Desktop Application
45
+ Run the classic desktop script. This will instantly launch a video window using your webcam feed, drawing tracking boxes and emotion labels on detected faces.
46
+ ```bash
47
+ python main.py
48
+ ```
49
+ *Press `q` or click the 'X' button on the video window to quit.*
50
+
51
+ ## 🧠 Model & Architecture
52
+
53
+ - **Face Detection:** Uses OpenCV's optimized Haar Cascades for rapid and highly efficient face tracking.
54
+ - **Emotion Recognition:** A custom Convolutional Neural Network (CNN) with three convolutional layers followed by two fully connected layers, built with PyTorch and trained on 128x128 resolution RGB images.
55
+ - **Performance:** Frame skipping and concurrent-processing optimizations keep the video feed running at a lag-free 30+ FPS for a true 'live' experience.
56
+
57
+ ## 🛠️ Retraining the Model
58
+
59
+ If you wish to augment the model or train it from scratch:
60
+ 1. Ensure your dataset is prepared and sorted.
61
+ 2. Open and run the `Train_model.ipynb` Jupyter Notebook.
62
+ 3. The notebook will automatically guide you through data loading, transformation, model training, and exporting the updated inference weights (`face_classifier.pth`).
63
+
64
+ ## Notes
65
+ Ensure you have adequate lighting and a clear, frontal view of your face for the most accurate predictions!
66
+
67
+ ## License
68
+ See the LICENSE file for details.
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ from model import EmotionPredictor
5
+
6
+
7
# Shared, module-level inference objects: built once at import time and
# reused by every prediction request.
predictor = EmotionPredictor()

# Frontal-face Haar cascade that ships with the OpenCV Python package.
_CASCADE_XML = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(_CASCADE_XML)

# CascadeClassifier does not raise on a bad path; it just stays empty,
# so fail loudly here instead of at the first detection call.
if face_cascade.empty():
    raise RuntimeError("Failed to load Haar Cascade")
17
+
18
+
19
def predict_emotion(image):
    """
    Detect the largest face in an image and predict its emotion.

    Args:
        image: PIL Image or numpy array in RGB channel order. Grayscale
            (2-D) and RGBA (4-channel) inputs are accepted as well.
            ``None`` means no image was supplied.

    Returns:
        A ``(image, message)`` tuple. ``image`` is the annotated RGB frame
        as a numpy array when a face was found, the unmodified input frame
        when no face was found, or ``None`` when there was nothing usable
        to process. ``message`` is a human-readable status string.
    """
    if image is None:
        return None, "No image provided"

    # Convert PIL Image to numpy array if needed
    frame = image if isinstance(image, np.ndarray) else np.array(image)

    # Normalise every supported layout to 3-channel BGR so that the
    # grayscale conversion, cropping and drawing below always see the same
    # format (the previous code crashed on 2-D grayscale input).
    if frame.ndim == 2:
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
    elif frame.ndim == 3 and frame.shape[2] == 3:
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    elif frame.ndim == 3 and frame.shape[2] == 4:
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGR)
    else:
        return None, "Unsupported image format"

    # Haar cascades operate on single-channel images
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)

    detected = face_cascade.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    if len(detected) == 0:
        return frame, "No face detected"

    # Only the largest detection (by area) is classified
    x, y, w, h = (int(v) for v in max(detected, key=lambda r: r[2] * r[3]))

    # Clamp the crop to the image bounds, mirroring the desktop script,
    # so a box touching the border can never yield an empty slice.
    y1, y2 = max(0, y), min(frame_bgr.shape[0], y + h)
    x1, x2 = max(0, x), min(frame_bgr.shape[1], x + w)
    if y2 <= y1 or x2 <= x1:
        return frame, "No face detected"

    # The classifier expects RGB input
    face_rgb = cv2.cvtColor(frame_bgr[y1:y2, x1:x2], cv2.COLOR_BGR2RGB)
    emotion = predictor.predict(face_rgb)

    # Draw on a copy so the converted input frame is left untouched
    output_frame = frame_bgr.copy()
    cv2.rectangle(output_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(
        output_frame, emotion,
        # Keep the label on-canvas when the face touches the top edge
        (x, max(y - 10, 20)),
        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2
    )

    # Convert back to RGB for display
    output_frame_rgb = cv2.cvtColor(output_frame, cv2.COLOR_BGR2RGB)

    return output_frame_rgb, f"Detected emotion(s): {emotion}"
84
+
85
+
86
# Gradio front end: a gradient banner, an input column (image + button)
# and an output column (annotated image + text result).
_BANNER_HTML = """
<div style="background: linear-gradient(90deg, #FF9933 0%, #D0B264 50%, #469F93 100%);
padding: 40px;
border-radius: 12px;
text-align: center;
color: white;
font-family: 'Helvetica Neue', Arial, sans-serif;
margin-bottom: 20px;">
<h1 style="color: white; margin: 0; margin-bottom: 10px; font-weight: 900; font-size: 3.5em; display: flex; align-items: center; justify-content: center; gap: 10px;">
Smilo <span style="font-size: 0.9em;">😃</span>
</h1>
<p style="color: #f0f0f0; font-size: 1.2em; margin: 0; font-weight: 400; letter-spacing: 0.5px;">Real-Time Emotion Detection powered by PyTorch</p>
</div>
"""

with gr.Blocks(title="Smilo😃 - Real-Time Emotion Detection") as demo:
    gr.HTML(_BANNER_HTML)

    with gr.Row():
        # Left column: where the user supplies a picture
        with gr.Column():
            image_input = gr.Image(
                label="Input Image",
                type="pil",
                sources=["upload", "webcam"],
            )
            submit_btn = gr.Button("Predict Emotion", variant="primary")

        # Right column: prediction results
        with gr.Column():
            image_output = gr.Image(label="Annotated Image")
            emotion_output = gr.Textbox(label="Prediction Result", interactive=False)

    # The button click and any change of the input image both run the
    # same prediction with the same inputs and outputs.
    _prediction_wiring = dict(
        fn=predict_emotion,
        inputs=[image_input],
        outputs=[image_output, emotion_output],
    )
    submit_btn.click(**_prediction_wiring)
    image_input.change(**_prediction_wiring)


if __name__ == "__main__":
    demo.launch()
classes.pkl ADDED
Binary file (76 Bytes). View file
 
face_classifier.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14702d50b48f94ed581f5df763d9271981063ee7a0bae0b4826131a54c38a08c
3
+ size 67502533
main.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ from model import EmotionPredictor
3
+
4
# Desktop entry point: reads frames from the default webcam, classifies the
# largest detected face on every FRAME_SKIP-th frame, and draws the most
# recent result on every frame until 'q' is pressed or the window is closed.
#
# NOTE(review): cv2.imshow needs an OpenCV build with GUI support, while
# requirements.txt lists opencv-python-headless -- confirm which build is
# expected for this script.

# Load the model and the cascade BEFORE opening the camera, so a failure
# here cannot leave the capture device open.
predictor = EmotionPredictor()

face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)

if face_cascade.empty():
    raise RuntimeError("Failed to load Haar Cascade")

WINDOW_NAME = "Emotion Detection"
FRAME_SKIP = 2        # run detection/classification on every 2nd frame
frame_count = 0
current_faces = []    # (x, y, w, h, label) tuples from the last processed frame

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this check a missing camera made the script exit silently.
    cap.release()
    raise RuntimeError("Failed to open webcam")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        if frame_count % FRAME_SKIP == 0:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            detected = face_cascade.detectMultiScale(
                gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
            )
            current_faces = []
            if len(detected) > 0:
                # Only the largest detection (by area) is classified
                x, y, w, h = max(detected, key=lambda r: r[2] * r[3])

                # Clamp the crop to the frame so the slice is never empty
                y1, y2 = max(0, y), min(frame.shape[0], y + h)
                x1, x2 = max(0, x), min(frame.shape[1], x + w)

                if y2 > y1 and x2 > x1:
                    # The classifier expects RGB, OpenCV frames are BGR
                    face_rgb = cv2.cvtColor(frame[y1:y2, x1:x2], cv2.COLOR_BGR2RGB)
                    label = predictor.predict(face_rgb)
                    current_faces.append((x, y, w, h, label))

        # Skipped frames reuse the boxes/labels of the last processed frame
        for (x, y, w, h, label) in current_faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            cv2.putText(
                frame, label,
                # Keep the label on-canvas when the face touches the top edge
                (x, max(y - 10, 20)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2
            )

        cv2.imshow(WINDOW_NAME, frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
        # Also stop when the user closes the window with the 'X' button
        if cv2.getWindowProperty(WINDOW_NAME, cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Always free the camera and close windows, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()
59
+
60
+
61
+
model.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import pickle
5
+ from torchvision import transforms
6
+
7
+ import numpy as np
8
+ from PIL import Image
9
+
10
+
11
class FaceClassifier(nn.Module):
    """Small CNN that maps a 3-channel 128x128 image to class logits.

    Three 3x3 convolutions (3 -> 32 -> 64 -> 128 channels) are each followed
    by ReLU and 2x2 max-pooling, which reduces 128x128 inputs to 16x16
    feature maps. Two linear layers (with dropout in between) then produce
    ``num_classes`` raw scores.
    """

    def __init__(self, num_classes):
        """Build the layers.

        Args:
            num_classes: Number of output logits.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.dropout = nn.Dropout(0.1)
        # 128 channels x 16 x 16 spatial positions after three 2x poolings
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Tensor of shape (N, 3, 128, 128), or a single unbatched
                (3, 128, 128) image.

        Returns:
            Tensor of shape (N, num_classes) with unnormalised scores
            (N is 1 for an unbatched input).

        Raises:
            RuntimeError: If the spatial size is not 128x128, because the
                flattened features no longer match ``fc1``.
        """
        # Accept a single unbatched image, as the old view(-1, ...) did
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten per sample. The previous view(-1, 128 * 16 * 16) silently
        # turned a wrong-sized image into extra batch rows; this makes fc1
        # raise a shape error instead.
        x = torch.flatten(x, 1)

        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)
31
+
32
+
33
+
34
class EmotionPredictor:
    """Loads the trained FaceClassifier and predicts a label for a face crop."""

    def __init__(self, classes_path=None, weights_path=None):
        """Load the class list and model weights.

        Args:
            classes_path: Path to the pickled class list. Defaults to
                ``classes.pkl`` next to this module.
            weights_path: Path to the model state dict. Defaults to
                ``face_classifier.pth`` next to this module.
        """
        import os

        # Resolve the default assets relative to this file rather than the
        # current working directory, so the predictor also works when the
        # application is started from another directory.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        if classes_path is None:
            classes_path = os.path.join(module_dir, "classes.pkl")
        if weights_path is None:
            weights_path = os.path.join(module_dir, "face_classifier.pth")

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # SECURITY: pickle can execute arbitrary code while loading; only
        # point this at a class file you trust.
        # presumably a sequence of label strings indexed by class id -- verify
        with open(classes_path, "rb") as f:
            self.classes = pickle.load(f)

        self.model = FaceClassifier(len(self.classes))
        self.model.load_state_dict(
            # weights_only=True restricts unpickling to tensors/containers,
            # which is all a state dict needs.
            torch.load(weights_path, map_location=self.device, weights_only=True)
        )
        self.model.to(self.device).eval()

        self.transform = transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.ToTensor(),
            # Single mean/std value, applied to every channel
            transforms.Normalize((0.5,), (0.5,))
        ])

    @torch.inference_mode()
    def predict(self, image_np: np.ndarray) -> str:
        """Predict the class label for one face image.

        Args:
            image_np: Image array in RGB order; grayscale and RGBA arrays
                are converted to RGB first.

        Returns:
            The entry of ``self.classes`` with the highest model score.
        """
        # The network's first convolution needs exactly three channels
        img = Image.fromarray(image_np).convert("RGB")
        tensor = self.transform(img).unsqueeze(0).to(self.device)
        output = self.model(tensor)
        return self.classes[output.argmax(1).item()]
58
+
59
+
60
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch>=2.1.0
2
+ torchvision>=0.16.0
3
+ numpy>=1.24.3
4
+ opencv-python-headless>=4.8.1
5
+ pillow>=10.0.0
6
+ matplotlib>=3.7.2
7
+ gradio>=5.15.0