Spaces:
Running
Running
Upload 2 files
Browse files
main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
from model import EmotionPredictor
|
| 3 |
+
|
| 4 |
+
# Live webcam emotion detection: find the largest face with a Haar cascade,
# classify it with EmotionPredictor, and draw the box and label on the frame.

# Run detection + classification only on every FRAME_SKIP-th frame; the
# frames in between reuse the most recent result.
FRAME_SKIP = 2
WINDOW_NAME = "Emotion Detection"
# Smallest baseline (in pixels) at which the label is still fully inside the
# frame; used when the face box touches the top edge.
MIN_LABEL_BASELINE = 25

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this check a missing camera just makes the first read() fail
    # and the script exits silently.
    cap.release()
    raise RuntimeError("Failed to open camera 0")

try:
    predictor = EmotionPredictor()

    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    if face_cascade.empty():
        raise RuntimeError("Failed to load Haar Cascade")

    frame_count = 0
    # (x, y, w, h, label) tuples from the last processed frame.
    current_faces = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        if frame_count % FRAME_SKIP == 0:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            detected = face_cascade.detectMultiScale(
                gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
            )
            current_faces = []
            if len(detected) > 0:
                # Only the largest detection (by box area) is classified.
                x, y, w, h = (
                    int(v) for v in max(detected, key=lambda r: r[2] * r[3])
                )

                # Clamp the box to the frame before slicing.
                y1, y2 = max(0, y), min(frame.shape[0], y + h)
                x1, x2 = max(0, x), min(frame.shape[1], x + w)

                if y2 > y1 and x2 > x1:
                    # OpenCV frames are BGR; the predictor is given RGB.
                    face_rgb = cv2.cvtColor(
                        frame[y1:y2, x1:x2], cv2.COLOR_BGR2RGB
                    )
                    label = predictor.predict(face_rgb)
                    current_faces.append((x, y, w, h, label))

        for (x, y, w, h, label) in current_faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            cv2.putText(
                frame, label, (x, max(y - 10, MIN_LABEL_BASELINE)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2
            )

        cv2.imshow(WINDOW_NAME, frame)

        # Quit on "q" or when the window has been closed.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
        if cv2.getWindowProperty(WINDOW_NAME, cv2.WND_PROP_VISIBLE) < 1:
            break
finally:
    # Always free the camera and close the window, even if setup or the
    # loop body raised.
    cap.release()
    cv2.destroyAllWindows()
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
model.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import pickle
|
| 5 |
+
from torchvision import transforms
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from PIL import Image
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class FaceClassifier(nn.Module):
    """Small convolutional classifier for 3-channel face images.

    Three stages of 3x3 convolution (padding 1) + ReLU + 2x2 max-pool are
    followed by two fully connected layers. The first fully connected layer
    expects 128 * 16 * 16 features, which corresponds to 128 x 128 inputs
    after the three pooling steps.

    Args:
        num_classes: Number of output logits.
    """

    # (channels, height, width) that must reach the fully connected head.
    _FEATURE_SHAPE = (128, 16, 16)

    def __init__(self, num_classes):
        super().__init__()
        # The attribute names below become state_dict keys; renaming them
        # would stop previously saved weights from loading.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return unnormalized class logits for ``x``.

        Args:
            x: Image tensor with 3 channels and 128 x 128 spatial size.

        Returns:
            Tensor of logits with ``num_classes`` columns, one row per sample.

        Raises:
            ValueError: If the convolutional features are not 128 x 16 x 16,
                i.e. the input was not 128 x 128.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # view(-1, ...) would otherwise silently regroup elements across
        # samples when the spatial size is wrong (e.g. four 64 x 64 images
        # collapse into a single row), so reject such inputs explicitly.
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                "FaceClassifier expects 128x128 inputs; got feature map of "
                f"shape {tuple(x.shape[-3:])} after the convolutional stages"
            )

        x = x.view(-1, 128 * 16 * 16)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class EmotionPredictor:
    """Load a trained ``FaceClassifier`` and predict a label for a face crop.

    Args:
        classes_path: Path to the pickle file holding the class labels; the
            loaded object is indexed by the model's argmax class index.
        weights_path: Path to the saved ``FaceClassifier`` state dict.

    Both paths default to the file names used originally and are resolved
    relative to the current working directory.
    """

    def __init__(self, classes_path="classes.pkl",
                 weights_path="face_classifier.pth"):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # NOTE(security): pickle.load can execute arbitrary code; only point
        # classes_path at a file you produced or otherwise trust.
        with open(classes_path, "rb") as f:
            self.classes = pickle.load(f)

        self.model = FaceClassifier(len(self.classes))
        self.model.load_state_dict(
            torch.load(weights_path, map_location=self.device)
        )
        self.model.to(self.device).eval()

        # Resize to the 128 x 128 input the classifier's first linear layer
        # is sized for, convert to a tensor, then normalize with mean 0.5 and
        # std 0.5.
        self.transform = transforms.Compose([
            transforms.Resize((128, 128)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
        ])

    @torch.inference_mode()
    def predict(self, image_np: np.ndarray) -> str:
        """Return the predicted class label for one face image.

        Args:
            image_np: Image array accepted by ``PIL.Image.fromarray``. The
                caller in this project passes an RGB crop.

        Returns:
            The entry of ``self.classes`` at the highest-scoring class index.
        """
        # Force three channels so grayscale or RGBA arrays do not reach the
        # 3-channel first convolution with the wrong channel count.
        img = Image.fromarray(image_np).convert("RGB")
        tensor = self.transform(img).unsqueeze(0).to(self.device)
        output = self.model(tensor)
        return self.classes[output.argmax(1).item()]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|