| |
| |
| |
| |
| |
|
|
| import gradio as gr |
| import torch |
| from transformers import ViTForImageClassification, ViTImageProcessor |
| from PIL import Image |
| import numpy as np |
| import os |
|
|
| print("="*70) |
| print("π AI EMOTION DETECTOR - INITIALIZING") |
| print("="*70) |
|
|
| |
| MODEL_ID = "koyelog/face" |
| DEVICE = "cuda" if torch.cuda.is_available() else "cpu" |
|
|
| print(f"\nπ¦ Model ID: {MODEL_ID}") |
| print(f"π₯οΈ Device: {DEVICE}") |
| print(f"πΎ PyTorch Version: {torch.__version__}") |
|
|
| |
| print("\nβ³ Loading model from HuggingFace...") |
|
|
| try: |
| model = ViTForImageClassification.from_pretrained( |
| MODEL_ID, |
| cache_dir="./model_cache" |
| ) |
| processor = ViTImageProcessor.from_pretrained( |
| MODEL_ID, |
| cache_dir="./model_cache" |
| ) |
| model.to(DEVICE) |
| model.eval() |
| print("β
Model loaded successfully!") |
| print(f"π Model Parameters: {sum(p.numel() for p in model.parameters()):,}") |
| |
| except Exception as e: |
| print(f"β ERROR loading model: {e}") |
| raise |
|
|
| |
# --- Emotion label metadata -------------------------------------------------

# (name, emoji, accent color, short description) per class index, in the
# exact order of the model's output logits.
_EMOTION_TABLE = (
    ('Angry', 'π ', '#ff4444', 'Showing anger or frustration'),
    ('Disgust', 'π€’', '#44ff44', 'Expressing disgust or dislike'),
    ('Fear', 'π¨', '#9944ff', 'Showing fear or anxiety'),
    ('Happy', 'π', '#ffdd44', 'Expressing happiness or joy'),
    ('Sad', 'π’', '#4444ff', 'Showing sadness or sorrow'),
    ('Surprise', 'π²', '#ff44ff', 'Expressing surprise or shock'),
    ('Neutral', 'π', '#888888', 'No strong emotion detected'),
)

# Class index -> display metadata, as consumed by the prediction and UI code.
EMOTIONS = {
    class_id: {'name': label, 'emoji': icon, 'color': color, 'description': text}
    for class_id, (label, icon, color, text) in enumerate(_EMOTION_TABLE)
}


print(f"\nπ Loaded {len(EMOTIONS)} emotion classes:")
for class_id in EMOTIONS:
    info = EMOTIONS[class_id]
    print(f"   {class_id}: {info['emoji']} {info['name']}")
|
|
| |
@torch.no_grad()
def predict_emotion(image):
    """
    Classify the facial emotion shown in *image*.

    Args:
        image: PIL Image or numpy array (or None when nothing was captured).
    Returns:
        A (label_scores, html) pair: a per-emotion probability mapping for
        the Gradio Label widget, plus a formatted HTML result card. On a
        missing image or an internal error, label_scores is None and the
        HTML carries the error message instead.
    """
    # Guard clause: nothing to classify.
    if image is None:
        return None, """
        <div style='text-align: center; padding: 40px; color: #ff4444;'>
            <h2>β οΈ No Image Provided</h2>
            <p>Please upload an image or use webcam to capture!</p>
        </div>
        """

    try:
        # Normalize the input into an RGB PIL image (Gradio may hand us a
        # numpy array depending on the component configuration).
        pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
        if pil_image.mode != 'RGB':
            pil_image = pil_image.convert('RGB')

        original_size = pil_image.size
        print(f"\nπΈ Processing image: {original_size[0]}x{original_size[1]}")

        # Preprocess, then move every tensor onto the model's device.
        batch = processor(images=pil_image, return_tensors="pt")
        batch = {key: tensor.to(DEVICE) for key, tensor in batch.items()}

        # Forward pass -> per-class probabilities back on the CPU.
        logits = model(**batch).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)[0].cpu()

        top_class = torch.argmax(probabilities).item()
        top_score = probabilities[top_class].item()
        winner = EMOTIONS[top_class]

        print(f"π― Prediction: {winner['emoji']} {winner['name']}")
        print(f"π Confidence: {top_score*100:.2f}%")
        print(f"π Top 3 emotions:")
        for rank_idx in torch.topk(probabilities, 3).indices:
            print(f"   {EMOTIONS[rank_idx.item()]['emoji']} {EMOTIONS[rank_idx.item()]['name']}: {probabilities[rank_idx]*100:.2f}%")

        # Mapping consumed by the gr.Label component ("emoji name" -> prob).
        label_scores = {
            f"{EMOTIONS[class_id]['emoji']} {EMOTIONS[class_id]['name']}": float(probabilities[class_id])
            for class_id in range(len(EMOTIONS))
        }

        html = generate_result_html(
            winner['name'],
            winner['emoji'],
            winner['color'],
            winner['description'],
            top_score,
            probabilities
        )

        return label_scores, html

    except Exception as e:
        print(f"β ERROR during prediction: {e}")
        import traceback
        traceback.print_exc()

        error_html = f"""
        <div style='text-align: center; padding: 40px; background: #ffe6e6; border-radius: 15px;'>
            <h2 style='color: #ff4444;'>β Prediction Error</h2>
            <p style='color: #666;'>{str(e)}</p>
            <p style='color: #999; font-size: 0.9em;'>Please try a different image</p>
        </div>
        """
        return None, error_html
|
|
| |
def generate_result_html(name, emoji, color, description, confidence, probs):
    """Generate beautiful HTML result display.

    Args:
        name: Display name of the predicted emotion (e.g. "Happy").
        emoji: Emoji character for the predicted emotion.
        color: CSS hex color associated with the emotion (e.g. "#ffdd44").
        confidence: Probability of the predicted class, in [0, 1].
        description: One-line human-readable description of the emotion.
        probs: Per-class probability tensor indexed like EMOTIONS
            (elements expose .item(), i.e. a 1-D torch tensor).

    Returns:
        An HTML string containing a main result card, an animated
        confidence bar, a per-emotion breakdown, and a model-info footer.
    """

    # Build one bar row per emotion, ordered from most to least probable.
    bars_html = ""
    for idx in sorted(range(len(EMOTIONS)), key=lambda i: probs[i], reverse=True):
        emo = EMOTIONS[idx]
        prob = probs[idx].item()
        percentage = prob * 100
        # Clamp so a rounding artifact can never overflow the bar track.
        bar_width = min(percentage, 100)

        # NOTE: `{emo['color']}dd` appends a hex alpha channel to the color.
        bars_html += f"""
        <div style='margin: 12px 0;'>
            <div style='display: flex; justify-content: space-between; align-items: center; margin-bottom: 6px;'>
                <div style='display: flex; align-items: center; gap: 10px;'>
                    <span style='font-size: 1.8em;'>{emo['emoji']}</span>
                    <span style='font-weight: 600; color: #333;'>{emo['name']}</span>
                </div>
                <span style='font-weight: 700; color: {emo['color']}; font-size: 1.1em;'>{percentage:.1f}%</span>
            </div>
            <div style='width: 100%; background: #e9ecef; border-radius: 10px; height: 12px; overflow: hidden; box-shadow: inset 0 2px 4px rgba(0,0,0,0.06);'>
                <div style='width: {bar_width}%; background: linear-gradient(90deg, {emo['color']}, {emo['color']}dd); height: 100%; transition: width 0.8s cubic-bezier(0.4, 0, 0.2, 1); border-radius: 10px;'></div>
            </div>
        </div>
        """

    # Full result page. Suffixes like {color}18 / {color}30 / {color}40 are
    # hex alpha values appended to the accent color; doubled braces {{ }} in
    # the <style> section escape literal CSS braces inside the f-string.
    html = f"""
    <div style='font-family: "Segoe UI", -apple-system, BlinkMacSystemFont, sans-serif; max-width: 100%;'>

        <!-- Main Result Card -->
        <div style='
            text-align: center;
            padding: 50px 30px;
            background: linear-gradient(135deg, {color}18 0%, {color}30 100%);
            border-radius: 25px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.12);
            margin-bottom: 30px;
            border: 2px solid {color}40;
        '>
            <div style='
                font-size: 120px;
                margin: 0 0 20px 0;
                animation: bounceIn 0.8s cubic-bezier(0.68, -0.55, 0.265, 1.55);
                display: inline-block;
            '>
                {emoji}
            </div>

            <h1 style='
                color: {color};
                font-size: 3.5em;
                margin: 20px 0 10px 0;
                font-weight: 800;
                text-shadow: 2px 2px 8px rgba(0,0,0,0.1);
                letter-spacing: -1px;
            '>
                {name}
            </h1>

            <p style='
                font-size: 1.3em;
                color: #555;
                margin: 15px 0;
                font-weight: 500;
            '>
                {description}
            </p>

            <div style='
                display: inline-flex;
                align-items: center;
                gap: 15px;
                margin: 25px 0;
                padding: 15px 35px;
                background: white;
                border-radius: 50px;
                box-shadow: 0 4px 20px rgba(0,0,0,0.1);
            '>
                <span style='font-size: 1.2em; color: #666;'>Confidence:</span>
                <span style='font-size: 2em; font-weight: 800; color: {color};'>{confidence*100:.1f}%</span>
            </div>

            <!-- Animated Confidence Bar -->
            <div style='
                width: 100%;
                max-width: 500px;
                height: 50px;
                background: #e9ecef;
                border-radius: 25px;
                overflow: hidden;
                margin: 30px auto 0;
                box-shadow: inset 0 4px 8px rgba(0,0,0,0.1);
                position: relative;
            '>
                <div style='
                    width: {confidence*100}%;
                    height: 100%;
                    background: linear-gradient(90deg, {color}, {color}cc);
                    border-radius: 25px;
                    transition: width 1.5s cubic-bezier(0.4, 0, 0.2, 1);
                    display: flex;
                    align-items: center;
                    justify-content: center;
                    box-shadow: 0 0 20px {color}80;
                '>
                    <span style='
                        color: white;
                        font-weight: 800;
                        font-size: 1.3em;
                        text-shadow: 0 2px 4px rgba(0,0,0,0.3);
                    '>
                        {confidence*100:.1f}%
                    </span>
                </div>
            </div>
        </div>

        <!-- Detailed Breakdown -->
        <div style='
            background: white;
            padding: 35px;
            border-radius: 20px;
            box-shadow: 0 8px 32px rgba(0,0,0,0.08);
            border: 1px solid #e9ecef;
        '>
            <h2 style='
                margin: 0 0 25px 0;
                color: #333;
                font-size: 1.8em;
                font-weight: 700;
                display: flex;
                align-items: center;
                gap: 10px;
            '>
                π Detailed Emotion Analysis
            </h2>

            {bars_html}
        </div>

        <!-- Model Info Footer -->
        <div style='
            margin-top: 25px;
            padding: 20px;
            background: linear-gradient(135deg, #f8f9fa, #e9ecef);
            border-radius: 15px;
            text-align: center;
            font-size: 0.9em;
            color: #666;
        '>
            <p style='margin: 5px 0;'>
                <strong>Model:</strong> koyelog/face (Vision Transformer) |
                <strong>Accuracy:</strong> 98.80% |
                <strong>Parameters:</strong> 85.8M
            </p>
        </div>
    </div>

    <style>
        @keyframes bounceIn {{
            0% {{
                opacity: 0;
                transform: scale(0.3) translateY(-50px);
            }}
            50% {{
                opacity: 1;
                transform: scale(1.05);
            }}
            70% {{
                transform: scale(0.9);
            }}
            100% {{
                transform: scale(1);
            }}
        }}
    </style>
    """

    return html
|
|
| |
| print("\nπ¨ Building Gradio interface...") |
|
|
| |
| custom_css = """ |
| .gradio-container { |
| font-family: 'Segoe UI', -apple-system, BlinkMacSystemFont, sans-serif !important; |
| max-width: 1400px !important; |
| } |
| |
| .main-header { |
| text-align: center; |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); |
| color: white; |
| padding: 60px 30px; |
| border-radius: 25px; |
| margin-bottom: 40px; |
| box-shadow: 0 15px 50px rgba(102, 126, 234, 0.3); |
| } |
| |
| .tab-nav button { |
| font-size: 18px !important; |
| font-weight: 600 !important; |
| padding: 18px 30px !important; |
| } |
| |
| .gr-button-primary { |
| background: linear-gradient(135deg, #667eea, #764ba2) !important; |
| border: none !important; |
| font-size: 18px !important; |
| font-weight: 600 !important; |
| padding: 16px 40px !important; |
| border-radius: 12px !important; |
| transition: all 0.3s ease !important; |
| } |
| |
| .gr-button-primary:hover { |
| transform: translateY(-2px) !important; |
| box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important; |
| } |
| |
| footer { |
| visibility: hidden !important; |
| } |
| """ |
|
|
| |
# Root Gradio application: soft purple theme plus custom_css, with two tabs
# (live webcam and file upload) that both feed predict_emotion.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="purple",
        secondary_hue="pink",
        font=gr.themes.GoogleFont("Inter")
    ),
    css=custom_css,
    title="π AI Emotion Detector | koyelog",
    analytics_enabled=False
) as demo:

    # Static hero banner (styled by .main-header in custom_css).
    gr.HTML("""
    <div class="main-header">
        <h1 style='font-size: 4em; margin: 0; font-weight: 900; text-shadow: 3px 3px 6px rgba(0,0,0,0.2);'>
            π AI Emotion Detector
        </h1>
        <p style='font-size: 1.5em; margin: 20px 0 10px; opacity: 0.95; font-weight: 500;'>
            Powered by Vision Transformer | 98.80% Validation Accuracy
        </p>
        <p style='font-size: 1.1em; opacity: 0.85;'>
            Model: <strong>koyelog/face</strong> | 85.8M Parameters | Real-time Detection
        </p>
        <div style='margin-top: 20px; display: flex; gap: 15px; justify-content: center; flex-wrap: wrap;'>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π Angry
            </span>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π€’ Disgust
            </span>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π¨ Fear
            </span>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π Happy
            </span>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π’ Sad
            </span>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π² Surprise
            </span>
            <span style='background: rgba(255,255,255,0.25); padding: 10px 25px; border-radius: 25px; backdrop-filter: blur(10px);'>
                π Neutral
            </span>
        </div>
    </div>
    """)

    with gr.Tabs():

        # --- Tab 1: capture from the webcam, then classify on demand -------
        with gr.Tab("πΉ Live Webcam Detection"):
            gr.Markdown("""
            ### π₯ Capture Your Emotion in Real-Time
            Click the camera button to capture your face and instantly detect your emotion!
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    # streaming=False: classify only on button click, not
                    # continuously on every frame.
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="pil",
                        label="πΈ Your Face",
                        streaming=False,
                        mirror_webcam=True
                    )
                    webcam_button = gr.Button(
                        "π Detect My Emotion",
                        variant="primary",
                        size="lg",
                        scale=1
                    )

                with gr.Column(scale=1):
                    webcam_html = gr.HTML(label="π― Emotion Result")
                    webcam_label = gr.Label(
                        label="π Emotion Probabilities",
                        num_top_classes=7
                    )

            # Wire the button: image in -> (probability label, HTML card) out.
            webcam_button.click(
                fn=predict_emotion,
                inputs=webcam_input,
                outputs=[webcam_label, webcam_html]
            )

        # --- Tab 2: upload / clipboard image, same prediction pipeline -----
        with gr.Tab("πΌοΈ Upload Image"):
            gr.Markdown("""
            ### π€ Upload or Drag & Drop Face Image
            Supports JPG, PNG, JPEG formats. Best results with front-facing, well-lit photos!
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        type="pil",
                        label="πΌοΈ Upload Face Image",
                        sources=["upload", "clipboard"]
                    )
                    image_button = gr.Button(
                        "π Detect Emotion",
                        variant="primary",
                        size="lg"
                    )

                with gr.Column(scale=1):
                    image_html = gr.HTML(label="π― Emotion Result")
                    image_label = gr.Label(
                        label="π Emotion Probabilities",
                        num_top_classes=7
                    )

            image_button.click(
                fn=predict_emotion,
                inputs=image_input,
                outputs=[image_label, image_html]
            )

    # Static footer with model facts and credits (pure presentation).
    gr.HTML("""
    <div style='
        text-align: center;
        margin-top: 60px;
        padding: 50px 30px;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        border-radius: 25px;
        box-shadow: 0 8px 32px rgba(0,0,0,0.08);
    '>
        <h2 style='color: #333; margin-bottom: 30px; font-size: 2em;'>
            π Model Information
        </h2>

        <div style='
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 25px;
            margin: 30px 0;
        '>
            <div style='background: white; padding: 25px; border-radius: 15px; box-shadow: 0 4px 16px rgba(0,0,0,0.06);'>
                <p style='font-weight: 700; color: #667eea; font-size: 1.1em; margin-bottom: 10px;'>Model ID</p>
                <p style='font-size: 1.2em; color: #333; font-weight: 600;'>koyelog/face</p>
            </div>
            <div style='background: white; padding: 25px; border-radius: 15px; box-shadow: 0 4px 16px rgba(0,0,0,0.06);'>
                <p style='font-weight: 700; color: #667eea; font-size: 1.1em; margin-bottom: 10px;'>Architecture</p>
                <p style='font-size: 1.2em; color: #333; font-weight: 600;'>Vision Transformer (ViT)</p>
            </div>
            <div style='background: white; padding: 25px; border-radius: 15px; box-shadow: 0 4px 16px rgba(0,0,0,0.06);'>
                <p style='font-weight: 700; color: #667eea; font-size: 1.1em; margin-bottom: 10px;'>Parameters</p>
                <p style='font-size: 1.2em; color: #333; font-weight: 600;'>85.8 Million</p>
            </div>
            <div style='background: white; padding: 25px; border-radius: 15px; box-shadow: 0 4px 16px rgba(0,0,0,0.06);'>
                <p style='font-weight: 700; color: #667eea; font-size: 1.1em; margin-bottom: 10px;'>Accuracy</p>
                <p style='font-size: 1.2em; color: #333; font-weight: 600;'>Train: 99.29% | Val: 98.80%</p>
            </div>
        </div>

        <div style='margin: 30px 0; padding: 25px; background: white; border-radius: 15px; box-shadow: 0 4px 16px rgba(0,0,0,0.06);'>
            <p style='font-weight: 700; color: #333; font-size: 1.3em; margin-bottom: 15px;'>
                Training Details
            </p>
            <p style='color: #666; font-size: 1.05em; line-height: 1.6;'>
                <strong>Dataset:</strong> 181,230 images across 7 emotion categories<br>
                <strong>Training Epochs:</strong> 20 epochs with dual T4 GPUs<br>
                <strong>Best Epoch:</strong> Epoch 20/20 (Val Acc: 98.80%)<br>
                <strong>License:</strong> MIT License
            </p>
        </div>

        <p style='color: #666; font-size: 1.05em; margin-top: 30px; line-height: 1.6;'>
            β οΈ <strong>Best Results:</strong> Front-facing photos | Good lighting | Single face | Clear expressions
        </p>

        <p style='color: #999; font-size: 0.95em; margin-top: 30px;'>
            Created by <strong style='color: #667eea;'>Koyeliya Ghosh</strong><br>
            <a href='https://huggingface.co/koyelog/face' target='_blank' style='color: #667eea; font-weight: 600;'>
                View Model on HuggingFace β
            </a>
        </p>
    </div>
    """)
|
|
| |
| if __name__ == "__main__": |
| print("\n" + "="*70) |
| print("π LAUNCHING EMOTION DETECTION APP") |
| print("="*70) |
| print("β
Model loaded and ready") |
| print("β
Gradio interface built") |
| print("β
Starting server...\n") |
| |
| demo.launch( |
| server_name="0.0.0.0", |
| server_port=7860, |
| share=False, |
| show_error=True, |
| show_api=True |
| ) |
|
|