"""
πŸƒ Human Activity Recognition β€” Gradio Demo
Fine-tuned MobileNetV2 classifying 15 human activities from images.
Model: Rishi2455/Human-Activity-Recognition
"""

import os
import gradio as gr
import tensorflow as tf
import numpy as np
from PIL import Image
from huggingface_hub import hf_hub_download

# ── Configuration ────────────────────────────────────────────────────────────
MODEL_REPO = "Rishi2455/Human-Activity-Recognition"
MODEL_FILE = "mobilenetv2_finetuned.h5"
IMG_SIZE = (224, 224)

CLASS_NAMES = [
    "Calling", "Clapping", "Cycling", "Dancing", "Drinking",
    "Eating", "Fighting", "Hugging", "Laughing", "Listening to Music",
    "Running", "Sitting", "Sleeping", "Texting", "Using Laptop",
]

ACTIVITY_EMOJI = {
    "Calling": "πŸ“ž", "Clapping": "πŸ‘", "Cycling": "🚴", "Dancing": "πŸ’ƒ",
    "Drinking": "πŸ₯€", "Eating": "🍽️", "Fighting": "πŸ₯Š", "Hugging": "πŸ€—",
    "Laughing": "πŸ˜‚", "Listening to Music": "🎧", "Running": "πŸƒ",
    "Sitting": "πŸͺ‘", "Sleeping": "😴", "Texting": "πŸ“±", "Using Laptop": "πŸ’»",
}
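
# Sanity check: CLASS_NAMES order must match the label indices the model was
# trained with, and every class needs an emoji entry. Fail fast on drift.
assert set(ACTIVITY_EMOJI) == set(CLASS_NAMES), "ACTIVITY_EMOJI keys must match CLASS_NAMES"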

# ── Download & load model ───────────────────────────────────────────────────
print("⬇️  Downloading model...")
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
print("πŸ”§  Loading model...")
model = tf.keras.models.load_model(model_path, compile=False)
print("βœ…  Model loaded!")

# ── Example images (baked into the repo under examples/) ────────────────────
EXAMPLE_DIR = "examples"
EXAMPLE_FILES = [
    "calling.jpg", "clapping.jpg", "cycling.jpg", "dancing.jpg",
    "drinking.jpg", "eating.jpg", "fighting.jpg", "hugging.jpg",
    "laughing.jpg", "listening_to_music.jpg", "running.jpg",
    "sitting.jpg", "sleeping.jpg", "texting.jpg", "using_laptop.jpg",
]
example_paths = [
    os.path.join(EXAMPLE_DIR, f)
    for f in EXAMPLE_FILES
    if os.path.exists(os.path.join(EXAMPLE_DIR, f))
]
print(f"πŸ“Έ  Found {len(example_paths)} example images.")

# ── Inference ────────────────────────────────────────────────────────────────
def predict(pil_img: Image.Image) -> dict:
    """Classify a human activity from an image."""
    if pil_img is None:
        return {}
    img = pil_img.convert("RGB").resize(IMG_SIZE)
    arr = np.expand_dims(np.array(img, dtype=np.float32), axis=0)
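    # mobilenet_v2.preprocess_input rescales pixel values from [0, 255] to
    # [-1, 1], the range the network was trained with.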
    arr = tf.keras.applications.mobilenet_v2.preprocess_input(arr)
    preds = model.predict(arr, verbose=0)[0]
    emoji_labels = {
        f"{ACTIVITY_EMOJI.get(c, '')} {c}": float(preds[i])
        for i, c in enumerate(CLASS_NAMES)
    }
    return emoji_labels
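
# Optional local smoke test (a minimal sketch; HAR_SMOKE_TEST is a hypothetical
# opt-in flag, and this assumes at least one example image was found above).
if os.environ.get("HAR_SMOKE_TEST") == "1" and example_paths:
    _scores = predict(Image.open(example_paths[0]))
    _top3 = sorted(_scores.items(), key=lambda kv: kv[1], reverse=True)[:3]
    print(f"🧪  Smoke test on {example_paths[0]}: {_top3}")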

def clear_all():
    """Reset both image and predictions."""
    return None, None

# ── Gradio UI ────────────────────────────────────────────────────────────────
DESCRIPTION = """
Upload a photo of a person performing an activity, and the model will predict which of **15 activities** they are doing.

**Supported activities:** Calling · Clapping · Cycling · Dancing · Drinking · Eating · Fighting · Hugging · Laughing · Listening to Music · Running · Sitting · Sleeping · Texting · Using Laptop

**Model:** [MobileNetV2](https://huggingface.co/Rishi2455/Human-Activity-Recognition) fine-tuned on the [Human Action Recognition dataset](https://huggingface.co/datasets/Bingsu/Human_Action_Recognition)
"""

css = """
.main-header { text-align: center; margin-bottom: 0.5rem; }
.main-header h1 { font-size: 2.2rem; margin-bottom: 0; }
.footer { text-align: center; margin-top: 1rem; color: #888; font-size: 0.85rem; }
"""

with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="sky",
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=css,
    title="πŸƒ Human Activity Recognition",
    analytics_enabled=False,
) as demo:

    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>πŸƒ Human Activity Recognition</h1>
        <p style="color: #555; font-size: 1.1rem;">Powered by MobileNetV2 Β· 15 Activity Classes</p>
    </div>
    """)

    gr.Markdown(DESCRIPTION)

    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="πŸ“Έ Upload Image",
                sources=["upload", "webcam", "clipboard"],
                height=380,
            )
            with gr.Row():
                clear_btn = gr.Button(
                    "πŸ—‘οΈ Clear",
                    variant="secondary",
                    size="lg",
                )
                submit_btn = gr.Button(
                    "πŸ” Classify Activity",
                    variant="primary",
                    size="lg",
                )

        with gr.Column(scale=1):
            label_output = gr.Label(
                num_top_classes=5,
                label="πŸ“Š Prediction Results",
            )

    # Examples - all 15 activity classes, baked into the repo
    if example_paths:
        gr.Examples(
            examples=example_paths,
            inputs=image_input,
            outputs=label_output,
            fn=predict,
            cache_examples=True,
            label="πŸ–ΌοΈ Try these examples β€” one for each activity",
        )

    # Event handlers
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[image_input, label_output],
    )
    submit_btn.click(
        fn=predict,
        inputs=image_input,
        outputs=label_output,
        api_name="predict",
    )
    image_input.change(
        fn=predict,
        inputs=image_input,
        outputs=label_output,
        api_name=False,
    )

    # Footer
    gr.HTML("""
    <div class="footer">
        Made with ❤️ using <a href="https://www.gradio.app/" target="_blank">Gradio</a> &
        <a href="https://huggingface.co/" target="_blank">Hugging Face</a> ·
        <a href="https://huggingface.co/Rishi2455/Human-Activity-Recognition" target="_blank">Model Card</a>
    </div>
    """)

# show_api=True keeps the auto-generated API docs (and the named /predict
# endpoint) enabled when the app is served.
demo.launch(show_api=True)
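
# Example client call (a sketch, not executed here): with gradio_client
# installed, something like this queries the named /predict endpoint. The
# Space id is an assumption - point Client at wherever this app is hosted,
# and note that recent gradio_client versions wrap file inputs in handle_file().
#
#   from gradio_client import Client, handle_file
#   client = Client("Rishi2455/Human-Activity-Recognition")
#   print(client.predict(handle_file("examples/running.jpg"), api_name="/predict"))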