""" ๐Ÿƒ Human Activity Recognition โ€” Gradio Demo Fine-tuned MobileNetV2 classifying 15 human activities from images. Model: Rishi2455/Human-Activity-Recognition """ import os import gradio as gr import tensorflow as tf import numpy as np from PIL import Image from huggingface_hub import hf_hub_download # โ”€โ”€ Configuration โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ MODEL_REPO = "Rishi2455/Human-Activity-Recognition" MODEL_FILE = "mobilenetv2_finetuned.h5" IMG_SIZE = (224, 224) CLASS_NAMES = [ "Calling", "Clapping", "Cycling", "Dancing", "Drinking", "Eating", "Fighting", "Hugging", "Laughing", "Listening to Music", "Running", "Sitting", "Sleeping", "Texting", "Using Laptop", ] ACTIVITY_EMOJI = { "Calling": "๐Ÿ“ž", "Clapping": "๐Ÿ‘", "Cycling": "๐Ÿšด", "Dancing": "๐Ÿ’ƒ", "Drinking": "๐Ÿฅค", "Eating": "๐Ÿฝ๏ธ", "Fighting": "๐ŸฅŠ", "Hugging": "๐Ÿค—", "Laughing": "๐Ÿ˜‚", "Listening to Music": "๐ŸŽง", "Running": "๐Ÿƒ", "Sitting": "๐Ÿช‘", "Sleeping": "๐Ÿ˜ด", "Texting": "๐Ÿ“ฑ", "Using Laptop": "๐Ÿ’ป", } # โ”€โ”€ Download & load model โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ print("โฌ‡๏ธ Downloading model...") model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE) print("๐Ÿ”ง Loading model...") model = tf.keras.models.load_model(model_path, compile=False) print("โœ… Model loaded!") # โ”€โ”€ Example images (baked into the repo under examples/) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ EXAMPLE_DIR = "examples" EXAMPLE_FILES = [ "calling.jpg", "clapping.jpg", "cycling.jpg", "dancing.jpg", "drinking.jpg", "eating.jpg", "fighting.jpg", "hugging.jpg", "laughing.jpg", "listening_to_music.jpg", "running.jpg", "sitting.jpg", "sleeping.jpg", "texting.jpg", "using_laptop.jpg", ] example_paths = [ os.path.join(EXAMPLE_DIR, f) for f in EXAMPLE_FILES if os.path.exists(os.path.join(EXAMPLE_DIR, f)) ] print(f"๐Ÿ“ธ Found {len(example_paths)} example images.") # โ”€โ”€ Inference โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ def predict(pil_img: Image.Image) -> dict: """Classify a human activity from an image.""" if pil_img is None: return {} img = pil_img.convert("RGB").resize(IMG_SIZE) arr = np.expand_dims(np.array(img, dtype=np.float32), axis=0) arr = tf.keras.applications.mobilenet_v2.preprocess_input(arr) preds = model.predict(arr, verbose=0)[0] emoji_labels = { f"{ACTIVITY_EMOJI.get(c, '')} {c}": float(preds[i]) for i, c in enumerate(CLASS_NAMES) } return emoji_labels def clear_all(): """Reset both image and predictions.""" return None, None # โ”€โ”€ Gradio UI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ DESCRIPTION = """ Upload a photo of a person performing an activity, and the model will predict which of **15 activities** they are doing. **Supported activities:** Calling ยท Clapping ยท Cycling ยท Dancing ยท Drinking ยท Eating ยท Fighting ยท Hugging ยท Laughing ยท Listening to Music ยท Running ยท Sitting ยท Sleeping ยท Texting ยท Using Laptop **Model:** [MobileNetV2](https://huggingface.co/Rishi2455/Human-Activity-Recognition) fine-tuned on the [Human Action Recognition dataset](https://huggingface.co/datasets/Bingsu/Human_Action_Recognition) """ css = """ .main-header { text-align: center; margin-bottom: 0.5rem; } .main-header h1 { font-size: 2.2rem; margin-bottom: 0; } .footer { text-align: center; margin-top: 1rem; color: #888; font-size: 0.85rem; } """ with gr.Blocks( theme=gr.themes.Soft( primary_hue="blue", secondary_hue="sky", font=gr.themes.GoogleFont("Inter"), ), css=css, title="๐Ÿƒ Human Activity Recognition", analytics_enabled=False, ) as demo: # Header gr.HTML("""

๐Ÿƒ Human Activity Recognition

Powered by MobileNetV2 ยท 15 Activity Classes

""") gr.Markdown(DESCRIPTION) with gr.Row(equal_height=True): with gr.Column(scale=1): image_input = gr.Image( type="pil", label="๐Ÿ“ธ Upload Image", sources=["upload", "webcam", "clipboard"], height=380, ) with gr.Row(): clear_btn = gr.Button( "๐Ÿ—‘๏ธ Clear", variant="secondary", size="lg", ) submit_btn = gr.Button( "๐Ÿ” Classify Activity", variant="primary", size="lg", ) with gr.Column(scale=1): label_output = gr.Label( num_top_classes=5, label="๐Ÿ“Š Prediction Results", ) # Examples โ€” all 15 activity classes, baked into the repo if example_paths: gr.Examples( examples=example_paths, inputs=image_input, outputs=label_output, fn=predict, cache_examples=True, label="๐Ÿ–ผ๏ธ Try these examples โ€” one for each activity", ) # Event handlers clear_btn.click( fn=clear_all, inputs=[], outputs=[image_input, label_output], ) submit_btn.click( fn=predict, inputs=image_input, outputs=label_output, api_name="predict", ) image_input.change( fn=predict, inputs=image_input, outputs=label_output, api_name=False, ) # Footer gr.HTML(""" """) # Launch with show_api=True demo.launch(show_api=True)