Spaces:

TrungTran
/

faceage_ClientScan

Running

App Files Files Community

TrungTran committed 21 days ago

Commit

7e0629f

verified ·

1 Parent(s): fbf77cb

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +311 -0

app.py ADDED Viewed

	@@ -0,0 +1,311 @@

+"""
+FaceAge-DINOv3 — Gradio demo for HuggingFace Spaces.
+Face detection : YuNet (OpenCV built-in, ~350 KB model, no extra deps)
+Age/gender     : FaceAge-DINOv3 ONNX (CPU, ~1.2 GB)
+"""
+import urllib.request
+import os
+import numpy as np
+import gradio as gr
+from PIL import Image, ImageDraw, ImageFont
+# ---------------------------------------------------------------------------
+# Age/gender preprocessing  (ImageNet normalisation, matches training)
+# ---------------------------------------------------------------------------
+# Per-channel mean / std used for normalisation, and the square input side in pixels.
+_MEAN     = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+_STD      = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+_IMG_SIZE = 224
+def _preprocess(img_rgb: np.ndarray) -> np.ndarray:
+    """Turn an HxWx3 uint8 RGB array into a contiguous 1x3x224x224 float32 batch.
+    The image is resized with bicubic resampling, scaled to [0, 1], and
+    normalised per channel with ``_MEAN`` / ``_STD``.
+    """
+    from PIL import Image as _PIL
+    target_size = (_IMG_SIZE, _IMG_SIZE)
+    resized = _PIL.fromarray(img_rgb).resize(target_size, _PIL.BICUBIC)
+    scaled = np.asarray(resized, dtype=np.float32) / 255.0
+    normalised = (scaled - _MEAN) / _STD
+    # HWC -> CHW, then prepend the batch axis.
+    chw = normalised.transpose(2, 0, 1)
+    return np.ascontiguousarray(chw[np.newaxis])
+def _decode_age(logits: np.ndarray) -> float:
+    """Decode CORAL logits into an age: the sum of their sigmoids.
+    Logits are clipped to [-88, 88] before exponentiation.
+    """
+    bounded = np.clip(logits, -88.0, 88.0)
+    exceed_probs = 1.0 / (1.0 + np.exp(-bounded))
+    return float(exceed_probs.sum())
+def _decode_gender(logits: np.ndarray) -> tuple[str, float]:
+    """Softmax the gender logits and return (label, probability of that label).
+    Index 1 is reported as "male"; any other winning index as "female".
+    """
+    # Subtract the maximum before exponentiating to keep exp() in range.
+    shifted = logits - logits.max()
+    weights = np.exp(shifted)
+    probs = weights / weights.sum()
+    best = int(probs.argmax())
+    label = "male" if best == 1 else "female"
+    return label, float(probs[best])
+# ---------------------------------------------------------------------------
+# Age/gender model  (ONNX, loaded from HF Hub)
+# ---------------------------------------------------------------------------
+# Lazily created ONNX Runtime session and the name of its first input.
+_ORT_SESSION = None
+_ORT_IN_NAME = None
+def _load_age_model():
+    """Create the shared ONNX Runtime session on first call; later calls are no-ops.
+    Fetches the ONNX file through ``hf_hub_download`` and opens it with the
+    CPU execution provider, storing the session and its first input name in
+    the module globals.
+    """
+    global _ORT_SESSION, _ORT_IN_NAME
+    if _ORT_SESSION is None:
+        import onnxruntime as ort
+        from huggingface_hub import hf_hub_download
+        print("[AgeModel] Downloading ONNX from HuggingFace Hub …")
+        model_file = hf_hub_download(
+            repo_id="TrungTran/faceage-dino",
+            filename="faceage_dino_fp32.onnx",
+        )
+        session_options = ort.SessionOptions()
+        session_options.graph_optimization_level = (
+            ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        )
+        session_options.intra_op_num_threads = 4
+        _ORT_SESSION = ort.InferenceSession(
+            model_file,
+            sess_options=session_options,
+            providers=["CPUExecutionProvider"],
+        )
+        first_input = _ORT_SESSION.get_inputs()[0]
+        _ORT_IN_NAME = first_input.name
+        print(f"[AgeModel] Ready  ({model_file})")
+def _predict_crop(face_rgb: np.ndarray) -> dict:
+    """Run the age/gender model on one RGB crop.
+    Returns a dict with keys "age" (float), "gender" (str) and "conf"
+    (probability of the reported gender). The session must already have been
+    created by ``_load_age_model``.
+    """
+    batch = _preprocess(face_rgb)
+    outputs = _ORT_SESSION.run(None, {_ORT_IN_NAME: batch})
+    # The model is expected to yield exactly two outputs: age logits, gender logits.
+    age_logits, gender_logits = outputs
+    estimated_age = _decode_age(age_logits[0])
+    gender_label, gender_prob = _decode_gender(gender_logits[0])
+    return {"age": estimated_age, "gender": gender_label, "conf": gender_prob}
+# ---------------------------------------------------------------------------
+# YuNet face detector  (cv2.FaceDetectorYN, OpenCV ≥ 4.5.4)
+# ---------------------------------------------------------------------------
+# Where the YuNet model is fetched from and where it is cached locally.
+_YUNET_URL  = (
+    "https://github.com/opencv/opencv_zoo/raw/main/models/"
+    "face_detection_yunet/face_detection_yunet_2023mar.onnx"
+)
+_YUNET_PATH = "/tmp/face_detection_yunet_2023mar.onnx"
+# None = not initialised yet, "unavailable" = setup failed, otherwise the detector.
+_DETECTOR   = None
+def _load_detector():
+    """Download (if needed) and initialise the YuNet face detector once.
+    On success ``_DETECTOR`` holds the ``cv2.FaceDetectorYN`` instance. On any
+    failure (download, missing OpenCV, model init) it is set to the string
+    "unavailable" so callers fall back to whole-image prediction. Nothing is
+    raised, and later calls return immediately either way.
+    """
+    global _DETECTOR
+    if _DETECTOR is not None:
+        return
+    # Download model if not cached
+    if not os.path.exists(_YUNET_PATH):
+        print("[YuNet] Downloading model …")
+        # Fetch into a sibling temp file and rename it afterwards, so an
+        # interrupted transfer never leaves a truncated file at _YUNET_PATH
+        # that later runs would mistake for a valid cached model.
+        part_path = f"{_YUNET_PATH}.{os.getpid()}.part"
+        try:
+            urllib.request.urlretrieve(_YUNET_URL, part_path)
+            os.replace(part_path, _YUNET_PATH)
+            print(f"[YuNet] Saved to {_YUNET_PATH}")
+        except Exception as e:
+            print(f"[YuNet] Download failed: {e} — face detection disabled")
+            try:
+                os.remove(part_path)
+            except OSError:
+                # Best-effort cleanup: the partial file may never have been created.
+                pass
+            _DETECTOR = "unavailable"
+            return
+    try:
+        # Imported inside the try so a missing/broken OpenCV install degrades
+        # to "unavailable" like every other detector failure.
+        import cv2
+        _DETECTOR = cv2.FaceDetectorYN.create(
+            model           = _YUNET_PATH,
+            config          = "",
+            input_size      = (320, 320),
+            score_threshold = 0.6,
+            nms_threshold   = 0.3,
+            top_k           = 100,
+        )
+        print("[YuNet] Face detector ready")
+    except Exception as e:
+        print(f"[YuNet] Init failed: {e} — face detection disabled")
+        _DETECTOR = "unavailable"
+def _detect_faces(img_rgb: np.ndarray,
+                  min_face_px: int = 20) -> list[tuple[int, int, int, int]]:
+    """
+    Detect faces with YuNet and return their boxes as (x0, y0, x1, y1),
+    clipped to the image and ordered by area, largest first. Boxes whose
+    clipped width or height is below ``min_face_px`` are dropped.
+    Returns an empty list when the detector is unavailable or finds nothing.
+    """
+    if _DETECTOR is None or _DETECTOR == "unavailable":
+        return []
+    import cv2
+    img_h, img_w = img_rgb.shape[:2]
+    bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
+    # YuNet requires input_size to match the image
+    _DETECTOR.setInputSize((img_w, img_h))
+    _, detections = _DETECTOR.detect(bgr)   # detections: None or Nx15
+    if detections is None:
+        return []
+    def _area(box):
+        return (box[2] - box[0]) * (box[3] - box[1])
+    kept = []
+    for det in detections:
+        # The first four values are x, y, width, height.
+        x, y, fw, fh = det[:4].astype(int)
+        x0, y0 = max(0, x),          max(0, y)
+        x1, y1 = min(img_w, x + fw), min(img_h, y + fh)
+        if (x1 - x0) < min_face_px or (y1 - y0) < min_face_px:
+            continue
+        kept.append((x0, y0, x1, y1))
+    return sorted(kept, key=_area, reverse=True)
+# ---------------------------------------------------------------------------
+# Drawing
+# ---------------------------------------------------------------------------
+# Colours cycled through when annotating faces.
+_PALETTE = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4",
+            "#FFEAA7", "#DDA0DD", "#98D8C8", "#F7DC6F"]
+# Candidate TrueType fonts, tried in order.
+_FONT_PATHS = [
+    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
+    "/usr/share/fonts/dejavu/DejaVuSans-Bold.ttf",
+    "/System/Library/Fonts/Helvetica.ttc",
+]
+def _get_font(size: int):
+    """Return the first font from ``_FONT_PATHS`` that loads at ``size``.
+    Falls back to ``ImageFont.load_default()`` (called without a size) when
+    none of the candidates can be loaded.
+    """
+    for candidate in _FONT_PATHS:
+        try:
+            font = ImageFont.truetype(candidate, size)
+        except Exception:
+            # This candidate could not be loaded; try the next one.
+            continue
+        return font
+    return ImageFont.load_default()
+def _draw_results(pil_img: Image.Image, results: list[dict]) -> Image.Image:
+    """Annotate ``pil_img`` in place with one label per result and return it.
+    Each result needs "gender" and "age". A result with a truthy "bbox"
+    (x0, y0, x1, y1) gets an outlined box with a label strip above it
+    (clamped to the top edge of the image). A result without one is treated
+    as a whole-image prediction and gets a banner in the top-left corner
+    that also shows "conf".
+    """
+    draw = ImageDraw.Draw(pil_img)
+    font = _get_font(20)
+    label_h = 24  # height of the label strip in pixels
+    for i, r in enumerate(results):
+        # Cycle through the palette when there are more faces than colours.
+        color = _PALETTE[i % len(_PALETTE)]
+        bbox = r.get("bbox")
+        if bbox:
+            label = f"{r['gender']}  {r['age']:.0f} y"
+            x0, y0, x1, y1 = bbox
+            # Box
+            draw.rectangle([x0, y0, x1, y1], outline=color, width=3)
+            # Label background
+            tw = int(draw.textlength(label, font=font))
+            lx0, ly0 = x0, max(0, y0 - label_h - 4)
+            draw.rectangle([lx0, ly0, lx0 + tw + 10, ly0 + label_h + 4], fill=color)
+            draw.text((lx0 + 5, ly0 + 2), label, fill="white", font=font)
+        else:
+            # Full-image fallback — overlay in top-left corner
+            full_label = f"{r['gender']}  {r['age']:.0f} y  ({r['conf']:.0%})"
+            tw = int(draw.textlength(full_label, font=font))
+            draw.rectangle([8, 8, tw + 18, 38], fill=color)
+            draw.text((13, 10), full_label, fill="white", font=font)
+    return pil_img
+# ---------------------------------------------------------------------------
+# Main predict function
+# ---------------------------------------------------------------------------
+def predict(image: Image.Image, max_faces: int,
+            conf_thresh: float) -> tuple[Image.Image, str]:
+    """Detect faces in ``image`` and estimate age and gender for each one.
+    Args:
+        image: Uploaded picture, or None when nothing was uploaded.
+        max_faces: Upper bound on the number of faces analysed; the largest
+            detections are kept. Coerced to int because a UI slider may
+            deliver a float, which cannot be used as a slice bound.
+        conf_thresh: Minimum detector score for a face to be reported.
+    Returns:
+        (annotated RGB image, Markdown summary). When no face is found, or
+        the detector is unavailable, the whole image is analysed instead.
+        When ``image`` is None the result is (None, prompt text).
+    """
+    if image is None:
+        return None, "⬆️  Please upload a photo."
+    _load_age_model()
+    _load_detector()
+    # Forward the requested score threshold to YuNet so the UI setting takes
+    # effect. Skipped when the detector is the "unavailable" sentinel, which
+    # is a str and has no such method.
+    if conf_thresh is not None and hasattr(_DETECTOR, "setScoreThreshold"):
+        _DETECTOR.setScoreThreshold(float(conf_thresh))
+    rgb_image = image.convert("RGB")
+    img_rgb = np.asarray(rgb_image)
+    bboxes  = _detect_faces(img_rgb)[:int(max_faces)]
+    results = []
+    if bboxes:
+        for bbox in bboxes:
+            x0, y0, x1, y1 = bbox
+            r = _predict_crop(img_rgb[y0:y1, x0:x1])
+            r["bbox"] = bbox
+            results.append(r)
+    else:
+        # No faces found — run on the entire image
+        results.append(_predict_crop(img_rgb))
+    # Annotated output image (drawn on a copy of the converted input)
+    out_img = _draw_results(rgb_image.copy(), results)
+    # Text summary
+    if bboxes:
+        plural = "" if len(bboxes) == 1 else "s"
+        mode = f"({len(bboxes)} face{plural} detected)"
+    else:
+        mode = "(no face detected — full image used)"
+    lines = [f"**{mode}**\n"]
+    for i, r in enumerate(results, 1):
+        icon = "👨" if r["gender"] == "male" else "👩"
+        lines.append(
+            f"{icon} **Face {i}** — Age **{r['age']:.1f}**  ·  "
+            f"{r['gender']} ({r['conf']:.0%})"
+        )
+    summary = "\n".join(lines)
+    return out_img, summary
+# ---------------------------------------------------------------------------
+# Gradio UI
+# ---------------------------------------------------------------------------
+# Markdown shown at the top of the page.
+# NOTE(review): the model-card link below uses the account "trungthanhtran",
+# while _load_age_model downloads from "TrungTran/faceage-dino" — confirm
+# which one is correct.
+_DESC = """
+## FaceAge-DINOv3 — Age & Gender Estimation
+Upload a photo. **YuNet** auto-detects faces, then **FaceAge-DINOv3** predicts age and gender.
+| | |
+|--|--|
+| 🏆 LAGENDA 84k MAE | **3.760** (beats MiVOLO v2 measured 3.859) |
+| 🧠 Backbone | DINOv3-ViT-L/16 (Meta AI, 307M params) |
+| ⚡ Speed | ~100 ms / face on CPU (ONNX FP32) |
+| 🔍 Detector | YuNet (OpenCV, ~350 KB) |
+[📄 Model Card](https://huggingface.co/trungthanhtran/faceage-dino)
+"""
+# Two-column layout: inputs (image, sliders, button) on the left, the
+# annotated image and Markdown summary on the right.
+with gr.Blocks(title="FaceAge-DINOv3", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(_DESC)
+    with gr.Row():
+        with gr.Column(scale=1):
+            inp_img   = gr.Image(type="pil", label="📷 Upload photo")
+            with gr.Row():
+                inp_max   = gr.Slider(1, 10, value=5, step=1,
+                                      label="Max faces")
+                # NOTE(review): this value reaches predict() as conf_thresh;
+                # confirm predict actually applies it to the detector.
+                inp_conf  = gr.Slider(0.3, 0.9, value=0.6, step=0.05,
+                                      label="Detection confidence")
+            btn = gr.Button("🔍 Predict", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            out_img  = gr.Image(type="pil", label="Result")
+            out_text = gr.Markdown()
+    # The input order must match predict(image, max_faces, conf_thresh); the
+    # two outputs receive predict's (image, summary) return value.
+    btn.click(
+        fn      = predict,
+        inputs  = [inp_img, inp_max, inp_conf],
+        outputs = [out_img, out_text],
+    )
+    # Footer with training-data and backbone credits.
+    gr.Markdown("""
+---
+*Trained on LAGENDA · IMDB-Clean · UTKFace · AgeDB · FairFace · Open Images.*
+*DINOv3-ViT-L pretrained by Meta AI on LVD-1.68B images.*
+""")
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()