""" FaceAge ClientScan — Gradio demo for HuggingFace Spaces. Face detection : YuNet (OpenCV built-in, ~350 KB model, no extra deps) Age/gender : FaceAge ClientScan ONNX (CPU, ~1.2 GB) """ import os import numpy as np _HF_TOKEN = os.environ.get("HF_TOKEN") or None import gradio as gr from PIL import Image, ImageDraw, ImageFont # --------------------------------------------------------------------------- # Age/gender preprocessing (ImageNet normalisation, matches training) # --------------------------------------------------------------------------- _MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) _STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) _IMG_SIZE = 224 def _preprocess(img_rgb: np.ndarray) -> np.ndarray: """HxWx3 uint8 RGB → 1x3x224x224 float32.""" from PIL import Image as _PIL pil = _PIL.fromarray(img_rgb).resize((_IMG_SIZE, _IMG_SIZE), _PIL.BICUBIC) arr = np.asarray(pil, dtype=np.float32) / 255.0 arr = (arr - _MEAN) / _STD return np.ascontiguousarray(arr.transpose(2, 0, 1)[np.newaxis]) def _decode_age(logits: np.ndarray) -> float: """CORAL: age = Σ sigmoid(logits).""" logits = np.clip(logits, -88.0, 88.0) return float((1.0 / (1.0 + np.exp(-logits))).sum()) def _decode_gender(logits: np.ndarray) -> tuple[str, float]: ex = np.exp(logits - logits.max()) probs = ex / ex.sum() idx = int(probs.argmax()) return ("male" if idx == 1 else "female"), float(probs[idx]) # --------------------------------------------------------------------------- # Age/gender model (ONNX, loaded from HF Hub) # --------------------------------------------------------------------------- _ORT_SESSION = None _ORT_IN_NAME = None def _load_age_model(): global _ORT_SESSION, _ORT_IN_NAME if _ORT_SESSION is not None: return import onnxruntime as ort from huggingface_hub import hf_hub_download print("[AgeModel] Downloading ONNX from HuggingFace Hub …") onnx_path = hf_hub_download( repo_id = "TrungTran/faceage_ClientScan", filename = "faceage_dino_fp32.onnx", token = _HF_TOKEN, ) opts = ort.SessionOptions() opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL opts.intra_op_num_threads = 4 _ORT_SESSION = ort.InferenceSession( onnx_path, sess_options=opts, providers=["CPUExecutionProvider"], ) _ORT_IN_NAME = _ORT_SESSION.get_inputs()[0].name print(f"[AgeModel] Ready ({onnx_path})") def _predict_crop(face_rgb: np.ndarray) -> dict: x = _preprocess(face_rgb) age_logits, gender_logits = _ORT_SESSION.run(None, {_ORT_IN_NAME: x}) age = _decode_age(age_logits[0]) gender, conf = _decode_gender(gender_logits[0]) return {"age": age, "gender": gender, "conf": conf} # --------------------------------------------------------------------------- # YuNet face detector (cv2.FaceDetectorYN, loaded from HuggingFace Hub) # --------------------------------------------------------------------------- _YUNET_REPO = "opencv/face_detection_yunet" _YUNET_FILE = "face_detection_yunet_2023mar.onnx" _DETECTOR = None def _load_detector(): global _DETECTOR if _DETECTOR is not None: return from huggingface_hub import hf_hub_download import cv2 try: yunet_path = hf_hub_download(repo_id=_YUNET_REPO, filename=_YUNET_FILE) print(f"[YuNet] Model: {yunet_path}") except Exception as e: print(f"[YuNet] Download failed: {e} — face detection disabled") _DETECTOR = "unavailable" return try: _DETECTOR = cv2.FaceDetectorYN.create( model = yunet_path, config = "", input_size = (320, 320), score_threshold = 0.6, nms_threshold = 0.3, top_k = 100, ) print("[YuNet] Face detector ready") except Exception as e: print(f"[YuNet] Init failed: {e} — face detection disabled") _DETECTOR = "unavailable" _FACE_PAD = 0.10 # 10% proportional padding — matches LAGENDA benchmark MAE=3.555 def _detect_faces(img_rgb: np.ndarray, min_face_px: int = 20) -> list[tuple[int, int, int, int]]: """ Returns list of (x0, y0, x1, y1) with 10% proportional padding, sorted by area desc. Falls back to empty list if YuNet is unavailable. """ if _DETECTOR == "unavailable" or _DETECTOR is None: return [] import cv2 h, w = img_rgb.shape[:2] img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR) _DETECTOR.setInputSize((w, h)) _, faces = _DETECTOR.detect(img_bgr) # None or Nx15: [x,y,w,h, ...] if faces is None: return [] bboxes = [] for face in faces: x, y, fw, fh = face[:4].astype(int) # 10% proportional padding (matches training/benchmark setup) pw = int(fw * _FACE_PAD) ph = int(fh * _FACE_PAD) x0 = max(0, x - pw) y0 = max(0, y - ph) x1 = min(w, x + fw + pw) y1 = min(h, y + fh + ph) if (x1 - x0) >= min_face_px and (y1 - y0) >= min_face_px: bboxes.append((x0, y0, x1, y1)) bboxes.sort(key=lambda b: (b[2] - b[0]) * (b[3] - b[1]), reverse=True) return bboxes # --------------------------------------------------------------------------- # Drawing # --------------------------------------------------------------------------- _PALETTE = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7", "#DDA0DD", "#98D8C8", "#F7DC6F"] _FONT_PATHS = [ "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", "/usr/share/fonts/dejavu/DejaVuSans-Bold.ttf", "/System/Library/Fonts/Helvetica.ttc", ] def _get_font(size: int): for path in _FONT_PATHS: try: return ImageFont.truetype(path, size) except Exception: pass return ImageFont.load_default() def _draw_results(pil_img: Image.Image, results: list[dict]) -> Image.Image: draw = ImageDraw.Draw(pil_img) font_lg = _get_font(20) font_sm = _get_font(15) for i, r in enumerate(results): color = _PALETTE[i % len(_PALETTE)] bbox = r.get("bbox") label = f"{r['gender']} {r['age']:.1f} y" if bbox: x0, y0, x1, y1 = bbox # Box draw.rectangle([x0, y0, x1, y1], outline=color, width=3) # Label background tw = int(draw.textlength(label, font=font_lg)) th = 24 lx0, ly0 = x0, max(0, y0 - th - 4) draw.rectangle([lx0, ly0, lx0 + tw + 10, ly0 + th + 4], fill=color) draw.text((lx0 + 5, ly0 + 2), label, fill="white", font=font_lg) else: # Full-image fallback — overlay in top-left corner full_label = f"{r['gender']} {r['age']:.1f} y ({r['conf']:.0%})" tw = int(draw.textlength(full_label, font=font_lg)) draw.rectangle([8, 8, tw + 18, 38], fill=color) draw.text((13, 10), full_label, fill="white", font=font_lg) return pil_img # --------------------------------------------------------------------------- # Main predict function # --------------------------------------------------------------------------- def predict(image: Image.Image, max_faces: int, conf_thresh: float) -> tuple[Image.Image, str]: if image is None: return None, "⬆️ Please upload a photo." _load_age_model() _load_detector() img_rgb = np.asarray(image.convert("RGB")) bboxes = _detect_faces(img_rgb)[:max_faces] results = [] if bboxes: for bbox in bboxes: x0, y0, x1, y1 = bbox crop = img_rgb[y0:y1, x0:x1] r = _predict_crop(crop) r["bbox"] = bbox results.append(r) else: # No faces found — run on the entire image r = _predict_crop(img_rgb) results.append(r) # Annotated output image out_img = image.convert("RGB").copy() out_img = _draw_results(out_img, results) # Text summary lines = [] mode = f"({len(bboxes)} face{'s' if len(bboxes)!=1 else ''} detected)" \ if bboxes else "(no face detected — full image used)" lines.append(f"**{mode}**\n") for i, r in enumerate(results, 1): icon = "👨" if r["gender"] == "male" else "👩" lines.append( f"{icon} **Face {i}** — Age **{r['age']:.1f}** · " f"{r['gender']} ({r['conf']:.0%})" ) summary = "\n".join(lines) return out_img, summary # --------------------------------------------------------------------------- # Gradio UI # --------------------------------------------------------------------------- _DESC = """ ## FaceAge ClientScan — Age & Gender Estimation Upload a photo. **YuNet** auto-detects faces, then **FaceAge ClientScan** predicts age and gender. | | | |--|--| | 🏆 LAGENDA 84k MAE | **3.555** (beats MiVOLO v2 paper 3.650, face-only) | | 🧠 Backbone | DINOv3-ViT-L/16 (Meta AI, 307M params) | | ⚡ Speed | ~100 ms / face on CPU (ONNX FP32) | | 🔍 Detector | YuNet (OpenCV, ~350 KB) | [📄 Model Card](https://huggingface.co/TrungTran/faceage_ClientScan) """ with gr.Blocks(title="FaceAge ClientScan", theme=gr.themes.Soft()) as demo: gr.Markdown(_DESC) with gr.Row(): with gr.Column(scale=1): inp_img = gr.Image(type="pil", label="📷 Upload photo or use webcam", sources=["upload", "webcam", "clipboard"]) with gr.Row(): inp_max = gr.Slider(1, 10, value=5, step=1, label="Max faces") inp_conf = gr.Slider(0.3, 0.9, value=0.6, step=0.05, label="Detection confidence") btn = gr.Button("🔍 Predict", variant="primary", size="lg") with gr.Column(scale=1): out_img = gr.Image(type="pil", label="Result") out_text = gr.Markdown() btn.click( fn = predict, inputs = [inp_img, inp_max, inp_conf], outputs = [out_img, out_text], ) gr.Markdown(""" --- *Our Collection: 4M Images.* *DINOv3-ViT-L pretrained by Meta AI on LVD-1.68B images.* """) if __name__ == "__main__": demo.launch()