Upload models/head.py with huggingface_hub
models/head.py  ADDED  (+176 -0)
@@ -0,0 +1,176 @@
"""
SCRFD Detection Head: shared-weight, multi-task, scale-aware.

Design from the SCRFD paper:
- Weight sharing across pyramid levels (parameter-efficient)
- GroupNorm for batch-size independence
- Separate cls and reg branches (GFL-style)
- Optional landmark branch (RetinaFace-style 5-point)

Output per anchor:
- Classification: 1 score (quality-aware face score via GFL)
- Box regression: 4 values (distances from anchor center to box edges)
- Landmarks (optional): 10 values (5 x/y offsets from anchor center)
"""

import math
from typing import Tuple

import torch
import torch.nn as nn


class SCRFDHead(nn.Module):
    """
    Shared detection head applied to each FPN level.

    Args:
        in_channels: Input channels from neck
        num_classes: Number of classes (1 for face detection)
        num_anchors: Anchors per spatial location per level
        feat_channels: Hidden channel width in head convolutions
        stacked_convs: Number of stacked 3×3 convs in each branch
        use_gn: Use GroupNorm (vs BatchNorm)
        use_landmarks: Enable 5-point landmark regression branch
    """

    def __init__(self,
                 in_channels: int = 64,
                 num_classes: int = 1,
                 num_anchors: int = 2,
                 feat_channels: int = 64,
                 stacked_convs: int = 2,
                 use_gn: bool = True,
                 use_landmarks: bool = False):
        super().__init__()
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.use_landmarks = use_landmarks

        def make_branch() -> nn.Sequential:
            """Conv-norm-ReLU stack; identical structure for every branch."""
            layers = []
            for i in range(stacked_convs):
                ch_in = in_channels if i == 0 else feat_channels
                layers.append(nn.Conv2d(ch_in, feat_channels, 3, 1, 1, bias=False))
                if use_gn:
                    # Largest group count <= 16 that divides feat_channels.
                    gn_groups = min(16, feat_channels)
                    while feat_channels % gn_groups != 0:
                        gn_groups -= 1
                    layers.append(nn.GroupNorm(gn_groups, feat_channels))
                else:
                    layers.append(nn.BatchNorm2d(feat_channels))
                layers.append(nn.ReLU(inplace=True))
            return nn.Sequential(*layers)

        # Classification branch
        self.cls_convs = make_branch()
        self.cls_out = nn.Conv2d(feat_channels, num_anchors * num_classes, 3, 1, 1)

        # Box regression branch
        self.reg_convs = make_branch()
        self.reg_out = nn.Conv2d(feat_channels, num_anchors * 4, 3, 1, 1)

        # Landmark branch (optional)
        if use_landmarks:
            self.lmk_convs = make_branch()
            self.lmk_out = nn.Conv2d(feat_channels, num_anchors * 10, 3, 1, 1)

        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Initialize cls bias for focal loss (prevents initial instability):
        # the head starts out predicting "face" with prior probability 0.01.
        prior_prob = 0.01
        bias_init = -math.log((1 - prior_prob) / prior_prob)
        nn.init.constant_(self.cls_out.bias, bias_init)

    def forward_single(self, x: torch.Tensor) -> dict:
        """Forward pass for a single FPN level."""
        cls_feat = self.cls_convs(x)
        cls_score = self.cls_out(cls_feat)  # [B, A*C, H, W]

        reg_feat = self.reg_convs(x)
        bbox_pred = self.reg_out(reg_feat)  # [B, A*4, H, W]

        result = {'cls_score': cls_score, 'bbox_pred': bbox_pred}

        if self.use_landmarks:
            lmk_feat = self.lmk_convs(x)
            lmk_pred = self.lmk_out(lmk_feat)  # [B, A*10, H, W]
            result['lmk_pred'] = lmk_pred

        return result

    def forward(self, features: Tuple[torch.Tensor, ...]) -> dict:
        """
        Forward on all FPN levels.

        Args:
            features: (P3, P4, P5) from neck

        Returns:
            dict with keys 'cls_scores', 'bbox_preds', and optionally
            'lmk_preds'; each value is a list of tensors, one per level.
        """
        cls_scores = []
        bbox_preds = []
        lmk_preds = []

        for feat in features:
            out = self.forward_single(feat)
            cls_scores.append(out['cls_score'])
            bbox_preds.append(out['bbox_pred'])
            if self.use_landmarks:
                lmk_preds.append(out['lmk_pred'])

        result = {'cls_scores': cls_scores, 'bbox_preds': bbox_preds}
        if self.use_landmarks:
            result['lmk_preds'] = lmk_preds
        return result


# ──────────────────────── Configuration presets ────────────────────────

HEAD_CONFIGS = {
    'scrfd_34g': dict(feat_channels=64, stacked_convs=3),
    'scrfd_10g': dict(feat_channels=56, stacked_convs=2),
    'scrfd_2.5g': dict(feat_channels=40, stacked_convs=2),
    'scrfd_0.5g': dict(feat_channels=16, stacked_convs=2),
}


def build_head(name: str, in_channels: int, **kwargs) -> SCRFDHead:
    """Build detection head by model name; unknown names fall back to
    SCRFDHead defaults."""
    # Copy the preset so kwargs overrides never mutate HEAD_CONFIGS itself.
    cfg = dict(HEAD_CONFIGS.get(name, {}))
    cfg.update(kwargs)
    return SCRFDHead(in_channels=in_channels, **cfg)
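
A quick smoke test of the head's API, for readers of this commit. This is a minimal sketch, assuming the neck emits three 64-channel maps at strides 8/16/32 for a 640×640 input and that the file is importable as models.head; the feature shapes are illustrative, not taken from this repo's config.

import torch

from models.head import build_head

head = build_head('scrfd_0.5g', in_channels=64, use_landmarks=True)
feats = (
    torch.randn(1, 64, 80, 80),  # P3, stride 8
    torch.randn(1, 64, 40, 40),  # P4, stride 16
    torch.randn(1, 64, 20, 20),  # P5, stride 32
)
out = head(feats)
for cls, box, lmk in zip(out['cls_scores'], out['bbox_preds'], out['lmk_preds']):
    # With the default num_anchors=2, channels are 2*1, 2*4 and 2*10.
    print(cls.shape, box.shape, lmk.shape)
# torch.Size([1, 2, 80, 80]) torch.Size([1, 8, 80, 80]) torch.Size([1, 20, 80, 80]), etc.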
|
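
The docstring describes box regression as distances from the anchor center to the four box edges. The sketch below shows one common way to decode those values, assuming stride-normalized (left, top, right, bottom) outputs as in SCRFD-style decoders; distance2bbox here is an illustrative helper, not a function from this file, and the repo's actual decode may clamp or scale differently.

import torch

def distance2bbox(points: torch.Tensor, distances: torch.Tensor,
                  stride: float) -> torch.Tensor:
    """points: [N, 2] anchor centers (x, y) in pixels;
    distances: [N, 4] predicted (l, t, r, b) in stride units.
    Returns [N, 4] boxes as (x1, y1, x2, y2) in pixels."""
    d = distances * stride
    x1 = points[:, 0] - d[:, 0]
    y1 = points[:, 1] - d[:, 1]
    x2 = points[:, 0] + d[:, 2]
    y2 = points[:, 1] + d[:, 3]
    return torch.stack([x1, y1, x2, y2], dim=-1)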