File size: 7,316 Bytes
f19fabc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | """
SCRFD Backbones β NAS-searched ResNet-style with computation redistribution.
Key insight from paper: Standard classification backbones over-invest compute in
C5 features (stride 32), which are useless for tiny face detection. SCRFD
redistributes compute toward earlier stages (C2/C3) for stride-8 feature quality.
Configurations (from paper Table 3):
- SCRFD-34GF: stages=[3,12,28,3], widths=[56,88,248,304], groups=[1,1,1,1]
- SCRFD-10GF: stages=[3,10,16,3], widths=[36,64,144,224], groups=[1,1,1,1]
- SCRFD-2.5GF: stages=[2,4,4,3], widths=[24,48,96,160], groups=[1,1,1,1]
- SCRFD-0.5GF: stages=[2,2,4,2], widths=[16,32,64,128], groups=[1,1,1,1]
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import List, Tuple, Dict, Optional
import math
class ConvBNReLU(nn.Module):
"""Conv + BatchNorm + ReLU building block."""
def __init__(self, in_ch: int, out_ch: int, kernel: int = 3,
stride: int = 1, padding: int = 1, groups: int = 1,
use_relu: bool = True):
super().__init__()
self.conv = nn.Conv2d(in_ch, out_ch, kernel, stride, padding,
groups=groups, bias=False)
self.bn = nn.BatchNorm2d(out_ch)
self.relu = nn.ReLU(inplace=True) if use_relu else nn.Identity()
def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.relu(self.bn(self.conv(x)))
class BasicBlock(nn.Module):
"""ResNet BasicBlock with optional group convolution."""
expansion = 1
def __init__(self, in_ch: int, out_ch: int, stride: int = 1,
groups: int = 1, downsample: Optional[nn.Module] = None):
super().__init__()
self.conv1 = ConvBNReLU(in_ch, out_ch, 3, stride, 1, groups)
self.conv2 = ConvBNReLU(out_ch, out_ch, 3, 1, 1, groups, use_relu=False)
self.downsample = downsample
self.relu = nn.ReLU(inplace=True)
def forward(self, x: torch.Tensor) -> torch.Tensor:
identity = x
out = self.conv1(x)
out = self.conv2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
return self.relu(out)
class BottleneckBlock(nn.Module):
"""ResNet Bottleneck with optional group convolution."""
expansion = 4
def __init__(self, in_ch: int, out_ch: int, stride: int = 1,
groups: int = 1, downsample: Optional[nn.Module] = None):
super().__init__()
mid_ch = out_ch # bottleneck width
self.conv1 = ConvBNReLU(in_ch, mid_ch, 1, 1, 0)
self.conv2 = ConvBNReLU(mid_ch, mid_ch, 3, stride, 1, groups)
self.conv3 = ConvBNReLU(mid_ch, out_ch * self.expansion, 1, 1, 0, use_relu=False)
self.downsample = downsample
self.relu = nn.ReLU(inplace=True)
def forward(self, x: torch.Tensor) -> torch.Tensor:
identity = x
out = self.conv1(x)
out = self.conv2(out)
out = self.conv3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
return self.relu(out)
class SCRFDBackbone(nn.Module):
"""
SCRFD backbone with NAS-searched stage depths and widths.
For SCRFD, we use BasicBlock (expansion=1) since the searched widths
already account for channel capacity β no need for bottleneck expansion.
Returns feature maps at strides [8, 16, 32] (C3, C4, C5).
"""
def __init__(self, stages: List[int], widths: List[int],
groups: List[int] = None, in_channels: int = 3,
block_type: str = 'basic'):
super().__init__()
assert len(stages) == 4 and len(widths) == 4
if groups is None:
groups = [1, 1, 1, 1]
Block = BasicBlock if block_type == 'basic' else BottleneckBlock
# Stem: stride 2 conv + stride 2 maxpool β effective stride 4
self.stem = nn.Sequential(
ConvBNReLU(in_channels, widths[0], 3, 2, 1),
ConvBNReLU(widths[0], widths[0], 3, 1, 1),
nn.MaxPool2d(3, 2, 1),
)
# Stage 1: stride 1 (output stride = 4)
self.layer1 = self._make_layer(Block, widths[0], widths[0], stages[0],
stride=1, groups=groups[0])
# Stage 2: stride 2 (output stride = 8) β C3
self.layer2 = self._make_layer(Block, widths[0], widths[1], stages[1],
stride=2, groups=groups[1])
# Stage 3: stride 2 (output stride = 16) β C4
self.layer3 = self._make_layer(Block, widths[1], widths[2], stages[2],
stride=2, groups=groups[2])
# Stage 4: stride 2 (output stride = 32) β C5
self.layer4 = self._make_layer(Block, widths[2], widths[3], stages[3],
stride=2, groups=groups[3])
self.out_channels = [widths[1], widths[2], widths[3]]
self._init_weights()
def _make_layer(self, block, in_ch: int, out_ch: int, num_blocks: int,
stride: int = 1, groups: int = 1) -> nn.Sequential:
downsample = None
if stride != 1 or in_ch != out_ch * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(in_ch, out_ch * block.expansion, 1, stride, bias=False),
nn.BatchNorm2d(out_ch * block.expansion),
)
layers = [block(in_ch, out_ch, stride, groups, downsample)]
in_ch = out_ch * block.expansion
for _ in range(1, num_blocks):
layers.append(block(in_ch, out_ch, 1, groups))
return nn.Sequential(*layers)
def _init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
x = self.stem(x)
c2 = self.layer1(x) # stride 4
c3 = self.layer2(c2) # stride 8
c4 = self.layer3(c3) # stride 16
c5 = self.layer4(c4) # stride 32
return c3, c4, c5
# ββββββββββββββββββββββββ Configuration presets ββββββββββββββββββββββββ
BACKBONE_CONFIGS = {
'scrfd_34g': dict(stages=[3, 12, 28, 3], widths=[56, 88, 248, 304]),
'scrfd_10g': dict(stages=[3, 10, 16, 3], widths=[36, 64, 144, 224]),
'scrfd_2.5g': dict(stages=[2, 4, 4, 3], widths=[24, 48, 96, 160]),
'scrfd_0.5g': dict(stages=[2, 2, 4, 2], widths=[16, 32, 64, 128]),
# ResNet variants for comparison
'resnet50': dict(stages=[3, 4, 6, 3], widths=[64, 128, 256, 512], block_type='bottleneck'),
'resnet18': dict(stages=[2, 2, 2, 2], widths=[64, 128, 256, 512]),
}
def build_backbone(name: str, **kwargs) -> SCRFDBackbone:
"""Build a backbone by name."""
if name not in BACKBONE_CONFIGS:
raise ValueError(f"Unknown backbone: {name}. Options: {list(BACKBONE_CONFIGS.keys())}")
cfg = {**BACKBONE_CONFIGS[name], **kwargs}
return SCRFDBackbone(**cfg)
|