cledouxluma committed on
Commit
2cc83d8
·
verified ·
1 Parent(s): 20e9cd1

Upload models/losses.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. models/losses.py +197 -0
models/losses.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Loss functions for SCRFD face detection.
3
+
4
+ SCRFD uses:
5
+ 1. Generalized Focal Loss (GFL/QFL) for classification — jointly represents
6
+ classification score and localization quality in a single prediction.
7
+ 2. DIoU Loss for bounding box regression — better gradient signal for
8
+ non-overlapping boxes and directly minimizes distance between box centers.
9
+
10
+ References:
11
+ - GFL: "Generalized Focal Loss" (Li et al., 2020)
12
+ - DIoU: "Distance-IoU Loss" (Zheng et al., 2020)
13
+ """
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+ import torch.nn.functional as F
18
+ from typing import Optional
19
+
20
+
21
class GFocalLoss(nn.Module):
    """
    Quality Focal Loss (QFL) — Generalized Focal Loss for classification.

    Instead of binary {0,1} targets, QFL uses continuous quality scores in
    [0, 1], where the target is the IoU between predicted and GT boxes.
    This jointly trains classification confidence and localization quality.

        Loss = -|y - σ|^β * ((1 - y) * log(1 - σ) + y * log(σ))

    where y ∈ [0, 1] is the quality target, σ = sigmoid(pred) is the predicted
    score and β is the focusing parameter.

    Args:
        beta: exponent applied to |y - σ| (the focusing parameter).
        reduction: 'mean' or 'sum'; any other value returns the unreduced,
            per-element loss.
    """

    def __init__(self, beta: float = 2.0, reduction: str = 'mean'):
        super().__init__()
        self.beta = beta
        self.reduction = reduction

    def forward(self, pred: torch.Tensor, target: torch.Tensor,
                weight: Optional[torch.Tensor] = None) -> torch.Tensor:
        """
        Args:
            pred: [N] predicted scores (logits)
            target: [N] quality targets in [0, 1]; integer/bool labels are
                cast to the dtype of ``pred``
            weight: [N] optional sample weights

        Returns:
            For 'mean': the summed loss divided by ``weight.sum()`` when a
            weight is given, otherwise by the number of targets > 0 (the
            divisor is never smaller than 1). For 'sum': the summed loss.
            Otherwise: the per-element loss with the same shape as ``pred``.
        """
        # The BCE-with-logits call below expects a floating-point target;
        # accept hard integer/bool labels too, as FocalLoss does.
        if not target.is_floating_point():
            target = target.to(pred.dtype)

        pred_sigmoid = pred.sigmoid()
        # Modulating factor: down-weights samples whose score already
        # matches the quality target.
        scale_factor = (pred_sigmoid - target).abs().pow(self.beta)

        # Binary cross-entropy with continuous targets
        bce = F.binary_cross_entropy_with_logits(pred, target, reduction='none')
        loss = scale_factor * bce

        if weight is not None:
            loss = loss * weight

        if self.reduction == 'mean':
            normalizer = weight.sum() if weight is not None else target.gt(0).sum()
            # clamp keeps the divisor >= 1 as a tensor op instead of calling
            # Python max() on a tensor (which needs a tensor -> bool conversion).
            return loss.sum() / normalizer.clamp(min=1)
        if self.reduction == 'sum':
            return loss.sum()
        return loss
62
+
63
+
64
class FocalLoss(nn.Module):
    """
    Standard Focal Loss for binary classification.

        FL(p) = -α * (1 - p)^γ * log(p)        for positives
              = -(1 - α) * p^γ * log(1 - p)    for negatives

    Used as a fallback when QFL is not appropriate.

    Args:
        alpha: weight given to positive samples (negatives get 1 - alpha).
        gamma: focusing exponent applied to (1 - p_t).
        reduction: 'mean' or 'sum'; any other value returns the unreduced,
            per-element loss.
    """

    def __init__(self, alpha: float = 0.25, gamma: float = 2.0,
                 reduction: str = 'mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Args:
            pred: predicted scores (logits)
            target: labels with the same shape as ``pred``; cast to float

        Returns:
            The loss reduced according to ``self.reduction``.
        """
        labels = target.float()
        prob = torch.sigmoid(pred)

        # p_t: probability the model assigns to the labelled class.
        p_t = prob * labels + (1 - prob) * (1 - labels)
        modulating = torch.pow(1 - p_t, self.gamma)
        # alpha for positives, (1 - alpha) for negatives.
        alpha_t = self.alpha * labels + (1 - self.alpha) * (1 - labels)

        cross_entropy = F.binary_cross_entropy_with_logits(
            pred, labels, reduction='none'
        )
        per_element = alpha_t * modulating * cross_entropy

        if self.reduction == 'sum':
            return per_element.sum()
        if self.reduction == 'mean':
            return per_element.mean()
        return per_element
98
+
99
+
100
class DIoULoss(nn.Module):
    """
    Distance-IoU Loss for bounding box regression.

        DIoU = IoU - (ρ²(b, b_gt) / c²)

    where ρ is the Euclidean distance between box centers and c is the
    diagonal length of the smallest enclosing box. This provides better
    gradients for non-overlapping boxes (common with tiny faces) and
    directly optimizes center alignment.

        Loss = 1 - DIoU ∈ [0, 2]

    Args:
        reduction: 'mean' or 'sum'; any other value returns the unreduced,
            per-box loss.
    """

    def __init__(self, reduction: str = 'mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred: torch.Tensor, target: torch.Tensor,
                weight: Optional[torch.Tensor] = None) -> torch.Tensor:
        """
        Args:
            pred: [..., 4] predicted boxes (x1, y1, x2, y2), typically [N, 4]
            target: [..., 4] target boxes (x1, y1, x2, y2)
            weight: optional per-box weights matching the leading
                dimensions (``[N]`` for ``[N, 4]`` boxes)

        Returns:
            For 'mean': the summed loss divided by ``weight.sum()`` when a
            weight is given, otherwise by the number of boxes (the divisor
            is never smaller than 1). For 'sum': the summed loss.
            Otherwise: the per-box loss.
        """
        eps = 1e-6  # keeps the divisions finite for zero-area / coincident boxes

        # Index the last axis so any number of leading dimensions works.
        px1, py1, px2, py2 = pred[..., 0], pred[..., 1], pred[..., 2], pred[..., 3]
        tx1, ty1, tx2, ty2 = target[..., 0], target[..., 1], target[..., 2], target[..., 3]

        # Intersection (clamped so disjoint boxes give zero overlap)
        inter_w = (torch.min(px2, tx2) - torch.max(px1, tx1)).clamp(min=0)
        inter_h = (torch.min(py2, ty2) - torch.max(py1, ty1)).clamp(min=0)
        inter = inter_w * inter_h

        # Union
        area_pred = (px2 - px1) * (py2 - py1)
        area_target = (tx2 - tx1) * (ty2 - ty1)
        union = area_pred + area_target - inter

        iou = inter / (union + eps)

        # Squared distance between box centers
        center_dist_sq = (
            ((px1 + px2) / 2 - (tx1 + tx2) / 2).pow(2)
            + ((py1 + py2) / 2 - (ty1 + ty2) / 2).pow(2)
        )

        # Squared diagonal of the smallest enclosing box
        enclose_w = torch.max(px2, tx2) - torch.min(px1, tx1)
        enclose_h = torch.max(py2, ty2) - torch.min(py1, ty1)
        enclose_diag_sq = enclose_w.pow(2) + enclose_h.pow(2)

        diou = iou - center_dist_sq / (enclose_diag_sq + eps)
        loss = 1 - diou

        if weight is not None:
            loss = loss * weight

        if self.reduction == 'mean':
            if weight is not None:
                # clamp keeps the divisor >= 1 as a tensor op instead of
                # calling Python max() on a tensor.
                return loss.sum() / weight.sum().clamp(min=1)
            # numel() equals shape[0] for [N, 4] inputs.
            return loss.sum() / max(loss.numel(), 1)
        if self.reduction == 'sum':
            return loss.sum()
        return loss
165
+
166
+
167
class LandmarkLoss(nn.Module):
    """
    Smooth L1 loss for facial landmark regression (optional multi-task head).

    Used when landmark annotations are available (e.g., RetinaFace 5-point
    landmarks on WIDER FACE). Auxiliary landmark supervision improves
    detection AP by ~1% (RetinaFace paper finding).

    Args:
        beta: threshold passed to ``F.smooth_l1_loss``.
        reduction: 'mean' or 'sum'; any other value returns the unreduced,
            per-face loss.
    """

    def __init__(self, beta: float = 1.0, reduction: str = 'mean'):
        super().__init__()
        self.beta = beta
        self.reduction = reduction

    def forward(self, pred: torch.Tensor, target: torch.Tensor,
                weight: Optional[torch.Tensor] = None) -> torch.Tensor:
        """
        Args:
            pred: [N, 10] predicted landmarks (5 points × 2 coords)
            target: [N, 10] target landmarks
            weight: [N] optional mask for visible landmarks

        Returns:
            For 'mean': the summed loss divided by ``weight.sum()`` when a
            weight is given, otherwise by N (the divisor is never smaller
            than 1). For 'sum': the summed loss. Otherwise: the [N]
            per-face loss.
        """
        loss = F.smooth_l1_loss(pred, target, beta=self.beta, reduction='none')
        loss = loss.sum(dim=1)  # Sum over 10 coords per face

        if weight is not None:
            loss = loss * weight

        if self.reduction == 'mean':
            if weight is not None:
                # clamp keeps the divisor >= 1 as a tensor op instead of
                # calling Python max() on a tensor.
                return loss.sum() / weight.sum().clamp(min=1)
            return loss.sum() / max(loss.shape[0], 1)
        if self.reduction == 'sum':
            return loss.sum()
        # Previously every non-'mean' reduction fell through to a sum;
        # return the per-face loss, as the other losses in this module do.
        return loss