| |
| |
| |
| |
| import torch |
| import torch.nn as nn |
| from torch.autograd import Variable |
| from library.ssd import jaccard, intersect |
| import numpy as np |
|
|
class SSDSingleClassLoss(nn.Module):
    """SSD loss for a single foreground class (person vs. background).

    Compute targets:
        1) Produce indices for positive matches by matching ground-truth boxes
           with (default) anchor boxes that have jaccard index >= the threshold
           parameter (default threshold: 0.5).
        2) Calculate location and confidence loss for positive matches.
        3) Hard negative mining to filter the excessive number of negative
           examples that comes with using a large number of default boxes:
           - Negative-match background confidences are sorted in ascending
             order (least confident predictions first).
           - If a positive match exists, Nneg = Mining_Neg2PosRatio * Npos,
             clipped below by min_NegMiningSample; the Nneg smallest background
             confidences are selected, and values >= maxBackroundCFforLossCalc
             are omitted from the loss.
           - If there is no positive match, the min_NegMiningSample least
             confident background predictions are taken into the loss.

    Objective loss:
        L(x,c,l,g) = [LconfPosMatch(x,c) / Npos]
                   + [(lambda * LconfNegMatch(x,c)) / Nneg]
                   + [(alpha * Lloc(x,l,g)) / Npos]

    where LconfPosMatch is the -log person-class confidence loss of positive
    matched boxes, LconfNegMatch is the -log background-class confidence loss
    of mined negative boxes, and Lloc is the SmoothL1 loss weighted by alpha
    (set to 1 by cross-validation in the original multiclass SSD).
    NOTE(review): the -log terms assume pred_CF already holds probabilities
    (softmax applied upstream) — confirm against the model head.
    """

    def __init__(self, Anchor_box_wh, Anchor_box_xy, alpha=1, Jaccardtreshold=0.5,
                 Mining_Neg2PosRatio=6, min_NegMiningSample=10,
                 maxBackroundCFforLossCalc=0.5, negConfLosslambda=1.0,
                 regularizedLayers=None):
        """
        Args:
            Anchor_box_wh: (tensor) anchor boxes in (cx, cy, w, h) form in the
                original image, shape [numPreds, 4].
            Anchor_box_xy: (tensor) anchor boxes in (xmin, ymin, xmax, ymax)
                form in the original image, shape [numPreds, 4].
            alpha: weight of the localization loss.
            Jaccardtreshold: IoU threshold for declaring a positive match.
            Mining_Neg2PosRatio: negatives kept per positive in hard mining.
            min_NegMiningSample: lower clip for the mined-negative count; also
                the count used when there is no positive match at all.
            maxBackroundCFforLossCalc: mined negatives whose background
                confidence is >= this value are dropped from the loss.
            negConfLosslambda: weight (lambda) of the negative confidence loss.
            regularizedLayers: optional iterable of (layer_name, lamb) pairs;
                each named layer of the model passed to forward() contributes
                an L2 penalty weighted by lamb and normalized by its
                parameter count.
        """
        super(SSDSingleClassLoss, self).__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.Anchor_box_wh = Anchor_box_wh
        self.Anchor_box_xy = Anchor_box_xy
        self.alpha = alpha
        self.Jaccardtreshold = Jaccardtreshold

        self.Mining_Neg2PosRatio = Mining_Neg2PosRatio
        self.min_NegMiningSample = min_NegMiningSample
        self.maxBackroundCFforLossCalc = maxBackroundCFforLossCalc
        self.negConfLosslambda = negConfLosslambda

        self.regularizedLayers = regularizedLayers

        # SSD box-encoding variances for (cx, cy) and (w, h).
        self.var_x = 0.1
        self.var_y = 0.1
        self.var_w = 0.2
        self.var_h = 0.2

    def _mined_background_conf_loss_no_pos(self, pred_CF):
        """Negative confidence loss when no positive match exists.

        Takes the ``min_NegMiningSample`` least confident background
        predictions into the loss, normalized by that same count and
        weighted by ``negConfLosslambda``.
        """
        negCFs_sorted, _ = pred_CF[:, 1].view(-1, 1).sort(0, descending=False)
        num_mined = int(self.min_NegMiningSample)
        mined = negCFs_sorted[0:num_mined]
        loss = -torch.log(mined).sum()
        return (loss / num_mined) * self.negConfLosslambda

    def forward(self, pred_box_delt, pred_CF, GT_box_wh, model=None):
        """Multibox loss.

        Args:
            pred_box_delt: (tensor) location predictions in delta form
                (dcx, dcy, dw, dh), shape [numPreds, 4].
            pred_CF: (tensor) confidence predictions (person, background),
                shape [numPreds, 2].
            GT_box_wh: (tensor) ground-truth boxes; column 0 is dropped
                (presumably an image/label index — verify against caller),
                remaining columns are (xmin, ymin, w, h), shape [numObjects, 5].
            model: optional model whose ``regularizedLayers`` contribute an
                L2 penalty to the returned localization loss.

        Returns:
            (loss_l + reg, loss_c): localization loss plus regularization
            term, and confidence loss — scalar tensors on ``self.device``.
        """
        device = self.device
        reg = torch.tensor(.0).to(device)

        if len(GT_box_wh) == 0:
            # BUGFIX: this branch previously never assigned loss_c, which made
            # the final return raise NameError; it also left loss_l on the CPU,
            # breaking `loss_l + reg` when running on CUDA. With no ground
            # truth, every prediction is a negative — mine as in the
            # no-positive-match case.
            loss_l = torch.tensor(.0).to(device)
            loss_c = self._mined_background_conf_loss_no_pos(pred_CF)
        else:
            GT_box_wh = GT_box_wh[:, 1:]  # drop the leading index column

            # Ground truth in (cx, cy, w, h) form for box encoding.
            GT_box_cxcy_wh = GT_box_wh.clone().to(device)
            GT_box_cxcy_wh[:, 0] = GT_box_wh[:, 0] + GT_box_wh[:, 2] / 2
            GT_box_cxcy_wh[:, 1] = GT_box_wh[:, 1] + GT_box_wh[:, 3] / 2

            # Ground truth in (xmin, ymin, xmax, ymax) form for IoU matching.
            GT_box_xy = GT_box_wh.detach().clone().to(device)
            GT_box_xy[:, 2] = GT_box_wh[:, 2] + GT_box_wh[:, 0]
            GT_box_xy[:, 3] = GT_box_wh[:, 3] + GT_box_wh[:, 1]

            # Match anchors to ground truth: an anchor is positive for every GT
            # it overlaps by >= threshold, negative only if its best overlap is
            # below the threshold.
            JaccardIndices = jaccard(self.Anchor_box_xy, GT_box_xy)
            posMatches = torch.nonzero(JaccardIndices >= self.Jaccardtreshold)
            negMatchAnchIdx = torch.nonzero(
                JaccardIndices.max(dim=1).values < self.Jaccardtreshold).flatten()

            posMatchAnchIdx = posMatches[:, 0]
            posMatchGTIdx = posMatches[:, 1]

            pred_backGrCF = pred_CF[:, 1]
            negMatch_pred_backGrCF = pred_backGrCF[negMatchAnchIdx]

            posMatchAnchs = self.Anchor_box_wh[posMatchAnchIdx]
            num_pos = posMatches.shape[0]

            if num_pos:
                posMatch_pred_box_delt = pred_box_delt[posMatchAnchIdx]
                posMatch_pred_CF = pred_CF[posMatchAnchIdx][:, 0]
                posMatchGTs = GT_box_cxcy_wh[posMatchGTIdx]

                # Encode GT relative to its matched anchor (SSD offset
                # encoding, divided by the per-coordinate variances).
                ghat_cx = (posMatchGTs[:, 0] - posMatchAnchs[:, 0]) / posMatchAnchs[:, 2] / self.var_x
                ghat_cy = (posMatchGTs[:, 1] - posMatchAnchs[:, 1]) / posMatchAnchs[:, 3] / self.var_y
                ghat_w = torch.log(posMatchGTs[:, 2] / posMatchAnchs[:, 2]) / self.var_w
                ghat_h = torch.log(posMatchGTs[:, 3] / posMatchAnchs[:, 3]) / self.var_h
                ghat = torch.cat((ghat_cx.unsqueeze(1), ghat_cy.unsqueeze(1),
                                  ghat_w.unsqueeze(1), ghat_h.unsqueeze(1)), dim=1)

                # Localization loss: SmoothL1 over all positive-match deltas.
                smoothL1 = torch.nn.SmoothL1Loss(reduction='sum', beta=1.0).to(device)
                loc_loss = smoothL1(posMatch_pred_box_delt.view(1, -1), ghat.view(1, -1))

                # Positive confidence loss: -log of person-class confidence.
                posMatch_CF_loss = -torch.log(posMatch_pred_CF).sum()

                # Hard negative mining: least confident background first.
                negMatch_pred_backGrCF, _ = negMatch_pred_backGrCF.sort(0, descending=False)
                num_hardmined_negative = int(max(num_pos * self.Mining_Neg2PosRatio,
                                                 self.min_NegMiningSample))
                num_hardmined_negative = int(min(num_hardmined_negative,
                                                 negMatch_pred_backGrCF.shape[0]))
                negMatch_pred_backGrCF_mined = negMatch_pred_backGrCF[0:num_hardmined_negative]
                # Already-confident background predictions are excluded.
                negMatch_pred_backGrCF_mined = negMatch_pred_backGrCF_mined[
                    negMatch_pred_backGrCF_mined < self.maxBackroundCFforLossCalc]
                num_hardmined_negative = negMatch_pred_backGrCF_mined.shape[0]

                if num_hardmined_negative == 0:
                    # BUGFIX: keep the zero loss on the working device so the
                    # later addition with CUDA tensors does not fail.
                    negMatch_CF_loss = torch.tensor(.0).to(device)
                else:
                    negMatch_CF_loss = (-torch.log(negMatch_pred_backGrCF_mined).sum()
                                        / num_hardmined_negative) * self.negConfLosslambda

                loss_l = self.alpha * loc_loss / num_pos
                loss_c = posMatch_CF_loss / num_pos + negMatch_CF_loss
            else:
                # No positive match: no localization loss; mine the globally
                # least confident background predictions (BUGFIX: zero loss_l
                # placed on the working device).
                loss_l = torch.tensor(.0).to(device)
                loss_c = self._mined_background_conf_loss_no_pos(pred_CF)

        if model is not None and self.regularizedLayers is not None:
            # L2 regularization of selected layers, each weighted by its lamb
            # and normalized by the layer's parameter count.
            for layer, lamb in self.regularizedLayers:
                op = getattr(model, layer).op
                m = op.weight.numel() + op.bias.numel()
                reg += ((op.bias.view(1, -1) ** 2).sum()
                        + (op.weight.view(1, -1) ** 2).sum()) * lamb / m

        return loss_l + reg, loss_c