L1 / L2 / smooth_l1_loss / center_loss + Dice Loss + Focal Loss + various IoU losses + KL divergence
一. L1 / L2 / Smooth L1 / Center loss: formulas and code
1. Formulas
L1: $L_1(x) = |x|$

L2: $L_2(x) = x^2$

Smooth L1:

$$\mathrm{smooth}_{L1}(x) = \begin{cases} 0.5\,x^2 & |x| \le 1 \\ |x| - 0.5 & |x| > 1 \end{cases}$$
2. NumPy implementations of the three functions
```python
import numpy as np
import matplotlib.pyplot as plt

# y = |x|
def L1():
    x = np.arange(-2, 2, 0.01)
    y = abs(x)
    plt.figure()
    plt.plot(x, y, 'b', label='l1')
    # plt.show()

# y = x^2
def L2():
    x = np.arange(-2, 2, 0.01)
    y = x**2
    plt.plot(x, y, 'g', label='l2')
    # plt.show()

# y = 0.5*x**2   if |x| <= 1
# y = |x| - 0.5  if |x| > 1
def smooth_l1():
    x = np.arange(-2, 2, 0.01)
    t = abs(x)
    y = np.where(t <= 1, 0.5*t**2, t - 0.5)
    plt.plot(x, y, 'r', label='smooth_l1')
    plt.legend(loc='best')
    plt.show()

if __name__ == '__main__':
    L1()
    L2()
    smooth_l1()
```

As the plot shows, L1 has no unique derivative at 0, which can hurt convergence, while Smooth L1 is more robust to outliers than L2: it is insensitive to outliers/anomalous values, and its gradient changes more gently. In other words, when the predicted box is far from the ground truth the gradient does not blow up, and when the predicted box is close to the ground truth the gradient is small enough for stable fine-tuning.
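A quick look at the gradient (basic calculus, not from the original post) makes that last point explicit:

$$\frac{d\,\mathrm{smooth}_{L1}(x)}{dx} = \begin{cases} x & |x| \le 1 \\ \mathrm{sign}(x) & |x| > 1 \end{cases}$$

so the gradient magnitude is capped at 1 for large errors, whereas the L2 gradient $2x$ grows without bound and the L1 gradient stays at $\pm 1$ even for tiny errors.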
3. TensorFlow implementation of Smooth L1
Function:
```python
import tensorflow as tf

def smooth_l1_loss(y_true, y_pred):
    """Implements Smooth-L1 loss.
    y_true and y_pred are typically [N, 4], but could be any shape.
    """
    diff = tf.abs(y_true - y_pred)
    less_than_one = tf.cast(tf.less(diff, 1.0), "float32")
    loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
    print(loss)
    with tf.Session() as sess:  # TF1-style session, only used here for quick inspection
        sess.run(tf.global_variables_initializer())
        print(sess.run(loss))
    return loss
```

4. Softmax cross-entropy loss
Code:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class L(nn.Module):
    def __init__(self):
        super(L, self).__init__()

    def forward(self, out, label):
        loss = F.cross_entropy(out, label)
        return loss

def debug_softmax_loss():
    batch_size = 4
    class_nums = 10
    label = torch.tensor([1, 2, 3, 1])
    out = torch.rand(batch_size, class_nums)
    criterion = L()
    cost = criterion(out, label)
    print('==cost:', cost)

if __name__ == '__main__':
    debug_softmax_loss()
```

5. Modified Softmax loss
Goal: remove the influence of the weight norm and the bias on the loss.
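Concretely (my reading of the code below, following the usual "modified softmax" formulation): with the bias dropped and each weight column normalized to unit norm, the logit for class $j$ reduces to $\|x\|\cos\theta_j$, so the loss becomes

$$L_{modified} = -\log \frac{e^{\|x\|\cos\theta_{y_i}}}{\sum_j e^{\|x\|\cos\theta_j}}$$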
Code:
```python
class Modified(nn.Module):
    def __init__(self):
        super(Modified, self).__init__()
        self.weight = nn.Parameter(torch.Tensor(2, 10))  # (input, output)
        nn.init.xavier_uniform_(self.weight)
        self.weight.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)  # normalize the columns
        # renorm uses max-norm, so shrink first and then scale back up
        # to guard against norms smaller than 1

    def forward(self, x, label):
        w = self.weight
        w = w.renorm(2, 1, 1e-5).mul(1e5)  # normalize the columns
        out = x.mm(w)
        loss = F.cross_entropy(out, label)
        return loss

def debug_Modified_softmax_loss():
    batch_size = 4
    feature_nums = 2
    label = torch.tensor([1, 2, 3, 1])
    feature = torch.rand(batch_size, feature_nums)
    criterion = Modified()
    cost = criterion(feature, label)
    print('==cost:', cost)

if __name__ == '__main__':
    # debug_softmax_loss()
    debug_Modified_softmax_loss()
```

6. NormFace loss
On top of the previous loss, also remove the influence of the feature norm.
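In formula form (matching the code below, where the scale is $s=16$): both the feature and the weight columns are L2-normalized, so the logits are pure cosines scaled by $s$:

$$L_{norm} = -\log \frac{e^{s\cos\theta_{y_i}}}{\sum_j e^{s\cos\theta_j}}$$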
```python
class NormFace(nn.Module):
    def __init__(self):
        super(NormFace, self).__init__()
        self.weight = nn.Parameter(torch.Tensor(2, 10))  # (input, output)
        nn.init.xavier_uniform_(self.weight)
        self.weight.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.s = 16
        # renorm uses max-norm, so shrink first and then scale back up
        # to guard against norms smaller than 1

    def forward(self, x, label):
        cosine = F.normalize(x).mm(F.normalize(self.weight, dim=0))
        loss = F.cross_entropy(self.s * cosine, label)
        return loss

def debug_norm_loss():
    batch_size = 4
    feature_nums = 2
    label = torch.tensor([1, 2, 3, 1])
    feature = torch.rand(batch_size, feature_nums)
    criterion = NormFace()
    cost = criterion(feature, label)
    print('==cost:', cost)

if __name__ == '__main__':
    # debug_softmax_loss()
    # debug_Modified_softmax_loss()
    debug_norm_loss()
```

7. InsightFace (ArcSoftmax) loss
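ArcFace adds an additive angular margin $m$ to the target-class angle before scaling by $s$; this is the standard ArcFace formulation, and the code below implements $\cos(\theta + m)$ through the angle-sum identity:

$$L_{arc} = -\log \frac{e^{s\cos(\theta_{y_i}+m)}}{e^{s\cos(\theta_{y_i}+m)} + \sum_{j\neq y_i} e^{s\cos\theta_j}}$$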
```python
import math

class ArcMarginProduct(nn.Module):
    def __init__(self, s=32, m=0.5):
        super(ArcMarginProduct, self).__init__()
        self.in_feature = 2
        self.out_feature = 10
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.Tensor(2, 10))  # (input, output)
        nn.init.xavier_uniform_(self.weight)
        self.weight.data.renorm_(2, 1, 1e-5).mul_(1e5)
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # # To keep cos(theta + m) monotonically decreasing on [0, pi]:
        # self.th = math.cos(3.1415926 - m)
        # self.mm = math.sin(3.1415926 - m) * m

    def forward(self, x, label):
        cosine = F.normalize(x).mm(F.normalize(self.weight, dim=0))
        cosine = cosine.clamp(-1, 1)  # numerical stability
        sine = torch.sqrt(torch.max(1.0 - torch.pow(cosine, 2),
                                    torch.ones(cosine.shape) * 1e-7))  # numerical stability
        phi = cosine * self.cos_m - sine * self.sin_m  # angle-sum identity: cos(theta + m)
        # # To keep cos(theta + m) monotonically decreasing on [0, pi] (necessity unclear):
        # phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.s
        loss = F.cross_entropy(output, label)
        return output, loss

def debug_insight_loss():
    batch_size = 4
    feature_nums = 2
    label = torch.tensor([1, 2, 3, 1])
    feature = torch.rand(batch_size, feature_nums)
    criterion = ArcMarginProduct()
    _, cost = criterion(feature, label)
    print('==cost:', cost)

if __name__ == '__main__':
    # debug_softmax_loss()
    # debug_Modified_softmax_loss()
    # debug_norm_loss()
    debug_insight_loss()
```
8. Center loss
The center loss is:

$$L_C = \frac{1}{2}\sum_{i=1}^{m}\|x_i - c_{y_i}\|_2^2$$
where $c_{y_i}$ is the mean (center) of the deep features of the training samples of class $y_i$.
Since the center loss only considers intra-class variation while cross-entropy only considers inter-class separation, the two are usually combined so that each contributes its strength. The overall objective of the network can then be written as:

$$L = L_S + \lambda L_C$$
```python
class centerloss(nn.Module):
    def __init__(self):
        super(centerloss, self).__init__()
        self.center = nn.Parameter(10 * torch.randn(10, 2))
        self.lamda = 0.2
        self.weight = nn.Parameter(torch.Tensor(2, 10))  # (input, output)
        nn.init.xavier_uniform_(self.weight)

    def forward(self, feature, label):
        batch_size = label.size()[0]
        nCenter = self.center.index_select(dim=0, index=label)
        distance = feature.dist(nCenter)
        centerloss = (1 / 2.0 / batch_size) * distance
        out = feature.mm(self.weight)
        ceLoss = F.cross_entropy(out, label)
        return out, ceLoss + self.lamda * centerloss

def debug_center_loss():
    batch_size = 4
    feature_nums = 2
    label = torch.tensor([1, 2, 3, 1])
    feature = torch.rand(batch_size, feature_nums)
    criterion = centerloss()
    _, cost = criterion(feature, label)
    print('==cost:', cost)

if __name__ == '__main__':
    # debug_softmax_loss()
    # debug_Modified_softmax_loss()
    # debug_norm_loss()
    # debug_insight_loss()
    debug_center_loss()
```

9. Label smoothing loss
Goal: smooth the one-hot labels.
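With smoothing $\varepsilon$ and $K$ classes, the smoothed target used by the code below assigns $1-\varepsilon$ to the ground-truth class and $\varepsilon/(K-1)$ to every other class:

$$q_k = \begin{cases} 1-\varepsilon & k = y_i \\ \dfrac{\varepsilon}{K-1} & k \neq y_i \end{cases}, \qquad L = -\sum_{k=1}^{K} q_k \log p_k$$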
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class LabelSmoothLoss(nn.Module):
    def __init__(self, smoothing=0.0):
        super(LabelSmoothLoss, self).__init__()
        self.smoothing = smoothing

    def forward(self, input, target):
        log_prob = F.log_softmax(input, dim=-1)  # (n, class_nums)
        weight = input.new_ones(input.size()) * self.smoothing / (input.size(-1) - 1.)  # (n, class_nums)
        print('==weight:', weight)
        weight.scatter_(1, target.unsqueeze(-1), (1. - self.smoothing))  # (n, class_nums)
        print('==weight:', weight)
        loss = (-weight * log_prob).sum(dim=-1).mean()
        return loss

def debug_label_smooth_loss():
    batch_size = 4
    class_num = 10
    input_ = torch.rand(batch_size, class_num)
    label = torch.tensor([1, 2, 3, 1])
    criterion = LabelSmoothLoss(smoothing=0.1)
    cost = criterion(input_, label)
    print('==cost:', cost)

if __name__ == '__main__':
    debug_label_smooth_loss()
```

10. OHEM loss
OHEM keeps only negatives within a 3:1 negative-to-positive ratio when computing the loss and zeroes out the weight of the remaining negatives, whereas focal loss keeps all negatives and weights them by difficulty. The difference from focal loss is therefore that some hard negatives may fall outside the 3:1 budget and be dropped entirely.
```python
class BalanceCrossEntropyLoss(nn.Module):
    '''
    Balanced cross entropy loss.
    Shape:
        - Input: :math:`(N, 1, H, W)`
        - GT: :math:`(N, 1, H, W)`, same shape as the input
        - Mask: :math:`(N, H, W)`, same spatial shape as the input
        - Output: scalar.

    Examples::
        >>> m = nn.Sigmoid()
        >>> loss = nn.BCELoss()
        >>> input = torch.randn(3, requires_grad=True)
        >>> target = torch.empty(3).random_(2)
        >>> output = loss(m(input), target)
        >>> output.backward()
    '''

    def __init__(self, negative_ratio=3.0, eps=1e-6):
        super(BalanceCrossEntropyLoss, self).__init__()
        self.negative_ratio = negative_ratio
        self.eps = eps

    def forward(self,
                pred: torch.Tensor,
                gt: torch.Tensor,
                return_origin=False):
        '''
        Args:
            pred: shape :math:`(N, 1, H, W)`, the prediction of the network
            gt: shape :math:`(N, 1, H, W)`, the target
        '''
        positive = gt.byte()
        negative = (1 - gt).byte()
        positive_count = int(positive.float().sum())
        negative_count = min(int(negative.float().sum()),
                             int(positive_count * self.negative_ratio))
        loss = nn.functional.binary_cross_entropy(pred, gt, reduction='none')
        positive_loss = loss * positive.float()
        negative_loss = loss * negative.float()
        # negative_loss, _ = torch.topk(negative_loss.view(-1).contiguous(), negative_count)
        negative_loss, _ = negative_loss.view(-1).topk(negative_count)

        balance_loss = (positive_loss.sum() + negative_loss.sum()) / \
                       (positive_count + negative_count + self.eps)

        if return_origin:
            return balance_loss, loss
        if positive_count == 0:
            return loss.sum() / (int(negative.float().sum()) + self.eps)
        else:
            return balance_loss
```

二. Dice loss
1. Formula
The Dice coefficient is:

$$\mathrm{Dice} = \frac{2\,|X \cap Y|}{|X| + |Y|}$$
It is a measure of the similarity between two sets; for other distance measures, see the article cited in the references.
Dice loss can then be written as:

$$L_{Dice} = 1 - \frac{2\,|X \cap Y|}{|X| + |Y|}$$
The Dice coefficient can also be written in terms of true/false positives and negatives:

$$\mathrm{Dice} = \frac{2\,TP}{2\,TP + FP + FN}$$
This is exactly the F1 score, so optimizing Dice loss is in effect optimizing the F1 score.
Dice loss is a "region-based" loss: the loss and gradient at a pixel depend not only on that pixel's label and prediction, but also on the labels and predictions of all other pixels. This is unlike cross-entropy, and it is exactly this property that makes Dice loss focus on the positive region, whereas cross-entropy attends to every pixel, so when negatives dominate, the cross-entropy loss is mostly contributed by negative samples.
```python
class DiceLoss(nn.Module):
    '''
    Loss function from https://arxiv.org/abs/1707.03237,
    where the IoU computation is introduced in a heatmap manner to measure
    the diversity between two heatmaps.
    '''

    def __init__(self, eps=1e-6):
        super(DiceLoss, self).__init__()
        self.eps = eps

    def forward(self, pred: torch.Tensor, gt, weights=None):
        '''
        pred: one or two heatmaps of shape (N, 1, H, W);
              the losses of the two heatmaps are added together.
        gt: (N, 1, H, W)
        '''
        return self._compute(pred, gt, weights)

    def _compute(self, pred, gt, weights):
        if pred.dim() == 4:
            pred = pred[:, 0, :, :]
            gt = gt[:, 0, :, :]
        assert pred.shape == gt.shape
        # assert pred.shape == mask.shape
        # if weights is not None:
        #     assert weights.shape == mask.shape
        #     mask = weights * mask
        intersection = (pred * gt).sum()
        union = pred.sum() + gt.sum() + self.eps
        loss = 1 - 2.0 * intersection / union
        assert loss <= 1
        return loss
```

三. Focal loss
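This section is empty in the original post. As a placeholder consistent with the snippets above, here is a minimal binary focal-loss sketch; the class name `FocalLoss` and the defaults `alpha=0.25, gamma=2.0` follow the common RetinaNet convention rather than anything in this post.

```python
class FocalLoss(nn.Module):
    """Minimal binary focal loss sketch: FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t)."""

    def __init__(self, alpha=0.25, gamma=2.0, eps=1e-6):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.eps = eps

    def forward(self, pred, gt):
        # pred: probabilities in [0, 1]; gt: binary targets of the same shape
        pred = pred.clamp(self.eps, 1 - self.eps)
        # easy examples (p_t close to 1) are down-weighted by (1 - p_t)^gamma
        pos_loss = -self.alpha * (1 - pred) ** self.gamma * torch.log(pred) * gt
        neg_loss = -(1 - self.alpha) * pred ** self.gamma * torch.log(1 - pred) * (1 - gt)
        return (pos_loss + neg_loss).mean()
```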
四. Various IoU losses
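This section is also left empty in the original. Below is a minimal hedged sketch of an IoU / GIoU loss for axis-aligned boxes; the `(x1, y1, x2, y2)` box format, the function name `iou_loss`, and the `giou` flag are illustrative assumptions, not from the post.

```python
def iou_loss(pred, target, eps=1e-7, giou=False):
    # pred, target: (N, 4) boxes as (x1, y1, x2, y2)
    x1 = torch.max(pred[:, 0], target[:, 0])
    y1 = torch.max(pred[:, 1], target[:, 1])
    x2 = torch.min(pred[:, 2], target[:, 2])
    y2 = torch.min(pred[:, 3], target[:, 3])
    inter = (x2 - x1).clamp(min=0) * (y2 - y1).clamp(min=0)
    area_p = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_t = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
    union = area_p + area_t - inter + eps
    iou = inter / union
    if not giou:
        return (1 - iou).mean()
    # GIoU: penalize the part of the smallest enclosing box not covered by the union
    ex1 = torch.min(pred[:, 0], target[:, 0])
    ey1 = torch.min(pred[:, 1], target[:, 1])
    ex2 = torch.max(pred[:, 2], target[:, 2])
    ey2 = torch.max(pred[:, 3], target[:, 3])
    enclose = (ex2 - ex1) * (ey2 - ey1) + eps
    giou_val = iou - (enclose - union) / enclose
    return (1 - giou_val).mean()
```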
五. Proof that the K-L divergence is greater than or equal to zero
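The derivation itself did not survive in the original post; the standard argument is a one-liner via Jensen's inequality, since $-\log$ is convex:

$$D_{KL}(f\,\|\,g) = \int f(x)\log\frac{f(x)}{g(x)}\,dx = -\int f(x)\log\frac{g(x)}{f(x)}\,dx \;\ge\; -\log\int f(x)\,\frac{g(x)}{f(x)}\,dx = -\log\int g(x)\,dx = 0,$$

with equality iff $f = g$ almost everywhere. The last step uses $\int g(x)\,dx = 1$, which is exactly why both distributions in the loss must normalize to 1.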
So when designing a loss with the KL divergence, $f(x)$ must integrate (sum) to 1, and so must $g(x)$; hence the prediction $g(x)$ has to be passed through a softmax.
PyTorch code:
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from mmpose.models.registry import LOSSES


@LOSSES.register_module()
class KLDiscretLoss(nn.Module):
    def __init__(self):
        super(KLDiscretLoss, self).__init__()
        self.LogSoftmax = nn.LogSoftmax(dim=1)  # [B, LOGITS]
        self.criterion_ = nn.KLDivLoss(reduction='none')

    def criterion(self, dec_outs, labels):
        # dec_outs: [bs, dim]
        # labels:   [bs, dim]
        scores = self.LogSoftmax(dec_outs)
        loss = torch.mean(self.criterion_(scores, labels), dim=1)  # (bs,)
        return loss

    def forward(self, output_x, output_y, target_x, target_y, target_weight):
        num_joints = output_x.size(1)
        loss = 0
        # output_x [bs, 17, 192*2]
        # output_y [bs, 17, 256*2]
        # target_x [bs, 17, 192*2]
        # target_y [bs, 17, 256*2]
        # target_weight [bs, 17]
        for idx in range(num_joints):
            coord_x_pred = output_x[:, idx].squeeze()  # [bs, 192*2]
            coord_y_pred = output_y[:, idx].squeeze()  # [bs, 256*2]
            coord_x_gt = target_x[:, idx].squeeze()    # [bs, 192*2]
            coord_y_gt = target_y[:, idx].squeeze()    # [bs, 256*2]
            weight = target_weight[:, idx].squeeze()   # [bs,]
            loss += (self.criterion(coord_x_pred, coord_x_gt).mul(weight).mean())
            loss += (self.criterion(coord_y_pred, coord_y_gt).mul(weight).mean())
        return loss / num_joints


def debug():
    loss = KLDiscretLoss()
    target_x = torch.rand((32, 17, 192*2))
    target_y = torch.rand((32, 17, 256*2))
    output_x = torch.rand((32, 17, 192*2))
    output_y = torch.rand((32, 17, 256*2))
    target_weight = torch.ones((32, 17, 1))
    cost = loss(output_x, output_y, target_x, target_y, target_weight)
    print('==cost:', cost)

if __name__ == '__main__':
    debug()
```

References:
語義分割之dice loss深度分析-技術圈