PP-LCNet-YoloV5
Background
A while ago I came across a new paper from Baidu that proposes PP-LCNet, a lightweight CPU network built around the MKLDNN acceleration strategy. It improves the performance of lightweight models across multiple tasks and also does well on downstream computer-vision tasks such as object detection and semantic segmentation. The paper and the open-source PaddlePaddle implementation are linked below.
arXiv: https://arxiv.org/pdf/2109.15099.pdf
code: https://github.com/PaddlePaddle/PaddleClas
The paper is short and the model architecture is very simple, with nothing particularly novel; it reads like an engineering write-up that digs into and carefully organizes practical details. It contains some extremely useful engineering tips and is well worth a read.
Implementing PP-LCNet in PyTorch
I first skimmed the existing write-ups of the paper online.
 
Who wouldn't be tempted by a lightweight network this fast? Unfortunately the original PP-LCNet only ships with a PaddlePaddle implementation, so a PyTorch user like me can't use it out of the box. Fortunately, PaddlePaddle's dynamic-graph mechanism is very similar to PyTorch's, so porting it with the reference code at hand is not hard. My PyTorch implementation is below.
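Before wiring it into YOLOv5, here is a minimal sketch of how the standalone classification network can be assembled from the building blocks defined in the common.py additions shown in the next section (NET_CONFIG, BLOCK_LIST, make_divisible_LC, HardSwish, DepthwiseSeparable). The batch norm in the stem and the 1280-channel head are details of my sketch following the paper's GAP → 1×1 conv → FC layout, not a verbatim copy of any reference implementation.

# Minimal PP-LCNet classifier sketch (assumes NET_CONFIG, BLOCK_LIST,
# make_divisible_LC, HardSwish and DepthwiseSeparable from the common.py
# additions below are in scope).
import torch
import torch.nn as nn

class PPLCNet(nn.Module):
    def __init__(self, scale=1.0, num_classes=1000, dropout=0.2):
        super().__init__()
        stem_c = make_divisible_LC(16 * scale)
        # Stem: 3x3 stride-2 conv to 16*scale channels
        self.stem = nn.Sequential(
            nn.Conv2d(3, stem_c, 3, 2, 1, bias=False),
            nn.BatchNorm2d(stem_c),
            HardSwish(),
        )
        # Five stages of depthwise-separable blocks, exactly as listed in NET_CONFIG
        self.stages = nn.Sequential(*[
            DepthwiseSeparable(make_divisible_LC(in_c * scale),
                               make_divisible_LC(out_c * scale),
                               dw_size=k, stride=s, use_se=se)
            for name in BLOCK_LIST
            for (k, in_c, out_c, s, se) in NET_CONFIG[name]
        ])
        # Head: global average pool -> 1x1 conv to 1280 -> dropout -> FC
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.last_conv = nn.Conv2d(make_divisible_LC(512 * scale), 1280, 1, bias=False)
        self.act = HardSwish()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(1280, num_classes)

    def forward(self, x):
        x = self.stages(self.stem(x))
        x = self.act(self.last_conv(self.pool(x)))
        return self.fc(self.dropout(torch.flatten(x, 1)))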
PP-LCNet-YoloV5
Now that PP-LCNet is implemented in PyTorch, the next step is putting it to work. Since my work is mainly detection and tracking, the obvious first target is the classic object-detection model, YOLOv5. PP-LCNet comes in eight width multipliers: 0.25, 0.35, 0.5, 0.75, 1.0, 1.5, 2.0, and 2.5. Taking PPLCNet_x1_0 as the example, three files of the original YOLOv5 need to be modified.
common.py
# Add the following code
#-------------------------------------PP_LCNet------------------------------------------------------
NET_CONFIG = {
    # k, in_c, out_c, s, use_se
    "blocks2": [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
                [5, 256, 256, 1, False], [5, 256, 256, 1, False],
                [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}

BLOCK_LIST = ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]


def make_divisible_LC(v, divisor=8, min_value=None):
    # Round channel counts to the nearest multiple of `divisor`
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class HardSwish(nn.Module):
    def __init__(self, inplace=True):
        super(HardSwish, self).__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return x * self.relu6(x + 3) / 6


class HardSigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(HardSigmoid, self).__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return self.relu6(x + 3) / 6


class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            HardSigmoid()
        )

    def forward(self, x):
        b, c, h, w = x.size()
        y = self.avgpool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


class DepthwiseSeparable(nn.Module):
    def __init__(self, inp, oup, dw_size, stride, use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        self.stride = stride
        self.inp = inp
        self.oup = oup
        self.dw_size = dw_size
        self.dw_sp = nn.Sequential(
            # depthwise conv (groups == in channels)
            nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
                      padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
            nn.BatchNorm2d(self.inp),
            HardSwish(),
            # pointwise conv
            nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(self.oup),
            HardSwish(),
        )
        self.se = SELayer(self.oup)

    def forward(self, x):
        x = self.dw_sp(x)
        if self.use_se:
            x = self.se(x)
        return x


class PPLC_Conv(nn.Module):
    def __init__(self, scale):
        super(PPLC_Conv, self).__init__()
        self.scale = scale
        self.conv = nn.Conv2d(3, out_channels=make_divisible_LC(16 * self.scale),
                              kernel_size=3, stride=2, padding=1, bias=False)

    def forward(self, x):
        return self.conv(x)


class PPLC_Block(nn.Module):
    def __init__(self, scale, block_num):
        super(PPLC_Block, self).__init__()
        self.scale = scale
        self.block_num = BLOCK_LIST[block_num]
        self.block = nn.Sequential(*[
            DepthwiseSeparable(inp=make_divisible_LC(in_c * self.scale),
                               oup=make_divisible_LC(out_c * self.scale),
                               dw_size=k, stride=s, use_se=use_se)
            for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG[self.block_num])
        ])

    def forward(self, x):
        return self.block(x)
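Before touching yolo.py, a quick shape check helps confirm that the strides and channel widths come out as expected. This is my own sanity-check snippet, not part of the original post; the import path assumes the code above was added to models/common.py in the YOLOv5 repo.

# Sanity check: run a dummy image through the stem and the five block stages.
import torch
from models.common import PPLC_Conv, PPLC_Block

scale = 1.0
x = torch.randn(1, 3, 640, 640)
x = PPLC_Conv(scale)(x)            # stem conv, stride 2 -> /2
for idx in range(5):               # blocks2 ... blocks6
    x = PPLC_Block(scale, idx)(x)
    print(idx, tuple(x.shape))
# Expected final shape at scale 1.0: (1, 512, 20, 20), i.e. a /32 feature map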
yolo.py
# Modify the parse_model function as follows
def parse_model(d, ch):  # model_dict, input_channels(3)
    LOGGER.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except:
                pass

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                 BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        # Added: handle the new PP-LCNet modules
        # ---------------------------------------------
        elif m is PPLC_Conv:
            c2 = args[0]
            args = args[1:]
        elif m is PPLC_Block:
            c2 = args[0]
            args = args[1:]
        # ----------------------------------------------
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n_, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
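The two new branches rely on a small argument convention worth spelling out: in the yaml, the first argument of PPLC_Conv / PPLC_Block only tells parse_model the block's output channel count (c2) so that later Concat/Conv layers see the right width; it is stripped before the module is constructed. A toy walk-through of one yaml row (illustrative only):

# How a yaml row is turned into a module by the new branches.
from models.common import PPLC_Block

# yaml row: [-1, 1, PPLC_Block, [128, 1, 2]]
args = [128, 1, 2]        # [out_channels, scale, block index]
c2 = args[0]              # 128 is recorded in ch[] for downstream layers
args = args[1:]           # [1, 2]
m_ = PPLC_Block(*args)    # PPLC_Block(scale=1, block_num=2) -> "blocks4"

Note that the recorded width is whatever the yaml says, not what the block actually outputs; at scale 1.0 the two happen to match, but for other width multipliers the channel numbers in the yaml would need to be updated to make_divisible_LC(out_c * scale).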
yolov5_LCNet.yaml
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license

# Parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  [[-1, 1, PPLC_Conv, [16, 1]],
   [-1, 1, PPLC_Block, [32, 1, 0]],
   [-1, 1, PPLC_Block, [64, 1, 1]],
   [-1, 1, PPLC_Block, [128, 1, 2]],
   [-1, 1, PPLC_Block, [256, 1, 3]],
   [-1, 1, PPLC_Block, [512, 1, 4]],
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 9
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 3], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3, [256, False]],  # 13 (P3/8-small)
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3, [512, False]],  # 16 (P4/16-medium)
   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 6], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3, [1024, False]],  # 19 (P5/32-large)
   [[13, 16, 19], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

I wanted to run an ablation study for comparison, but one look at my GTX 1650 Ti brought tears of poverty to my eyes... I'll update with results when I get the chance.
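Even without the compute for a full ablation, it is worth checking that the config wires up correctly. Here is a quick check I would run; the paths assume yolov5_LCNet.yaml was saved under models/ in the YOLOv5 repo, and the training command simply uses the standard YOLOv5 flags.

# Build the PP-LCNet-backboned model from the new yaml and run a dummy forward pass.
import torch
from models.yolo import Model

model = Model('models/yolov5_LCNet.yaml', ch=3, nc=80)
model.eval()
with torch.no_grad():
    preds = model(torch.zeros(1, 3, 640, 640))
print(sum(p.numel() for p in model.parameters()), 'parameters')

# Training then goes through the usual entry point, e.g.:
#   python train.py --data coco128.yaml --cfg models/yolov5_LCNet.yaml --weights '' --img 640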
The full project is on GitHub:
 https://github.com/OutBreak-hui/Yolov5-PP-LCNet
Summary
Swapping PP-LCNet into YOLOv5 takes three changes: add the PP-LCNet building blocks to common.py, teach parse_model in yolo.py about PPLC_Conv and PPLC_Block, and describe the new backbone in yolov5_LCNet.yaml. Benchmark results will follow once I have the hardware to run them.