Classic Object Detection YOLO Series (3): Reproducing YOLOv3 (1), Overall Network Architecture and Forward Process
As with the earlier YOLOv2 reimplementation, we follow the book 《YOLO目标检测》 (YOLO Object Detection, ISBN: 9787115627094) and rebuild a more modern YOLOv3 detector, without departing from most of YOLOv3's core ideas, in order to gain a deeper understanding of YOLOv3.
Source code for the book: RT-ODLab: YOLO Tutorial
1. YOLOv3 Network Architecture
1.1 The DarkNet53 Backbone
We use DarkNet53, proposed in the original YOLOv3, as the backbone. The author also provides a DarkNetTiny variant. The author's ImageNet-pretrained weights can be downloaded manually from https://github.com/yjh0410/image_classification_pytorch.
1.1.1 DarkNet53's Residual Blocks
DarkNet53 is essentially a stack of residual blocks arranged in a [1, 2, 8, 8, 4] pattern. We first build a Bottleneck module from a 1×1 conv layer followed by a 3×3 conv layer; its shortcut parameter decides whether a residual connection is used.
# RT-ODLab/models/detectors/yolov3/yolov3_basic.py
import torch
import torch.nn as nn

# Bottleneck
class Bottleneck(nn.Module):
    def __init__(self,
                 in_dim,
                 out_dim,
                 expand_ratio=0.5,
                 shortcut=False,
                 depthwise=False,
                 act_type='silu',
                 norm_type='BN'):
        super(Bottleneck, self).__init__()
        inter_dim = int(out_dim * expand_ratio)  # hidden channels
        self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type)
        self.cv2 = Conv(inter_dim, out_dim, k=3, p=1, norm_type=norm_type, act_type=act_type, depthwise=depthwise)
        self.shortcut = shortcut and in_dim == out_dim

    def forward(self, x):
        h = self.cv2(self.cv1(x))
        return x + h if self.shortcut else h

Next we build the ResBlock class; its nblocks argument decides how many Bottleneck modules are stacked.
# RT-ODLab/models/detectors/yolov3/yolov3_basic.py
# ResBlock
class ResBlock(nn.Module):
    def __init__(self,
                 in_dim,
                 out_dim,
                 nblocks=1,
                 act_type='silu',
                 norm_type='BN'):
        super(ResBlock, self).__init__()
        assert in_dim == out_dim
        self.m = nn.Sequential(*[
            Bottleneck(in_dim, out_dim, expand_ratio=0.5, shortcut=True,
                       norm_type=norm_type, act_type=act_type)
            for _ in range(nblocks)
        ])

    def forward(self, x):
        return self.m(x)

1.1.2 Building the DarkNet53 Network
We stack residual blocks in the classic [1, 2, 8, 8, 4] pattern; downsampling between stages is done by stride-2 convolutions. We replace the LeakyReLU activation with SiLU, i.e. x·sigmoid(x), which combines properties of Sigmoid and ReLU: it has no upper bound, has a lower bound, and is smooth and non-monotonic. DarkNet53 returns feature maps at three scales, C3, C4, and C5, for the FPN and multi-level detection. The source also provides the DarkNetTiny variant. Once yolov3_backbone is built, it can be invoked from yolov3.py via the build_backbone function.
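As a quick reference for the activation swap, here is a minimal SiLU sketch (not part of the repo; the repo's Conv wrapper would typically dispatch to PyTorch's built-in nn.SiLU):

import torch
import torch.nn as nn

def silu(x):
    # SiLU (a.k.a. Swish): x * sigmoid(x); smooth, non-monotonic,
    # bounded below and unbounded above
    return x * torch.sigmoid(x)

x = torch.linspace(-4.0, 4.0, steps=9)
print(torch.allclose(silu(x), nn.SiLU()(x)))  # True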
# RT-ODLab/models/detectors/yolov3/yolov3_backbone.py
import torch
import torch.nn as nn

try:
    from .yolov3_basic import Conv, ResBlock
except:
    from yolov3_basic import Conv, ResBlock

model_urls = {
    'darknet_tiny': 'https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet_tiny.pth',
    'darknet53': 'https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet53_silu.pth',
}

# --------------------- DarkNet-53 -----------------------
## DarkNet-53
class DarkNet53(nn.Module):
    def __init__(self, act_type='silu', norm_type='BN'):
        super(DarkNet53, self).__init__()
        self.feat_dims = [256, 512, 1024]
        # P1
        self.layer_1 = nn.Sequential(
            Conv(3, 32, k=3, p=1, act_type=act_type, norm_type=norm_type),
            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(64, 64, nblocks=1, act_type=act_type, norm_type=norm_type)
        )
        # P2
        self.layer_2 = nn.Sequential(
            Conv(64, 128, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(128, 128, nblocks=2, act_type=act_type, norm_type=norm_type)
        )
        # P3
        self.layer_3 = nn.Sequential(
            Conv(128, 256, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(256, 256, nblocks=8, act_type=act_type, norm_type=norm_type)
        )
        # P4
        self.layer_4 = nn.Sequential(
            Conv(256, 512, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(512, 512, nblocks=8, act_type=act_type, norm_type=norm_type)
        )
        # P5
        self.layer_5 = nn.Sequential(
            Conv(512, 1024, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(1024, 1024, nblocks=4, act_type=act_type, norm_type=norm_type)
        )

    def forward(self, x):
        c1 = self.layer_1(x)
        c2 = self.layer_2(c1)
        c3 = self.layer_3(c2)
        c4 = self.layer_4(c3)
        c5 = self.layer_5(c4)
        outputs = [c3, c4, c5]
        return outputs

## DarkNet-Tiny
class DarkNetTiny(nn.Module):
    def __init__(self, act_type='silu', norm_type='BN'):
        super(DarkNetTiny, self).__init__()
        self.feat_dims = [64, 128, 256]
        # stride = 2
        self.layer_1 = nn.Sequential(
            Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(16, 16, nblocks=1, act_type=act_type, norm_type=norm_type)
        )
        # stride = 4
        self.layer_2 = nn.Sequential(
            Conv(16, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(32, 32, nblocks=1, act_type=act_type, norm_type=norm_type)
        )
        # stride = 8
        self.layer_3 = nn.Sequential(
            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(64, 64, nblocks=3, act_type=act_type, norm_type=norm_type)
        )
        # stride = 16
        self.layer_4 = nn.Sequential(
            Conv(64, 128, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(128, 128, nblocks=3, act_type=act_type, norm_type=norm_type)
        )
        # stride = 32
        self.layer_5 = nn.Sequential(
            Conv(128, 256, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
            ResBlock(256, 256, nblocks=2, act_type=act_type, norm_type=norm_type)
        )

    def forward(self, x):
        c1 = self.layer_1(x)
        c2 = self.layer_2(c1)
        c3 = self.layer_3(c2)
        c4 = self.layer_4(c3)
        c5 = self.layer_5(c4)
        outputs = [c3, c4, c5]
        return outputs

# --------------------- Functions -----------------------
def build_backbone(model_name='darknet53', pretrained=False):
    """Constructs a darknet-53 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    if model_name == 'darknet53':
        backbone = DarkNet53(act_type='silu', norm_type='BN')
        feat_dims = backbone.feat_dims
    elif model_name == 'darknet_tiny':
        backbone = DarkNetTiny(act_type='silu', norm_type='BN')
        feat_dims = backbone.feat_dims

    if pretrained:
        url = model_urls[model_name]
        if url is not None:
            print('Loading pretrained weight ...')
            checkpoint = torch.hub.load_state_dict_from_url(
                url=url, map_location='cpu', check_hash=True)
            # checkpoint state dict
            checkpoint_state_dict = checkpoint.pop('model')
            # model state dict
            model_state_dict = backbone.state_dict()
            # check
            for k in list(checkpoint_state_dict.keys()):
                if k in model_state_dict:
                    shape_model = tuple(model_state_dict[k].shape)
                    shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
                    if shape_model != shape_checkpoint:
                        checkpoint_state_dict.pop(k)
                else:
                    checkpoint_state_dict.pop(k)
                    print(k)

            backbone.load_state_dict(checkpoint_state_dict)
        else:
            print('No backbone pretrained: DarkNet53')

    return backbone, feat_dims

if __name__ == '__main__':
    import time
    from thop import profile

    model, feats = build_backbone(model_name='darknet53', pretrained=True)
    x = torch.randn(1, 3, 224, 224)
    t0 = time.time()
    outputs = model(x)
    t1 = time.time()
    print('Time: ', t1 - t0)
    for out in outputs:
        print(out.shape)

    x = torch.randn(1, 3, 224, 224)
    print()
    flops, params = profile(model, inputs=(x, ), verbose=False)
    print()
    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
    print('Params : {:.2f} M'.format(params / 1e6))
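Beyond the 224×224 test above, a quick sanity check of the three output strides (assuming the DarkNet53 class and its Conv/ResBlock dependencies are importable): with a 416×416 input, strides 8/16/32 give 52/26/13 feature maps.

model = DarkNet53()
x = torch.randn(1, 3, 416, 416)
c3, c4, c5 = model(x)
print(c3.shape)  # torch.Size([1, 256, 52, 52]),  stride 8
print(c4.shape)  # torch.Size([1, 512, 26, 26]),  stride 16
print(c5.shape)  # torch.Size([1, 1024, 13, 13]), stride 32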
1.2 Building the Neck
1.2.1 Adding the SPPF Module
The original YOLOv3 neck contains only the feature pyramid; later releases added an SPP module, which also appears in subsequent YOLO versions. We therefore keep using the SPPF module already implemented for our YOLOv1 and YOLOv2. The code lives in RT-ODLab/models/detectors/yolov3/yolov3_neck.py and is identical to before, so it is not repeated here; a minimal sketch of the idea follows. The SPPF module only processes the C5 feature map produced by the backbone, which enlarges the network's receptive field. The activation is again switched to SiLU.
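The sketch below uses plain nn.Conv2d stand-ins for the repo's Conv block (the actual yolov3_neck.py adds BN and SiLU); it is an illustration of the structure, not the repo code. Three serial 5×5 max-poolings reproduce the receptive fields of SPP's parallel 5/9/13 poolings at lower cost.

import torch
import torch.nn as nn

class SPPF(nn.Module):
    def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5):
        super().__init__()
        inter_dim = int(in_dim * expand_ratio)
        self.cv1 = nn.Conv2d(in_dim, inter_dim, kernel_size=1)
        self.cv2 = nn.Conv2d(inter_dim * 4, out_dim, kernel_size=1)
        self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1,
                              padding=pooling_size // 2)

    def forward(self, x):
        x = self.cv1(x)
        y1 = self.m(x)   # 5x5 receptive field
        y2 = self.m(y1)  # equivalent to a 9x9 pooling
        y3 = self.m(y2)  # equivalent to a 13x13 pooling
        return self.cv2(torch.cat([x, y1, y2, y3], dim=1))

# applied only to C5: (1, 1024, 13, 13) -> (1, 1024, 13, 13)
print(SPPF(1024, 1024)(torch.randn(1, 1024, 13, 13)).shape)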
1.2.2 Adding the Feature Pyramid
We make a few changes on top of YOLOv3's feature pyramid: the final three standalone 3×3 conv layers are removed and replaced by three 1×1 conv layers that project every scale to 256 channels, which makes it convenient to attach decoupled detection heads afterwards.
# RT-ODLab/models/detectors/yolov3/yolov3_fpn.py
import torch
import torch.nn as nn
import torch.nn.functional as F

from .yolov3_basic import Conv, ConvBlocks

# Yolov3FPN
class Yolov3FPN(nn.Module):
    def __init__(self,
                 in_dims=[256, 512, 1024],
                 width=1.0,
                 depth=1.0,
                 out_dim=None,
                 act_type='silu',
                 norm_type='BN'):
        super(Yolov3FPN, self).__init__()
        self.in_dims = in_dims
        self.out_dim = out_dim
        c3, c4, c5 = in_dims

        # P5 -> P4
        self.top_down_layer_1 = ConvBlocks(c5, int(512*width), act_type=act_type, norm_type=norm_type)
        self.reduce_layer_1 = Conv(int(512*width), int(256*width), k=1, act_type=act_type, norm_type=norm_type)

        # P4 -> P3
        self.top_down_layer_2 = ConvBlocks(c4 + int(256*width), int(256*width), act_type=act_type, norm_type=norm_type)
        self.reduce_layer_2 = Conv(int(256*width), int(128*width), k=1, act_type=act_type, norm_type=norm_type)

        # P3
        self.top_down_layer_3 = ConvBlocks(c3 + int(128*width), int(128*width), act_type=act_type, norm_type=norm_type)

        # output proj layers
        if out_dim is not None:
            self.out_layers = nn.ModuleList([
                Conv(in_dim, out_dim, k=1,
                     norm_type=norm_type, act_type=act_type)
                for in_dim in [int(128 * width), int(256 * width), int(512 * width)]
            ])
            self.out_dim = [out_dim] * 3
        else:
            self.out_layers = None
            self.out_dim = [int(128 * width), int(256 * width), int(512 * width)]

    def forward(self, features):
        c3, c4, c5 = features

        # p5/32
        # 1. C5 goes through Convolutional Set 1 to give P5
        p5 = self.top_down_layer_1(c5)

        # p4/16
        # 2. P5 is first reduced in channels, then upsampled; after concatenation
        #    with C4, Convolutional Set 2 gives P4
        p5_up = F.interpolate(self.reduce_layer_1(p5), scale_factor=2.0)
        p4 = self.top_down_layer_2(torch.cat([c4, p5_up], dim=1))

        # P3/8
        # 3. Likewise, P4 is reduced, upsampled, concatenated with C3, and passed
        #    through Convolutional Set 3 to give P3
        p4_up = F.interpolate(self.reduce_layer_2(p4), scale_factor=2.0)
        p3 = self.top_down_layer_3(torch.cat([c3, p4_up], dim=1))

        out_feats = [p3, p4, p5]

        # output proj layers
        if self.out_layers is not None:
            out_feats_proj = []
            # 4. project p3, p4, p5 to 256 channels each
            for feat, layer in zip(out_feats, self.out_layers):
                out_feats_proj.append(layer(feat))
            return out_feats_proj

        return out_feats


def build_fpn(cfg, in_dims, out_dim=None):
    model = cfg['fpn']
    # build neck
    if model == 'yolov3_fpn':
        fpn_net = Yolov3FPN(in_dims=in_dims,
                            out_dim=out_dim,
                            width=cfg['width'],
                            depth=cfg['depth'],
                            act_type=cfg['fpn_act'],
                            norm_type=cfg['fpn_norm'])

    return fpn_net
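To make the top-down tensor flow concrete, here is a shape walk-through using plain 1×1 nn.Conv2d stand-ins for the repo's ConvBlocks and reduce layers (no BN/SiLU, just the channel bookkeeping; a sketch, not the repo code):

import torch
import torch.nn as nn
import torch.nn.functional as F

c3 = torch.randn(1, 256, 52, 52)
c4 = torch.randn(1, 512, 26, 26)
c5 = torch.randn(1, 1024, 13, 13)

set1, reduce1 = nn.Conv2d(1024, 512, 1), nn.Conv2d(512, 256, 1)
set2, reduce2 = nn.Conv2d(512 + 256, 256, 1), nn.Conv2d(256, 128, 1)
set3 = nn.Conv2d(256 + 128, 128, 1)

p5 = set1(c5)                                                                    # (1, 512, 13, 13)
p4 = set2(torch.cat([c4, F.interpolate(reduce1(p5), scale_factor=2.0)], dim=1))  # (1, 256, 26, 26)
p3 = set3(torch.cat([c3, F.interpolate(reduce2(p4), scale_factor=2.0)], dim=1))  # (1, 128, 52, 52)
print(p3.shape, p4.shape, p5.shape)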
1.3 Building the Detection Head
The detection head in official YOLOv3 is coupled: confidence, class, and bounding box are all predicted by a single 1×1 conv layer on the same feature map. Here we continue to use a decoupled head, with two parallel branches handling classification and localization separately. Although the decoupled heads at the different scales share the same structure, their parameters are not shared, which differs from RetinaNet's head.
# RT-ODLab/models/detectors/yolov3/yolov3_head.py
import torch
import torch.nn as nn

try:
    from .yolov3_basic import Conv
except:
    from yolov3_basic import Conv


class DecoupledHead(nn.Module):
    def __init__(self, cfg, in_dim, out_dim, num_classes=80):
        super().__init__()
        print()
        print('Head: Decoupled Head')
        self.in_dim = in_dim
        self.num_cls_head = cfg['num_cls_head']
        self.num_reg_head = cfg['num_reg_head']
        self.act_type = cfg['head_act']
        self.norm_type = cfg['head_norm']

        # cls head
        cls_feats = []
        self.cls_out_dim = max(out_dim, num_classes)
        for i in range(cfg['num_cls_head']):
            if i == 0:
                cls_feats.append(
                    Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1,
                         act_type=self.act_type,
                         norm_type=self.norm_type,
                         depthwise=cfg['head_depthwise'])
                )
            else:
                cls_feats.append(
                    Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1,
                         act_type=self.act_type,
                         norm_type=self.norm_type,
                         depthwise=cfg['head_depthwise'])
                )

        # reg head
        reg_feats = []
        self.reg_out_dim = max(out_dim, 64)
        for i in range(cfg['num_reg_head']):
            if i == 0:
                reg_feats.append(
                    Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1,
                         act_type=self.act_type,
                         norm_type=self.norm_type,
                         depthwise=cfg['head_depthwise'])
                )
            else:
                reg_feats.append(
                    Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1,
                         act_type=self.act_type,
                         norm_type=self.norm_type,
                         depthwise=cfg['head_depthwise'])
                )

        self.cls_feats = nn.Sequential(*cls_feats)
        self.reg_feats = nn.Sequential(*reg_feats)

    def forward(self, x):
        """in_feats: (Tensor) [B, C, H, W]"""
        cls_feats = self.cls_feats(x)
        reg_feats = self.reg_feats(x)
        return cls_feats, reg_feats


# build detection head
def build_head(cfg, in_dim, out_dim, num_classes=80):
    head = DecoupledHead(cfg, in_dim, out_dim, num_classes)
    return head

Since each of the three scales needs its own detection head, we hold the heads in an nn.ModuleList.
# RT-ODLab/models/detectors/yolov3/yolov3.py
# YOLOv3
class YOLOv3(nn.Module):
    def __init__(self,
                 cfg,
                 device,
                 num_classes=20,
                 conf_thresh=0.01,
                 topk=100,
                 nms_thresh=0.5,
                 trainable=False,
                 deploy=False,
                 nms_class_agnostic=False):
        super(YOLOv3, self).__init__()
        ......
        # ------------------- Network Structure -------------------
        ## backbone
        self.backbone, feats_dim = build_backbone(
            cfg['backbone'], trainable&cfg['pretrained'])

        ## neck: SPP module
        self.neck = build_neck(cfg, in_dim=feats_dim[-1], out_dim=feats_dim[-1])
        feats_dim[-1] = self.neck.out_dim

        ## neck: feature pyramid
        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=int(256*cfg['width']))
        self.head_dim = self.fpn.out_dim

        ## detection heads
        self.non_shared_heads = nn.ModuleList(
            [build_head(cfg, head_dim, head_dim, num_classes)
             for head_dim in self.head_dim])

1.4 Building the Prediction Layers
Finally, we build the prediction layers for each scale.
For class prediction, a 1×1 conv layer after the classification branch of the decoupled head performs classification; for box prediction, a 1×1 conv layer after the regression branch performs localization; for confidence prediction, a 1×1 conv layer after the regression branch predicts each bounding box's confidence.

# RT-ODLab/models/detectors/yolov3/yolov3.py
        ## prediction layers
        self.obj_preds = nn.ModuleList(
            [nn.Conv2d(head.reg_out_dim, 1 * self.num_anchors, kernel_size=1)
             for head in self.non_shared_heads])
        self.cls_preds = nn.ModuleList(
            [nn.Conv2d(head.cls_out_dim, self.num_classes * self.num_anchors, kernel_size=1)
             for head in self.non_shared_heads])
        self.reg_preds = nn.ModuleList(
            [nn.Conv2d(head.reg_out_dim, 4 * self.num_anchors, kernel_size=1)
             for head in self.non_shared_heads])
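A quick check of the resulting channel counts for VOC (num_classes=20, 3 anchors per cell, head width 256 as configured above):

import torch
import torch.nn as nn

num_anchors, num_classes, head_dim = 3, 20, 256
obj_pred = nn.Conv2d(head_dim, 1 * num_anchors, kernel_size=1)
cls_pred = nn.Conv2d(head_dim, num_classes * num_anchors, kernel_size=1)
reg_pred = nn.Conv2d(head_dim, 4 * num_anchors, kernel_size=1)

feat = torch.randn(1, head_dim, 13, 13)  # e.g. the P5-level head feature
print(obj_pred(feat).shape)  # torch.Size([1, 3, 13, 13])
print(cls_pred(feat).shape)  # torch.Size([1, 60, 13, 13])
print(reg_pred(feat).shape)  # torch.Size([1, 12, 13, 13])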
1.5 Detailed Network Diagram of the Improved YOLOv3
With this, the construction of the YOLOv3 network is complete; the detailed network diagram is shown below.

2. YOLOv3's Forward Inference Process
2.1 Decoding the Bounding Box Coordinates
2.1.1 Generating the Anchor Matrix
The YOLOv3 network configuration is shown below; note the anchor_size entry. These anchor boxes were obtained by k-means clustering on the COCO dataset. Since COCO is larger and its images are more varied, we reuse these anchors on the VOC dataset as well.
# RT-ODLab/config/model_config/yolov3_config.py
# YOLOv3 Config

yolov3_cfg = {
    'yolov3': {
        # ---------------- Model config ----------------
        ## Backbone
        'backbone': 'darknet53',
        'pretrained': True,
        'stride': [8, 16, 32],  # P3, P4, P5
        'width': 1.0,
        'depth': 1.0,
        'max_stride': 32,
        ## Neck
        'neck': 'sppf',
        'expand_ratio': 0.5,
        'pooling_size': 5,
        'neck_act': 'silu',
        'neck_norm': 'BN',
        'neck_depthwise': False,
        ## FPN
        'fpn': 'yolov3_fpn',
        'fpn_act': 'silu',
        'fpn_norm': 'BN',
        'fpn_depthwise': False,
        ## Head
        'head': 'decoupled_head',
        'head_act': 'silu',
        'head_norm': 'BN',
        'num_cls_head': 2,
        'num_reg_head': 2,
        'head_depthwise': False,
        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                        [30, 61],   [62, 45],   [59, 119],    # P4
                        [116, 90],  [156, 198], [373, 326]],  # P5
        # ---------------- Train config ----------------
        ## input
        'trans_type': 'yolov5_large',
        'multi_scale': [0.5, 1.0],
        # ---------------- Assignment config ----------------
        ## matcher
        'iou_thresh': 0.5,
        # ---------------- Loss config ----------------
        ## loss weight
        'loss_obj_weight': 1.0,
        'loss_cls_weight': 1.0,
        'loss_box_weight': 5.0,
        # ---------------- Train config ----------------
        'trainer_type': 'yolov8',
    },

    'yolov3_tiny': {
        # ---------------- Model config ----------------
        ## Backbone
        'backbone': 'darknet_tiny',
        'pretrained': True,
        'stride': [8, 16, 32],  # P3, P4, P5
        'width': 0.25,
        'depth': 0.34,
        'max_stride': 32,
        ## Neck
        'neck': 'sppf',
        'expand_ratio': 0.5,
        'pooling_size': 5,
        'neck_act': 'silu',
        'neck_norm': 'BN',
        'neck_depthwise': False,
        ## FPN
        'fpn': 'yolov3_fpn',
        'fpn_act': 'silu',
        'fpn_norm': 'BN',
        'fpn_depthwise': False,
        ## Head
        'head': 'decoupled_head',
        'head_act': 'silu',
        'head_norm': 'BN',
        'num_cls_head': 2,
        'num_reg_head': 2,
        'head_depthwise': False,
        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                        [30, 61],   [62, 45],   [59, 119],    # P4
                        [116, 90],  [156, 198], [373, 326]],  # P5
        # ---------------- Train config ----------------
        ## input
        'trans_type': 'yolov5_nano',
        'multi_scale': [0.5, 1.0],
        # ---------------- Assignment config ----------------
        ## matcher
        'iou_thresh': 0.5,
        # ---------------- Loss config ----------------
        ## loss weight
        'loss_obj_weight': 1.0,
        'loss_cls_weight': 1.0,
        'loss_box_weight': 5.0,
        # ---------------- Train config ----------------
        'trainer_type': 'yolov8',
    },
}

On each of the C3, C4, and C5 feature maps, YOLOv3 places 3 anchor boxes at every grid cell:
- C3: (10, 13), (16, 30), (33, 23), used to detect smaller objects.
- C4: (30, 61), (62, 45), (59, 119), used to detect medium-sized objects.
- C5: (116, 90), (156, 198), (373, 326), used to detect larger objects.

The anchor-matrix generation follows the same code logic as YOLOv2, with one extra level argument marking which of the three scales is being processed; an anchor matrix is generated for each scale.

# RT-ODLab/models/detectors/yolov3/yolov3.py
    ## generate anchor points
    def generate_anchors(self, level, fmp_size):
        """
        fmp_size: (List) [H, W]
        level=0: with the default 416×416 input, 8x  downsampling gives fmp_size 52×52
        level=1: with the default 416×416 input, 16x downsampling gives fmp_size 26×26
        level=2: with the default 416×416 input, 32x downsampling gives fmp_size 13×13
        """
        # 1. feature map height and width
        fmp_h, fmp_w = fmp_size
        # [KA, 2]
        anchor_size = self.anchor_size[level]

        # 2. generate the grid's x and y coordinates
        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])

        # 3. stack the x and y parts -> [H, W, 2], then reshape to [HW, 2]
        anchor_xy = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)

        # 4. with the anchor box mechanism each cell holds A anchors, so each
        #    (grid_x, grid_y) coordinate is repeated A times: every cell's top-left
        #    corner serves as the center of 3 anchor boxes of different sizes
        # [HW, 2] -> [HW, KA, 2] -> [M, 2]
        anchor_xy = anchor_xy.unsqueeze(1).repeat(1, self.num_anchors, 1)
        anchor_xy = anchor_xy.view(-1, 2).to(self.device)

        # 5. this level's 3 anchor sizes are repeated fmp_h*fmp_w (e.g. 13×13) times
        # [KA, 2] -> [1, KA, 2] -> [HW, KA, 2] -> [M, 2]
        anchor_wh = anchor_size.unsqueeze(0).repeat(fmp_h*fmp_w, 1, 1)
        anchor_wh = anchor_wh.view(-1, 2).to(self.device)

        # 6. concatenate centers and sizes -> [M, 4]
        # level=0: M = 52×52×3, a 52×52 feature map with 3 anchors per cell
        # level=1: M = 26×26×3, a 26×26 feature map with 3 anchors per cell
        # level=2: M = 13×13×3, a 13×13 feature map with 3 anchors per cell
        anchors = torch.cat([anchor_xy, anchor_wh], dim=-1)

        return anchors
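The following standalone snippet mirrors the logic above for level=0 and verifies the resulting layout (a sketch for illustration, independent of the YOLOv3 class):

import torch

fmp_h, fmp_w, num_anchors = 52, 52, 3
anchor_size = torch.as_tensor([[10, 13], [16, 30], [33, 23]]).float()  # P3 anchors

anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
anchor_xy = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
anchor_xy = anchor_xy.unsqueeze(1).repeat(1, num_anchors, 1).view(-1, 2)
anchor_wh = anchor_size.unsqueeze(0).repeat(fmp_h * fmp_w, 1, 1).view(-1, 2)
anchors = torch.cat([anchor_xy, anchor_wh], dim=-1)

print(anchors.shape)  # torch.Size([8112, 4]), since 8112 = 52*52*3
print(anchors[:3])    # 3 anchors centered at cell (0, 0), sizes (10,13), (16,30), (33,23)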
2.1.2 Decoding Bounding Boxes
Once the anchor matrix is generated, we can decode the bounding box coordinates box_pred from the predicted offsets reg_pred. The forward inference logic is the same as YOLOv2's; the only addition is the multi-level part, where a for loop collects the obj_preds, cls_preds, and box_preds of all three scales.
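Before the full model code, here is the decoding rule in isolation (a minimal standalone sketch; each row of reg_pred holds (tx, ty, tw, th), each row of anchors holds (grid_x, grid_y, pw, ph) as generated above, and stride is that level's stride):

import torch

def decode_boxes(reg_pred, anchors, stride):
    # center: sigmoid offset within the cell, shifted by the grid coordinate,
    # then scaled to input-image pixels
    ctr_pred = (torch.sigmoid(reg_pred[..., :2]) + anchors[..., :2]) * stride
    # size: exponential scaling of the anchor width/height (already in pixels)
    wh_pred = torch.exp(reg_pred[..., 2:]) * anchors[..., 2:]
    x1y1 = ctr_pred - wh_pred * 0.5
    x2y2 = ctr_pred + wh_pred * 0.5
    return torch.cat([x1y1, x2y2], dim=-1)  # [M, 4], xyxy format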
# RT-ODLab/models/detectors/yolov3/yolov3.py
import torch
import torch.nn as nn

from utils.misc import multiclass_nms

from .yolov3_backbone import build_backbone
from .yolov3_neck import build_neck
from .yolov3_fpn import build_fpn
from .yolov3_head import build_head


# YOLOv3
class YOLOv3(nn.Module):
    def __init__(self,
                 cfg,
                 device,
                 num_classes=20,
                 conf_thresh=0.01,
                 topk=100,
                 nms_thresh=0.5,
                 trainable=False,
                 deploy=False,
                 nms_class_agnostic=False):
        super(YOLOv3, self).__init__()
        # ------------------- Basic parameters -------------------
        self.cfg = cfg                    # model config
        self.device = device              # cuda or cpu
        self.num_classes = num_classes    # number of classes
        self.trainable = trainable        # training flag
        self.conf_thresh = conf_thresh    # score threshold
        self.nms_thresh = nms_thresh      # NMS threshold
        self.topk = topk                  # topk
        self.stride = [8, 16, 32]         # the network's output strides
        self.deploy = deploy
        self.nms_class_agnostic = nms_class_agnostic

        # ------------------- Anchor box -------------------
        self.num_levels = 3
        self.num_anchors = len(cfg['anchor_size']) // self.num_levels
        self.anchor_size = torch.as_tensor(
            cfg['anchor_size']
            ).float().view(self.num_levels, self.num_anchors, 2)  # [S, A, 2]

        # ------------------- Network Structure -------------------
        ## backbone
        self.backbone, feats_dim = build_backbone(
            cfg['backbone'], trainable&cfg['pretrained'])

        ## neck: SPP module
        self.neck = build_neck(cfg, in_dim=feats_dim[-1], out_dim=feats_dim[-1])
        feats_dim[-1] = self.neck.out_dim

        ## neck: feature pyramid
        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=int(256*cfg['width']))
        self.head_dim = self.fpn.out_dim

        ## detection heads
        self.non_shared_heads = nn.ModuleList(
            [build_head(cfg, head_dim, head_dim, num_classes)
             for head_dim in self.head_dim])

        ## prediction layers
        self.obj_preds = nn.ModuleList(
            [nn.Conv2d(head.reg_out_dim, 1 * self.num_anchors, kernel_size=1)
             for head in self.non_shared_heads])
        self.cls_preds = nn.ModuleList(
            [nn.Conv2d(head.cls_out_dim, self.num_classes * self.num_anchors, kernel_size=1)
             for head in self.non_shared_heads])
        self.reg_preds = nn.ModuleList(
            [nn.Conv2d(head.reg_out_dim, 4 * self.num_anchors, kernel_size=1)
             for head in self.non_shared_heads])

    # ---------------------- Basic Functions ----------------------
    ## generate anchor points
    def generate_anchors(self, level, fmp_size):
        ......

    ## post-process
    def post_process(self, obj_preds, cls_preds, box_preds):
        pass

    # ---------------------- Main Process for Inference ----------------------
    @torch.no_grad()
    def inference(self, x):
        # x.shape = (1, 3, 416, 416)
        # backbone
        # pyramid_feats[0]: (1, 256, 52, 52)
        # pyramid_feats[1]: (1, 512, 26, 26)
        # pyramid_feats[2]: (1, 1024, 13, 13)
        pyramid_feats = self.backbone(x)

        # neck: SPPF
        # pyramid_feats[-1]: (1, 1024, 13, 13)
        pyramid_feats[-1] = self.neck(pyramid_feats[-1])

        # feature pyramid
        # pyramid_feats[0]: (1, 256, 52, 52)
        # pyramid_feats[1]: (1, 256, 26, 26)
        # pyramid_feats[2]: (1, 256, 13, 13)
        pyramid_feats = self.fpn(pyramid_feats)

        # detection heads
        all_obj_preds = []
        all_cls_preds = []
        all_box_preds = []
        for level, (feat, head) in enumerate(zip(pyramid_feats, self.non_shared_heads)):
            cls_feat, reg_feat = head(feat)

            # the regression and classification branches each end with a 1×1 conv
            # producing the predictions, [1, C, H, W]
            # level=0: obj_pred (1, 3, 52, 52), cls_pred (1, 3*20, 52, 52), reg_pred (1, 3*4, 52, 52)
            # level=1: obj_pred (1, 3, 26, 26), cls_pred (1, 3*20, 26, 26), reg_pred (1, 3*4, 26, 26)
            # level=2: obj_pred (1, 3, 13, 13), cls_pred (1, 3*20, 13, 13), reg_pred (1, 3*4, 13, 13)
            obj_pred = self.obj_preds[level](reg_feat)
            cls_pred = self.cls_preds[level](cls_feat)
            reg_pred = self.reg_preds[level](reg_feat)

            # an anchor matrix is generated for every scale
            # anchors: [M, 4]
            fmp_size = cls_pred.shape[-2:]
            anchors = self.generate_anchors(level, fmp_size)

            # [1, AC, H, W] -> [H, W, AC] -> [M, C]
            obj_pred = obj_pred[0].permute(1, 2, 0).contiguous().view(-1, 1)
            cls_pred = cls_pred[0].permute(1, 2, 0).contiguous().view(-1, self.num_classes)
            reg_pred = reg_pred[0].permute(1, 2, 0).contiguous().view(-1, 4)

            # decode bbox
            ctr_pred = (torch.sigmoid(reg_pred[..., :2]) + anchors[..., :2]) * self.stride[level]
            wh_pred = torch.exp(reg_pred[..., 2:]) * anchors[..., 2:]
            pred_x1y1 = ctr_pred - wh_pred * 0.5
            pred_x2y2 = ctr_pred + wh_pred * 0.5
            box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)

            all_obj_preds.append(obj_pred)
            all_cls_preds.append(cls_pred)
            all_box_preds.append(box_pred)

        # when the loop finishes we have all_obj_preds, all_cls_preds, all_box_preds;
        # then comes post-processing
        if self.deploy:
            obj_preds = torch.cat(all_obj_preds, dim=0)
            cls_preds = torch.cat(all_cls_preds, dim=0)
            box_preds = torch.cat(all_box_preds, dim=0)
            scores = torch.sqrt(obj_preds.sigmoid() * cls_preds.sigmoid())
            bboxes = box_preds
            # [n_anchors_all, 4 + C]
            outputs = torch.cat([bboxes, scores], dim=-1)
            return outputs
        else:
            # post process
            bboxes, scores, labels = self.post_process(
                all_obj_preds, all_cls_preds, all_box_preds)
            return bboxes, scores, labels

    # ---------------------- Main Process for Training ----------------------
    def forward(self, x):
        if not self.trainable:
            return self.inference(x)
        else:
            ......
2.2 Post-processing
Once the for loop has produced the predictions of all three scales, we move on to post-processing. The code logic matches YOLOv2's post-processing, except that multi-level detection requires a for loop over each scale's predictions. With post_process implemented, the model's forward function is straightforward and is not repeated here.

# RT-ODLab/models/detectors/yolov3/yolov3.py
    ## post-process
    def post_process(self, obj_preds, cls_preds, box_preds):
        """
        Input:
            obj_preds: List(Tensor) [[H x W x A, 1], ...], i.e. [[52×52×3, 1], [26×26×3, 1], [13×13×3, 1]]
            cls_preds: List(Tensor) [[H x W x A, C], ...], i.e. [[52×52×3, 20], [26×26×3, 20], [13×13×3, 20]]
            box_preds: List(Tensor) [[H x W x A, 4], ...], i.e. [[52×52×3, 4], [26×26×3, 4], [13×13×3, 4]]
            anchors:   List(Tensor) [[H x W x A, 2], ...]
        """
        all_scores = []
        all_labels = []
        all_bboxes = []

        # loop over each scale
        for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
            # (H x W x KA x C,)
            scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()

            # 1. topk operation
            # Keep top k top scoring indices only.
            num_topk = min(self.topk, box_pred_i.size(0))

            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, topk_idxs = scores_i.sort(descending=True)
            topk_scores = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # 2. filter out low-scoring boxes whose score falls below the given threshold
            keep_idxs = topk_scores > self.conf_thresh
            scores = topk_scores[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            # recover the pre-flatten indices of topk_scores and the corresponding labels
            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
            labels = topk_idxs % self.num_classes

            bboxes = box_pred_i[anchor_idxs]

            all_scores.append(scores)
            all_labels.append(labels)
            all_bboxes.append(bboxes)

        # concatenate the predictions of the three scales, then run NMS
        scores = torch.cat(all_scores)
        labels = torch.cat(all_labels)
        bboxes = torch.cat(all_bboxes)

        # to cpu & numpy
        scores = scores.cpu().numpy()
        labels = labels.cpu().numpy()
        bboxes = bboxes.cpu().numpy()

        # nms
        # 3. remove redundant detections of the same object
        scores, labels, bboxes = multiclass_nms(
            scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)

        return bboxes, scores, labels
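The index arithmetic after the topk step deserves a note: scores_i is flattened row-major from [M, C], so a flat index decomposes as anchor_idx * C + label, which is why floor division and modulo recover the box and class. A tiny check with illustrative values:

import torch

M, C = 8112, 20                      # e.g. the P3 level on VOC
scores = torch.rand(M, C).flatten()  # [M*C], row-major
_, topk_idxs = scores.sort(descending=True)
anchor_idxs = torch.div(topk_idxs[:5], C, rounding_mode='floor')
labels = topk_idxs[:5] % C
# each flat index maps back to a (box, class) pair
print(anchor_idxs, labels)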
Next come positive-sample matching, loss computation, and data preprocessing. For matching and the loss function we will keep the approach used for YOLOv2. For data preprocessing and augmentation, however, we no longer use the earlier SSD-style pipeline; instead we adopt YOLOv5's data processing methods to train our YOLOv3.