神经网络-YoloV3复现(网络搭建篇)

一、前言

  • 系统:win10
  • 环境:python3.7
  • 框架:pytorch1.1

完整代码会在最后给出

二、Darknet网络模版

2.1 模板说明

网络模板如下图所示,YoloV3只借鉴了前面的卷积部分(有修改),后面的Avgpool、Connected、Softmax层被去掉。注意事项:

  • 每一个Convolutional内都包含Conv2d、BN、LeakyReLU三个部分(这是固定搭配),为了方便使用,建议单独实现
  • 每一个Residual都包含两个Convolutional

img

2.2 部分代码实现

  • Convolutional单独实现
1
2
3
4
5
6
7
8
9
10
11
class Conv2d(nn.Module):
    """Convolution -> BatchNorm -> LeakyReLU(0.1) building block.

    Every constructor argument is forwarded to ``nn.Conv2d``; the batch
    norm is sized by ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
        super().__init__()
        convolution = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        normalisation = nn.BatchNorm2d(out_channels)
        activation = nn.LeakyReLU(0.1)
        # Keep the three stages in a single Sequential called ``conv`` so the
        # parameter names stay ``conv.0.*`` / ``conv.1.*``.
        self.conv = nn.Sequential(convolution, normalisation, activation)

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation pipeline."""
        return self.conv(x)
  • Residual单独实现
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class BasicBlock(nn.Module):
    """Residual unit: a 1x1 conv, then a 3x3 conv, plus the unchanged input.

    ``planes`` is indexed at positions 0 and 1: the 1x1 conv maps
    ``inplanes -> planes[0]`` and the 3x3 conv maps ``planes[0] -> planes[1]``.
    The input is added in place to the second conv's output, so the two must
    be shape-compatible (normally ``planes[1] == inplanes``).
    """

    def __init__(self, inplanes, planes):
        super().__init__()
        squeeze_channels, restore_channels = planes[0], planes[1]
        self.Conv1 = Conv2d(inplanes, squeeze_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.Conv2 = Conv2d(squeeze_channels, restore_channels, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        """Return ``Conv2(Conv1(x)) + x`` (the sum is accumulated in place)."""
        y = self.Conv2(self.Conv1(x))
        y += x
        return y

三、YoloV3网络结构

(找了好久才找齐这两幅图片——darknet和YoloV3改进,单独看一个都不好看懂)

3.1 网络说明

网络如下所示(见文末参考文献1),前面的一部分借鉴了darknet53结构,后面从Convolutional Set开始由YoloV3作者自己改进,有三个输出,添加了上采样。具体原理可以参考论文或者其他优秀博客

yolo系列之yolo v3【深度解析】 里面有张神图,转载需要版权,故需要点进去看

img

3.2 部分代码实现

3.2.1 Convolutional Set
1
2
3
4
5
6
7
8
9
10
11
12
13
class ConvSet(nn.Module):
    """Five stacked conv blocks with kernel sizes 1, 3, 1, 3, 1.

    Channel flow:
    ``inplanes -> outplanes -> outplanes -> 2*outplanes -> 2*outplanes -> outplanes``.
    All blocks use stride 1 and no conv bias; the 3x3 blocks use padding 1,
    so spatial size is preserved.
    """

    def __init__(self, inplanes, outplanes):
        super().__init__()
        wide = outplanes * 2
        # One row per stage: (in_channels, out_channels, kernel_size, padding)
        specs = (
            (inplanes, outplanes, 1, 0),
            (outplanes, outplanes, 3, 1),
            (outplanes, wide, 1, 0),
            (wide, wide, 3, 1),
            (wide, outplanes, 1, 0),
        )
        stages = [Conv2d(c_in, c_out, k, 1, pad, False) for c_in, c_out, k, pad in specs]
        self.convset = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the five stages in order."""
        return self.convset(x)

看到上述的代码实现,心中涌现几个问题:

  • 图中信息只给出了卷积核的大小,并不知道填充位(padding)
  • 图中也没有给出中间层输入通道和输出通道的数量,这些该怎么确定

在没有完整的看完论文之前有以下猜测

  • 第一个卷积一般按照给定的输入输出(inplanes, outplanes),卷积核为1
  • 如果卷积为3,则输入和输出一样,填充(padding)此时为1。(这应该是检测功能实现部位)
  • 后面再遇到卷积核为1,则需要进行通道变换(放大,或者缩小)。(这应该是选择功能实现部位)

看完论文后,我想打人!!!没有说网络是怎么建立的!

3.2.2 UpSampling
1
2
3
4
5
6
7
class Upsampling(nn.Module):
    """Parameter-free layer that doubles the spatial size of its input."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Upscale ``x`` by a factor of 2 using nearest-neighbour interpolation."""
        upsample = nn.functional.interpolate
        return upsample(x, mode='nearest', scale_factor=2)

四、总结

根据图片和论文其实无法得到每一个卷积层具体的参数,估计需要有一套自己对卷积网络的理解才能独自建立网络。对于复现来说,最难的不是网络搭建,而是loss损失函数设计和数据集设计,见后文。

五、完整代码

  • 代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# -*- coding: utf-8 -*-
# @Time : 2019/8/23 18:07
# @Author : zwenc
# @File : net.py

import time
import torch
import torch.nn as nn
import math
from collections import OrderedDict
import torch.nn.functional


class Conv2d(nn.Module):
    """Convolution -> BatchNorm -> LeakyReLU(0.1) building block.

    Every constructor argument is forwarded to ``nn.Conv2d``; the batch
    norm is sized by ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
        super().__init__()
        convolution = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
        normalisation = nn.BatchNorm2d(out_channels)
        activation = nn.LeakyReLU(0.1)
        # Keep the three stages in a single Sequential called ``conv`` so the
        # parameter names stay ``conv.0.*`` / ``conv.1.*``.
        self.conv = nn.Sequential(convolution, normalisation, activation)

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation pipeline."""
        return self.conv(x)


class BasicBlock(nn.Module):
    """Residual unit: a 1x1 conv, then a 3x3 conv, plus the unchanged input.

    ``planes`` is indexed at positions 0 and 1: the 1x1 conv maps
    ``inplanes -> planes[0]`` and the 3x3 conv maps ``planes[0] -> planes[1]``.
    The input is added in place to the second conv's output, so the two must
    be shape-compatible (normally ``planes[1] == inplanes``).
    """

    def __init__(self, inplanes, planes):
        super().__init__()
        squeeze_channels, restore_channels = planes[0], planes[1]
        self.Conv1 = Conv2d(inplanes, squeeze_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.Conv2 = Conv2d(squeeze_channels, restore_channels, kernel_size=3, stride=1, padding=1, bias=False)

    def forward(self, x):
        """Return ``Conv2(Conv1(x)) + x`` (the sum is accumulated in place)."""
        y = self.Conv2(self.Conv1(x))
        y += x
        return y


class ConvSet(nn.Module):
    """Five stacked conv blocks with kernel sizes 1, 3, 1, 3, 1.

    Channel flow:
    ``inplanes -> outplanes -> outplanes -> 2*outplanes -> 2*outplanes -> outplanes``.
    All blocks use stride 1 and no conv bias; the 3x3 blocks use padding 1,
    so spatial size is preserved.
    """

    def __init__(self, inplanes, outplanes):
        super().__init__()
        wide = outplanes * 2
        # One row per stage: (in_channels, out_channels, kernel_size, padding)
        specs = (
            (inplanes, outplanes, 1, 0),
            (outplanes, outplanes, 3, 1),
            (outplanes, wide, 1, 0),
            (wide, wide, 3, 1),
            (wide, outplanes, 1, 0),
        )
        stages = [Conv2d(c_in, c_out, k, 1, pad, False) for c_in, c_out, k, pad in specs]
        self.convset = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the five stages in order."""
        return self.convset(x)


class Upsampling(nn.Module):
    """Parameter-free layer that doubles the spatial size of its input."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Upscale ``x`` by a factor of 2 using nearest-neighbour interpolation."""
        upsample = nn.functional.interpolate
        return upsample(x, mode='nearest', scale_factor=2)


class Yolo3Net(nn.Module):
    """YOLOv3-style detector: a Darknet-like backbone plus three detection heads.

    The backbone is a stride-1 stem followed by five stride-2 stages, so the
    three tapped feature maps are 1/8, 1/16 and 1/32 of the input size
    (52, 26 and 13 pixels per side for a 416x416 input, hence the attribute
    names). Each head emits ``3 * (class_nums + 5)`` channels.

    Args:
        class_nums: Number of object classes predicted per anchor.
        layers: Number of residual blocks in each of the five backbone
            stages; only indices 0-4 are read. The default is the
            Darknet-53 layout.
    """

    def __init__(self, class_nums, layers=(1, 2, 8, 8, 4)):  # Darknet-53 by default
        super().__init__()
        self.class_nums = class_nums
        self.inplanes = 32
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu1 = nn.LeakyReLU(0.1)

        self.layer1 = self._make_layer([32, 64], layers[0], 0)
        self.layer2 = self._make_layer([64, 128], layers[1], 1)
        self.layer3 = self._make_layer([128, 256], layers[2], 2)
        self.layer4 = self._make_layer([256, 512], layers[3], 3)
        self.layer5 = self._make_layer([512, 1024], layers[4], 4)

        # 3 anchors per scale, each with class_nums scores plus 5 further
        # values (presumably 4 box coordinates + objectness -- confirm against
        # the loss implementation). E.g. class_nums = 1 -> 3 * (1 + 5) = 18.
        head_channels = 3 * (self.class_nums + 5)

        # NOTE: the "delection_*" spelling is kept on purpose; renaming the
        # attributes would change the state_dict keys of saved checkpoints.
        # NOTE(review): the final 1x1 conv of each head has no batch norm and
        # bias=False, so its output has no learnable offset -- confirm this
        # is intended before training.
        self.convset_13 = ConvSet(1024, 512)
        self.delection_13 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1, False),
            nn.Conv2d(512, head_channels, kernel_size=1, stride=1, padding=0, bias=False)
        )
        self.up13_to_26 = nn.Sequential(
            Conv2d(512, 256, 1, 1, 0, False),
            Upsampling()
        )

        self.convset_26 = ConvSet(768, 512)  # 512 (layer4) + 256 (upsampled) = 768
        self.delection_26 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1, False),
            nn.Conv2d(512, head_channels, kernel_size=1, stride=1, padding=0, bias=False)
        )
        self.up26_to_52 = nn.Sequential(
            Conv2d(512, 256, 1, 1, 0, False),
            Upsampling()
        )

        self.convset_52 = ConvSet(512, 512)  # 256 (layer3) + 256 (upsampled) = 512
        self.delection_52 = nn.Sequential(
            Conv2d(512, 512, 3, 1, 1, False),
            nn.Conv2d(512, head_channels, kernel_size=1, stride=1, padding=0, bias=False)
        )

        self.layers_out_filters = [64, 128, 256, 512, 1024]

        # Re-initialise every raw conv / batch-norm layer in the network.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Zero-mean normal with std = sqrt(2 / (kh * kw * out_channels)).
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0.0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, planes, blocks, layer_num):
        """Build one backbone stage: a stride-2 downsample plus residual blocks.

        Args:
            planes: Two-element sequence; ``planes[0]`` is the bottleneck
                width inside each residual block and ``planes[1]`` is the
                stage's output width.
            blocks: Number of residual blocks to append.
            layer_num: Unused; kept so existing call sites keep working.

        Returns:
            An ``nn.Sequential`` with sub-modules named ``ds_conv``,
            ``ds_bn``, ``ds_relu`` and ``residual_<i>``.

        Side effect: sets ``self.inplanes`` to ``planes[1]`` so the next
        stage knows its input width.
        """
        layers = []

        # Downsample: 3x3 conv with stride 2 halves the spatial size.
        layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3, stride=2, padding=1, bias=False)))
        layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
        layers.append(("ds_relu", nn.LeakyReLU(0.1)))
        # Residual blocks operate at the new width.
        self.inplanes = planes[1]
        for i in range(blocks):
            layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
        return nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        """Run the detector.

        Args:
            x: Image batch with 3 channels. The skip connections are joined
                with ``torch.cat`` after 2x upsampling, so the spatial sizes
                must line up (they do when height and width are multiples
                of 32, e.g. 416).

        Returns:
            Tuple ``(out_52, out_26, out_13)`` of head outputs, ordered from
            the finest grid (stride 8) to the coarsest (stride 32).
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x_52 = self.layer3(x)  # backbone output 1 (stride 8, 256 channels)
        x_26 = self.layer4(x_52)  # backbone output 2 (stride 16, 512 channels)
        x_13 = self.layer5(x_26)  # backbone output 3 (stride 32, 1024 channels)

        x_13 = self.convset_13(x_13)
        out_13 = self.delection_13(x_13)  # network output 1

        x_13 = self.up13_to_26(x_13)

        # Concatenate along the channel axis: 256 (upsampled) + 512 (layer4).
        x_26 = torch.cat((x_13, x_26), dim=1)
        x_26 = self.convset_26(x_26)
        out_26 = self.delection_26(x_26)  # network output 2

        x_26 = self.up26_to_52(x_26)

        # Both tensors now have 256 channels and the same spatial size;
        # dim=1 concatenates along the channel axis.
        x_52 = torch.cat((x_26, x_52), dim=1)
        x_52 = self.convset_52(x_52)
        out_52 = self.delection_52(x_52)  # network output 3

        return out_52, out_26, out_13

if __name__ == '__main__':
    # Smoke test: build the network for 5 classes, print its structure, then
    # time two forward passes on random 416x416 input.
    model = Yolo3Net(class_nums=5)
    print(model)
    model.eval()

    # Inference only: disable autograd so no graph is recorded, which would
    # otherwise inflate both memory use and the measured time.
    with torch.no_grad():
        for _ in range(2):
            # perf_counter is a monotonic, high-resolution clock meant for
            # measuring intervals (time.time can jump with clock adjustments).
            start = time.perf_counter()
            x = torch.rand(1, 3, 416, 416)
            outputs = model(x)
            for out in outputs:
                print(out.shape)
            elapsed = time.perf_counter() - start
            print(elapsed)
  • 运行测试
1
2
3
4
5
6
7
8
torch.Size([1, 30, 52, 52])
torch.Size([1, 30, 26, 26])
torch.Size([1, 30, 13, 13])
2.550520658493042
torch.Size([1, 30, 52, 52])
torch.Size([1, 30, 26, 26])
torch.Size([1, 30, 13, 13])
2.7060494422912598

参考文献

1. yolov3 darknet53网络及mobilenet改进
-------------本文结束感谢您的阅读-------------
0%