经典网络结构——GoogleNet

参考文献

2014年
GoogleNet解读
论文翻译中文
论文原文
论文中文翻译

模型代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""google net in pytorch



[1] Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.

Going Deeper with Convolutions
https://arxiv.org/abs/1409.4842v1
"""

import torch
import torch.nn as nn

class Inception(nn.Module):
def __init__(self, input_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
super().__init__()

#1x1conv branch
self.b1 = nn.Sequential(
nn.Conv2d(input_channels, n1x1, kernel_size=1),
nn.BatchNorm2d(n1x1),
nn.ReLU(inplace=True)
)

#1x1conv -> 3x3conv branch
self.b2 = nn.Sequential(
nn.Conv2d(input_channels, n3x3_reduce, kernel_size=1),
nn.BatchNorm2d(n3x3_reduce),
nn.ReLU(inplace=True),
nn.Conv2d(n3x3_reduce, n3x3, kernel_size=3, padding=1),
nn.BatchNorm2d(n3x3),
nn.ReLU(inplace=True)
)

#1x1conv -> 5x5conv branch
#we use 2 3x3 conv filters stacked instead
#of 1 5x5 filters to obtain the same receptive
#field with fewer parameters
self.b3 = nn.Sequential(
nn.Conv2d(input_channels, n5x5_reduce, kernel_size=1),
nn.BatchNorm2d(n5x5_reduce),
nn.ReLU(inplace=True),
nn.Conv2d(n5x5_reduce, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5, n5x5),
nn.ReLU(inplace=True),
nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
nn.BatchNorm2d(n5x5),
nn.ReLU(inplace=True)
)

#3x3pooling -> 1x1conv
#same conv
self.b4 = nn.Sequential(
nn.MaxPool2d(3, stride=1, padding=1),
nn.Conv2d(input_channels, pool_proj, kernel_size=1),
nn.BatchNorm2d(pool_proj),
nn.ReLU(inplace=True)
)

def forward(self, x):
return torch.cat([self.b1(x), self.b2(x), self.b3(x), self.b4(x)], dim=1)


class GoogleNet(nn.Module):

def __init__(self, num_class=3):
super().__init__()
self.prelayer = nn.Sequential(
nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.Conv2d(64, 192, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(192),
nn.ReLU(inplace=True),
)

#although we only use 1 conv layer as prelayer,
#we still use name a3, b3.......
self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

##"""In general, an Inception network is a network consisting of
##modules of the above type stacked upon each other, with occasional
##max-pooling layers with stride 2 to halve the resolution of the
##grid"""
self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

#input feature size: 8*8*1024
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.dropout = nn.Dropout2d(p=0.4)
self.linear = nn.Linear(1024, num_class)

def forward(self, x):
x = self.prelayer(x)
x = self.maxpool(x)
x = self.a3(x)
x = self.b3(x)

x = self.maxpool(x)

x = self.a4(x)
x = self.b4(x)
x = self.c4(x)
x = self.d4(x)
x = self.e4(x)

x = self.maxpool(x)

x = self.a5(x)
x = self.b5(x)

#"""It was found that a move from fully connected layers to
#average pooling improved the top-1 accuracy by about 0.6%,
#however the use of dropout remained essential even after
#removing the fully connected layers."""
x = self.avgpool(x)
x = self.dropout(x)
x = x.view(x.size()[0], -1)
x = self.linear(x)

return x

def googlenet():
return GoogleNet()


# import torch
# from torchsummary import summary

# # Instantiate the GoogleNet model
# model = googlenet()

# # Move the model to the device (e.g., GPU if available)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# # Print the model summary
# summary(model, (1, 33, 1025)) # Adjust the input size (channels, height, width) as needed


经典网络结构——GoogleNet
https://chenlidbk.xyz/2024/04/27/deeplearnpaper3/
作者
chenchangqing
发布于
2024年4月27日
许可协议