
Receptive field calculation for GoogLeNet

  • batuman  ·  6 years ago

    I am trying to understand the receptive field of GoogLeNet.

    The program I am using is shown below.

    import math
    convnet =   [[7,2,3],[1,1,0],[3,2,0],[1,1,0],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[3,2,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[3,2,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[5,3,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[5,3,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[3,2,1],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[7,1,1]]
    layer_names = ["conv1/7x7_s2","conv1/relu_7x7","pool1/3x3_s2","pool1/norm1","conv2/3x3_reduce","conv2/relu_3x3_reduce","conv2/3x3","conv2/relu_3x3","pool2/3x3_s2","inception_3a/3x3_reduce","inception_3a/relu_3x3_reduce","inception_3a/3x3","inception_3a/relu_3x3","inception_3b/3x3_reduce","inception_3b/relu_3x3_reduce","inception_3b/3x3","inception_3b/relu_3x3","pool3/3x3_s2","inception_4a/3x3_reduce","inception_4a/relu_3x3_reduce","inception_4a/3x3","inception_4a/relu_3x3","loss1/ave_pool","inception_4b/3x3_reduce","inception_4b/relu_3x3_reduce","inception_4b/3x3","inception_4b/relu_3x3","inception_4c/3x3_reduce","inception_4c/relu_3x3_reduce","inception_4c/3x3","inception_4c/relu_3x3","inception_4d/3x3_reduce","inception_4d/relu_3x3_reduce","inception_4d/3x3","inception_4d/relu_3x3","loss2/ave_pool","inception_4e/3x3_reduce","inception_4e/relu_3x3_reduce","inception_4e/3x3","inception_4e/relu_3x3","pool4/3x3_s2","inception_5a/3x3_reduce","inception_5a/relu_3x3_reduce","inception_5a/3x3","inception_5a/relu_3x3","inception_5b/3x3_reduce","inception_5b/relu_3x3_reduce","inception_5b/3x3","inception_5b/relu_3x3","pool5/7x7_s1"]
    imsize = 720  # note: the sample listing further below was generated with imsize = 224
    def outFromIn(isz, layernum, net=convnet):
        """Output spatial size and total stride after the first `layernum` layers."""
        if layernum > len(net):
            layernum = len(net)
        totstride = 1
        insize = isz
        for layer in range(layernum):
            fsize, stride, pad = net[layer]
            # standard output-size formula: depends on input size, kernel, stride and padding
            outsize = (insize - fsize + 2 * pad) // stride + 1
            insize = outsize
            totstride = totstride * stride
        return outsize, totstride
    
    def inFromOut(layernum, net=convnet):
        """Receptive field of a single output unit of layer `layernum`, traced back to the input."""
        if layernum > len(net):
            layernum = len(net)
        outsize = 1
        for layer in reversed(range(layernum)):
            fsize, stride, pad = net[layer]
            # receptive-field recursion: padding is intentionally not used here
            outsize = (outsize - 1) * stride + fsize
        return outsize
    
    if __name__ == '__main__':
        print("layer output sizes given image = %dx%d" % (imsize, imsize))
        for i in range(len(convnet)):
            p = outFromIn(imsize, i + 1)
            rf = inFromOut(i + 1)
            print("Layer Name = %s, Output size = %3d, Stride = % 3d, RF size = %3d"
                  % (layer_names[i], p[0], p[1], rf))
    

    The receptive field computed for each layer comes out as follows:

    layer output sizes given image = 224x224
    Layer Name = conv1/7x7_s2, Output size = 112, Stride =   2, RF size =   7
    Layer Name = conv1/relu_7x7, Output size = 112, Stride =   2, RF size =   7
    Layer Name = pool1/3x3_s2, Output size =  55, Stride =   4, RF size =  11
    Layer Name = pool1/norm1, Output size =  55, Stride =   4, RF size =  11
    Layer Name = conv2/3x3_reduce, Output size =  55, Stride =   4, RF size =  11
    Layer Name = conv2/relu_3x3_reduce, Output size =  55, Stride =   4, RF size =  11
    Layer Name = conv2/3x3, Output size =  55, Stride =   4, RF size =  19
    Layer Name = conv2/relu_3x3, Output size =  55, Stride =   4, RF size =  19
    Layer Name = pool2/3x3_s2, Output size =  27, Stride =   8, RF size =  27
    Layer Name = inception_3a/3x3_reduce, Output size =  27, Stride =   8, RF size =  27
    Layer Name = inception_3a/relu_3x3_reduce, Output size =  27, Stride =   8, RF size =  27
    Layer Name = inception_3a/3x3, Output size =  27, Stride =   8, RF size =  43
    Layer Name = inception_3a/relu_3x3, Output size =  27, Stride =   8, RF size =  43
    Layer Name = inception_3b/3x3_reduce, Output size =  27, Stride =   8, RF size =  43
    Layer Name = inception_3b/relu_3x3_reduce, Output size =  27, Stride =   8, RF size =  43
    Layer Name = inception_3b/3x3, Output size =  27, Stride =   8, RF size =  59
    Layer Name = inception_3b/relu_3x3, Output size =  27, Stride =   8, RF size =  59
    Layer Name = pool3/3x3_s2, Output size =  13, Stride =  16, RF size =  75
    Layer Name = inception_4a/3x3_reduce, Output size =  13, Stride =  16, RF size =  75
    Layer Name = inception_4a/relu_3x3_reduce, Output size =  13, Stride =  16, RF size =  75
    Layer Name = inception_4a/3x3, Output size =  13, Stride =  16, RF size = 107
    Layer Name = inception_4a/relu_3x3, Output size =  13, Stride =  16, RF size = 107
    Layer Name = inception_4b/3x3_reduce, Output size =  13, Stride =  16, RF size = 107
    Layer Name = inception_4b/relu_3x3_reduce, Output size =  13, Stride =  16, RF size = 107
    Layer Name = inception_4b/3x3, Output size =  13, Stride =  16, RF size = 139
    Layer Name = inception_4b/relu_3x3, Output size =  13, Stride =  16, RF size = 139
    Layer Name = inception_4c/3x3_reduce, Output size =  13, Stride =  16, RF size = 139
    Layer Name = inception_4c/relu_3x3_reduce, Output size =  13, Stride =  16, RF size = 139
    Layer Name = inception_4c/3x3, Output size =  13, Stride =  16, RF size = 171
    Layer Name = inception_4c/relu_3x3, Output size =  13, Stride =  16, RF size = 171
    Layer Name = inception_4d/3x3_reduce, Output size =  13, Stride =  16, RF size = 171
    Layer Name = inception_4d/relu_3x3_reduce, Output size =  13, Stride =  16, RF size = 171
    Layer Name = inception_4d/3x3, Output size =  13, Stride =  16, RF size = 203
    Layer Name = inception_4d/relu_3x3, Output size =  13, Stride =  16, RF size = 203
    Layer Name = inception_4e/3x3_reduce, Output size =  13, Stride =  16, RF size = 203
    Layer Name = inception_4e/relu_3x3_reduce, Output size =  13, Stride =  16, RF size = 203
    Layer Name = inception_4e/3x3, Output size =  13, Stride =  16, RF size = 235
    Layer Name = inception_4e/relu_3x3, Output size =  13, Stride =  16, RF size = 235
    Layer Name = pool4/3x3_s2, Output size =   7, Stride =  32, RF size = 267
    Layer Name = inception_5a/3x3_reduce, Output size =   7, Stride =  32, RF size = 267
    Layer Name = inception_5a/relu_3x3_reduce, Output size =   7, Stride =  32, RF size = 267
    Layer Name = inception_5a/3x3, Output size =   7, Stride =  32, RF size = 331
    Layer Name = inception_5a/relu_3x3, Output size =   7, Stride =  32, RF size = 331
    Layer Name = inception_5b/3x3_reduce, Output size =   7, Stride =  32, RF size = 331
    Layer Name = inception_5b/relu_3x3_reduce, Output size =   7, Stride =  32, RF size = 331
    Layer Name = inception_5b/3x3, Output size =   7, Stride =  32, RF size = 395
    Layer Name = inception_5b/relu_3x3, Output size =   7, Stride =  32, RF size = 395
    Layer Name = pool5/7x7_s1, Output size =   3, Stride =  32, RF size = 587
    

    What I don't understand is this: for an input image size of 224, I would expect the last layer to have a receptive field of roughly 224.

    The pool5/7x7_s1 layer is now at 587.

    Already the inception module inception_4e has a receptive field of about the same size as the 224 input image.

    What is wrong with my calculation? The paper clearly states a receptive field of 224 x 224 in RGB color space with zero mean.

    1 Answer

  •   Shai  ·  6 years ago

    The receptive field has nothing to do with the input size:
    the size of the receptive field tells you which input "pixels" affect (or participate in) the computation of a single output of the net.
    Note that when computing the receptive field you completely ignore the padding(!).
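
    As a small sketch of that recursion, here it is unrolled for the first three entries of the convnet list in the question; note that the pad value is never read:

        # receptive field of pool1/3x3_s2: conv1/7x7_s2 -> relu -> pool1/3x3_s2
        rf = 1
        for fsize, stride, pad in reversed([[7, 2, 3], [1, 1, 0], [3, 2, 0]]):
            rf = (rf - 1) * stride + fsize
        print(rf)  # 11, matching the "RF size = 11" row for pool1/3x3_s2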

    On the other hand, when you want to compute the output size of the net, you take into account the kernel sizes and strides (which affect the receptive field), but also the input size and the padding.
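
    For instance, for conv1/7x7_s2 the output-size formula floor((insize - fsize + 2*pad) / stride) + 1 gives (a minimal check, using the layer parameters from the question):

        # output size of conv1/7x7_s2 (fsize=7, stride=2, pad=3) for two input sizes
        print((224 - 7 + 2 * 3) // 2 + 1)  # 112 -- matches the listing for a 224x224 input
        print((720 - 7 + 2 * 3) // 2 + 1)  # 360 -- a larger input gives a larger output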

    Therefore, the receptive field size and the input/output sizes are related, but they are by no means the same.
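
    A quick way to see the difference with the question's own functions (a sketch; it assumes the script above has been run so that convnet, outFromIn and inFromOut are defined):

        # the output size follows the input size; the receptive field of the last layer does not
        for size in (224, 720):
            out, stride = outFromIn(size, len(convnet))
            print(size, out, stride, inFromOut(len(convnet)))
        # for 224 this reproduces the last row of the listing (output 3, stride 32, RF 587);
        # for 720 the output grows, but the RF stays 587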

    A very good guide to receptive field arithmetics