
Commit 6cdeca2

rwightman and chris-ha458 authored and committed
Some cleanup and fixes for initial BlurPool impl. Still some testing and tweaks to go...
1 parent acd1b6c commit 6cdeca2

File tree

2 files changed: +43 −52 lines

timm/models/layers/blurpool.py

Lines changed: 23 additions & 20 deletions
@@ -1,14 +1,17 @@
-'''
+"""
 BlurPool layer inspired by
-Kornia's Max_BlurPool2d
-and
-Making Convolutional Networks Shift-Invariant Again :cite:`zhang2019shiftinvar`
+ - Kornia's Max_BlurPool2d
+ - Making Convolutional Networks Shift-Invariant Again :cite:`zhang2019shiftinvar`
+
+Hacked together by Chris Ha and Ross Wightman
+"""
 
-'''
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+import numpy as np
+from .padding import get_padding
 
 
 class BlurPool2d(nn.Module):
@@ -25,30 +28,30 @@ class BlurPool2d(nn.Module):
     Examples:
     """
 
-    def __init__(self, channels=None, blur_filter_size=3, stride=2) -> None:
+    def __init__(self, channels, blur_filter_size=3, stride=2) -> None:
         super(BlurPool2d, self).__init__()
-        assert blur_filter_size in [3, 5]
+        assert blur_filter_size > 1
         self.channels = channels
         self.blur_filter_size = blur_filter_size
         self.stride = stride
 
-        if blur_filter_size == 3:
-            pad_size = [1] * 4
-            blur_matrix = torch.Tensor([[1., 2., 1]]) / 4 # binomial filter b2
-        else:
-            pad_size = [2] * 4
-            blur_matrix = torch.Tensor([[1., 4., 6., 4., 1.]]) / 16 # binomial filter b4
-
+        pad_size = [get_padding(blur_filter_size, stride, dilation=1)] * 4
         self.padding = nn.ReflectionPad2d(pad_size)
-        blur_filter = blur_matrix * blur_matrix.T
+
+        blur_matrix = (np.poly1d((0.5, 0.5)) ** (blur_filter_size - 1)).coeffs
+        blur_filter = torch.Tensor(blur_matrix[:, None] * blur_matrix[None, :])
+        # FIXME figure a clean hack to prevent the filter from getting saved in weights, but still
+        # plays nice with recursive module apply for fn like .cuda(), .type(), etc -RW
         self.register_buffer('blur_filter', blur_filter[None, None, :, :].repeat((self.channels, 1, 1, 1)))
 
     def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:  # type: ignore
         if not torch.is_tensor(input_tensor):
-            raise TypeError("Input input type is not a torch.Tensor. Got {}"
-                            .format(type(input_tensor)))
+            raise TypeError("Input input type is not a torch.Tensor. Got {}".format(type(input_tensor)))
         if not len(input_tensor.shape) == 4:
-            raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}"
-                             .format(input_tensor.shape))
+            raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input_tensor.shape))
         # apply blur_filter on input
-        return F.conv2d(self.padding(input_tensor), self.blur_filter, stride=self.stride, groups=input_tensor.shape[1])
+        return F.conv2d(
+            self.padding(input_tensor),
+            self.blur_filter.type(input_tensor.dtype),
+            stride=self.stride,
+            groups=input_tensor.shape[1])
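
Note: the `np.poly1d((0.5, 0.5)) ** (blur_filter_size - 1)` expression generalizes the two previously hardcoded binomial kernels to any filter size > 1. A minimal standalone sketch (NumPy only; the loop and assert are illustrative, not part of the commit) checking that it reproduces the old size-3 and size-5 taps:

import numpy as np

# Coefficients of (0.5 + 0.5x)^(n-1) are the normalized binomial filter taps.
for size in (3, 5):
    taps = (np.poly1d((0.5, 0.5)) ** (size - 1)).coeffs
    print(size, taps)
# 3 -> [0.25 0.5 0.25]                    i.e. [1., 2., 1.] / 4
# 5 -> [0.0625 0.25 0.375 0.25 0.0625]    i.e. [1., 4., 6., 4., 1.] / 16

# The 2d kernel registered by BlurPool2d is the outer product of the 1d taps;
# it sums to 1.0, so the blur preserves overall intensity.
taps = (np.poly1d((0.5, 0.5)) ** 2).coeffs
kernel_2d = taps[:, None] * taps[None, :]
assert abs(kernel_2d.sum() - 1.0) < 1e-6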

timm/models/resnet.py

Lines changed: 20 additions & 32 deletions
@@ -127,21 +127,14 @@ def __init__(self, inplanes, planes, stride=1, downsample=None, cardinality=1, b
         first_planes = planes // reduce_first
         outplanes = planes * self.expansion
         first_dilation = first_dilation or dilation
-        self.blur = blur
 
-        if blur and stride==2:
-            self.conv1 = nn.Conv2d(
-                inplanes, first_planes, kernel_size=3, stride=1, padding=first_dilation,
-                dilation=first_dilation, bias=False)
-            self.blurpool=BlurPool2d(channels=first_planes)
-        else:
-            self.conv1 = nn.Conv2d(
-                inplanes, first_planes, kernel_size=3, stride=stride, padding=first_dilation,
+        self.conv1 = nn.Conv2d(
+            inplanes, first_planes, kernel_size=3, stride=1 if blur else stride, padding=first_dilation,
             dilation=first_dilation, bias=False)
-            self.blurpool = None
-
         self.bn1 = norm_layer(first_planes)
         self.act1 = act_layer(inplace=True)
+        self.blurpool = BlurPool2d(channels=first_planes) if stride == 2 and blur else None
+
         self.conv2 = nn.Conv2d(
             first_planes, outplanes, kernel_size=3, padding=dilation, dilation=dilation, bias=False)
         self.bn2 = norm_layer(outplanes)
@@ -165,11 +158,9 @@ def forward(self, x):
         x = self.bn1(x)
         if self.drop_block is not None:
             x = self.drop_block(x)
+        x = self.act1(x)
         if self.blurpool is not None:
-            x = self.act1(x)
             x = self.blurpool(x)
-        else:
-            x = self.act1(x)
 
         x = self.conv2(x)
         x = self.bn2(x)
@@ -209,19 +200,13 @@ def __init__(self, inplanes, planes, stride=1, downsample=None, cardinality=1, b
         self.bn1 = norm_layer(first_planes)
         self.act1 = act_layer(inplace=True)
 
-        if blur and stride==2:
-            self.conv2 = nn.Conv2d(
-                first_planes, width, kernel_size=3, stride=1,
-                padding=first_dilation, dilation=first_dilation, groups=cardinality, bias=False)
-            self.blurpool = BlurPool2d(channels=width)
-        else:
-            self.conv2 = nn.Conv2d(
-                first_planes, width, kernel_size=3, stride=stride,
-                padding=first_dilation, dilation=first_dilation, groups=cardinality, bias=False)
-            self.blurpool = None
-
+        self.conv2 = nn.Conv2d(
+            first_planes, width, kernel_size=3, stride=1 if blur else stride,
+            padding=first_dilation, dilation=first_dilation, groups=cardinality, bias=False)
         self.bn2 = norm_layer(width)
         self.act2 = act_layer(inplace=True)
+        self.blurpool = BlurPool2d(channels=width) if stride == 2 and blur else None
+
         self.conv3 = nn.Conv2d(width, outplanes, kernel_size=1, bias=False)
         self.bn3 = norm_layer(outplanes)
 
@@ -251,6 +236,8 @@ def forward(self, x):
         if self.drop_block is not None:
             x = self.drop_block(x)
         x = self.act2(x)
+        if self.blurpool is not None:
+            x = self.blurpool(x)
 
         x = self.conv3(x)
         x = self.bn3(x)
@@ -412,11 +399,12 @@ def __init__(self, block, layers, num_classes=1000, in_chans=3,
         self.conv1 = nn.Conv2d(in_chans, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
         self.bn1 = norm_layer(self.inplanes)
         self.act1 = act_layer(inplace=True)
-        # Stem Blur
+        # Stem Pooling
        if 'max' in blur :
             self.maxpool = nn.Sequential(*[
-                nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
-                BlurPool2d(channels=self.inplanes)])
+                nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
+                BlurPool2d(channels=self.inplanes, stride=2)
+            ])
         else :
             self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 
@@ -470,8 +458,8 @@ def _make_layer(self, block, planes, blocks, stride=1, dilation=1, reduce_first=
 
         block_kwargs = dict(
             cardinality=self.cardinality, base_width=self.base_width, reduce_first=reduce_first,
-            dilation=dilation, **kwargs)
-        layers = [block(self.inplanes, planes, stride, downsample, first_dilation=first_dilation, blur=self.blur, **block_kwargs)]
+            dilation=dilation, blur=self.blur, **kwargs)
+        layers = [block(self.inplanes, planes, stride, downsample, first_dilation=first_dilation, **block_kwargs)]
         self.inplanes = planes * block.expansion
         layers += [block(self.inplanes, planes, **block_kwargs) for _ in range(1, blocks)]
 
@@ -1075,7 +1063,7 @@ def resnetblur18(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
 def resnetblur50(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """Constructs a ResNet-50 model. With assembled-cnn style blur
     """
-    default_cfg = default_cfgs['resnetblur18']
-    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans, blur='strided', **kwargs)
+    default_cfg = default_cfgs['resnetblur50']
+    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans, blur='max_strided', **kwargs)
     model.default_cfg = default_cfg
     return model
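
For context, a quick smoke test of the corrected constructor — a sketch assuming timm at this commit is importable; the batch and input size (1, 3, 224, 224) are arbitrary choices, not part of the commit:

import torch
from timm.models.resnet import resnetblur50

# Build the blur variant: per this diff, stride-2 convs become stride-1 conv +
# BlurPool2d, and with blur='max_strided' the stem pool becomes
# MaxPool2d(stride=1) followed by BlurPool2d(stride=2).
model = resnetblur50(num_classes=1000)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # expected: torch.Size([1, 1000])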

0 commit comments