ya basic & bottle

ayasyrev · ayasyrev · commit 8f4b92ae3f66 · 2022-12-27T17:06:09.000+03:00
diff --git a/src/model_constructor/universal_blocks.py b/src/model_constructor/universal_blocks.py
@@ -140,7 +140,9 @@ def forward(self, x: torch.Tensor):  # type: ignore
 
 
 class YaResBlock(nn.Module):
-    """YaResBlock. Reduce by pool instead of stride 2"""
+    """YaResBlock. Reduce by pool instead of stride 2.
+    Universal block, as in XResNet.
+    If `expansion=1` it is a `Basic` block, otherwise a `Bottleneck` block."""
 
     def __init__(
         self,
diff --git a/src/model_constructor/yaresnet.py b/src/model_constructor/yaresnet.py
@@ -4,44 +4,48 @@
 from collections import OrderedDict
 from typing import Callable, Union
 
-import torch.nn as nn
+import torch
+from torch import nn
 from torch.nn import Mish
 
+from model_constructor.helpers import nn_seq
+
 from .layers import ConvBnAct, get_act
-from .model_constructor import ModelConstructor
+from .model_constructor import ListStrMod, ModelConstructor
 
 __all__ = [
-    "YaResBlock",
+    "YaBasicBlock",
+    "YaBottleneckBlock",
+    "YaResNet",
     "YaResNet34",
     "YaResNet50",
 ]
 
 
-class YaResBlock(nn.Module):
-    """YaResBlock. Reduce by pool instead of stride 2"""
+class YaBasicBlock(nn.Module):
+    """Ya Basic block.
+    Reduce by pool instead of stride 2"""
 
     def __init__(
         self,
-        expansion: int,
         in_channels: int,
-        mid_channels: int,
+        out_channels: int,
         stride: int = 1,
-        conv_layer=ConvBnAct,
+        conv_layer: type[ConvBnAct] = ConvBnAct,
         act_fn: type[nn.Module] = nn.ReLU,
         zero_bn: bool = True,
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
         div_groups: Union[None, int] = None,
         pool: Union[Callable[[], nn.Module], None] = None,
-        se: Union[type[nn.Module], None] = None,
-        sa: Union[type[nn.Module], None] = None,
+        se: Union[nn.Module, None] = None,
+        sa: Union[nn.Module, None] = None,
     ):
         super().__init__()
         # pool defined at ModelConstructor.
-        out_channels, in_channels = mid_channels * expansion, in_channels * expansion
         if div_groups is not None:  # check if groups != 1 and div_groups
-            groups = int(mid_channels / div_groups)
+            groups = int(out_channels / div_groups)
 
         if stride != 1:
             if pool is None:
@@ -51,74 +55,133 @@ def __init__(
                 self.reduce = pool()
         else:
             self.reduce = None
-        if expansion == 1:
-            layers = [
-                (
-                    "conv_0",
-                    conv_layer(
-                        in_channels,
-                        mid_channels,
-                        3,
-                        stride=1,
-                        act_fn=act_fn,
-                        bn_1st=bn_1st,
-                        groups=in_channels if dw else groups,
-                    ),
+
+        layers: ListStrMod = [
+            (
+                "conv_0",
+                conv_layer(
+                    in_channels,
+                    out_channels,
+                    3,
+                    act_fn=act_fn,
+                    bn_1st=bn_1st,
+                    groups=in_channels if dw else groups,
                 ),
-                (
-                    "conv_1",
-                    conv_layer(
-                        mid_channels,
-                        out_channels,
-                        3,
-                        zero_bn=zero_bn,
-                        act_fn=False,
-                        bn_1st=bn_1st,
-                        groups=mid_channels if dw else groups,
-                    ),
+            ),
+            (
+                "conv_1",
+                conv_layer(
+                    out_channels,
+                    out_channels,
+                    3,
+                    zero_bn=zero_bn,
+                    act_fn=False,
+                    bn_1st=bn_1st,
+                    groups=out_channels if dw else groups,
                 ),
-            ]
+            ),
+        ]
+        if se:
+            layers.append(("se", se(out_channels)))
+        if sa:
+            layers.append(("sa", sa(out_channels)))
+        self.convs = nn_seq(layers)
+
+        if in_channels != out_channels:
+            self.id_conv = conv_layer(
+                in_channels,
+                out_channels,
+                1,
+                stride=1,
+                act_fn=False,
+            )
         else:
-            layers = [
-                (
-                    "conv_0",
-                    conv_layer(
-                        in_channels,
-                        mid_channels,
-                        1,
-                        act_fn=act_fn,
-                        bn_1st=bn_1st,
-                    ),
+            self.id_conv = None
+        self.merge = get_act(act_fn)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
+        if self.reduce:
+            x = self.reduce(x)
+        identity = self.id_conv(x) if self.id_conv is not None else x
+        return self.merge(self.convs(x) + identity)
+
+
+class YaBottleneckBlock(nn.Module):
+    """Ya Bottleneck block.
+    Reduce by pool instead of stride 2"""
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        stride: int = 1,
+        expansion: int = 4,
+        conv_layer: type[ConvBnAct] = ConvBnAct,
+        act_fn: type[nn.Module] = nn.ReLU,
+        zero_bn: bool = True,
+        bn_1st: bool = True,
+        groups: int = 1,
+        dw: bool = False,
+        div_groups: Union[None, int] = None,
+        pool: Union[Callable[[], nn.Module], None] = None,
+        se: Union[nn.Module, None] = None,
+        sa: Union[nn.Module, None] = None,
+    ):
+        super().__init__()
+        # pool defined at ModelConstructor.
+        mid_channels = out_channels // expansion
+        if div_groups is not None:  # check if groups != 1 and div_groups
+            groups = int(mid_channels / div_groups)
+
+        if stride != 1:
+            if pool is None:
+                self.reduce = conv_layer(in_channels, in_channels, 1, stride=2)
+                # warnings.warn("pool not passed")  # need to warn?
+            else:
+                self.reduce = pool()
+        else:
+            self.reduce = None
+
+        layers: ListStrMod = [
+            (
+                "conv_0",
+                conv_layer(
+                    in_channels,
+                    mid_channels,
+                    1,
+                    act_fn=act_fn,
+                    bn_1st=bn_1st,
+                ),
+            ),
+            (
+                "conv_1",
+                conv_layer(
+                    mid_channels,
+                    mid_channels,
+                    3,
+                    act_fn=act_fn,
+                    bn_1st=bn_1st,
+                    groups=mid_channels if dw else groups,
                 ),
-                (
-                    "conv_1",
-                    conv_layer(
-                        mid_channels,
-                        mid_channels,
-                        3,
-                        stride=1,
-                        act_fn=act_fn,
-                        bn_1st=bn_1st,
-                        groups=mid_channels if dw else groups,
-                    ),
+            ),
+            (
+                "conv_2",
+                conv_layer(
+                    mid_channels,
+                    out_channels,
+                    1,
+                    zero_bn=zero_bn,
+                    act_fn=False,
+                    bn_1st=bn_1st,
                 ),
-                (
-                    "conv_2",
-                    conv_layer(
-                        mid_channels,
-                        out_channels,
-                        1,
-                        zero_bn=zero_bn,
-                        act_fn=False,
-                        bn_1st=bn_1st,
-                    ),
-                ),  # noqa E501
-            ]
+            ),
+        ]
         if se:
-            layers.append(("se", se(out_channels)))  # type: ignore
+            layers.append(("se", se(out_channels)))
         if sa:
-            layers.append(("sa", sa(out_channels)))  # type: ignore
-        self.convs = nn.Sequential(OrderedDict(layers))
+            layers.append(("sa", sa(out_channels)))
+        self.convs = nn_seq(layers)
+
         if in_channels != out_channels:
             self.id_conv = conv_layer(
                 in_channels,
@@ -131,20 +194,23 @@ def __init__(
             self.id_conv = None
         self.merge = get_act(act_fn)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:  # type: ignore
         if self.reduce:
             x = self.reduce(x)
         identity = self.id_conv(x) if self.id_conv is not None else x
         return self.merge(self.convs(x) + identity)
 
 
-class YaResNet34(ModelConstructor):
-    block: type[nn.Module] = YaResBlock
-    expansion: int = 1
-    layers: list[int] = [3, 4, 6, 3]
+class YaResNet(ModelConstructor):  # base YaResNet config: sets the class-level attributes below on top of ModelConstructor
+    block: type[nn.Module] = YaBasicBlock  # block class; YaBasicBlock reduces by pool instead of stride 2
+    stem_sizes: list[int] = [3, 32, 64, 64]  # NOTE(review): presumably stem channel sizes, input channels first - confirm
+    act_fn: type[nn.Module] = Mish  # activation class, imported from torch.nn
 
 
+class YaResNet34(YaResNet):
+    layers: list[int] = [3, 4, 6, 3]  # ResNet-34 stage depths (restored); stem_sizes is inherited from YaResNet
+
+
 class YaResNet50(YaResNet34):
-    expansion: int = 4
+    block: type[nn.Module] = YaBottleneckBlock  # use the bottleneck block defined in this module
+    block_sizes: list[int] = [256, 512, 1024, 2048]  # NOTE(review): presumably per-stage output channels - confirm against ModelConstructor
diff --git a/tests/test_blocks.py b/tests/test_blocks.py
@@ -6,6 +6,7 @@
 
 from model_constructor.layers import SEModule, SimpleSelfAttention
 from model_constructor.model_constructor import BasicBlock, BottleneckBlock
+from model_constructor.yaresnet import YaBasicBlock, YaBottleneckBlock
 
 from .parameters import ids_fn
 
@@ -14,8 +15,12 @@
 
 
 params = dict(
-    Block=[BasicBlock, BottleneckBlock],
-    # expansion=[1, 2],
+    Block=[
+        BasicBlock,
+        BottleneckBlock,
+        YaBasicBlock,
+        YaBottleneckBlock,
+    ],
     out_channels=[8, 16],
     stride=[1, 2],
     div_groups=[None, 2],
@@ -34,7 +39,6 @@ def pytest_generate_tests(metafunc):
 def test_block(Block, out_channels, stride, div_groups, pool, se, sa):
     """test block"""
     in_channels = 8
-    # out_channels = mid_channels * expansion
     block = Block(
         in_channels,
         out_channels,
@@ -48,3 +52,18 @@ def test_block(Block, out_channels, stride, div_groups, pool, se, sa):
     out = block(xb)
     out_size = img_size if stride == 1 else img_size // stride
     assert out.shape == torch.Size([bs_test, out_channels, out_size, out_size])
+
+
+def test_block_dw(Block, out_channels, stride):
+    """Block built with `dw=True` returns the expected output shape."""
+    in_channels = 8
+    block = Block(
+        in_channels,
+        out_channels,
+        stride,
+        dw=True,
+    )
+    xb = torch.randn(bs_test, in_channels, img_size, img_size)
+    out = block(xb)
+    out_size = img_size if stride == 1 else img_size // stride  # spatial size is divided by the stride
+    assert out.shape == torch.Size([bs_test, out_channels, out_size, out_size])
diff --git a/tests/test_blocks_universal.py b/tests/test_blocks_universal.py
@@ -49,3 +49,20 @@ def test_block(Block, expansion, mid_channels, stride, div_groups, pool, se, sa)
     out = block(xb)
     out_size = img_size if stride == 1 else img_size // stride
     assert out.shape == torch.Size([bs_test, out_channels, out_size, out_size])
+
+
+def test_block_dw(Block, expansion, mid_channels, stride):
+    """Universal block built with `dw=True` returns the expected output shape."""
+    in_channels = 8
+    out_channels = mid_channels * expansion
+    block = Block(
+        expansion,
+        in_channels,
+        mid_channels,
+        stride,
+        dw=True,
+    )
+    xb = torch.randn(bs_test, in_channels * expansion, img_size, img_size)  # input carries in_channels * expansion channels
+    out = block(xb)
+    out_size = img_size if stride == 1 else img_size // stride  # spatial size is divided by the stride
+    assert out.shape == torch.Size([bs_test, out_channels, out_size, out_size])
diff --git a/tests/test_mc.py b/tests/test_mc.py
@@ -2,10 +2,8 @@
 
 import torch
 
-from model_constructor.layers import (SEModule, SEModuleConv,
-                                      SimpleSelfAttention)
-from model_constructor.model_constructor import (BottleneckBlock,
-                                                 ModelConstructor)
+from model_constructor.layers import SEModule, SEModuleConv, SimpleSelfAttention
+from model_constructor.model_constructor import BottleneckBlock, ModelConstructor
 
 bs_test = 4
 in_chans = 3
diff --git a/tests/test_models.py b/tests/test_models.py