 import math
+from copy import deepcopy
 from functools import partial
 from typing import Callable, Dict, List, Optional, Tuple, Union

 from torch.jit import Final

 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
-from timm.layers import PatchEmbed, Mlp, DropPath, ClNormMlpClassifierHead, PatchDropout, \
+from timm.layers import PatchEmbed, Mlp, DropPath, ClNormMlpClassifierHead, LayerScale, \
     get_norm_layer, get_act_layer, init_weight_jax, init_weight_vit, to_2tuple, use_fused_attn

 from ._builder import build_model_with_cfg
@@ -121,11 +122,12 @@ def __init__(
             dim_out: int,
             num_heads: int,
             mlp_ratio: float = 4.0,
-            drop_path: float = 0.0,
             q_stride: Optional[Tuple[int, int]] = None,
             norm_layer: Union[nn.Module, str] = "LayerNorm",
             act_layer: Union[nn.Module, str] = "GELU",
             window_size: int = 0,
+            init_values: Optional[float] = None,
+            drop_path: float = 0.0,
     ):
         super().__init__()
         norm_layer = get_norm_layer(norm_layer)
@@ -135,43 +137,38 @@ def __init__(
         self.dim = dim
         self.dim_out = dim_out
         self.q_stride = q_stride
+
+        if dim != dim_out:
+            self.proj = nn.Linear(dim, dim_out)
+        else:
+            self.proj = nn.Identity()
+        self.pool = None
         if self.q_stride:
-            q_pool = nn.MaxPool2d(
+            # note: make a different instance for this module so that it's not shared with attn module
+            self.pool = nn.MaxPool2d(
                 kernel_size=q_stride,
                 stride=q_stride,
                 ceil_mode=False,
             )
-        else:
-            q_pool = None

         self.norm1 = norm_layer(dim)
         self.attn = MultiScaleAttention(
             dim,
             dim_out,
             num_heads=num_heads,
-            q_pool=q_pool,
+            q_pool=deepcopy(self.pool),
         )
-        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+        self.ls1 = LayerScale(dim_out, init_values) if init_values is not None else nn.Identity()
+        self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

         self.norm2 = norm_layer(dim_out)
         self.mlp = Mlp(
             dim_out,
             int(dim_out * mlp_ratio),
             act_layer=act_layer,
         )
-
-        if dim != dim_out:
-            self.proj = nn.Linear(dim, dim_out)
-        else:
-            self.proj = nn.Identity()
-        self.pool = None
-        if self.q_stride:
-            # note: make a different instance for this module so that it's not shared with attn module
-            self.pool = nn.MaxPool2d(
-                kernel_size=q_stride,
-                stride=q_stride,
-                ceil_mode=False,
-            )
+        self.ls2 = LayerScale(dim_out, init_values) if init_values is not None else nn.Identity()
+        self.drop_path2 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

     def forward(self, x: torch.Tensor) -> torch.Tensor:
         shortcut = x  # B, H, W, C
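
An aside on the constructor change above: q_pool=deepcopy(self.pool) hands the attention module its own pooling instance instead of sharing self.pool, as the in-code note says. A minimal standalone sketch of the difference (the names shared/copied here are illustrative, not from the patch):

import torch.nn as nn
from copy import deepcopy

pool = nn.MaxPool2d(kernel_size=2, stride=2)

# Registering the same instance under two parents means both parents
# hold the very same submodule object.
shared = nn.ModuleDict({'a': pool, 'b': pool})
assert shared['a'] is shared['b']

# deepcopy yields an independent instance, as done for q_pool above.
copied = nn.ModuleDict({'a': pool, 'b': deepcopy(pool)})
assert copied['a'] is not copied['b']
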
@@ -206,9 +203,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             x = window_unpartition(x, window_size, (Hp, Wp))
             x = x[:, :H, :W, :].contiguous()  # unpad

-        x = shortcut + self.drop_path(x)
-
-        x = x + self.drop_path(self.mlp(self.norm2(x)))
+        x = shortcut + self.drop_path1(self.ls1(x))
+        x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x))))
         return x

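For reference, LayerScale (imported above from timm.layers) is a learnable per-channel scale on each residual branch; with a small init_values the block starts close to an identity mapping, the CaiT-style trick for stabilizing deeper stacks. A rough sketch of the pattern, as an illustrative reimplementation rather than the library class:

import torch
import torch.nn as nn

class LayerScaleSketch(nn.Module):
    # Learnable per-channel gain, initialized small so the residual
    # branch contributes little at the start of training.
    def __init__(self, dim: int, init_values: float = 1e-5):
        super().__init__()
        self.gamma = nn.Parameter(init_values * torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.gamma

In the forward pass above it sits between the branch output and DropPath, i.e. shortcut + drop_path(gamma * branch(x)).
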
@@ -280,6 +276,7 @@ def __init__(
                 16,
                 20,
             ),
+            init_values: Optional[float] = None,
             weight_init: str = '',
             fix_init: bool = True,
             head_init_scale: float = 0.001,
@@ -628,7 +625,7 @@ def sam2_hiera_large(pretrained=False, **kwargs):

 @register_model
 def hieradet_small(pretrained=False, **kwargs):
-    model_args = dict(stages=(1, 2, 11, 2), global_att_blocks=(7, 10, 13), window_spec=(8, 4, 16, 8))
+    model_args = dict(stages=(1, 2, 11, 2), global_att_blocks=(7, 10, 13), window_spec=(8, 4, 16, 8), init_values=1e-5)
     return _create_hiera_det('hieradet_small', pretrained=pretrained, **dict(model_args, **kwargs))

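Because model_args is merged as dict(model_args, **kwargs), the new init_values=1e-5 default can still be overridden per call. A hypothetical usage sketch, assuming a timm build that includes this model:

import timm

# default: LayerScale enabled with init_values=1e-5
model = timm.create_model('hieradet_small', pretrained=False)

# pass init_values=None to fall back to nn.Identity (no LayerScale)
model_no_ls = timm.create_model('hieradet_small', pretrained=False, init_values=None)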