Commit eb76536

Monster commit, activation refactor, VoVNet, norm_act improvements, more
* refactor activations into basic PyTorch, jit scripted, and memory efficient custom autograd versions
* implement hard-mish, better grad for hard-swish
* add initial VovNet V1/V2 impl, fix #151
* VovNet and DenseNet first models to use NormAct layers (support BatchNormAct2d, EvoNorm, InplaceIABN)
* Wrap IABN for any models that use it
* make more models torchscript compatible (DPN, PNasNet, Res2Net, SelecSLS) and add tests
1 parent ff94ffc commit eb76536

37 files changed: +1467 −316 lines
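The activation items in the commit message translate roughly to the sketch below: each activation gets a plain PyTorch form, a jit-scriptable form, and a memory-efficient custom autograd form. This is an illustrative sketch, not the exact timm code; the hard-mish expression 0.5 * x * clamp(x + 2, 0, 2) and the hand-written hard-swish gradient are assumptions based on the usual piecewise-linear approximations.

```python
import torch
import torch.nn.functional as F


def hard_swish(x):
    # x * ReLU6(x + 3) / 6
    return x * F.relu6(x + 3.) / 6.


def hard_mish(x):
    # 0.5 * x * clamp(x + 2, 0, 2), a "hard" approximation of mish (assumed form)
    return 0.5 * x * (x + 2).clamp(min=0, max=2)


class HardSwishFn(torch.autograd.Function):
    """Memory-efficient variant: recompute the gradient from the saved input
    instead of keeping intermediate tensors alive for backward."""

    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return x * F.relu6(x + 3.) / 6.

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        # piecewise gradient: 0 for x < -3, 1 for x > 3, x/3 + 0.5 in between
        m = torch.ones_like(x)
        m = torch.where(x < -3., torch.zeros_like(x), m)
        m = torch.where((x >= -3.) & (x <= 3.), x / 3. + .5, m)
        return grad_output * m
```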

tests/test_models.py

Lines changed: 25 additions & 1 deletion
@@ -4,7 +4,7 @@
 import os
 import fnmatch
 
-from timm import list_models, create_model
+from timm import list_models, create_model, set_scriptable
 
 
 if 'GITHUB_ACTIONS' in os.environ and 'Linux' in platform.system():
@@ -53,6 +53,8 @@ def test_model_backward(model_name, batch_size):
     inputs = torch.randn((batch_size, *input_size))
     outputs = model(inputs)
     outputs.mean().backward()
+    for n, x in model.named_parameters():
+        assert x.grad is not None, f'No gradient for {n}'
     num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None])
 
     assert outputs.shape[-1] == 42
@@ -83,3 +85,25 @@ def test_model_default_cfgs(model_name, batch_size):
     assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2]
     assert any([k.startswith(classifier) for k in state_dict.keys()]), f'{classifier} not in model params'
     assert any([k.startswith(first_conv) for k in state_dict.keys()]), f'{first_conv} not in model params'
+
+
+EXCLUDE_JIT_FILTERS = [
+    '*iabn*', 'tresnet*',  # models using inplace abn unlikely to ever be scriptable
+    'dla*', 'hrnet*',  # hopefully fix at some point
+]
+
+
+@pytest.mark.timeout(120)
+@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + EXCLUDE_JIT_FILTERS))
+@pytest.mark.parametrize('batch_size', [1])
+def test_model_forward_torchscript(model_name, batch_size):
+    """Run a single forward pass with each model"""
+    with set_scriptable(True):
+        model = create_model(model_name, pretrained=False)
+    model.eval()
+    input_size = (3, 128, 128)  # jit compile is already a bit slow and we've tested normal res already...
+    model = torch.jit.script(model)
+    outputs = model(torch.randn((batch_size, *input_size)))
+
+    assert outputs.shape[0] == batch_size
+    assert not torch.isnan(outputs).any(), 'Output included NaNs'
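The new torchscript test above also doubles as a usage recipe: `set_scriptable(True)` is a context manager that steers layer and activation selection toward jit-scriptable implementations while the model is being built. A minimal sketch outside of pytest (model name chosen arbitrarily):

```python
import torch
from timm import create_model, set_scriptable

# Build the model with scriptability enforced, then compile with torchscript.
with set_scriptable(True):
    model = create_model('resnet18', pretrained=False)
model.eval()

scripted = torch.jit.script(model)
out = scripted(torch.randn(1, 3, 224, 224))
print(out.shape)  # expected: torch.Size([1, 1000])
```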

timm/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -1,2 +1,3 @@
 from .version import __version__
-from .models import create_model, list_models, is_model, list_modules, model_entrypoint
+from .models import create_model, list_models, is_model, list_modules, model_entrypoint, \
+    is_scriptable, is_exportable, set_scriptable, set_exportable

timm/models/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -20,9 +20,11 @@
 from .tresnet import *
 from .resnest import *
 from .regnet import *
+from .vovnet import *
 
 from .registry import *
 from .factory import create_model
 from .helpers import load_checkpoint, resume_checkpoint
 from .layers import TestTimePoolHead, apply_test_time_pool
 from .layers import convert_splitbn_model
+from .layers import is_scriptable, is_exportable, set_scriptable, set_exportable, is_no_jit, set_no_jit
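With `from .vovnet import *` wired into the package init, the new VoVNet variants become discoverable through the registry like any other model. A quick sketch; the entrypoint name `ese_vovnet39b` is an assumption and may differ between versions:

```python
import timm

# wildcard filtering over registered model entrypoints
print(timm.list_models('*vovnet*'))

# create one of the VoVNet V2 variants (name assumed, no pretrained weights requested)
model = timm.create_model('ese_vovnet39b', pretrained=False, num_classes=10)
```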

timm/models/densenet.py

Lines changed: 18 additions & 18 deletions
@@ -41,13 +41,13 @@ def _cfg(url=''):
 
 
 class DenseLayer(nn.Module):
-    def __init__(self, num_input_features, growth_rate, bn_size, norm_act_layer=BatchNormAct2d,
+    def __init__(self, num_input_features, growth_rate, bn_size, norm_layer=BatchNormAct2d,
                  drop_rate=0., memory_efficient=False):
         super(DenseLayer, self).__init__()
-        self.add_module('norm1', norm_act_layer(num_input_features)),
+        self.add_module('norm1', norm_layer(num_input_features)),
         self.add_module('conv1', nn.Conv2d(
             num_input_features, bn_size * growth_rate, kernel_size=1, stride=1, bias=False)),
-        self.add_module('norm2', norm_act_layer(bn_size * growth_rate)),
+        self.add_module('norm2', norm_layer(bn_size * growth_rate)),
         self.add_module('conv2', nn.Conv2d(
             bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False)),
         self.drop_rate = float(drop_rate)
@@ -109,15 +109,15 @@ def forward(self, x):  # noqa: F811
 class DenseBlock(nn.ModuleDict):
     _version = 2
 
-    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_act_layer=nn.ReLU,
+    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, norm_layer=nn.ReLU,
                  drop_rate=0., memory_efficient=False):
         super(DenseBlock, self).__init__()
         for i in range(num_layers):
             layer = DenseLayer(
                 num_input_features + i * growth_rate,
                 growth_rate=growth_rate,
                 bn_size=bn_size,
-                norm_act_layer=norm_act_layer,
+                norm_layer=norm_layer,
                 drop_rate=drop_rate,
                 memory_efficient=memory_efficient,
             )
@@ -132,9 +132,9 @@ def forward(self, init_features):
 
 
 class DenseTransition(nn.Sequential):
-    def __init__(self, num_input_features, num_output_features, norm_act_layer=nn.BatchNorm2d, aa_layer=None):
+    def __init__(self, num_input_features, num_output_features, norm_layer=nn.BatchNorm2d, aa_layer=None):
         super(DenseTransition, self).__init__()
-        self.add_module('norm', norm_act_layer(num_input_features))
+        self.add_module('norm', norm_layer(num_input_features))
         self.add_module('conv', nn.Conv2d(
             num_input_features, num_output_features, kernel_size=1, stride=1, bias=False))
         if aa_layer is not None:
@@ -160,7 +160,7 @@ class DenseNet(nn.Module):
 
     def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem_type='',
                  num_classes=1000, in_chans=3, global_pool='avg',
-                 norm_act_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False):
+                 norm_layer=BatchNormAct2d, aa_layer=None, drop_rate=0, memory_efficient=False):
         self.num_classes = num_classes
         self.drop_rate = drop_rate
         super(DenseNet, self).__init__()
@@ -181,17 +181,17 @@ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem
             stem_chs_2 = num_init_features if 'narrow' in stem_type else 6 * (growth_rate // 4)
             self.features = nn.Sequential(OrderedDict([
                 ('conv0', nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False)),
-                ('norm0', norm_act_layer(stem_chs_1)),
+                ('norm0', norm_layer(stem_chs_1)),
                 ('conv1', nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False)),
-                ('norm1', norm_act_layer(stem_chs_2)),
+                ('norm1', norm_layer(stem_chs_2)),
                 ('conv2', nn.Conv2d(stem_chs_2, num_init_features, 3, stride=1, padding=1, bias=False)),
-                ('norm2', norm_act_layer(num_init_features)),
+                ('norm2', norm_layer(num_init_features)),
                 ('pool0', stem_pool),
             ]))
         else:
             self.features = nn.Sequential(OrderedDict([
                 ('conv0', nn.Conv2d(in_chans, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
-                ('norm0', norm_act_layer(num_init_features)),
+                ('norm0', norm_layer(num_init_features)),
                 ('pool0', stem_pool),
             ]))
 
@@ -203,7 +203,7 @@ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem
                 num_input_features=num_features,
                 bn_size=bn_size,
                 growth_rate=growth_rate,
-                norm_act_layer=norm_act_layer,
+                norm_layer=norm_layer,
                 drop_rate=drop_rate,
                 memory_efficient=memory_efficient
             )
@@ -212,12 +212,12 @@ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem
             if i != len(block_config) - 1:
                 trans = DenseTransition(
                     num_input_features=num_features, num_output_features=num_features // 2,
-                    norm_act_layer=norm_act_layer)
+                    norm_layer=norm_layer)
                 self.features.add_module('transition%d' % (i + 1), trans)
                 num_features = num_features // 2
 
         # Final batch norm
-        self.features.add_module('norm5', norm_act_layer(num_features))
+        self.features.add_module('norm5', norm_layer(num_features))
 
         # Linear layer
         self.num_features = num_features
@@ -346,7 +346,7 @@ def norm_act_fn(num_features, **kwargs):
         return create_norm_act('EvoNormBatch', num_features, jit=True, **kwargs)
     model = _densenet(
         'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep',
-        norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs)
+        norm_layer=norm_act_fn, pretrained=pretrained, **kwargs)
     return model
 
 
@@ -359,7 +359,7 @@ def norm_act_fn(num_features, **kwargs):
         return create_norm_act('EvoNormSample', num_features, jit=True, **kwargs)
     model = _densenet(
         'densenet121d', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep',
-        norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs)
+        norm_layer=norm_act_fn, pretrained=pretrained, **kwargs)
     return model
 
 
@@ -372,7 +372,7 @@ def norm_act_fn(num_features, **kwargs):
         return create_norm_act('iabn', num_features, **kwargs)
     model = _densenet(
         'densenet121tn', growth_rate=32, block_config=(6, 12, 24, 16), stem_type='deep',
-        norm_act_layer=norm_act_fn, pretrained=pretrained, **kwargs)
+        norm_layer=norm_act_fn, pretrained=pretrained, **kwargs)
     return model
 
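The renamed `norm_layer` argument threaded through DenseNet above accepts any callable that maps a channel count to a combined norm + activation module; the registered variants at the bottom of the file wrap `create_norm_act` in small factories. A condensed sketch of that pattern (illustrative only; the import path is assumed):

```python
from timm.models.layers import create_norm_act


def evonorm_fn(num_features, **kwargs):
    # combined norm + activation module, a jit-scripted EvoNorm in this case
    return create_norm_act('EvoNormBatch', num_features, jit=True, **kwargs)

# Any constructor that takes norm_layer (DenseNet, and per the commit message VoVNet)
# can now swap its normalization + activation in one place, e.g.:
# model = _densenet('densenet121d', ..., norm_layer=evonorm_fn, pretrained=False)
```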

timm/models/dpn.py

Lines changed: 48 additions & 16 deletions
@@ -10,6 +10,7 @@
 from __future__ import print_function
 
 from collections import OrderedDict
+from typing import Union, Optional, List, Tuple
 
 import torch
 import torch.nn as nn
@@ -54,8 +55,19 @@ def __init__(self, in_chs, activation_fn=nn.ReLU(inplace=True)):
         self.bn = nn.BatchNorm2d(in_chs, eps=0.001)
         self.act = activation_fn
 
+    @torch.jit._overload_method  # noqa: F811
     def forward(self, x):
-        x = torch.cat(x, dim=1) if isinstance(x, tuple) else x
+        # type: (Tuple[torch.Tensor, torch.Tensor]) -> (torch.Tensor)
+        pass
+
+    @torch.jit._overload_method  # noqa: F811
+    def forward(self, x):
+        # type: (torch.Tensor) -> (torch.Tensor)
+        pass
+
+    def forward(self, x):
+        if isinstance(x, tuple):
+            x = torch.cat(x, dim=1)
         return self.act(self.bn(x))
 
 
@@ -107,6 +119,8 @@ def __init__(
             self.key_stride = 1
             self.has_proj = False
 
+        self.c1x1_w_s1 = None
+        self.c1x1_w_s2 = None
         if self.has_proj:
             # Using different member names here to allow easier parameter key matching for conversion
             if self.key_stride == 2:
@@ -115,6 +129,7 @@ def __init__(
             else:
                 self.c1x1_w_s1 = BnActConv2d(
                     in_chs=in_chs, out_chs=num_1x1_c + 2 * inc, kernel_size=1, stride=1)
+
         self.c1x1_a = BnActConv2d(in_chs=in_chs, out_chs=num_1x1_a, kernel_size=1, stride=1)
         self.c3x3_b = BnActConv2d(
             in_chs=num_1x1_a, out_chs=num_3x3_b, kernel_size=3,
@@ -125,27 +140,46 @@ def __init__(
             self.c1x1_c2 = nn.Conv2d(num_3x3_b, inc, kernel_size=1, bias=False)
         else:
             self.c1x1_c = BnActConv2d(in_chs=num_3x3_b, out_chs=num_1x1_c + inc, kernel_size=1, stride=1)
+            self.c1x1_c1 = None
+            self.c1x1_c2 = None
 
+    @torch.jit._overload_method  # noqa: F811
     def forward(self, x):
-        x_in = torch.cat(x, dim=1) if isinstance(x, tuple) else x
-        if self.has_proj:
-            if self.key_stride == 2:
-                x_s = self.c1x1_w_s2(x_in)
-            else:
-                x_s = self.c1x1_w_s1(x_in)
-            x_s1 = x_s[:, :self.num_1x1_c, :, :]
-            x_s2 = x_s[:, self.num_1x1_c:, :, :]
+        # type: (Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]
+        pass
+
+    @torch.jit._overload_method  # noqa: F811
+    def forward(self, x):
+        # type: (torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]
+        pass
+
+    def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor]:
+        if isinstance(x, tuple):
+            x_in = torch.cat(x, dim=1)
         else:
+            x_in = x
+        if self.c1x1_w_s1 is None and self.c1x1_w_s2 is None:
+            # self.has_proj == False, torchscript requires condition on module == None
            x_s1 = x[0]
            x_s2 = x[1]
+        else:
+            # self.has_proj == True
+            if self.c1x1_w_s1 is not None:
+                # self.key_stride = 1
+                x_s = self.c1x1_w_s1(x_in)
+            else:
+                # self.key_stride = 2
+                x_s = self.c1x1_w_s2(x_in)
+            x_s1 = x_s[:, :self.num_1x1_c, :, :]
+            x_s2 = x_s[:, self.num_1x1_c:, :, :]
         x_in = self.c1x1_a(x_in)
         x_in = self.c3x3_b(x_in)
-        if self.b:
-            x_in = self.c1x1_c(x_in)
+        x_in = self.c1x1_c(x_in)
+        if self.c1x1_c1 is not None:
+            # self.b == True, using None check for torchscript compat
             out1 = self.c1x1_c1(x_in)
             out2 = self.c1x1_c2(x_in)
         else:
-            x_in = self.c1x1_c(x_in)
             out1 = x_in[:, :self.num_1x1_c, :, :]
             out2 = x_in[:, self.num_1x1_c:, :, :]
         resid = x_s1 + out1
@@ -167,11 +201,9 @@ def __init__(self, small=False, num_init_features=64, k_r=96, groups=32,
 
         # conv1
         if small:
-            blocks['conv1_1'] = InputBlock(
-                num_init_features, in_chans=in_chans, kernel_size=3, padding=1)
+            blocks['conv1_1'] = InputBlock(num_init_features, in_chans=in_chans, kernel_size=3, padding=1)
         else:
-            blocks['conv1_1'] = InputBlock(
-                num_init_features, in_chans=in_chans, kernel_size=7, padding=3)
+            blocks['conv1_1'] = InputBlock(num_init_features, in_chans=in_chans, kernel_size=7, padding=3)
 
         # conv2
         bw = 64 * bw_factor
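The DPN rework above targets a torchscript limitation: a scripted forward cannot take a loose union of Tensor and Tuple[Tensor, Tensor], so the accepted input types are declared with `@torch.jit._overload_method` stubs and optional submodules are branched on `is None` checks the compiler can resolve statically. A stripped-down sketch of the same pattern (class name and shapes illustrative):

```python
from typing import Tuple

import torch
import torch.nn as nn


class CatBnAct(nn.Module):
    """Concat-then-BN-then-act block that accepts a tensor or a 2-tuple of tensors."""

    def __init__(self, in_chs):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_chs)
        self.act = nn.ReLU(inplace=True)

    @torch.jit._overload_method  # noqa: F811
    def forward(self, x):
        # type: (Tuple[torch.Tensor, torch.Tensor]) -> (torch.Tensor)
        pass

    @torch.jit._overload_method  # noqa: F811
    def forward(self, x):
        # type: (torch.Tensor) -> (torch.Tensor)
        pass

    def forward(self, x):  # noqa: F811
        # torchscript resolves the overload from the call site's static type;
        # eager mode simply runs this implementation with the isinstance branch.
        if isinstance(x, tuple):
            x = torch.cat(x, dim=1)
        return self.act(self.bn(x))


scripted = torch.jit.script(CatBnAct(8))
y = scripted(torch.randn(2, 8, 4, 4))  # the tuple form is covered by the first overload
```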

timm/models/efficientnet.py

Lines changed: 11 additions & 7 deletions
@@ -24,11 +24,15 @@
 
 Hacked together by Ross Wightman
 """
+import torch.nn as nn
+import torch.nn.functional as F
+
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
-from .efficientnet_builder import *
+from .efficientnet_blocks import round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT
+from .efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights
 from .feature_hooks import FeatureHooks
 from .helpers import load_pretrained, adapt_model_from_file
-from .layers import SelectAdaptivePool2d
+from .layers import SelectAdaptivePool2d, create_conv2d
 from .registry import register_model
 
 __all__ = ['EfficientNet']
@@ -631,7 +635,7 @@ def _gen_mobilenet_v2(
         fix_stem=fix_stem_head,
         channel_multiplier=channel_multiplier,
         norm_kwargs=resolve_bn_args(kwargs),
-        act_layer=nn.ReLU6,
+        act_layer=resolve_act_layer(kwargs, 'relu6'),
         **kwargs
     )
     model = _create_model(model_kwargs, default_cfgs[variant], pretrained)
@@ -741,7 +745,7 @@ def _gen_efficientnet(variant, channel_multiplier=1.0, depth_multiplier=1.0, pre
         num_features=round_channels(1280, channel_multiplier, 8, None),
         stem_size=32,
         channel_multiplier=channel_multiplier,
-        act_layer=Swish,
+        act_layer=resolve_act_layer(kwargs, 'swish'),
         norm_kwargs=resolve_bn_args(kwargs),
         variant=variant,
         **kwargs,
@@ -772,7 +776,7 @@ def _gen_efficientnet_edge(variant, channel_multiplier=1.0, depth_multiplier=1.0
         stem_size=32,
         channel_multiplier=channel_multiplier,
         norm_kwargs=resolve_bn_args(kwargs),
-        act_layer=nn.ReLU,
+        act_layer=resolve_act_layer(kwargs, 'relu'),
         **kwargs,
     )
     model = _create_model(model_kwargs, default_cfgs[variant], pretrained)
@@ -802,7 +806,7 @@ def _gen_efficientnet_condconv(
         stem_size=32,
         channel_multiplier=channel_multiplier,
         norm_kwargs=resolve_bn_args(kwargs),
-        act_layer=Swish,
+        act_layer=resolve_act_layer(kwargs, 'swish'),
         **kwargs,
     )
     model = _create_model(model_kwargs, default_cfgs[variant], pretrained)
@@ -842,7 +846,7 @@ def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0
         stem_size=32,
         fix_stem=True,
         channel_multiplier=channel_multiplier,
-        act_layer=nn.ReLU6,
+        act_layer=resolve_act_layer(kwargs, 'relu6'),
         norm_kwargs=resolve_bn_args(kwargs),
         **kwargs,
     )
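The repeated `act_layer=resolve_act_layer(kwargs, 'swish')` change means each generator's activation is no longer hard-coded: an `act_layer` passed by the caller wins, otherwise the named default is looked up. A rough sketch of what such a resolver does (the real helper lives in `efficientnet_blocks.py`; this body and the small registry below are assumptions, not the timm implementation):

```python
import torch.nn as nn

# hypothetical registry standing in for timm's activation factory
_ACT_LAYERS = {'relu': nn.ReLU, 'relu6': nn.ReLU6, 'swish': nn.SiLU}


def resolve_act_layer(kwargs, default='relu'):
    # pop act_layer from the caller's kwargs so it isn't also forwarded via **kwargs;
    # fall back to the named default when the caller didn't override it
    act = kwargs.pop('act_layer', default)
    if isinstance(act, str):
        act = _ACT_LAYERS[act]
    return act

# e.g. a generator called with act_layer='relu' in kwargs would now build with ReLU
# instead of its ReLU6/Swish default.
```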
