huggingface
diff --git a/‎sotabench.py‎
Lines changed: 9 additions & 0 deletions b/‎sotabench.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎tests/test_inference.py‎
Lines changed: 0 additions & 19 deletions b/‎tests/test_inference.py‎
Lines changed: 0 additions & 19 deletions
diff --git a/‎tests/test_models.py‎
Lines changed: 85 additions & 0 deletions b/‎tests/test_models.py‎
Lines changed: 85 additions & 0 deletions
diff --git a/‎timm/models/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎timm/models/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎timm/models/dla.py‎
Lines changed: 7 additions & 3 deletions b/‎timm/models/dla.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎timm/models/gluon_xception.py‎
Lines changed: 1 addition & 1 deletion b/‎timm/models/gluon_xception.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎timm/models/hrnet.py‎
Lines changed: 1 addition & 1 deletion b/‎timm/models/hrnet.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎timm/models/inception_v3.py‎
Lines changed: 1 addition & 1 deletion b/‎timm/models/inception_v3.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎timm/models/layers/se.py‎
Lines changed: 2 additions & 2 deletions b/‎timm/models/layers/se.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎timm/models/mobilenetv3.py‎
Lines changed: 1 addition & 1 deletion b/‎timm/models/mobilenetv3.py‎
Lines changed: 1 addition & 1 deletion
@@ -126,6 +126,15 @@ def _entry(model_name, paper_model_name, paper_arxiv_id, batch_size=BATCH_SIZE,
     _entry('skresnet34', 'SK-ResNet-34', '1903.06586'),
     _entry('skresnext50_32x4d', 'SKNet-50', '1903.06586'),
 
+    _entry('ecaresnetlight', 'ECA-ResNet-Light', '1910.03151',
+           model_desc='A tweaked ResNet50d with ECA attn.'),
+    _entry('ecaresnet50d', 'ECA-ResNet-50d', '1910.03151',
+           model_desc='A ResNet50d with ECA attn'),
+    _entry('ecaresnet101d', 'ECA-ResNet-101d', '1910.03151',
+           model_desc='A ResNet101d with ECA attn'),
+
+    _entry('resnetblur50', 'ResNet-Blur-50', '1904.11486'),
+
     _entry('tf_efficientnet_b0', 'EfficientNet-B0 (AutoAugment)', '1905.11946',
            model_desc='Ported from official Google AI Tensorflow weights'),
     _entry('tf_efficientnet_b1', 'EfficientNet-B1 (AutoAugment)', '1905.11946',
 
@@ -0,0 +1,85 @@
+import pytest
+import torch
+import platform
+import os
+import fnmatch
+
+from timm import list_models, create_model
+
+
+if 'GITHUB_ACTIONS' in os.environ and 'Linux' in platform.system():
+    # GitHub Linux runner is slower and hits memory limits sooner than MacOS, exclude bigger models
+    EXCLUDE_FILTERS = ['*efficientnet_l2*', '*resnext101_32x48d']
+else:
+    EXCLUDE_FILTERS = []
+MAX_FWD_SIZE = 384  # per-dimension cap applied to input_size in test_model_forward
+MAX_BWD_SIZE = 128  # per-dimension cap applied to input_size in test_model_backward
+MAX_FWD_FEAT_SIZE = 448  # forward_features / pool_size check is skipped above this size
+
+
+@pytest.mark.timeout(120)
+@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS))
+@pytest.mark.parametrize('batch_size', [1])
+def test_model_forward(model_name, batch_size):
+    """Run a single forward pass with each model; check the output batch dim and that it has no NaNs"""
+    model = create_model(model_name, pretrained=False)
+    model.eval()
+
+    input_size = model.default_cfg['input_size']
+    if any([x > MAX_FWD_SIZE for x in input_size]):
+        # cap every input dimension at MAX_FWD_SIZE (384) to keep resource usage down
+        input_size = tuple([min(x, MAX_FWD_SIZE) for x in input_size])
+    inputs = torch.randn((batch_size, *input_size))
+    outputs = model(inputs)
+
+    assert outputs.shape[0] == batch_size
+    assert not torch.isnan(outputs).any(), 'Output included NaNs'
+
+
+@pytest.mark.timeout(120)
+# DLA models have an issue TBD, add them to exclusions
+@pytest.mark.parametrize('model_name', list_models(exclude_filters=EXCLUDE_FILTERS + ['dla*']))
+@pytest.mark.parametrize('batch_size', [2])
+def test_model_backward(model_name, batch_size):
+    """Run a forward and backward pass with each model; check that every parameter receives a gradient"""
+    model = create_model(model_name, pretrained=False, num_classes=42)
+    num_params = sum([x.numel() for x in model.parameters()])
+    model.eval()
+
+    input_size = model.default_cfg['input_size']
+    if any([x > MAX_BWD_SIZE for x in input_size]):
+        # cap every input dimension at MAX_BWD_SIZE (128) to keep resource usage down
+        input_size = tuple([min(x, MAX_BWD_SIZE) for x in input_size])
+    inputs = torch.randn((batch_size, *input_size))
+    outputs = model(inputs)
+    outputs.mean().backward()
+    num_grad = sum([x.grad.numel() for x in model.parameters() if x.grad is not None])
+
+    assert outputs.shape[-1] == 42  # matches num_classes=42 passed to create_model above
+    assert num_params == num_grad, 'Some parameters are missing gradients'
+    assert not torch.isnan(outputs).any(), 'Output included NaNs'
+
+
+@pytest.mark.timeout(120)
+@pytest.mark.parametrize('model_name', list_models())
+@pytest.mark.parametrize('batch_size', [1])
+def test_model_default_cfgs(model_name, batch_size):
+    """Check each model's default_cfg: pool_size vs forward_features output, classifier / first_conv vs state_dict keys"""
+    model = create_model(model_name, pretrained=False)
+    model.eval()
+    state_dict = model.state_dict()
+    cfg = model.default_cfg
+
+    classifier = cfg['classifier']
+    first_conv = cfg['first_conv']
+    pool_size = cfg['pool_size']
+    input_size = model.default_cfg['input_size']
+
+    if all([x <= MAX_FWD_FEAT_SIZE for x in input_size]) and \
+            not any([fnmatch.fnmatch(model_name, x) for x in EXCLUDE_FILTERS]):
+        # pool size only checked if default res <= 448 * 448 and model is not in EXCLUDE_FILTERS, to keep resource down
+        input_size = tuple([min(x, MAX_FWD_FEAT_SIZE) for x in input_size])
+        outputs = model.forward_features(torch.randn((batch_size, *input_size)))
+        assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2]
+    assert any([k.startswith(classifier) for k in state_dict.keys()]), f'{classifier} not in model params'
+    assert any([k.startswith(first_conv) for k in state_dict.keys()]), f'{first_conv} not in model params'
@@ -19,6 +19,7 @@
 from .sknet import *
 from .tresnet import *
 from .resnest import *
+from .regnet import *
 
 from .registry import *
 from .factory import create_model
 
@@ -237,8 +237,11 @@ def __init__(self, levels, block, in_channels, out_channels, stride=1,
 
     def forward(self, x, residual=None, children=None):
         children = [] if children is None else children
-        bottom = self.downsample(x) if self.downsample else x
-        residual = self.project(bottom) if self.project else bottom
+        # FIXME the way downsample / project are used here and residual is passed to next level up
+        # the tree, the residual is overridden and some project weights are thus never used and
+        # have no gradients. This appears to be an issue with the original model / weights.
+        bottom = self.downsample(x) if self.downsample is not None else x
+        residual = self.project(bottom) if self.project is not None else bottom
         if self.level_root:
             children.append(bottom)
         x1 = self.tree1(x, residual)
@@ -355,7 +358,8 @@ def dla60_res2next(pretrained=None, num_classes=1000, in_chans=3, **kwargs):
 @register_model
 def dla34(pretrained=None, num_classes=1000, in_chans=3, **kwargs):  # DLA-34
     default_cfg = default_cfgs['dla34']
-    model = DLA([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512], block=DlaBasic, **kwargs)
+    model = DLA([1, 1, 1, 2, 2, 1], [16, 32, 64, 128, 256, 512], block=DlaBasic,
+                num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
 
@@ -36,7 +36,7 @@
         'url': '',
         'input_size': (3, 299, 299),
         'crop_pct': 0.875,
-        'pool_size': (10, 10),
+        'pool_size': (5, 5),
         'interpolation': 'bicubic',
         'mean': IMAGENET_DEFAULT_MEAN,
         'std': IMAGENET_DEFAULT_STD,
 
@@ -34,7 +34,7 @@ def _cfg(url='', **kwargs):
         'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
         'crop_pct': 0.875, 'interpolation': 'bilinear',
         'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
-        'first_conv': 'conv1', 'classifier': 'fc',
+        'first_conv': 'conv1', 'classifier': 'classifier',
         **kwargs
     }
 
 
@@ -14,7 +14,7 @@ def _cfg(url='', **kwargs):
         'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
         'crop_pct': 0.875, 'interpolation': 'bicubic',
         'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,
-        'first_conv': 'conv1', 'classifier': 'fc',
+        'first_conv': 'Conv2d_1a_3x3', 'classifier': 'fc',
         **kwargs
     }
 
 
@@ -3,10 +3,10 @@
 
 class SEModule(nn.Module):
 
-    def __init__(self, channels, reduction=16, act_layer=nn.ReLU):
+    def __init__(self, channels, reduction=16, act_layer=nn.ReLU, min_channels=8, reduction_channels=None):
         super(SEModule, self).__init__()
         self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        reduction_channels = max(channels // reduction, 8)
+        reduction_channels = reduction_channels or max(channels // reduction, min_channels)
         self.fc1 = nn.Conv2d(
             channels, reduction_channels, kernel_size=1, padding=0, bias=True)
         self.act = act_layer(inplace=True)
 
@@ -21,7 +21,7 @@
 
 def _cfg(url='', **kwargs):
     return {
-        'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
+        'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (1, 1),
         'crop_pct': 0.875, 'interpolation': 'bilinear',
         'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
         'first_conv': 'conv_stem', 'classifier': 'classifier',
Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,7 @@ def _cfg(url='', **kwargs):`
`34`	`34`	`'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),`
`35`	`35`	`'crop_pct': 0.875, 'interpolation': 'bilinear',`
`36`	`36`	`'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,`
`37`		`- 'first_conv': 'conv1', 'classifier': 'fc',`
	`37`	`+ 'first_conv': 'conv1', 'classifier': 'classifier',`
`38`	`38`	`**kwargs`
`39`	`39`	`}`
`40`	`40`
Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,7 @@ def _cfg(url='', **kwargs):`
`14`	`14`	`'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8),`
`15`	`15`	`'crop_pct': 0.875, 'interpolation': 'bicubic',`
`16`	`16`	`'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,`
`17`		`- 'first_conv': 'conv1', 'classifier': 'fc',`
	`17`	`+ 'first_conv': 'Conv2d_1a_3x3', 'classifier': 'fc',`
`18`	`18`	`**kwargs`
`19`	`19`	`}`
`20`	`20`