Convert mobilenetv3 to multi-weight, tweak PretrainedCfg metadata

rwightman · rwightman · commit 656e1776de42 · 2022-12-16T09:29:13.000-08:00
diff --git a/timm/models/_pretrained.py b/timm/models/_pretrained.py
@@ -45,9 +45,11 @@ class PretrainedCfg:
     classifier: Optional[str] = None
 
     license: Optional[str] = None
-    source_url: Optional[str] = None
-    paper: Optional[str] = None
-    notes: Optional[str] = None
+    description: Optional[str] = None
+    origin_url: Optional[str] = None
+    paper_name: Optional[str] = None
+    paper_ids: Optional[Union[str, Tuple[str]]] = None
+    notes: Optional[Tuple[str]] = None
 
     @property
     def has_weights(self):
diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py
@@ -21,93 +21,12 @@
     round_channels, resolve_bn_args, resolve_act_layer, BN_EPS_TF_DEFAULT
 from ._features import FeatureInfo, FeatureHooks
 from ._manipulate import checkpoint_seq
+from ._pretrained import generate_default_cfgs
 from ._registry import register_model
 
 __all__ = ['MobileNetV3', 'MobileNetV3Features']
 
 
-def _cfg(url='', **kwargs):
-    return {
-        'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
-        'crop_pct': 0.875, 'interpolation': 'bilinear',
-        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
-        'first_conv': 'conv_stem', 'classifier': 'classifier',
-        **kwargs
-    }
-
-
-default_cfgs = {
-    'mobilenetv3_large_075': _cfg(url=''),
-    'mobilenetv3_large_100': _cfg(
-        interpolation='bicubic',
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth'),
-    'mobilenetv3_large_100_miil': _cfg(
-        interpolation='bilinear', mean=(0., 0., 0.), std=(1., 1., 1.),
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/mobilenetv3_large_100_1k_miil_78_0-66471c13.pth'),
-    'mobilenetv3_large_100_miil_in21k': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/mobilenetv3_large_100_in21k_miil-d71cc17b.pth',
-        interpolation='bilinear', mean=(0., 0., 0.), std=(1., 1., 1.), num_classes=11221),
-
-    'mobilenetv3_small_050': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_050_lambc-4b7bbe87.pth',
-        interpolation='bicubic'),
-    'mobilenetv3_small_075': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_075_lambc-384766db.pth',
-        interpolation='bicubic'),
-    'mobilenetv3_small_100': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_100_lamb-266a294c.pth',
-        interpolation='bicubic'),
-
-    'mobilenetv3_rw': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_100-35495452.pth',
-        interpolation='bicubic'),
-
-    'tf_mobilenetv3_large_075': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth',
-        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
-    'tf_mobilenetv3_large_100': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth',
-        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
-    'tf_mobilenetv3_large_minimal_100': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth',
-        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
-    'tf_mobilenetv3_small_075': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth',
-        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
-    'tf_mobilenetv3_small_100': _cfg(
-        url= 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth',
-        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
-    'tf_mobilenetv3_small_minimal_100': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth',
-        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
-
-    'fbnetv3_b': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetv3_b_224-ead5d2a1.pth',
-        test_input_size=(3, 256, 256), crop_pct=0.95),
-    'fbnetv3_d': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetv3_d_224-c98bce42.pth',
-        test_input_size=(3, 256, 256), crop_pct=0.95),
-    'fbnetv3_g': _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetv3_g_240-0b1df83b.pth',
-        input_size=(3, 240, 240), test_input_size=(3, 288, 288), crop_pct=0.95, pool_size=(8, 8)),
-
-    "lcnet_035": _cfg(),
-    "lcnet_050": _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/lcnet_050-f447553b.pth',
-        interpolation='bicubic',
-    ),
-    "lcnet_075": _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/lcnet_075-318cad2c.pth',
-        interpolation='bicubic',
-    ),
-    "lcnet_100": _cfg(
-        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/lcnet_100-a929038c.pth',
-        interpolation='bicubic',
-    ),
-    "lcnet_150": _cfg(),
-}
-
-
 class MobileNetV3(nn.Module):
     """ MobiletNet-V3
 
@@ -124,9 +43,24 @@ class MobileNetV3(nn.Module):
     """
 
     def __init__(
-            self, block_args, num_classes=1000, in_chans=3, stem_size=16, fix_stem=False, num_features=1280,
-            head_bias=True, pad_type='', act_layer=None, norm_layer=None, se_layer=None, se_from_exp=True,
-            round_chs_fn=round_channels, drop_rate=0., drop_path_rate=0., global_pool='avg'):
+            self,
+            block_args,
+            num_classes=1000,
+            in_chans=3,
+            stem_size=16,
+            fix_stem=False,
+            num_features=1280,
+            head_bias=True,
+            pad_type='',
+            act_layer=None,
+            norm_layer=None,
+            se_layer=None,
+            se_from_exp=True,
+            round_chs_fn=round_channels,
+            drop_rate=0.,
+            drop_path_rate=0.,
+            global_pool='avg',
+    ):
         super(MobileNetV3, self).__init__()
         act_layer = act_layer or nn.ReLU
         norm_layer = norm_layer or nn.BatchNorm2d
@@ -145,8 +79,15 @@ def __init__(
 
         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
-            output_stride=32, pad_type=pad_type, round_chs_fn=round_chs_fn, se_from_exp=se_from_exp,
-            act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer, drop_path_rate=drop_path_rate)
+            output_stride=32,
+            pad_type=pad_type,
+            round_chs_fn=round_chs_fn,
+            se_from_exp=se_from_exp,
+            act_layer=act_layer,
+            norm_layer=norm_layer,
+            se_layer=se_layer,
+            drop_path_rate=drop_path_rate,
+        )
         self.blocks = nn.Sequential(*builder(stem_size, block_args))
         self.feature_info = builder.features
         head_chs = builder.in_chs
@@ -225,9 +166,23 @@ class MobileNetV3Features(nn.Module):
     """
 
     def __init__(
-            self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bottleneck', in_chans=3,
-            stem_size=16, fix_stem=False, output_stride=32, pad_type='', round_chs_fn=round_channels,
-            se_from_exp=True, act_layer=None, norm_layer=None, se_layer=None, drop_rate=0., drop_path_rate=0.):
+            self,
+            block_args,
+            out_indices=(0, 1, 2, 3, 4),
+            feature_location='bottleneck',
+            in_chans=3,
+            stem_size=16,
+            fix_stem=False,
+            output_stride=32,
+            pad_type='',
+            round_chs_fn=round_channels,
+            se_from_exp=True,
+            act_layer=None,
+            norm_layer=None,
+            se_layer=None,
+            drop_rate=0.,
+            drop_path_rate=0.,
+    ):
         super(MobileNetV3Features, self).__init__()
         act_layer = act_layer or nn.ReLU
         norm_layer = norm_layer or nn.BatchNorm2d
@@ -243,9 +198,16 @@ def __init__(
 
         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
-            output_stride=output_stride, pad_type=pad_type, round_chs_fn=round_chs_fn, se_from_exp=se_from_exp,
-            act_layer=act_layer, norm_layer=norm_layer, se_layer=se_layer,
-            drop_path_rate=drop_path_rate, feature_location=feature_location)
+            output_stride=output_stride,
+            pad_type=pad_type,
+            round_chs_fn=round_chs_fn,
+            se_from_exp=se_from_exp,
+            act_layer=act_layer,
+            norm_layer=norm_layer,
+            se_layer=se_layer,
+            drop_path_rate=drop_path_rate,
+            feature_location=feature_location,
+        )
         self.blocks = nn.Sequential(*builder(stem_size, block_args))
         self.feature_info = FeatureInfo(builder.features, out_indices)
         self._stage_out_idx = {v['stage']: i for i, v in enumerate(self.feature_info) if i in out_indices}
@@ -286,7 +248,9 @@ def _create_mnv3(variant, pretrained=False, **kwargs):
         kwargs_filter = ('num_classes', 'num_features', 'head_conv', 'head_bias', 'global_pool')
         model_cls = MobileNetV3Features
     model = build_model_with_cfg(
-        model_cls, variant, pretrained,
+        model_cls,
+        variant,
+        pretrained,
         pretrained_strict=not features_only,
         kwargs_filter=kwargs_filter,
         **kwargs)
@@ -567,6 +531,110 @@ def _gen_lcnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
     return model
 
 
+def _cfg(url='', **kwargs):
+    return {
+        'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
+        'crop_pct': 0.875, 'interpolation': 'bilinear',
+        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
+        'first_conv': 'conv_stem', 'classifier': 'classifier',
+        **kwargs
+    }
+
+
+default_cfgs = generate_default_cfgs({
+    'mobilenetv3_large_075.untrained': _cfg(url=''),
+    'mobilenetv3_large_100.ra_in1k': _cfg(
+        interpolation='bicubic',
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth',
+        hf_hub_id='timm/'),
+    'mobilenetv3_large_100.miil_in21k_ft_in1k': _cfg(
+        interpolation='bilinear', mean=(0., 0., 0.), std=(1., 1., 1.),
+        origin_url='https://github.com/Alibaba-MIIL/ImageNet21K',
+        paper_ids='arXiv:2104.10972v4',
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/mobilenetv3_large_100_1k_miil_78_0-66471c13.pth',
+        hf_hub_id='timm/'),
+    'mobilenetv3_large_100.miil_in21k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/mobilenetv3_large_100_in21k_miil-d71cc17b.pth',
+        hf_hub_id='timm/',
+        origin_url='https://github.com/Alibaba-MIIL/ImageNet21K',
+        paper_ids='arXiv:2104.10972v4',
+        interpolation='bilinear', mean=(0., 0., 0.), std=(1., 1., 1.), num_classes=11221),
+
+    'mobilenetv3_small_050.lamb_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_050_lambc-4b7bbe87.pth',
+        hf_hub_id='timm/',
+        interpolation='bicubic'),
+    'mobilenetv3_small_075.lamb_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_075_lambc-384766db.pth',
+        hf_hub_id='timm/',
+        interpolation='bicubic'),
+    'mobilenetv3_small_100.lamb_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_small_100_lamb-266a294c.pth',
+        hf_hub_id='timm/',
+        interpolation='bicubic'),
+
+    'mobilenetv3_rw.rmsp_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_100-35495452.pth',
+        interpolation='bicubic'),
+
+    'tf_mobilenetv3_large_075.in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth',
+        hf_hub_id='timm/',
+        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+    'tf_mobilenetv3_large_100.in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth',
+        hf_hub_id='timm/',
+        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+    'tf_mobilenetv3_large_minimal_100.in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth',
+        hf_hub_id='timm/',
+        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+    'tf_mobilenetv3_small_075.in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth',
+        hf_hub_id='timm/',
+        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+    'tf_mobilenetv3_small_100.in1k': _cfg(
+        url= 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth',
+        hf_hub_id='timm/',
+        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+    'tf_mobilenetv3_small_minimal_100.in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth',
+        hf_hub_id='timm/',
+        mean=IMAGENET_INCEPTION_MEAN, std=IMAGENET_INCEPTION_STD),
+
+    'fbnetv3_b.ra2_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetv3_b_224-ead5d2a1.pth',
+        hf_hub_id='timm/',
+        test_input_size=(3, 256, 256), crop_pct=0.95),
+    'fbnetv3_d.ra2_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetv3_d_224-c98bce42.pth',
+        hf_hub_id='timm/',
+        test_input_size=(3, 256, 256), crop_pct=0.95),
+    'fbnetv3_g.ra2_in1k': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetv3_g_240-0b1df83b.pth',
+        hf_hub_id='timm/',
+        input_size=(3, 240, 240), test_input_size=(3, 288, 288), crop_pct=0.95, pool_size=(8, 8)),
+
+    "lcnet_035.untrained": _cfg(),
+    "lcnet_050.ra2_in1k": _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/lcnet_050-f447553b.pth',
+        hf_hub_id='timm/',
+        interpolation='bicubic',
+    ),
+    "lcnet_075.ra2_in1k": _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/lcnet_075-318cad2c.pth',
+        hf_hub_id='timm/',
+        interpolation='bicubic',
+    ),
+    "lcnet_100.ra2_in1k": _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/lcnet_100-a929038c.pth',
+        hf_hub_id='timm/',
+        interpolation='bicubic',
+    ),
+    "lcnet_150.untrained": _cfg(),
+})
+
+
 @register_model
 def mobilenetv3_large_075(pretrained=False, **kwargs):
     """ MobileNet V3 """
@@ -581,24 +649,6 @@ def mobilenetv3_large_100(pretrained=False, **kwargs):
     return model
 
 
-@register_model
-def mobilenetv3_large_100_miil(pretrained=False, **kwargs):
-    """ MobileNet V3
-    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
-    """
-    model = _gen_mobilenet_v3('mobilenetv3_large_100_miil', 1.0, pretrained=pretrained, **kwargs)
-    return model
-
-
-@register_model
-def mobilenetv3_large_100_miil_in21k(pretrained=False, **kwargs):
-    """ MobileNet V3, 21k pretraining
-    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
-    """
-    model = _gen_mobilenet_v3('mobilenetv3_large_100_miil_in21k', 1.0, pretrained=pretrained, **kwargs)
-    return model
-
-
 @register_model
 def mobilenetv3_small_050(pretrained=False, **kwargs):
     """ MobileNet V3 """