@@ -105,6 +105,10 @@ def _cfg(url='', **kwargs):
105105 'L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz' ,
106106 input_size = (3 , 384 , 384 ), crop_pct = 1.0 ),
107107
108+ 'vit_huge_patch14_224' : _cfg (url = '' ),
109+ 'vit_giant_patch14_224' : _cfg (url = '' ),
110+ 'vit_gigantic_patch14_224' : _cfg (url = '' ),
111+
108112 # patch models, imagenet21k (weights from official Google JAX impl)
109113 'vit_tiny_patch16_224_in21k' : _cfg (
110114 url = 'https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npz' ,
@@ -715,6 +719,33 @@ def vit_base_patch32_sam_224(pretrained=False, **kwargs):
715719 return model
716720
717721
@register_model
def vit_huge_patch14_224(pretrained=False, **kwargs):
    """ViT-Huge (ViT-H/14) from the original ViT paper
    (https://arxiv.org/abs/2010.11929).

    Args:
        pretrained: load pretrained weights if available (the registered
            default cfg for this entry has an empty URL, so none are hosted).
        **kwargs: extra model kwargs forwarded to the ViT constructor.
    """
    # Huge-variant hyper-parameters; caller kwargs are merged on top.
    return _create_vision_transformer(
        'vit_huge_patch14_224', pretrained=pretrained,
        **dict(patch_size=14, embed_dim=1280, depth=32, num_heads=16, **kwargs))
729+
730+
@register_model
def vit_giant_patch14_224(pretrained=False, **kwargs):
    """ViT-Giant (ViT-g/14) from `Scaling Vision Transformers`
    (https://arxiv.org/abs/2106.04560).

    Args:
        pretrained: load pretrained weights if available (the registered
            default cfg for this entry has an empty URL, so none are hosted).
        **kwargs: extra model kwargs forwarded to the ViT constructor.
    """
    # Giant-variant hyper-parameters; the paper specifies a 48/11 MLP ratio.
    return _create_vision_transformer(
        'vit_giant_patch14_224', pretrained=pretrained,
        **dict(patch_size=14, embed_dim=1408, mlp_ratio=48 / 11, depth=40, num_heads=16, **kwargs))
738+
739+
@register_model
def vit_gigantic_patch14_224(pretrained=False, **kwargs):
    """ViT-Gigantic (ViT-G/14) from `Scaling Vision Transformers`
    (https://arxiv.org/abs/2106.04560).

    Args:
        pretrained: load pretrained weights if available (the registered
            default cfg for this entry has an empty URL, so none are hosted).
        **kwargs: extra model kwargs forwarded to the ViT constructor.
    """
    # Gigantic-variant hyper-parameters; the paper specifies a 64/13 MLP ratio.
    return _create_vision_transformer(
        'vit_gigantic_patch14_224', pretrained=pretrained,
        **dict(patch_size=14, embed_dim=1664, mlp_ratio=64 / 13, depth=48, num_heads=16, **kwargs))
747+
748+
718749@register_model
719750def vit_tiny_patch16_224_in21k (pretrained = False , ** kwargs ):
720751 """ ViT-Tiny (Vit-Ti/16).
0 commit comments