Skip to content

Commit fa8c84e

Browse files
committed
Update maxvit_tiny_256 weight to better iter, add coatnet / maxvit / maxxvit model defs for future runs
1 parent de40f66 commit fa8c84e

File tree

1 file changed

+134
-5
lines changed

1 file changed

+134
-5
lines changed

timm/models/maxxvit.py

Lines changed: 134 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,7 @@ def _cfg(url='', **kwargs):
8282
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_1_rw_224_sw-5cae1ea8.pth'
8383
),
8484
'coatnet_2_rw_224': _cfg(url=''),
85+
'coatnet_3_rw_224': _cfg(url=''),
8586

8687
# Highly experimental configs
8788
'coatnet_bn_0_rw_224': _cfg(
@@ -94,6 +95,8 @@ def _cfg(url='', **kwargs):
9495
'coatnet_rmlp_0_rw_224': _cfg(url=''),
9596
'coatnet_rmlp_1_rw_224': _cfg(
9697
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_1_rw_224_sw-9051e6c3.pth'),
98+
'coatnet_rmlp_2_rw_224': _cfg(url=''),
99+
'coatnet_rmlp_3_rw_224': _cfg(url=''),
97100
'coatnet_nano_cc_224': _cfg(url=''),
98101
'coatnext_nano_rw_224': _cfg(url=''),
99102

@@ -122,10 +125,19 @@ def _cfg(url='', **kwargs):
122125
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_nano_rw_256_sw-c17bb0d6.pth',
123126
input_size=(3, 256, 256), pool_size=(8, 8)),
124127
'maxvit_rmlp_tiny_rw_256': _cfg(
125-
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_tiny_rw_256_sw-2da819a5.pth',
128+
url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_tiny_rw_256_sw-bbef0ff5.pth',
126129
input_size=(3, 256, 256), pool_size=(8, 8)),
130+
'maxvit_rmlp_small_rw_224': _cfg(
131+
url=''),
132+
'maxvit_rmlp_small_rw_256': _cfg(
133+
url='',
134+
input_size=(3, 256, 256), pool_size=(8, 8)),
135+
127136
'maxvit_tiny_pm_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
137+
128138
'maxxvit_nano_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
139+
'maxxvit_tiny_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
140+
'maxxvit_small_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
129141

130142
# Trying to be like the MaxViT paper configs
131143
'maxvit_tiny_224': _cfg(url=''),
@@ -182,7 +194,7 @@ class MaxxVitConvCfg:
182194
attn_layer: str = 'se'
183195
attn_act_layer: str = 'silu'
184196
attn_ratio: float = 0.25
185-
init_values: Optional[float] = 1e-5 # for ConvNeXt block
197+
init_values: Optional[float] = 1e-6 # for ConvNeXt block, ignored by MBConv
186198
act_layer: str = 'gelu'
187199
norm_layer: str = ''
188200
norm_layer_cl: str = ''
@@ -218,10 +230,12 @@ def _rw_coat_cfg(
218230
pool_type='avg2',
219231
conv_output_bias=False,
220232
conv_attn_early=False,
233+
conv_attn_act_layer='relu',
221234
conv_norm_layer='',
222235
transformer_shortcut_bias=True,
223236
transformer_norm_layer='layernorm2d',
224237
transformer_norm_layer_cl='layernorm',
238+
init_values=None,
225239
rel_pos_type='bias',
226240
rel_pos_dim=512,
227241
):
@@ -246,14 +260,15 @@ def _rw_coat_cfg(
246260
expand_output=False,
247261
output_bias=conv_output_bias,
248262
attn_early=conv_attn_early,
249-
attn_act_layer='relu',
263+
attn_act_layer=conv_attn_act_layer,
250264
act_layer='silu',
251265
norm_layer=conv_norm_layer,
252266
),
253267
transformer_cfg=MaxxVitTransformerCfg(
254268
expand_first=False,
255269
shortcut_bias=transformer_shortcut_bias,
256270
pool_type=pool_type,
271+
init_values=init_values,
257272
norm_layer=transformer_norm_layer,
258273
norm_layer_cl=transformer_norm_layer_cl,
259274
rel_pos_type=rel_pos_type,
@@ -272,6 +287,7 @@ def _rw_max_cfg(
272287
transformer_norm_layer_cl='layernorm',
273288
window_size=None,
274289
dim_head=32,
290+
init_values=None,
275291
rel_pos_type='bias',
276292
rel_pos_dim=512,
277293
):
@@ -296,6 +312,7 @@ def _rw_max_cfg(
296312
pool_type=pool_type,
297313
dim_head=dim_head,
298314
window_size=window_size,
315+
init_values=init_values,
299316
norm_layer=transformer_norm_layer,
300317
norm_layer_cl=transformer_norm_layer_cl,
301318
rel_pos_type=rel_pos_type,
@@ -312,7 +329,8 @@ def _next_cfg(
312329
transformer_norm_layer='layernorm2d',
313330
transformer_norm_layer_cl='layernorm',
314331
window_size=None,
315-
rel_pos_type='bias',
332+
init_values=1e-6,
333+
rel_pos_type='mlp', # MLP by default for maxxvit
316334
rel_pos_dim=512,
317335
):
318336
# For experimental models with convnext instead of mbconv
@@ -322,13 +340,15 @@ def _next_cfg(
322340
stride_mode=stride_mode,
323341
pool_type=pool_type,
324342
expand_output=False,
343+
init_values=init_values,
325344
norm_layer=conv_norm_layer,
326345
norm_layer_cl=conv_norm_layer_cl,
327346
),
328347
transformer_cfg=MaxxVitTransformerCfg(
329348
expand_first=False,
330349
pool_type=pool_type,
331350
window_size=window_size,
351+
init_values=init_values,
332352
norm_layer=transformer_norm_layer,
333353
norm_layer_cl=transformer_norm_layer_cl,
334354
rel_pos_type=rel_pos_type,
@@ -381,7 +401,21 @@ def _next_cfg(
381401
embed_dim=(128, 256, 512, 1024),
382402
depths=(2, 6, 14, 2),
383403
stem_width=(64, 128),
384-
**_rw_coat_cfg(stride_mode='dw'),
404+
**_rw_coat_cfg(
405+
stride_mode='dw',
406+
conv_attn_act_layer='silu',
407+
init_values=1e-6,
408+
),
409+
),
410+
coatnet_3_rw_224=MaxxVitCfg(
411+
embed_dim=(192, 384, 768, 1536),
412+
depths=(2, 6, 14, 2),
413+
stem_width=(96, 192),
414+
**_rw_coat_cfg(
415+
stride_mode='dw',
416+
conv_attn_act_layer='silu',
417+
init_values=1e-6,
418+
),
385419
),
386420

387421
# Highly experimental configs
@@ -428,6 +462,29 @@ def _next_cfg(
428462
rel_pos_dim=384, # was supposed to be 512, woops
429463
),
430464
),
465+
coatnet_rmlp_2_rw_224=MaxxVitCfg(
466+
embed_dim=(128, 256, 512, 1024),
467+
depths=(2, 6, 14, 2),
468+
stem_width=(64, 128),
469+
**_rw_coat_cfg(
470+
stride_mode='dw',
471+
conv_attn_act_layer='silu',
472+
init_values=1e-6,
473+
rel_pos_type='mlp'
474+
),
475+
),
476+
coatnet_rmlp_3_rw_224=MaxxVitCfg(
477+
embed_dim=(192, 384, 768, 1536),
478+
depths=(2, 6, 14, 2),
479+
stem_width=(96, 192),
480+
**_rw_coat_cfg(
481+
stride_mode='dw',
482+
conv_attn_act_layer='silu',
483+
init_values=1e-6,
484+
rel_pos_type='mlp'
485+
),
486+
),
487+
431488
coatnet_nano_cc_224=MaxxVitCfg(
432489
embed_dim=(64, 128, 256, 512),
433490
depths=(3, 4, 6, 3),
@@ -504,6 +561,7 @@ def _next_cfg(
504561
stem_width=(32, 64),
505562
**_rw_max_cfg(),
506563
),
564+
507565
maxvit_rmlp_pico_rw_256=MaxxVitCfg(
508566
embed_dim=(32, 64, 128, 256),
509567
depths=(2, 2, 5, 2),
@@ -525,13 +583,35 @@ def _next_cfg(
525583
stem_width=(32, 64),
526584
**_rw_max_cfg(rel_pos_type='mlp'),
527585
),
586+
maxvit_rmlp_small_rw_224=MaxxVitCfg(
587+
embed_dim=(96, 192, 384, 768),
588+
depths=(2, 2, 5, 2),
589+
block_type=('M',) * 4,
590+
stem_width=(32, 64),
591+
**_rw_max_cfg(
592+
rel_pos_type='mlp',
593+
init_values=1e-6,
594+
),
595+
),
596+
maxvit_rmlp_small_rw_256=MaxxVitCfg(
597+
embed_dim=(96, 192, 384, 768),
598+
depths=(2, 2, 5, 2),
599+
block_type=('M',) * 4,
600+
stem_width=(32, 64),
601+
**_rw_max_cfg(
602+
rel_pos_type='mlp',
603+
init_values=1e-6,
604+
),
605+
),
606+
528607
maxvit_tiny_pm_256=MaxxVitCfg(
529608
embed_dim=(64, 128, 256, 512),
530609
depths=(2, 2, 5, 2),
531610
block_type=('PM',) * 4,
532611
stem_width=(32, 64),
533612
**_rw_max_cfg(),
534613
),
614+
535615
maxxvit_nano_rw_256=MaxxVitCfg(
536616
embed_dim=(64, 128, 256, 512),
537617
depths=(1, 2, 3, 1),
@@ -540,6 +620,20 @@ def _next_cfg(
540620
weight_init='normal',
541621
**_next_cfg(),
542622
),
623+
maxxvit_tiny_rw_256=MaxxVitCfg(
624+
embed_dim=(64, 128, 256, 512),
625+
depths=(2, 2, 5, 2),
626+
block_type=('M',) * 4,
627+
stem_width=(32, 64),
628+
**_next_cfg(),
629+
),
630+
maxxvit_small_rw_256=MaxxVitCfg(
631+
embed_dim=(96, 192, 384, 768),
632+
depths=(2, 2, 5, 2),
633+
block_type=('M',) * 4,
634+
stem_width=(48, 96),
635+
**_next_cfg(),
636+
),
543637

544638
# Trying to be like the MaxViT paper configs
545639
maxvit_tiny_224=MaxxVitCfg(
@@ -1641,6 +1735,11 @@ def coatnet_2_rw_224(pretrained=False, **kwargs):
16411735
return _create_maxxvit('coatnet_2_rw_224', pretrained=pretrained, **kwargs)
16421736

16431737

1738+
@register_model
1739+
def coatnet_3_rw_224(pretrained=False, **kwargs):
1740+
return _create_maxxvit('coatnet_3_rw_224', pretrained=pretrained, **kwargs)
1741+
1742+
16441743
@register_model
16451744
def coatnet_bn_0_rw_224(pretrained=False, **kwargs):
16461745
return _create_maxxvit('coatnet_bn_0_rw_224', pretrained=pretrained, **kwargs)
@@ -1661,6 +1760,16 @@ def coatnet_rmlp_1_rw_224(pretrained=False, **kwargs):
16611760
return _create_maxxvit('coatnet_rmlp_1_rw_224', pretrained=pretrained, **kwargs)
16621761

16631762

1763+
@register_model
1764+
def coatnet_rmlp_2_rw_224(pretrained=False, **kwargs):
1765+
return _create_maxxvit('coatnet_rmlp_2_rw_224', pretrained=pretrained, **kwargs)
1766+
1767+
1768+
@register_model
1769+
def coatnet_rmlp_3_rw_224(pretrained=False, **kwargs):
1770+
return _create_maxxvit('coatnet_rmlp_3_rw_224', pretrained=pretrained, **kwargs)
1771+
1772+
16641773
@register_model
16651774
def coatnet_nano_cc_224(pretrained=False, **kwargs):
16661775
return _create_maxxvit('coatnet_nano_cc_224', pretrained=pretrained, **kwargs)
@@ -1736,6 +1845,16 @@ def maxvit_rmlp_tiny_rw_256(pretrained=False, **kwargs):
17361845
return _create_maxxvit('maxvit_rmlp_tiny_rw_256', pretrained=pretrained, **kwargs)
17371846

17381847

1848+
@register_model
1849+
def maxvit_rmlp_small_rw_224(pretrained=False, **kwargs):
1850+
return _create_maxxvit('maxvit_rmlp_small_rw_224', pretrained=pretrained, **kwargs)
1851+
1852+
1853+
@register_model
1854+
def maxvit_rmlp_small_rw_256(pretrained=False, **kwargs):
1855+
return _create_maxxvit('maxvit_rmlp_small_rw_256', pretrained=pretrained, **kwargs)
1856+
1857+
17391858
@register_model
17401859
def maxvit_tiny_pm_256(pretrained=False, **kwargs):
17411860
return _create_maxxvit('maxvit_tiny_pm_256', pretrained=pretrained, **kwargs)
@@ -1746,6 +1865,16 @@ def maxxvit_nano_rw_256(pretrained=False, **kwargs):
17461865
return _create_maxxvit('maxxvit_nano_rw_256', pretrained=pretrained, **kwargs)
17471866

17481867

1868+
@register_model
1869+
def maxxvit_tiny_rw_256(pretrained=False, **kwargs):
1870+
return _create_maxxvit('maxxvit_tiny_rw_256', pretrained=pretrained, **kwargs)
1871+
1872+
1873+
@register_model
1874+
def maxxvit_small_rw_256(pretrained=False, **kwargs):
1875+
return _create_maxxvit('maxxvit_small_rw_256', pretrained=pretrained, **kwargs)
1876+
1877+
17491878
@register_model
17501879
def maxvit_tiny_224(pretrained=False, **kwargs):
17511880
return _create_maxxvit('maxvit_tiny_224', pretrained=pretrained, **kwargs)

0 commit comments

Comments (0)