Skip to content

Commit cd950e6

Browse files
authored
Merge pull request #1823 from leng-yue/fix-layer-scale
[Fix] Update dinov2 layerscale init values
2 parents 049b133 + c308dbc commit cd950e6

File tree

1 file changed: 4 additions, 4 deletions

timm/models/vision_transformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,7 +1982,7 @@ def vit_small_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
     """ ViT-S/14 for DINOv2
     """
     model_args = dict(
-        patch_size=14, embed_dim=384, depth=12, num_heads=6, init_values=1.0, img_size=518,
+        patch_size=14, embed_dim=384, depth=12, num_heads=6, init_values=1e-5, img_size=518,
     )
     model = _create_vision_transformer(
         'vit_small_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
@@ -1994,7 +1994,7 @@ def vit_base_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
     """ ViT-B/14 for DINOv2
     """
     model_args = dict(
-        patch_size=14, embed_dim=768, depth=12, num_heads=12, init_values=1.0, img_size=518,
+        patch_size=14, embed_dim=768, depth=12, num_heads=12, init_values=1e-5, img_size=518,
     )
     model = _create_vision_transformer(
         'vit_base_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
@@ -2006,7 +2006,7 @@ def vit_large_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
     """ ViT-L/14 for DINOv2
     """
     model_args = dict(
-        patch_size=14, embed_dim=1024, depth=24, num_heads=16, init_values=1.0, img_size=518,
+        patch_size=14, embed_dim=1024, depth=24, num_heads=16, init_values=1e-5, img_size=518,
     )
     model = _create_vision_transformer(
         'vit_large_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
@@ -2024,7 +2024,7 @@ def vit_giant_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
     # With SwiGLUPacked, we need to set hidden_features = 2 * 4096 = 8192

     model_args = dict(
-        patch_size=14, embed_dim=1536, depth=40, num_heads=24, init_values=1.0,
+        patch_size=14, embed_dim=1536, depth=40, num_heads=24, init_values=1e-5,
         mlp_ratio=2.66667 * 2, mlp_layer=SwiGLUPacked, img_size=518, act_layer=nn.SiLU
     )
     model = _create_vision_transformer(

0 commit comments

Comments (0)