@@ -230,8 +230,8 @@ def __init__(
         self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)

         if eta is not None:  # LayerScale Initialization (no layerscale when None)
-            self.gamma1 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
-            self.gamma2 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
+            self.gamma1 = nn.Parameter(eta * torch.ones(dim))
+            self.gamma2 = nn.Parameter(eta * torch.ones(dim))
         else:
             self.gamma1, self.gamma2 = 1.0, 1.0

@@ -308,9 +308,9 @@ def __init__(
         self.norm2 = norm_layer(dim)
         self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)

-        self.gamma1 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
-        self.gamma3 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
-        self.gamma2 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
+        self.gamma1 = nn.Parameter(eta * torch.ones(dim))
+        self.gamma3 = nn.Parameter(eta * torch.ones(dim))
+        self.gamma2 = nn.Parameter(eta * torch.ones(dim))

     def forward(self, x, H: int, W: int):
         x = x + self.drop_path(self.gamma1 * self.attn(self.norm1(x)))
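For reference, `torch.nn.Parameter` already defaults to `requires_grad=True`, so dropping the explicit flag leaves the LayerScale gammas trainable and numerically identical. A minimal standalone check (the `dim` and `eta` values below are arbitrary placeholders, not taken from the model config):

```python
import torch
import torch.nn as nn

# Arbitrary placeholder values for illustration only.
dim, eta = 8, 1e-5

# Old style: explicit flag; new style: rely on the default (requires_grad=True).
gamma_old = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
gamma_new = nn.Parameter(eta * torch.ones(dim))

assert gamma_old.requires_grad and gamma_new.requires_grad
assert torch.equal(gamma_old, gamma_new)
```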