Commit 9b74e4c

Every model forward() should have **kwargs (#42603)
* Add **kwargs into every Model.forward()
* Add the test back in
* And the others I missed
* Fix udop test
* Fix fastspeech2_conformer test
* make fixup
1 parent 3cdccba commit 9b74e4c
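
The change follows one pattern across all 239 files: each public forward() gains a trailing **kwargs parameter so extra keyword arguments are accepted instead of raising a TypeError. Below is a minimal sketch of that pattern, assuming a toy nn.Module rather than any actual Transformers class; the name ToyModel and the dummy tensor shapes are illustrative only.

from typing import Optional, Union

import torch
from torch import nn


class ToyModel(nn.Module):
    # Illustrative stand-in for a Transformers model class, not part of the library.
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        return_dict: Optional[bool] = None,
        **kwargs,  # accept (and here ignore) any extra keyword arguments
    ) -> Union[tuple, torch.Tensor]:
        batch, seq_len = input_ids.shape
        # Dummy hidden states; a real model would run its layers here.
        return torch.zeros(batch, seq_len, 8)


model = ToyModel()
# Callers such as pipelines or the Trainer can now pass extra options
# (e.g. num_items_in_batch) without the signature rejecting them.
hidden = model(input_ids=torch.tensor([[1, 2, 3]]), num_items_in_batch=3)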

239 files changed (+856 lines, -47 lines)


src/transformers/models/align/modeling_align.py
Lines changed: 2 additions & 0 deletions

@@ -1004,6 +1004,7 @@ def forward(
         pixel_values: Optional[torch.FloatTensor] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPoolingAndNoAttention]:
         r"""
         Examples:

@@ -1169,6 +1170,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, AlignOutput]:
         r"""
         return_loss (`bool`, *optional*):

src/transformers/models/altclip/modeling_altclip.py
Lines changed: 4 additions & 0 deletions

@@ -891,6 +891,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPooling]:
         r"""
         Examples:

@@ -970,6 +971,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (

@@ -1061,6 +1063,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPoolingAndProjection]:
         r"""
         Examples:

@@ -1236,6 +1239,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, AltCLIPOutput]:
         r"""
         return_loss (`bool`, *optional*):

src/transformers/models/audioflamingo3/modeling_audioflamingo3.py
Lines changed: 1 addition & 0 deletions

@@ -323,6 +323,7 @@ def forward(
         self,
         input_features: torch.Tensor,
         input_features_mask: Optional[torch.Tensor] = None,
+        **kwargs,
     ):
         r"""
         Args:

src/transformers/models/audioflamingo3/modular_audioflamingo3.py
Lines changed: 1 addition & 0 deletions

@@ -60,6 +60,7 @@ def forward(
         self,
         input_features: torch.Tensor,
         input_features_mask: Optional[torch.Tensor] = None,
+        **kwargs,
     ):
         r"""
         Args:

src/transformers/models/autoformer/modeling_autoformer.py
Lines changed: 4 additions & 0 deletions

@@ -903,6 +903,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutput]:
         r"""
         Args:

@@ -1024,6 +1025,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, AutoFormerDecoderOutput]:
         r"""
         Args:

@@ -1360,6 +1362,7 @@ def forward(
         use_cache: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[AutoformerModelOutput, tuple]:
         r"""
         past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -1610,6 +1613,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         use_cache: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[Seq2SeqTSPredictionOutput, tuple]:
         r"""
         past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

src/transformers/models/bark/modeling_bark.py
Lines changed: 2 additions & 0 deletions

@@ -426,6 +426,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], CausalLMOutputWithPast]:
         r"""
         input_embeds (`torch.FloatTensor` of shape `(batch_size, input_sequence_length, hidden_size)`, *optional*):

@@ -1028,6 +1029,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], MaskedLMOutput]:
         r"""
         codebook_idx (`int`):

src/transformers/models/bart/modeling_bart.py
Lines changed: 7 additions & 0 deletions

@@ -547,6 +547,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutput]:
         r"""
         Args:

@@ -694,6 +695,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPastAndCrossAttentions]:
         r"""
         Args:

@@ -921,6 +923,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqModelOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -1067,6 +1070,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqLMOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -1228,6 +1232,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqSequenceClassifierOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -1360,6 +1365,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqQuestionAnsweringModelOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -1505,6 +1511,7 @@ def forward(
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):

src/transformers/models/beit/modeling_beit.py
Lines changed: 5 additions & 0 deletions

@@ -726,6 +726,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BeitModelOutputWithPooling]:
         r"""
         bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):

@@ -818,6 +819,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MaskedLMOutput]:
         r"""
         bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`):

@@ -911,6 +913,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, ImageClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

@@ -1244,6 +1247,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SemanticSegmenterOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):

@@ -1371,6 +1375,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> BackboneOutput:
         r"""
         Examples:

src/transformers/models/big_bird/modeling_big_bird.py
Lines changed: 6 additions & 0 deletions

@@ -1918,6 +1918,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[BigBirdForPreTrainingOutput, tuple[torch.FloatTensor]]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -2028,6 +2029,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[MaskedLMOutput, tuple[torch.FloatTensor]]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -2277,6 +2279,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[SequenceClassifierOutput, tuple[torch.FloatTensor]]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

@@ -2394,6 +2397,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[MultipleChoiceModelOutput, tuple[torch.FloatTensor]]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):

@@ -2500,6 +2504,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[TokenClassifierOutput, tuple[torch.FloatTensor]]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -2591,6 +2596,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[BigBirdForQuestionAnsweringModelOutput, tuple[torch.FloatTensor]]:
         r"""
         question_lengths (`torch.LongTensor` of shape `(batch_size, 1)`, *optional*):

src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
Lines changed: 7 additions & 0 deletions

@@ -1595,6 +1595,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ):
         r"""
         Args:

@@ -1868,6 +1869,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ):
         r"""
         Args:

@@ -2097,6 +2099,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqModelOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -2235,6 +2238,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqLMOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -2369,6 +2373,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqSequenceClassifierOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -2490,6 +2495,7 @@ def forward(
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Seq2SeqQuestionAnsweringModelOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):

@@ -2616,6 +2622,7 @@ def forward(
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
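
The commit message also mentions adding a test back in. The actual test is not part of this excerpt, so the snippet below is only a hypothetical sketch of the invariant such a test could check, using inspect to confirm that a model's forward() accepts **kwargs; AlignModel is used purely as an example of one of the touched classes.

import inspect

from transformers import AlignModel  # one of the classes touched by this commit


def accepts_var_kwargs(fn) -> bool:
    # True if the callable declares a **kwargs (VAR_KEYWORD) parameter.
    return any(
        param.kind is inspect.Parameter.VAR_KEYWORD
        for param in inspect.signature(fn).parameters.values()
    )


# Hypothetical assertion mirroring the intent of the restored test.
assert accepts_var_kwargs(AlignModel.forward)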
