
Commit 8bcac7d

Add missing arguments

Signed-off-by: cyy <cyyever@outlook.com>
1 parent f4d57f2, commit 8bcac7d

23 files changed, +31 -32 lines

src/transformers/models/aria/modular_aria.py

Lines changed: 3 additions & 4 deletions
@@ -1255,8 +1255,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
 class AriaTextAttention(LlamaAttention):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

-    def __init__(self, config: AriaTextConfig, layer_idx: int):
-        super().__init__()
+    pass


 class AriaTextDecoderLayer(LlamaDecoderLayer):
@@ -1273,7 +1272,7 @@ class AriaTextDecoderLayer(LlamaDecoderLayer):
     """

     def __init__(self, config: AriaTextConfig, layer_idx: int):
-        super().__init__(self)
+        super().__init__(self, layer_idx)
         self.mlp = AriaTextMoELayer(config)


@@ -1306,7 +1305,7 @@ class AriaPreTrainedModel(LlamaPreTrainedModel):
     _supports_attention_backend = True

     def _init_weights(self, module):
-        LlamaPreTrainedModel._init_weights(module)
+        LlamaPreTrainedModel._init_weights(self, module)
         if isinstance(module, AriaProjector):
             nn.init.trunc_normal_(module.query, std=self.config.initializer_range)
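
The _init_weights changes in this commit are all the same one-argument fix: the parent implementation is invoked through the class (LlamaPreTrainedModel._init_weights(...), MixtralPreTrainedModel._init_weights(...)) rather than through super(), and a method accessed on the class is an ordinary function, so the instance must be passed explicitly. A minimal sketch with made-up Parent/Child classes (not the real transformers code):

    # Illustrative sketch only -- Parent/Child are hypothetical, not transformers classes.
    class Parent:
        def _init_weights(self, module):
            print(f"init {module} via {type(self).__name__}")


    class Child(Parent):
        def _init_weights(self, module):
            # Before the fix, Parent._init_weights(module) binds `module` to the
            # `self` parameter and leaves `module` unfilled, so Python raises
            # TypeError: _init_weights() missing 1 required positional argument: 'module'
            # Passing the instance explicitly restores the intended binding:
            Parent._init_weights(self, module)
            print("child-specific init")


    Child()._init_weights("some_submodule")
    # init some_submodule via Child
    # child-specific init

In ordinary Python, super()._init_weights(module) would also work; the commit keeps the explicit class calls that these modular files already use.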

src/transformers/models/cohere2/modular_cohere2.py

Lines changed: 1 addition & 1 deletion
@@ -275,7 +275,7 @@ class Cohere2Attention(CohereAttention, nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

     def __init__(self, config: Cohere2Config, layer_idx: Optional[int] = None):
-        nn.Module.__init__()
+        nn.Module.__init__(self)
         self.config = config
         self.layer_idx = layer_idx
         self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
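
Cohere2Attention (like the Data2VecAudioFeatureEncoder, DiaSelfAttention, and EvollaSaProtSelfAttention changes below) sets up the nn.Module machinery directly instead of running the parent attention class's __init__. Called through the class, nn.Module.__init__ is unbound, so nn.Module.__init__() with no arguments fails at construction time with TypeError: ... missing 1 required positional argument: 'self'. A self-contained sketch of the pattern using toy classes (hypothetical names, not the real attention modules):

    from torch import nn


    class BaseAttention(nn.Module):
        """Toy stand-in for the parent class whose __init__ we want to skip."""

        def __init__(self, hidden_size: int):
            super().__init__()
            self.proj = nn.Linear(hidden_size, hidden_size)  # setup the subclass does not want


    class SlimAttention(BaseAttention):
        def __init__(self, hidden_size: int):
            # Skip BaseAttention.__init__ but still initialize nn.Module's internal
            # state (_parameters, _modules, hooks). The instance must be passed
            # explicitly because the call goes through the class, as in the commit:
            nn.Module.__init__(self)
            self.hidden_size = hidden_size
            self.q_proj = nn.Linear(hidden_size, hidden_size)


    attn = SlimAttention(16)
    print(list(attn._modules))  # ['q_proj'] -- BaseAttention's `proj` was never created

Skipping the call entirely is not an option: without nn.Module.__init__, the assignment to self.q_proj would raise AttributeError: cannot assign module before Module.__init__() call.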

src/transformers/models/data2vec/modular_data2vec_audio.py

Lines changed: 1 addition & 1 deletion
@@ -114,7 +114,7 @@ def forward(self, hidden_states):

 class Data2VecAudioFeatureEncoder(Wav2Vec2FeatureEncoder, nn.Module):
     def __init__(self, config):
-        nn.Module.__init__()
+        nn.Module.__init__(self)
         self.conv_layers = nn.ModuleList(
             [Data2VecAudioConvLayer(config, layer_id=i) for i in range(config.num_feat_extract_layers)]
         )

src/transformers/models/deepseek_v2/modular_deepseek_v2.py

Lines changed: 1 addition & 1 deletion
@@ -505,7 +505,7 @@ def __init__(self, config: DeepseekV2Config, layer_idx: int):

 class DeepseekV2PreTrainedModel(LlamaPreTrainedModel):
     def _init_weights(self, module):
-        LlamaPreTrainedModel._init_weights(module)
+        LlamaPreTrainedModel._init_weights(self, module)
         if isinstance(module, DeepseekV2MoEGate):
             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)

src/transformers/models/deepseek_v3/modular_deepseek_v3.py

Lines changed: 1 addition & 1 deletion
@@ -341,7 +341,7 @@ def __init__(self, config: DeepseekV3Config, layer_idx: int):

 class DeepseekV3PreTrainedModel(LlamaPreTrainedModel):
     def _init_weights(self, module):
-        LlamaPreTrainedModel._init_weights(module)
+        LlamaPreTrainedModel._init_weights(self, module)
         if isinstance(module, DeepseekV3TopkRouter):
             module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)

src/transformers/models/dia/modular_dia.py

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@ class DiaSelfAttention(LlamaAttention, nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

     def __init__(self, config: Union[DiaEncoderConfig, DiaDecoderConfig], layer_idx: int, is_causal: bool = False):
-        nn.Module.__init__()
+        nn.Module.__init__(self)
         self.config = config
         self.layer_idx = layer_idx
         self.hidden_size = config.hidden_size

src/transformers/models/diffllama/modular_diffllama.py

Lines changed: 1 addition & 1 deletion
@@ -408,7 +408,7 @@ class DiffLlamaPreTrainedModel(LlamaPreTrainedModel):
     _supports_attention_backend = False

     def _init_weights(self, module):
-        LlamaPreTrainedModel._init_weights(module)
+        LlamaPreTrainedModel._init_weights(self, module)
         if isinstance(module, DiffLlamaAttention):
             module.lambda_q1.data.normal_(0, self.config.lambda_std_dev)
             module.lambda_k1.data.normal_(0, self.config.lambda_std_dev)

src/transformers/models/doge/modular_doge.py

Lines changed: 1 addition & 1 deletion
@@ -576,7 +576,7 @@ class DogePreTrainedModel(LlamaPreTrainedModel):

     def _init_weights(self, module):
         """Initialize the weights"""
-        LlamaPreTrainedModel._init_weights(module)
+        LlamaPreTrainedModel._init_weights(self, module)
         if isinstance(module, DogeAttention):
             if hasattr(module, "A"):
                 module.A.data.zero_()

src/transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py

Lines changed: 1 addition & 1 deletion
@@ -224,7 +224,7 @@ class Ernie4_5_MoePreTrainedModel(MixtralPreTrainedModel):
     }

     def _init_weights(self, module):
-        MixtralPreTrainedModel._init_weights(module)
+        MixtralPreTrainedModel._init_weights(self, module)
         if isinstance(module, Ernie4_5_MoeStatics):
             module.e_score_correction_bias.data.zero_()

src/transformers/models/evolla/modular_evolla.py

Lines changed: 2 additions & 2 deletions
@@ -129,7 +129,7 @@ def forward(self, q: torch.Tensor, k: torch.Tensor) -> tuple[torch.Tensor, torch

 class EvollaSaProtSelfAttention(EsmSelfAttention, nn.Module):
     def __init__(self, config, position_embedding_type=None, layer_idx=None):
-        nn.Module.__init__()
+        nn.Module.__init__(self)
         self.config = config

         if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
@@ -781,7 +781,7 @@ class EvollaPreTrainedModel(LlamaPreTrainedModel):

     def _init_weights(self, module):
         std = self.config.initializer_range
-        LlamaPreTrainedModel._init_weights(module)
+        LlamaPreTrainedModel._init_weights(self, module)
         if isinstance(module, EvollaSequenceAlignerCrossAttention):
             module.gate_attention.zero_()
             module.gate_ffw.zero_()
