diff --git a/official/nlp/modeling/layers/block_diag_feedforward.py b/official/nlp/modeling/layers/block_diag_feedforward.py
index 2a8ea278aa9..a781d7afa23 100644
--- a/official/nlp/modeling/layers/block_diag_feedforward.py
+++ b/official/nlp/modeling/layers/block_diag_feedforward.py
@@ -59,7 +59,7 @@ def __init__(
       kernel_constraint: Optional[tf.keras.constraints.Constraint] = None,
       bias_constraint: Optional[tf.keras.constraints.Constraint] = None,
       **kwargs):  # pylint: disable=g-doc-args
-    super(BlockDiagFeedforward, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self._intermediate_size = intermediate_size
     self._intermediate_activation = intermediate_activation
     self._dropout = dropout
@@ -156,7 +156,7 @@ def get_config(self):
         "bias_constraint":
             tf.keras.constraints.serialize(self._bias_constraint)
     }
-    base_config = super(BlockDiagFeedforward, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def call(self, inputs):
diff --git a/official/nlp/modeling/layers/gaussian_process.py b/official/nlp/modeling/layers/gaussian_process.py
index 6d50fd42cc9..618000577f1 100644
--- a/official/nlp/modeling/layers/gaussian_process.py
+++ b/official/nlp/modeling/layers/gaussian_process.py
@@ -116,7 +116,7 @@ def __init__(self,
       name: (string) Layer name.
       **gp_output_kwargs: Additional keyword arguments to dense output layer.
     """
-    super(RandomFeatureGaussianProcess, self).__init__(name=name, dtype=dtype)
+    super().__init__(name=name, dtype=dtype)
     self.units = units
     self.num_inducing = num_inducing

diff --git a/official/nlp/modeling/layers/masked_lm.py b/official/nlp/modeling/layers/masked_lm.py
index c622d91b752..2d02f71c77a 100644
--- a/official/nlp/modeling/layers/masked_lm.py
+++ b/official/nlp/modeling/layers/masked_lm.py
@@ -47,7 +47,7 @@ def __init__(self,
                output='logits',
                name=None,
                **kwargs):
-    super(MaskedLM, self).__init__(name=name, **kwargs)
+    super().__init__(name=name, **kwargs)
     self.embedding_table = embedding_table
     self.activation = activation
     self.initializer = tf.keras.initializers.get(initializer)
@@ -73,7 +73,7 @@ def build(self, input_shape):
         initializer='zeros',
         trainable=True)

-    super(MaskedLM, self).build(input_shape)
+    super().build(input_shape)

   def call(self, sequence_data, masked_positions):
     masked_lm_input = self._gather_indexes(sequence_data, masked_positions)
diff --git a/official/nlp/modeling/layers/masked_softmax.py b/official/nlp/modeling/layers/masked_softmax.py
index db0a0fcaaf1..51a859027f1 100644
--- a/official/nlp/modeling/layers/masked_softmax.py
+++ b/official/nlp/modeling/layers/masked_softmax.py
@@ -53,7 +53,7 @@ def __init__(self,
       self._normalization_axes = (-1,)
     else:
       self._normalization_axes = normalization_axes
-    super(MaskedSoftmax, self).__init__(**kwargs)
+    super().__init__(**kwargs)

   def call(self, scores, mask=None):

@@ -81,5 +81,5 @@ def get_config(self):
       'mask_expansion_axes': self._mask_expansion_axes,
       'normalization_axes': self._normalization_axes
     }
-    base_config = super(MaskedSoftmax, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))
diff --git a/official/nlp/modeling/layers/mat_mul_with_margin.py b/official/nlp/modeling/layers/mat_mul_with_margin.py
index 9bc8721d20f..25f4ed23a18 100644
--- a/official/nlp/modeling/layers/mat_mul_with_margin.py
+++ b/official/nlp/modeling/layers/mat_mul_with_margin.py
@@ -36,7 +36,7 @@ def __init__(self,
                logit_scale=1.0,
                logit_margin=0.0,
                **kwargs):
-    super(MatMulWithMargin, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self.logit_scale = logit_scale
     self.logit_margin = logit_margin

@@ -61,7 +61,7 @@ def get_config(self):
     config = {
         'logit_scale': self.logit_scale,
         'logit_margin': self.logit_margin}
-    config.update(super(MatMulWithMargin, self).get_config())
+    config.update(super().get_config())
     return config

   @classmethod
diff --git a/official/nlp/modeling/layers/mobile_bert_layers.py b/official/nlp/modeling/layers/mobile_bert_layers.py
index 94f28f2c217..c1ae9ced8f0 100644
--- a/official/nlp/modeling/layers/mobile_bert_layers.py
+++ b/official/nlp/modeling/layers/mobile_bert_layers.py
@@ -26,7 +26,7 @@ class NoNorm(tf.keras.layers.Layer):
   """Apply element-wise linear transformation to the last dimension."""

   def __init__(self, name=None):
-    super(NoNorm, self).__init__(name=name)
+    super().__init__(name=name)

   def build(self, shape):
     kernal_size = shape[-1]
@@ -98,7 +98,7 @@ def __init__(self,
       dropout_rate: Dropout rate.
       **kwargs: keyword arguments.
     """
-    super(MobileBertEmbedding, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self.word_vocab_size = word_vocab_size
     self.word_embed_size = word_embed_size
     self.type_vocab_size = type_vocab_size
@@ -222,7 +222,7 @@ def __init__(self,
     Raises:
       ValueError: A Tensor shape or parameter is invalid.
     """
-    super(MobileBertTransformer, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self.hidden_size = hidden_size
     self.num_attention_heads = num_attention_heads
     self.intermediate_size = intermediate_size
@@ -459,7 +459,7 @@ def __init__(self,
         `predictions`.
       **kwargs: keyword arguments.
     """
-    super(MobileBertMaskedLM, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self.embedding_table = embedding_table
     self.activation = activation
     self.initializer = tf.keras.initializers.get(initializer)
diff --git a/official/nlp/modeling/layers/multi_channel_attention.py b/official/nlp/modeling/layers/multi_channel_attention.py
index cf3a6aca27a..94c22aee333 100644
--- a/official/nlp/modeling/layers/multi_channel_attention.py
+++ b/official/nlp/modeling/layers/multi_channel_attention.py
@@ -49,7 +49,7 @@ def __init__(self,
                kernel_constraint=None,
                bias_constraint=None,
                **kwargs):
-    super(VotingAttention, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self._num_heads = num_heads
     self._head_size = head_size
     self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
@@ -82,7 +82,7 @@ def build(self, unused_input_shapes):
         kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
         bias_initializer=tf_utils.clone_initializer(self._bias_initializer),
         **common_kwargs)
-    super(VotingAttention, self).build(unused_input_shapes)
+    super().build(unused_input_shapes)

   def call(self, encoder_outputs, doc_attention_mask):
     num_docs = tf_utils.get_shape_list(encoder_outputs, expected_rank=[4])[1]
@@ -123,7 +123,7 @@ class MultiChannelAttention(tf.keras.layers.MultiHeadAttention):
   """

   def _build_attention(self, rank):
-    super(MultiChannelAttention, self)._build_attention(rank)  # pytype: disable=attribute-error  # typed-keras
+    super()._build_attention(rank)  # pytype: disable=attribute-error  # typed-keras
     self._masked_softmax = masked_softmax.MaskedSoftmax(mask_expansion_axes=[2])

   def call(self,
diff --git a/official/nlp/modeling/layers/on_device_embedding.py b/official/nlp/modeling/layers/on_device_embedding.py
index be000427f4e..6cc5a05b4fe 100644
--- a/official/nlp/modeling/layers/on_device_embedding.py
+++ b/official/nlp/modeling/layers/on_device_embedding.py
@@ -47,7 +47,7 @@ def __init__(self,
                scale_factor=None,
                **kwargs):

-    super(OnDeviceEmbedding, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self._vocab_size = vocab_size
     self._embedding_width = embedding_width
     self._initializer = initializer
@@ -62,7 +62,7 @@ def get_config(self):
         "use_one_hot": self._use_one_hot,
         "scale_factor": self._scale_factor,
     }
-    base_config = super(OnDeviceEmbedding, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def build(self, input_shape):
@@ -72,7 +72,7 @@ def build(self, input_shape):
         initializer=self._initializer,
         dtype=tf.float32)

-    super(OnDeviceEmbedding, self).build(input_shape)
+    super().build(input_shape)

   def call(self, inputs):
     flat_inputs = tf.reshape(inputs, [-1])
diff --git a/official/nlp/modeling/layers/position_embedding.py b/official/nlp/modeling/layers/position_embedding.py
index 86ee2fc6e99..8f27460d9e4 100644
--- a/official/nlp/modeling/layers/position_embedding.py
+++ b/official/nlp/modeling/layers/position_embedding.py
@@ -53,7 +53,7 @@ def __init__(self,
                seq_axis=1,
                **kwargs):

-    super(PositionEmbedding, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     if max_length is None:
       raise ValueError(
           "`max_length` must be an Integer, not `None`."
@@ -81,7 +81,7 @@ def build(self, input_shape):
         shape=[weight_sequence_length, width],
         initializer=self._initializer)

-    super(PositionEmbedding, self).build(input_shape)
+    super().build(input_shape)

   def call(self, inputs):
     input_shape = tf.shape(inputs)
diff --git a/official/nlp/modeling/layers/reuse_attention.py b/official/nlp/modeling/layers/reuse_attention.py
index 4e7a9d2a78c..75778cdc9ea 100644
--- a/official/nlp/modeling/layers/reuse_attention.py
+++ b/official/nlp/modeling/layers/reuse_attention.py
@@ -223,7 +223,7 @@ def __init__(self,
                kernel_constraint=None,
                bias_constraint=None,
                **kwargs):
-    super(ReuseMultiHeadAttention, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self._num_heads = num_heads
     self._key_dim = key_dim
     self._value_dim = value_dim if value_dim else key_dim
@@ -301,7 +301,7 @@ def get_config(self):
         "key_shape": self._key_shape,
         "value_shape": self._value_shape,
     }
-    base_config = super(ReuseMultiHeadAttention, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   @classmethod
diff --git a/official/nlp/modeling/layers/routing.py b/official/nlp/modeling/layers/routing.py
index 6eb42f1e4b9..ce0b6875ab2 100644
--- a/official/nlp/modeling/layers/routing.py
+++ b/official/nlp/modeling/layers/routing.py
@@ -33,7 +33,7 @@ def __init__(self,
     self._vocab_size = vocab_size
     self._init_importance = init_importance
     self._moving_average_beta = moving_average_beta
-    super(TokenImportanceWithMovingAvg, self).__init__(**kwargs)
+    super().__init__(**kwargs)

   def build(self, input_shape):
     self._importance_embedding = self.add_weight(
@@ -51,7 +51,7 @@ def get_config(self):
         "moving_average_beta": self._moving_average_beta,
     }

-    base_config = super(TokenImportanceWithMovingAvg, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def update_token_importance(self, token_ids, importance):
@@ -80,7 +80,7 @@ def __init__(self,
               **kwargs):
     self._top_k = top_k
     self._random_k = random_k
-    super(SelectTopK, self).__init__(**kwargs)
+    super().__init__(**kwargs)

   def get_config(self):
     config = {
@@ -89,7 +89,7 @@ def get_config(self):
         "random_k": self._random_k,
     }

-    base_config = super(SelectTopK, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def call(self, inputs):
diff --git a/official/nlp/modeling/layers/spectral_normalization.py b/official/nlp/modeling/layers/spectral_normalization.py
index 253060b1460..aa81dbe1f09 100644
--- a/official/nlp/modeling/layers/spectral_normalization.py
+++ b/official/nlp/modeling/layers/spectral_normalization.py
@@ -74,11 +74,11 @@ def __init__(self,
     if not isinstance(layer, tf.keras.layers.Layer):
       raise ValueError('`layer` must be a `tf.keras.layer.Layer`. '
                        'Observed `{}`'.format(layer))
-    super(SpectralNormalization, self).__init__(
+    super().__init__(
         layer, name=wrapper_name, **kwargs)

   def build(self, input_shape):
-    super(SpectralNormalization, self).build(input_shape)
+    super().build(input_shape)
     self.layer.kernel._aggregation = self.aggregation  # pylint: disable=protected-access
     self._dtype = self.layer.kernel.dtype

@@ -193,7 +193,7 @@ def __init__(self,
       raise ValueError(
           'layer must be a `tf.keras.layer.Conv2D` instance. You passed: {input}'
           .format(input=layer))
-    super(SpectralNormalizationConv2D, self).__init__(layer, **kwargs)
+    super().__init__(layer, **kwargs)

   def build(self, input_shape):
     if not self.layer.built:
@@ -238,7 +238,7 @@ def build(self, input_shape):
         dtype=self.dtype,
         aggregation=self.aggregation)

-    super(SpectralNormalizationConv2D, self).build()
+    super().build()

   def call(self, inputs):
     u_update_op, v_update_op, w_update_op = self.update_weights()
diff --git a/official/nlp/modeling/layers/tn_expand_condense.py b/official/nlp/modeling/layers/tn_expand_condense.py
index 2cfa40beb44..406044cda65 100644
--- a/official/nlp/modeling/layers/tn_expand_condense.py
+++ b/official/nlp/modeling/layers/tn_expand_condense.py
@@ -66,7 +66,7 @@ def __init__(self,
     if 'input_shape' not in kwargs and 'input_dim' in kwargs:
       kwargs['input_shape'] = (kwargs.pop('input_dim'),)

-    super(TNExpandCondense, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     assert proj_multiplier in [
         2, 4, 6, 8, 10, 12
     ]
@@ -86,7 +86,7 @@ def build(self, input_shape: List[int]) -> None:
                        'The last dimension of the inputs to `TNExpandCondense` '
                        'should be defined. Found `None`.')

-    super(TNExpandCondense, self).build(input_shape)
+    super().build(input_shape)

     self.proj_size = self.proj_multiplier * input_shape[-1]
@@ -178,5 +178,5 @@ def get_config(self) -> Dict[Any, Any]:
           getattr(self, initializer_arg))

     # Get base config
-    base_config = super(TNExpandCondense, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))
diff --git a/official/nlp/modeling/layers/tn_transformer_expand_condense.py b/official/nlp/modeling/layers/tn_transformer_expand_condense.py
index 5ce5ef9bf6d..53705a1faa4 100644
--- a/official/nlp/modeling/layers/tn_transformer_expand_condense.py
+++ b/official/nlp/modeling/layers/tn_transformer_expand_condense.py
@@ -78,7 +78,7 @@ def __init__(self,
                intermediate_dropout=0.0,
                attention_initializer=None,
                **kwargs):
-    super(TNTransformerExpandCondense, self).__init__(**kwargs)
+    super().__init__(**kwargs)

     self._num_heads = num_attention_heads
     self._intermediate_size = intermediate_size
@@ -170,7 +170,7 @@ def build(self, input_shape):
         epsilon=self._norm_epsilon,
         dtype=tf.float32)

-    super(TNTransformerExpandCondense, self).build(input_shape)
+    super().build(input_shape)

   def get_config(self):
     config = {
@@ -211,7 +211,7 @@ def get_config(self):
         "attention_initializer":
             tf.keras.initializers.serialize(self._attention_initializer)
     }
-    base_config = super(TNTransformerExpandCondense, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def call(self, inputs):
diff --git a/official/nlp/modeling/layers/transformer_xl.py b/official/nlp/modeling/layers/transformer_xl.py
index d59116f06e2..462d80c2534 100644
--- a/official/nlp/modeling/layers/transformer_xl.py
+++ b/official/nlp/modeling/layers/transformer_xl.py
@@ -103,7 +103,7 @@ def __init__(self,
                **kwargs):
     """Initializes TransformerXLBlock layer."""

-    super(TransformerXLBlock, self).__init__(**kwargs)
+    super().__init__(**kwargs)
     self._vocab_size = vocab_size
     self._num_heads = num_attention_heads
     self._head_size = head_size
@@ -181,7 +181,7 @@ def build(self, input_shape):
         axis=-1,
         epsilon=self._norm_epsilon)

-    super(TransformerXLBlock, self).build(input_shape)
+    super().build(input_shape)

   def get_config(self):
     config = {
@@ -210,7 +210,7 @@ def get_config(self):
         "inner_dropout": self._inner_dropout,
     }

-    base_config = super(TransformerXLBlock, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def call(self,
@@ -371,7 +371,7 @@ def __init__(self,
                inner_activation="relu",
                **kwargs):
     """Initializes TransformerXL."""
-    super(TransformerXL, self).__init__(**kwargs)
+    super().__init__(**kwargs)

     self._vocab_size = vocab_size
     self._initializer = initializer
@@ -461,7 +461,7 @@ def get_config(self):
         "inner_activation": self._inner_activation,
     }

-    base_config = super(TransformerXL, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))

   def call(self,
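Note on the pattern applied throughout this patch: every hunk swaps the Python 2-compatible two-argument form `super(ClassName, self)` for Python 3's zero-argument `super()`, which resolves the enclosing class and instance implicitly and stays correct if a class is later renamed. The sketch below is illustrative only and not part of the patch; the layer name `ToyLayer` is made up for the example.

import tensorflow as tf


class ToyLayer(tf.keras.layers.Layer):
  """Illustrative layer showing the zero-argument super() style."""

  def __init__(self, units=8, **kwargs):
    # Old style: super(ToyLayer, self).__init__(**kwargs)
    super().__init__(**kwargs)  # Zero-argument form adopted by this patch.
    self._units = units

  def get_config(self):
    config = {"units": self._units}
    # Old style: base_config = super(ToyLayer, self).get_config()
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))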