From 464bf50d294ee005590df94205a87e31258f3644 Mon Sep 17 00:00:00 2001
From: Lutz Roeder <lutzroeder@users.noreply.github.com>
Date: Sat, 4 Nov 2023 16:55:02 -0700
Subject: [PATCH] Update keras-metadata.json

---
 source/keras-metadata.json | 1218 ++++++++++++++++++++----------------
 1 file changed, 688 insertions(+), 530 deletions(-)

diff --git a/source/keras-metadata.json b/source/keras-metadata.json
index e3bc6239ce4..fd7d239d8fe 100644
--- a/source/keras-metadata.json
+++ b/source/keras-metadata.json
@@ -5,8 +5,12 @@
     "description": "Applies an activation function to an output.",
     "attributes": [
       {
-        "description": "Activation function, such as `tf.nn.relu`, or string name of\n    built-in activation function, such as \"relu\".",
+        "description": "Activation function. It could be a callable, or the name of\n        an activation from the `keras.activations` namespace.",
         "name": "activation"
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments, such as `name` and `dtype`."
       }
     ],
     "inputs": [
@@ -23,7 +27,7 @@
     ],
     "examples": [
       {
-        "code": ">>> layer = tf.keras.layers.Activation('relu')\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[0.0, 0.0, 0.0, 2.0]\n>>> layer = tf.keras.layers.Activation(tf.nn.relu)\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[0.0, 0.0, 0.0, 2.0]"
+        "code": ">>> layer = keras.layers.Activation('relu')\n>>> layer([-3.0, -1.0, 0.0, 2.0])\n[0.0, 0.0, 0.0, 2.0]\n>>> layer = keras.layers.Activation(keras.activations.relu)\n>>> layer([-3.0, -1.0, 0.0, 2.0])\n[0.0, 0.0, 0.0, 2.0]"
       }
     ]
   },
@@ -57,7 +61,7 @@
   {
     "name": "Add",
     "module": "tensorflow.keras.layers",
-    "description": "Layer that adds a list of inputs.\n\nIt takes as input a list of tensors,\nall of the same shape, and returns\na single tensor (also of the same shape).",
+    "description": "Performs elementwise addition operation.\n\nIt takes as input a list of tensors, all of the same shape,\nand returns a single tensor (also of the same shape).",
     "inputs": [
       {
         "name": "inputs",
@@ -71,18 +75,18 @@
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 3, 4)\n>>> x1 = tf.random.normal(input_shape)\n>>> x2 = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Add()([x1, x2])\n>>> print(y.shape)\n(2, 3, 4)"
+        "code": ">>> input_shape = (2, 3, 4)\n>>> x1 = np.random.rand(*input_shape)\n>>> x2 = np.random.rand(*input_shape)\n>>> y = keras.layers.Add()([x1, x2])"
       },
       {
-        "summary": "Used in a functional model:",
-        "code": ">>> input1 = tf.keras.layers.Input(shape=(16,))\n>>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = tf.keras.layers.Input(shape=(32,))\n>>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2)\n>>> # equivalent to `added = tf.keras.layers.add([x1, x2])`\n>>> added = tf.keras.layers.Add()([x1, x2])\n>>> out = tf.keras.layers.Dense(4)(added)\n>>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)"
+        "summary": "Usage in a Keras model:",
+        "code": ">>> input1 = keras.layers.Input(shape=(16,))\n>>> x1 = keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = keras.layers.Input(shape=(32,))\n>>> x2 = keras.layers.Dense(8, activation='relu')(input2)\n>>> # equivalent to `added = keras.layers.add([x1, x2])`\n>>> added = keras.layers.Add()([x1, x2])\n>>> out = keras.layers.Dense(4)(added)\n>>> model = keras.models.Model(inputs=[input1, input2], outputs=out)"
       }
     ]
   },
   {
     "name": "Attention",
     "module": "tensorflow.keras.layers",
-    "description": "Dot-product attention layer, a.k.a. Luong-style attention.\n\nInputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor\nof shape `[batch_size, Tv, dim]` and `key` tensor of shape\n`[batch_size, Tv, dim]`. The calculation follows the steps:\n\n1. Calculate scores with shape `[batch_size, Tq, Tv]` as a `query`-`key` dot\n    product: `scores = tf.matmul(query, key, transpose_b=True)`.\n2. Use scores to calculate a distribution with shape\n    `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`.\n3. Use `distribution` to create a linear combination of `value` with\n     shape `[batch_size, Tq, dim]`:\n     `return tf.matmul(distribution, value)`.",
+    "description": "Dot-product attention layer, a.k.a. Luong-style attention.\n\nInputs are a list with 2 or 3 elements:\n1. A `query` tensor of shape `(batch_size, Tq, dim)`.\n2. A `value` tensor of shape `(batch_size, Tv, dim)`.\n3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none\n    supplied, `value` will be used as a `key`.\n\nThe calculation follows the steps:\n1. Calculate attention scores using `query` and `key` with shape\n    `(batch_size, Tq, Tv)`.\n2. Use scores to calculate a softmax distribution with shape\n    `(batch_size, Tq, Tv)`.\n3. Use the softmax distribution to create a linear combination of `value`\n    with shape `(batch_size, Tq, dim)`.",
     "attributes": [
       {
         "description": "If `True`, will create a scalar variable to scale the\n        attention scores.",
@@ -93,15 +97,15 @@
         "name": "causal"
       },
       {
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        attention scores. Defaults to 0.0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        attention scores. Defaults to `0.0`.",
         "name": "dropout"
       },
       {
-        "description": "List of the following tensors:\n        * query: Query `Tensor` of shape `[batch_size, Tq, dim]`.\n        * value: Value `Tensor` of shape `[batch_size, Tv, dim]`.\n        * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If\n            not given, will use `value` for both `key` and `value`, which is\n            the most common case.",
+        "description": "List of the following tensors:\n        - `query`: Query tensor of shape `(batch_size, Tq, dim)`.\n        - `value`: Value tensor of shape `(batch_size, Tv, dim)`.\n        - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If\n            not given, will use `value` for both `key` and `value`, which is\n            the most common case.",
         "name": "inputs"
       },
       {
-        "description": "List of the following tensors:\n        * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`.\n            If given, the output will be zero at the positions where\n            `mask==False`.\n        * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`.\n            If given, will apply the mask such that values at positions\n             where `mask==False` do not contribute to the result.",
+        "description": "List of the following tensors:\n        - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.\n            If given, the output will be zero at the positions where\n            `mask==False`.\n        - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.\n            If given, will apply the mask such that values at positions\n             where `mask==False` do not contribute to the result.",
         "name": "mask"
       },
       {
@@ -114,11 +118,11 @@
       },
       {
         "name": "score_mode",
-        "description": "Function to use to compute attention scores, one of\n        `{\"dot\", \"concat\"}`. `\"dot\"` refers to the dot product between the\n        query and key vectors. `\"concat\"` refers to the hyperbolic tangent\n        of the concatenation of the query and key vectors."
+        "description": "Function to use to compute attention scores, one of\n        `{\"dot\", \"concat\"}`. `\"dot\"` refers to the dot product between the\n        query and key vectors. `\"concat\"` refers to the hyperbolic tangent\n        of the concatenation of the `query` and `key` vectors."
       },
       {
         "name": "use_causal_mask",
-        "description": "Boolean. Set to `True` for decoder self-attention. Adds\n        a mask such that position `i` cannot attend to positions `j > i`.\n        This prevents the flow of information from the future towards the\n        past.\n        Defaults to `False`.\n\nOutput:\n\n    Attention outputs of shape `[batch_size, Tq, dim]`.\n    [Optional] Attention scores after masking and softmax with shape\n        `[batch_size, Tq, Tv]`.\n\nThe meaning of `query`, `value` and `key` depend on the application. In the\ncase of text similarity, for example, `query` is the sequence embeddings of\nthe first piece of text and `value` is the sequence embeddings of the second\npiece of text. `key` is usually the same tensor as `value`.\n\nHere is a code example for using `Attention` in a CNN+Attention network:\n\n```python\n# Variable-length int sequences.\nquery_input = tf.keras.Input(shape=(None,), dtype='int32')\nvalue_input = tf.keras.Input(shape=(None,), dtype='int32')\n\n# Embedding lookup.\ntoken_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)\n# Query embeddings of shape [batch_size, Tq, dimension].\nquery_embeddings = token_embedding(query_input)\n# Value embeddings of shape [batch_size, Tv, dimension].\nvalue_embeddings = token_embedding(value_input)\n\n# CNN layer.\ncnn_layer = tf.keras.layers.Conv1D(\n    filters=100,\n    kernel_size=4,\n    # Use 'same' padding so outputs have the same shape as inputs.\n    padding='same')\n# Query encoding of shape [batch_size, Tq, filters].\nquery_seq_encoding = cnn_layer(query_embeddings)\n# Value encoding of shape [batch_size, Tv, filters].\nvalue_seq_encoding = cnn_layer(value_embeddings)\n\n# Query-value attention of shape [batch_size, Tq, filters].\nquery_value_attention_seq = tf.keras.layers.Attention()(\n    [query_seq_encoding, value_seq_encoding])\n\n# Reduce over the sequence axis to produce encodings of shape\n# [batch_size, filters].\nquery_encoding = 
tf.keras.layers.GlobalAveragePooling1D()(\n    query_seq_encoding)\nquery_value_attention = tf.keras.layers.GlobalAveragePooling1D()(\n    query_value_attention_seq)\n\n# Concatenate query and document encodings to produce a DNN input layer.\ninput_layer = tf.keras.layers.Concatenate()(\n    [query_encoding, query_value_attention])\n\n# Add DNN layers, and create Model.\n# ...\n```"
+        "description": "Boolean. Set to `True` for decoder self-attention. Adds\n        a mask such that position `i` cannot attend to positions `j > i`.\n        This prevents the flow of information from the future towards the\n        past. Defaults to `False`.\n\nOutput:\n    Attention outputs of shape `(batch_size, Tq, dim)`.\n    (Optional) Attention scores after masking and softmax with shape\n        `(batch_size, Tq, Tv)`."
       }
     ]
   },
@@ -126,7 +130,7 @@
     "name": "Average",
     "module": "tensorflow.keras.layers",
     "category": "Tensor",
-    "description": "Layer that averages a list of inputs element-wise.\n\nIt takes as input a list of tensors, all of the same shape, and returns\na single tensor (also of the same shape).",
+    "description": "Averages a list of inputs element-wise.\n\nIt takes as input a list of tensors, all of the same shape,\nand returns a single tensor (also of the same shape).",
     "inputs": [
       {
         "name": "inputs",
@@ -140,11 +144,11 @@
     ],
     "examples": [
       {
-        "code": ">>> x1 = np.ones((2, 2))\n>>> x2 = np.zeros((2, 2))\n>>> y = tf.keras.layers.Average()([x1, x2])\n>>> y.numpy().tolist()\n[[0.5, 0.5], [0.5, 0.5]]"
+        "code": ">>> input_shape = (2, 3, 4)\n>>> x1 = np.random.rand(*input_shape)\n>>> x2 = np.random.rand(*input_shape)\n>>> y = keras.layers.Average()([x1, x2])"
       },
       {
-        "summary": "Usage in a functional model:",
-        "code": ">>> input1 = tf.keras.layers.Input(shape=(16,))\n>>> x1 = tf.keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = tf.keras.layers.Input(shape=(32,))\n>>> x2 = tf.keras.layers.Dense(8, activation='relu')(input2)\n>>> avg = tf.keras.layers.Average()([x1, x2])\n>>> out = tf.keras.layers.Dense(4)(avg)\n>>> model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)"
+        "summary": "Usage in a Keras model:",
+        "code": ">>> input1 = keras.layers.Input(shape=(16,))\n>>> x1 = keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = keras.layers.Input(shape=(32,))\n>>> x2 = keras.layers.Dense(8, activation='relu')(input2)\n>>> # equivalent to `y = keras.layers.average([x1, x2])`\n>>> y = keras.layers.Average()([x1, x2])\n>>> out = keras.layers.Dense(4)(y)\n>>> model = keras.models.Model(inputs=[input1, input2], outputs=out)"
       }
     ]
   },
@@ -152,113 +156,141 @@
     "name": "AveragePooling1D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Average pooling for temporal data.\n\nDownsamples the input representation by taking the average value over the\nwindow defined by `pool_size`. The window is shifted by `strides`.  The\nresulting output when using \"valid\" padding option has a shape of:\n`output_shape = (input_shape - pool_size + 1) / strides)`\n\nThe resulting output shape when using the \"same\" padding option is:\n`output_shape = input_shape / strides`\n\nFor example, for strides=1 and padding=\"valid\":\n\n```\n>>> x = tf.constant([1., 2., 3., 4., 5.])\n>>> x = tf.reshape(x, [1, 5, 1])\n>>> x\n<tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=\n  array([[[1.],\n          [2.],\n          [3.],\n          [4.],\n          [5.]], dtype=float32)>\n>>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2,\n...    strides=1, padding='valid')\n>>> avg_pool_1d(x)\n<tf.Tensor: shape=(1, 4, 1), dtype=float32, numpy=\narray([[[1.5],\n        [2.5],\n        [3.5],\n        [4.5]]], dtype=float32)>\n```\n\nFor example, for strides=2 and padding=\"valid\":\n\n```\n>>> x = tf.constant([1., 2., 3., 4., 5.])\n>>> x = tf.reshape(x, [1, 5, 1])\n>>> x\n<tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=\n  array([[[1.],\n          [2.],\n          [3.],\n          [4.],\n          [5.]], dtype=float32)>\n>>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2,\n...    strides=2, padding='valid')\n>>> avg_pool_1d(x)\n<tf.Tensor: shape=(1, 2, 1), dtype=float32, numpy=\narray([[[1.5],\n        [3.5]]], dtype=float32)>\n```\n\nFor example, for strides=1 and padding=\"same\":\n\n```\n>>> x = tf.constant([1., 2., 3., 4., 5.])\n>>> x = tf.reshape(x, [1, 5, 1])\n>>> x\n<tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=\n  array([[[1.],\n          [2.],\n          [3.],\n          [4.],\n          [5.]], dtype=float32)>\n>>> avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2,\n...    
strides=1, padding='same')\n>>> avg_pool_1d(x)\n<tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=\narray([[[1.5],\n        [2.5],\n        [3.5],\n        [4.5],\n        [5.]]], dtype=float32)>\n```",
+    "description": "Average pooling for temporal data.\n\nDownsamples the input representation by taking the average value over the\nwindow defined by `pool_size`. The window is shifted by `strides`.  The\nresulting output when using \"valid\" padding option has a shape of:\n`output_shape = (input_shape - pool_size + 1) / strides`\n\nThe resulting output shape when using the \"same\" padding option is:\n`output_shape = input_shape / strides`",
     "attributes": [
       {
-        "description": "Integer, size of the average pooling windows.",
+        "description": "int, size of the average pooling window.",
         "name": "pool_size"
       },
       {
-        "description": "Integer, or None. Factor by which to downscale.\n    E.g. 2 will halve the input.\n    If None, it will default to `pool_size`.",
+        "description": "int or None. Specifies how much the pooling window moves\n        for each pooling step. If None, it will default to `pool_size`.",
         "name": "strides"
       },
       {
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n    the left/right or up/down of the input such that output has the same\n    height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, steps, features)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, features, steps)`.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, steps, features)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, features, steps)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  3D tensor with shape `(batch_size, steps, features)`.\n- If `data_format='channels_first'`:\n  3D tensor with shape `(batch_size, features, steps)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    3D tensor with shape `(batch_size, steps, features)`.\n- If `data_format=\"channels_first\"`:\n    3D tensor with shape `(batch_size, features, steps)`.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  3D tensor with shape `(batch_size, downsampled_steps, features)`.\n- If `data_format='channels_first'`:\n  3D tensor with shape `(batch_size, features, downsampled_steps)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    3D tensor with shape `(batch_size, downsampled_steps, features)`.\n- If `data_format=\"channels_first\"`:\n    3D tensor with shape `(batch_size, features, downsampled_steps)`.",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "summary": "`strides=1` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([1., 2., 3., 4., 5.])\n>>> x = np.reshape(x, [1, 5, 1])\n>>> avg_pool_1d = keras.layers.AveragePooling1D(pool_size=2,\n...    strides=1, padding=\"valid\")\n>>> avg_pool_1d(x)"
+      },
+      {
+        "summary": "`strides=2` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([1., 2., 3., 4., 5.])\n>>> x = np.reshape(x, [1, 5, 1])\n>>> avg_pool_1d = keras.layers.AveragePooling1D(pool_size=2,\n...    strides=2, padding=\"valid\")\n>>> avg_pool_1d(x)"
+      },
+      {
+        "summary": "`strides=1` and `padding=\"same\"`:",
+        "code": ">>> x = np.array([1., 2., 3., 4., 5.])\n>>> x = np.reshape(x, [1, 5, 1])\n>>> avg_pool_1d = keras.layers.AveragePooling1D(pool_size=2,\n...    strides=1, padding=\"same\")\n>>> avg_pool_1d(x)"
+      }
     ]
   },
   {
     "name": "AveragePooling2D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Average pooling operation for spatial data.\n\nDownsamples the input along its spatial dimensions (height and width)\nby taking the average value over an input window\n(of size defined by `pool_size`) for each channel of the input.\nThe window is shifted by `strides` along each dimension.\n\nThe resulting output when using `\"valid\"` padding option has a shape\n(number of rows or columns) of:\n`output_shape = math.floor((input_shape - pool_size) / strides) + 1`\n(when `input_shape >= pool_size`)\n\nThe resulting output shape when using the `\"same\"` padding option is:\n`output_shape = math.floor((input_shape - 1) / strides) + 1`\n\nFor example, for `strides=(1, 1)` and `padding=\"valid\"`:\n\n```\n>>> x = tf.constant([[1., 2., 3.],\n...                  [4., 5., 6.],\n...                  [7., 8., 9.]])\n>>> x = tf.reshape(x, [1, 3, 3, 1])\n>>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding='valid')\n>>> avg_pool_2d(x)\n<tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=\n  array([[[[3.],\n           [4.]],\n          [[6.],\n           [7.]]]], dtype=float32)>\n```\n\nFor example, for `stride=(2, 2)` and `padding=\"valid\"`:\n\n```\n>>> x = tf.constant([[1., 2., 3., 4.],\n...                  [5., 6., 7., 8.],\n...                  [9., 10., 11., 12.]])\n>>> x = tf.reshape(x, [1, 3, 4, 1])\n>>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2),\n...    strides=(2, 2), padding='valid')\n>>> avg_pool_2d(x)\n<tf.Tensor: shape=(1, 1, 2, 1), dtype=float32, numpy=\n  array([[[[3.5],\n           [5.5]]]], dtype=float32)>\n```\n\nFor example, for `strides=(1, 1)` and `padding=\"same\"`:\n\n```\n>>> x = tf.constant([[1., 2., 3.],\n...                  [4., 5., 6.],\n...                  [7., 8., 9.]])\n>>> x = tf.reshape(x, [1, 3, 3, 1])\n>>> avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2),\n...    
strides=(1, 1), padding='same')\n>>> avg_pool_2d(x)\n<tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=\n  array([[[[3.],\n           [4.],\n           [4.5]],\n          [[6.],\n           [7.],\n           [7.5]],\n          [[7.5],\n           [8.5],\n           [9.]]]], dtype=float32)>\n```",
+    "description": "Average pooling operation for 2D spatial data.\n\nDownsamples the input along its spatial dimensions (height and width)\nby taking the average value over an input window\n(of size defined by `pool_size`) for each channel of the input.\nThe window is shifted by `strides` along each dimension.\n\nThe resulting output when using the `\"valid\"` padding option has a spatial\nshape (number of rows or columns) of:\n`output_shape = math.floor((input_shape - pool_size) / strides) + 1`\n(when `input_shape >= pool_size`)\n\nThe resulting output shape when using the `\"same\"` padding option is:\n`output_shape = math.floor((input_shape - 1) / strides) + 1`",
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, channels, height, width)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "integer or tuple of 2 integers,\n    factors by which to downscale (vertical, horizontal).\n    `(2, 2)` will halve the input in both spatial dimension.\n    If only one integer is specified, the same window length\n    will be used for both dimensions.",
+        "description": "int or tuple of 2 integers, factors by which to downscale\n        (dim1, dim2). If only one integer is specified, the same\n        window length will be used for all dimensions.",
         "name": "pool_size"
       },
       {
-        "description": "Integer, tuple of 2 integers, or None.\n    Strides values.\n    If None, it will default to `pool_size`.",
+        "description": "int or tuple of 2 integers, or None. Strides values. If None,\n        it will default to `pool_size`. If only one int is specified, the\n        same stride size will be used for all dimensions.",
         "name": "strides"
       },
       {
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n    the left/right or up/down of the input such that output has the same\n    height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  4D tensor with shape `(batch_size, rows, cols, channels)`.\n- If `data_format='channels_first'`:\n  4D tensor with shape `(batch_size, channels, rows, cols)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    4D tensor with shape `(batch_size, height, width, channels)`.\n- If `data_format=\"channels_first\"`:\n    4D tensor with shape `(batch_size, channels, height, width)`.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`.\n- If `data_format='channels_first'`:\n  4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    4D tensor with shape\n    `(batch_size, pooled_height, pooled_width, channels)`.\n- If `data_format=\"channels_first\"`:\n    4D tensor with shape\n    `(batch_size, channels, pooled_height, pooled_width)`.",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "summary": "`strides=(1, 1)` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([[1., 2., 3.],\n...               [4., 5., 6.],\n...               [7., 8., 9.]])\n>>> x = np.reshape(x, [1, 3, 3, 1])\n>>> avg_pool_2d = keras.layers.AveragePooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding=\"valid\")\n>>> avg_pool_2d(x)"
+      },
+      {
+        "summary": "`strides=(2, 2)` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([[1., 2., 3., 4.],\n...              [5., 6., 7., 8.],\n...              [9., 10., 11., 12.]])\n>>> x = np.reshape(x, [1, 3, 4, 1])\n>>> avg_pool_2d = keras.layers.AveragePooling2D(pool_size=(2, 2),\n...    strides=(2, 2), padding=\"valid\")\n>>> avg_pool_2d(x)"
+      },
+      {
+        "summary": "`strides=(1, 1)` and `padding=\"same\"`:",
+        "code": ">>> x = np.array([[1., 2., 3.],\n...                  [4., 5., 6.],\n...                  [7., 8., 9.]])\n>>> x = np.reshape(x, [1, 3, 3, 1])\n>>> avg_pool_2d = keras.layers.AveragePooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding=\"same\")\n>>> avg_pool_2d(x)"
+      }
     ]
   },
   {
     "name": "AveragePooling3D",
     "module": "tensorflow.keras.layers",
-    "description": "Average pooling operation for 3D data (spatial or spatio-temporal).\n\nDownsamples the input along its spatial dimensions (depth, height, and\nwidth) by taking the average value over an input window\n(of size defined by `pool_size`) for each channel of the input.\nThe window is shifted by `strides` along each dimension.",
+    "description": "Average pooling operation for 3D data (spatial or spatio-temporal).\n\nDownsamples the input along its spatial dimensions (depth, height, and\nwidth) by taking the average value over an input window (of size defined by\n`pool_size`) for each channel of the input. The window is shifted by\n`strides` along each dimension.",
     "attributes": [
       {
-        "description": "tuple of 3 integers,\n    factors by which to downscale (dim1, dim2, dim3).\n    `(2, 2, 2)` will halve the size of the 3D input in each dimension.",
+        "description": "int or tuple of 3 integers, factors by which to downscale\n        (dim1, dim2, dim3). If only one integer is specified, the same\n        window length will be used for all dimensions.",
         "name": "pool_size"
       },
       {
-        "description": "tuple of 3 integers, or None. Strides values.",
+        "description": "int or tuple of 3 integers, or None. Strides values. If None,\n        it will default to `pool_size`. If only one int is specified, the\n        same stride size will be used for all dimensions.",
         "name": "strides"
       },
       {
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n    the left/right or up/down of the input such that output has the same\n    height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n    while `channels_first` corresponds to inputs with shape\n    `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape\n        `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while\n        `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n        It defaults to the `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json`. If you never set it, then it\n        will be `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  5D tensor with shape:\n  `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n- If `data_format='channels_first'`:\n  5D tensor with shape:\n  `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`",
+        "description": "- If `data_format=\"channels_last\"`:\n    5D tensor with shape:\n    `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n- If `data_format=\"channels_first\"`:\n    5D tensor with shape:\n    `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  5D tensor with shape:\n  `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`\n- If `data_format='channels_first'`:\n  5D tensor with shape:\n  `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`",
+        "description": "- If `data_format=\"channels_last\"`:\n    5D tensor with shape:\n    `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`\n- If `data_format=\"channels_first\"`:\n    5D tensor with shape:\n    `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": "depth = 30\nheight = 30\nwidth = 30\ninput_channels = 3\n\ninputs = tf.keras.Input(shape=(depth, height, width, input_channels))\nlayer = tf.keras.layers.AveragePooling3D(pool_size=3)\noutputs = layer(inputs)  # Shape: (batch_size, 10, 10, 10, 3)"
+        "code": "depth = 30\nheight = 30\nwidth = 30\nchannels = 3\n\ninputs = keras.layers.Input(shape=(depth, height, width, channels))\nlayer = keras.layers.AveragePooling3D(pool_size=3)\noutputs = layer(inputs)  # Shape: (batch_size, 10, 10, 10, 3)"
       }
     ]
   },
@@ -360,11 +392,11 @@
     "name": "BatchNormalization",
     "module": "tensorflow.keras.layers",
     "category": "Normalization",
-    "description": "Layer that normalizes its inputs.\n\nBatch normalization applies a transformation that maintains the mean output\nclose to 0 and the output standard deviation close to 1.\n\nImportantly, batch normalization works differently during training and\nduring inference.\n\n**During training** (i.e. when using `fit()` or when calling the layer/model\nwith the argument `training=True`), the layer normalizes its output using\nthe mean and standard deviation of the current batch of inputs. That is to\nsay, for each channel being normalized, the layer returns\n`gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where:\n\n- `epsilon` is small constant (configurable as part of the constructor\narguments)\n- `gamma` is a learned scaling factor (initialized as 1), which\ncan be disabled by passing `scale=False` to the constructor.\n- `beta` is a learned offset factor (initialized as 0), which\ncan be disabled by passing `center=False` to the constructor.\n\n**During inference** (i.e. when using `evaluate()` or `predict()` or when\ncalling the layer/model with the argument `training=False` (which is the\ndefault), the layer normalizes its output using a moving average of the\nmean and standard deviation of the batches it has seen during training. 
That\nis to say, it returns\n`gamma * (batch - self.moving_mean) / sqrt(self.moving_var+epsilon) + beta`.\n\n`self.moving_mean` and `self.moving_var` are non-trainable variables that\nare updated each time the layer in called in training mode, as such:\n\n- `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)`\n- `moving_var = moving_var * momentum + var(batch) * (1 - momentum)`\n\nAs such, the layer will only normalize its inputs during inference\n*after having been trained on data that has similar statistics as the\ninference data*.\n\nWhen `synchronized=True` is set and if this layer is used within a\n`tf.distribute` strategy, there will be an `allreduce` call\nto aggregate batch statistics across all replicas at every\ntraining step. Setting `synchronized` has no impact when the model is\ntrained without specifying any distribution strategy.\n\nExample usage:\n\n```python\nstrategy = tf.distribute.MirroredStrategy()\n\nwith strategy.scope():\n  model = tf.keras.Sequential()\n  model.add(tf.keras.layers.Dense(16))\n  model.add(tf.keras.layers.BatchNormalization(synchronized=True))\n```",
+    "description": "Layer that normalizes its inputs.\n\nBatch normalization applies a transformation that maintains the mean output\nclose to 0 and the output standard deviation close to 1.\n\nImportantly, batch normalization works differently during training and\nduring inference.\n\n**During training** (i.e. when using `fit()` or when calling the layer/model\nwith the argument `training=True`), the layer normalizes its output using\nthe mean and standard deviation of the current batch of inputs. That is to\nsay, for each channel being normalized, the layer returns\n`gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where:\n\n- `epsilon` is a small constant (configurable as part of the constructor\narguments)\n- `gamma` is a learned scaling factor (initialized as 1), which\ncan be disabled by passing `scale=False` to the constructor.\n- `beta` is a learned offset factor (initialized as 0), which\ncan be disabled by passing `center=False` to the constructor.\n\n**During inference** (i.e. when using `evaluate()` or `predict()` or when\ncalling the layer/model with the argument `training=False` (which is the\ndefault)), the layer normalizes its output using a moving average of the\nmean and standard deviation of the batches it has seen during training. That\nis to say, it returns\n`gamma * (batch - self.moving_mean) / sqrt(self.moving_var+epsilon) + beta`.\n\n`self.moving_mean` and `self.moving_var` are non-trainable variables that\nare updated each time the layer is called in training mode, as such:\n\n- `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)`\n- `moving_var = moving_var * momentum + var(batch) * (1 - momentum)`\n\nAs such, the layer will only normalize its inputs during inference\n*after having been trained on data that has similar statistics as the\ninference data*.",
     "attributes": [
       {
         "default": -1,
-        "description": "Integer, the axis that should be normalized (typically the features\n    axis). For instance, after a `Conv2D` layer with\n    `data_format=\"channels_first\"`, set `axis=1` in `BatchNormalization`.",
+        "description": "Integer, the axis that should be normalized\n        (typically the features axis). For instance, after a `Conv2D` layer\n        with `data_format=\"channels_first\"`, use `axis=1`.",
         "name": "axis"
       },
       {
@@ -379,13 +411,13 @@
       },
       {
         "default": true,
-        "description": "If True, multiply by `gamma`. If False, `gamma` is not used. When\n    the next layer is linear (also e.g. `nn.relu`), this can be disabled\n    since the scaling will be done by the next layer.",
+        "description": "If `True`, multiply by `gamma`. If `False`, `gamma` is not used.\n        When the next layer is linear this can be disabled\n        since the scaling will be done by the next layer.",
         "name": "scale",
         "type": "boolean"
       },
       {
         "default": true,
-        "description": "If True, add offset of `beta` to normalized tensor. If False,\n    `beta` is ignored.",
+        "description": "If `True`, add offset of `beta` to normalized tensor.\n        If `False`, `beta` is ignored.",
         "name": "center",
         "type": "boolean"
       },
@@ -473,7 +505,11 @@
       },
       {
         "name": "synchronized",
-        "description": "If True, synchronizes the global batch statistics (mean and\n    variance) for the layer across all devices at each training step in a\n    distributed training strategy. If False, each replica uses its own\n    local batch statistics. Only relevant when used inside a\n    `tf.distribute` strategy."
+        "description": "Only applicable with the TensorFlow backend.\n        If `True`, synchronizes the global batch statistics (mean and\n        variance) for the layer across all devices at each training step\n        in a distributed training strategy.\n        If `False`, each replica uses its own local batch statistics."
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments (e.g. `name` and `dtype`)."
       }
     ],
     "inputs": [
@@ -514,11 +550,11 @@
     "attributes": [
       {
         "default": "concat",
-        "description": "Mode by which outputs of the forward and backward RNNs will be\n    combined. One of {'sum', 'mul', 'concat', 'ave', None}. If None, the\n    outputs will not be combined, they will be returned as a list. Default\n    value is 'concat'.",
+        "description": "Mode by which outputs of the forward and backward RNNs\n        will be combined. One of `{\"sum\", \"mul\", \"concat\", \"ave\", None}`.\n        If `None`, the outputs will not be combined,\n        they will be returned as a list. Defaults to `\"concat\"`.",
         "name": "merge_mode"
       },
       {
-        "description": "`keras.layers.RNN` instance, such as `keras.layers.LSTM` or\n    `keras.layers.GRU`. It could also be a `keras.layers.Layer` instance\n    that meets the following criteria:\n    1. Be a sequence-processing layer (accepts 3D+ inputs).\n    2. Have a `go_backwards`, `return_sequences` and `return_state`\n      attribute (with the same semantics as for the `RNN` class).\n    3. Have an `input_spec` attribute.\n    4. Implement serialization via `get_config()` and `from_config()`.\n    Note that the recommended way to create new RNN layers is to write a\n    custom RNN cell and use it with `keras.layers.RNN`, instead of\n    subclassing `keras.layers.Layer` directly.\n    - When the `returns_sequences` is true, the output of the masked\n    timestep will be zero regardless of the layer's original\n    `zero_output_for_mask` value.",
+        "description": "`keras.layers.RNN` instance, such as\n        `keras.layers.LSTM` or `keras.layers.GRU`.\n        It could also be a `keras.layers.Layer` instance\n        that meets the following criteria:\n        1. Be a sequence-processing layer (accepts 3D+ inputs).\n        2. Have a `go_backwards`, `return_sequences` and `return_state`\n        attribute (with the same semantics as for the `RNN` class).\n        3. Have an `input_spec` attribute.\n        4. Implement serialization via `get_config()` and `from_config()`.\n        Note that the recommended way to create new RNN layers is to write a\n        custom RNN cell and use it with `keras.layers.RNN`, instead of\n        subclassing `keras.layers.Layer` directly.\n        When `return_sequences` is `True`, the output of the masked\n        timestep will be zero regardless of the layer's original\n        `zero_output_for_mask` value.",
         "name": "layer"
       },
       {
@@ -526,7 +562,7 @@
         "name": "weights"
       },
       {
-        "description": "Optional `keras.layers.RNN`, or `keras.layers.Layer`\n    instance to be used to handle backwards input processing.\n    If `backward_layer` is not provided, the layer instance passed as the\n    `layer` argument will be used to generate the backward layer\n    automatically.\n    Note that the provided `backward_layer` layer should have properties\n    matching those of the `layer` argument, in particular it should have the\n    same values for `stateful`, `return_states`, `return_sequences`, etc.\n    In addition, `backward_layer` and `layer` should have different\n    `go_backwards` argument values.\n    A `ValueError` will be raised if these requirements are not met.",
+        "description": "Optional `keras.layers.RNN`,\n        or `keras.layers.Layer` instance to be used to handle\n        backwards input processing.\n        If `backward_layer` is not provided, the layer instance passed\n        as the `layer` argument will be used to generate the backward layer\n        automatically.\n        Note that the provided `backward_layer` layer should have properties\n        matching those of the `layer` argument, in particular\n        it should have the same values for `stateful`, `return_state`,\n        `return_sequences`, etc. In addition, `backward_layer`\n        and `layer` should have different `go_backwards` argument values.\n        A `ValueError` will be raised if these requirements are not met.",
         "name": "backward_layer"
       }
     ],
@@ -542,7 +578,7 @@
     ],
     "examples": [
       {
-        "code": "model = Sequential()\nmodel.add(Bidirectional(LSTM(10, return_sequences=True),\n                             input_shape=(5, 10)))\nmodel.add(Bidirectional(LSTM(10)))\nmodel.add(Dense(5))\nmodel.add(Activation('softmax'))\nmodel.compile(loss='categorical_crossentropy', optimizer='rmsprop')\n\n# With custom backward layer\nmodel = Sequential()\nforward_layer = LSTM(10, return_sequences=True)\nbackward_layer = LSTM(10, activation='relu', return_sequences=True,\n                      go_backwards=True)\nmodel.add(Bidirectional(forward_layer, backward_layer=backward_layer,\n                        input_shape=(5, 10)))\nmodel.add(Dense(5))\nmodel.add(Activation('softmax'))\nmodel.compile(loss='categorical_crossentropy', optimizer='rmsprop')"
+        "code": "model = Sequential([\n    Input(shape=(5, 10)),\n    Bidirectional(LSTM(10, return_sequences=True)),\n    Bidirectional(LSTM(10)),\n    Dense(5, activation=\"softmax\"),\n])\nmodel.compile(loss='categorical_crossentropy', optimizer='rmsprop')\n\n# With custom backward layer\nforward_layer = LSTM(10, return_sequences=True)\nbackward_layer = LSTM(10, activation='relu', return_sequences=True,\n                      go_backwards=True)\nmodel = Sequential([\n    Input(shape=(5, 10)),\n    Bidirectional(forward_layer, backward_layer=backward_layer),\n    Dense(5, activation=\"softmax\"),\n])\nmodel.compile(loss='categorical_crossentropy', optimizer='rmsprop')"
       }
     ]
   },
@@ -550,14 +586,14 @@
     "name": "Concatenate",
     "module": "tensorflow.keras.layers",
     "category": "Tensor",
-    "description": "Layer that concatenates a list of inputs.\n\nIt takes as input a list of tensors, all of the same shape except\nfor the concatenation axis, and returns a single tensor that is the\nconcatenation of all inputs.\n\n```\n>>> x = np.arange(20).reshape(2, 2, 5)\n>>> print(x)\n[[[ 0  1  2  3  4]\n  [ 5  6  7  8  9]]\n [[10 11 12 13 14]\n  [15 16 17 18 19]]]\n>>> y = np.arange(20, 30).reshape(2, 1, 5)\n>>> print(y)\n[[[20 21 22 23 24]]\n [[25 26 27 28 29]]]\n>>> tf.keras.layers.Concatenate(axis=1)([x, y])\n<tf.Tensor: shape=(2, 3, 5), dtype=int64, numpy=\narray([[[ 0,  1,  2,  3,  4],\n        [ 5,  6,  7,  8,  9],\n        [20, 21, 22, 23, 24]],\n       [[10, 11, 12, 13, 14],\n        [15, 16, 17, 18, 19],\n        [25, 26, 27, 28, 29]]])>\n```\n\n```\n>>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))\n>>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))\n>>> concatted = tf.keras.layers.Concatenate()([x1, x2])\n>>> concatted.shape\nTensorShape([5, 16])\n```",
+    "description": "Concatenates a list of inputs.\n\nIt takes as input a list of tensors, all of the same shape except\nfor the concatenation axis, and returns a single tensor that is the\nconcatenation of all inputs.",
     "attributes": [
       {
         "description": "Axis along which to concatenate.",
         "name": "axis"
       },
       {
-        "description": "standard layer keyword arguments.\n",
+        "description": "Standard layer keyword arguments.",
         "name": "**kwargs"
       }
     ],
@@ -571,47 +607,56 @@
       {
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> x = np.arange(20).reshape(2, 2, 5)\n>>> y = np.arange(20, 30).reshape(2, 1, 5)\n>>> keras.layers.Concatenate(axis=1)([x, y])"
+      },
+      {
+        "summary": "Usage in a Keras model:",
+        "code": ">>> x1 = keras.layers.Dense(8)(np.arange(10).reshape(5, 2))\n>>> x2 = keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))\n>>> y = keras.layers.Concatenate()([x1, x2])"
+      }
     ]
   },
   {
     "name": "Conv1D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "1D convolution layer (e.g. temporal convolution).\n\nThis layer creates a convolution kernel that is convolved\nwith the layer input over a single spatial (or temporal) dimension\nto produce a tensor of outputs.\nIf `use_bias` is True, a bias vector is created and added to the outputs.\nFinally, if `activation` is not `None`,\nit is applied to the outputs as well.\n\nWhen using this layer as the first layer in a model,\nprovide an `input_shape` argument\n(tuple of integers or `None`, e.g.\n`(10, 128)` for sequences of 10 vectors of 128-dimensional vectors,\nor `(None, 128)` for variable-length sequences of 128-dimensional vectors.",
+    "description": "1D convolution layer (e.g. temporal convolution).\n\nThis layer creates a convolution kernel that is convolved with the layer\ninput over a single spatial (or temporal) dimension to produce a tensor of\noutputs. If `use_bias` is True, a bias vector is created and added to the\noutputs. Finally, if `activation` is not `None`, it is applied to the\noutputs as well.",
     "attributes": [
       {
         "default": "linear",
-        "description": "Activation function to use.\n    If you don't specify anything, no activation is applied\n    (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
         "default": "valid",
-        "description": "One of `\"valid\"`, `\"same\"` or `\"causal\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input such that output has\n    the same height/width dimension as the input.\n    `\"causal\"` results in causal (dilated) convolutions, e.g. `output[t]`\n    does not depend on `input[t+1:]`. Useful when modeling temporal data\n    where the model should not violate the temporal order.\n    See [WaveNet: A Generative Model for Raw Audio, section\n      2.1](https://arxiv.org/abs/1609.03499).",
+        "description": "string, `\"valid\"`, `\"same\"` or `\"causal\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input. `\"causal\"` results in causal\n        (dilated) convolutions, e.g. `output[t]` does not depend on\n        `input[t+1:]`. Useful when modeling temporal data where the model\n        should not violate the temporal order.\n        See [WaveNet: A Generative Model for Raw Audio, section 2.1](\n        https://arxiv.org/abs/1609.03499).",
         "name": "padding"
       },
       {
         "default": true,
-        "description": "Boolean, whether the layer uses a bias vector.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias",
         "visible": false
       },
       {
         "default": "channels_last",
-        "description": "A string, one of `channels_last` (default) or\n    `channels_first`. The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape `(batch_size, width,\n    channels)` while `channels_first` corresponds to inputs with shape\n    `(batch_size, channels, width)`. Note that the `channels_first` format\n    is currently not supported by TensorFlow on CPU.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, steps, features)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, features, steps)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "default": [
           1
         ],
-        "description": "An integer or tuple/list of a single integer,\n    specifying the stride length of the convolution.\n    Specifying any stride value != 1 is incompatible with specifying\n    any `dilation_rate` value != 1.",
+        "description": "int or tuple/list of 1 integer, specifying the stride length\n        of the convolution. `strides > 1` is incompatible with\n        `dilation_rate > 1`.",
         "name": "strides"
       },
       {
         "default": [
           1
         ],
-        "description": "an integer or tuple/list of a single integer, specifying\n    the dilation rate to use for dilated convolution.\n    Currently, specifying any `dilation_rate` value != 1 is\n    incompatible with specifying any `strides` value != 1.",
+        "description": "int or tuple/list of 1 integer, specifying the dilation\n        rate to use for dilated convolution.",
         "name": "dilation_rate"
       },
       {
@@ -619,7 +664,7 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "Initializer for the bias vector\n    (see `keras.initializers`). Defaults to 'zeros'.",
+        "description": "Initializer for the bias vector. If `None`, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer",
         "visible": false
       },
@@ -633,49 +678,49 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `kernel` weights matrix\n    (see `keras.initializers`). Defaults to 'glorot_uniform'.",
+        "description": "Initializer for the convolution kernel. If `None`,\n        the default initializer (`\"glorot_uniform\"`) will be used.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Integer, the dimensionality of the output space\n    (i.e. the number of output filters in the convolution).",
+        "description": "int, the dimension of the output space (the number of filters\n        in the convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of a single integer,\n    specifying the length of the 1D convolution window.",
+        "description": "int or tuple/list of 1 integer, specifying the size of the\n        convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "Regularizer function applied to\n    the `kernel` weights matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the convolution kernel.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to\n    the output of the layer (its \"activation\")\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the kernel matrix\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        kernel after being updated by an `Optimizer` (e.g. used to implement\n        norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape). Constraints\n        are not safe to use when doing asynchronous distributed training.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint"
       },
       {
-        "description": "A positive integer specifying the number of groups in which the\n    input is split along the channel axis. Each group is convolved\n    separately with `filters / groups` filters. The output is the\n    concatenation of all the `groups` results along the channel axis.\n    Input channels and `filters` must both be divisible by `groups`.",
+        "description": "A positive int specifying the number of groups in which the\n        input is split along the channel axis. Each group is convolved\n        separately with `filters // groups` filters. The output is the\n        concatenation of all the `groups` results along the channel axis.\n        Input channels and `filters` must both be divisible by `groups`.",
         "name": "groups"
       }
     ],
     "inputs": [
       {
-        "description": "3+D tensor with shape: `batch_shape + (steps, input_dim)`",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 3D tensor with shape: `(batch_shape, steps, channels)`\n- If `data_format=\"channels_first\"`:\n    A 3D tensor with shape: `(batch_shape, channels, steps)`",
         "name": "input"
       },
       {
@@ -687,16 +732,13 @@
     ],
     "outputs": [
       {
-        "description": "3+D tensor with shape: `batch_shape + (new_steps, filters)`\n  `steps` value might have changed due to padding or strides.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 3D tensor with shape: `(batch_shape, new_steps, filters)`\n- If `data_format=\"channels_first\"`:\n    A 3D tensor with shape: `(batch_shape, filters, new_steps)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> # The inputs are 128-length vectors with 10 timesteps, and the\n>>> # batch size is 4.\n>>> input_shape = (4, 10, 128)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv1D(\n... 32, 3, activation='relu',input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 8, 32)"
-      },
-      {
-        "code": ">>> # With extended batch shape [4, 7] (e.g. weather data where batch\n>>> # dimensions correspond to spatial location and the third dimension\n>>> # corresponds to time.)\n>>> input_shape = (4, 7, 10, 128)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv1D(\n... 32, 3, activation='relu', input_shape=input_shape[2:])(x)\n>>> print(y.shape)\n(4, 7, 8, 32)"
+        "code": ">>> # The inputs are 128-length vectors with 10 timesteps, and the\n>>> # batch size is 4.\n>>> x = np.random.rand(4, 10, 128)\n>>> y = keras.layers.Conv1D(32, 3, activation='relu')(x)\n>>> print(y.shape)\n(4, 8, 32)"
       }
     ]
   },
@@ -704,28 +746,28 @@
     "name": "Conv2D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "2D convolution layer (e.g. spatial convolution over images).\n\nThis layer creates a convolution kernel that is convolved\nwith the layer input to produce a tensor of\noutputs. If `use_bias` is True,\na bias vector is created and added to the outputs. Finally, if\n`activation` is not `None`, it is applied to the outputs as well.\n\nWhen using this layer as the first layer in a model,\nprovide the keyword argument `input_shape`\n(tuple of integers or `None`, does not include the sample axis),\ne.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures\nin `data_format=\"channels_last\"`. You can use `None` when\na dimension has variable size.",
+    "description": "2D convolution layer.\n\nThis layer creates a convolution kernel that is convolved with the layer\ninput over two spatial dimensions (height and width) to produce a tensor of\noutputs. If `use_bias` is True, a bias vector is created and added to the\noutputs. Finally, if `activation` is not `None`, it is applied to the\noutputs as well.",
     "attributes": [
       {
         "default": "linear",
-        "description": "Activation function to use. If you don't specify anything, no\n    activation is applied (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
         "default": "valid",
-        "description": "one of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input. When `padding=\"same\"`\n    and `strides=1`, the output has the same size as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": true,
-        "description": "Boolean, whether the layer uses a bias vector.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias",
         "type": "boolean",
         "visible": false
       },
       {
         "default": "channels_last",
-        "description": "A string, one of `channels_last` (default) or\n    `channels_first`.  The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape `(batch_size, height,\n    width, channels)` while `channels_first` corresponds to inputs with\n    shape `(batch_size, channels, height, width)`. If left unspecified, it\n    uses the `image_data_format` value found in your Keras config file at\n    `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Note that the `channels_first` format is currently not\n    supported by TensorFlow on CPU. Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape\n        `(batch_size, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
@@ -733,7 +775,7 @@
           1,
           1
         ],
-        "description": "An integer or tuple/list of 2 integers, specifying the strides of\n    the convolution along the height and width. Can be a single integer to\n    specify the same value for all spatial dimensions. Specifying any stride\n    value != 1 is incompatible with specifying any `dilation_rate` value !=\n    1.",
+        "description": "int or tuple/list of 2 integers, specifying the stride length\n        of the convolution. `strides > 1` is incompatible with\n        `dilation_rate > 1`.",
         "name": "strides"
       },
       {
@@ -741,7 +783,7 @@
           1,
           1
         ],
-        "description": "an integer or tuple/list of 2 integers, specifying the\n    dilation rate to use for dilated convolution. Can be a single integer to\n    specify the same value for all spatial dimensions. Currently, specifying\n    any `dilation_rate` value != 1 is incompatible with specifying any\n    stride value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the dilation\n        rate to use for dilated convolution.",
         "name": "dilation_rate"
       },
       {
@@ -749,7 +791,7 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "Initializer for the bias vector (see\n    `keras.initializers`). Defaults to 'zeros'.",
+        "description": "Initializer for the bias vector. If `None`, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer",
         "visible": false
       },
@@ -763,51 +805,51 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `kernel` weights matrix (see\n    `keras.initializers`). Defaults to 'glorot_uniform'.",
+        "description": "Initializer for the convolution kernel. If `None`,\n        the default initializer (`\"glorot_uniform\"`) will be used.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Integer, the dimensionality of the output space (i.e. the number\n    of output filters in the convolution).",
+        "description": "int, the dimension of the output space (the number of filters\n        in the convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of 2 integers, specifying the height\n    and width of the 2D convolution window. Can be a single integer to\n    specify the same value for all spatial dimensions.",
+        "description": "int or tuple/list of 2 integers, specifying the size of the\n        convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the convolution kernel.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector (see\n    `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the output of the\n    layer (its \"activation\") (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the kernel matrix (see\n    `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        kernel after being updated by an `Optimizer` (e.g. used to implement\n        norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape). Constraints\n        are not safe to use when doing asynchronous distributed training.",
         "name": "kernel_constraint",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the bias vector (see\n    `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint",
         "visible": false
       },
       {
-        "description": "A positive integer specifying the number of groups in which the\n    input is split along the channel axis. Each group is convolved\n    separately with `filters / groups` filters. The output is the\n    concatenation of all the `groups` results along the channel axis. Input\n    channels and `filters` must both be divisible by `groups`.",
+        "description": "A positive int specifying the number of groups in which the\n        input is split along the channel axis. Each group is convolved\n        separately with `filters // groups` filters. The output is the\n        concatenation of all the `groups` results along the channel axis.\n        Input channels and `filters` must both be divisible by `groups`.",
         "name": "groups"
       }
     ],
     "inputs": [
       {
-        "description": "4+D tensor with shape: `batch_shape + (channels, rows, cols)` if\n  `data_format='channels_first'`\nor 4+D tensor with shape: `batch_shape + (rows, cols, channels)` if\n  `data_format='channels_last'`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, height, width, channels)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, channels, height, width)`",
         "name": "input"
       },
       {
@@ -819,22 +861,13 @@
     ],
     "outputs": [
       {
-        "description": "4+D tensor with shape: `batch_shape + (filters, new_rows, new_cols)` if\n`data_format='channels_first'` or 4+D tensor with shape: `batch_shape +\n  (new_rows, new_cols, filters)` if `data_format='channels_last'`.  `rows`\n  and `cols` values might have changed due to padding.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> # The inputs are 28x28 RGB images with `channels_last` and the batch\n>>> # size is 4.\n>>> input_shape = (4, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n... 2, 3, activation='relu', input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 26, 26, 2)"
-      },
-      {
-        "code": ">>> # With `dilation_rate` as 2.\n>>> input_shape = (4, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n...     2, 3,\n...     activation='relu',\n...     dilation_rate=2,\n...     input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 24, 24, 2)"
-      },
-      {
-        "code": ">>> # With `padding` as \"same\".\n>>> input_shape = (4, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n... 2, 3, activation='relu', padding=\"same\", input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 28, 28, 2)"
-      },
-      {
-        "code": ">>> # With extended batch shape [4, 7]:\n>>> input_shape = (4, 7, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n... 2, 3, activation='relu', input_shape=input_shape[2:])(x)\n>>> print(y.shape)\n(4, 7, 26, 26, 2)"
+        "code": ">>> x = np.random.rand(4, 10, 10, 128)\n>>> y = keras.layers.Conv2D(32, 3, activation='relu')(x)\n>>> print(y.shape)\n(4, 8, 8, 32)"
       }
     ]
   },
@@ -842,40 +875,40 @@
     "name": "Conv2DTranspose",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Transposed convolution layer (sometimes called Deconvolution).\n\nThe need for transposed convolutions generally arises\nfrom the desire to use a transformation going in the opposite direction\nof a normal convolution, i.e., from something that has the shape of the\noutput of some convolution to something that has the shape of its input\nwhile maintaining a connectivity pattern that is compatible with\nsaid convolution.\n\nWhen using this layer as the first layer in a model,\nprovide the keyword argument `input_shape`\n(tuple of integers or `None`, does not include the sample axis),\ne.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures\nin `data_format=\"channels_last\"`.",
+    "description": "2D transposed convolution layer.\n\nThe need for transposed convolutions generally arises from the desire to use\na transformation going in the opposite direction of a normal convolution,\ni.e., from something that has the shape of the output of some convolution\nto something that has the shape of its input while maintaining a\nconnectivity pattern that is compatible with said convolution.",
     "attributes": [
       {
-        "description": "Integer, the dimensionality of the output space\n    (i.e. the number of output filters in the convolution).",
+        "description": "int, the dimension of the output space (the number of filters\n        in the transposed convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of 2 integers, specifying the\n    height and width of the 2D convolution window.\n    Can be a single integer to specify the same value for\n    all spatial dimensions.",
+        "description": "int or tuple/list of 2 integers, specifying the size of the\n        transposed convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "An integer or tuple/list of 2 integers,\n    specifying the strides of the convolution along the height and width.\n    Can be a single integer to specify the same value for\n    all spatial dimensions.\n    Specifying any stride value != 1 is incompatible with specifying\n    any `dilation_rate` value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the stride length\n        of the transposed convolution. `strides > 1` is incompatible with\n        `dilation_rate > 1`.",
         "name": "strides"
       },
       {
-        "description": "one of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input such that output has\n    the same height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch_size, channels, height, width)`.\n    When unspecified, uses `image_data_format` value found in your Keras\n    config file at `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to \"channels_last\".",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape\n        `(batch_size, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "an integer, specifying the dilation rate for all spatial\n    dimensions for dilated convolution. Specifying different dilation rates\n    for different dimensions is not supported.\n    Currently, specifying any `dilation_rate` value != 1 is\n    incompatible with specifying any stride value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the dilation\n        rate to use for dilated transposed convolution.",
         "name": "dilation_rate"
       },
       {
-        "description": "Activation function to use.\n    If you don't specify anything, no activation is applied\n    (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
         "default": true,
-        "description": "Boolean, whether the layer uses a bias vector.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias",
         "visible": false
       },
@@ -889,7 +922,7 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `kernel` weights matrix\n    (see `keras.initializers`). Defaults to 'glorot_uniform'.",
+        "description": "Initializer for the convolution kernel. If `None`,\n        the default initializer (`\"glorot_uniform\"`) will be used.",
         "name": "kernel_initializer",
         "visible": false
       },
@@ -898,31 +931,31 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "Initializer for the bias vector\n    (see `keras.initializers`). Defaults to 'zeros'.",
+        "description": "Initializer for the bias vector. If `None`, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to\n    the `kernel` weights matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the convolution kernel.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to\n    the output of the layer (its \"activation\") (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the kernel matrix\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        kernel after being updated by an `Optimizer` (e.g. used to implement\n        norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape). Constraints\n        are not safe to use when doing asynchronous distributed training.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint"
       },
       {
@@ -932,7 +965,7 @@
     ],
     "inputs": [
       {
-        "description": "4D tensor with shape:\n`(batch_size, channels, rows, cols)` if data_format='channels_first'\nor 4D tensor with shape:\n`(batch_size, rows, cols, channels)` if data_format='channels_last'.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, height, width, channels)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, channels, height, width)`",
         "name": "input"
       },
       {
@@ -944,16 +977,21 @@
     ],
     "outputs": [
       {
-        "description": "4D tensor with shape:\n`(batch_size, filters, new_rows, new_cols)` if\ndata_format='channels_first'\nor 4D tensor with shape:\n`(batch_size, new_rows, new_cols, filters)` if\ndata_format='channels_last'.  `rows` and `cols` values might have changed\ndue to padding.\nIf `output_padding` is specified:\n```\nnew_rows = ((rows - 1) * strides[0] + kernel_size[0] - 2 * padding[0] +\noutput_padding[0])\nnew_cols = ((cols - 1) * strides[1] + kernel_size[1] - 2 * padding[1] +\noutput_padding[1])\n```",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`",
         "name": "output"
       }
     ],
     "references": [
       {
-        "description": "[A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)"
+        "description": "[A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)"
       },
       {
-        "description": "[Deconvolutional Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)"
+        "description": "[Deconvolutional Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)"
+      }
+    ],
+    "examples": [
+      {
+        "code": ">>> x = np.random.rand(4, 10, 8, 128)\n>>> y = keras.layers.Conv2DTranspose(32, 2, 2, activation='relu')(x)\n>>> print(y.shape)\n(4, 20, 16, 32)"
       }
     ]
   },
@@ -961,98 +999,95 @@
     "name": "Conv3D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "3D convolution layer (e.g. spatial convolution over volumes).\n\nThis layer creates a convolution kernel that is convolved\nwith the layer input to produce a tensor of\noutputs. If `use_bias` is True,\na bias vector is created and added to the outputs. Finally, if\n`activation` is not `None`, it is applied to the outputs as well.\n\nWhen using this layer as the first layer in a model,\nprovide the keyword argument `input_shape`\n(tuple of integers or `None`, does not include the sample axis),\ne.g. `input_shape=(128, 128, 128, 1)` for 128x128x128 volumes\nwith a single channel,\nin `data_format=\"channels_last\"`.",
+    "description": "3D convolution layer.\n\nThis layer creates a convolution kernel that is convolved with the layer\ninput over three spatial dimensions to produce a tensor of\noutputs. If `use_bias` is True, a bias vector is created and added to the\noutputs. Finally, if `activation` is not `None`, it is applied to the\noutputs as well.",
     "attributes": [
       {
-        "description": "Integer, the dimensionality of the output space (i.e. the number\n    of output filters in the convolution).",
+        "description": "int, the dimension of the output space (the number of filters\n        in the convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of 3 integers, specifying the depth,\n    height and width of the 3D convolution window. Can be a single integer\n    to specify the same value for all spatial dimensions.",
+        "description": "int or tuple/list of 3 integers, specifying the size of the\n        convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "An integer or tuple/list of 3 integers, specifying the strides of\n    the convolution along each spatial dimension. Can be a single integer to\n    specify the same value for all spatial dimensions. Specifying any stride\n    value != 1 is incompatible with specifying any `dilation_rate` value !=\n    1.",
+        "description": "int or tuple/list of 3 integers, specifying the stride length\n        of the convolution. `strides > 1` is incompatible with\n        `dilation_rate > 1`.",
         "name": "strides"
       },
       {
-        "description": "one of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input such that output has\n    the same height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
-        "description": "A string, one of `channels_last` (default) or\n    `channels_first`.  The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape `batch_shape +\n    (spatial_dim1, spatial_dim2, spatial_dim3, channels)` while\n    `channels_first` corresponds to inputs with shape `batch_shape +\n    (channels, spatial_dim1, spatial_dim2, spatial_dim3)`. When unspecified,\n    uses `image_data_format` value found in your Keras config file at\n    `~/.keras/keras.json` (if exists) else 'channels_last'. Note that the\n    `channels_first` format is currently not supported by TensorFlow on CPU.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape\n        `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n        It defaults to the `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json`. If you never set it, then it\n        will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "an integer or tuple/list of 3 integers, specifying the\n    dilation rate to use for dilated convolution. Can be a single integer to\n    specify the same value for all spatial dimensions. Currently, specifying\n    any `dilation_rate` value != 1 is incompatible with specifying any\n    stride value != 1.",
+        "description": "int or tuple/list of 3 integers, specifying the dilation\n        rate to use for dilated convolution.",
         "name": "dilation_rate"
       },
       {
-        "description": "Activation function to use. If you don't specify anything, no\n    activation is applied (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
         "default": true,
-        "description": "Boolean, whether the layer uses a bias vector.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias",
         "visible": false
       },
       {
-        "description": "Initializer for the `kernel` weights matrix (see\n    `keras.initializers`). Defaults to 'glorot_uniform'.",
+        "description": "Initializer for the convolution kernel. If `None`,\n        the default initializer (`\"glorot_uniform\"`) will be used.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the bias vector (see\n    `keras.initializers`). Defaults to 'zeros'.",
+        "description": "Initializer for the bias vector. If `None`, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the convolution kernel.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector (see\n    `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the output of the\n    layer (its \"activation\") (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the kernel matrix (see\n    `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        kernel after being updated by an `Optimizer` (e.g. used to implement\n        norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape). Constraints\n        are not safe to use when doing asynchronous distributed training.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector (see\n    `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint"
       },
       {
-        "description": "A positive integer specifying the number of groups in which the\n    input is split along the channel axis. Each group is convolved\n    separately with `filters / groups` filters. The output is the\n    concatenation of all the `groups` results along the channel axis. Input\n    channels and `filters` must both be divisible by `groups`.",
+        "description": "A positive int specifying the number of groups in which the\n        input is split along the channel axis. Each group is convolved\n        separately with `filters // groups` filters. The output is the\n        concatenation of all the `groups` results along the channel axis.\n        Input channels and `filters` must both be divisible by `groups`.",
         "name": "groups"
       }
     ],
     "inputs": [
       {
-        "description": "5+D tensor with shape: `batch_shape + (channels, conv_dim1, conv_dim2,\n  conv_dim3)` if data_format='channels_first'\nor 5+D tensor with shape: `batch_shape + (conv_dim1, conv_dim2, conv_dim3,\n  channels)` if data_format='channels_last'.",
+        "description": "- If `data_format=\"channels_last\"`:\n    5D tensor with shape:\n    `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n- If `data_format=\"channels_first\"`:\n    5D tensor with shape:\n    `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "5+D tensor with shape: `batch_shape + (filters, new_conv_dim1,\n  new_conv_dim2, new_conv_dim3)` if data_format='channels_first'\nor 5+D tensor with shape: `batch_shape + (new_conv_dim1, new_conv_dim2,\n  new_conv_dim3, filters)` if data_format='channels_last'.\n  `new_conv_dim1`, `new_conv_dim2` and `new_conv_dim3` values might have\n  changed due to padding.",
+        "description": "- If `data_format=\"channels_last\"`:\n    5D tensor with shape:\n    `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,\n    filters)`\n- If `data_format=\"channels_first\"`:\n    5D tensor with shape:\n    `(batch_size, filters, new_spatial_dim1, new_spatial_dim2,\n    new_spatial_dim3)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> # The inputs are 28x28x28 volumes with a single channel, and the\n>>> # batch size is 4\n>>> input_shape =(4, 28, 28, 28, 1)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv3D(\n... 2, 3, activation='relu', input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 26, 26, 26, 2)"
-      },
-      {
-        "code": ">>> # With extended batch shape [4, 7], e.g. a batch of 4 videos of\n>>> # 3D frames, with 7 frames per video.\n>>> input_shape = (4, 7, 28, 28, 28, 1)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv3D(\n... 2, 3, activation='relu', input_shape=input_shape[2:])(x)\n>>> print(y.shape)\n(4, 7, 26, 26, 26, 2)"
+        "code": ">>> x = np.random.rand(4, 10, 10, 10, 128)\n>>> y = keras.layers.Conv3D(32, 3, activation='relu')(x)\n>>> print(y.shape)\n(4, 8, 8, 8, 32)"
       }
     ]
   },
@@ -1062,32 +1097,32 @@
     "description": "2D Convolutional LSTM.\n\nSimilar to an LSTM layer, but the input transformations\nand recurrent transformations are both convolutional.",
     "attributes": [
       {
-        "description": "Integer, the dimensionality of the output space (i.e. the number\n    of output filters in the convolution).",
+        "description": "int, the dimension of the output space (the number of filters\n        in the convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of n integers, specifying the\n    dimensions of the convolution window.",
+        "description": "int or tuple/list of 2 integers, specifying the size of the\n        convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "An integer or tuple/list of n integers, specifying the strides of\n    the convolution. Specifying any stride value != 1 is incompatible with\n    specifying any `dilation_rate` value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the stride length\n        of the convolution. `strides > 1` is incompatible with\n        `dilation_rate > 1`.",
         "name": "strides"
       },
       {
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive). `\"valid\"` means\n    no padding. `\"same\"` results in padding evenly to the left/right or\n    up/down of the input such that output has the same height/width\n    dimension as the input.",
+        "description": "string, `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": "channels_last",
-        "description": "A string, one of `channels_last` (default) or\n    `channels_first`.  The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape `(batch, time, ...,\n    channels)` while `channels_first` corresponds to inputs with shape\n    `(batch, time, channels, ...)`. When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n    `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, time, rows, cols, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, time, channels, rows, cols)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "An integer or tuple/list of n integers, specifying the\n    dilation rate to use for dilated convolution. Currently, specifying any\n    `dilation_rate` value != 1 is incompatible with specifying any `strides`\n    value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the dilation\n        rate to use for dilated convolution.",
         "name": "dilation_rate"
       },
       {
-        "description": "Activation function to use. By default hyperbolic tangent\n    activation function is applied (`tanh(x)`).",
+        "description": "Activation function to use. By default hyperbolic tangent\n        activation function is applied (`tanh(x)`).",
         "name": "activation"
       },
       {
@@ -1101,12 +1136,12 @@
         "visible": false
       },
       {
-        "description": "Initializer for the `kernel` weights matrix, used for\n    the linear transformation of the inputs.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the `recurrent_kernel` weights\n    matrix, used for the linear transformation of the recurrent state.",
+        "description": "Initializer for the `recurrent_kernel` weights\n        matrix, used for the linear transformation of the recurrent state.",
         "name": "recurrent_initializer",
         "visible": false
       },
@@ -1116,16 +1151,16 @@
         "visible": false
       },
       {
-        "description": "Boolean. If True, add 1 to the bias of the forget gate\n    at initialization. Use in combination with `bias_initializer=\"zeros\"`.\n    This is recommended in [Jozefowicz et al., 2015](\n    http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)",
+        "description": "Boolean. If `True`, add 1 to the bias of the forget\n        gate at initialization.\n        Use in combination with `bias_initializer=\"zeros\"`.\n        This is recommended in [Jozefowicz et al., 2015](\n        http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)",
         "name": "unit_forget_bias"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the\n    `recurrent_kernel` weights matrix.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix.",
         "name": "recurrent_regularizer",
         "visible": false
       },
@@ -1140,12 +1175,12 @@
         "visible": false
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix.",
         "name": "kernel_constraint",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix.",
         "name": "recurrent_constraint",
         "visible": false
       },
@@ -1155,40 +1190,48 @@
         "visible": false
       },
       {
-        "description": "Boolean. Whether to return the last output in the output\n    sequence, or the full sequence. (default False)",
+        "description": "Boolean. Whether to return the last output\n        in the output sequence, or the full sequence. Default: `False`.",
         "name": "return_sequences"
       },
       {
-        "description": "Boolean (default False). If True, process the input sequence\n    backwards.",
+        "description": "Boolean (default: `False`).\n        If `True`, process the input sequence backwards and return the\n        reversed sequence.",
         "name": "go_backwards"
       },
       {
-        "description": "Boolean (default False). If True, the last state for each sample\n    at index i in a batch will be used as initial state for the sample of\n    index i in the following batch.",
+        "description": "Boolean (default: `False`). If `True`, the last state\n        for each sample at index i in a batch will be used as initial\n        state for the sample of index i in the following batch.",
         "name": "stateful"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n    linear transformation of the inputs.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        linear transformation of the inputs.",
         "name": "dropout"
       },
       {
-        "description": "Float between 0 and 1. Fraction of the units to drop\n    for the linear transformation of the recurrent state.",
+        "description": "Float between 0 and 1. Fraction of the units to drop\n        for the linear transformation of the recurrent state.",
         "name": "recurrent_dropout"
       },
       {
-        "description": "Boolean Whether to return the last state in addition to the\n    output. (default False)",
+        "description": "Boolean. Whether to return the last state in addition\n        to the output. Default: `False`.",
         "name": "return_state"
+      },
+      {
+        "name": "seed",
+        "description": "Random seed for dropout."
+      },
+      {
+        "name": "unroll",
+        "description": "Boolean (default: `False`).\n        If `True`, the network will be unrolled,\n        else a symbolic loop will be used.\n        Unrolling can speed up an RNN,\n        although it tends to be more memory-intensive.\n        Unrolling is only suitable for short sequences."
       }
     ],
     "inputs": [
       {
-        "description": "- If data_format='channels_first'\n    5D tensor with shape:\n    `(samples, time, channels, rows, cols)`\n- If data_format='channels_last'\n    5D tensor with shape:\n    `(samples, time, rows, cols, channels)`",
+        "description": "- If `data_format='channels_first'`:\n    5D tensor with shape: `(samples, time, channels, rows, cols)`\n- If `data_format='channels_last'`:\n    5D tensor with shape: `(samples, time, rows, cols, channels)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `return_state`: a list of tensors. The first tensor is the output.\n  The remaining tensors are the last states,\n  each 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if\n    data_format='channels_first'\n  or shape: `(samples, new_rows, new_cols, filters)` if\n    data_format='channels_last'. `rows` and `cols` values might have\n    changed due to padding.\n- If `return_sequences`: 5D tensor with shape: `(samples, timesteps,\n  filters, new_rows, new_cols)` if data_format='channels_first'\n  or shape: `(samples, timesteps, new_rows, new_cols, filters)` if\n    data_format='channels_last'.\n- Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if\n  data_format='channels_first'\n  or shape: `(samples, new_rows, new_cols, filters)` if\n    data_format='channels_last'.",
+        "description": "- If `return_state`: a list of tensors. The first tensor is the output.\n    The remaining tensors are the last states,\n    each 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if\n    `data_format='channels_first'`\n    or shape: `(samples, new_rows, new_cols, filters)` if\n    `data_format='channels_last'`. `rows` and `cols` values might have\n    changed due to padding.\n- If `return_sequences`: 5D tensor with shape: `(samples, timesteps,\n    filters, new_rows, new_cols)` if `data_format='channels_first'`\n    or shape: `(samples, timesteps, new_rows, new_cols, filters)` if\n    `data_format='channels_last'`.\n- Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if\n    `data_format='channels_first'`\n    or shape: `(samples, new_rows, new_cols, filters)` if\n    `data_format='channels_last'`.",
         "name": "output"
       }
     ],
@@ -1198,6 +1241,9 @@
       }
     ],
     "references": [
+      {
+        "description": " "
+      },
       {
         "description": "[Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) (the current implementation does not include the feedback loop on the cells output)."
       }
@@ -1207,27 +1253,27 @@
     "name": "Convolution2D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "2D convolution layer (e.g. spatial convolution over images).\n\nThis layer creates a convolution kernel that is convolved\nwith the layer input to produce a tensor of\noutputs. If `use_bias` is True,\na bias vector is created and added to the outputs. Finally, if\n`activation` is not `None`, it is applied to the outputs as well.\n\nWhen using this layer as the first layer in a model,\nprovide the keyword argument `input_shape`\n(tuple of integers or `None`, does not include the sample axis),\ne.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures\nin `data_format=\"channels_last\"`. You can use `None` when\na dimension has variable size.",
+    "description": "2D convolution layer.\n\nThis layer creates a convolution kernel that is convolved with the layer\ninput over a 2D spatial (or temporal) dimension (height and width) to\nproduce a tensor of outputs. If `use_bias` is True, a bias vector is created\nand added to the outputs. Finally, if `activation` is not `None`, it is\napplied to the outputs as well.",
     "attributes": [
       {
         "default": "linear",
-        "description": "Activation function to use. If you don't specify anything, no\n    activation is applied (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
         "default": "valid",
-        "description": "one of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input. When `padding=\"same\"`\n    and `strides=1`, the output has the same size as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": true,
-        "description": "Boolean, whether the layer uses a bias vector.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias",
         "visible": false
       },
       {
         "default": "channels_last",
-        "description": "A string, one of `channels_last` (default) or\n    `channels_first`.  The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape `(batch_size, height,\n    width, channels)` while `channels_first` corresponds to inputs with\n    shape `(batch_size, channels, height, width)`. If left unspecified, it\n    uses the `image_data_format` value found in your Keras config file at\n    `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Note that the `channels_first` format is currently not\n    supported by TensorFlow on CPU. Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape\n        `(batch_size, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
@@ -1235,7 +1281,7 @@
           1,
           1
         ],
-        "description": "An integer or tuple/list of 2 integers, specifying the strides of\n    the convolution along the height and width. Can be a single integer to\n    specify the same value for all spatial dimensions. Specifying any stride\n    value != 1 is incompatible with specifying any `dilation_rate` value !=\n    1.",
+        "description": "int or tuple/list of 2 integers, specifying the stride length\n        of the convolution. `strides > 1` is incompatible with\n        `dilation_rate > 1`.",
         "name": "strides"
       },
       {
@@ -1243,7 +1289,7 @@
           1,
           1
         ],
-        "description": "an integer or tuple/list of 2 integers, specifying the\n    dilation rate to use for dilated convolution. Can be a single integer to\n    specify the same value for all spatial dimensions. Currently, specifying\n    any `dilation_rate` value != 1 is incompatible with specifying any\n    stride value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the dilation\n        rate to use for dilated convolution.",
         "name": "dilation_rate"
       },
       {
@@ -1255,7 +1301,7 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "Initializer for the bias vector (see\n    `keras.initializers`). Defaults to 'zeros'.",
+        "description": "Initializer for the bias vector. If `None`, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer",
         "visible": false
       },
@@ -1269,49 +1315,49 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `kernel` weights matrix (see\n    `keras.initializers`). Defaults to 'glorot_uniform'.",
+        "description": "Initializer for the convolution kernel. If `None`,\n        the default initializer (`\"glorot_uniform\"`) will be used.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Integer, the dimensionality of the output space (i.e. the number\n    of output filters in the convolution).",
+        "description": "int, the dimension of the output space (the number of filters\n        in the convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of 2 integers, specifying the height\n    and width of the 2D convolution window. Can be a single integer to\n    specify the same value for all spatial dimensions.",
+        "description": "int or tuple/list of 2 integers, specifying the size of the\n        convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the convolution kernel.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector (see\n    `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the output of the\n    layer (its \"activation\") (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the kernel matrix (see\n    `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        kernel after being updated by an `Optimizer` (e.g. used to implement\n        norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape). Constraints\n        are not safe to use when doing asynchronous distributed training.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector (see\n    `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint"
       },
       {
-        "description": "A positive integer specifying the number of groups in which the\n    input is split along the channel axis. Each group is convolved\n    separately with `filters / groups` filters. The output is the\n    concatenation of all the `groups` results along the channel axis. Input\n    channels and `filters` must both be divisible by `groups`.",
+        "description": "A positive int specifying the number of groups in which the\n        input is split along the channel axis. Each group is convolved\n        separately with `filters // groups` filters. The output is the\n        concatenation of all the `groups` results along the channel axis.\n        Input channels and `filters` must both be divisible by `groups`.",
         "name": "groups"
       }
     ],
     "inputs": [
       {
-        "description": "4+D tensor with shape: `batch_shape + (channels, rows, cols)` if\n  `data_format='channels_first'`\nor 4+D tensor with shape: `batch_shape + (rows, cols, channels)` if\n  `data_format='channels_last'`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, height, width, channels)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, channels, height, width)`",
         "name": "input"
       },
       {
@@ -1323,22 +1369,13 @@
     ],
     "outputs": [
       {
-        "description": "4+D tensor with shape: `batch_shape + (filters, new_rows, new_cols)` if\n`data_format='channels_first'` or 4+D tensor with shape: `batch_shape +\n  (new_rows, new_cols, filters)` if `data_format='channels_last'`.  `rows`\n  and `cols` values might have changed due to padding.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> # The inputs are 28x28 RGB images with `channels_last` and the batch\n>>> # size is 4.\n>>> input_shape = (4, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n... 2, 3, activation='relu', input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 26, 26, 2)"
-      },
-      {
-        "code": ">>> # With `dilation_rate` as 2.\n>>> input_shape = (4, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n...     2, 3,\n...     activation='relu',\n...     dilation_rate=2,\n...     input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 24, 24, 2)"
-      },
-      {
-        "code": ">>> # With `padding` as \"same\".\n>>> input_shape = (4, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n... 2, 3, activation='relu', padding=\"same\", input_shape=input_shape[1:])(x)\n>>> print(y.shape)\n(4, 28, 28, 2)"
-      },
-      {
-        "code": ">>> # With extended batch shape [4, 7]:\n>>> input_shape = (4, 7, 28, 28, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.Conv2D(\n... 2, 3, activation='relu', input_shape=input_shape[2:])(x)\n>>> print(y.shape)\n(4, 7, 26, 26, 2)"
+        "code": ">>> x = np.random.rand(4, 10, 10, 128)\n>>> y = keras.layers.Conv2D(32, 3, activation='relu')(x)\n>>> print(y.shape)\n(4, 8, 8, 32)"
       }
     ]
   },
@@ -1349,7 +1386,7 @@
     "description": "Cropping layer for 1D input (e.g. temporal sequence).\n\nIt crops along the time dimension (axis 1).",
     "attributes": [
       {
-        "description": "Int or tuple of int (length 2)\n    How many units should be trimmed off at the beginning and end of\n    the cropping dimension (axis 1).\n    If a single int is provided, the same value will be used for both.",
+        "description": "Int, or tuple of int (length 2), or dictionary.\n        - If int: how many units should be trimmed off at the beginning and\n          end of the cropping dimension (axis 1).\n        - If tuple of 2 ints: how many units should be trimmed off at the\n          beginning and end of the cropping dimension\n          (`(left_crop, right_crop)`).",
         "name": "cropping"
       }
     ],
@@ -1367,7 +1404,7 @@
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 3, 2)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> print(x)\n[[[ 0  1]\n  [ 2  3]\n  [ 4  5]]\n [[ 6  7]\n  [ 8  9]\n  [10 11]]]\n>>> y = tf.keras.layers.Cropping1D(cropping=1)(x)\n>>> print(y)\ntf.Tensor(\n  [[[2 3]]\n   [[8 9]]], shape=(2, 1, 2), dtype=int64)"
+        "code": ">>> input_shape = (2, 3, 2)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> x\n[[[ 0  1]\n  [ 2  3]\n  [ 4  5]]\n [[ 6  7]\n  [ 8  9]\n  [10 11]]]\n>>> y = keras.layers.Cropping1D(cropping=1)(x)\n>>> y\n[[[2 3]]\n [[8 9]]]"
       }
     ]
   },
@@ -1378,29 +1415,29 @@
     "description": "Cropping layer for 2D input (e.g. picture).\n\nIt crops along spatial dimensions, i.e. height and width.",
     "attributes": [
       {
-        "description": "Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints.\n    - If int: the same symmetric cropping\n      is applied to height and width.\n    - If tuple of 2 ints:\n      interpreted as two different\n      symmetric cropping values for height and width:\n      `(symmetric_height_crop, symmetric_width_crop)`.\n    - If tuple of 2 tuples of 2 ints:\n      interpreted as\n      `((top_crop, bottom_crop), (left_crop, right_crop))`",
+        "description": "Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints.\n        - If int: the same symmetric cropping is applied to height and\n          width.\n        - If tuple of 2 ints: interpreted as two different symmetric\n          cropping values for height and width:\n          `(symmetric_height_crop, symmetric_width_crop)`.\n        - If tuple of 2 tuples of 2 ints: interpreted as\n          `((top_crop, bottom_crop), (left_crop, right_crop))`.",
         "name": "cropping"
       },
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch_size, channels, height, width)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string, one of `\"channels_last\"` (default) or\n        `\"channels_first\"`. The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch_size, height, width, channels)` while `\"channels_first\"`\n        corresponds to inputs with shape\n        `(batch_size, channels, height, width)`.\n        When unspecified, uses `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json` (if exists). Defaults to\n        `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, rows, cols, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, rows, cols)`",
+        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, height, width, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, height, width)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, cropped_rows, cropped_cols, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, cropped_rows, cropped_cols)`",
+        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, cropped_height, cropped_width, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, cropped_height, cropped_width)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 28, 28, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> y = tf.keras.layers.Cropping2D(cropping=((2, 2), (4, 4)))(x)\n>>> print(y.shape)\n(2, 24, 20, 3)"
+        "code": ">>> input_shape = (2, 28, 28, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> y = keras.layers.Cropping2D(cropping=((2, 2), (4, 4)))(x)\n>>> y.shape\n(2, 24, 20, 3)"
       }
     ]
   },
@@ -1408,28 +1445,33 @@
     "name": "Cropping3D",
     "module": "tensorflow.keras.layers",
     "category": "Shape",
-    "description": "Cropping layer for 3D data (e.g. spatial or spatio-temporal).\n\n  Examples:\n\n```\n>>> input_shape = (2, 28, 28, 10, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> y = tf.keras.layers.Cropping3D(cropping=(2, 4, 2))(x)\n>>> print(y.shape)\n(2, 24, 20, 6, 3)\n```",
+    "description": "Cropping layer for 3D data (e.g. spatial or spatio-temporal).",
     "attributes": [
       {
-        "description": "Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.\n    - If int: the same symmetric cropping\n      is applied to depth, height, and width.\n    - If tuple of 3 ints: interpreted as two different\n      symmetric cropping values for depth, height, and width:\n      `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`.\n    - If tuple of 3 tuples of 2 ints: interpreted as\n      `((left_dim1_crop, right_dim1_crop), (left_dim2_crop,\n        right_dim2_crop), (left_dim3_crop, right_dim3_crop))`",
+        "description": "Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.\n        - If int: the same symmetric cropping is applied to depth, height,\n          and width.\n        - If tuple of 3 ints: interpreted as three different symmetric\n          cropping values for depth, height, and width:\n          `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`.\n        - If tuple of 3 tuples of 2 ints: interpreted as\n          `((left_dim1_crop, right_dim1_crop), (left_dim2_crop,\n          right_dim2_crop), (left_dim3_crop, right_dim3_crop))`.",
         "name": "cropping"
       },
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n    while `channels_first` corresponds to inputs with shape\n    `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string, one of `\"channels_last\"` (default) or\n        `\"channels_first\"`. The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n        When unspecified, uses `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json` (if exists). Defaults to\n        `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, first_axis_to_crop, second_axis_to_crop,\n  third_axis_to_crop, depth)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, depth, first_axis_to_crop, second_axis_to_crop,\n    third_axis_to_crop)`",
+        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, first_axis_to_crop, second_axis_to_crop,\n  third_axis_to_crop, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, first_axis_to_crop, second_axis_to_crop,\n  third_axis_to_crop)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, first_cropped_axis, second_cropped_axis,\n  third_cropped_axis, depth)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, depth, first_cropped_axis, second_cropped_axis,\n    third_cropped_axis)`",
+        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, first_cropped_axis, second_cropped_axis,\n  third_cropped_axis, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, first_cropped_axis, second_cropped_axis,\n  third_cropped_axis)`",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> input_shape = (2, 28, 28, 10, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> y = keras.layers.Cropping3D(cropping=(2, 4, 2))(x)\n>>> y.shape\n(2, 24, 20, 6, 3)"
+      }
     ]
   },
   {
@@ -1571,7 +1613,7 @@
     "name": "Dense",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Just your regular densely-connected NN layer.\n\n`Dense` implements the operation:\n`output = activation(dot(input, kernel) + bias)`\nwhere `activation` is the element-wise activation function\npassed as the `activation` argument, `kernel` is a weights matrix\ncreated by the layer, and `bias` is a bias vector created by the layer\n(only applicable if `use_bias` is `True`). These are all attributes of\n`Dense`.\n\nNote: If the input to the layer has a rank greater than 2, then `Dense`\ncomputes the dot product between the `inputs` and the `kernel` along the\nlast axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).\nFor example, if input has dimensions `(batch_size, d0, d1)`, then we create\na `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2\nof the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are\n`batch_size * d0` such sub-tensors).  The output in this case will have\nshape `(batch_size, d0, units)`.\n\nBesides, layer attributes cannot be modified after the layer has been called\nonce (except the `trainable` attribute).\nWhen a popular kwarg `input_shape` is passed, then keras will create\nan input layer to insert before the current layer. This can be treated\nequivalent to explicitly defining an `InputLayer`.",
+    "description": "Just your regular densely-connected NN layer.\n\n`Dense` implements the operation:\n`output = activation(dot(input, kernel) + bias)`\nwhere `activation` is the element-wise activation function\npassed as the `activation` argument, `kernel` is a weights matrix\ncreated by the layer, and `bias` is a bias vector created by the layer\n(only applicable if `use_bias` is `True`).\n\nNote: If the input to the layer has a rank greater than 2, `Dense`\ncomputes the dot product between the `inputs` and the `kernel` along the\nlast axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).\nFor example, if input has dimensions `(batch_size, d0, d1)`, then we create\na `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2\nof the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are\n`batch_size * d0` such sub-tensors). The output in this case will have\nshape `(batch_size, d0, units)`.",
     "attributes": [
       {
         "description": "Positive integer, dimensionality of the output space.",
@@ -1745,18 +1787,18 @@
   {
     "name": "Dot",
     "module": "tensorflow.keras.layers",
-    "description": "Layer that computes a dot product between samples in two tensors.\n\nE.g. if applied to a list of two tensors `a` and `b` of shape\n`(batch_size, n)`, the output will be a tensor of shape `(batch_size, 1)`\nwhere each entry `i` will be the dot product between\n`a[i]` and `b[i]`.\n\n```\n>>> x = np.arange(10).reshape(1, 5, 2)\n>>> print(x)\n[[[0 1]\n  [2 3]\n  [4 5]\n  [6 7]\n  [8 9]]]\n>>> y = np.arange(10, 20).reshape(1, 2, 5)\n>>> print(y)\n[[[10 11 12 13 14]\n  [15 16 17 18 19]]]\n>>> tf.keras.layers.Dot(axes=(1, 2))([x, y])\n<tf.Tensor: shape=(1, 2, 2), dtype=int64, numpy=\narray([[[260, 360],\n        [320, 445]]])>\n```\n\n```\n>>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))\n>>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))\n>>> dotted = tf.keras.layers.Dot(axes=1)([x1, x2])\n>>> dotted.shape\nTensorShape([5, 1])\n```",
+    "description": "Computes element-wise dot product of two tensors.\n\nIt takes a list of inputs of size 2, and the axes\ncorresponding to each input along which the dot product\nis to be performed.\n\nLet's say `x` and `y` are the two input tensors with shapes\n`(2, 3, 5)` and `(2, 10, 3)`. The batch dimension should be\nof the same size for both the inputs, and `axes` should correspond\nto the dimensions that have the same size in the corresponding\ninputs. E.g., with `axes=(1, 2)`, the dot product of `x` and `y`\nwill result in a tensor with shape `(2, 5, 10)`.",
     "attributes": [
       {
-        "description": "Integer or tuple of integers,\n    axis or axes along which to take the dot product.",
+        "description": "Integer or tuple of integers, axis or axes along which to\n        take the dot product. If a tuple, should be two integers\n        corresponding to the desired axis from the first input and the\n        desired axis from the second input, respectively. Note that the\n        size of the two selected axes must match.",
         "name": "axes"
       },
       {
-        "description": "Whether to L2-normalize samples along the\n    dot product axis before taking the dot product.\n    If set to True, then the output of the dot product\n    is the cosine proximity between the two samples.",
+        "description": "Whether to L2-normalize samples along the dot product axis\n        before taking the dot product. If set to `True`, then\n        the output of the dot product is the cosine proximity\n        between the two samples.",
         "name": "normalize"
       },
       {
-        "description": "Standard layer keyword arguments.\n",
+        "description": "Standard layer keyword arguments.",
         "name": "**kwargs"
       }
     ],
@@ -1772,20 +1814,29 @@
       {
         "name": "z"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> x = np.arange(10).reshape(1, 5, 2)\n>>> y = np.arange(10, 20).reshape(1, 2, 5)\n>>> keras.layers.Dot(axes=(1, 2))([x, y])"
+      },
+      {
+        "summary": "Usage in a Keras model:",
+        "code": ">>> x1 = keras.layers.Dense(8)(np.arange(10).reshape(5, 2))\n>>> x2 = keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))\n>>> y = keras.layers.Dot(axes=1)([x1, x2])"
+      }
     ]
   },
   {
     "name": "Dropout",
     "module": "tensorflow.keras.layers",
     "category": "Dropout",
-    "description": "Applies Dropout to the input.\n\nThe Dropout layer randomly sets input units to 0 with a frequency of `rate`\nat each step during training time, which helps prevent overfitting.\nInputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over\nall inputs is unchanged.\n\nNote that the Dropout layer only applies when `training` is set to True\nsuch that no values are dropped during inference. When using `model.fit`,\n`training` will be appropriately set to True automatically, and in other\ncontexts, you can set the kwarg explicitly to True when calling the layer.\n\n(This is in contrast to setting `trainable=False` for a Dropout layer.\n`trainable` does not affect the layer's behavior, as Dropout does\nnot have any variables/weights that can be frozen during training.)\n\n```\n>>> tf.random.set_seed(0)\n>>> layer = tf.keras.layers.Dropout(.2, input_shape=(2,))\n>>> data = np.arange(10).reshape(5, 2).astype(np.float32)\n>>> print(data)\n[[0. 1.]\n [2. 3.]\n [4. 5.]\n [6. 7.]\n [8. 9.]]\n>>> outputs = layer(data, training=True)\n>>> print(outputs)\ntf.Tensor(\n[[ 0.    1.25]\n [ 2.5   3.75]\n [ 5.    6.25]\n [ 7.5   8.75]\n [10.    0.  ]], shape=(5, 2), dtype=float32)\n```",
+    "description": "Applies dropout to the input.\n\nThe `Dropout` layer randomly sets input units to 0 with a frequency of\n`rate` at each step during training time, which helps prevent overfitting.\nInputs not set to 0 are scaled up by `1 / (1 - rate)` such that the sum over\nall inputs is unchanged.\n\nNote that the `Dropout` layer only applies when `training` is set to `True`\nin `call()`, such that no values are dropped during inference.\nWhen using `model.fit`, `training` will be appropriately set to `True`\nautomatically. In other contexts, you can set the argument explicitly\nto `True` when calling the layer.\n\n(This is in contrast to setting `trainable=False` for a `Dropout` layer.\n`trainable` does not affect the layer's behavior, as `Dropout` does\nnot have any variables/weights that can be frozen during training.)",
     "attributes": [
       {
         "description": "Float between 0 and 1. Fraction of the input units to drop.",
         "name": "rate"
       },
       {
-        "description": "1D integer tensor representing the shape of the\n    binary dropout mask that will be multiplied with the input.\n    For instance, if your inputs have shape\n    `(batch_size, timesteps, features)` and\n    you want the dropout mask to be the same for all timesteps,\n    you can use `noise_shape=(batch_size, 1, features)`.",
+        "description": "1D integer tensor representing the shape of the\n        binary dropout mask that will be multiplied with the input.\n        For instance, if your inputs have shape\n        `(batch_size, timesteps, features)` and\n        you want the dropout mask to be the same for all timesteps,\n        you can use `noise_shape=(batch_size, 1, features)`.",
         "name": "noise_shape"
       },
       {
@@ -1813,11 +1864,15 @@
     "name": "ELU",
     "module": "tensorflow.keras.layers",
     "category": "Activation",
-    "description": "Exponential Linear Unit.\n\nIt follows:\n\n```\n    f(x) =  alpha * (exp(x) - 1.) for x < 0\n    f(x) = x for x >= 0\n```",
+    "description": "Applies an Exponential Linear Unit function to an output.\n\nFormula:\n\n```\nf(x) = alpha * (exp(x) - 1.) for x < 0\nf(x) = x for x >= 0\n```",
     "attributes": [
       {
-        "description": "Scale for the negative factor.",
+        "description": "float, slope of negative section. Defaults to `1.0`.",
         "name": "alpha"
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments, such as `name` and `dtype`."
       }
     ],
     "inputs": [
@@ -1842,11 +1897,11 @@
     "name": "Embedding",
     "module": "tensorflow.keras.layers",
     "category": "Transform",
-    "description": "Turns positive integers (indexes) into dense vectors of fixed size.\n\ne.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`\n\nThis layer can only be used on positive integer inputs of a fixed range. The\n`tf.keras.layers.TextVectorization`, `tf.keras.layers.StringLookup`,\nand `tf.keras.layers.IntegerLookup` preprocessing layers can help prepare\ninputs for an `Embedding` layer.\n\nThis layer accepts `tf.Tensor`, `tf.RaggedTensor` and `tf.SparseTensor`\ninput.",
+    "description": "Turns positive integers (indexes) into dense vectors of fixed size.\n\ne.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`\n\nThis layer can only be used on positive integer inputs of a fixed range.",
     "attributes": [
       {
         "default": false,
-        "description": "Boolean, whether or not the input value 0 is a special\n    \"padding\" value that should be masked out. This is useful when using\n    recurrent layers which may take variable length input. If this is\n    `True`, then all subsequent layers in the model need to support masking\n    or an exception will be raised. If mask_zero is set to True, as a\n    consequence, index 0 cannot be used in the vocabulary (input_dim should\n    equal size of vocabulary + 1).",
+        "description": "Boolean, whether or not the input value 0 is a special\n        \"padding\" value that should be masked out.\n        This is useful when using recurrent layers which\n        may take variable length input. If this is `True`,\n        then all subsequent layers in the model need\n        to support masking or an exception will be raised.\n        If mask_zero is set to True, as a consequence,\n        index 0 cannot be used in the vocabulary (input_dim should\n        equal size of vocabulary + 1).",
         "name": "mask_zero"
       },
       {
@@ -1858,12 +1913,12 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `embeddings`\n    matrix (see `keras.initializers`).",
+        "description": "Initializer for the `embeddings`\n        matrix (see `keras.initializers`).",
         "name": "embeddings_initializer",
         "visible": false
       },
       {
-        "description": "Integer. Size of the vocabulary,\n    i.e. maximum integer index + 1.",
+        "description": "Integer. Size of the vocabulary,\n        i.e. maximum integer index + 1.",
         "name": "input_dim"
       },
       {
@@ -1871,12 +1926,12 @@
         "name": "output_dim"
       },
       {
-        "description": "Regularizer function applied to\n    the `embeddings` matrix (see `keras.regularizers`).",
+        "description": "Regularizer function applied to\n        the `embeddings` matrix (see `keras.regularizers`).",
         "name": "embeddings_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to\n    the `embeddings` matrix (see `keras.constraints`).",
+        "description": "Constraint function applied to\n        the `embeddings` matrix (see `keras.constraints`).",
         "name": "embeddings_constraint"
       },
       {
@@ -1903,7 +1958,7 @@
     ],
     "outputs": [
       {
-        "description": "3D tensor with shape: `(batch_size, input_length, output_dim)`.\n\n**Note on variable placement:**\nBy default, if a GPU is available, the embedding matrix will be placed on\nthe GPU. This achieves the best performance, but it might cause issues:\n\n- You may be using an optimizer that does not support sparse GPU kernels.\nIn this case you will see an error upon training your model.\n- Your embedding matrix may be too large to fit on your GPU. In this case\nyou will see an Out Of Memory (OOM) error.\n\nIn such cases, you should place the embedding matrix on the CPU memory.\nYou can do so with a device scope, as such:\n\n```python\nwith tf.device('cpu:0'):\n  embedding_layer = Embedding(...)\n  embedding_layer.build()\n```\n\nThe pre-built `embedding_layer` instance can then be added to a `Sequential`\nmodel (e.g. `model.add(embedding_layer)`), called in a Functional model\n(e.g. `x = embedding_layer(x)`), or used in a subclassed model.",
+        "description": "3D tensor with shape: `(batch_size, input_length, output_dim)`.",
         "name": "output"
       }
     ],
@@ -1914,7 +1969,7 @@
     ],
     "examples": [
       {
-        "code": ">>> model = tf.keras.Sequential()\n>>> model.add(tf.keras.layers.Embedding(1000, 64, input_length=10))\n>>> # The model will take as input an integer matrix of size (batch,\n>>> # input_length), and the largest integer (i.e. word index) in the input\n>>> # should be no larger than 999 (vocabulary size).\n>>> # Now model.output_shape is (None, 10, 64), where `None` is the batch\n>>> # dimension.\n>>> input_array = np.random.randint(1000, size=(32, 10))\n>>> model.compile('rmsprop', 'mse')\n>>> output_array = model.predict(input_array)\n>>> print(output_array.shape)\n(32, 10, 64)"
+        "code": ">>> model = keras.Sequential()\n>>> model.add(keras.layers.Embedding(1000, 64, input_length=10))\n>>> # The model will take as input an integer matrix of size (batch,\n>>> # input_length), and the largest integer (i.e. word index) in the input\n>>> # should be no larger than 999 (vocabulary size).\n>>> # Now model.output_shape is (None, 10, 64), where `None` is the batch\n>>> # dimension.\n>>> input_array = np.random.randint(1000, size=(32, 10))\n>>> model.compile('rmsprop', 'mse')\n>>> output_array = model.predict(input_array)\n>>> print(output_array.shape)\n(32, 10, 64)"
       }
     ]
   },
@@ -1926,7 +1981,7 @@
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, ..., channels)` while `channels_first` corresponds to\n    inputs with shape `(batch, channels, ...)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string, one of `\"channels_last\"` (default) or\n        `\"channels_first\"`. The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch, ..., channels)` while `\"channels_first\"` corresponds to\n        inputs with shape `(batch, channels, ...)`.\n        When unspecified, uses `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json` (if exists). Defaults to\n        `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
@@ -1942,10 +1997,7 @@
     ],
     "examples": [
       {
-        "code": ">>> model = tf.keras.Sequential()\n>>> model.add(tf.keras.layers.Conv2D(64, 3, 3, input_shape=(3, 32, 32)))\n>>> model.output_shape\n(None, 1, 10, 64)"
-      },
-      {
-        "code": ">>> model.add(Flatten())\n>>> model.output_shape\n(None, 640)"
+        "code": ">>> x = keras.Input(shape=(10, 64))\n>>> y = keras.layers.Flatten()(x)\n>>> y.shape\n(None, 640)"
       }
     ]
   },
@@ -1956,29 +2008,29 @@
     "description": "Global average pooling operation for temporal data.",
     "attributes": [
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, steps, features)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, features, steps)`.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, steps, features)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, features, steps)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "name": "keepdims",
-        "description": "A boolean, whether to keep the temporal dimension or not.\n    If `keepdims` is `False` (default), the rank of the tensor is reduced\n    for spatial dimensions.\n    If `keepdims` is `True`, the temporal dimension are retained with\n    length 1.\n    The behavior is the same as for `tf.reduce_mean` or `np.mean`."
+        "description": "A boolean, whether to keep the temporal dimension or not.\n        If `keepdims` is `False` (default), the rank of the tensor is\n        reduced for the temporal dimension. If `keepdims` is `True`, the\n        temporal dimension is retained with length 1.\n        The behavior is the same as for `tf.reduce_mean` or `np.mean`."
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  3D tensor with shape:\n  `(batch_size, steps, features)`\n- If `data_format='channels_first'`:\n  3D tensor with shape:\n  `(batch_size, features, steps)`",
+        "description": "- If `data_format='channels_last'`:\n    3D tensor with shape:\n    `(batch_size, steps, features)`\n- If `data_format='channels_first'`:\n    3D tensor with shape:\n    `(batch_size, features, steps)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `keepdims`=False:\n  2D tensor with shape `(batch_size, features)`.\n- If `keepdims`=True:\n  - If `data_format='channels_last'`:\n    3D tensor with shape `(batch_size, 1, features)`\n  - If `data_format='channels_first'`:\n    3D tensor with shape `(batch_size, features, 1)`",
+        "description": "- If `keepdims=False`:\n    2D tensor with shape `(batch_size, features)`.\n- If `keepdims=True`:\n    - If `data_format=\"channels_last\"`:\n        3D tensor with shape `(batch_size, 1, features)`\n    - If `data_format=\"channels_first\"`:\n        3D tensor with shape `(batch_size, features, 1)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 3, 4)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.GlobalAveragePooling1D()(x)\n>>> print(y.shape)\n(2, 4)"
+        "code": ">>> x = np.random.rand(2, 3, 4)\n>>> y = keras.layers.GlobalAveragePooling1D()(x)\n>>> y.shape\n(2, 4)"
       }
     ]
   },
@@ -1986,33 +2038,33 @@
     "name": "GlobalAveragePooling2D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Global average pooling operation for spatial data.",
+    "description": "Global average pooling operation for 2D data.",
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n      one of `channels_last` (default) or `channels_first`.\n      The ordering of the dimensions in the inputs.\n      `channels_last` corresponds to inputs with shape\n      `(batch, height, width, channels)` while `channels_first`\n      corresponds to inputs with shape\n      `(batch, channels, height, width)`.\n      When unspecified, uses `image_data_format` value found\n      in your Keras config file at `~/.keras/keras.json`\n      (if exists) else 'channels_last'. Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "name": "keepdims",
-        "description": "A boolean, whether to keep the spatial dimensions or not.\n      If `keepdims` is `False` (default), the rank of the tensor is reduced\n      for spatial dimensions.\n      If `keepdims` is `True`, the spatial dimensions are retained with\n      length 1.\n      The behavior is the same as for `tf.reduce_mean` or `np.mean`."
+        "description": "A boolean, whether to keep the spatial dimensions or not.\n        If `keepdims` is `False` (default), the rank of the tensor is\n        reduced for spatial dimensions. If `keepdims` is `True`, the\n        spatial dimensions are retained with length 1.\n        The behavior is the same as for `tf.reduce_mean` or `np.mean`."
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  4D tensor with shape `(batch_size, rows, cols, channels)`.\n- If `data_format='channels_first'`:\n  4D tensor with shape `(batch_size, channels, rows, cols)`.",
+        "description": "- If `data_format='channels_last'`:\n    4D tensor with shape:\n    `(batch_size, height, width, channels)`\n- If `data_format='channels_first'`:\n    4D tensor with shape:\n    `(batch_size, channels, height, width)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `keepdims`=False:\n  2D tensor with shape `(batch_size, channels)`.\n- If `keepdims`=True:\n  - If `data_format='channels_last'`:\n    4D tensor with shape `(batch_size, 1, 1, channels)`\n  - If `data_format='channels_first'`:\n    4D tensor with shape `(batch_size, channels, 1, 1)`",
+        "description": "- If `keepdims=False`:\n    2D tensor with shape `(batch_size, channels)`.\n- If `keepdims=True`:\n    - If `data_format=\"channels_last\"`:\n        4D tensor with shape `(batch_size, 1, 1, channels)`\n    - If `data_format=\"channels_first\"`:\n        4D tensor with shape `(batch_size, channels, 1, 1)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 4, 5, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.GlobalAveragePooling2D()(x)\n>>> print(y.shape)\n(2, 3)"
+        "code": ">>> x = np.random.rand(2, 4, 5, 3)\n>>> y = keras.layers.GlobalAveragePooling2D()(x)\n>>> y.shape\n(2, 3)"
       }
     ]
   },
@@ -2020,61 +2072,66 @@
     "name": "GlobalMaxPooling1D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Global max pooling operation for 1D temporal data.\n\nDownsamples the input representation by taking the maximum value over\nthe time dimension.\n\nFor example:\n\n```\n>>> x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])\n>>> x = tf.reshape(x, [3, 3, 1])\n>>> x\n<tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy=\narray([[[1.], [2.], [3.]],\n       [[4.], [5.], [6.]],\n       [[7.], [8.], [9.]]], dtype=float32)>\n>>> max_pool_1d = tf.keras.layers.GlobalMaxPooling1D()\n>>> max_pool_1d(x)\n<tf.Tensor: shape=(3, 1), dtype=float32, numpy=\narray([[3.],\n       [6.],\n       [9.], dtype=float32)>\n```",
+    "description": "Global max pooling operation for temporal data.",
     "attributes": [
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, steps, features)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, features, steps)`.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, steps, features)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, features, steps)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "name": "keepdims",
-        "description": "A boolean, whether to keep the temporal dimension or not.\n    If `keepdims` is `False` (default), the rank of the tensor is reduced\n    for spatial dimensions.\n    If `keepdims` is `True`, the temporal dimension are retained with\n    length 1.\n    The behavior is the same as for `tf.reduce_max` or `np.max`."
+        "description": "A boolean, whether to keep the temporal dimension or not.\n        If `keepdims` is `False` (default), the rank of the tensor is\n        reduced for the temporal dimension. If `keepdims` is `True`, the\n        temporal dimension is retained with length 1.\n        The behavior is the same as for `tf.reduce_max` or `np.max`."
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  3D tensor with shape:\n  `(batch_size, steps, features)`\n- If `data_format='channels_first'`:\n  3D tensor with shape:\n  `(batch_size, features, steps)`",
+        "description": "- If `data_format='channels_last'`:\n    3D tensor with shape:\n    `(batch_size, steps, features)`\n- If `data_format='channels_first'`:\n    3D tensor with shape:\n    `(batch_size, features, steps)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `keepdims`=False:\n  2D tensor with shape `(batch_size, features)`.\n- If `keepdims`=True:\n  - If `data_format='channels_last'`:\n    3D tensor with shape `(batch_size, 1, features)`\n  - If `data_format='channels_first'`:\n    3D tensor with shape `(batch_size, features, 1)`",
+        "description": "- If `keepdims=False`:\n    2D tensor with shape `(batch_size, features)`.\n- If `keepdims=True`:\n    - If `data_format=\"channels_last\"`:\n        3D tensor with shape `(batch_size, 1, features)`\n    - If `data_format=\"channels_first\"`:\n        3D tensor with shape `(batch_size, features, 1)`",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> x = np.random.rand(2, 3, 4)\n>>> y = keras.layers.GlobalMaxPooling1D()(x)\n>>> y.shape\n(2, 4)"
+      }
     ]
   },
   {
     "name": "GlobalMaxPooling2D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Global max pooling operation for spatial data.",
+    "description": "Global max pooling operation for 2D data.",
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, channels, height, width)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "name": "keepdims",
-        "description": "A boolean, whether to keep the spatial dimensions or not.\n    If `keepdims` is `False` (default), the rank of the tensor is reduced\n    for spatial dimensions.\n    If `keepdims` is `True`, the spatial dimensions are retained with\n    length 1.\n    The behavior is the same as for `tf.reduce_max` or `np.max`."
+        "description": "A boolean, whether to keep the spatial dimensions or not.\n        If `keepdims` is `False` (default), the rank of the tensor is\n        reduced for spatial dimensions. If `keepdims` is `True`, the\n        spatial dimensions are retained with length 1.\n        The behavior is the same as for `tf.reduce_max` or `np.max`."
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  4D tensor with shape `(batch_size, rows, cols, channels)`.\n- If `data_format='channels_first'`:\n  4D tensor with shape `(batch_size, channels, rows, cols)`.",
+        "description": "- If `data_format='channels_last'`:\n    4D tensor with shape:\n    `(batch_size, height, width, channels)`\n- If `data_format='channels_first'`:\n    4D tensor with shape:\n    `(batch_size, channels, height, width)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `keepdims`=False:\n  2D tensor with shape `(batch_size, channels)`.\n- If `keepdims`=True:\n  - If `data_format='channels_last'`:\n    4D tensor with shape `(batch_size, 1, 1, channels)`\n  - If `data_format='channels_first'`:\n    4D tensor with shape `(batch_size, channels, 1, 1)`",
+        "description": "- If `keepdims=False`:\n    2D tensor with shape `(batch_size, channels)`.\n- If `keepdims=True`:\n    - If `data_format=\"channels_last\"`:\n        4D tensor with shape `(batch_size, 1, 1, channels)`\n    - If `data_format=\"channels_first\"`:\n        4D tensor with shape `(batch_size, channels, 1, 1)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 4, 5, 3)\n>>> x = tf.random.normal(input_shape)\n>>> y = tf.keras.layers.GlobalMaxPooling2D()(x)\n>>> print(y.shape)\n(2, 3)"
+        "code": ">>> x = np.random.rand(2, 4, 5, 3)\n>>> y = keras.layers.GlobalMaxPooling2D()(x)\n>>> y.shape\n(2, 3)"
       }
     ]
   },
@@ -2082,21 +2139,21 @@
     "name": "GRU",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Gated Recurrent Unit - Cho et al. 2014.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.\n\nBased on available runtime hardware and constraints, this layer\nwill choose different implementations (cuDNN-based or pure-TensorFlow)\nto maximize the performance. If a GPU is available and all\nthe arguments to the layer meet the requirement of the cuDNN kernel\n(see below for details), the layer will use a fast cuDNN implementation.\n\nThe requirements to use the cuDNN implementation are:\n\n1. `activation` == `tanh`\n2. `recurrent_activation` == `sigmoid`\n3. `recurrent_dropout` == 0\n4. `unroll` is `False`\n5. `use_bias` is `True`\n6. `reset_after` is `True`\n7. Inputs, if use masking, are strictly right-padded.\n8. Eager execution is enabled in the outermost context.\n\nThere are two variants of the GRU implementation. The default one is based\non [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to\nhidden state before matrix multiplication. The other one is based on\n[original](https://arxiv.org/abs/1406.1078v1) and has the order reversed.\n\nThe second variant is compatible with CuDNNGRU (GPU-only) and allows\ninference on CPU. Thus it has separate biases for `kernel` and\n`recurrent_kernel`. To use this variant, set `reset_after=True` and\n`recurrent_activation='sigmoid'`.\n\nFor example:\n\n```\n>>> inputs = tf.random.normal([32, 10, 8])\n>>> gru = tf.keras.layers.GRU(4)\n>>> output = gru(inputs)\n>>> print(output.shape)\n(32, 4)\n>>> gru = tf.keras.layers.GRU(4, return_sequences=True, return_state=True)\n>>> whole_sequence_output, final_state = gru(inputs)\n>>> print(whole_sequence_output.shape)\n(32, 10, 4)\n>>> print(final_state.shape)\n(32, 4)\n```",
+    "description": "Gated Recurrent Unit - Cho et al. 2014.\n\nBased on available runtime hardware and constraints, this layer\nwill choose different implementations (cuDNN-based or backend-native)\nto maximize the performance. If a GPU is available and all\nthe arguments to the layer meet the requirement of the cuDNN kernel\n(see below for details), the layer will use a fast cuDNN implementation\nwhen using the TensorFlow backend.\n\nThe requirements to use the cuDNN implementation are:\n\n1. `activation` == `tanh`\n2. `recurrent_activation` == `sigmoid`\n3. `dropout` == 0 and `recurrent_dropout` == 0\n4. `unroll` is `False`\n5. `use_bias` is `True`\n6. `reset_after` is `True`\n7. Inputs, if use masking, are strictly right-padded.\n8. Eager execution is enabled in the outermost context.\n\nThere are two variants of the GRU implementation. The default one is based\non [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to\nhidden state before matrix multiplication. The other one is based on\n[original](https://arxiv.org/abs/1406.1078v1) and has the order reversed.\n\nThe second variant is compatible with CuDNNGRU (GPU-only) and allows\ninference on CPU. Thus it has separate biases for `kernel` and\n`recurrent_kernel`. To use this variant, set `reset_after=True` and\n`recurrent_activation='sigmoid'`.\n\nFor example:\n\n```\n>>> inputs = np.random.random((32, 10, 8))\n>>> gru = keras.layers.GRU(4)\n>>> output = gru(inputs)\n>>> output.shape\n(32, 4)\n>>> gru = keras.layers.GRU(4, return_sequences=True, return_state=True)\n>>> whole_sequence_output, final_state = gru(inputs)\n>>> whole_sequence_output.shape\n(32, 10, 4)\n>>> final_state.shape\n(32, 4)\n```",
     "attributes": [
       {
         "default": "tanh",
-        "description": "Activation function to use.",
+        "description": "Activation function to use.\n        Default: hyperbolic tangent (`tanh`).\n        If you pass `None`, no activation is applied\n        (ie. \"linear\" activation: `a(x) = x`).",
         "name": "activation"
       },
       {
         "default": "hard_sigmoid",
-        "description": "Activation function to use\n    for the recurrent step.",
+        "description": "Activation function to use\n        for the recurrent step.\n        Default: sigmoid (`sigmoid`).\n        If you pass `None`, no activation is applied\n        (ie. \"linear\" activation: `a(x) = x`).",
         "name": "recurrent_activation"
       },
       {
         "default": true,
-        "description": "Boolean, (default `True`), whether the layer uses a bias vector.",
+        "description": "Boolean, (default `True`), whether the layer\n        should use a bias vector.",
         "name": "use_bias",
         "visible": false
       },
@@ -2110,7 +2167,7 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `kernel` weights matrix,\n    used for the linear transformation of the inputs. Default:\n    `glorot_uniform`.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs. Default:\n        `\"glorot_uniform\"`.",
         "name": "kernel_initializer",
         "visible": false
       },
@@ -2122,7 +2179,7 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `recurrent_kernel`\n     weights matrix, used for the linear transformation of the recurrent\n     state. Default: `orthogonal`.",
+        "description": "Initializer for the `recurrent_kernel`\n        weights matrix, used for the linear transformation of the recurrent\n        state. Default: `\"orthogonal\"`.",
         "name": "recurrent_initializer",
         "visible": false
       },
@@ -2131,13 +2188,13 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "Initializer for the bias vector. Default: `zeros`.",
+        "description": "Initializer for the bias vector. Default: `\"zeros\"`.",
         "name": "bias_initializer",
         "visible": false
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n    linear transformation of the inputs. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        linear transformation of the inputs. Default: 0.",
         "name": "dropout"
       },
       {
@@ -2147,27 +2204,27 @@
       },
       {
         "default": false,
-        "description": "Boolean. Whether to return the last output\n    in the output sequence, or the full sequence. Default: `False`.",
+        "description": "Boolean. Whether to return the last output\n        in the output sequence, or the full sequence. Default: `False`.",
         "name": "return_sequences"
       },
       {
         "default": false,
-        "description": "Boolean. Whether to return the last state in addition to the\n    output. Default: `False`.",
+        "description": "Boolean. Whether to return the last state in addition\n        to the output. Default: `False`.",
         "name": "return_state"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`).\n    If True, process the input sequence backwards and return the\n    reversed sequence.",
+        "description": "Boolean (default `False`).\n        If `True`, process the input sequence backwards and return the\n        reversed sequence.",
         "name": "go_backwards"
       },
       {
         "default": false,
-        "description": "Boolean (default False). If True, the last state\n    for each sample at index i in a batch will be used as initial\n    state for the sample of index i in the following batch.",
+        "description": "Boolean (default: `False`). If `True`, the last state\n        for each sample at index i in a batch will be used as initial\n        state for the sample of index i in the following batch.",
         "name": "stateful"
       },
       {
         "default": false,
-        "description": "Boolean (default False).\n    If True, the network will be unrolled,\n    else a symbolic loop will be used.\n    Unrolling can speed-up a RNN,\n    although it tends to be more memory-intensive.\n    Unrolling is only suitable for short sequences.",
+        "description": "Boolean (default: `False`).\n        If `True`, the network will be unrolled,\n        else a symbolic loop will be used.\n        Unrolling can speed up an RNN,\n        although it tends to be more memory-intensive.\n        Unrolling is only suitable for short sequences.",
         "name": "unroll"
       },
       {
@@ -2175,39 +2232,39 @@
         "name": "units"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector.",
+        "description": "Regularizer function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the output of the\n    layer (its \"activation\"). Default: `None`.",
+        "description": "Regularizer function applied to the output of the\n        layer (its \"activation\"). Default: `None`.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector. Default:\n    `None`.",
+        "description": "Constraint function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_constraint"
       },
       {
-        "description": "Float between 0 and 1. Fraction of the units to drop\n    for the linear transformation of the recurrent state. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop\n        for the linear transformation of the recurrent state. Default: 0.",
         "name": "recurrent_dropout"
       },
       {
@@ -2215,12 +2272,16 @@
         "name": "Default"
       },
       {
-        "description": "GRU convention (whether to apply reset gate after or\n    before matrix multiplication). False = \"before\",\n    True = \"after\" (default and cuDNN compatible).",
+        "description": "GRU convention (whether to apply reset gate after or\n        before matrix multiplication). `False` is `\"before\"`,\n        `True` is `\"after\"` (default and cuDNN compatible).",
         "name": "reset_after"
       },
       {
         "description": "The shape format of the `inputs` and `outputs` tensors.\n    If True, the inputs and outputs will be in shape\n    `[timesteps, batch, feature]`, whereas in the False case, it will be\n    `[batch, timesteps, feature]`. Using `time_major = True` is a bit more\n    efficient because it avoids transposes at the beginning and end of the\n    RNN calculation. However, most TensorFlow data is batch-major, so by\n    default this function accepts input and emits output in batch-major\n    form.",
         "name": "time_major"
+      },
+      {
+        "name": "seed",
+        "description": "Random seed for dropout."
       }
     ],
     "inputs": [
@@ -2260,76 +2321,76 @@
   {
     "name": "GRUCell",
     "module": "tensorflow.keras.layers",
-    "description": "Cell class for the GRU layer.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.\n\nThis class processes one step within the whole time sequence input, whereas\n`tf.keras.layer.GRU` processes the whole sequence.\n\nFor example:\n\n```\n>>> inputs = tf.random.normal([32, 10, 8])\n>>> rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(4))\n>>> output = rnn(inputs)\n>>> print(output.shape)\n(32, 4)\n>>> rnn = tf.keras.layers.RNN(\n...    tf.keras.layers.GRUCell(4),\n...    return_sequences=True,\n...    return_state=True)\n>>> whole_sequence_output, final_state = rnn(inputs)\n>>> print(whole_sequence_output.shape)\n(32, 10, 4)\n>>> print(final_state.shape)\n(32, 4)\n```",
+    "description": "Cell class for the GRU layer.\n\nThis class processes one step within the whole time sequence input, whereas\n`keras.layers.GRU` processes the whole sequence.",
     "attributes": [
       {
         "description": "Positive integer, dimensionality of the output space.",
         "name": "units"
       },
       {
-        "description": "Activation function to use. Default: hyperbolic tangent\n    (`tanh`). If you pass None, no activation is applied\n    (ie. \"linear\" activation: `a(x) = x`).",
+        "description": "Activation function to use. Default: hyperbolic tangent\n        (`tanh`). If you pass None, no activation is applied\n        (ie. \"linear\" activation: `a(x) = x`).",
         "name": "activation"
       },
       {
-        "description": "Activation function to use for the recurrent step.",
+        "description": "Activation function to use for the recurrent step.\n        Default: sigmoid (`sigmoid`). If you pass `None`, no activation is\n        applied (ie. \"linear\" activation: `a(x) = x`).",
         "name": "recurrent_activation"
       },
       {
         "default": true,
-        "description": "Boolean, (default `True`), whether the layer uses a bias vector.",
+        "description": "Boolean, (default `True`), whether the layer\n        should use a bias vector.",
         "name": "use_bias",
         "visible": false
       },
       {
-        "description": "Initializer for the `kernel` weights matrix,\n    used for the linear transformation of the inputs. Default:\n    `glorot_uniform`.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs. Default:\n        `\"glorot_uniform\"`.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the `recurrent_kernel`\n    weights matrix, used for the linear transformation of the recurrent\n    state.  Default: `orthogonal`.",
+        "description": "Initializer for the `recurrent_kernel`\n        weights matrix, used for the linear transformation\n        of the recurrent state. Default: `\"orthogonal\"`.",
         "name": "recurrent_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the bias vector. Default: `zeros`.",
+        "description": "Initializer for the bias vector. Default: `\"zeros\"`.",
         "name": "bias_initializer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector.",
+        "description": "Regularizer function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector. Default:\n    `None`.",
+        "description": "Constraint function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_constraint"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n    linear transformation of the inputs. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        linear transformation of the inputs. Default: 0.",
         "name": "dropout"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop\n    for the linear transformation of the recurrent state. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop\n        for the linear transformation of the recurrent state. Default: 0.",
         "name": "recurrent_dropout"
       },
       {
@@ -2341,8 +2402,17 @@
         "name": "Default"
       },
       {
-        "description": "GRU convention (whether to apply reset gate after or\n    before matrix multiplication). False = \"before\",\n    True = \"after\" (default and cuDNN compatible).",
+        "description": "GRU convention (whether to apply reset gate after or\n        before matrix multiplication). False = \"before\",\n        True = \"after\" (default and cuDNN compatible).",
         "name": "reset_after"
+      },
+      {
+        "name": "seed",
+        "description": "Random seed for dropout."
+      }
+    ],
+    "examples": [
+      {
+        "code": ">>> inputs = np.random.random((32, 10, 8))\n>>> rnn = keras.layers.RNN(keras.layers.GRUCell(4))\n>>> output = rnn(inputs)\n>>> output.shape\n(32, 4)\n>>> rnn = keras.layers.RNN(\n...    keras.layers.GRUCell(4),\n...    return_sequences=True,\n...    return_state=True)\n>>> whole_sequence_output, final_state = rnn(inputs)\n>>> whole_sequence_output.shape\n(32, 10, 4)\n>>> final_state.shape\n(32, 4)"
       }
     ]
   },
@@ -2399,14 +2469,14 @@
     "name": "InputSpec",
     "module": "tensorflow.keras.layers",
     "category": "Data",
-    "description": "Specifies the rank, dtype and shape of every input to a layer.\n\nLayers can expose (if appropriate) an `input_spec` attribute:\nan instance of `InputSpec`, or a nested structure of `InputSpec` instances\n(one per input tensor). These objects enable the layer to run input\ncompatibility checks for input structure, input rank, input shape, and\ninput dtype.\n\nA None entry in a shape is compatible with any dimension,\na None shape is compatible with any shape.",
+    "description": "Specifies the rank, dtype and shape of every input to a layer.\n\nLayers can expose (if appropriate) an `input_spec` attribute:\nan instance of `InputSpec`, or a nested structure of `InputSpec` instances\n(one per input tensor). These objects enable the layer to run input\ncompatibility checks for input structure, input rank, input shape, and\ninput dtype for the first argument of `Layer.__call__`.\n\nA `None` entry in a shape is compatible with any dimension.",
     "attributes": [
       {
         "description": "Expected DataType of the input.",
         "name": "dtype"
       },
       {
-        "description": "Shape tuple, expected shape of the input\n    (may include None for unchecked axes). Includes the batch size.",
+        "description": "Shape tuple, expected shape of the input\n        (may include None for unchecked axes). Includes the batch size.",
         "name": "shape"
       },
       {
@@ -2422,43 +2492,43 @@
         "name": "min_ndim"
       },
       {
-        "description": "Dictionary mapping integer axes to\n    a specific dimension value.",
+        "description": "Dictionary mapping integer axes to\n        a specific dimension value.",
         "name": "axes"
       },
       {
-        "description": "If True, then allow inputs of rank N+1 as long\n    as the last axis of the input is 1, as well as inputs of rank N-1\n    as long as the last axis of the spec is 1.",
+        "description": "If True, then allow inputs of rank N+1 as long\n        as the last axis of the input is 1, as well as inputs of rank N-1\n        as long as the last axis of the spec is 1.",
         "name": "allow_last_axis_squeeze"
       },
       {
-        "description": "Expected key corresponding to this input when passing data as\n    a dictionary.",
+        "description": "Expected key corresponding to this input when passing data as\n        a dictionary.",
         "name": "name"
       }
     ],
     "examples": [
       {
-        "code": "class MyLayer(Layer):\n    def __init__(self):\n        super(MyLayer, self).__init__()\n        # The layer will accept inputs with\n        # shape (?, 28, 28) & (?, 28, 28, 1)\n        # and raise an appropriate error message otherwise.\n        self.input_spec = InputSpec(\n            shape=(None, 28, 28, 1),\n            allow_last_axis_squeeze=True)"
+        "code": "class MyLayer(Layer):\n    def __init__(self):\n        super(MyLayer, self).__init__()\n        # The layer will accept inputs with\n        # shape (*, 28, 28) & (*, 28, 28, 1)\n        # and raise an appropriate error message otherwise.\n        self.input_spec = InputSpec(\n            shape=(None, 28, 28, 1),\n            allow_last_axis_squeeze=True)"
       }
     ]
   },
   {
     "name": "Lambda",
     "module": "tensorflow.keras.layers",
-    "description": "Wraps arbitrary expressions as a `Layer` object.\n\nThe `Lambda` layer exists so that arbitrary expressions can be used\nas a `Layer` when constructing Sequential\nand Functional API models. `Lambda` layers are best suited for simple\noperations or quick experimentation. For more advanced use cases, follow\n[this guide](\nhttps://www.tensorflow.org/guide/keras/custom_layers_and_models)\nfor subclassing `tf.keras.layers.Layer`.\n\nWARNING: `tf.keras.layers.Lambda` layers have (de)serialization limitations!\n\nThe main reason to subclass `tf.keras.layers.Layer` instead of using a\n`Lambda` layer is saving and inspecting a Model. `Lambda` layers\nare saved by serializing the Python bytecode, which is fundamentally\nnon-portable. They should only be loaded in the same environment where\nthey were saved. Subclassed layers can be saved in a more portable way\nby overriding their `get_config()` method. Models that rely on\nsubclassed Layers are also often easier to visualize and reason about.",
+    "description": "Wraps arbitrary expressions as a `Layer` object.\n\nThe `Lambda` layer exists so that arbitrary expressions can be used\nas a `Layer` when constructing Sequential\nand Functional API models. `Lambda` layers are best suited for simple\noperations or quick experimentation. For more advanced use cases,\nprefer writing new subclasses of `Layer`.\n\nWARNING: `Lambda` layers have (de)serialization limitations!\n\nThe main reason to subclass `Layer` instead of using a\n`Lambda` layer is saving and inspecting a model. `Lambda` layers\nare saved by serializing the Python bytecode, which is fundamentally\nnon-portable and potentially unsafe.\nThey should only be loaded in the same environment where\nthey were saved. Subclassed layers can be saved in a more portable way\nby overriding their `get_config()` method. Models that rely on\nsubclassed Layers are also often easier to visualize and reason about.",
     "attributes": [
       {
-        "description": "The function to be evaluated. Takes input tensor as first\n    argument.",
+        "description": "The function to be evaluated. Takes input tensor as first\n        argument.",
         "name": "function"
       },
       {
-        "description": "Expected output shape from function. This argument can be\n    inferred if not explicitly provided. Can be a tuple or function. If a\n    tuple, it only specifies the first dimension onward;\n    sample dimension is assumed either the same as the input:\n    `output_shape = (input_shape[0], ) + output_shape` or, the input is\n    `None` and the sample dimension is also `None`:\n    `output_shape = (None, ) + output_shape` If a function, it specifies the\n    entire shape as a function of the input shape:\n    `output_shape = f(input_shape)`",
+        "description": "Expected output shape from function. This argument\n        can usually be inferred if not explicitly provided.\n        Can be a tuple or function. If a tuple, it only specifies\n        the first dimension onward; sample dimension is assumed\n        either the same as the input:\n        `output_shape = (input_shape[0], ) + output_shape` or,\n        the input is `None` and the sample dimension is also `None`:\n        `output_shape = (None, ) + output_shape`.\n        If a function, it specifies the\n        entire shape as a function of the input shape:\n        `output_shape = f(input_shape)`.",
         "name": "output_shape"
       },
       {
-        "description": "Optional dictionary of keyword arguments to be passed to the\n    function.\n\nInput shape: Arbitrary. Use the keyword argument input_shape (tuple of\n  integers, does not include the samples axis) when using this layer as the\n  first layer in a model.\n\nOutput shape: Specified by `output_shape` argument",
+        "description": "Optional dictionary of keyword arguments to be passed to the\n        function.",
         "name": "arguments"
       },
       {
-        "description": "Either None (indicating no masking) or a callable with the same\n    signature as the `compute_mask` layer method, or a tensor that will be\n    returned as output mask regardless of what the input is.",
+        "description": "Either None (indicating no masking) or a callable with the same\n        signature as the `compute_mask` layer method, or a tensor\n        that will be returned as output mask regardless\n        of what the input is.",
         "name": "mask"
       }
     ],
@@ -2478,17 +2548,6 @@
     "examples": [
       {
         "code": "# add a x -> x^2 layer\nmodel.add(Lambda(lambda x: x ** 2))"
-      },
-      {
-        "code": "# add a layer that returns the concatenation\n# of the positive part of the input and\n# the opposite of the negative part\n\ndef antirectifier(x):\n    x -= K.mean(x, axis=1, keepdims=True)\n    x = K.l2_normalize(x, axis=1)\n    pos = K.relu(x)\n    neg = K.relu(-x)\n    return K.concatenate([pos, neg], axis=1)\n\nmodel.add(Lambda(antirectifier))"
-      },
-      {
-        "summary": "**Note on Variables:**\nWhile it is possible to use Variables with Lambda layers,\nthis practice is discouraged as it can easily lead to bugs.\nFor instance, consider the following layer:",
-        "code": "scale = tf.Variable(1.)\nscale_layer = tf.keras.layers.Lambda(lambda x: x * scale)"
-      },
-      {
-        "summary": "Because `scale_layer` does not directly track the `scale` variable, it will\nnot appear in `scale_layer.trainable_weights` and will therefore not be\ntrained if `scale_layer` is used in a Model.\nA better pattern is to write a subclassed Layer:",
-        "code": "class ScaleLayer(tf.keras.layers.Layer):\n    def __init__(self, **kwargs):\n        super().__init__(**kwargs)\n        self.scale = tf.Variable(1.)\n\n    def call(self, inputs):\n        return inputs * self.scale"
       }
     ]
   },
@@ -2496,11 +2555,19 @@
     "name": "LeakyReLU",
     "module": "tensorflow.keras.layers",
     "category": "Activation",
-    "description": "Leaky version of a Rectified Linear Unit.\n\nIt allows a small gradient when the unit is not active:\n\n```\n    f(x) = alpha * x if x < 0\n    f(x) = x if x >= 0\n```",
+    "description": "Leaky version of a Rectified Linear Unit activation layer.\n\nThis layer allows a small gradient when the unit is not active.\n\nFormula:\n\n``` python\nf(x) = alpha * x if x < 0\nf(x) = x if x >= 0\n```",
     "attributes": [
       {
         "description": "Float >= `0.`. Negative slope coefficient. Defaults to `0.3`.",
         "name": "alpha"
+      },
+      {
+        "name": "negative_slope",
+        "description": "Float >= 0.0. Negative slope coefficient.\n      Defaults to `0.3`."
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments, such as\n        `name` and `dtype`."
       }
     ],
     "inputs": [
@@ -2522,7 +2589,7 @@
     ],
     "examples": [
       {
-        "code": ">>> layer = tf.keras.layers.LeakyReLU()\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[-0.9, -0.3, 0.0, 2.0]\n>>> layer = tf.keras.layers.LeakyReLU(alpha=0.1)\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[-0.3, -0.1, 0.0, 2.0]"
+        "code": "leaky_relu_layer = LeakyReLU(negative_slope=0.5)\ninput = np.array([-10, -5, 0.0, 5, 10])\nresult = leaky_relu_layer(input)\n# result = [-5. , -2.5,  0. ,  5. , 10.]"
       }
     ]
   },
@@ -2712,7 +2779,7 @@
     "name": "LSTM",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Long Short-Term Memory layer - Hochreiter 1997.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.\n\nBased on available runtime hardware and constraints, this layer\nwill choose different implementations (cuDNN-based or pure-TensorFlow)\nto maximize the performance. If a GPU is available and all\nthe arguments to the layer meet the requirement of the cuDNN kernel\n(see below for details), the layer will use a fast cuDNN implementation.\n\nThe requirements to use the cuDNN implementation are:\n\n1. `activation` == `tanh`\n2. `recurrent_activation` == `sigmoid`\n3. `recurrent_dropout` == 0\n4. `unroll` is `False`\n5. `use_bias` is `True`\n6. Inputs, if use masking, are strictly right-padded.\n7. Eager execution is enabled in the outermost context.\n\nFor example:\n\n```\n>>> inputs = tf.random.normal([32, 10, 8])\n>>> lstm = tf.keras.layers.LSTM(4)\n>>> output = lstm(inputs)\n>>> print(output.shape)\n(32, 4)\n>>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)\n>>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)\n>>> print(whole_seq_output.shape)\n(32, 10, 4)\n>>> print(final_memory_state.shape)\n(32, 4)\n>>> print(final_carry_state.shape)\n(32, 4)\n```",
+    "description": "Long Short-Term Memory layer - Hochreiter 1997.\n\nBased on available runtime hardware and constraints, this layer\nwill choose different implementations (cuDNN-based or backend-native)\nto maximize the performance. If a GPU is available and all\nthe arguments to the layer meet the requirement of the cuDNN kernel\n(see below for details), the layer will use a fast cuDNN implementation\nwhen using the TensorFlow backend.\nThe requirements to use the cuDNN implementation are:\n\n1. `activation` == `tanh`\n2. `recurrent_activation` == `sigmoid`\n3. `dropout` == 0 and `recurrent_dropout` == 0\n4. `unroll` is `False`\n5. `use_bias` is `True`\n6. Inputs, if use masking, are strictly right-padded.\n7. Eager execution is enabled in the outermost context.\n\nFor example:\n\n```\n>>> inputs = np.random.random((32, 10, 8))\n>>> lstm = keras.layers.LSTM(4)\n>>> output = lstm(inputs)\n>>> output.shape\n(32, 4)\n>>> lstm = keras.layers.LSTM(\n...     4, return_sequences=True, return_state=True)\n>>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)\n>>> whole_seq_output.shape\n(32, 10, 4)\n>>> final_memory_state.shape\n(32, 4)\n>>> final_carry_state.shape\n(32, 4)\n```",
     "attributes": [
       {
         "description": "Positive integer, dimensionality of the output space.",
@@ -2720,82 +2787,82 @@
       },
       {
         "default": "tanh",
-        "description": "Activation function to use.",
+        "description": "Activation function to use.\n        Default: hyperbolic tangent (`tanh`).\n        If you pass `None`, no activation is applied\n        (ie. \"linear\" activation: `a(x) = x`).",
         "name": "activation"
       },
       {
         "default": "hard_sigmoid",
-        "description": "Activation function to use for the recurrent step.",
+        "description": "Activation function to use\n        for the recurrent step.\n        Default: sigmoid (`sigmoid`).\n        If you pass `None`, no activation is applied\n        (ie. \"linear\" activation: `a(x) = x`).",
         "name": "recurrent_activation"
       },
       {
-        "description": "Boolean (default `True`), whether the layer uses a bias vector.",
+        "description": "Boolean, (default `True`), whether the layer\n        should use a bias vector.",
         "name": "use_bias",
         "visible": false
       },
       {
-        "description": "Initializer for the `kernel` weights matrix, used for\n    the linear transformation of the inputs. Default: `glorot_uniform`.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs. Default:\n        `\"glorot_uniform\"`.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the `recurrent_kernel` weights\n    matrix, used for the linear transformation of the recurrent state.",
+        "description": "Initializer for the `recurrent_kernel`\n        weights matrix, used for the linear transformation of the recurrent\n        state. Default: `\"orthogonal\"`.",
         "name": "recurrent_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the bias vector. Default: `zeros`.",
+        "description": "Initializer for the bias vector. Default: `\"zeros\"`.",
         "name": "bias_initializer",
         "visible": false
       },
       {
         "default": true,
-        "description": "Boolean (default `True`). If True, add 1 to the bias of\n    the forget gate at initialization. Setting it to true will also force\n    `bias_initializer=\"zeros\"`. This is recommended in [Jozefowicz et\n        al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).",
+        "description": "Boolean (default `True`). If `True`,\n        add 1 to the bias of the forget gate at initialization.\n        Setting it to `True` will also force `bias_initializer=\"zeros\"`.\n        This is recommended in [Jozefowicz et al.](\n        https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)",
         "name": "unit_forget_bias"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector.",
+        "description": "Regularizer function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the output of the\n    layer (its \"activation\"). Default: `None`.",
+        "description": "Regularizer function applied to the output of the\n        layer (its \"activation\"). Default: `None`.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_constraint",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_constraint",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the bias vector. Default:\n    `None`.",
+        "description": "Constraint function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_constraint",
         "visible": false
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n    linear transformation of the inputs. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        linear transformation of the inputs. Default: 0.",
         "name": "dropout"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop\n    for the linear transformation of the recurrent state. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop\n        for the linear transformation of the recurrent state. Default: 0.",
         "name": "recurrent_dropout"
       },
       {
@@ -2805,27 +2872,27 @@
       },
       {
         "default": false,
-        "description": "Boolean. Whether to return the last output in the output\n    sequence, or the full sequence. Default: `False`.",
+        "description": "Boolean. Whether to return the last output\n        in the output sequence, or the full sequence. Default: `False`.",
         "name": "return_sequences"
       },
       {
         "default": false,
-        "description": "Boolean. Whether to return the last state in addition to the\n    output. Default: `False`.",
+        "description": "Boolean. Whether to return the last state in addition\n        to the output. Default: `False`.",
         "name": "return_state"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`). If True, process the input\n    sequence backwards and return the reversed sequence.",
+        "description": "Boolean (default: `False`).\n        If `True`, process the input sequence backwards and return the\n        reversed sequence.",
         "name": "go_backwards"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`). If True, the last state for each\n  sample at index i in a batch will be used as initial state for the sample\n    of index i in the following batch.",
+        "description": "Boolean (default: `False`). If `True`, the last state\n        for each sample at index i in a batch will be used as initial\n        state for the sample of index i in the following batch.",
         "name": "stateful"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`). If True, the network will be unrolled,\n    else a symbolic loop will be used. Unrolling can speed-up a RNN,\n    although it tends to be more memory-intensive. Unrolling is only\n    suitable for short sequences.",
+        "description": "Boolean (default: `False`).\n        If `True`, the network will be unrolled,\n        else a symbolic loop will be used.\n        Unrolling can speed-up a RNN,\n        although it tends to be more memory-intensive.\n        Unrolling is only suitable for short sequences.",
         "name": "unroll"
       },
       {
@@ -2835,6 +2902,10 @@
       {
         "description": "The shape format of the `inputs` and `outputs` tensors.\n    If True, the inputs and outputs will be in shape\n    `[timesteps, batch, feature]`, whereas in the False case, it will be\n    `[batch, timesteps, feature]`. Using `time_major = True` is a bit more\n    efficient because it avoids transposes at the beginning and end of the\n    RNN calculation. However, most TensorFlow data is batch-major, so by\n    default this function accepts input and emits output in batch-major\n    form.",
         "name": "time_major"
+      },
+      {
+        "name": "seed",
+        "description": "Random seed for dropout."
       }
     ],
     "inputs": [
@@ -2874,73 +2945,73 @@
   {
     "name": "LSTMCell",
     "module": "tensorflow.keras.layers",
-    "description": "Cell class for the LSTM layer.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.\n\nThis class processes one step within the whole time sequence input, whereas\n`tf.keras.layer.LSTM` processes the whole sequence.\n\nFor example:\n\n```\n>>> inputs = tf.random.normal([32, 10, 8])\n>>> rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(4))\n>>> output = rnn(inputs)\n>>> print(output.shape)\n(32, 4)\n>>> rnn = tf.keras.layers.RNN(\n...    tf.keras.layers.LSTMCell(4),\n...    return_sequences=True,\n...    return_state=True)\n>>> whole_seq_output, final_memory_state, final_carry_state = rnn(inputs)\n>>> print(whole_seq_output.shape)\n(32, 10, 4)\n>>> print(final_memory_state.shape)\n(32, 4)\n>>> print(final_carry_state.shape)\n(32, 4)\n```",
+    "description": "Cell class for the LSTM layer.\n\nThis class processes one step within the whole time sequence input, whereas\n`keras.layers.LSTM` processes the whole sequence.",
     "attributes": [
       {
         "description": "Positive integer, dimensionality of the output space.",
         "name": "units"
       },
       {
-        "description": "`a(x) = x`).",
+        "description": "Activation function to use. Default: hyperbolic tangent\n        (`tanh`). If you pass None, no activation is applied\n        (ie. \"linear\" activation: `a(x) = x`).",
         "name": "activation"
       },
       {
-        "description": "Activation function to use for the recurrent step.",
+        "description": "Activation function to use for the recurrent step.\n        Default: sigmoid (`sigmoid`). If you pass `None`, no activation is\n        applied (ie. \"linear\" activation: `a(x) = x`).",
         "name": "recurrent_activation"
       },
       {
         "default": true,
-        "description": "Boolean, (default `True`), whether the layer uses a bias vector.",
+        "description": "Boolean, (default `True`), whether the layer\n        should use a bias vector.",
         "name": "use_bias"
       },
       {
-        "description": "Initializer for the `kernel` weights matrix, used for\n    the linear transformation of the inputs. Default: `glorot_uniform`.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs. Default:\n        `\"glorot_uniform\"`.",
         "name": "kernel_initializer"
       },
       {
-        "description": "Initializer for the `recurrent_kernel` weights\n    matrix, used for the linear transformation of the recurrent state.",
+        "description": "Initializer for the `recurrent_kernel`\n        weights matrix, used for the linear transformation\n        of the recurrent state. Default: `\"orthogonal\"`.",
         "name": "recurrent_initializer"
       },
       {
-        "description": "Initializer for the bias vector. Default: `zeros`.",
+        "description": "Initializer for the bias vector. Default: `\"zeros\"`.",
         "name": "bias_initializer"
       },
       {
-        "description": "Boolean (default `True`). If True, add 1 to the bias of\n    the forget gate at initialization. Setting it to true will also force\n    `bias_initializer=\"zeros\"`. This is recommended in [Jozefowicz et\n      al.](https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)",
+        "description": "Boolean (default `True`). If `True`,\n        add 1 to the bias of the forget gate at initialization.\n        Setting it to `True` will also force `bias_initializer=\"zeros\"`.\n        This is recommended in [Jozefowicz et al.](\n        https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)",
         "name": "unit_forget_bias"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_regularizer"
       },
       {
-        "description": "Regularizer function applied to\n    the `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_regularizer"
       },
       {
-        "description": "Regularizer function applied to the bias vector.",
+        "description": "Regularizer function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_regularizer"
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector. Default:\n    `None`.",
+        "description": "Constraint function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_constraint"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n    linear transformation of the inputs. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        linear transformation of the inputs. Default: 0.",
         "name": "dropout"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop\n    for the linear transformation of the recurrent state. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop\n        for the linear transformation of the recurrent state. Default: 0.",
         "name": "recurrent_dropout"
       },
       {
@@ -2950,6 +3021,15 @@
       {
         "description": "`None`.",
         "name": "Default"
+      },
+      {
+        "name": "seed",
+        "description": "Random seed for dropout."
+      }
+    ],
+    "examples": [
+      {
+        "code": ">>> inputs = np.random.random((32, 10, 8))\n>>> rnn = keras.layers.RNN(keras.layers.LSTMCell(4))\n>>> output = rnn(inputs)\n>>> output.shape\n(32, 4)\n>>> rnn = keras.layers.RNN(\n...    keras.layers.LSTMCell(4),\n...    return_sequences=True,\n...    return_state=True)\n>>> whole_sequence_output, final_memory_state, final_carry_state = rnn(inputs)\n>>> whole_sequence_output.shape\n(32, 10, 4)\n>>> final_memory_state.shape\n(32, 4)\n>>> final_carry_state.shape\n(32, 4)"
       }
     ]
   },
@@ -2965,8 +3045,8 @@
     ],
     "examples": [
       {
-        "summary": "Consider a Numpy data array `x` of shape `(samples, timesteps, features)`,\nto be fed to an LSTM layer. You want to mask timestep #3 and #5 because you\nlack data for these timesteps. You can:\n- Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.`\n- Insert a `Masking` layer with `mask_value=0.` before the LSTM layer:",
-        "code": "samples, timesteps, features = 32, 10, 8\ninputs = np.random.random([samples, timesteps, features]).astype(np.float32)\ninputs[:, 3, :] = 0.\ninputs[:, 5, :] = 0.\n\nmodel = tf.keras.models.Sequential()\nmodel.add(tf.keras.layers.Masking(mask_value=0.,\n                                  input_shape=(timesteps, features)))\nmodel.add(tf.keras.layers.LSTM(32))\n\noutput = model(inputs)\n# The time step 3 and 5 will be skipped from LSTM calculation."
+        "summary": "Consider a NumPy data array `x` of shape `(samples, timesteps, features)`,\nto be fed to an LSTM layer. You want to mask timestep #3 and #5 because you\nlack data for these timesteps. You can:\n- Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.`\n- Insert a `Masking` layer with `mask_value=0.` before the LSTM layer:",
+        "code": "samples, timesteps, features = 32, 10, 8\ninputs = np.random.random([samples, timesteps, features]).astype(np.float32)\ninputs[:, 3, :] = 0.\ninputs[:, 5, :] = 0.\n\nmodel = keras.models.Sequential()\nmodel.add(keras.layers.Masking(mask_value=0.))\nmodel.add(keras.layers.LSTM(32))\noutput = model(inputs)\n# The time step 3 and 5 will be skipped from LSTM calculation."
       }
     ]
   },
@@ -2974,7 +3054,7 @@
     "name": "Maximum",
     "module": "tensorflow.keras.layers",
     "category": "Tensor",
-    "description": "Layer that computes the maximum (element-wise) a list of inputs.\n\nIt takes as input a list of tensors, all of the same shape, and returns\na single tensor (also of the same shape).\n\n```\n>>> tf.keras.layers.Maximum()([np.arange(5).reshape(5, 1),\n...                            np.arange(5, 10).reshape(5, 1)])\n<tf.Tensor: shape=(5, 1), dtype=int64, numpy=\narray([[5],\n     [6],\n     [7],\n     [8],\n     [9]])>\n```\n\n```\n>>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))\n>>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))\n>>> maxed = tf.keras.layers.Maximum()([x1, x2])\n>>> maxed.shape\nTensorShape([5, 8])\n```",
+    "description": "Computes element-wise maximum on a list of inputs.\n\nIt takes as input a list of tensors, all of the same shape,\nand returns a single tensor (also of the same shape).",
     "inputs": [
       {
         "name": "inputs",
@@ -2985,22 +3065,31 @@
       {
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> input_shape = (2, 3, 4)\n>>> x1 = np.random.rand(*input_shape)\n>>> x2 = np.random.rand(*input_shape)\n>>> y = keras.layers.Maximum()([x1, x2])"
+      },
+      {
+        "summary": "Usage in a Keras model:",
+        "code": ">>> input1 = keras.layers.Input(shape=(16,))\n>>> x1 = keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = keras.layers.Input(shape=(32,))\n>>> x2 = keras.layers.Dense(8, activation='relu')(input2)\n>>> # equivalent to `y = keras.layers.maximum([x1, x2])`\n>>> y = keras.layers.Maximum()([x1, x2])\n>>> out = keras.layers.Dense(4)(y)\n>>> model = keras.models.Model(inputs=[input1, input2], outputs=out)"
+      }
     ]
   },
   {
     "name": "MaxPooling1D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Max pooling operation for 1D temporal data.\n\nDownsamples the input representation by taking the maximum value over a\nspatial window of size `pool_size`. The window is shifted by `strides`.  The\nresulting output, when using the `\"valid\"` padding option, has a shape of:\n`output_shape = (input_shape - pool_size + 1) / strides)`\n\nThe resulting output shape when using the `\"same\"` padding option is:\n`output_shape = input_shape / strides`\n\nFor example, for `strides=1` and `padding=\"valid\"`:\n\n```\n>>> x = tf.constant([1., 2., 3., 4., 5.])\n>>> x = tf.reshape(x, [1, 5, 1])\n>>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,\n...    strides=1, padding='valid')\n>>> max_pool_1d(x)\n<tf.Tensor: shape=(1, 4, 1), dtype=float32, numpy=\narray([[[2.],\n        [3.],\n        [4.],\n        [5.]]], dtype=float32)>\n```\n\nFor example, for `strides=2` and `padding=\"valid\"`:\n\n```\n>>> x = tf.constant([1., 2., 3., 4., 5.])\n>>> x = tf.reshape(x, [1, 5, 1])\n>>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,\n...    strides=2, padding='valid')\n>>> max_pool_1d(x)\n<tf.Tensor: shape=(1, 2, 1), dtype=float32, numpy=\narray([[[2.],\n        [4.]]], dtype=float32)>\n```\n\nFor example, for `strides=1` and `padding=\"same\"`:\n\n```\n>>> x = tf.constant([1., 2., 3., 4., 5.])\n>>> x = tf.reshape(x, [1, 5, 1])\n>>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,\n...    strides=1, padding='same')\n>>> max_pool_1d(x)\n<tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=\narray([[[2.],\n        [3.],\n        [4.],\n        [5.],\n        [5.]]], dtype=float32)>\n```",
+    "description": "Max pooling operation for 1D temporal data.\n\nDownsamples the input representation by taking the maximum value over a\nspatial window of size `pool_size`. The window is shifted by `strides`.\n\nThe resulting output when using the `\"valid\"` padding option has a shape of:\n`output_shape = (input_shape - pool_size + 1) / strides`.\n\nThe resulting output shape when using the `\"same\"` padding option is:\n`output_shape = input_shape / strides`",
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, steps, features)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, features, steps)`.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, steps, features)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, features, steps)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "default": "valid",
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n    the left/right or up/down of the input such that output has the same\n    height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
@@ -3008,7 +3097,7 @@
           2,
           2
         ],
-        "description": "Integer, size of the max pooling window.",
+        "description": "int, size of the max pooling window.",
         "name": "pool_size"
       },
       {
@@ -3016,37 +3105,51 @@
           2,
           2
         ],
-        "description": "Integer, or None. Specifies how much the pooling window moves\n    for each pooling step.\n    If None, it will default to `pool_size`.",
+        "description": "int or None. Specifies how much the pooling window moves\n        for each pooling step. If None, it will default to `pool_size`.",
         "name": "strides"
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  3D tensor with shape `(batch_size, steps, features)`.\n- If `data_format='channels_first'`:\n  3D tensor with shape `(batch_size, features, steps)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    3D tensor with shape `(batch_size, steps, features)`.\n- If `data_format=\"channels_first\"`:\n    3D tensor with shape `(batch_size, features, steps)`.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  3D tensor with shape `(batch_size, downsampled_steps, features)`.\n- If `data_format='channels_first'`:\n  3D tensor with shape `(batch_size, features, downsampled_steps)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    3D tensor with shape `(batch_size, downsampled_steps, features)`.\n- If `data_format=\"channels_first\"`:\n    3D tensor with shape `(batch_size, features, downsampled_steps)`.",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "summary": "`strides=1` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([1., 2., 3., 4., 5.])\n>>> x = np.reshape(x, [1, 5, 1])\n>>> max_pool_1d = keras.layers.MaxPooling1D(pool_size=2,\n...    strides=1, padding=\"valid\")\n>>> max_pool_1d(x)"
+      },
+      {
+        "summary": "`strides=2` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([1., 2., 3., 4., 5.])\n>>> x = np.reshape(x, [1, 5, 1])\n>>> max_pool_1d = keras.layers.MaxPooling1D(pool_size=2,\n...    strides=2, padding=\"valid\")\n>>> max_pool_1d(x)"
+      },
+      {
+        "summary": "`strides=1` and `padding=\"same\"`:",
+        "code": ">>> x = np.array([1., 2., 3., 4., 5.])\n>>> x = np.reshape(x, [1, 5, 1])\n>>> max_pool_1d = keras.layers.MaxPooling1D(pool_size=2,\n...    strides=1, padding=\"same\")\n>>> max_pool_1d(x)"
+      }
     ]
   },
   {
     "name": "MaxPooling2D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Max pooling operation for 2D spatial data.\n\nDownsamples the input along its spatial dimensions (height and width)\nby taking the maximum value over an input window\n(of size defined by `pool_size`) for each channel of the input.\nThe window is shifted by `strides` along each dimension.\n\nThe resulting output,\nwhen using the `\"valid\"` padding option, has a spatial shape\n(number of rows or columns) of:\n`output_shape = math.floor((input_shape - pool_size) / strides) + 1`\n(when `input_shape >= pool_size`)\n\nThe resulting output shape when using the `\"same\"` padding option is:\n`output_shape = math.floor((input_shape - 1) / strides) + 1`\n\nFor example, for `strides=(1, 1)` and `padding=\"valid\"`:\n\n```\n>>> x = tf.constant([[1., 2., 3.],\n...                  [4., 5., 6.],\n...                  [7., 8., 9.]])\n>>> x = tf.reshape(x, [1, 3, 3, 1])\n>>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding='valid')\n>>> max_pool_2d(x)\n<tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=\n  array([[[[5.],\n           [6.]],\n          [[8.],\n           [9.]]]], dtype=float32)>\n```\n\nFor example, for `strides=(2, 2)` and `padding=\"valid\"`:\n\n```\n>>> x = tf.constant([[1., 2., 3., 4.],\n...                  [5., 6., 7., 8.],\n...                  [9., 10., 11., 12.]])\n>>> x = tf.reshape(x, [1, 3, 4, 1])\n>>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    strides=(2, 2), padding='valid')\n>>> max_pool_2d(x)\n<tf.Tensor: shape=(1, 1, 2, 1), dtype=float32, numpy=\n  array([[[[6.],\n           [8.]]]], dtype=float32)>\n```\n\nUsage Example:\n\n```\n>>> input_image = tf.constant([[[[1.], [1.], [2.], [4.]],\n...                            [[2.], [2.], [3.], [2.]],\n...                            [[4.], [1.], [1.], [1.]],\n...                            [[2.], [2.], [1.], [4.]]]])\n>>> output = tf.constant([[[[1], [0]],\n...                       
[[0], [1]]]])\n>>> model = tf.keras.models.Sequential()\n>>> model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    input_shape=(4, 4, 1)))\n>>> model.compile('adam', 'mean_squared_error')\n>>> model.predict(input_image, steps=1)\narray([[[[2.],\n         [4.]],\n        [[4.],\n         [4.]]]], dtype=float32)\n```\n\nFor example, for stride=(1, 1) and padding=\"same\":\n\n```\n>>> x = tf.constant([[1., 2., 3.],\n...                  [4., 5., 6.],\n...                  [7., 8., 9.]])\n>>> x = tf.reshape(x, [1, 3, 3, 1])\n>>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding='same')\n>>> max_pool_2d(x)\n<tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=\n  array([[[[5.],\n           [6.],\n           [6.]],\n          [[8.],\n           [9.],\n           [9.]],\n          [[8.],\n           [9.],\n           [9.]]]], dtype=float32)>\n```",
+    "description": "Max pooling operation for 2D spatial data.\n\nDownsamples the input along its spatial dimensions (height and width)\nby taking the maximum value over an input window\n(of size defined by `pool_size`) for each channel of the input.\nThe window is shifted by `strides` along each dimension.\n\nThe resulting output when using the `\"valid\"` padding option has a spatial\nshape (number of rows or columns) of:\n`output_shape = math.floor((input_shape - pool_size) / strides) + 1`\n(when `input_shape >= pool_size`)\n\nThe resulting output shape when using the `\"same\"` padding option is:\n`output_shape = math.floor((input_shape - 1) / strides) + 1`",
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch, channels, height, width)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, height, width)`. It defaults to the\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json`. If you never set it, then it will be\n        `\"channels_last\"`.",
         "name": "data_format"
       },
       {
         "default": "valid",
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n    the left/right or up/down of the input such that output has the same\n    height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
@@ -3054,7 +3157,7 @@
           2,
           2
         ],
-        "description": "integer or tuple of 2 integers,\n    window size over which to take the maximum.\n    `(2, 2)` will take the max value over a 2x2 pooling window.\n    If only one integer is specified, the same window length\n    will be used for both dimensions.",
+        "description": "int or tuple of 2 integers, factors by which to downscale\n        (dim1, dim2). If only one integer is specified, the same\n        window length will be used for all dimensions.",
         "name": "pool_size"
       },
       {
@@ -3062,69 +3165,83 @@
           2,
           2
         ],
-        "description": "Integer, tuple of 2 integers, or None.\n    Strides values.  Specifies how far the pooling window moves\n    for each pooling step. If None, it will default to `pool_size`.",
+        "description": "int or tuple of 2 integers, or None. Strides values. If None,\n        it will default to `pool_size`. If only one int is specified, the\n        same stride size will be used for all dimensions.",
         "name": "strides"
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  4D tensor with shape `(batch_size, rows, cols, channels)`.\n- If `data_format='channels_first'`:\n  4D tensor with shape `(batch_size, channels, rows, cols)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    4D tensor with shape `(batch_size, height, width, channels)`.\n- If `data_format=\"channels_first\"`:\n    4D tensor with shape `(batch_size, channels, height, width)`.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`.\n- If `data_format='channels_first'`:\n  4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`.",
+        "description": "- If `data_format=\"channels_last\"`:\n    4D tensor with shape\n    `(batch_size, pooled_height, pooled_width, channels)`.\n- If `data_format=\"channels_first\"`:\n    4D tensor with shape\n    `(batch_size, channels, pooled_height, pooled_width)`.",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "summary": "`strides=(1, 1)` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([[1., 2., 3.],\n...               [4., 5., 6.],\n...               [7., 8., 9.]])\n>>> x = np.reshape(x, [1, 3, 3, 1])\n>>> max_pool_2d = keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding=\"valid\")\n>>> max_pool_2d(x)"
+      },
+      {
+        "summary": "`strides=(2, 2)` and `padding=\"valid\"`:",
+        "code": ">>> x = np.array([[1., 2., 3., 4.],\n...               [5., 6., 7., 8.],\n...               [9., 10., 11., 12.]])\n>>> x = np.reshape(x, [1, 3, 4, 1])\n>>> max_pool_2d = keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    strides=(2, 2), padding=\"valid\")\n>>> max_pool_2d(x)"
+      },
+      {
+        "summary": "`strides=(1, 1)` and `padding=\"same\"`:",
+        "code": ">>> x = np.array([[1., 2., 3.],\n...               [4., 5., 6.],\n...               [7., 8., 9.]])\n>>> x = np.reshape(x, [1, 3, 3, 1])\n>>> max_pool_2d = keras.layers.MaxPooling2D(pool_size=(2, 2),\n...    strides=(1, 1), padding=\"same\")\n>>> max_pool_2d(x)"
+      }
     ]
   },
   {
     "name": "MaxPooling3D",
     "module": "tensorflow.keras.layers",
     "category": "Pool",
-    "description": "Max pooling operation for 3D data (spatial or spatio-temporal).\n\nDownsamples the input along its spatial dimensions (depth, height, and\nwidth) by taking the maximum value over an input window (of size defined by\n`pool_size`) for each channel of the input.  The window is shifted by\n`strides` along each dimension.",
+    "description": "Max pooling operation for 3D data (spatial or spatio-temporal).\n\nDownsamples the input along its spatial dimensions (depth, height, and\nwidth) by taking the maximum value over an input window (of size defined by\n`pool_size`) for each channel of the input. The window is shifted by\n`strides` along each dimension.",
     "attributes": [
       {
-        "description": "Tuple of 3 integers,\n    factors by which to downscale (dim1, dim2, dim3).\n    `(2, 2, 2)` will halve the size of the 3D input in each dimension.",
+        "description": "int or tuple of 3 integers, factors by which to downscale\n        (dim1, dim2, dim3). If only one integer is specified, the same\n        window length will be used for all dimensions.",
         "name": "pool_size"
       },
       {
-        "description": "tuple of 3 integers, or None. Strides values.",
+        "description": "int or tuple of 3 integers, or None. Strides values. If None,\n        it will default to `pool_size`. If only one int is specified, the\n        same stride size will be used for all dimensions.",
         "name": "strides"
       },
       {
-        "description": "One of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n    the left/right or up/down of the input such that output has the same\n    height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n    while `channels_first` corresponds to inputs with shape\n    `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape\n        `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while\n        `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n        It defaults to the `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json`. If you never set it, then it\n        will be `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  5D tensor with shape:\n  `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n- If `data_format='channels_first'`:\n  5D tensor with shape:\n  `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`",
+        "description": "- If `data_format=\"channels_last\"`:\n    5D tensor with shape:\n    `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n- If `data_format=\"channels_first\"`:\n    5D tensor with shape:\n    `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `data_format='channels_last'`:\n  5D tensor with shape:\n  `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`\n- If `data_format='channels_first'`:\n  5D tensor with shape:\n  `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`",
+        "description": "- If `data_format=\"channels_last\"`:\n    5D tensor with shape:\n    `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`\n- If `data_format=\"channels_first\"`:\n    5D tensor with shape:\n    `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": "depth = 30\nheight = 30\nwidth = 30\ninput_channels = 3\n\ninputs = tf.keras.Input(shape=(depth, height, width, input_channels))\nlayer = tf.keras.layers.MaxPooling3D(pool_size=3)\noutputs = layer(inputs)  # Shape: (batch_size, 10, 10, 10, 3)"
+        "code": "depth = 30\nheight = 30\nwidth = 30\nchannels = 3\n\ninputs = keras.layers.Input(shape=(depth, height, width, channels))\nlayer = keras.layers.MaxPooling3D(pool_size=3)\noutputs = layer(inputs)  # Shape: (batch_size, 10, 10, 10, 3)"
       }
     ]
   },
   {
     "name": "MultiHeadAttention",
     "module": "tensorflow.keras.layers",
-    "description": "MultiHeadAttention layer.\n\nThis is an implementation of multi-headed attention as described in the\npaper \"Attention is all you Need\" (Vaswani et al., 2017).\nIf `query`, `key,` `value` are the same, then\nthis is self-attention. Each timestep in `query` attends to the\ncorresponding sequence in `key`, and returns a fixed-width vector.\n\nThis layer first projects `query`, `key` and `value`. These are\n(effectively) a list of tensors of length `num_attention_heads`, where the\ncorresponding shapes are `(batch_size, <query dimensions>, key_dim)`,\n`(batch_size, <key/value dimensions>, key_dim)`,\n`(batch_size, <key/value dimensions>, value_dim)`.\n\nThen, the query and key tensors are dot-producted and scaled. These are\nsoftmaxed to obtain attention probabilities. The value tensors are then\ninterpolated by these probabilities, then concatenated back to a single\ntensor.\n\nFinally, the result tensor with the last dimension as value_dim can take an\nlinear projection and return.\n\nWhen using `MultiHeadAttention` inside a custom layer, the custom layer must\nimplement its own `build()` method and call `MultiHeadAttention`'s\n`_build_from_signature()` there.\nThis enables weights to be restored correctly when the model is loaded.",
+    "description": "MultiHeadAttention layer.\n\nThis is an implementation of multi-headed attention as described in the\npaper \"Attention Is All You Need\"\n[Vaswani et al., 2017](https://arxiv.org/abs/1706.03762).\nIf `query`, `key`, `value` are the same, then\nthis is self-attention. Each timestep in `query` attends to the\ncorresponding sequence in `key`, and returns a fixed-width vector.\n\nThis layer first projects `query`, `key` and `value`. These are\n(effectively) a list of tensors of length `num_attention_heads`, where the\ncorresponding shapes are `(batch_size, <query dimensions>, key_dim)`,\n`(batch_size, <key/value dimensions>, key_dim)`,\n`(batch_size, <key/value dimensions>, value_dim)`.\n\nThen, the query and key tensors are dot-producted and scaled. These are\nsoftmaxed to obtain attention probabilities. The value tensors are then\ninterpolated by these probabilities, then concatenated back to a single\ntensor.\n\nFinally, the result tensor with the last dimension as `value_dim` can take\na linear projection and return.",
     "attributes": [
       {
         "description": "Number of attention heads.",
@@ -3197,7 +3314,7 @@
   {
     "name": "Multiply",
     "module": "tensorflow.keras.layers",
-    "description": "Layer that multiplies (element-wise) a list of inputs.\n\nIt takes as input a list of tensors, all of the same shape, and returns\na single tensor (also of the same shape).\n\n```\n>>> tf.keras.layers.Multiply()([np.arange(5).reshape(5, 1),\n...                             np.arange(5, 10).reshape(5, 1)])\n<tf.Tensor: shape=(5, 1), dtype=int64, numpy=\narray([[ 0],\n     [ 6],\n     [14],\n     [24],\n     [36]])>\n```\n\n```\n>>> x1 = tf.keras.layers.Dense(8)(np.arange(10).reshape(5, 2))\n>>> x2 = tf.keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))\n>>> multiplied = tf.keras.layers.Multiply()([x1, x2])\n>>> multiplied.shape\nTensorShape([5, 8])\n```",
+    "description": "Performs elementwise multiplication.\n\nIt takes as input a list of tensors, all of the same shape,\nand returns a single tensor (also of the same shape).",
     "inputs": [
       {
         "name": "inputs",
@@ -3208,6 +3325,15 @@
       {
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> input_shape = (2, 3, 4)\n>>> x1 = np.random.rand(*input_shape)\n>>> x2 = np.random.rand(*input_shape)\n>>> y = keras.layers.Multiply()([x1, x2])"
+      },
+      {
+        "summary": "Usage in a Keras model:",
+        "code": ">>> input1 = keras.layers.Input(shape=(16,))\n>>> x1 = keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = keras.layers.Input(shape=(32,))\n>>> x2 = keras.layers.Dense(8, activation='relu')(input2)\n>>> # equivalent to `y = keras.layers.multiply([x1, x2])`\n>>> y = keras.layers.Multiply()([x1, x2])\n>>> out = keras.layers.Dense(4)(y)\n>>> model = keras.models.Model(inputs=[input1, input2], outputs=out)"
+      }
     ]
   },
   {
@@ -3217,13 +3343,13 @@
     "description": "Permutes the dimensions of the input according to a given pattern.\n\nUseful e.g. connecting RNNs and convnets.",
     "attributes": [
       {
-        "description": "Tuple of integers. Permutation pattern does not include the\n    samples dimension. Indexing starts at 1.\n    For instance, `(2, 1)` permutes the first and second dimensions\n    of the input.",
+        "description": "Tuple of integers. Permutation pattern does not include the\n        batch dimension. Indexing starts at 1.\n        For instance, `(2, 1)` permutes the first and second dimensions\n        of the input.",
         "name": "dims"
       }
     ],
     "inputs": [
       {
-        "description": "Arbitrary. Use the keyword argument `input_shape`\n(tuple of integers, does not include the samples axis)\nwhen using this layer as the first layer in a model.",
+        "description": "Arbitrary.",
         "name": "input"
       }
     ],
@@ -3235,7 +3361,7 @@
     ],
     "examples": [
       {
-        "code": "model = Sequential()\nmodel.add(Permute((2, 1), input_shape=(10, 64)))\n# now: model.output_shape == (None, 64, 10)\n# note: `None` is the batch dimension"
+        "code": ">>> x = keras.Input(shape=(10, 64))\n>>> y = keras.layers.Permute((2, 1))(x)\n>>> y.shape\n(None, 64, 10)"
       }
     ]
   },
@@ -3243,7 +3369,7 @@
     "name": "PReLU",
     "module": "tensorflow.keras.layers",
     "category": "Activation",
-    "description": "Parametric Rectified Linear Unit.\n\nIt follows:\n\n```\n    f(x) = alpha * x for x < 0\n    f(x) = x for x >= 0\n```\n\nwhere `alpha` is a learned array with the same shape as x.",
+    "description": "Parametric Rectified Linear Unit activation layer.\n\nFormula:\n``` python\nf(x) = alpha * x for x < 0\nf(x) = x for x >= 0\n```\nwhere `alpha` is a learned array with the same shape as x.",
     "attributes": [
       {
         "description": "Initializer function for the weights.",
@@ -3259,8 +3385,12 @@
         "name": "alpha_constraint"
       },
       {
-        "description": "The axes along which to share learnable\n        parameters for the activation function.\n        For example, if the incoming feature maps\n        are from a 2D convolution\n        with output shape `(batch, height, width, channels)`,\n        and you wish to share parameters across space\n        so that each filter only has one set of parameters,\n        set `shared_axes=[1, 2]`.",
+        "description": "The axes along which to share learnable parameters for the\n        activation function. For example, if the incoming feature maps are\n        from a 2D convolution with output shape\n        `(batch, height, width, channels)`, and you wish to share parameters\n        across space so that each filter only has one set of parameters,\n        set `shared_axes=[1, 2]`.",
         "name": "shared_axes"
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments, such as `name` and `dtype`."
       }
     ],
     "inputs": [
@@ -3288,19 +3418,23 @@
     "name": "ReLU",
     "module": "tensorflow.keras.layers",
     "category": "Activation",
-    "description": "Rectified Linear Unit activation function.\n\nWith default values, it returns element-wise `max(x, 0)`.\n\nOtherwise, it follows:\n\n```\n    f(x) = max_value if x >= max_value\n    f(x) = x if threshold <= x < max_value\n    f(x) = negative_slope * (x - threshold) otherwise\n```",
+    "description": "Rectified Linear Unit activation function layer.\n\nFormula:\n``` python\nf(x) = max(x,0)\nf(x) = max_value if x >= max_value\nf(x) = x if threshold <= x < max_value\nf(x) = negative_slope * (x - threshold) otherwise\n```",
     "attributes": [
       {
         "description": "Float >= 0. Maximum activation value. None means unlimited.\n        Defaults to `None`.",
         "name": "max_value"
       },
       {
-        "description": "Float >= 0. Negative slope coefficient.\n        Defaults to `0.`.",
+        "description": "Float >= 0. Negative slope coefficient.\n        Defaults to `0.0`.",
         "name": "negative_slope"
       },
       {
-        "description": "Float >= 0. Threshold value for thresholded activation.\n        Defaults to `0.`.",
+        "description": "Float >= 0. Threshold value for thresholded activation.\n        Defaults to `0.0`.",
         "name": "threshold"
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments, such as `name` and `dtype`."
       }
     ],
     "inputs": [
@@ -3317,7 +3451,7 @@
     ],
     "examples": [
       {
-        "code": ">>> layer = tf.keras.layers.ReLU()\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[0.0, 0.0, 0.0, 2.0]\n>>> layer = tf.keras.layers.ReLU(max_value=1.0)\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[0.0, 0.0, 0.0, 1.0]\n>>> layer = tf.keras.layers.ReLU(negative_slope=1.0)\n>>> output = layer([-3.0, -1.0, 0.0, 2.0])\n>>> list(output.numpy())\n[-3.0, -1.0, 0.0, 2.0]\n>>> layer = tf.keras.layers.ReLU(threshold=1.5)\n>>> output = layer([-3.0, -1.0, 1.0, 2.0])\n>>> list(output.numpy())\n[0.0, 0.0, 0.0, 2.0]"
+        "code": "relu_layer = keras.layers.ReLU(\n    max_value=10,\n    negative_slope=0.5,\n    threshold=0,\n)\ninput = np.array([-10, -5, 0.0, 5, 10])\nresult = relu_layer(input)\n# result = [-5. , -2.5,  0. ,  5. , 10.]"
       }
     ]
   },
@@ -3328,25 +3462,25 @@
     "description": "Repeats the input n times.",
     "attributes": [
       {
-        "description": "Integer, repetition factor.\nInput shape: 2D tensor of shape `(num_samples, features)`.\nOutput shape: 3D tensor of shape `(num_samples, n, features)`.",
+        "description": "Integer, repetition factor.",
         "name": "n"
       }
     ],
     "inputs": [
       {
-        "description": "2D tensor of shape `(num_samples, features)`.",
+        "description": "2D tensor with shape `(batch_size, features)`.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "3D tensor of shape `(num_samples, n, features)`.",
+        "description": "3D tensor with shape `(batch_size, n, features)`.",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": "model = Sequential()\nmodel.add(Dense(32, input_dim=32))\n# now: model.output_shape == (None, 32)\n# note: `None` is the batch dimension\n\nmodel.add(RepeatVector(3))\n# now: model.output_shape == (None, 3, 32)"
+        "code": ">>> x = keras.Input(shape=(32,))\n>>> y = keras.layers.RepeatVector(3)(x)\n>>> y.shape\n(None, 3, 32)"
       }
     ]
   },
@@ -3357,31 +3491,28 @@
     "description": "Layer that reshapes inputs into the given shape.",
     "attributes": [
       {
-        "description": "target shape. Tuple of integers.\n    Does not include the batch axis.\n",
+        "description": "Target shape. Tuple of integers, does not include the\n        samples dimension (batch size).",
         "name": "target_shape"
       }
     ],
     "inputs": [
       {
-        "description": "Arbitrary, although all dimensions in the input shape must be known/fixed.\nUse the keyword argument `input_shape` (tuple of integers, does not\ninclude the samples/batch size axis) when using this layer as the first\nlayer in a model.",
+        "description": "Arbitrary, although all dimensions in the input shape must be\nknown/fixed. Use the keyword argument `input_shape` (tuple of integers,\ndoes not include the samples/batch size axis) when using this layer as\nthe first layer in a model.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "`(batch_size,) + target_shape`",
+        "description": "`(batch_size, *target_shape)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> # as first layer in a Sequential model\n>>> model = tf.keras.Sequential()\n>>> model.add(tf.keras.layers.Reshape((3, 4), input_shape=(12,)))\n>>> # model.output_shape == (None, 3, 4), `None` is the batch size.\n>>> model.output_shape\n(None, 3, 4)"
-      },
-      {
-        "code": ">>> # as intermediate layer in a Sequential model\n>>> model.add(tf.keras.layers.Reshape((6, 2)))\n>>> model.output_shape\n(None, 6, 2)"
+        "code": ">>> x = keras.Input(shape=(12,))\n>>> y = keras.layers.Reshape((3, 4))(x)\n>>> y.shape\n(None, 3, 4)"
       },
       {
-        "code": ">>> # also supports shape inference using `-1` as dimension\n>>> model.add(tf.keras.layers.Reshape((-1, 2, 2)))\n>>> model.output_shape\n(None, 3, 2, 2)"
+        "code": ">>> # also supports shape inference using `-1` as dimension\n>>> y = keras.layers.Reshape((-1, 2, 2))(x)\n>>> y.shape\n(None, 3, 2, 2)"
       }
     ]
   },
@@ -3389,35 +3520,35 @@
     "name": "RNN",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Base class for recurrent layers.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.",
+    "description": "Base class for recurrent layers.",
     "attributes": [
       {
         "default": false,
-        "description": "Boolean (default `False`). Whether to return the last\n    output in the output sequence, or the full sequence.",
+        "description": "Boolean (default `False`). Whether to return the last\n        output in the output sequence, or the full sequence.",
         "name": "return_sequences"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`). Whether to return the last state\n    in addition to the output.",
+        "description": "Boolean (default `False`).\n        Whether to return the last state in addition to the output.",
         "name": "return_state"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`).\n    If True, process the input sequence backwards and return the\n    reversed sequence.",
+        "description": "Boolean (default `False`).\n        If `True`, process the input sequence backwards and return the\n        reversed sequence.",
         "name": "go_backwards"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`). If True, the last state\n    for each sample at index i in a batch will be used as initial\n    state for the sample of index i in the following batch.",
+        "description": "Boolean (default `False`). If True, the last state\n        for each sample at index `i` in a batch will be used as initial\n        state for the sample of index `i` in the following batch.",
         "name": "stateful"
       },
       {
         "default": false,
-        "description": "Boolean (default `False`).\n    If True, the network will be unrolled, else a symbolic loop will be\n    used. Unrolling can speed-up a RNN, although it tends to be more\n    memory-intensive. Unrolling is only suitable for short sequences.",
+        "description": "Boolean (default `False`).\n        If True, the network will be unrolled, else a symbolic loop will be\n        used. Unrolling can speed-up a RNN, although it tends to be more\n        memory-intensive. Unrolling is only suitable for short sequences.",
         "name": "unroll"
       },
       {
-        "description": "A RNN cell instance or a list of RNN cell instances.\n    A RNN cell is a class that has:\n    - A `call(input_at_t, states_at_t)` method, returning\n      `(output_at_t, states_at_t_plus_1)`. The call method of the\n      cell can also take the optional argument `constants`, see\n      section \"Note on passing external constants\" below.\n    - A `state_size` attribute. This can be a single integer\n      (single state) in which case it is the size of the recurrent\n      state. This can also be a list/tuple of integers (one size per state).\n      The `state_size` can also be TensorShape or tuple/list of\n      TensorShape, to represent high dimension state.\n    - A `output_size` attribute. This can be a single integer or a\n      TensorShape, which represent the shape of the output. For backward\n      compatible reason, if this attribute is not available for the\n      cell, the value will be inferred by the first element of the\n      `state_size`.\n    - A `get_initial_state(inputs=None, batch_size=None, dtype=None)`\n      method that creates a tensor meant to be fed to `call()` as the\n      initial state, if the user didn't specify any initial state via other\n      means. The returned initial state should have a shape of\n      [batch_size, cell.state_size]. The cell might choose to create a\n      tensor full of zeros, or full of other values based on the cell's\n      implementation.\n      `inputs` is the input tensor to the RNN layer, which should\n      contain the batch size as its shape[0], and also dtype. Note that\n      the shape[0] might be `None` during the graph construction. Either\n      the `inputs` or the pair of `batch_size` and `dtype` are provided.\n      `batch_size` is a scalar tensor that represents the batch size\n      of the inputs. 
`dtype` is `tf.DType` that represents the dtype of\n      the inputs.\n      For backward compatibility, if this method is not implemented\n      by the cell, the RNN layer will create a zero filled tensor with the\n      size of [batch_size, cell.state_size].\n    In the case that `cell` is a list of RNN cell instances, the cells\n    will be stacked on top of each other in the RNN, resulting in an\n    efficient stacked RNN.",
+        "description": "A RNN cell instance or a list of RNN cell instances.\n        A RNN cell is a class that has:\n        - A `call(input_at_t, states_at_t)` method, returning\n        `(output_at_t, states_at_t_plus_1)`. The call method of the\n        cell can also take the optional argument `constants`, see\n        section \"Note on passing external constants\" below.\n        - A `state_size` attribute. This can be a single integer\n        (single state) in which case it is the size of the recurrent\n        state. This can also be a list/tuple of integers\n        (one size per state).\n        - A `output_size` attribute, a single integer.\n        - A `get_initial_state(batch_size=None)`\n        method that creates a tensor meant to be fed to `call()` as the\n        initial state, if the user didn't specify any initial state\n        via other means. The returned initial state should have\n        shape `(batch_size, cell.state_size)`.\n        The cell might choose to create a tensor full of zeros,\n        or other values based on the cell's implementation.\n        `inputs` is the input tensor to the RNN layer, with shape\n        `(batch_size, timesteps, features)`.\n        If this method is not implemented\n        by the cell, the RNN layer will create a zero filled tensor\n        with shape `(batch_size, cell.state_size)`.\n        In the case that `cell` is a list of RNN cell instances, the cells\n        will be stacked on top of each other in the RNN, resulting in an\n        efficient stacked RNN.",
         "name": "cell"
       },
       {
@@ -3433,25 +3564,25 @@
         "name": "time_major"
       },
       {
-        "description": "Boolean (default `False`).\n    Whether the output should use zeros for the masked timesteps. Note that\n    this field is only used when `return_sequences` is True and mask is\n    provided. It can useful if you want to reuse the raw output sequence of\n    the RNN without interference from the masked timesteps, eg, merging\n    bidirectional RNNs.",
+        "description": "Boolean (default `False`).\n        Whether the output should use zeros for the masked timesteps.\n        Note that this field is only used when `return_sequences`\n        is `True` and `mask` is provided.\n        It can be useful if you want to reuse the raw output sequence of\n        the RNN without interference from the masked timesteps, e.g.,\n        merging bidirectional RNNs.",
         "name": "zero_output_for_mask"
       }
     ],
     "inputs": [
       {
-        "description": "N-D tensor with shape `[batch_size, timesteps, ...]` or\n`[timesteps, batch_size, ...]` when time_major is True.",
+        "description": "3-D tensor with shape `(batch_size, timesteps, features)`.",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "- If `return_state`: a list of tensors. The first tensor is\n    the output. The remaining tensors are the last states,\n    each with shape `[batch_size, state_size]`, where `state_size` could\n    be a high dimension tensor shape.\n  - If `return_sequences`: N-D tensor with shape\n    `[batch_size, timesteps, output_size]`, where `output_size` could\n    be a high dimension tensor shape, or\n    `[timesteps, batch_size, output_size]` when `time_major` is True.\n  - Else, N-D tensor with shape `[batch_size, output_size]`, where\n    `output_size` could be a high dimension tensor shape.\n\nMasking:\n  This layer supports masking for input data with a variable number\n  of timesteps. To introduce masks to your data,\n  use an [tf.keras.layers.Embedding] layer with the `mask_zero` parameter\n  set to `True`.\n\nNote on using statefulness in RNNs:\n  You can set RNN layers to be 'stateful', which means that the states\n  computed for the samples in one batch will be reused as initial states\n  for the samples in the next batch. This assumes a one-to-one mapping\n  between samples in different successive batches.\n\n  To enable statefulness:\n    - Specify `stateful=True` in the layer constructor.\n    - Specify a fixed batch size for your model, by passing\n      If sequential model:\n        `batch_input_shape=(...)` to the first layer in your model.\n      Else for functional model with 1 or more Input layers:\n        `batch_shape=(...)` to all the first layers in your model.\n      This is the expected shape of your inputs\n      *including the batch size*.\n      It should be a tuple of integers, e.g. 
`(32, 10, 100)`.\n    - Specify `shuffle=False` when calling `fit()`.\n\n  To reset the states of your model, call `.reset_states()` on either\n  a specific layer, or on your entire model.\n\nNote on specifying the initial state of RNNs:\n  You can specify the initial state of RNN layers symbolically by\n  calling them with the keyword argument `initial_state`. The value of\n  `initial_state` should be a tensor or list of tensors representing\n  the initial state of the RNN layer.\n\n  You can specify the initial state of RNN layers numerically by\n  calling `reset_states` with the keyword argument `states`. The value of\n  `states` should be a numpy array or list of numpy arrays representing\n  the initial state of the RNN layer.\n\nNote on passing external constants to RNNs:\n  You can pass \"external\" constants to the cell using the `constants`\n  keyword argument of `RNN.__call__` (as well as `RNN.call`) method. This\n  requires that the `cell.call` method accepts the same keyword argument\n  `constants`. Such constants can be used to condition the cell\n  transformation on additional static inputs (not changing over time),\n  a.k.a. an attention mechanism.",
+        "description": "- If `return_state`: a list of tensors. The first tensor is\nthe output. The remaining tensors are the last states,\neach with shape `(batch_size, state_size)`, where `state_size` could\nbe a high dimension tensor shape.\n- If `return_sequences`: 3D tensor with shape\n`(batch_size, timesteps, output_size)`.\n\nMasking:\n\nThis layer supports masking for input data with a variable number\nof timesteps. To introduce masks to your data,\nuse a `keras.layers.Embedding` layer with the `mask_zero` parameter\nset to `True`.\n\nNote on using statefulness in RNNs:\n\nYou can set RNN layers to be 'stateful', which means that the states\ncomputed for the samples in one batch will be reused as initial states\nfor the samples in the next batch. This assumes a one-to-one mapping\nbetween samples in different successive batches.\n\nTo enable statefulness:\n\n- Specify `stateful=True` in the layer constructor.\n- Specify a fixed batch size for your model, by passing\nIf sequential model:\n    `batch_input_shape=(...)` to the first layer in your model.\nElse for functional model with 1 or more Input layers:\n    `batch_shape=(...)` to all the first layers in your model.\nThis is the expected shape of your inputs\n*including the batch size*.\nIt should be a tuple of integers, e.g. `(32, 10, 100)`.\n- Specify `shuffle=False` when calling `fit()`.\n\nTo reset the states of your model, call `.reset_states()` on either\na specific layer, or on your entire model.\n\nNote on specifying the initial state of RNNs:\n\nYou can specify the initial state of RNN layers symbolically by\ncalling them with the keyword argument `initial_state`. The value of\n`initial_state` should be a tensor or list of tensors representing\nthe initial state of the RNN layer.\n\nYou can specify the initial state of RNN layers numerically by\ncalling `reset_states` with the keyword argument `states`. 
The value of\n`states` should be a numpy array or list of numpy arrays representing\nthe initial state of the RNN layer.",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": "from keras.src.layers import RNN\nfrom keras.src import backend\n\n# First, let's define a RNN Cell, as a layer subclass.\nclass MinimalRNNCell(keras.layers.Layer):\n\n    def __init__(self, units, **kwargs):\n        self.units = units\n        self.state_size = units\n        super(MinimalRNNCell, self).__init__(**kwargs)\n\n    def build(self, input_shape):\n        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),\n                                      initializer='uniform',\n                                      name='kernel')\n        self.recurrent_kernel = self.add_weight(\n            shape=(self.units, self.units),\n            initializer='uniform',\n            name='recurrent_kernel')\n        self.built = True\n\n    def call(self, inputs, states):\n        prev_output = states[0]\n        h = backend.dot(inputs, self.kernel)\n        output = h + backend.dot(prev_output, self.recurrent_kernel)\n        return output, [output]\n\n# Let's use this cell in a RNN layer:\n\ncell = MinimalRNNCell(32)\nx = keras.Input((None, 5))\nlayer = RNN(cell)\ny = layer(x)\n\n# Here's how to use the cell to build a stacked RNN:\n\ncells = [MinimalRNNCell(32), MinimalRNNCell(64)]\nx = keras.Input((None, 5))\nlayer = RNN(cells)\ny = layer(x)"
+        "code": "from keras.layers import RNN\nfrom keras import ops\n\n# First, let's define a RNN Cell, as a layer subclass.\nclass MinimalRNNCell(keras.layers.Layer):\n\n    def __init__(self, units, **kwargs):\n        super().__init__(**kwargs)\n        self.units = units\n        self.state_size = units\n\n    def build(self, input_shape):\n        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),\n                                      initializer='uniform',\n                                      name='kernel')\n        self.recurrent_kernel = self.add_weight(\n            shape=(self.units, self.units),\n            initializer='uniform',\n            name='recurrent_kernel')\n        self.built = True\n\n    def call(self, inputs, states):\n        prev_output = states[0]\n        h = ops.matmul(inputs, self.kernel)\n        output = h + ops.matmul(prev_output, self.recurrent_kernel)\n        return output, [output]\n\n# Let's use this cell in a RNN layer:\n\ncell = MinimalRNNCell(32)\nx = keras.Input((None, 5))\nlayer = RNN(cell)\ny = layer(x)\n\n# Here's how to use the cell to build a stacked RNN:\n\ncells = [MinimalRNNCell(32), MinimalRNNCell(64)]\nx = keras.Input((None, 5))\nlayer = RNN(cells)\ny = layer(x)"
       }
     ]
   },
@@ -3459,30 +3590,30 @@
     "name": "SeparableConv1D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Depthwise separable 1D convolution.\n\nThis layer performs a depthwise convolution that acts separately on\nchannels, followed by a pointwise convolution that mixes channels.\nIf `use_bias` is True and a bias initializer is provided,\nit adds a bias vector to the output.\nIt then optionally applies an activation function to produce the final\noutput.",
+    "description": "1D separable convolution layer.\n\nThis layer performs a depthwise convolution that acts separately on\nchannels, followed by a pointwise convolution that mixes channels.\nIf `use_bias` is True and a bias initializer is provided,\nit adds a bias vector to the output. It then optionally applies an\nactivation function to produce the final output.",
     "attributes": [
       {
-        "description": "Integer, the dimensionality of the output space (i.e. the number\n    of filters in the convolution).",
+        "description": "int, the dimensionality of the output space (i.e. the number\n        of filters in the pointwise convolution).",
         "name": "filters"
       },
       {
-        "description": "A single integer specifying the spatial\n    dimensions of the filters.",
+        "description": "int or tuple/list of 1 integers, specifying the size of the\n        depthwise convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "A single integer specifying the strides\n    of the convolution.\n    Specifying any `stride` value != 1 is incompatible with specifying\n    any `dilation_rate` value != 1.",
+        "description": "int or tuple/list of 1 integers, specifying the stride length\n        of the depthwise convolution. If only one int is specified, the same\n        stride size will be used for all dimensions. `strides > 1` is\n        incompatible with `dilation_rate > 1`.",
         "name": "strides"
       },
       {
-        "description": "One of `\"valid\"`, `\"same\"`, or `\"causal\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input such that output has\n    the same height/width dimension as the input. `\"causal\"` results in\n    causal (dilated) convolutions, e.g. `output[t]` does not depend on\n    `input[t+1:]`.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
-        "description": "A string, one of `channels_last` (default) or\n    `channels_first`.  The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, length, channels)` while `channels_first` corresponds to\n    inputs with shape `(batch_size, channels, length)`.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, steps, features)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, features, steps)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "A single integer, specifying\n    the dilation rate to use for dilated convolution.",
+        "description": "int or tuple/list of 1 integers, specifying the dilation\n        rate to use for dilated convolution. If only one int is specified,\n        the same dilation rate will be used for all dimensions.",
         "name": "dilation_rate"
       },
       {
@@ -3490,51 +3621,51 @@
         "name": "depth_multiplier"
       },
       {
-        "description": "Activation function to use.\n    If you don't specify anything, no activation is applied\n    (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
-        "description": "Boolean, whether the layer uses a bias.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias"
       },
       {
-        "description": "An initializer for the depthwise convolution kernel\n    (see `keras.initializers`). If None, then the default initializer\n    ('glorot_uniform') will be used.",
+        "description": "An initializer for the depthwise convolution\n        kernel. If None, then the default initializer (`\"glorot_uniform\"`)\n        will be used.",
         "name": "depthwise_initializer"
       },
       {
-        "description": "An initializer for the pointwise convolution kernel\n    (see `keras.initializers`). If None, then the default initializer\n    ('glorot_uniform') will be used.",
+        "description": "An initializer for the pointwise convolution\n        kernel. If None, then the default initializer (`\"glorot_uniform\"`)\n        will be used.",
         "name": "pointwise_initializer"
       },
       {
-        "description": "An initializer for the bias vector. If None, the default\n    initializer ('zeros') will be used (see `keras.initializers`).",
+        "description": "An initializer for the bias vector. If None, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer"
       },
       {
-        "description": "Optional regularizer for the depthwise\n    convolution kernel (see `keras.regularizers`).",
+        "description": "Optional regularizer for the depthwise\n        convolution kernel.",
         "name": "depthwise_regularizer"
       },
       {
-        "description": "Optional regularizer for the pointwise\n    convolution kernel (see `keras.regularizers`).",
+        "description": "Optional regularizer for the pointwise\n        convolution kernel.",
         "name": "pointwise_regularizer"
       },
       {
-        "description": "Optional regularizer for the bias vector\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer"
       },
       {
-        "description": "Optional regularizer function for the output\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer"
       },
       {
-        "description": "Optional projection function to be applied to the\n    depthwise kernel after being updated by an `Optimizer` (e.g. used for\n    norm constraints or value constraints for layer weights). The function\n    must take as input the unprojected variable and must return the\n    projected variable (which must have the same shape). Constraints are\n    not safe to use when doing asynchronous distributed training\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        depthwise kernel after being updated by an `Optimizer` (e.g. used\n        for norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape).",
         "name": "depthwise_constraint"
       },
       {
-        "description": "Optional projection function to be applied to the\n    pointwise kernel after being updated by an `Optimizer`\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        pointwise kernel after being updated by an `Optimizer`.",
         "name": "pointwise_constraint"
       },
       {
-        "description": "Optional projection function to be applied to the\n    bias after being updated by an `Optimizer`\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint"
       },
       {
@@ -3544,42 +3675,47 @@
     ],
     "inputs": [
       {
-        "description": "3D tensor with shape:\n`(batch_size, channels, steps)` if data_format='channels_first'\nor 3D tensor with shape:\n`(batch_size, steps, channels)` if data_format='channels_last'.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 3D tensor with shape: `(batch_shape, steps, channels)`\n- If `data_format=\"channels_first\"`:\n    A 3D tensor with shape: `(batch_shape, channels, steps)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "3D tensor with shape:\n`(batch_size, filters, new_steps)` if data_format='channels_first'\nor 3D tensor with shape:\n`(batch_size,  new_steps, filters)` if data_format='channels_last'.\n`new_steps` value might have changed due to padding or strides.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 3D tensor with shape: `(batch_shape, new_steps, filters)`\n- If `data_format=\"channels_first\"`:\n    A 3D tensor with shape: `(batch_shape, filters, new_steps)`",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> x = np.random.rand(4, 10, 12)\n>>> y = keras.layers.SeparableConv1D(3, 4, 3, 2, activation='relu')(x)\n>>> print(y.shape)\n(4, 4, 4)"
+      }
     ]
   },
   {
     "name": "SeparableConv2D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Depthwise separable 2D convolution.\n\nSeparable convolutions consist of first performing\na depthwise spatial convolution\n(which acts on each input channel separately)\nfollowed by a pointwise convolution which mixes the resulting\noutput channels. The `depth_multiplier` argument controls how many\noutput channels are generated per input channel in the depthwise step.\n\nIntuitively, separable convolutions can be understood as\na way to factorize a convolution kernel into two smaller kernels,\nor as an extreme version of an Inception block.",
+    "description": "2D separable convolution layer.\n\nThis layer performs a depthwise convolution that acts separately on\nchannels, followed by a pointwise convolution that mixes channels.\nIf `use_bias` is True and a bias initializer is provided,\nit adds a bias vector to the output. It then optionally applies an\nactivation function to produce the final output.",
     "attributes": [
       {
         "default": "linear",
-        "description": "Activation function to use.\n    If you don't specify anything, no activation is applied\n    (see `keras.activations`).",
+        "description": "Activation function. If `None`, no activation is applied.",
         "name": "activation"
       },
       {
         "default": "valid",
-        "description": "one of `\"valid\"` or `\"same\"` (case-insensitive).\n    `\"valid\"` means no padding. `\"same\"` results in padding with zeros\n    evenly to the left/right or up/down of the input such that output has\n    the same height/width dimension as the input.",
+        "description": "string, either `\"valid\"` or `\"same\"` (case-insensitive).\n        `\"valid\"` means no padding. `\"same\"` results in padding evenly to\n        the left/right or up/down of the input such that output has the same\n        height/width dimension as the input.",
         "name": "padding"
       },
       {
         "default": true,
-        "description": "Boolean, whether the layer uses a bias vector.",
+        "description": "bool, if `True`, bias will be added to the output.",
         "name": "use_bias",
         "visible": false
       },
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch_size, channels, height, width)`.\n    When unspecified, uses `image_data_format` value found in your Keras\n    config file at `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "string, either `\"channels_last\"` or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs. `\"channels_last\"`\n        corresponds to inputs with shape `(batch, height, width, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch, channels, height, width)`. It defaults to the `image_data_format`\n        value found in your Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       },
       {
@@ -3587,7 +3723,7 @@
           1,
           1
         ],
-        "description": "An integer or tuple/list of 2 integers,\n    specifying the strides of the convolution along the height and width.\n    Can be a single integer to specify the same value for\n    all spatial dimensions. Current implementation only supports equal\n    length strides in the row and column dimensions.\n    Specifying any stride value != 1 is incompatible with specifying\n    any `dilation_rate` value != 1.",
+        "description": "int or tuple/list of 2 integers, specifying the stride length\n        of the depthwise convolution. If only one int is specified, the same\n        stride size will be used for all dimensions. `strides > 1` is\n        incompatible with `dilation_rate > 1`.",
         "name": "strides"
       },
       {
@@ -3595,12 +3731,12 @@
           1,
           1
         ],
-        "description": "An integer or tuple/list of 2 integers, specifying\n    the dilation rate to use for dilated convolution.",
+        "description": "int or tuple/list of 2 integers, specifying the dilation\n        rate to use for dilated convolution. If only one int is specified,\n        the same dilation rate will be used for all dimensions.",
         "name": "dilation_rate"
       },
       {
         "default": 1,
-        "description": "The number of depthwise convolution output channels\n    for each input channel.\n    The total number of depthwise convolution output\n    channels will be equal to `filters_in * depth_multiplier`.",
+        "description": "The number of depthwise convolution output channels\n        for each input channel. The total number of depthwise convolution\n        output channels will be equal to `input_channel * depth_multiplier`.",
         "name": "depth_multiplier"
       },
       {
@@ -3613,7 +3749,7 @@
             "seed": null
           }
         },
-        "description": "An initializer for the pointwise convolution kernel\n    (see `keras.initializers`). If None, then the default initializer\n    ('glorot_uniform') will be used.",
+        "description": "An initializer for the pointwise convolution\n        kernel. If None, then the default initializer (`\"glorot_uniform\"`)\n        will be used.",
         "name": "pointwise_initializer",
         "visible": false
       },
@@ -3627,7 +3763,7 @@
             "seed": null
           }
         },
-        "description": "An initializer for the depthwise convolution kernel\n    (see `keras.initializers`). If None, then the default initializer\n    ('glorot_uniform') will be used.",
+        "description": "An initializer for the depthwise convolution\n        kernel. If None, then the default initializer (`\"glorot_uniform\"`)\n        will be used.",
         "name": "depthwise_initializer",
         "visible": false
       },
@@ -3636,7 +3772,7 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "An initializer for the bias vector. If None, the default\n    initializer ('zeros') will be used (see `keras.initializers`).",
+        "description": "An initializer for the bias vector. If None, the\n        default initializer (`\"zeros\"`) will be used.",
         "name": "bias_initializer",
         "visible": false
       },
@@ -3654,50 +3790,50 @@
         "visible": false
       },
       {
-        "description": "Integer, the dimensionality of the output space\n    (i.e. the number of output filters in the convolution).",
+        "description": "int, the dimensionality of the output space (i.e. the number\n        of filters in the pointwise convolution).",
         "name": "filters"
       },
       {
-        "description": "An integer or tuple/list of 2 integers, specifying the\n    height and width of the 2D convolution window.\n    Can be a single integer to specify the same value for\n    all spatial dimensions.",
+        "description": "int or tuple/list of 2 integers, specifying the size of the\n        depthwise convolution window.",
         "name": "kernel_size"
       },
       {
-        "description": "Regularizer function applied to\n    the depthwise kernel matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the depthwise\n        convolution kernel.",
         "name": "depthwise_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to\n    the pointwise kernel matrix (see `keras.regularizers`).",
+        "description": "Optional regularizer for the pointwise\n        convolution kernel.",
         "name": "pointwise_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer for the bias vector.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to\n    the output of the layer (its \"activation\")\n    (see `keras.regularizers`).",
+        "description": "Optional regularizer function for the output.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to\n    the depthwise kernel matrix\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        depthwise kernel after being updated by an `Optimizer` (e.g. used\n        for norm constraints or value constraints for layer weights). The\n        function must take as input the unprojected variable and must return\n        the projected variable (which must have the same shape).",
         "name": "depthwise_constraint",
         "visible": false
       },
       {
-        "description": "Constraint function applied to\n    the pointwise kernel matrix\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        pointwise kernel after being updated by an `Optimizer`.",
         "name": "pointwise_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector\n    (see `keras.constraints`).",
+        "description": "Optional projection function to be applied to the\n        bias after being updated by an `Optimizer`.",
         "name": "bias_constraint"
       }
     ],
     "inputs": [
       {
-        "description": "4D tensor with shape:\n`(batch_size, channels, rows, cols)` if data_format='channels_first'\nor 4D tensor with shape:\n`(batch_size, rows, cols, channels)` if data_format='channels_last'.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, height, width, channels)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, channels, height, width)`",
         "name": "input"
       },
       {
@@ -3709,9 +3845,14 @@
     ],
     "outputs": [
       {
-        "description": "4D tensor with shape:\n`(batch_size, filters, new_rows, new_cols)` if\ndata_format='channels_first'\nor 4D tensor with shape:\n`(batch_size, new_rows, new_cols, filters)` if\ndata_format='channels_last'.  `rows` and `cols` values might have changed\ndue to padding.",
+        "description": "- If `data_format=\"channels_last\"`:\n    A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`\n- If `data_format=\"channels_first\"`:\n    A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`",
         "name": "output"
       }
+    ],
+    "examples": [
+      {
+        "code": ">>> x = np.random.rand(4, 10, 10, 12)\n>>> y = keras.layers.SeparableConv2D(3, 4, 3, 2, activation='relu')(x)\n>>> print(y.shape)\n(4, 4, 4, 4)"
+      }
     ]
   },
   {
@@ -3722,41 +3863,41 @@
     "name": "SimpleRNN",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Fully-connected RNN where the output is to be fed back to input.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.",
+    "description": "Fully-connected RNN where the output is to be fed back as the new input.",
     "attributes": [
       {
         "default": false,
-        "description": "Boolean. Whether to return the last output\n    in the output sequence, or the full sequence. Default: `False`.",
+        "description": "Boolean. Whether to return the last output\n        in the output sequence, or the full sequence. Default: `False`.",
         "name": "return_sequences"
       },
       {
         "default": false,
-        "description": "Boolean. Whether to return the last state\n    in addition to the output. Default: `False`",
+        "description": "Boolean. Whether to return the last state\n        in addition to the output. Default: `False`.",
         "name": "return_state"
       },
       {
         "default": false,
-        "description": "Boolean (default False).\n    If True, process the input sequence backwards and return the\n    reversed sequence.",
+        "description": "Boolean (default: `False`).\n        If `True`, process the input sequence backwards and return the\n        reversed sequence.",
         "name": "go_backwards"
       },
       {
         "default": false,
-        "description": "Boolean (default False). If True, the last state\n    for each sample at index i in a batch will be used as initial\n    state for the sample of index i in the following batch.",
+        "description": "Boolean (default: `False`). If `True`, the last state\n        for each sample at index i in a batch will be used as initial\n        state for the sample of index i in the following batch.",
         "name": "stateful"
       },
       {
         "default": false,
-        "description": "Boolean (default False).\n    If True, the network will be unrolled,\n    else a symbolic loop will be used.\n    Unrolling can speed-up a RNN,\n    although it tends to be more memory-intensive.\n    Unrolling is only suitable for short sequences.",
+        "description": "Boolean (default: `False`).\n        If `True`, the network will be unrolled,\n        else a symbolic loop will be used.\n        Unrolling can speed-up a RNN,\n        although it tends to be more memory-intensive.\n        Unrolling is only suitable for short sequences.",
         "name": "unroll"
       },
       {
         "default": "tanh",
-        "description": "Activation function to use.",
+        "description": "Activation function to use.\n        Default: hyperbolic tangent (`tanh`).\n        If you pass `None`, no activation is applied\n        (i.e. \"linear\" activation: `a(x) = x`).",
         "name": "activation"
       },
       {
         "default": true,
-        "description": "Boolean, (default `True`), whether the layer uses a bias vector.",
+        "description": "Boolean, (default `True`), whether the layer uses\n        a bias vector.",
         "name": "use_bias",
         "visible": false
       },
@@ -3770,7 +3911,7 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `kernel` weights matrix,\n    used for the linear transformation of the inputs. Default:\n    `glorot_uniform`.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs. Default:\n        `\"glorot_uniform\"`.",
         "name": "kernel_initializer",
         "visible": false
       },
@@ -3782,7 +3923,7 @@
             "seed": null
           }
         },
-        "description": "Initializer for the `recurrent_kernel`\n    weights matrix, used for the linear transformation of the recurrent\n    state.  Default: `orthogonal`.",
+        "description": "Initializer for the `recurrent_kernel`\n        weights matrix, used for the linear transformation of the recurrent\n        state.  Default: `\"orthogonal\"`.",
         "name": "recurrent_initializer",
         "visible": false
       },
@@ -3791,18 +3932,18 @@
           "class_name": "Zeros",
           "config": {}
         },
-        "description": "Initializer for the bias vector. Default: `zeros`.",
+        "description": "Initializer for the bias vector. Default: `\"zeros\"`.",
         "name": "bias_initializer",
         "visible": false
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1.\n    Fraction of the units to drop for the linear transformation of the\n    inputs. Default: 0.",
+        "description": "Float between 0 and 1.\n        Fraction of the units to drop for the linear transformation\n        of the inputs. Default: 0.",
         "name": "dropout"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1.\n    Fraction of the units to drop for the linear transformation of the\n    recurrent state. Default: 0.",
+        "description": "Float between 0 and 1.\n        Fraction of the units to drop for the linear transformation of the\n        recurrent state. Default: 0.",
         "name": "recurrent_dropout"
       },
       {
@@ -3810,35 +3951,35 @@
         "name": "units"
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector.",
+        "description": "Regularizer function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the output of the\n    layer (its \"activation\"). Default: `None`.",
+        "description": "Regularizer function applied to the output of the\n        layer (its \"activation\"). Default: `None`.",
         "name": "activity_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix.  Default: `None`.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix.  Default: `None`.",
         "name": "recurrent_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector. Default:\n    `None`.",
+        "description": "Constraint function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_constraint"
       },
       {
@@ -3867,88 +4008,92 @@
     ],
     "examples": [
       {
-        "code": "inputs = np.random.random([32, 10, 8]).astype(np.float32)\nsimple_rnn = tf.keras.layers.SimpleRNN(4)\n\noutput = simple_rnn(inputs)  # The output has shape `[32, 4]`.\n\nsimple_rnn = tf.keras.layers.SimpleRNN(\n    4, return_sequences=True, return_state=True)\n\n# whole_sequence_output has shape `[32, 10, 4]`.\n# final_state has shape `[32, 4]`.\nwhole_sequence_output, final_state = simple_rnn(inputs)"
+        "code": "inputs = np.random.random((32, 10, 8))\nsimple_rnn = keras.layers.SimpleRNN(4)\noutput = simple_rnn(inputs)  # The output has shape `(32, 4)`.\nsimple_rnn = keras.layers.SimpleRNN(\n    4, return_sequences=True, return_state=True\n)\n# whole_sequence_output has shape `(32, 10, 4)`.\n# final_state has shape `(32, 4)`.\nwhole_sequence_output, final_state = simple_rnn(inputs)"
       }
     ]
   },
   {
     "name": "SimpleRNNCell",
     "module": "tensorflow.keras.layers",
-    "description": "Cell class for SimpleRNN.\n\nSee [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)\nfor details about the usage of RNN API.\n\nThis class processes one step within the whole time sequence input, whereas\n`tf.keras.layer.SimpleRNN` processes the whole sequence.",
+    "description": "Cell class for SimpleRNN.\n\nThis class processes one step within the whole time sequence input, whereas\n`keras.layers.SimpleRNN` processes the whole sequence.",
     "attributes": [
       {
         "description": "Positive integer, dimensionality of the output space.",
         "name": "units"
       },
       {
-        "description": "Activation function to use.",
+        "description": "Activation function to use.\n        Default: hyperbolic tangent (`tanh`).\n        If you pass `None`, no activation is applied\n        (i.e. \"linear\" activation: `a(x) = x`).",
         "name": "activation"
       },
       {
-        "description": "Boolean, (default `True`), whether the layer uses a bias vector.",
+        "description": "Boolean, (default `True`), whether the layer\n        should use a bias vector.",
         "name": "use_bias",
         "visible": false
       },
       {
-        "description": "Initializer for the `kernel` weights matrix,\n    used for the linear transformation of the inputs. Default:\n    `glorot_uniform`.",
+        "description": "Initializer for the `kernel` weights matrix,\n        used for the linear transformation of the inputs. Default:\n        `\"glorot_uniform\"`.",
         "name": "kernel_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the `recurrent_kernel`\n    weights matrix, used for the linear transformation of the recurrent\n    state.  Default: `orthogonal`.",
+        "description": "Initializer for the `recurrent_kernel`\n        weights matrix, used for the linear transformation\n        of the recurrent state. Default: `\"orthogonal\"`.",
         "name": "recurrent_initializer",
         "visible": false
       },
       {
-        "description": "Initializer for the bias vector. Default: `zeros`.",
+        "description": "Initializer for the bias vector. Default: `\"zeros\"`.",
         "name": "bias_initializer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Regularizer function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Regularizer function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_regularizer",
         "visible": false
       },
       {
-        "description": "Regularizer function applied to the bias vector.",
+        "description": "Regularizer function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_regularizer",
         "visible": false
       },
       {
-        "description": "Constraint function applied to the `kernel` weights\n    matrix. Default: `None`.",
+        "description": "Constraint function applied to the `kernel` weights\n        matrix. Default: `None`.",
         "name": "kernel_constraint"
       },
       {
-        "description": "Constraint function applied to the\n    `recurrent_kernel` weights matrix. Default: `None`.",
+        "description": "Constraint function applied to the\n        `recurrent_kernel` weights matrix. Default: `None`.",
         "name": "recurrent_constraint"
       },
       {
-        "description": "Constraint function applied to the bias vector. Default:\n    `None`.",
+        "description": "Constraint function applied to the bias vector.\n        Default: `None`.",
         "name": "bias_constraint"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n    linear transformation of the inputs. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        linear transformation of the inputs. Default: 0.",
         "name": "dropout"
       },
       {
         "default": 0,
-        "description": "Float between 0 and 1. Fraction of the units to drop\n    for the linear transformation of the recurrent state. Default: 0.",
+        "description": "Float between 0 and 1. Fraction of the units to drop\n        for the linear transformation of the recurrent state. Default: 0.",
         "name": "recurrent_dropout"
       },
       {
         "description": "`None`.",
         "name": "Default"
+      },
+      {
+        "name": "seed",
+        "description": "Random seed for dropout."
       }
     ],
     "examples": [
       {
-        "code": "inputs = np.random.random([32, 10, 8]).astype(np.float32)\nrnn = tf.keras.layers.RNN(tf.keras.layers.SimpleRNNCell(4))\n\noutput = rnn(inputs)  # The output has shape `[32, 4]`.\n\nrnn = tf.keras.layers.RNN(\n    tf.keras.layers.SimpleRNNCell(4),\n    return_sequences=True,\n    return_state=True)\n\n# whole_sequence_output has shape `[32, 10, 4]`.\n# final_state has shape `[32, 4]`.\nwhole_sequence_output, final_state = rnn(inputs)"
+        "code": "inputs = np.random.random([32, 10, 8]).astype(np.float32)\nrnn = keras.layers.RNN(keras.layers.SimpleRNNCell(4))\noutput = rnn(inputs)  # The output has shape `(32, 4)`.\nrnn = keras.layers.RNN(\n    keras.layers.SimpleRNNCell(4),\n    return_sequences=True,\n    return_state=True\n)\n# whole_sequence_output has shape `(32, 10, 4)`.\n# final_state has shape `(32, 4)`.\nwhole_sequence_output, final_state = rnn(inputs)"
       }
     ]
   },
@@ -3956,7 +4101,7 @@
     "name": "Softmax",
     "module": "tensorflow.keras.layers",
     "category": "Activation",
-    "description": "Softmax activation function.\n\nExample without mask:\n\n```\n>>> inp = np.asarray([[1., 2., 1.]])\n>>> layer = tf.keras.layers.Softmax()\n>>> layer(inp).numpy()\narray([[0.21194157, 0.5761169 , 0.21194157]], dtype=float32)\n>>> mask = np.asarray([[True, False, True]], dtype=bool)\n>>> layer(inp, mask).numpy()\narray([[0.5, 0. , 0.5]], dtype=float32)\n```",
+    "description": "Softmax activation layer.\n\nFormula:\n``` python\nexp_x = exp(x - max(x))\nf(x) = exp_x / sum(exp_x)\n```",
     "inputs": [
       {
         "name": "input",
@@ -3973,6 +4118,15 @@
       {
         "name": "axis",
         "description": "Integer, or list of Integers, axis along which the softmax\n        normalization is applied."
+      },
+      {
+        "name": "**kwargs",
+        "description": "Base layer keyword arguments, such as `name` and `dtype`."
+      }
+    ],
+    "examples": [
+      {
+        "code": ">>> softmax_layer = keras.layers.Softmax()\n>>> input = np.array([1.0, 2.0, 1.0])\n>>> result = softmax_layer(input)\n>>> result\n[0.21194157, 0.5761169, 0.21194157]"
       }
     ]
   },
@@ -3988,7 +4142,7 @@
     "name": "SpatialDropout1D",
     "module": "tensorflow.keras.layers",
     "category": "Dropout",
-    "description": "Spatial 1D version of Dropout.\n\nThis version performs the same function as Dropout, however, it drops\nentire 1D feature maps instead of individual elements. If adjacent frames\nwithin feature maps are strongly correlated (as is normally the case in\nearly convolution layers) then regular dropout will not regularize the\nactivations and will otherwise just result in an effective learning rate\ndecrease. In this case, SpatialDropout1D will help promote independence\nbetween feature maps and should be used instead.",
+    "description": "Spatial 1D version of Dropout.\n\nThis layer performs the same function as Dropout, however, it drops\nentire 1D feature maps instead of individual elements. If adjacent frames\nwithin feature maps are strongly correlated (as is normally the case in\nearly convolution layers) then regular dropout will not regularize the\nactivations and will otherwise just result in an effective learning rate\ndecrease. In this case, `SpatialDropout1D` will help promote independence\nbetween feature maps and should be used instead.",
     "attributes": [
       {
         "description": "Float between 0 and 1. Fraction of the input units to drop.",
@@ -3997,7 +4151,7 @@
     ],
     "inputs": [
       {
-        "description": "3D tensor with shape: `(samples, timesteps, channels)`\nOutput shape: Same as input.\nReferences: - [Efficient Object Localization Using Convolutional\n    Networks](https://arxiv.org/abs/1411.4280)",
+        "description": "3D tensor with shape: `(samples, timesteps, channels)`\n\nOutput shape: Same as input.\n\nReference:\n\n- [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)",
         "name": "input"
       }
     ],
@@ -4017,20 +4171,20 @@
     "name": "SpatialDropout2D",
     "module": "tensorflow.keras.layers",
     "category": "Dropout",
-    "description": "Spatial 2D version of Dropout.\n\nThis version performs the same function as Dropout, however, it drops\nentire 2D feature maps instead of individual elements. If adjacent pixels\nwithin feature maps are strongly correlated (as is normally the case in\nearly convolution layers) then regular dropout will not regularize the\nactivations and will otherwise just result in an effective learning rate\ndecrease. In this case, SpatialDropout2D will help promote independence\nbetween feature maps and should be used instead.",
+    "description": "Spatial 2D version of Dropout.\n\nThis version performs the same function as Dropout, however, it drops\nentire 2D feature maps instead of individual elements. If adjacent pixels\nwithin feature maps are strongly correlated (as is normally the case in\nearly convolution layers) then regular dropout will not regularize the\nactivations and will otherwise just result in an effective learning rate\ndecrease. In this case, `SpatialDropout2D` will help promote independence\nbetween feature maps and should be used instead.",
     "attributes": [
       {
         "description": "Float between 0 and 1. Fraction of the input units to drop.",
         "name": "rate"
       },
       {
-        "description": "'channels_first' or 'channels_last'. In 'channels_first'\n    mode, the channels dimension (the depth) is at index 1, in\n    'channels_last' mode is it at index 3. When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n    `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "`\"channels_first\"` or `\"channels_last\"`.\n        In `\"channels_first\"` mode, the channels dimension (the depth)\n        is at index 1, in `\"channels_last\"` mode it is at index 3.\n        It defaults to the `image_data_format` value found in your\n        Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "4D tensor with shape: `(samples, channels, rows, cols)` if\n    data_format='channels_first'\n  or 4D tensor with shape: `(samples, rows, cols, channels)` if\n    data_format='channels_last'.\nOutput shape: Same as input.\nReferences: - [Efficient Object Localization Using Convolutional\n    Networks](https://arxiv.org/abs/1411.4280)",
+        "description": "4D tensor with shape: `(samples, channels, rows, cols)` if\n        data_format='channels_first'\n    or 4D tensor with shape: `(samples, rows, cols, channels)` if\n        data_format='channels_last'.\n\nOutput shape: Same as input.\n\nReference:\n\n- [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)",
         "name": "input"
       }
     ],
@@ -4057,13 +4211,13 @@
         "name": "rate"
       },
       {
-        "description": "'channels_first' or 'channels_last'. In 'channels_first'\n    mode, the channels dimension (the depth) is at index 1, in\n    'channels_last' mode is it at index 4. When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n    `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "`\"channels_first\"` or `\"channels_last\"`.\n        In `\"channels_first\"` mode, the channels dimension (the depth)\n        is at index 1, in `\"channels_last\"` mode it is at index 4.\n        It defaults to the `image_data_format` value found in your\n        Keras config file at `~/.keras/keras.json`.\n        If you never set it, then it will be `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if\n    data_format='channels_first'\n  or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if\n    data_format='channels_last'.\nOutput shape: Same as input.\nReferences: - [Efficient Object Localization Using Convolutional\n    Networks](https://arxiv.org/abs/1411.4280)",
+        "description": "5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if\n        data_format='channels_first'\n    or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if\n        data_format='channels_last'.\n\nOutput shape: Same as input.\n\nReference:\n\n- [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)",
         "name": "input"
       }
     ],
@@ -4091,14 +4245,14 @@
     ],
     "examples": [
       {
-        "code": "batch_size = 3\nsentence_max_length = 5\nn_features = 2\nnew_shape = (batch_size, sentence_max_length, n_features)\nx = tf.constant(np.reshape(np.arange(30), new_shape), dtype = tf.float32)\n\nrnn_cells = [tf.keras.layers.LSTMCell(128) for _ in range(2)]\nstacked_lstm = tf.keras.layers.StackedRNNCells(rnn_cells)\nlstm_layer = tf.keras.layers.RNN(stacked_lstm)\n\nresult = lstm_layer(x)"
+        "code": "batch_size = 3\nsentence_length = 5\nnum_features = 2\nnew_shape = (batch_size, sentence_length, num_features)\nx = np.reshape(np.arange(30), new_shape)\n\nrnn_cells = [keras.layers.LSTMCell(128) for _ in range(2)]\nstacked_lstm = keras.layers.StackedRNNCells(rnn_cells)\nlstm_layer = keras.layers.RNN(stacked_lstm)\n\nresult = lstm_layer(x)"
       }
     ]
   },
   {
     "name": "Subtract",
     "module": "tensorflow.keras.layers",
-    "description": "Layer that subtracts two inputs.\n\nIt takes as input a list of tensors of size 2, both of the same shape, and\nreturns a single tensor, (inputs[0] - inputs[1]), also of the same shape.",
+    "description": "Performs elementwise subtraction.\n\nIt takes as input a list of tensors of size 2, both of the\nsame shape, and returns a single tensor (inputs[0] - inputs[1])\nof the same shape.",
     "inputs": [
       {
         "name": "x"
@@ -4114,7 +4268,11 @@
     ],
     "examples": [
       {
-        "code": "    import keras.src as keras\n\n    input1 = keras.layers.Input(shape=(16,))\n    x1 = keras.layers.Dense(8, activation='relu')(input1)\n    input2 = keras.layers.Input(shape=(32,))\n    x2 = keras.layers.Dense(8, activation='relu')(input2)\n    # Equivalent to subtracted = keras.layers.subtract([x1, x2])\n    subtracted = keras.layers.Subtract()([x1, x2])\n\n    out = keras.layers.Dense(4)(subtracted)\n    model = keras.models.Model(inputs=[input1, input2], outputs=out)"
+        "code": ">>> input_shape = (2, 3, 4)\n>>> x1 = np.random.rand(*input_shape)\n>>> x2 = np.random.rand(*input_shape)\n>>> y = keras.layers.Subtract()([x1, x2])"
+      },
+      {
+        "summary": "Usage in a Keras model:",
+        "code": ">>> input1 = keras.layers.Input(shape=(16,))\n>>> x1 = keras.layers.Dense(8, activation='relu')(input1)\n>>> input2 = keras.layers.Input(shape=(32,))\n>>> x2 = keras.layers.Dense(8, activation='relu')(input2)\n>>> # equivalent to `subtracted = keras.layers.subtract([x1, x2])`\n>>> subtracted = keras.layers.Subtract()([x1, x2])\n>>> out = keras.layers.Dense(4)(subtracted)\n>>> model = keras.models.Model(inputs=[input1, input2], outputs=out)"
       }
     ]
   },
@@ -4155,10 +4313,10 @@
     "name": "TimeDistributed",
     "module": "tensorflow.keras.layers",
     "category": "Wrapper",
-    "description": "This wrapper allows to apply a layer to every temporal slice of an input.\n\nEvery input should be at least 3D, and the dimension of index one of the\nfirst input will be considered to be the temporal dimension.\n\nConsider a batch of 32 video samples, where each sample is a 128x128 RGB\nimage with `channels_last` data format, across 10 timesteps.\nThe batch input shape is `(32, 10, 128, 128, 3)`.\n\nYou can then use `TimeDistributed` to apply the same `Conv2D` layer to each\nof the 10 timesteps, independently:\n\n```\n>>> inputs = tf.keras.Input(shape=(10, 128, 128, 3))\n>>> conv_2d_layer = tf.keras.layers.Conv2D(64, (3, 3))\n>>> outputs = tf.keras.layers.TimeDistributed(conv_2d_layer)(inputs)\n>>> outputs.shape\nTensorShape([None, 10, 126, 126, 64])\n```\n\nBecause `TimeDistributed` applies the same instance of `Conv2D` to each of\nthe timestamps, the same set of weights are used at each timestamp.",
+    "description": "This wrapper allows applying a layer to every temporal slice of an input.\n\nEvery input should be at least 3D, and the dimension of index one of the\nfirst input will be considered to be the temporal dimension.\n\nConsider a batch of 32 video samples, where each sample is a 128x128 RGB\nimage with `channels_last` data format, across 10 timesteps.\nThe batch input shape is `(32, 10, 128, 128, 3)`.\n\nYou can then use `TimeDistributed` to apply the same `Conv2D` layer to each\nof the 10 timesteps, independently:\n\n```\n>>> inputs = layers.Input(shape=(10, 128, 128, 3), batch_size=32)\n>>> conv_2d_layer = layers.Conv2D(64, (3, 3))\n>>> outputs = layers.TimeDistributed(conv_2d_layer)(inputs)\n>>> outputs.shape\n(32, 10, 126, 126, 64)\n```\n\nBecause `TimeDistributed` applies the same instance of `Conv2D` to each of\nthe timesteps, the same set of weights is used at each timestep.",
     "attributes": [
       {
-        "description": "a `tf.keras.layers.Layer` instance.",
+        "description": "a `keras.layers.Layer` instance.",
         "name": "layer"
       }
     ],
@@ -4202,7 +4360,7 @@
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 2, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> print(x)\n[[[ 0  1  2]\n  [ 3  4  5]]\n [[ 6  7  8]\n  [ 9 10 11]]]\n>>> y = tf.keras.layers.UpSampling1D(size=2)(x)\n>>> print(y)\ntf.Tensor(\n  [[[ 0  1  2]\n    [ 0  1  2]\n    [ 3  4  5]\n    [ 3  4  5]]\n   [[ 6  7  8]\n    [ 6  7  8]\n    [ 9 10 11]\n    [ 9 10 11]]], shape=(2, 4, 3), dtype=int64)"
+        "code": ">>> input_shape = (2, 2, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> x\n[[[ 0  1  2]\n  [ 3  4  5]]\n [[ 6  7  8]\n  [ 9 10 11]]]\n>>> y = keras.layers.UpSampling1D(size=2)(x)\n>>> y\n[[[ 0.  1.  2.]\n  [ 0.  1.  2.]\n  [ 3.  4.  5.]\n  [ 3.  4.  5.]]\n [[ 6.  7.  8.]\n  [ 6.  7.  8.]\n  [ 9. 10. 11.]\n  [ 9. 10. 11.]]]"
       }
     ]
   },
@@ -4210,19 +4368,19 @@
     "name": "UpSampling2D",
     "module": "tensorflow.keras.layers",
     "category": "Layer",
-    "description": "Upsampling layer for 2D inputs.\n\nRepeats the rows and columns of the data\nby `size[0]` and `size[1]` respectively.",
+    "description": "Upsampling layer for 2D inputs.\n\nThe implementation uses interpolative resizing, given the resize method\n(specified by the `interpolation` argument). Use `interpolation=\"nearest\"`\nto repeat the rows and columns of the data.",
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch_size, channels, height, width)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string,\n        one of `\"channels_last\"` (default) or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch_size, height, width, channels)` while `\"channels_first\"`\n        corresponds to inputs with shape\n        `(batch_size, channels, height, width)`.\n        When unspecified, uses\n        `image_data_format` value found in your Keras config file at\n        `~/.keras/keras.json` (if exists) else `\"channels_last\"`.\n        Defaults to `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "Int, or tuple of 2 integers.\n    The upsampling factors for rows and columns.",
+        "description": "Int, or tuple of 2 integers.\n        The upsampling factors for rows and columns.",
         "name": "size"
       },
       {
-        "description": "A string, one of `\"area\"`, `\"bicubic\"`, `\"bilinear\"`,\n    `\"gaussian\"`, `\"lanczos3\"`, `\"lanczos5\"`, `\"mitchellcubic\"`,\n    `\"nearest\"`.",
+        "description": "A string, one of `\"bicubic\"`, `\"bilinear\"`, `\"lanczos3\"`,\n        `\"lanczos5\"`, `\"nearest\"`.",
         "name": "interpolation"
       }
     ],
@@ -4240,7 +4398,7 @@
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 2, 1, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> print(x)\n[[[[ 0  1  2]]\n  [[ 3  4  5]]]\n [[[ 6  7  8]]\n  [[ 9 10 11]]]]\n>>> y = tf.keras.layers.UpSampling2D(size=(1, 2))(x)\n>>> print(y)\ntf.Tensor(\n  [[[[ 0  1  2]\n     [ 0  1  2]]\n    [[ 3  4  5]\n     [ 3  4  5]]]\n   [[[ 6  7  8]\n     [ 6  7  8]]\n    [[ 9 10 11]\n     [ 9 10 11]]]], shape=(2, 2, 2, 3), dtype=int64)"
+        "code": ">>> input_shape = (2, 2, 1, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> print(x)\n[[[[ 0  1  2]]\n  [[ 3  4  5]]]\n [[[ 6  7  8]]\n  [[ 9 10 11]]]]\n>>> y = keras.layers.UpSampling2D(size=(1, 2))(x)\n>>> print(y)\n[[[[ 0  1  2]\n   [ 0  1  2]]\n  [[ 3  4  5]\n   [ 3  4  5]]]\n [[[ 6  7  8]\n   [ 6  7  8]]\n  [[ 9 10 11]\n   [ 9 10 11]]]]"
       }
     ]
   },
@@ -4252,11 +4410,11 @@
     "attributes": [
       {
         "default": "channels_last",
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n    while `channels_first` corresponds to inputs with shape\n    `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string,\n        one of `\"channels_last\"` (default) or `\"channels_first\"`.\n        The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n        When unspecified, uses\n        `image_data_format` value found in your Keras config file at\n         `~/.keras/keras.json` (if exists) else `\"channels_last\"`.\n        Defaults to `\"channels_last\"`.",
         "name": "data_format"
       },
       {
-        "description": "Int, or tuple of 3 integers.\n    The upsampling factors for dim1, dim2 and dim3.",
+        "description": "Int, or tuple of 3 integers.\n        The upsampling factors for dim1, dim2 and dim3.",
         "name": "size"
       }
     ],
@@ -4274,7 +4432,7 @@
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 1, 2, 1, 3)\n>>> x = tf.constant(1, shape=input_shape)\n>>> y = tf.keras.layers.UpSampling3D(size=2)(x)\n>>> print(y.shape)\n(2, 2, 4, 2, 3)"
+        "code": ">>> input_shape = (2, 1, 2, 1, 3)\n>>> x = np.ones(input_shape)\n>>> y = keras.layers.UpSampling3D(size=(2, 2, 2))(x)\n>>> y.shape\n(2, 2, 4, 2, 3)"
       }
     ]
   },
@@ -4285,7 +4443,7 @@
     "description": "Zero-padding layer for 1D input (e.g. temporal sequence).",
     "attributes": [
       {
-        "description": "Int, or tuple of int (length 2).\n        - If int:\n        How many zeros to add at the beginning and end of\n        the padding dimension (axis 1).\n        - If tuple of int (length 2):\n        How many zeros to add at the beginning and the end of\n        the padding dimension (`(left_pad, right_pad)`).",
+        "description": "Int, or tuple of int (length 2), or dictionary.\n        - If int: how many zeros to add at the beginning and end of\n          the padding dimension (axis 1).\n        - If tuple of 2 ints: how many zeros to add at the beginning and the\n          end of the padding dimension (`(left_pad, right_pad)`).",
         "name": "padding"
       }
     ],
@@ -4303,7 +4461,7 @@
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (2, 2, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> print(x)\n[[[ 0  1  2]\n  [ 3  4  5]]\n [[ 6  7  8]\n  [ 9 10 11]]]\n>>> y = tf.keras.layers.ZeroPadding1D(padding=2)(x)\n>>> print(y)\ntf.Tensor(\n  [[[ 0  0  0]\n    [ 0  0  0]\n    [ 0  1  2]\n    [ 3  4  5]\n    [ 0  0  0]\n    [ 0  0  0]]\n   [[ 0  0  0]\n    [ 0  0  0]\n    [ 6  7  8]\n    [ 9 10 11]\n    [ 0  0  0]\n    [ 0  0  0]]], shape=(2, 6, 3), dtype=int64)"
+        "code": ">>> input_shape = (2, 2, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> x\n[[[ 0  1  2]\n  [ 3  4  5]]\n [[ 6  7  8]\n  [ 9 10 11]]]\n>>> y = keras.layers.ZeroPadding1D(padding=2)(x)\n>>> y\n[[[ 0  0  0]\n  [ 0  0  0]\n  [ 0  1  2]\n  [ 3  4  5]\n  [ 0  0  0]\n  [ 0  0  0]]\n [[ 0  0  0]\n  [ 0  0  0]\n  [ 6  7  8]\n  [ 9 10 11]\n  [ 0  0  0]\n  [ 0  0  0]]]"
       }
     ]
   },
@@ -4311,32 +4469,32 @@
     "name": "ZeroPadding2D",
     "module": "tensorflow.keras.layers",
     "category": "Tensor",
-    "description": "Zero-padding layer for 2D input (e.g. picture).\n\nThis layer can add rows and columns of zeros\nat the top, bottom, left and right side of an image tensor.",
+    "description": "Zero-padding layer for 2D input (e.g. picture).\n\nThis layer can add rows and columns of zeros at the top, bottom, left and\nright side of an image tensor.",
     "attributes": [
       {
-        "description": "Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints.\n    - If int: the same symmetric padding\n      is applied to height and width.\n    - If tuple of 2 ints:\n      interpreted as two different\n      symmetric padding values for height and width:\n      `(symmetric_height_pad, symmetric_width_pad)`.\n    - If tuple of 2 tuples of 2 ints:\n      interpreted as\n      `((top_pad, bottom_pad), (left_pad, right_pad))`",
+        "description": "Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints.\n        - If int: the same symmetric padding is applied to height and width.\n        - If tuple of 2 ints: interpreted as two different symmetric padding\n          values for height and width:\n          `(symmetric_height_pad, symmetric_width_pad)`.\n        - If tuple of 2 tuples of 2 ints: interpreted as\n         `((top_pad, bottom_pad), (left_pad, right_pad))`.",
         "name": "padding"
       },
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, height, width, channels)` while `channels_first`\n    corresponds to inputs with shape\n    `(batch_size, channels, height, width)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string, one of `\"channels_last\"` (default) or\n        `\"channels_first\"`. The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch_size, height, width, channels)` while `\"channels_first\"`\n        corresponds to inputs with shape\n        `(batch_size, channels, height, width)`.\n        When unspecified, uses `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json` (if exists). Defaults to\n        `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n    `(batch_size, rows, cols, channels)`\n- If `data_format` is `\"channels_first\"`:\n    `(batch_size, channels, rows, cols)`",
+        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, height, width, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, height, width)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n    `(batch_size, padded_rows, padded_cols, channels)`\n- If `data_format` is `\"channels_first\"`:\n    `(batch_size, channels, padded_rows, padded_cols)`",
+        "description": "4D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, padded_height, padded_width, channels)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, channels, padded_height, padded_width)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (1, 1, 2, 2)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> print(x)\n[[[[0 1]\n   [2 3]]]]\n>>> y = tf.keras.layers.ZeroPadding2D(padding=1)(x)\n>>> print(y)\ntf.Tensor(\n  [[[[0 0]\n     [0 0]\n     [0 0]\n     [0 0]]\n    [[0 0]\n     [0 1]\n     [2 3]\n     [0 0]]\n    [[0 0]\n     [0 0]\n     [0 0]\n     [0 0]]]], shape=(1, 3, 4, 2), dtype=int64)"
+        "code": ">>> input_shape = (1, 1, 2, 2)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> x\n[[[[0 1]\n   [2 3]]]]\n>>> y = keras.layers.ZeroPadding2D(padding=1)(x)\n>>> y\n[[[[0 0]\n   [0 0]\n   [0 0]\n   [0 0]]\n  [[0 0]\n   [0 1]\n   [2 3]\n   [0 0]]\n  [[0 0]\n   [0 0]\n   [0 0]\n   [0 0]]]]"
       }
     ]
   },
@@ -4347,29 +4505,29 @@
     "description": "Zero-padding layer for 3D data (spatial or spatio-temporal).",
     "attributes": [
       {
-        "description": "Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.\n    - If int: the same symmetric padding\n      is applied to height and width.\n    - If tuple of 3 ints:\n      interpreted as two different\n      symmetric padding values for height and width:\n      `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`.\n    - If tuple of 3 tuples of 2 ints:\n      interpreted as\n      `((left_dim1_pad, right_dim1_pad), (left_dim2_pad,\n        right_dim2_pad), (left_dim3_pad, right_dim3_pad))`",
+        "description": "Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.\n        - If int: the same symmetric padding is applied to depth, height,\n          and width.\n        - If tuple of 3 ints: interpreted as three different symmetric\n          padding values for depth, height, and width:\n          `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`.\n        - If tuple of 3 tuples of 2 ints: interpreted as\n          `((left_dim1_pad, right_dim1_pad), (left_dim2_pad,\n          right_dim2_pad), (left_dim3_pad, right_dim3_pad))`.",
         "name": "padding"
       },
       {
-        "description": "A string,\n    one of `channels_last` (default) or `channels_first`.\n    The ordering of the dimensions in the inputs.\n    `channels_last` corresponds to inputs with shape\n    `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n    while `channels_first` corresponds to inputs with shape\n    `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n    When unspecified, uses\n    `image_data_format` value found in your Keras config file at\n     `~/.keras/keras.json` (if exists) else 'channels_last'.\n    Defaults to 'channels_last'.",
+        "description": "A string, one of `\"channels_last\"` (default) or\n        `\"channels_first\"`. The ordering of the dimensions in the inputs.\n        `\"channels_last\"` corresponds to inputs with shape\n        `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`\n        while `\"channels_first\"` corresponds to inputs with shape\n        `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.\n        When unspecified, uses `image_data_format` value found in your Keras\n        config file at `~/.keras/keras.json` (if exists). Defaults to\n        `\"channels_last\"`.",
         "name": "data_format"
       }
     ],
     "inputs": [
       {
-        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n    `(batch_size, first_axis_to_pad, second_axis_to_pad,\n    third_axis_to_pad, depth)`\n- If `data_format` is `\"channels_first\"`:\n    `(batch_size, depth, first_axis_to_pad, second_axis_to_pad,\n    third_axis_to_pad)`",
+        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, first_axis_to_pad, second_axis_to_pad,\n  third_axis_to_pad, depth)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, depth, first_axis_to_pad, second_axis_to_pad,\n  third_axis_to_pad)`",
         "name": "input"
       }
     ],
     "outputs": [
       {
-        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n    `(batch_size, first_padded_axis, second_padded_axis,\n    third_axis_to_pad, depth)`\n- If `data_format` is `\"channels_first\"`:\n    `(batch_size, depth, first_padded_axis, second_padded_axis,\n      third_axis_to_pad)`",
+        "description": "5D tensor with shape:\n- If `data_format` is `\"channels_last\"`:\n  `(batch_size, first_padded_axis, second_padded_axis,\n  third_padded_axis, depth)`\n- If `data_format` is `\"channels_first\"`:\n  `(batch_size, depth, first_padded_axis, second_padded_axis,\n  third_padded_axis)`",
         "name": "output"
       }
     ],
     "examples": [
       {
-        "code": ">>> input_shape = (1, 1, 2, 2, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> y = tf.keras.layers.ZeroPadding3D(padding=2)(x)\n>>> print(y.shape)\n(1, 5, 6, 6, 3)"
+        "code": ">>> input_shape = (1, 1, 2, 2, 3)\n>>> x = np.arange(np.prod(input_shape)).reshape(input_shape)\n>>> y = keras.layers.ZeroPadding3D(padding=2)(x)\n>>> y.shape\n(1, 5, 6, 6, 3)"
       }
     ]
   },
@@ -4391,7 +4549,7 @@
         "name": "output"
       }
     ],
-    "description": "Dot-product attention layer, a.k.a. Luong-style attention.\n\nInputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor\nof shape `[batch_size, Tv, dim]` and `key` tensor of shape\n`[batch_size, Tv, dim]`. The calculation follows the steps:\n\n1. Calculate scores with shape `[batch_size, Tq, Tv]` as a `query`-`key` dot\n    product: `scores = tf.matmul(query, key, transpose_b=True)`.\n2. Use scores to calculate a distribution with shape\n    `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`.\n3. Use `distribution` to create a linear combination of `value` with\n     shape `[batch_size, Tq, dim]`:\n     `return tf.matmul(distribution, value)`.",
+    "description": "Dot-product attention layer, a.k.a. Luong-style attention.\n\nInputs are a list with 2 or 3 elements:\n1. A `query` tensor of shape `(batch_size, Tq, dim)`.\n2. A `value` tensor of shape `(batch_size, Tv, dim)`.\n3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none\n    supplied, `value` will be used as a `key`.\n\nThe calculation follows the steps:\n1. Calculate attention scores using `query` and `key` with shape\n    `(batch_size, Tq, Tv)`.\n2. Use scores to calculate a softmax distribution with shape\n    `(batch_size, Tq, Tv)`.\n3. Use the softmax distribution to create a linear combination of `value`\n    with shape `(batch_size, Tq, dim)`.",
     "attributes": [
       {
         "name": "use_scale",
@@ -4403,15 +4561,15 @@
       },
       {
         "name": "dropout",
-        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        attention scores. Defaults to 0.0."
+        "description": "Float between 0 and 1. Fraction of the units to drop for the\n        attention scores. Defaults to `0.0`."
       },
       {
         "name": "inputs",
-        "description": "List of the following tensors:\n        * query: Query `Tensor` of shape `[batch_size, Tq, dim]`.\n        * value: Value `Tensor` of shape `[batch_size, Tv, dim]`.\n        * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If\n            not given, will use `value` for both `key` and `value`, which is\n            the most common case."
+        "description": "List of the following tensors:\n        - `query`: Query tensor of shape `(batch_size, Tq, dim)`.\n        - `value`: Value tensor of shape `(batch_size, Tv, dim)`.\n        - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If\n            not given, will use `value` for both `key` and `value`, which is\n            the most common case."
       },
       {
         "name": "mask",
-        "description": "List of the following tensors:\n        * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`.\n            If given, the output will be zero at the positions where\n            `mask==False`.\n        * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`.\n            If given, will apply the mask such that values at positions\n             where `mask==False` do not contribute to the result."
+        "description": "List of the following tensors:\n        - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.\n            If given, the output will be zero at the positions where\n            `mask==False`.\n        - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.\n            If given, will apply the mask such that values at positions\n             where `mask==False` do not contribute to the result."
       },
       {
         "name": "return_attention_scores",
@@ -4423,11 +4581,11 @@
       },
       {
         "name": "score_mode",
-        "description": "Function to use to compute attention scores, one of\n        `{\"dot\", \"concat\"}`. `\"dot\"` refers to the dot product between the\n        query and key vectors. `\"concat\"` refers to the hyperbolic tangent\n        of the concatenation of the query and key vectors."
+        "description": "Function to use to compute attention scores, one of\n        `{\"dot\", \"concat\"}`. `\"dot\"` refers to the dot product between the\n        query and key vectors. `\"concat\"` refers to the hyperbolic tangent\n        of the concatenation of the `query` and `key` vectors."
       },
       {
         "name": "use_causal_mask",
-        "description": "Boolean. Set to `True` for decoder self-attention. Adds\n        a mask such that position `i` cannot attend to positions `j > i`.\n        This prevents the flow of information from the future towards the\n        past.\n        Defaults to `False`.\n\nOutput:\n\n    Attention outputs of shape `[batch_size, Tq, dim]`.\n    [Optional] Attention scores after masking and softmax with shape\n        `[batch_size, Tq, Tv]`.\n\nThe meaning of `query`, `value` and `key` depend on the application. In the\ncase of text similarity, for example, `query` is the sequence embeddings of\nthe first piece of text and `value` is the sequence embeddings of the second\npiece of text. `key` is usually the same tensor as `value`.\n\nHere is a code example for using `Attention` in a CNN+Attention network:\n\n```python\n# Variable-length int sequences.\nquery_input = tf.keras.Input(shape=(None,), dtype='int32')\nvalue_input = tf.keras.Input(shape=(None,), dtype='int32')\n\n# Embedding lookup.\ntoken_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)\n# Query embeddings of shape [batch_size, Tq, dimension].\nquery_embeddings = token_embedding(query_input)\n# Value embeddings of shape [batch_size, Tv, dimension].\nvalue_embeddings = token_embedding(value_input)\n\n# CNN layer.\ncnn_layer = tf.keras.layers.Conv1D(\n    filters=100,\n    kernel_size=4,\n    # Use 'same' padding so outputs have the same shape as inputs.\n    padding='same')\n# Query encoding of shape [batch_size, Tq, filters].\nquery_seq_encoding = cnn_layer(query_embeddings)\n# Value encoding of shape [batch_size, Tv, filters].\nvalue_seq_encoding = cnn_layer(value_embeddings)\n\n# Query-value attention of shape [batch_size, Tq, filters].\nquery_value_attention_seq = tf.keras.layers.Attention()(\n    [query_seq_encoding, value_seq_encoding])\n\n# Reduce over the sequence axis to produce encodings of shape\n# [batch_size, filters].\nquery_encoding = 
tf.keras.layers.GlobalAveragePooling1D()(\n    query_seq_encoding)\nquery_value_attention = tf.keras.layers.GlobalAveragePooling1D()(\n    query_value_attention_seq)\n\n# Concatenate query and document encodings to produce a DNN input layer.\ninput_layer = tf.keras.layers.Concatenate()(\n    [query_encoding, query_value_attention])\n\n# Add DNN layers, and create Model.\n# ...\n```"
+        "description": "Boolean. Set to `True` for decoder self-attention. Adds\n        a mask such that position `i` cannot attend to positions `j > i`.\n        This prevents the flow of information from the future towards the\n        past. Defaults to `False`.\n\nOutput:\n    Attention outputs of shape `(batch_size, Tq, dim)`.\n    (Optional) Attention scores after masking and softmax with shape\n        `(batch_size, Tq, Tv)`."
       }
     ]
   },