From 4d43f09e35a1fd4e96c829c6cc518aa8c98b6a7a Mon Sep 17 00:00:00 2001
From: Han
Date: Tue, 24 Sep 2019 16:05:57 -0700
Subject: [PATCH] attention qkv

---
 assignment3/2 Summarization.ipynb    | 55 +++++++++++++++++++++-------
 assignment3/Pipfile                  |  1 -
 assignment3/transformer_attention.py | 13 ++++---
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/assignment3/2 Summarization.ipynb b/assignment3/2 Summarization.ipynb
index 3c1be07..9c55521 100644
--- a/assignment3/2 Summarization.ipynb
+++ b/assignment3/2 Summarization.ipynb
@@ -57,23 +57,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From /home/han/.virtualenvs/assignment3-p3x5XSoT/lib/python3.7/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
+      "Instructions for updating:\n",
+      "non-resource variables are not supported in the long term\n"
+     ]
+    }
+   ],
    "source": [
     "from transformer import Transformer\n",
     "import sentencepiece as spm\n",
-    "import tensorflow as tf\n",
+    "import tensorflow.compat.v1 as tf\n",
+    "tf.disable_v2_behavior()\n",
     "import numpy as np\n",
     "import json\n",
    "import capita\n",
     "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
     "root_folder = \"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -136,9 +150,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'\n",
+      "WARNING: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'\n",
+      "WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Bad argument number for Name: 3, expecting 4\n",
+      "WARNING: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Bad argument number for Name: 3, expecting 4\n",
+      "Total error on the output: 3.501772880554199e-07 (should be 0.0 or close to 0.0)\n",
+      "Total error on the weights: 1.816079020500183e-07 (should be 0.0 or close to 0.0)\n"
+     ]
+    }
+   ],
    "source": [
     "from transformer_attention import AttentionQKV\n",
     "\n",
@@ -824,23 +851,23 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python [default]",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/assignment3/Pipfile b/assignment3/Pipfile
index 2169850..74ef119 100644
--- a/assignment3/Pipfile
+++ b/assignment3/Pipfile
@@ -11,7 +11,6 @@ tqdm = "*"
 sentencepiece = "*"
 segtok = "*"
 tensorflow-gpu = "==2.0.0-rc1"
-juypterlab = "*"
 jupyterlab = "*"
 
 [requires]
diff --git a/assignment3/transformer_attention.py b/assignment3/transformer_attention.py
index d0eea6f..7735557 100644
--- a/assignment3/transformer_attention.py
+++ b/assignment3/transformer_attention.py
@@ -32,18 +32,19 @@ def call(self, queries, keys, values, mask=None):
         # depth_v is the size of the projection of the value projection. In a setting with one head, it is usually the dimension (dim) of the Transformer.
         # heads corresponds to the number of heads the attention is performed on.
         # If you are unfamiliar with attention heads, read section 3.2.2 of the Attentino is all you need paper
-        
+
         # PART 1: Implement Attention QKV
         # Use queries, keys and values to compute the output of the QKV attention
         # As defined is the Attention is all you need paper: https://arxiv.org/pdf/1706.03762.pdf
 
         key_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
 
-        similarity = None # Compute the similarity according to the QKV formula
+        similarity = tf.matmul(queries, keys, transpose_b=True) # Compute the similarity according to the QKV formula
 
-        masked_similarity = self.apply_mask(similarity, mask=mask) # We give you the mask to apply so that it is correct, you do not need to modify this.
-        weights = None # Turn the similarity into a normalized output
-        output = None # Obtain the output
+        # We give you the mask to apply so that it is correct, you do not need to modify this.
+        masked_similarity = self.apply_mask(similarity, mask=mask)
+        weights = tf.nn.softmax(masked_similarity/tf.sqrt(key_dim)) # Turn the similarity into a normalized output
+        output = tf.matmul(weights, values) # Obtain the output
 
         #################################### END OF YOUR CODE ##################################
         return output, weights
@@ -171,4 +172,4 @@ def call(self, inputs, mask=None):
         attention_output = self.attention_layer((q, k, v), mask=mask)
         output = self.output_layer(attention_output)
 
-        return output
\ No newline at end of file
+        return output
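
Note (not part of the patch): the transformer_attention.py hunk fills in scaled dot-product (QKV) attention from "Attention Is All You Need". Below is a minimal standalone sketch of that same computation, assuming eager TensorFlow 2 and omitting the mask; the function name attention_qkv and the toy shapes are illustrative only, not identifiers from the assignment.

    import tensorflow as tf

    def attention_qkv(queries, keys, values):
        # similarity: dot product of every query with every key -> (..., seq_q, seq_k)
        similarity = tf.matmul(queries, keys, transpose_b=True)
        # scale by sqrt(d_k) and normalize each row into attention weights
        key_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
        weights = tf.nn.softmax(similarity / tf.sqrt(key_dim), axis=-1)
        # output: weighted sum of the values
        return tf.matmul(weights, values), weights

    # Toy usage (hypothetical shapes): batch 2, 5 query positions, 7 key/value positions, depth 16
    q = tf.random.normal([2, 5, 16])
    k = tf.random.normal([2, 7, 16])
    v = tf.random.normal([2, 7, 16])
    out, w = attention_qkv(q, k, v)
    print(out.shape, w.shape)  # (2, 5, 16) (2, 5, 7)

In the patch itself the mask is applied to the raw similarity via self.apply_mask before the softmax; the division by sqrt(key_dim) matches equation (1) of the paper.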