From 4d43f09e35a1fd4e96c829c6cc518aa8c98b6a7a Mon Sep 17 00:00:00 2001
From: Han
Date: Tue, 24 Sep 2019 16:05:57 -0700
Subject: [PATCH] attention qkv

---
 assignment3/2 Summarization.ipynb    | 55 +++++++++++++++++++++-------
 assignment3/Pipfile                  |  1 -
 assignment3/transformer_attention.py | 13 ++++---
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/assignment3/2 Summarization.ipynb b/assignment3/2 Summarization.ipynb
index 3c1be07..9c55521 100644
--- a/assignment3/2 Summarization.ipynb
+++ b/assignment3/2 Summarization.ipynb
@@ -57,23 +57,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From /home/han/.virtualenvs/assignment3-p3x5XSoT/lib/python3.7/site-packages/tensorflow_core/python/compat/v2_compat.py:65: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
+      "Instructions for updating:\n",
+      "non-resource variables are not supported in the long term\n"
+     ]
+    }
+   ],
    "source": [
     "from transformer import Transformer\n",
     "import sentencepiece as spm\n",
-    "import tensorflow as tf\n",
+    "import tensorflow.compat.v1 as tf\n",
+    "tf.disable_v2_behavior()\n",
     "import numpy as np\n",
     "import json\n",
    "import capita\n",
     "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
     "root_folder = \"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -136,9 +150,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'\n",
+      "WARNING: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'\n",
+      "WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Bad argument number for Name: 3, expecting 4\n",
+      "WARNING: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Bad argument number for Name: 3, expecting 4\n",
+      "Total error on the output: 3.501772880554199e-07 (should be 0.0 or close to 0.0)\n",
+      "Total error on the weights: 1.816079020500183e-07 (should be 0.0 or close to 0.0)\n"
+     ]
+    }
+   ],
    "source": [
     "from transformer_attention import AttentionQKV\n",
     "\n",
@@ -824,23 +851,23 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python [default]",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/assignment3/Pipfile b/assignment3/Pipfile
index 2169850..74ef119 100644
--- a/assignment3/Pipfile
+++ b/assignment3/Pipfile
@@ -11,7 +11,6 @@ tqdm = "*"
 sentencepiece = "*"
 segtok = "*"
 tensorflow-gpu = "==2.0.0-rc1"
-juypterlab = "*"
 jupyterlab = "*"
 
 [requires]
diff --git a/assignment3/transformer_attention.py b/assignment3/transformer_attention.py
index d0eea6f..7735557 100644
--- a/assignment3/transformer_attention.py
+++ b/assignment3/transformer_attention.py
@@ -32,18 +32,19 @@ def call(self, queries, keys, values, mask=None):
         # depth_v is the size of the projection of the value projection. In a setting with one head, it is usually the dimension (dim) of the Transformer.
         # heads corresponds to the number of heads the attention is performed on.
         # If you are unfamiliar with attention heads, read section 3.2.2 of the Attentino is all you need paper
-        
+
         # PART 1: Implement Attention QKV
         # Use queries, keys and values to compute the output of the QKV attention
         # As defined is the Attention is all you need paper: https://arxiv.org/pdf/1706.03762.pdf
 
         key_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
 
-        similarity = None # Compute the similarity according to the QKV formula
+        similarity = tf.matmul(queries, keys, transpose_b=True) # Compute the similarity according to the QKV formula
 
-        masked_similarity = self.apply_mask(similarity, mask=mask) # We give you the mask to apply so that it is correct, you do not need to modify this.
-        weights = None # Turn the similarity into a normalized output
-        output = None # Obtain the output
+        # We give you the mask to apply so that it is correct, you do not need to modify this.
+        masked_similarity = self.apply_mask(similarity, mask=mask)
+        weights = tf.nn.softmax(masked_similarity/tf.sqrt(key_dim)) # Turn the similarity into a normalized output
+        output = tf.matmul(weights, values) # Obtain the output
 
         #################################### END OF YOUR CODE ##################################
         return output, weights
@@ -171,4 +172,4 @@ def call(self, inputs, mask=None):
         attention_output = self.attention_layer((q, k, v), mask=mask)
         output = self.output_layer(attention_output)
 
-        return output
\ No newline at end of file
+        return output
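
Note (not part of the patch): the transformer_attention.py hunk fills in scaled dot-product (QKV) attention from "Attention Is All You Need". Below is a minimal standalone sketch of that same computation, assuming eager TensorFlow 2 and omitting the mask; the function name attention_qkv and the toy shapes are illustrative only, not identifiers from the assignment.

    import tensorflow as tf

    def attention_qkv(queries, keys, values):
        # similarity: dot product of every query with every key -> (..., seq_q, seq_k)
        similarity = tf.matmul(queries, keys, transpose_b=True)
        # scale by sqrt(d_k) and normalize each row into attention weights
        key_dim = tf.cast(tf.shape(keys)[-1], tf.float32)
        weights = tf.nn.softmax(similarity / tf.sqrt(key_dim), axis=-1)
        # output: weighted sum of the values
        return tf.matmul(weights, values), weights

    # Toy usage (hypothetical shapes): batch 2, 5 query positions, 7 key/value positions, depth 16
    q = tf.random.normal([2, 5, 16])
    k = tf.random.normal([2, 7, 16])
    v = tf.random.normal([2, 7, 16])
    out, w = attention_qkv(q, k, v)
    print(out.shape, w.shape)  # (2, 5, 16) (2, 5, 7)

In the patch itself the mask is applied to the raw similarity via self.apply_mask before the softmax; the division by sqrt(key_dim) matches equation (1) of the paper.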