5.41 language_modeling tf1
leehanchung committed Sep 16, 2019
1 parent 05b5b28 commit bbf2f1b
Showing 1 changed file with 67 additions and 41 deletions.
assignment3/1 Language Modeling.ipynb: 67 additions & 41 deletions
@@ -403,7 +403,7 @@
" # vocabulary (vocab_size).\n",
" # output_logits should be of shape [None,input_length,vocab_size]\n",
" # You can look at the tf.layers.dense function\n",
" self.output_logits = tf.layers.dense(outputs, vocab_size)\n",
" self.output_logits = tf.layers.dense(outputs, units=vocab_size)\n",
" \n",
" # Setup the loss: using the sparse_softmax_cross_entropy.\n",
" # The logits are the output_logits we've computed.\n",
@@ -448,10 +448,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From <ipython-input-7-af7d1a29f2df>:23: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"WARNING:tensorflow:From <ipython-input-7-b75f7ea36b08>:23: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n",
"WARNING:tensorflow:From <ipython-input-7-af7d1a29f2df>:31: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"WARNING:tensorflow:From <ipython-input-7-b75f7ea36b08>:31: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n",
"WARNING:tensorflow:From c:\\users\\han-chung lee\\.virtualenvs\\assignment3-xzjguwob\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn_cell_impl.py:735: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
@@ -460,7 +460,7 @@
"WARNING:tensorflow:From c:\\users\\han-chung lee\\.virtualenvs\\assignment3-xzjguwob\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn_cell_impl.py:739: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n",
"WARNING:tensorflow:From <ipython-input-7-af7d1a29f2df>:37: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
"WARNING:tensorflow:From <ipython-input-7-b75f7ea36b08>:37: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use keras.layers.Dense instead.\n",
"WARNING:tensorflow:From c:\\users\\han-chung lee\\.virtualenvs\\assignment3-xzjguwob\\lib\\site-packages\\tensorflow_core\\python\\layers\\core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
@@ -516,16 +516,35 @@
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def evaluate(d_valid, batch_size):\n",
" # simple evaluate function using the d_valid defined up top\n",
" eval_input, eval_target, eval_target_mask = build_batch(d_valid, batch_size)\n",
" feed = {model.input_num: eval_input, model.targets: eval_target, model.targets_mask: eval_target_mask}\n",
" loss = sess.run([model.loss], feed_dict=feed)\n",
" return loss[0]"
]
},
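
build_batch is defined earlier in the notebook and is not part of this diff. The following is a hypothetical sketch of the contract its call sites imply (the fixed input_length of 20 and pad index 0 are assumptions), not the author's implementation:

    import random
    import numpy as np

    def build_batch_sketch(dataset, batch_size, input_length=20, pad_index=0):
        # Sample numerized headlines, shift them by one step so word t
        # predicts word t+1, pad to a fixed length, and mask the padding.
        samples = random.sample(dataset, batch_size)
        batch_input = np.full((batch_size, input_length), pad_index, dtype=np.int32)
        batch_target = np.full((batch_size, input_length), pad_index, dtype=np.int32)
        batch_mask = np.zeros((batch_size, input_length), dtype=np.float32)
        for i, seq in enumerate(samples):
            seq = seq[:input_length + 1]
            n = len(seq) - 1
            batch_input[i, :n] = seq[:-1]
            batch_target[i, :n] = seq[1:]
            batch_mask[i, :n] = 1.0  # loss is counted only on real tokens
        return batch_input, batch_target, batch_mask
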
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.210281\n",
"7.118761\n",
"6.807353\n",
"6.66833\n",
"7.1950293\n"
"Epoch 0 -- Training loss: 6.484584331512451 -- Validation loss 6.577706336975098\n",
"Epoch 1 -- Training loss: 6.127745151519775 -- Validation loss 6.311026096343994\n",
"Epoch 2 -- Training loss: 5.9757795333862305 -- Validation loss 6.007895469665527\n",
"Epoch 3 -- Training loss: 5.789333820343018 -- Validation loss 6.405399322509766\n",
"Epoch 4 -- Training loss: 5.610446453094482 -- Validation loss 6.467176914215088\n",
"Epoch 5 -- Training loss: 5.39146614074707 -- Validation loss 6.104674816131592\n",
"Epoch 6 -- Training loss: 5.226644515991211 -- Validation loss 5.918052673339844\n",
"Epoch 7 -- Training loss: 5.259573936462402 -- Validation loss 4.922938823699951\n",
"Epoch 8 -- Training loss: 5.116381645202637 -- Validation loss 6.217684745788574\n",
"Epoch 9 -- Training loss: 4.962327003479004 -- Validation loss 4.070274353027344\n"
]
}
],
@@ -536,28 +555,35 @@
"\n",
"experiment = root_folder+\"models\\magic_model\"\n",
"\n",
"epochs = 10\n",
"batch_size = 256\n",
"val_batch_size = batch_size * len(d_valid) // len(d_train)\n",
"num_batches = len(d_train) // batch_size\n",
"losses = []\n",
"\n",
"with tf.Session() as sess:\n",
" # Here is how you initialize weights of the model according to their\n",
" # Initialization parameters.\n",
" sess.run(tf.global_variables_initializer())\n",
"\n",
" for i in range(500):\n",
" # Here is how you obtain a batch:\n",
" batch_size = 16\n",
" batch_input, batch_target, batch_target_mask = build_batch(d_train, batch_size)\n",
" # Map the values to each tensor in a `feed_dict`\n",
" feed = {model.input_num: batch_input, model.targets: batch_target, model.targets_mask: batch_target_mask}\n",
" for epoch in range(epochs):\n",
" for i in range(num_batches):\n",
" # Here is how you obtain a batch:\n",
" batch_input, batch_target, batch_target_mask = build_batch(d_train, batch_size)\n",
" # Map the values to each tensor in a `feed_dict`\n",
" feed = {model.input_num: batch_input, model.targets: batch_target, model.targets_mask: batch_target_mask}\n",
"\n",
" # Obtain a single value of the loss for that batch.\n",
" # !IMPORTANT! Don't forget to include the train_op to when using a batch from the training dataset\n",
" # (d_train)\n",
" # !MORE IMPORTANT! Don't use the train_op if you evaluate the loss on the validation set,\n",
" # Otherwise, your network will overfit on your validation dataset.\n",
" # Obtain a single value of the loss for that batch.\n",
" # !IMPORTANT! Don't forget to include the train_op to when using a batch from the training dataset\n",
" # (d_train)\n",
" # !MORE IMPORTANT! Don't use the train_op if you evaluate the loss on the validation set,\n",
" # Otherwise, your network will overfit on your validation dataset.\n",
"\n",
" step, train_loss, _ = sess.run([model.global_step, model.loss, model.train_op], feed_dict=feed)\n",
" \n",
" if i % 100 == 0:\n",
" print(train_loss)\n",
" step, train_loss, _ = sess.run([model.global_step, model.loss, model.train_op], feed_dict=feed)\n",
"\n",
" val_loss = evaluate(d_valid, val_batch_size)\n",
" losses.append([train_loss, val_loss])\n",
" print(f\"Epoch {epoch} -- Training loss: {train_loss} -- Validation loss {val_loss}\")\n",
"\n",
" # Here is how you save the model weights\n",
" model.saver.save(sess, experiment)\n",
@@ -594,7 +620,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ … @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Restoring parameters from models\\magic_model\n",
"Evaluation set loss: [6.77481]\n"
"Evaluation set loss: [5.4138093]\n"
]
}
],
@@ -653,7 +679,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {
"hidden": true
},
@@ … @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -681,10 +707,10 @@
"INFO:tensorflow:Restoring parameters from models\\magic_model\n",
"----------------------------------------\n",
"Headline: apple to release new iphone in july\n",
"Loss of the headline: [5.360189]\n",
"Loss of the headline: [3.4388044]\n",
"----------------------------------------\n",
"Headline: apple and samsung resolve all lawsuits\n",
"Loss of the headline: [7.231014]\n"
"Loss of the headline: [5.6819367]\n"
]
}
],
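
The source of this cell is collapsed in the commit view. A hypothetical sketch of how a single headline's loss is plausibly computed, reusing names visible elsewhere in the notebook (tokenizer.word_tokenizer, numerize_sequence, start_index, model); the padding details are assumptions, not the author's code:

    import numpy as np

    def headline_loss_sketch(sess, headline, input_length=20, pad_index=0):
        numerized = [start_index] + numerize_sequence(tokenizer.word_tokenizer(headline))
        inp = np.full((1, input_length), pad_index, dtype=np.int32)
        tgt = np.full((1, input_length), pad_index, dtype=np.int32)
        mask = np.zeros((1, input_length), dtype=np.float32)
        n = min(len(numerized) - 1, input_length)
        inp[0, :n] = numerized[:n]        # words 0..n-1 as inputs
        tgt[0, :n] = numerized[1:n + 1]   # words 1..n as next-word targets
        mask[0, :n] = 1.0
        feed = {model.input_num: inp, model.targets: tgt, model.targets_mask: mask}
        return sess.run([model.loss], feed_dict=feed)
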
@@ -751,7 +777,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -761,16 +787,16 @@
"INFO:tensorflow:Restoring parameters from models\\magic_model\n",
"===================\n",
"Generating headline starting with: apple has released\n",
"<START> apple has released UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n",
"<START> apple has released a new iphone x , but it's not a UNK ? the best thing you can\n",
"===================\n",
"Generating headline starting with: google has released\n",
"<START> google has released UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n",
"<START> google has released a new UNK to the UNK UNK UNK UNK UNK UNK UNK , and it will\n",
"===================\n",
"Generating headline starting with: amazon\n",
"<START> amazon UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n",
"<START> amazon to launch new york city UNK UNK UNK UNK UNK UNK , UNK UNK UNK UNK UNK UNK\n",
"===================\n",
"Generating headline starting with: tesla to\n",
"<START> tesla to UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n"
"<START> tesla to launch apple watch in china , UNK , UNK , UNK , UNK , UNK , UNK\n"
]
}
],
@@ -791,7 +817,7 @@
" # beginning in `current_build`\n",
" tokenized = tokenizer.word_tokenizer(headline_starter)\n",
" current_build = [start_index] + numerize_sequence(tokenized)\n",
"\n",
" \n",
" while len(current_build) < input_length:\n",
" # Pad the current_build into a input_length vector.\n",
" # We do this so that it can be processed by our LanguageModel class\n",
@@ … @@
" # Obtain the logits for the current padded sequence\n",
" # This involves obtaining the output_logits from our model,\n",
" # and not the loss like we have done so far\n",
" logits = sess.run([model.output_logits], feed_dict={model.input_num:current_padded})\n",
"# print(np.array(logits).shape)\n",
" logits = sess.run([model.output_logits], feed_dict={model.input_num:current_padded})[0]\n",
"\n",
" # Obtain the row of logits that interest us, the logits for the last non-pad\n",
" # inputs\n",
" last_logits = logits[0][0][len(current_build)-1]\n",
"# print(last_logits)\n",
" # logits has shape of [none, input_num, vocab_size]\n",
" # take the vocab vector for the last word in the sentence\n",
" last_logits = logits[0][len(current_build)-1]\n",
" \n",
" # Find the highest scoring word in the last_logits\n",
" # array. The np.argmax function should be useful.\n",
