5.41 language_modeling tf1
leehanchung committed Sep 16, 2019
1 parent 05b5b28 commit bbf2f1b
Showing 1 changed file with 67 additions and 41 deletions.
assignment3/1 Language Modeling.ipynb: 67 additions & 41 deletions
@@ -403,7 +403,7 @@
" # vocabulary (vocab_size).\n",
" # output_logits should be of shape [None,input_length,vocab_size]\n",
" # You can look at the tf.layers.dense function\n",
" self.output_logits = tf.layers.dense(outputs, vocab_size)\n",
" self.output_logits = tf.layers.dense(outputs, units=vocab_size)\n",
" \n",
" # Setup the loss: using the sparse_softmax_cross_entropy.\n",
" # The logits are the output_logits we've computed.\n",
@@ -448,10 +448,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From <ipython-input-7-af7d1a29f2df>:23: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"WARNING:tensorflow:From <ipython-input-7-b75f7ea36b08>:23: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n",
"WARNING:tensorflow:From <ipython-input-7-af7d1a29f2df>:31: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"WARNING:tensorflow:From <ipython-input-7-b75f7ea36b08>:31: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n",
"WARNING:tensorflow:From c:\\users\\han-chung lee\\.virtualenvs\\assignment3-xzjguwob\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn_cell_impl.py:735: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
@@ -460,7 +460,7 @@
"WARNING:tensorflow:From c:\\users\\han-chung lee\\.virtualenvs\\assignment3-xzjguwob\\lib\\site-packages\\tensorflow_core\\python\\ops\\rnn_cell_impl.py:739: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n",
"WARNING:tensorflow:From <ipython-input-7-af7d1a29f2df>:37: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
"WARNING:tensorflow:From <ipython-input-7-b75f7ea36b08>:37: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use keras.layers.Dense instead.\n",
"WARNING:tensorflow:From c:\\users\\han-chung lee\\.virtualenvs\\assignment3-xzjguwob\\lib\\site-packages\\tensorflow_core\\python\\layers\\core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
@@ -516,16 +516,35 @@
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def evaluate(d_valid, batch_size):\n",
" # simple evaluate function using the d_valid defined up top\n",
" eval_input, eval_target, eval_target_mask = build_batch(d_valid, batch_size)\n",
" feed = {model.input_num: eval_input, model.targets: eval_target, model.targets_mask: eval_target_mask}\n",
" loss = sess.run([model.loss], feed_dict=feed)\n",
" return loss[0]"
]
},
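
build_batch is defined earlier in the notebook and is not part of this diff. The following is a hypothetical sketch of the contract its call sites imply (the fixed input_length of 20 and pad index 0 are assumptions), not the author's implementation:

    import random
    import numpy as np

    def build_batch_sketch(dataset, batch_size, input_length=20, pad_index=0):
        # Sample numerized headlines, shift them by one step so word t
        # predicts word t+1, pad to a fixed length, and mask the padding.
        samples = random.sample(dataset, batch_size)
        batch_input = np.full((batch_size, input_length), pad_index, dtype=np.int32)
        batch_target = np.full((batch_size, input_length), pad_index, dtype=np.int32)
        batch_mask = np.zeros((batch_size, input_length), dtype=np.float32)
        for i, seq in enumerate(samples):
            seq = seq[:input_length + 1]
            n = len(seq) - 1
            batch_input[i, :n] = seq[:-1]
            batch_target[i, :n] = seq[1:]
            batch_mask[i, :n] = 1.0  # loss is counted only on real tokens
        return batch_input, batch_target, batch_mask
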
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.210281\n",
"7.118761\n",
"6.807353\n",
"6.66833\n",
"7.1950293\n"
"Epoch 0 -- Training loss: 6.484584331512451 -- Validation loss 6.577706336975098\n",
"Epoch 1 -- Training loss: 6.127745151519775 -- Validation loss 6.311026096343994\n",
"Epoch 2 -- Training loss: 5.9757795333862305 -- Validation loss 6.007895469665527\n",
"Epoch 3 -- Training loss: 5.789333820343018 -- Validation loss 6.405399322509766\n",
"Epoch 4 -- Training loss: 5.610446453094482 -- Validation loss 6.467176914215088\n",
"Epoch 5 -- Training loss: 5.39146614074707 -- Validation loss 6.104674816131592\n",
"Epoch 6 -- Training loss: 5.226644515991211 -- Validation loss 5.918052673339844\n",
"Epoch 7 -- Training loss: 5.259573936462402 -- Validation loss 4.922938823699951\n",
"Epoch 8 -- Training loss: 5.116381645202637 -- Validation loss 6.217684745788574\n",
"Epoch 9 -- Training loss: 4.962327003479004 -- Validation loss 4.070274353027344\n"
]
}
],
@@ -536,28 +555,35 @@
"\n",
"experiment = root_folder+\"models\\magic_model\"\n",
"\n",
"epochs = 10\n",
"batch_size = 256\n",
"val_batch_size = batch_size * len(d_valid) // len(d_train)\n",
"num_batches = len(d_train) // batch_size\n",
"losses = []\n",
"\n",
"with tf.Session() as sess:\n",
" # Here is how you initialize weights of the model according to their\n",
" # Initialization parameters.\n",
" sess.run(tf.global_variables_initializer())\n",
"\n",
" for i in range(500):\n",
" # Here is how you obtain a batch:\n",
" batch_size = 16\n",
" batch_input, batch_target, batch_target_mask = build_batch(d_train, batch_size)\n",
" # Map the values to each tensor in a `feed_dict`\n",
" feed = {model.input_num: batch_input, model.targets: batch_target, model.targets_mask: batch_target_mask}\n",
" for epoch in range(epochs):\n",
" for i in range(num_batches):\n",
" # Here is how you obtain a batch:\n",
" batch_input, batch_target, batch_target_mask = build_batch(d_train, batch_size)\n",
" # Map the values to each tensor in a `feed_dict`\n",
" feed = {model.input_num: batch_input, model.targets: batch_target, model.targets_mask: batch_target_mask}\n",
"\n",
" # Obtain a single value of the loss for that batch.\n",
" # !IMPORTANT! Don't forget to include the train_op to when using a batch from the training dataset\n",
" # (d_train)\n",
" # !MORE IMPORTANT! Don't use the train_op if you evaluate the loss on the validation set,\n",
" # Otherwise, your network will overfit on your validation dataset.\n",
" # Obtain a single value of the loss for that batch.\n",
" # !IMPORTANT! Don't forget to include the train_op to when using a batch from the training dataset\n",
" # (d_train)\n",
" # !MORE IMPORTANT! Don't use the train_op if you evaluate the loss on the validation set,\n",
" # Otherwise, your network will overfit on your validation dataset.\n",
"\n",
" step, train_loss, _ = sess.run([model.global_step, model.loss, model.train_op], feed_dict=feed)\n",
" \n",
" if i % 100 == 0:\n",
" print(train_loss)\n",
" step, train_loss, _ = sess.run([model.global_step, model.loss, model.train_op], feed_dict=feed)\n",
"\n",
" val_loss = evaluate(d_valid, val_batch_size)\n",
" losses.append([train_loss, val_loss])\n",
" print(f\"Epoch {epoch} -- Training loss: {train_loss} -- Validation loss {val_loss}\")\n",
"\n",
" # Here is how you save the model weights\n",
" model.saver.save(sess, experiment)\n",
@@ -594,7 +620,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ … @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Restoring parameters from models\\magic_model\n",
"Evaluation set loss: [6.77481]\n"
"Evaluation set loss: [5.4138093]\n"
]
}
],
@@ -653,7 +679,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {
"hidden": true
},
@@ … @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -681,10 +707,10 @@
"INFO:tensorflow:Restoring parameters from models\\magic_model\n",
"----------------------------------------\n",
"Headline: apple to release new iphone in july\n",
"Loss of the headline: [5.360189]\n",
"Loss of the headline: [3.4388044]\n",
"----------------------------------------\n",
"Headline: apple and samsung resolve all lawsuits\n",
"Loss of the headline: [7.231014]\n"
"Loss of the headline: [5.6819367]\n"
]
}
],
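
The source of this cell is collapsed in the commit view. A hypothetical sketch of how a single headline's loss is plausibly computed, reusing names visible elsewhere in the notebook (tokenizer.word_tokenizer, numerize_sequence, start_index, model); the padding details are assumptions, not the author's code:

    import numpy as np

    def headline_loss_sketch(sess, headline, input_length=20, pad_index=0):
        numerized = [start_index] + numerize_sequence(tokenizer.word_tokenizer(headline))
        inp = np.full((1, input_length), pad_index, dtype=np.int32)
        tgt = np.full((1, input_length), pad_index, dtype=np.int32)
        mask = np.zeros((1, input_length), dtype=np.float32)
        n = min(len(numerized) - 1, input_length)
        inp[0, :n] = numerized[:n]        # words 0..n-1 as inputs
        tgt[0, :n] = numerized[1:n + 1]   # words 1..n as next-word targets
        mask[0, :n] = 1.0
        feed = {model.input_num: inp, model.targets: tgt, model.targets_mask: mask}
        return sess.run([model.loss], feed_dict=feed)
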
@@ -751,7 +777,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -761,16 +787,16 @@
"INFO:tensorflow:Restoring parameters from models\\magic_model\n",
"===================\n",
"Generating headline starting with: apple has released\n",
"<START> apple has released UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n",
"<START> apple has released a new iphone x , but it's not a UNK ? the best thing you can\n",
"===================\n",
"Generating headline starting with: google has released\n",
"<START> google has released UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n",
"<START> google has released a new UNK to the UNK UNK UNK UNK UNK UNK UNK , and it will\n",
"===================\n",
"Generating headline starting with: amazon\n",
"<START> amazon UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n",
"<START> amazon to launch new york city UNK UNK UNK UNK UNK UNK , UNK UNK UNK UNK UNK UNK\n",
"===================\n",
"Generating headline starting with: tesla to\n",
"<START> tesla to UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK\n"
"<START> tesla to launch apple watch in china , UNK , UNK , UNK , UNK , UNK , UNK\n"
]
}
],
@@ -791,7 +817,7 @@
" # beginning in `current_build`\n",
" tokenized = tokenizer.word_tokenizer(headline_starter)\n",
" current_build = [start_index] + numerize_sequence(tokenized)\n",
"\n",
" \n",
" while len(current_build) < input_length:\n",
" # Pad the current_build into a input_length vector.\n",
" # We do this so that it can be processed by our LanguageModel class\n",
@@ … @@
" # Obtain the logits for the current padded sequence\n",
" # This involves obtaining the output_logits from our model,\n",
" # and not the loss like we have done so far\n",
" logits = sess.run([model.output_logits], feed_dict={model.input_num:current_padded})\n",
"# print(np.array(logits).shape)\n",
" logits = sess.run([model.output_logits], feed_dict={model.input_num:current_padded})[0]\n",
"\n",
" # Obtain the row of logits that interest us, the logits for the last non-pad\n",
" # inputs\n",
" last_logits = logits[0][0][len(current_build)-1]\n",
"# print(last_logits)\n",
" # logits has shape of [none, input_num, vocab_size]\n",
" # take the vocab vector for the last word in the sentence\n",
" last_logits = logits[0][len(current_build)-1]\n",
" \n",
" # Find the highest scoring word in the last_logits\n",
" # array. The np.argmax function should be useful.\n",
