diff --git a/data_utils.py b/data_utils.py
index e16c895..85e1fbb 100755
--- a/data_utils.py
+++ b/data_utils.py
@@ -144,3 +144,4 @@ def prepare_custom_data(working_directory, train_enc, train_dec, test_enc, test_
   data_to_token_ids(test_dec, dec_dev_ids_path, dec_vocab_path, tokenizer)
 
   return (enc_train_ids_path, dec_train_ids_path, enc_dev_ids_path, dec_dev_ids_path, enc_vocab_path, dec_vocab_path)
+
diff --git a/neuralconvo.ini b/neuralconvo.ini
index 0048351..f179a3c 100755
--- a/neuralconvo.ini
+++ b/neuralconvo.ini
@@ -4,7 +4,8 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
+
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
diff --git a/seq2seq.ini b/seq2seq.ini
index 392f9da..467b212 100755
--- a/seq2seq.ini
+++ b/seq2seq.ini
@@ -4,7 +4,8 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
+
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
diff --git a/seq2seq_model.py b/seq2seq_model.py
index 5b9f39b..9a015da 100755
--- a/seq2seq_model.py
+++ b/seq2seq_model.py
@@ -19,23 +19,16 @@ from __future__ import division
 from __future__ import print_function
 
+import copy
 import random
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
 
-#from tensorflow.models.rnn.translate import data_utils
-#fixes File "execute.py", line 31, in
-    #import seq2seq_model
-  #File "C:\PYTHONCODE\Tensorflow\chatbot\tensorflow_chatbot\seq2seq_model.py", l
-#ine 28, in
-    #from tensorflow.models.rnn.translate import data_utils
-#ModuleNotFoundError: No module named 'tensorflow.models'
 import data_utils
 
-
 class Seq2SeqModel(object):
   """Sequence-to-sequence model with attention and for multiple buckets.
@@ -51,10 +44,20 @@ class Seq2SeqModel(object):
     http://arxiv.org/abs/1412.2007
   """
 
-  def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
-               num_layers, max_gradient_norm, batch_size, learning_rate,
-               learning_rate_decay_factor, use_lstm=False,
-               num_samples=512, forward_only=False):
+  def __init__(self,
+               source_vocab_size,
+               target_vocab_size,
+               buckets,
+               size,
+               num_layers,
+               max_gradient_norm,
+               batch_size,
+               learning_rate,
+               learning_rate_decay_factor,
+               use_lstm=False,
+               num_samples=512,
+               forward_only=False,
+               dtype=tf.float32):
     """Create the model.
 
     Args:
@@ -76,12 +79,14 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
       use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
+      dtype: the data type to use to store internal variables.
    """
    self.source_vocab_size = source_vocab_size
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
-    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
+    self.learning_rate = tf.Variable(
+        float(learning_rate), trainable=False, dtype=dtype)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
@@ -91,34 +96,52 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.target_vocab_size:
-      w = tf.get_variable("proj_w", [size, self.target_vocab_size])
-      w_t = tf.transpose(w)
-      b = tf.get_variable("proj_b", [self.target_vocab_size])
+      w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
+      w = tf.transpose(w_t)
+      b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
      output_projection = (w, b)
 
-      def sampled_loss(inputs, labels):
+      def sampled_loss(labels, logits):
        labels = tf.reshape(labels, [-1, 1])
-        return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
-                                          self.target_vocab_size)
+        # We need to compute the sampled_softmax_loss using 32bit floats to
+        # avoid numerical instabilities.
+        local_w_t = tf.cast(w_t, tf.float32)
+        local_b = tf.cast(b, tf.float32)
+        local_inputs = tf.cast(logits, tf.float32)
+        return tf.cast(
+            tf.nn.sampled_softmax_loss(
+                weights=local_w_t,
+                biases=local_b,
+                labels=labels,
+                inputs=local_inputs,
+                num_sampled=num_samples,
+                num_classes=self.target_vocab_size),
+            dtype)
      softmax_loss_function = sampled_loss
 
    # Create the internal multi-layer cell for our RNN.
-    single_cell = tf.nn.rnn_cell.GRUCell(size)
+    def single_cell():
+      return tf.contrib.rnn.GRUCell(size)
    if use_lstm:
-      single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
-    cell = single_cell
+      def single_cell():
+        return tf.contrib.rnn.BasicLSTMCell(size)
+    cell = single_cell()
    if num_layers > 1:
-      cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
+      cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])
 
    # The seq2seq function: we use embedding for the input and attention.
-    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-      return tf.nn.seq2seq.embedding_attention_seq2seq(
-          encoder_inputs, decoder_inputs, cell,
-          num_encoder_symbols=source_vocab_size,
-          num_decoder_symbols=target_vocab_size,
-          embedding_size=size,
-          output_projection=output_projection,
-          feed_previous=do_decode)
+    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode=False):
+      tmp_cell = copy.deepcopy(cell) #new
+      return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
+          encoder_inputs,
+          decoder_inputs,
+          tmp_cell, #new
+          num_encoder_symbols=source_vocab_size,
+          num_decoder_symbols=target_vocab_size,
+          embedding_size=size,
+          output_projection=output_projection,
+          feed_previous=do_decode,
+          dtype=dtype)
 
    # Feeds for inputs.
    self.encoder_inputs = []
@@ -130,7 +153,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
    for i in xrange(buckets[-1][1] + 1):
      self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="decoder{0}".format(i)))
-      self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
+      self.target_weights.append(tf.placeholder(dtype, shape=[None],
                                                name="weight{0}".format(i)))
 
    # Our targets are decoder inputs shifted by one.
@@ -139,7 +162,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
 
    # Training outputs and losses.
    if forward_only:
-      self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+      self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
          softmax_loss_function=softmax_loss_function)
@@ -151,7 +174,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
              for output in self.outputs[b]
          ]
    else:
-      self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+      self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets,
          lambda x, y: seq2seq_f(x, y, False),
@@ -171,7 +194,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        self.updates.append(opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step))
 
-    self.saver = tf.train.Saver(tf.all_variables())
+    self.saver = tf.train.Saver(tf.global_variables())
 
  def step(self, session, encoder_inputs, decoder_inputs, target_weights,
           bucket_id, forward_only):
diff --git a/seq2seq_serve.ini b/seq2seq_serve.ini
index fe09704..c5a9d7d 100755
--- a/seq2seq_serve.ini
+++ b/seq2seq_serve.ini
@@ -4,7 +4,7 @@ mode = serve
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
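For reference, a minimal usage sketch of the updated Seq2SeqModel constructor under TensorFlow 1.x, reflecting the signature changed above (including the new dtype keyword and the per-layer single_cell() factory). The vocabulary sizes, bucket list, and other hyperparameter values below are illustrative placeholders, not values taken from this repository's configs.

import tensorflow as tf

import seq2seq_model

# Illustrative bucket list; the real hyperparameters live in seq2seq.ini.
buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]

with tf.Session() as sess:
  model = seq2seq_model.Seq2SeqModel(
      source_vocab_size=20000,
      target_vocab_size=20000,
      buckets=buckets,
      size=256,                         # units per layer
      num_layers=3,                     # stacked via MultiRNNCell of fresh single_cell() instances
      max_gradient_norm=5.0,
      batch_size=64,
      learning_rate=0.5,
      learning_rate_decay_factor=0.99,
      use_lstm=False,                   # GRU cells by default
      num_samples=512,                  # sampled softmax, computed in float32 internally
      forward_only=False,               # also build the training (backward) graph
      dtype=tf.float32)                 # new keyword introduced by this patch
  sess.run(tf.global_variables_initializer())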