diff --git a/data_utils.py b/data_utils.py
index e16c895..85e1fbb 100755
--- a/data_utils.py
+++ b/data_utils.py
@@ -144,3 +144,4 @@ def prepare_custom_data(working_directory, train_enc, train_dec, test_enc, test_
   data_to_token_ids(test_dec, dec_dev_ids_path, dec_vocab_path, tokenizer)
 
   return (enc_train_ids_path, dec_train_ids_path, enc_dev_ids_path, dec_dev_ids_path, enc_vocab_path, dec_vocab_path)
+
diff --git a/neuralconvo.ini b/neuralconvo.ini
index 0048351..f179a3c 100755
--- a/neuralconvo.ini
+++ b/neuralconvo.ini
@@ -4,7 +4,8 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
+
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
diff --git a/seq2seq.ini b/seq2seq.ini
index 392f9da..467b212 100755
--- a/seq2seq.ini
+++ b/seq2seq.ini
@@ -4,7 +4,8 @@ mode = train
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
+
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
diff --git a/seq2seq_model.py b/seq2seq_model.py
index 5b9f39b..9a015da 100755
--- a/seq2seq_model.py
+++ b/seq2seq_model.py
@@ -19,23 +19,16 @@ from __future__ import division
 from __future__ import print_function
 
+import copy
 import random
 
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
 
-#from tensorflow.models.rnn.translate import data_utils
-#fixes File "execute.py", line 31, in
-    #import seq2seq_model
-  #File "C:\PYTHONCODE\Tensorflow\chatbot\tensorflow_chatbot\seq2seq_model.py", l
-#ine 28, in
-    #from tensorflow.models.rnn.translate import data_utils
-#ModuleNotFoundError: No module named 'tensorflow.models'
 import data_utils
 
-
 class Seq2SeqModel(object):
   """Sequence-to-sequence model with attention and for multiple buckets.
@@ -51,10 +44,20 @@ class Seq2SeqModel(object):
     http://arxiv.org/abs/1412.2007
   """
 
-  def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
-               num_layers, max_gradient_norm, batch_size, learning_rate,
-               learning_rate_decay_factor, use_lstm=False,
-               num_samples=512, forward_only=False):
+  def __init__(self,
+               source_vocab_size,
+               target_vocab_size,
+               buckets,
+               size,
+               num_layers,
+               max_gradient_norm,
+               batch_size,
+               learning_rate,
+               learning_rate_decay_factor,
+               use_lstm=False,
+               num_samples=512,
+               forward_only=False,
+               dtype=tf.float32):
     """Create the model.
 
     Args:
@@ -76,12 +79,14 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
       use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
+      dtype: the data type to use to store internal variables.
    """
    self.source_vocab_size = source_vocab_size
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
-    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
+    self.learning_rate = tf.Variable(
+        float(learning_rate), trainable=False, dtype=dtype)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
@@ -91,34 +96,52 @@ def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.target_vocab_size:
-      w = tf.get_variable("proj_w", [size, self.target_vocab_size])
-      w_t = tf.transpose(w)
-      b = tf.get_variable("proj_b", [self.target_vocab_size])
+      w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype)
+      w = tf.transpose(w_t)
+      b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
      output_projection = (w, b)
 
-      def sampled_loss(inputs, labels):
+      def sampled_loss(labels, logits):
        labels = tf.reshape(labels, [-1, 1])
-        return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
-                                          self.target_vocab_size)
+        # We need to compute the sampled_softmax_loss using 32bit floats to
+        # avoid numerical instabilities.
+        local_w_t = tf.cast(w_t, tf.float32)
+        local_b = tf.cast(b, tf.float32)
+        local_inputs = tf.cast(logits, tf.float32)
+        return tf.cast(
+            tf.nn.sampled_softmax_loss(
+                weights=local_w_t,
+                biases=local_b,
+                labels=labels,
+                inputs=local_inputs,
+                num_sampled=num_samples,
+                num_classes=self.target_vocab_size),
+            dtype)
      softmax_loss_function = sampled_loss
 
    # Create the internal multi-layer cell for our RNN.
-    single_cell = tf.nn.rnn_cell.GRUCell(size)
+    def single_cell():
+      return tf.contrib.rnn.GRUCell(size)
    if use_lstm:
-      single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
-    cell = single_cell
+      def single_cell():
+        return tf.contrib.rnn.BasicLSTMCell(size)
+    cell = single_cell()
    if num_layers > 1:
-      cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
+      cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])
 
    # The seq2seq function: we use embedding for the input and attention.
-    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-      return tf.nn.seq2seq.embedding_attention_seq2seq(
-          encoder_inputs, decoder_inputs, cell,
-          num_encoder_symbols=source_vocab_size,
-          num_decoder_symbols=target_vocab_size,
-          embedding_size=size,
-          output_projection=output_projection,
-          feed_previous=do_decode)
+    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode=False):
+      tmp_cell = copy.deepcopy(cell) #new
+      return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
+          encoder_inputs,
+          decoder_inputs,
+          tmp_cell, #new
+          num_encoder_symbols=source_vocab_size,
+          num_decoder_symbols=target_vocab_size,
+          embedding_size=size,
+          output_projection=output_projection,
+          feed_previous=do_decode,
+          dtype=dtype)
 
    # Feeds for inputs.
    self.encoder_inputs = []
@@ -130,7 +153,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
    for i in xrange(buckets[-1][1] + 1):
      self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="decoder{0}".format(i)))
-      self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
+      self.target_weights.append(tf.placeholder(dtype, shape=[None],
                                                name="weight{0}".format(i)))
 
    # Our targets are decoder inputs shifted by one.
@@ -139,7 +162,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
 
    # Training outputs and losses.
    if forward_only:
-      self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+      self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
          softmax_loss_function=softmax_loss_function)
@@ -151,7 +174,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
              for output in self.outputs[b]
          ]
    else:
-      self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+      self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
          self.encoder_inputs, self.decoder_inputs, targets,
          self.target_weights, buckets,
          lambda x, y: seq2seq_f(x, y, False),
@@ -171,7 +194,7 @@ def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        self.updates.append(opt.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step))
 
-    self.saver = tf.train.Saver(tf.all_variables())
+    self.saver = tf.train.Saver(tf.global_variables())
 
  def step(self, session, encoder_inputs, decoder_inputs, target_weights,
           bucket_id, forward_only):
diff --git a/seq2seq_serve.ini b/seq2seq_serve.ini
index fe09704..c5a9d7d 100755
--- a/seq2seq_serve.ini
+++ b/seq2seq_serve.ini
@@ -4,7 +4,7 @@ mode = serve
 train_enc = data/train.enc
 train_dec = data/train.dec
 test_enc = data/test.enc
-test_dec = data/test.enc
+test_dec = data/test.dec
 # folder where checkpoints, vocabulary, temporary data will be stored
 working_directory = working_dir/
 [ints]
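For reference, a minimal usage sketch of the updated Seq2SeqModel constructor under TensorFlow 1.x, reflecting the signature changed above (including the new dtype keyword and the per-layer single_cell() factory). The vocabulary sizes, bucket list, and other hyperparameter values below are illustrative placeholders, not values taken from this repository's configs.

import tensorflow as tf

import seq2seq_model

# Illustrative bucket list; the real hyperparameters live in seq2seq.ini.
buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]

with tf.Session() as sess:
  model = seq2seq_model.Seq2SeqModel(
      source_vocab_size=20000,
      target_vocab_size=20000,
      buckets=buckets,
      size=256,                         # units per layer
      num_layers=3,                     # stacked via MultiRNNCell of fresh single_cell() instances
      max_gradient_norm=5.0,
      batch_size=64,
      learning_rate=0.5,
      learning_rate_decay_factor=0.99,
      use_lstm=False,                   # GRU cells by default
      num_samples=512,                  # sampled softmax, computed in float32 internally
      forward_only=False,               # also build the training (backward) graph
      dtype=tf.float32)                 # new keyword introduced by this patch
  sess.run(tf.global_variables_initializer())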