toy_example.py (forked from ilivans/tf-rnn-attention)
"""
Toy example of attention layer use.
Train RNN (GRU) on IMDB dataset (binary classification).
"""
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn import dynamic_rnn as rnn
from tensorflow.python.ops.rnn_cell import GRUCell
from keras.datasets import imdb

from attention import attention
from utils import get_vocabulary_size, fit_in_vocabulary, zero_pad, batch_generator
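# The utils helpers are assumed to behave as follows (see utils.py):
#   get_vocabulary_size(X)       -> highest word index in X, plus one
#   fit_in_vocabulary(X, size)   -> clip word indices to the known vocabulary
#   zero_pad(X, length)          -> pad/truncate each sequence to a fixed length
#   batch_generator(X, y, size)  -> endless generator of shuffled (x, y) batches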
# Load the dataset
(X_train, y_train), (X_test, y_test) = imdb.load_data()
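# X_* are lists of variable-length word-index sequences; y_* are 0/1 sentiment labels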
# Sequence preprocessing: the vocabulary is built from the training set, so only
# X_test has to be clipped to it
vocabulary_size = get_vocabulary_size(X_train)
X_test = fit_in_vocabulary(X_test, vocabulary_size)
sequence_length = 250
X_train = zero_pad(X_train, sequence_length)
X_test = zero_pad(X_test, sequence_length)
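# X_train / X_test now have fixed shape [num_examples, sequence_length], assuming
# zero_pad pads (and truncates) with the reserved index 0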
# Placeholders
batch_ph = tf.placeholder(tf.int32, [None, sequence_length])
target_ph = tf.placeholder(tf.float32, [None])
seq_len_ph = tf.placeholder(tf.int32, [None])
keep_prob_ph = tf.placeholder(tf.float32)
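# keep_prob_ph lets the same graph run with dropout during training (keep_prob)
# and without it at evaluation time (1.0)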
# Embedding layer
embed_dim = 100
embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, embed_dim], -1.0, 1.0), trainable=True)
batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)
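# batch_embedded: [batch_size, sequence_length, embed_dim]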
# RNN layer
hidden_size = 150
# A bidirectional variant is kept below for reference; it would also need
# `from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn`.
# birnn_outputs, _ = bi_rnn(GRUCell(hidden_size), GRUCell(hidden_size),
#                           inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)
rnn_outputs, _ = rnn(GRUCell(hidden_size), inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)
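# rnn_outputs: [batch_size, sequence_length, hidden_size]; dynamic_rnn emits zero
# vectors for time steps beyond each sequence's seq_len_ph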
# Attention layer
attention_size = 50
attention_output = attention(rnn_outputs, attention_size)
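# attention() is assumed to implement additive (Yang et al., 2016-style) attention
# over the time axis, roughly:
#   v      = tanh(rnn_outputs @ W_omega + b_omega)    # [B, T, attention_size]
#   alphas = softmax(v @ u_omega)                     # [B, T]
#   output = sum_t alphas[:, t] * rnn_outputs[:, t]   # [B, hidden_size]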
# Dropout
keep_prob = 0.5
drop = tf.nn.dropout(attention_output, keep_prob_ph)
# Fully connected layer
W = tf.Variable(tf.truncated_normal([drop.get_shape()[1].value, 1], stddev=0.1))
b = tf.Variable(tf.constant(0., shape=[1]))
y_hat = tf.nn.xw_plus_b(drop, W, b)
y_hat = tf.squeeze(y_hat)
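# y_hat holds raw logits of shape [batch_size]; the sigmoid is applied inside the
# loss and the accuracy metric below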
# Cross-entropy loss and optimizer initialization
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=target_ph))
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
# Accuracy metric: round the sigmoid of the logits and compare with the targets
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), target_ph), tf.float32))
# Actual lengths of sequences: position of the first PAD token (index 0), falling
# back to the full sequence_length if a row happens to contain no padding
seq_len_test = np.array([list(x).index(0) + 1 if 0 in x else sequence_length for x in X_test])
seq_len_train = np.array([list(x).index(0) + 1 if 0 in x else sequence_length for x in X_train])
# Batch generators
batch_size = 256
train_batch_generator = batch_generator(X_train, y_train, batch_size)
test_batch_generator = batch_generator(X_test, y_test, batch_size)
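# batch_generator is assumed to shuffle and cycle over the data indefinitely,
# which is why a single generator is reused across epochs via next()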
num_epochs = 10
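# delta is the smoothing factor for the running (exponentially weighted) training loss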
delta = 0.5
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Start learning...")
    for epoch in range(num_epochs):
        loss_train = 0
        loss_test = 0
        accuracy_train = 0
        accuracy_test = 0
        print("epoch: {}".format(epoch))

        # Training
        num_batches = X_train.shape[0] // batch_size
        for i in range(num_batches):
            x_batch, y_batch = next(train_batch_generator)
            # Actual lengths of the sequences in this batch
            seq_len = np.array([list(x).index(0) + 1 if 0 in x else sequence_length for x in x_batch])
            loss_tr, acc, _ = sess.run([loss, accuracy, optimizer],
                                       feed_dict={batch_ph: x_batch, target_ph: y_batch,
                                                  seq_len_ph: seq_len, keep_prob_ph: keep_prob})
            accuracy_train += acc
            loss_train = loss_tr * delta + loss_train * (1 - delta)  # running average
        accuracy_train /= num_batches

        # Validation
        num_batches = X_test.shape[0] // batch_size
        for i in range(num_batches):
            x_batch, y_batch = next(test_batch_generator)
            # Actual lengths of the sequences in this batch
            seq_len = np.array([list(x).index(0) + 1 if 0 in x else sequence_length for x in x_batch])
            loss_test_batch, acc = sess.run([loss, accuracy],
                                            feed_dict={batch_ph: x_batch, target_ph: y_batch,
                                                       seq_len_ph: seq_len, keep_prob_ph: 1.0})
            accuracy_test += acc
            loss_test += loss_test_batch
        accuracy_test /= num_batches
        loss_test /= num_batches

        print("\t train\tloss: {:.3f}\t acc: {:.3f}".format(loss_train, accuracy_train))
        print("\t test\tloss: {:.3f}\t acc: {:.3f}".format(loss_test, accuracy_test))