forked from ilivans/tf-rnn-attention
attention.py
import tensorflow as tf


def attention(inputs, attention_size):
    """
    Attention mechanism layer.

    :param inputs: outputs of an RNN/Bi-RNN layer (not the final state)
    :param attention_size: linear size of the attention weights
    :return: outputs of the passed RNN/Bi-RNN reduced with the attention vector
    """
    # In case of Bi-RNN input, concatenate the outputs of its forward and backward parts
    if isinstance(inputs, tuple):
        inputs = tf.concat(inputs, 2)  # tf.concat(values, axis) signature (TF >= 1.0)

    sequence_length = inputs.get_shape()[1].value  # length of sequences processed by the preceding RNN layer
    hidden_size = inputs.get_shape()[2].value  # hidden size of the RNN layer

    # Attention mechanism parameters
    W_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    # Project each hidden state and score it against the context vector u_omega
    v = tf.tanh(tf.matmul(tf.reshape(inputs, [-1, hidden_size]), W_omega) + tf.reshape(b_omega, [1, -1]))
    vu = tf.matmul(v, tf.reshape(u_omega, [-1, 1]))

    # Softmax over the time dimension yields the attention weights alphas
    exps = tf.reshape(tf.exp(vu), [-1, sequence_length])
    alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])

    # Output of the (Bi-)RNN is reduced to a single vector with the attention weights
    output = tf.reduce_sum(inputs * tf.reshape(alphas, [-1, sequence_length, 1]), 1)

    return output
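

# Minimal usage sketch (illustrative, not part of the original file): wires the
# layer behind a toy bidirectional LSTM using TensorFlow 1.x graph-mode APIs.
# The names below (SEQUENCE_LENGTH, EMBEDDING_DIM, HIDDEN_UNITS, ATTENTION_SIZE,
# batch_embedded) are assumptions for this example, not identifiers from the fork.
if __name__ == "__main__":
    SEQUENCE_LENGTH = 100
    EMBEDDING_DIM = 300
    HIDDEN_UNITS = 64
    ATTENTION_SIZE = 32

    # [batch, time, embedding] input; the time dimension must be static because
    # attention() reads it via get_shape().
    batch_embedded = tf.placeholder(tf.float32, [None, SEQUENCE_LENGTH, EMBEDDING_DIM])

    fw_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_UNITS)
    bw_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_UNITS)
    rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, batch_embedded, dtype=tf.float32)

    # rnn_outputs is a (forward, backward) tuple; attention() concatenates it
    # and returns one [batch, 2 * HIDDEN_UNITS] vector per sequence.
    attention_output = attention(rnn_outputs, ATTENTION_SIZE)
    print(attention_output.get_shape())  # (?, 128)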