forked from ilivans/tf-rnn-attention
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
73 lines (58 loc) · 2.22 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
def zero_pad(X, seq_len):
    """Truncate or right-pad every sequence in X with zeros to exactly seq_len.

    Fixes an off-by-one in the original, which truncated to ``seq_len - 1``
    and always appended at least one zero — so any sequence of length
    >= seq_len silently lost its last usable token (replaced by 0).

    Args:
        X: iterable of lists of int token ids.
        seq_len: target length of every output row.

    Returns:
        np.ndarray of shape (len(X), seq_len).
    """
    return np.array([x[:seq_len] + [0] * max(seq_len - len(x), 0) for x in X])
def get_vocabulary_size(X):
    """Return 1 + the largest word id appearing in X (the +1 accounts for id 0)."""
    return max(max(seq) for seq in X) + 1
def fit_in_vocabulary(X, voc_size):
    """Drop every word id >= voc_size from each sequence, keeping order."""
    filtered = []
    for seq in X:
        filtered.append([token for token in seq if token < voc_size])
    return filtered
def batch_generator(X, y, batch_size):
    """Endlessly yield shuffled (X, y) mini-batches of exactly batch_size rows.

    The inputs are copied once and shuffled in unison; at the end of each
    pass the (already shuffled) copies are reshuffled again. A trailing
    partial batch is silently dropped.
    """
    n = X.shape[0]
    X_shuf = X.copy()
    y_shuf = y.copy()
    perm = np.arange(n)
    np.random.shuffle(perm)
    X_shuf, y_shuf = X_shuf[perm], y_shuf[perm]
    start = 0
    while True:
        stop = start + batch_size
        if stop > n:
            # Epoch exhausted: reshuffle cumulatively and restart.
            perm = np.arange(n)
            np.random.shuffle(perm)
            X_shuf, y_shuf = X_shuf[perm], y_shuf[perm]
            start = 0
            continue
        yield X_shuf[start:stop], y_shuf[start:stop]
        start = stop
# Metrics
def f_macro(y_true, y_pred):
    """Macro-averaged F1 over the negative (col 0) and positive (col 2) classes.

    NOTE(review): the arithmetic assumes y_true / y_pred are 0/1 indicator
    matrices with one column per class — confirm against callers.
    """
    labels = (0, 2)
    score = 0.0
    for lbl in labels:
        truth = y_true[:, lbl]
        guess = y_pred[:, lbl]
        tp = ((truth == 1) & (guess == 1)).sum()
        fp = ((truth == 0) & (guess == 1)).sum()
        fn = ((truth == 1) & (guess == 0)).sum()
        prec = float(tp) / (tp + fp) if tp + fp else 0
        rec = float(tp) / (tp + fn) if tp + fn else 0
        if prec + rec:
            # Each class contributes an equal share of the average.
            score += (2 * prec * rec / (prec + rec)) / len(labels)
    return score
def f_micro(y_true, y_pred):
    """Micro-averaged F1 over the negative (col 0) and positive (col 2) classes.

    Counts are pooled across both classes before computing precision/recall.
    NOTE(review): assumes 0/1 indicator matrices, one column per class.
    """
    labels = (0, 2)
    tp = fp = fn = 0
    for lbl in labels:
        truth = y_true[:, lbl]
        guess = y_pred[:, lbl]
        tp += ((truth == 1) & (guess == 1)).sum()
        fp += ((truth == 0) & (guess == 1)).sum()
        fn += ((truth == 1) & (guess == 0)).sum()
    prec = float(tp) / (tp + fp) if tp + fp else 0
    rec = float(tp) / (tp + fn) if tp + fn else 0
    return 2 * prec * rec / (prec + rec) if prec + rec else 0
if __name__ == "__main__":
    # Smoke-test the batch generator. Modernized from Python 2-only syntax
    # (`gen.next()` and the `print` statement are SyntaxErrors on Python 3).
    gen = batch_generator(np.array(['a', 'b', 'c', 'd']), np.array([1, 2, 3, 4]), 2)
    for _ in range(8):
        xx, yy = next(gen)
        print(xx, yy)