'''
Copyright (c) 2019 Uber Technologies, Inc.
Licensed under the Uber Non-Commercial License (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at the root directory of this project.
See the License for the specific language governing permissions and
limitations under the License.
'''
from __future__ import print_function
from __future__ import division
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import LogNorm
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.backends.backend_pdf import PdfPages
import numpy as np
import h5py
import os
import math
import time
from scipy import stats
from scipy.spatial import distance
from scipy.ndimage import gaussian_filter1d
from scipy.stats import dweibull
from ast import literal_eval
######################################## handling large matrices ########################################
'''
Loading the entire matrix takes too much memory:  var = np.array(h5pyfile['var'])
Reading one row at a time takes too much time:    var = h5pyfile['var']
So instead, use this class as a wrapper that reads a chunk of rows at a time.
Usage:
    var = util.streamds(h5pyfile['var'], chunk_size=1000)
    for i in range(var.shape[0]):
        do_something(var[i])
Note that you cannot close the h5py file until you are done reading through the wrapper.
Limitations for now:
    - must be a 2D h5py dataset
    - only useful for indexing by the first axis (time aka iteration)
    - only useful for going forward in time, not backwards
'''
class streamds():
    def __init__(self, ds, chunk_size):
        self.ds = ds
        self.chunk_size = chunk_size
        self.shape = ds.shape
        # start the buffer position past the end so the first access triggers a load
        self.startind = ds.shape[0]
        self.buffer = np.zeros((chunk_size, ds.shape[1]))
    def __getitem__(self, i):
        if isinstance(i, int):
            i = i % self.shape[0]  # supports negative indices
            if i < self.startind or i >= self.startind + self.chunk_size:
                self.update_buffer(i)
            return self.buffer[i - self.startind]
        else:
            return self.ds[i]  # slices etc. go straight to h5py, unbuffered
def update_buffer(self, i):
self.startind = i
self.buffer = np.array(self.ds[self.startind:self.startind + self.chunk_size])
def sum(self, axis=-1):
if axis == 0:
ret = np.zeros(self.ds.shape[1])
for start in range(0, self.ds.shape[0], self.chunk_size):
ret += self.ds[start : start + self.chunk_size].sum(axis=0)
return ret
elif axis == 1:
ret = np.zeros(self.ds.shape[0])
for start in range(0, self.ds.shape[0], self.chunk_size):
ret[start : start + self.chunk_size] += self.ds[start : start + self.chunk_size].sum(axis=1)
return ret
        elif axis == -1 and len(self.shape) == 2:
            return self.sum(axis=0).sum()  # grand total over both axes
        else:
            raise NotImplementedError('streamds.sum supports axis in (0, 1, -1) on 2D datasets only')
def __len__(self):
return len(self.ds)
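# A minimal usage sketch for streamds; the file and dataset names are hypothetical:
#
#     hf = h5py.File('gradients.h5', 'r')
#     grads = streamds(hf['gradients'], chunk_size=256)
#     col_totals = grads.sum(axis=0)    # column sums, accumulated chunk by chunk
#     for t in range(len(grads)):
#         row = grads[t]                # sequential access reuses the buffered chunk
#     hf.close()                        # close only after all reads are done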
######################################## helpers ########################################
# concatenate arrays logged by multiple gpus (stored under different keys in the same file)
def concat_arrays(hf, keyroot):
    grads = np.array(hf['{}_0'.format(keyroot)])
    for i in range(1, 4):  # assumes at most 4 gpus; missing keys are simply skipped
        nextkey = '{}_{}'.format(keyroot, i)
        if nextkey in hf.keys():
            # drop the first row of each later segment to avoid a duplicated boundary row
            grads = np.concatenate((grads, hf[nextkey][1:]))
    return grads
# return loss curve based on starting loss and the LCA totals
def get_approx_loss(loss, helped):
return loss[0] + np.concatenate((np.array([0]), np.cumsum(helped.sum(axis=1))))
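# In get_approx_loss above, each row of `helped` holds per-parameter LCA for one
# iteration, and its sum approximates that iteration's loss change, so
#     approx_loss[t] = loss[0] + sum over s < t of helped[s].sum()
# The prepended 0 keeps approx_loss aligned with loss (both start at loss[0]).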
# e.g. [1, 4, 2, 4, 1] for num_splits = 4
def get_rk_coeffs(num_splits):
coeffs = np.ones(num_splits + 1)
coeffs[1::2] = 4
coeffs[2:-1:2] = 2
return coeffs
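# These match composite Simpson's-rule weights: dividing by their sum turns the
# gradients sampled at the split points into a quadrature of the gradient along
# the weight path for one iteration. For example:
#     get_rk_coeffs(2) -> [1, 4, 1]          (sum  6)
#     get_rk_coeffs(4) -> [1, 4, 2, 4, 1]    (sum 12)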
def get_lca_rk_adaptive(weights, gradients, splits_per_iter):
    helped = np.zeros((weights.shape[0] - 1, weights.shape[1]))
    coeffs = {}
    for i in [2, 4, 8, 16, 32]:
        coeffs[i] = get_rk_coeffs(i)
    grads_ind = 0
    for ts in range(weights.shape[0] - 1):
        num_splits = splits_per_iter[ts]
        split_gradients = gradients[grads_ind:grads_ind + num_splits + 1]
        # advance by num_splits, not num_splits + 1: consecutive iterations share endpoint gradients
        grads_ind += num_splits
        # weighted average of the sampled gradients, then elementwise product with the weight step
        k = np.matmul(coeffs[num_splits], split_gradients) / coeffs[num_splits].sum()
        helped[ts] = np.multiply(k, weights[ts + 1] - weights[ts])
    return helped
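# Shape check (illustrative numbers): with T+1 weight snapshots of P parameters and
# num_splits = 4 for every iteration, weights is (T+1, P), gradients is (4*T + 1, P)
# because endpoints are shared, splits_per_iter has length T, and `helped` is (T, P).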
# returns list of variables as np arrays in their original shape
def split_and_shape(one_time_slice, shapes):
variables = []
offset = 0
for shape in shapes:
num_params = np.prod(shape)
variables.append(one_time_slice[offset : offset + num_params].reshape(shape))
offset += num_params
return variables
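# Example with made-up shapes: a flat time slice of length 56 and
# shapes = [(2, 3, 1, 4), (4,), (4, 7)] yields three arrays of
# 24 + 4 + 28 = 56 parameters, each reshaped back to its original shape.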
# get neurons, given either a 2d fc layer or a 4d conv layer
def get_neurons(layer):
return np.sum(layer, axis=tuple(range(len(layer.shape) - 1)))
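# E.g. a conv kernel of shape (H, W, C_in, C_out) is summed over (H, W, C_in) and an
# fc layer of shape (D_in, D_out) over D_in; either way the result has one entry per
# output neuron/channel (the last axis).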
# This assumes every kernel layer is immediately followed by its bias layer.
# Returns a list; each element is a vector (kernel LCA summed into neurons, plus bias LCA).
def get_neurons_plus_biases(split_vars):
    layers = []
    for i in range(len(split_vars) // 2):
        layers.append(get_neurons(split_vars[2*i]) + split_vars[2*i + 1].flatten())
    return layers
# for each layer, build an N x T matrix (N = neurons in the layer, T = total iterations); returns a list of these
def get_neuron_trajs_all_layers(helped, shapes):
neuron_trajs = get_neurons_plus_biases(split_and_shape(helped[0], shapes))
for i, layer in enumerate(neuron_trajs):
neuron_trajs[i] = layer.reshape(-1, 1)
for ts in range(1, helped.shape[0]):
curneurons = get_neurons_plus_biases(split_and_shape(helped[ts], shapes))
for i in range(len(neuron_trajs)):
neuron_trajs[i] = np.concatenate((neuron_trajs[i], curneurons[i].reshape(-1, 1)), axis=1)
return neuron_trajs
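# Illustrative shapes: for helped of shape (T, P) and shapes
# [(D, N1), (N1,), (N1, N2), (N2,)], the returned list holds two matrices,
# (N1, T) and (N2, T).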
def save_or_show(plot_dest):
'''Save figure or show it. Allowable destinations and actions:
Instance of PdfPages: write page to pdf
String like "/home/user/results/foo": write /home/user/results/foo.pdf and /home/user/results/foo.png
None: display instead of writing.
'''
if isinstance(plot_dest, PdfPages):
plot_dest.savefig()
plt.close()
elif isinstance(plot_dest, str):
plt.savefig('%s.png' % plot_dest)
plt.savefig('%s.pdf' % plot_dest)
plt.close()
elif plot_dest is None:
plt.show()
    else:
        raise ValueError('Not sure what to do with this plot destination: %s' % repr(plot_dest))
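# Usage sketch for the three destination types (paths are hypothetical):
#
#     save_or_show(None)                       # interactive: calls plt.show()
#     save_or_show('/tmp/figs/loss_curve')     # writes loss_curve.png and loss_curve.pdf
#     pdf = PdfPages('/tmp/figs/all_plots.pdf')
#     save_or_show(pdf)                        # appends the current figure as a page
#     pdf.close()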
######################################## plotters for notebooks ########################################
# plots approx loss based on rk4, returns diff per iter
def plot_approx_loss(loss, helped, plot_dest=None):
fig, ax1 = plt.subplots(figsize=(10,8))
ax1.plot(np.array(loss), label='actual loss')
approx_loss = np.array(get_approx_loss(loss, helped))
ax1.plot(approx_loss, label='sum approx loss')
ax1.set_xlabel('iterations')
ax1.set_ylabel('loss')
ax1.legend()
relative_err = abs(((approx_loss[-1] - approx_loss[0]) - (loss[-1] - loss[0])) / (loss[-1] - loss[0]))
    diff = np.array(loss[:len(approx_loss)]) - approx_loss
    diff_per_iter = np.diff(diff)  # per-iteration approximation error
ax2 = ax1.twinx()
ax2.plot(diff_per_iter, 'C1', alpha=0.5, linewidth=0.5, marker='.', markersize=1)
ax2.set_ylabel('diff per iteration')
plt.title('true and approximated loss (err %.2f%%)' % (relative_err * 100))
save_or_show(plot_dest)
if not plot_dest:
plt.hist(diff_per_iter, bins=50)
plt.show()
print(np.std(diff_per_iter))
    return diff_per_iter
# roughly a subset of save_layer_train_test (not defined in this file)
# plots total LCA ("helped") per kernel layer and across all layers
def plot_total_layers(total_helped, names, shapes, plot_dest=None):
layer_indices = np.append(0, np.cumsum([np.prod(shape) for shape in shapes]))
total_kernels = []
total_all = []
for i, name in enumerate(names):
startind, endind = layer_indices[i], layer_indices[i+1]
layer_total = total_helped[startind:endind].sum()
total_all.append(layer_total)
if not ('batch_norm' in name or 'bias' in name):
total_kernels.append(layer_total)
    print('Bias and batch norm layers helped {:.1f} percent of total'.format(
        100 * (1 - np.sum(total_kernels) / np.sum(total_all))))
# TODO make nice stacked chart over time
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.bar(range(len(total_kernels)), total_kernels)
plt.axhline(0, c='k', linestyle=':')
plt.title('Kernel layers')
plt.xlabel('Layer')
plt.ylabel('LCA per layer')
plt.subplot(1, 2, 2)
plt.bar(range(len(total_all)), total_all)
plt.axhline(0, c='k', linestyle=':')
plt.title('All layers')
plt.xlabel('Layer')
plt.ylabel('LCA per layer')
save_or_show(plot_dest)
def percent_helped_histograms(helped):
# histogram of how many params helped, over each iteration
negatives = helped < 0
params_per_iter = negatives.sum(axis=1)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.hist(params_per_iter, bins=50)
plt.xlabel('params helped per iteration')
plt.ylabel('number of iterations')
plt.title('Mean = {:.2f}, stdev = {:.2f}'.format(params_per_iter.mean(), params_per_iter.std()))
# histogram of how many iters helped, over each parameter
iters_per_param = negatives.sum(axis=0)
plt.subplot(1, 2, 2)
plt.hist(iters_per_param, bins=50)
plt.xlabel('iterations helped per param')
plt.ylabel('number of params')
plt.title('Mean = {:.2f}, stdev = {:.2f}'.format(iters_per_param.mean(), iters_per_param.std()))
plt.show()
# helped train and test trajectories (cumulative) for each layer
def plot_trajectory_per_layer(helped, names, shapes, helped_test=None, layer_inds=(), iters=-1, plot_dest=None):
if iters == -1:
iters = helped.shape[0]
if len(layer_inds) == 0:
layer_inds = range(len(names))
layer_indices = np.append(0, np.cumsum([np.prod(shape) for shape in shapes]))
for i, name in enumerate(names):
if i not in layer_inds:
continue
startind, endind = layer_indices[i], layer_indices[i+1]
traj = np.cumsum(helped[:, startind:endind].sum(axis=1))
plt.plot(traj[:iters])
print(name, np.argsort(traj)[:10])
        if helped_test is not None:  # truth-testing a numpy array would raise ValueError
            plt.plot(np.cumsum(helped_test[:iters, startind:endind].sum(axis=1)))  # can also plot overfit here instead
plt.axhline(0, c='k', linestyle=':')
plt.xlabel('iterations')
plt.ylabel('LCA, summed over parameters')
plt.title(name)
if isinstance(plot_dest, str):
save_or_show('%s_%02d' % (plot_dest, i))
else:
save_or_show(plot_dest)
# plot how much each neuron helped, each iteration. Large 2d plot for each layer
def plot_neurons_all_time(neuron_trajs, names, color_percentile=99, plot_dest=None):
    # custom colormap matching the green and red used in other plots; low values map to
    # red and high to green, and the layer is negated at plot time so that helping
    # (negative LCA) shows as green
cdict = {'red': [[0.0, 0.4, 0.4],
[0.4, 1.0, 1.0],
[0.5, 1.0, 1.0],
[1.0, 0.0, 0.0]],
'green': [[0.0, 0.0, 0.0],
[0.5, 1.0, 1.0],
[0.6, 1.0, 1.0],
[1.0, 0.3, 0.3]],
'blue': [[0.0, 0.0, 0.0],
[0.5, 1.0, 1.0],
[1.0, 0.0, 0.0]]}
custom_cmap = LinearSegmentedColormap('lc', segmentdata=cdict) # previously used PiYG
for i, layer in enumerate(neuron_trajs):
#if 'batch_norm' in names[2 * i]:
# continue
fig_height = min(3 + int(layer.shape[0] / 20), 15)
plt.figure(figsize=(15, fig_height))
cmap_range = np.percentile(np.absolute(layer), color_percentile)
# sorted_layer = sorted(list(layer.T), key=lambda x: np.average(np.arange(len(x)), weights=np.abs(x)))
plt.imshow(-layer, cmap=custom_cmap, vmin=-cmap_range, vmax=cmap_range, aspect='auto')
plt.xlabel('iterations')
plt.ylabel('neurons')
plt.title(names[2 * i])
if isinstance(plot_dest, str):
save_or_show('%s_%02d' % (plot_dest, i))
else:
save_or_show(plot_dest)
def plot_trajectories_per_layer(helped, shapes, names, plot_dest=None):
offset = 0
for i, shape in enumerate(shapes):
# layer shape is (iteration, all params in layer flattened)
num_params = np.prod(shape)
layer = helped[:, offset : offset + num_params].cumsum(axis=0)
offset += num_params
        bestinds = np.argsort(layer[-1])[:50]  # the 50 most-helped params (most negative cumulative LCA)
randinds = np.random.choice(layer.shape[1], size=min(50, layer.shape[1]), replace=False)
best_trajs = layer[:, bestinds].T
rand_trajs = layer[:, randinds].T
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
for j, traj in enumerate(best_trajs):
plt.plot(np.concatenate((np.array([0]), traj)), alpha=0.5) # label=bestinds[j])
plt.xlabel('iterations')
plt.ylabel('cumulative helped')
plt.subplot(1, 2, 2)
for j, traj in enumerate(rand_trajs):
plt.plot(np.concatenate((np.array([0]), traj)), alpha=0.5)
plt.xlabel('iterations')
plt.ylabel('cumulative helped')
plt.suptitle(names[i])
#plt.tight_layout()
if isinstance(plot_dest, str):
save_or_show('%s_%02d' % (plot_dest, i))
else:
save_or_show(plot_dest)