# fileio.py
"""
Module for the saving and loading of different data
"""
from __future__ import print_function, division

import os
import getpass
import pickle
import shutil

import numpy as np
import torch
from tensorboardX import SummaryWriter

from models import ShallowNet

USE_NEW_NAMING_FORMAT = True

USERNAME = getpass.getuser()
OUTPUT_DIR = os.path.join('/network/tmp1', USERNAME, 'information-paths')
SAVED_DIR = os.path.join(OUTPUT_DIR, 'saved')
FINAL_TENSORBOARD_DIR = os.path.join(SAVED_DIR, 'tensorboard')
TMP_TENSORBOARD_DIR = os.path.join('/tmp', USERNAME, 'tensorboard')
MODEL_DIR = os.path.join(SAVED_DIR, 'models')
TRAIN_LOADER_DIR = os.path.join(SAVED_DIR, 'train_loader')
PROB_DIR = os.path.join(SAVED_DIR, 'probabilities')
VARIANCE_DIR = os.path.join(SAVED_DIR, 'variance')
BIAS_DIR = os.path.join(SAVED_DIR, 'bias')
DATA_MODEL_COMP_DIR = os.path.join(SAVED_DIR, 'data_model_comps')
HYPERPARAM_DIR = os.path.join(SAVED_DIR, 'hyperparam')
CORRELATIONS_DIR = os.path.join(SAVED_DIR, 'correlations')
TRAIN_ERRORS_DIR = os.path.join(SAVED_DIR, 'train_errors')
# NOTE: BITMAP_DIRS are bare directory names, so they resolve relative to the
# current working directory (unlike the absolute paths above).
BITMAP_DIRS = ['train_bitmaps', 'val_bitmaps', 'test_bitmaps']
WEIGHT_DIR = os.path.join(SAVED_DIR, 'weights')
PAIRWISE_DISTS_DIR = os.path.join(SAVED_DIR, 'pairwise_dists')
PATH_DIR = os.path.join(SAVED_DIR, 'path_bitmaps')
FINE_PATH_DIR = os.path.join(SAVED_DIR, 'path_bitmaps_fine')
FINE_PATH_DIRS = [os.path.join(FINE_PATH_DIR, bitmap_dir) for bitmap_dir in BITMAP_DIRS]

PATHS = [SAVED_DIR, MODEL_DIR, WEIGHT_DIR, PAIRWISE_DISTS_DIR, PATH_DIR,
         FINE_PATH_DIR, DATA_MODEL_COMP_DIR, HYPERPARAM_DIR, PROB_DIR, VARIANCE_DIR,
         CORRELATIONS_DIR, BIAS_DIR, TRAIN_ERRORS_DIR, TRAIN_LOADER_DIR] + BITMAP_DIRS + FINE_PATH_DIRS

OLD_COMMON_NAMING_FORMAT = 'shallow%d_run%d_job%s.pt'
COMMON_NAMING_FORMAT = 'shallow%d_run%d_inter%d_job%s.pt'
COMMON_REGEXP_FORMAT = r'shallow%d_run\d+_job(\d+).pt'

TO_CPU_DEFAULT = False
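
# Example layout (illustrative username only): for user 'alice' the constants
# above resolve to, e.g.,
#   SAVED_DIR            -> /network/tmp1/alice/information-paths/saved
#   MODEL_DIR            -> /network/tmp1/alice/information-paths/saved/models
#   TMP_TENSORBOARD_DIR  -> /tmp/alice/tensorboard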

def make_all_dirs():
    """Make all the directories if they don't already exist."""
    for path in PATHS:
        if not os.path.exists(path):
            print("Creating directory:", path)
            os.makedirs(path)

# Create the output directories as a side effect of importing this module.
make_all_dirs()

def get_slurm_id():
    """Return the current SLURM job id, or 0 when not running under SLURM."""
    try:
        return os.environ["SLURM_JOB_ID"]
    except KeyError:
        return 0

def create_summary_writer():
    """Create a SummaryWriter that logs to the job's temporary tensorboard directory."""
    return SummaryWriter(get_tmp_tensorboard_dir(), max_queue=100000)

def move_tensorboard_dir():
    """Move the job's tensorboard logs from temporary to final storage."""
    final_tensorboard_dir = os.path.join(FINAL_TENSORBOARD_DIR, str(get_slurm_id()))
    if not os.path.exists(final_tensorboard_dir):
        os.makedirs(final_tensorboard_dir)
    tmp_tensorboard_dir = get_tmp_tensorboard_dir()
    for src_file in os.listdir(tmp_tensorboard_dir):
        src_file_path = os.path.join(tmp_tensorboard_dir, src_file)
        # shutil.move (rather than os.rename) works across filesystems and
        # places each file inside the destination directory.
        shutil.move(src_file_path, os.path.join(final_tensorboard_dir, src_file))

def get_tmp_tensorboard_dir():
    """Return the temporary tensorboard directory for the current SLURM job."""
    return os.path.join(TMP_TENSORBOARD_DIR, str(get_slurm_id()))
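
# Typical workflow (a sketch, not called anywhere in this module): write events
# to fast local storage during training, then move them to network storage once
# the job finishes. The tag and value below are illustrative.
#
#     writer = create_summary_writer()
#     writer.add_scalar('train/loss', 0.5, global_step=0)
#     writer.close()
#     move_tensorboard_dir()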
"""Specific saving functions"""
def save_data_model_comp(data_model_comp_obj, slurm_id=get_slurm_id(), inter=0):
data_model_comp_path = get_data_model_comp_path(data_model_comp_obj.model.num_hidden, data_model_comp_obj.run_i, slurm_id, inter)
pickle.dump(data_model_comp_obj, open(data_model_comp_path, 'wb'))
def save_train_loader(slurm_id, train_loader):
pickle.dump(train_loader, open(get_train_loader_path(slurm_id), 'wb'))
def save_shallow_net(model, num_hidden, i, slurm_id=get_slurm_id(), inter=0):
if isinstance(model, ShallowNet):
return save_model(model, model.num_hidden, i, slurm_id, inter)
else:
raise Exception('Naming convention for saving model not implemented for models other than ShallowNet')
def save_model(model, num_hidden, i, slurm_id=get_slurm_id(), inter=0):
return torch.save(model, get_model_path(num_hidden, i, slurm_id, inter))
def save_probabilities(slurm_id, num_hidden, inter, probabilities):
return np.save(get_probabilities_path(slurm_id, num_hidden, inter), probabilities)
def save_correlations(slurm_id, correlations, matrix_num):
return np.save(get_correlations_path(slurm_id, matrix_num), correlations)
def save_variance_data(slurm_id, variance, option):
return np.save(get_variance_path(slurm_id, option), variance)
def save_variance_diffs(slurm_id, num_hidden, diffs):
return np.save(get_variance_diffs_path(slurm_id, num_hidden), diffs)
def save_bias_diffs(slurm_id, num_hidden, diffs):
return np.save(get_bias_diffs_path(slurm_id, num_hidden), diffs)
def save_weights(weights, num_hidden, i, slurm_id):
return torch.save(weights, get_weight_path(num_hidden, i, slurm_id))
def save_weights_tensor(slurm_id, num_seeds, num_hidden, matrix_num, weights):
return torch.save(weights, get_weights_tensor_path(slurm_id, num_seeds, num_hidden, matrix_num))
def save_train_errors(slurm_id, num_hidden, errors):
return np.save(get_train_errors_path(slurm_id, num_hidden), errors)
def save_bitmap(bitmap, num_hidden, i, slurm_id, type):
return torch.save(bitmap, get_bitmap_path(num_hidden, i, slurm_id, type))
def save_pairwise_dists(pairwise_dists, num_hidden, num_runs, modifier):
return torch.save(pairwise_dists, get_pairwise_dists_path(num_hidden, num_runs, modifier))
def save_opt_path_bitmaps(opt_path, num_hidden, i, slurm_id):
return torch.save(opt_path, get_opt_path_bitmaps_path(num_hidden, i, slurm_id))
def save_fine_path_bitmaps(bitmap, num_hidden, i, inter, type):
return torch.save(bitmap, get_fine_path_bitmaps_path(num_hidden, i, inter, get_slurm_id(), type))
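
# Example usage (a sketch with illustrative values; ShallowNet(num_hidden) and
# model.get_params() follow the calls in load_model_information below):
#
#     model = ShallowNet(10)
#     save_shallow_net(model, model.num_hidden, i=0)
#     save_weights(model.get_params(), num_hidden=10, i=0, slurm_id=get_slurm_id())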
"""Specific loading functions"""
def load_data_model_comp(num_hidden, i, slurm_id=get_slurm_id(), inter=0):
return pickle.load(open(get_data_model_comp_path(num_hidden, i, slurm_id, inter), 'rb'))
def load_train_loader(slurm_id):
return pickle.load(open(get_train_loader_path(slurm_id), 'rb'))
def load_shallow_net(num_hidden, i, slurm_id, inter=0):
return load_model(num_hidden, i, slurm_id, inter)
def load_model(num_hidden, i, slurm_id, inter=0):
return load_torch(get_model_path(num_hidden, i, slurm_id, inter))
def load_probabilities(slurm_id, num_hidden, inter=0):
return np.load(get_probabilities_path(slurm_id, num_hidden, inter))
def load_correlations(slurm_id, matrix_num):
return np.load(get_correlations_path(slurm_id, matrix_num))
def load_variance_data(slurm_id, option):
return np.load(get_variance_path(slurm_id, option))
def load_variance_diffs(slurm_id, num_hidden):
return np.load(get_variance_diffs_path(slurm_id, num_hidden))
def load_bias_diffs(slurm_id, num_hidden):
return np.load(get_bias_diffs_path(slurm_id, num_hidden))
def load_weights(num_hidden, i, slurm_id):
return load_torch(get_weight_path(num_hidden, i, slurm_id))
def load_weights_tensor(slurm_id, num_seeds, num_hidden, matrix_num):
return load_torch(get_weights_tensor_path(slurm_id, num_seeds, num_hidden, matrix_num))
def load_train_errors(slurm_id, num_hidden):
return np.load(get_train_errors_path(slurm_id, num_hidden))
def load_bitmap(num_hidden, i, slurm_id, type):
return load_torch(get_bitmap_path(num_hidden, i, slurm_id, type))
def load_pairwise_dists(num_hidden, num_runs, modifier):
return load_torch(get_pairwise_dists_path(num_hidden, num_runs, modifier))
def load_opt_path_bitmaps(num_hidden, i, slurm_id):
return load_torch(get_opt_path_bitmaps_path(num_hidden, i, slurm_id))
def load_fine_path_bitmaps(num_hidden, i, inter, slurm_id, type):
return load_torch(get_fine_path_bitmaps_path(num_hidden, i, inter, slurm_id, type))
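
# Example usage (a sketch; the arguments are illustrative, not from a real run):
#
#     model = load_shallow_net(num_hidden=10, i=0, slurm_id='12345')
#     bitmaps = [load_fine_path_bitmaps(10, 0, inter, '12345', type=0)
#                for inter in range(5)]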

def load_torch(filename, to_cpu=TO_CPU_DEFAULT):
    """Load torch object, reverting to loading to CPU if loading error"""
    # Don't even try to load normally if you know it's going to CPU
    if to_cpu:
        return load_to_cpu(filename)
    else:
        # Try to load data normally
        try:
            return torch.load(filename)
        # likely CUDA error from saving it from GPU and loading to CPU
        except RuntimeError:
            return load_to_cpu(filename)

def load_to_cpu(filename):
    """Load torch object specifically to CPU"""
    return torch.load(filename, map_location=lambda storage, loc: storage)
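
# Example (a sketch; arguments are illustrative): force a CPU load of a
# checkpoint that was saved on a GPU machine.
#
#     weights = load_torch(get_weight_path(10, 0, '12345'), to_cpu=True)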

# Deprecated
def load_model_information():
    """
    TODO: trim this down and finish this (currently a copy-paste).
    Load model information from disk.

    NOTE: this body still refers to names from the script it was copied from
    (e.g. SAVED_BITMAP_DIR, SAVED_MODELS_DIR, NNTrainer, num_hidden, i,
    slurm_id), which are not defined in this module, so it is not runnable
    as-is.
    """
    # Common file naming
    save_model_fn = 'shallow%d_run%d_job%s.pt' % (num_hidden, i, slurm_id)
    save_info_fn = save_model_fn
    if load_model_inf or load_models:
        load_fn = 'shallow%d_run%d_job%s.pt' % (num_hidden, i, saved_slurm_id)
        save_info_fn = load_fn
    # Load model information (fastest)
    if load_model_inf:
        bitmap = torch.load(os.path.join(SAVED_BITMAP_DIR, load_fn))
        weight_vec = torch.load(os.path.join(SAVED_WEIGHTS_DIR, load_fn))
    else:
        # Load models and test them (fast)
        if load_models:
            shallow_net = torch.load(os.path.join(SAVED_MODELS_DIR, load_fn))
            trainer = NNTrainer(shallow_net)  # no training necessary
        # Train models (slow)
        else:
            shallow_net = ShallowNet(num_hidden)
            trainer = NNTrainer(shallow_net, lr=0.1, momentum=0.5, epochs=10)
            trainer.train(test=True)
            torch.save(shallow_net, os.path.join(SAVED_MODELS_DIR, save_fn))
        bitmap = trainer.test()
        weight_vec = shallow_net.get_params()
        torch.save(bitmap, os.path.join(SAVED_BITMAP_DIR, save_info_fn))
        torch.save(weight_vec, os.path.join(SAVED_WEIGHTS_DIR, save_info_fn))
    # Append bitmaps and weights to output lists
    bitmaps.append(bitmap)
    weights.append(weight_vec)
"""
Functions that return the path for a specific directory
"""
def get_train_loader_path(slurm_id):
return os.path.join(TRAIN_LOADER_DIR, '{}.p'.format(slurm_id))
def get_hyperparam_main_plot_path(first_job_id, option):
return os.path.join(HYPERPARAM_DIR, '{}_job{}.jpg'.format(option, first_job_id))
def get_hyperparam_indi_plot_path(first_job_id, num_hidden, option):
return os.path.join(HYPERPARAM_DIR, 'shallow{}_option{}_job{}.jpg'.format(num_hidden, option, first_job_id))
def get_probabilities_path(slurm_id, num_hidden, inter):
return os.path.join(PROB_DIR, 'shallow{}_job{}_inter{}.npy'.format(num_hidden, slurm_id, inter))
def get_correlations_path(slurm_id, matrix_num):
return os.path.join(CORRELATIONS_DIR, '{}_job{}.npy'.format(matrix_num, slurm_id))
def get_variance_path(slurm_id, option):
'''
option is either all or individual_variance
'''
return os.path.join(VARIANCE_DIR, '{}_{}.npy'.format(slurm_id, option))
def get_variance_diffs_path(slurm_id, num_hidden):
return os.path.join(VARIANCE_DIR, 'diffs_shallow{}_job{}.npy'.format(num_hidden, slurm_id))
def get_bias_diffs_path(slurm_id, num_hidden):
return os.path.join(BIAS_DIR, 'diffs_shallow{}_job{}.npy'.format(num_hidden, slurm_id))
def get_data_model_comp_path(num_hidden, i, slurm_id, inter=0):
return get_path(DATA_MODEL_COMP_DIR, num_hidden, i, slurm_id, inter)
def get_model_path(num_hidden, i, slurm_id, inter=0):
return get_path(MODEL_DIR, num_hidden, i, slurm_id, inter)
def get_weights_tensor_path(slurm_id, num_seeds, num_hidden, matrix_num):
return os.path.join(MODEL_DIR, '{}_shallow{}_seeds{}_job{}.pt'.format(matrix_num, num_hidden, num_seeds, slurm_id))
def get_train_errors_path(slurm_id, num_hidden):
return os.path.join(TRAIN_ERRORS_DIR, 'shallow{}_job{}.pt.npy'.format(num_hidden, slurm_id))
def get_weight_path(num_hidden, i, slurm_id):
return get_path(WEIGHT_DIR, num_hidden, i, slurm_id)
def get_bitmap_path(num_hidden, i, slurm_id, type): # TODO: rename type in all places
return get_path(BITMAP_DIRS[type], num_hidden, i, slurm_id)
def get_pairwise_dists_path(num_hidden, num_runs, modifier):
return os.path.join(PAIRWISE_DISTS_DIR,
'shallow{}_runs{}_{}.pt'.format(num_hidden, num_runs, modifier))
def get_opt_path_bitmaps_path(num_hidden, i, slurm_id):
return get_path(PATH_DIR, num_hidden, i, slurm_id)
def get_fine_path_bitmaps_path(num_hidden, i, inter, slurm_id,
type # 0 for train, 1 for validation, 2 for test
):
return os.path.join(FINE_PATH_DIRS[type],
'shallow{}_run{}_inter{}_job{}.pt'.format(num_hidden, i, inter, slurm_id))
def get_path(directory, num_hidden, i, slurm_id, inter=0):
"""Get path of a file in a specific directory"""
return os.path.join(directory, get_filename(num_hidden, i, slurm_id, inter))
def get_filename(num_hidden, i, slurm_id, inter=0):
"""
Return filename for a specific number of hidden units, run i, and SLURM id
"""
# if int(slurm_id) > 161000:
if USE_NEW_NAMING_FORMAT:
return COMMON_NAMING_FORMAT % (num_hidden, i, inter, slurm_id)
else:
return OLD_COMMON_NAMING_FORMAT % (num_hidden, i, slurm_id)
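
# For reference (illustrative values): get_filename(10, 0, '12345', inter=2)
# returns 'shallow10_run0_inter2_job12345.pt' under the new naming format, and
# 'shallow10_run0_job12345.pt' under the old one.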

def get_train_test_modifiers(modifier=None):
    """Append the modifier to 'train' and 'test'"""
    modifier_train = 'train'
    modifier_test = 'test'
    if modifier is not None:
        modifier_train = modifier_train + '_' + modifier
        modifier_test = modifier_test + '_' + modifier
    return modifier_train, modifier_test
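
# Example (illustrative): get_train_test_modifiers('acc') returns
# ('train_acc', 'test_acc'); with no argument it returns ('train', 'test').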