torch_utils.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np


def logdet(A):
    """
    There is no batched log-determinant in PyTorch (at the time of writing),
    but there is a batched Cholesky. Factor A = L L^T with L lower triangular,
    so det(A) = det(L) * det(L^T) = det(L)^2. Since the determinant of a
    triangular matrix is the product of its diagonal, det(L) = prod(L_ii),
    this gives in full:
        logdet(A) = 2 * sum(log(diag(cholesky(A))))
    Args:
        A: symmetric positive definite matrix of shape (..., D, D)
    Returns:
        log(det(A)), of shape (...)
    """
    return 2. * torch.sum(torch.log(torch.diagonal(torch.cholesky(A), dim1=-2, dim2=-1)), dim=-1)
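
# Usage sketch (illustrative, not part of the original module): sanity-check
# logdet against a per-matrix torch.logdet reference on a random SPD batch.
#
#     B = torch.randn(4, 3, 3)
#     A = B @ B.transpose(-2, -1) + 3.0 * torch.eye(3)  # SPD by construction
#     ref = torch.stack([torch.logdet(a) for a in A])
#     assert torch.allclose(logdet(A), ref, atol=1e-5)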


def init_tensor_gpu_grad(org_tensor, trainable=True, device='cuda'):
    """
    Move a tensor to the given device, wrapping it in an nn.Parameter if it
    should be trainable, or explicitly freezing it otherwise.
    """
    if trainable:
        new_tensor = torch.nn.Parameter(org_tensor.to(device))
    else:
        new_tensor = org_tensor.to(device)
        new_tensor.requires_grad = False
    return new_tensor
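
# Usage sketch (illustrative): a trainable parameter and a frozen buffer,
# with a CPU fallback in case CUDA is unavailable.
#
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     w = init_tensor_gpu_grad(torch.randn(10, 5), trainable=True, device=device)
#     mask = init_tensor_gpu_grad(torch.ones(10), trainable=False, device=device)
#     assert w.requires_grad and not mask.requires_grad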


def exponential_learning_rate(learning_rate, decay_rate, global_step, decay_steps):
    """
    Exponentially decayed learning rate, as in TensorFlow's
    tf.train.exponential_decay
    (https://www.tensorflow.org/api_docs/python/tf/train/exponential_decay):
        decayed_lr = learning_rate * decay_rate ** (global_step / decay_steps)
    Computed with numpy rather than torch.tensor to avoid the UserWarning about
    copy-constructing tensors (use sourceTensor.clone().detach() instead of
    torch.tensor(sourceTensor); see
    https://discuss.pytorch.org/t/clone-and-detach-in-v0-4-0/16861/2).
    Remember to pass plain numpy values, not tensors.
    """
    return learning_rate * np.power(decay_rate, global_step / decay_steps)
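
# Usage sketch (illustrative): with decay_rate=0.5 and decay_steps=100 the
# learning rate halves every 100 steps, e.g. 1e-2 -> 5e-3 -> 2.5e-3.
#
#     assert np.isclose(exponential_learning_rate(1e-2, 0.5, 100, 100), 5e-3)
#     assert np.isclose(exponential_learning_rate(1e-2, 0.5, 200, 100), 2.5e-3)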


def make_pinwheel_data(radial_std, tangential_std, num_classes, num_per_class, rate):
    """
    Generate the synthetic spiral-arm "pinwheel" dataset: num_classes arms of
    num_per_class 2D points each, twisted by an angle that grows with radius.
    Returns:
        (features, labels): a float tensor of shape (N, 2) and an int64 tensor
        of shape (N,), where N = num_classes * num_per_class, in random order.
    """
    rads = np.linspace(0, 2 * np.pi, num_classes, endpoint=False)
    # Gaussian noise with separate radial and tangential scales, shifted away
    # from the origin along the radial coordinate.
    features = np.random.randn(num_classes * num_per_class, 2) \
        * np.array([radial_std, tangential_std])
    features[:, 0] += 1.
    labels = np.repeat(np.arange(num_classes), num_per_class)
    # Rotate each point by its class angle plus a radius-dependent twist.
    angles = rads[labels] + rate * np.exp(features[:, 0])
    rotations = np.stack([np.cos(angles), -np.sin(angles), np.sin(angles), np.cos(angles)])
    rotations = np.reshape(rotations.T, (-1, 2, 2))
    feats = 10 * np.einsum('ti,tij->tj', features, rotations)
    data = np.random.permutation(np.hstack([feats, labels[:, None]]))
    return torch.tensor(data[:, 0:2]).float(), torch.tensor(data[:, 2], dtype=torch.int64)
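
# Usage sketch (illustrative; the noise scales and rate below are arbitrary
# example values): a 5-arm pinwheel with 100 points per arm.
#
#     X, y = make_pinwheel_data(0.3, 0.05, num_classes=5, num_per_class=100, rate=0.25)
#     assert X.shape == (500, 2) and y.shape == (500,)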


def rand_partial_isometry(input_dim, output_dim, stddev=1., seed=0):
    """
    Random partial isometry, initialized as in MJJ's code (Johnson et al. 2016).
    Args:
        input_dim: number of rows
        output_dim: number of columns
        stddev: standard deviation of the Gaussian the orthogonal factor is drawn from
        seed: random seed
    Returns:
        matrix of shape (input_dim, output_dim) with orthonormal columns when
        input_dim >= output_dim, and orthonormal rows otherwise
    """
    d = max(input_dim, output_dim)
    npr = np.random.RandomState(seed)
    return np.linalg.qr(npr.normal(loc=0, scale=stddev, size=(d, d)))[0][:input_dim, :output_dim]
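
# Usage sketch (illustrative): with input_dim >= output_dim the result has
# orthonormal columns, so W^T W is the identity.
#
#     W = rand_partial_isometry(8, 3, seed=0)
#     assert np.allclose(W.T @ W, np.eye(3), atol=1e-6)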