DGPLVM_m52.stan
// DGP-LVM with Matern 5/2 (M52) covariance function
// GP parameters:
//   GP length scale: rho
//   GP marginal SD: alpha_obs (original part of the GP); alpha_grad (derivative part of the GP)
//   Error SD: sigma_obs (original output); sigma_grad (derivative output)
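//
// Reference note (standard results for the Matern 5/2 kernel and its derivatives,
// with the alpha_obs / alpha_grad scaling used in deriv_m52() below):
//   k(x, x')           = alpha_obs^2 * (1 + sqrt(5)*r/rho + 5*r^2/(3*rho^2)) * exp(-sqrt(5)*r/rho),  r = |x - x'|
//   cov(f(x), f'(x'))  = alpha_obs * alpha_grad * 5*(x - x')/(3*rho^2) * (1 + sqrt(5)*r/rho) * exp(-sqrt(5)*r/rho)
//   cov(f'(x), f'(x')) = alpha_grad^2 * 5/(3*rho^2) * (1 + sqrt(5)*r/rho - 5*r^2/rho^2) * exp(-sqrt(5)*r/rho)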
functions {
  // repeat the input vector `reps` times (stacks copies of x end to end)
  vector repeat_vector(vector x, int reps) {
    int N = rows(x);
    array[N * reps] int index;
    for (i in 1:reps) {
      for (j in 1:N) {
        index[(i - 1) * N + j] = j;
      }
    }
    return x[index];
  }
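  // Illustrative example (not part of the model): repeat_vector([1, 2, 3]', 2)
  // returns [1, 2, 3, 1, 2, 3]'; with reps = 2 this matches how the observed and
  // derivative rows of y are stacked.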
  // derivative covariance function (returns the Cholesky factor of the joint covariance matrix)
  matrix deriv_m52(vector x, array[] int derivative, real alpha_obs, real alpha_grad, real rho, real delta) {
    int N = rows(x);
    matrix[N, N] K;
    real sq_rho = square(rho);
    real sq_alpha_obs = square(alpha_obs);
    real sq_alpha_grad = square(alpha_grad);
    for (i in 1:N) {
      // diagonal: marginal variance of the observed vs. derivative part
      if (derivative[i] == 0) {
        K[i, i] = sq_alpha_obs + delta;
      } else if (derivative[i] == 1) {
        K[i, i] = (5 * sq_alpha_grad / (3 * sq_rho)) + delta;
      }
      for (j in (i + 1):N) {
        if (derivative[i] == 0 && derivative[j] == 0) {
          // obs-obs block: Matern 5/2 kernel
          K[i, j] = (1 + (sqrt(5) * abs(x[i] - x[j]) / rho) +
                    ((5 * square(x[i] - x[j])) / (3 * sq_rho))) *
                    exp(- sqrt(5) * abs(x[i] - x[j]) / rho) * sq_alpha_obs;
        } else if (derivative[i] == 0 && derivative[j] == 1) {
          // obs-deriv block: derivative of the kernel w.r.t. its second argument
          K[i, j] = ((5 * (x[i] - x[j])) / (3 * sq_rho)) *
                    (1 + (sqrt(5) * abs(x[i] - x[j]) / rho)) *
                    exp(- sqrt(5) * abs(x[i] - x[j]) / rho) * alpha_obs * alpha_grad;
        } else if (derivative[i] == 1 && derivative[j] == 0) {
          // deriv-obs block: derivative of the kernel w.r.t. its first argument
          K[i, j] = ((5 * (x[j] - x[i])) / (3 * sq_rho)) *
                    (1 + (sqrt(5) * abs(x[i] - x[j]) / rho)) *
                    exp(- sqrt(5) * abs(x[i] - x[j]) / rho) * alpha_obs * alpha_grad;
        } else if (derivative[i] == 1 && derivative[j] == 1) {
          // deriv-deriv block: mixed second derivative of the kernel
          K[i, j] = (1 + (sqrt(5) * abs(x[i] - x[j]) / rho) -
                    ((5 * square(x[i] - x[j])) / sq_rho)) *
                    exp(- sqrt(5) * abs(x[i] - x[j]) / rho) * (5 * sq_alpha_grad / (3 * sq_rho));
        }
        K[j, i] = K[i, j];
      }
    }
    return cholesky_decompose(K);
  }
  // base covariance function (returns the Cholesky factor of the Matern 5/2 covariance matrix)
  matrix m52(vector x, real alpha_obs, real rho, real delta) {
    int N = rows(x);
    matrix[N, N] K;
    real sq_rho = square(rho);
    real sq_alpha_obs = square(alpha_obs);
    for (i in 1:N) {
      K[i, i] = sq_alpha_obs + delta;
      for (j in (i + 1):N) {
        K[i, j] = (1 + (sqrt(5) * abs(x[i] - x[j]) / rho) +
                  ((5 * square(x[i] - x[j])) / (3 * sq_rho))) *
                  exp(- sqrt(5) * abs(x[i] - x[j]) / rho) * sq_alpha_obs;
        K[j, i] = K[i, j];
      }
    }
    return cholesky_decompose(K);
  }
}
data {
  // Sample size
  int<lower=1> N;
  // Number of unique x values (N / 2 when derivatives are included)
  int<lower=1> M;
  // Number of output dimensions
  int<lower=1> D;
  // Output (original and derivative parts concatenated row-wise for DGP-LVM)
  matrix[N, D] y;
  // Indicator variable for original (0) and derivative (1) rows
  array[N] int<lower=0, upper=1> derivative;
  // Measurement SD of the observed inputs (assumed to be known)
  real<lower=0> s;
  // Prior SD for the alphas and sigmas
  real<lower=0> sparams_prior_sd;
  // Observed inputs (noisy measurements of the latent x)
  vector[M] t;
  // Model conditions (select modifications ranging from standard GPs to the full DGP-LVM)
  int<lower=0, upper=1> is_deriv; // 0 = no derivative part; 1 = derivative model
  int<lower=0, upper=1> is_scale; // 0 = derivative part on the same scale; 1 = separately scaled
  int<lower=0, upper=1> is_vary;  // 0 = constant rho and alpha across dimensions; 1 = dimension-varying parameters
  int<lower=0, upper=1> is_corr;  // 0 = no correlation between dimensions; 1 = correlated dimensions
}
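// Illustrative data layout (an assumption about intended use, not part of the model):
// with M = 3 unique inputs and is_deriv = 1, set N = 6, derivative = {0, 0, 0, 1, 1, 1},
// and stack y so that rows 1..M hold the observed outputs and rows M+1..N hold the
// corresponding derivative observations, in the same input order as t.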
transformed data {
  // small constant added to the diagonal of the covariance matrix for numerical stability
  real delta = 1e-6;
}
parameters {
  // Latent x
  // add bounds on x if known for theoretical reasons (e.g. lower=0)
  vector[M] x;
  // GP length scale parameter (temporary storage)
  vector<lower=0>[is_vary == 1 ? D : 1] rho_temp;
  // GP marginal SD parameters (temporary storage)
  vector<lower=0>[is_vary == 1 ? D : 1] alpha_obs_temp;
  vector<lower=0>[is_scale == 1 ? D : 0] alpha_grad_temp;
  // Gaussian link parameter (non-centered parameterization of the GP)
  matrix[is_deriv == 1 ? N : M, D] eta;
  // Between-dimension correlation parameter (temporary storage)
  cholesky_factor_corr[is_corr == 1 ? D : 0] L_omega_temp;
  // Error SD parameters (temporary storage)
  vector<lower=0>[is_vary == 1 ? D : 1] sigma_obs_temp;
  vector<lower=0>[is_scale == 1 ? D : 0] sigma_grad_temp;
}
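// Illustrative example of the conditional sizes above (assumption about a typical
// setting, not part of the model): with D = 4, is_vary = 0, and is_scale = 1,
// rho_temp, alpha_obs_temp, and sigma_obs_temp each hold a single value shared across
// dimensions, while alpha_grad_temp and sigma_grad_temp hold D = 4 dimension-specific
// values; parameters declared with size 0 contribute nothing to the model.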
transformed parameters {
  // GP f()
  matrix[is_deriv == 1 ? N : M, D] f;
  // Model condition adjustments (temporary to actual parameters)
  vector<lower=0>[D] rho;
  vector<lower=0>[D] alpha_obs;
  vector<lower=0>[D] alpha_grad;
  vector<lower=0>[D] sigma_obs;
  vector<lower=0>[D] sigma_grad;
  cholesky_factor_corr[D] L_omega;
  // if parameters are constant across dimensions, the single value is repeated
  if (is_vary == 1) {
    rho = rho_temp;
    alpha_obs = alpha_obs_temp;
    sigma_obs = sigma_obs_temp;
  } else {
    for (k in 1:D) {
      rho[k] = rho_temp[1];
      alpha_obs[k] = alpha_obs_temp[1];
      sigma_obs[k] = sigma_obs_temp[1];
    }
  }
  // if the derivative part is not separately scaled, the grad parameters equal the obs parameters
  if (is_scale == 1) {
    alpha_grad = alpha_grad_temp;
    sigma_grad = sigma_grad_temp;
  } else {
    alpha_grad = alpha_obs;
    sigma_grad = sigma_obs;
  }
  // if there is no correlation between dimensions, the correlation matrix is the identity
  if (is_corr == 0) {
    L_omega = diag_matrix(rep_vector(1, D));
  } else {
    L_omega = L_omega_temp;
  }
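  // Note on the construction below (explanatory comment, not from the original source):
  // f[, k] = L_K * eta[, k] with eta ~ std_normal() is a non-centered parameterization,
  // so each column of f gets a multivariate normal GP prior with covariance K = L_K * L_K'.
  // Post-multiplying by L_omega' then induces correlation between output dimensions.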
  // Derivative GP: Cholesky factor of the joint covariance over observations and derivatives
  if (is_deriv == 1) {
    vector[N] x2 = repeat_vector(x, 2);
    for (k in 1:D) {
      matrix[N, N] L_K = deriv_m52(x2, derivative, alpha_obs[k], alpha_grad[k], rho[k], delta);
      f[, k] = L_K * eta[, k];
    }
    // For correlated outputs
    f = f * L_omega';
  } else {
    // Standard GP: Cholesky factor of the covariance over observations only
    for (k in 1:D) {
      matrix[M, M] L_K = m52(x, alpha_obs[k], rho[k], delta);
      f[, k] = L_K * eta[, k];
    }
    // For correlated outputs
    f = f * L_omega';
  }
}
model {
  // Priors
  rho_temp ~ inv_gamma(5, 5);
  alpha_obs_temp ~ normal(0, sparams_prior_sd);
  alpha_grad_temp ~ normal(0, sparams_prior_sd);
  sigma_obs_temp ~ normal(0, sparams_prior_sd);
  sigma_grad_temp ~ normal(0, sparams_prior_sd);
  L_omega_temp ~ lkj_corr_cholesky(3);
  for (k in 1:D) {
    eta[, k] ~ std_normal();
  }
  // Measurement model for the observed inputs (acts as a prior on the latent x)
  t ~ normal(x, s);
  // Likelihood
  if (is_deriv == 1) {
    for (k in 1:D) {
      y[1:M, k] ~ normal(f[1:M, k], sigma_obs[k]);
      y[(M + 1):N, k] ~ normal(f[(M + 1):N, k], sigma_grad[k]);
    }
  } else {
    // here the second half of y (the derivative information) remains unused
    for (k in 1:D) {
      y[1:M, k] ~ normal(f[1:M, k], sigma_obs[k]);
    }
  }
}