Merge pull request #448 from thangbui/devel
Added pep.py -- Sparse Gaussian processes using Power Expectation Propagation
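A minimal usage sketch of the new inference method, following the pattern used in the tests below (the data and alpha value here are illustrative, not part of the commit):

import numpy as np
import GPy

# toy 1-D regression data
np.random.seed(0)
X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05

# build a sparse GP and swap its inference method for PEP with a chosen power alpha;
# alpha close to 0 behaves like VarDTC, alpha = 1 like FITC
m = GPy.models.SparseGPRegression(X, Y)
m.inference_method = GPy.inference.latent_function_inference.PEP(alpha=0.5)
m.optimize()
print(m.log_likelihood())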
Showing 3 changed files with 188 additions and 0 deletions.
pep.py (new file)
@@ -0,0 +1,93 @@
from .posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dtrtri, pdinv
from ...util import diag
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)


class PEP(LatentFunctionInference):
    '''
    Sparse Gaussian processes using Power-Expectation Propagation
    for regression: alpha \approx 0 gives VarDTC and alpha = 1 gives FITC
    Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
    Power Expectation Propagation, https://arxiv.org/abs/1605.07066
    '''
    const_jitter = 1e-6

    def __init__(self, alpha):
        super(PEP, self).__init__()
        self.alpha = alpha

    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
        assert mean_function is None, "inference with a mean function not implemented"

        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape

        # make sure the noise is not hetero
        sigma_n = likelihood.gaussian_variance(Y_metadata)
        if sigma_n.size > 1:
            raise NotImplementedError("no hetero noise with this implementation of PEP")

        Kmm = kern.K(Z)
        Knn = kern.Kdiag(X)
        Knm = kern.K(X, Z)
        U = Knm

        # factor Kmm
        diag.add(Kmm, self.const_jitter)
        Kmmi, L, Li, _ = pdinv(Kmm)

        # compute beta_star, the effective noise precision
        LiUT = np.dot(Li, U.T)
        sigma_star = sigma_n + self.alpha * (Knn - np.sum(np.square(LiUT), 0))
        beta_star = 1./sigma_star

        # compute and factor A
        A = tdot(LiUT*np.sqrt(beta_star)) + np.eye(num_inducing)
        LA = jitchol(A)

        # back substitute to get b, P, v
        URiy = np.dot(U.T*beta_star, Y)
        tmp, _ = dtrtrs(L, URiy, lower=1)
        b, _ = dtrtrs(LA, tmp, lower=1)
        tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
        v, _ = dtrtrs(L, tmp, lower=1, trans=1)
        tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
        P = tdot(tmp.T)

        alpha_const_term = (1.0-self.alpha) / self.alpha

        # compute log marginal
        log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
            -np.sum(np.log(np.diag(LA)))*output_dim + \
            0.5*output_dim*(1+alpha_const_term)*np.sum(np.log(beta_star)) + \
            -0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
            0.5*np.sum(np.square(b)) + 0.5*alpha_const_term*num_data*np.log(sigma_n)

        # compute dL_dR
        Uv = np.dot(U, v)
        dL_dR = 0.5*(np.sum(U*np.dot(U, P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \
            + np.sum(np.square(Uv), 1))*beta_star**2

        # compute dL_dKmm
        vvT_P = tdot(v.reshape(-1, 1)) + P
        dL_dK = 0.5*(Kmmi - vvT_P)
        KiU = np.dot(Kmmi, U.T)
        dL_dK += self.alpha * np.dot(KiU*dL_dR, KiU.T)

        # compute dL_dU
        vY = np.dot(v.reshape(-1, 1), Y.T)
        dL_dU = vY - np.dot(vvT_P, U.T)
        dL_dU *= beta_star
        dL_dU -= self.alpha * 2.*KiU*dL_dR

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
        dL_dthetaL += 0.5*alpha_const_term*num_data / sigma_n
        grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag': dL_dR * self.alpha, 'dL_dKnm': dL_dU.T, 'dL_dthetaL': dL_dthetaL}

        # construct a posterior object
        post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

        return post, log_marginal, grad_dict
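For intuition on the alpha limits stated in the docstring: the effective noise computed above is sigma_star = sigma_n + alpha * (Knn - Qnn), where Qnn = diag(Knm Kmm^-1 Kmn). As alpha -> 0 this tends to the homogeneous noise sigma_n (the VarDTC regime), while alpha = 1 adds the full FITC correction. A small standalone numpy sketch of just that quantity; the kernel, inputs, inducing points and noise below are illustrative placeholders, not values from the commit:

import numpy as np
import GPy

kern = GPy.kern.RBF(1)
X = np.random.uniform(-1, 1, (20, 1))
Z = np.random.uniform(-1, 1, (5, 1))
sigma_n = 0.01

Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
Kmm = kern.K(Z) + 1e-6 * np.eye(Z.shape[0])
Qnn = np.sum(Knm * np.linalg.solve(Kmm, Knm.T).T, axis=1)  # diag of Knm Kmm^-1 Kmn

for alpha in (1e-6, 0.5, 1.0):
    # tends to sigma_n as alpha -> 0 (VarDTC); full FITC correction at alpha = 1
    sigma_star = sigma_n + alpha * (Knn - Qnn)
    print(alpha, sigma_star[:3])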
Unit tests for PEP (second new file)
@@ -0,0 +1,94 @@
# Copyright (c) 2014, James Hensman, 2016, Thang Bui
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import unittest
import numpy as np
import GPy


class PEPgradienttest(unittest.TestCase):
    def setUp(self):
        ######################################
        # # 1 dimensional example
        np.random.seed(10)

        N = 20
        # sample inputs and outputs
        self.X1D = np.random.uniform(-3., 3., (N, 1))
        self.Y1D = np.sin(self.X1D) + np.random.randn(N, 1) * 0.05

        ######################################
        # # 2 dimensional example

        # sample inputs and outputs
        self.X2D = np.random.uniform(-3., 3., (N, 2))
        self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(N, 1) * 0.05

        #######################################
        # # more datapoints, check in alpha limits, the log marginal likelihood
        # # is consistent with FITC and VFE/Var_DTC
        M = 5
        np.random.seed(42)
        self.X1 = np.c_[np.linspace(-1., 1., N)]
        self.Y1 = np.sin(self.X1) + np.random.randn(N, 1) * 0.05
        self.kernel = GPy.kern.RBF(input_dim=1, lengthscale=0.5, variance=1)
        self.Z = np.random.uniform(-1, 1, (M, 1))
        self.lik_noise_var = 0.01

    def test_pep_1d_gradients(self):
        m = GPy.models.SparseGPRegression(self.X1D, self.Y1D)
        m.inference_method = GPy.inference.latent_function_inference.PEP(alpha=np.random.rand())
        self.assertTrue(m.checkgrad())

    def test_pep_2d_gradients(self):
        m = GPy.models.SparseGPRegression(self.X2D, self.Y2D)
        m.inference_method = GPy.inference.latent_function_inference.PEP(alpha=np.random.rand())
        self.assertTrue(m.checkgrad())

    def test_pep_vfe_consistency(self):
        vfe_model = GPy.models.SparseGPRegression(
            self.X1,
            self.Y1,
            kernel=self.kernel,
            Z=self.Z
        )
        vfe_model.inference_method = GPy.inference.latent_function_inference.VarDTC()
        vfe_model.Gaussian_noise.variance = self.lik_noise_var
        vfe_lml = vfe_model.log_likelihood()

        pep_model = GPy.models.SparseGPRegression(
            self.X1,
            self.Y1,
            kernel=self.kernel,
            Z=self.Z
        )
        pep_model.inference_method = GPy.inference.latent_function_inference.PEP(alpha=1e-5)
        pep_model.Gaussian_noise.variance = self.lik_noise_var
        pep_lml = pep_model.log_likelihood()

        self.assertAlmostEqual(vfe_lml[0, 0], pep_lml[0], delta=abs(0.01*pep_lml[0]))

    def test_pep_fitc_consistency(self):
        fitc_model = GPy.models.SparseGPRegression(
            self.X1D,
            self.Y1D,
            kernel=self.kernel,
            Z=self.Z
        )
        fitc_model.inference_method = GPy.inference.latent_function_inference.FITC()
        fitc_model.Gaussian_noise.variance = self.lik_noise_var
        fitc_lml = fitc_model.log_likelihood()

        pep_model = GPy.models.SparseGPRegression(
            self.X1D,
            self.Y1D,
            kernel=self.kernel,
            Z=self.Z
        )
        pep_model.inference_method = GPy.inference.latent_function_inference.PEP(alpha=1)
        pep_model.Gaussian_noise.variance = self.lik_noise_var
        pep_lml = pep_model.log_likelihood()

        self.assertAlmostEqual(fitc_lml, pep_lml[0], delta=abs(0.001*pep_lml[0]))