Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ZeroMeanNormal distribution and change GaussianEmission to use it. #44

Merged
merged 7 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions cxx/distributions/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ cc_library(
"//:util_math",
],
)
# Distribution over reals known to have zero mean, with an Inverse-Gamma
# conjugate prior on the variance.  Used by //emissions:gaussian.
cc_library(
name = "zero_mean_normal",
srcs = ["zero_mean_normal.cc"],
hdrs = ["zero_mean_normal.hh"],
visibility = ["//:__subpackages__"],
deps = [
":base",
"//:util_math",
],
)

cc_test(
name = "adapter_test",
Expand Down Expand Up @@ -150,3 +160,14 @@ cc_test(
"@boost//:test",
],
)

# Unit tests for :zero_mean_normal, including Boost-quadrature
# marginalization checks of the closed-form marginal likelihood.
cc_test(
name = "zero_mean_normal_test",
srcs = ["zero_mean_normal_test.cc"],
deps = [
":zero_mean_normal",
"@boost//:algorithm",
"@boost//:math",
"@boost//:test",
],
)
87 changes: 87 additions & 0 deletions cxx/distributions/zero_mean_normal.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright 2024
// See LICENSE.txt

#include "zero_mean_normal.hh"

#include <cmath>
#include <numbers>

// Return the log density at `x` of a zero-mean, location-scaled Student's t
// distribution with `v` degrees of freedom and squared scale `variance`.
// https://en.wikipedia.org/wiki/Student%27s_t-distribution#Density_and_first_two_moments
double log_t_distribution(const double& x,
                          const double& v,
                          const double& variance) {
  // Same double as std::numbers::pi; spelled out so the helper has no
  // dependency beyond <cmath>.
  constexpr double kPi = 3.14159265358979323846;
  const double half_v_plus_one = (v + 1.0) / 2.0;
  // Log of the normalizing constant of the density.
  const double log_norm =
      lgamma(half_v_plus_one) - lgamma(v / 2.0)
      - 0.5 * log(kPi * v * variance);
  // Log of the power-law kernel (1 + x^2 / (v * variance))^{-(v+1)/2}.
  const double log_kernel =
      -half_v_plus_one * log(1.0 + x * x / variance / v);
  return log_norm + log_kernel;
}

// Add observation x to the sufficient statistics.  `var` is maintained as
// the running mean of x^2 via an incremental (Welford-style) update.
void ZeroMeanNormal::incorporate(const double& x) {
  ++N;
  const double delta = x * x - var;
  var += delta / N;
}

// Remove a previously incorporated observation x, inverting the running-mean
// update of `var`.  Assumes x was in fact incorporated earlier.
void ZeroMeanNormal::unincorporate(const double& x) {
  const int prev_count = N;
  --N;
  if (N == 0) {
    // No observations left: reset the statistic instead of dividing by zero.
    var = 0.0;
  } else {
    var = (var * prev_count - x * x) / N;
  }
}


// Log posterior-predictive density of x given the incorporated data: a
// Student's t with 2*alpha_n degrees of freedom and squared scale
// beta_n / alpha_n.
// Equation (119) of https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
double ZeroMeanNormal::logp(const double& x) const {
  double alpha_n = alpha + N / 2.0;
  // `var` is the mean of x_i^2, so var * N is the sum of squares.  The
  // conjugate update for InverseGamma(alpha, beta) is beta_n =
  // beta + sum(x_i^2) / 2; the previous `beta + var * N` double-counted it.
  double beta_n = beta + 0.5 * var * N;
  double t_variance = beta_n / alpha_n;
  return log_t_distribution(x, 2.0 * alpha_n, t_variance);
}

// Log marginal likelihood of all incorporated data:
//   log[ beta^alpha / Gamma(alpha) * Gamma(alpha_n) / beta_n^alpha_n
//        * (2*pi)^(-N/2) ]
// with alpha_n = alpha + N/2 and beta_n = beta + sum(x_i^2)/2.
// See page 10 of
// https://j-zin.github.io/files/Conjugate_Bayesian_analysis_of_common_distributions.pdf
double ZeroMeanNormal::logp_score() const {
  double alpha_n = alpha + N / 2.0;
  // var * N == sum of squares; the previous `2.0 * beta + ...` term made the
  // empty-data score -alpha*log(2) instead of the correct 0.
  double beta_n = beta + 0.5 * var * N;
  return alpha * log(beta)
      - lgamma(alpha)
      - (N / 2.0) * log(2.0 * std::numbers::pi)
      + lgamma(alpha_n)
      - alpha_n * log(beta_n);
}

// Draw from the posterior predictive: t-distributed with 2*alpha_n degrees
// of freedom, scaled by sqrt(beta_n / alpha_n).
double ZeroMeanNormal::sample() {
  double alpha_n = alpha + N / 2.0;
  // Same conjugate update as logp(): beta_n = beta + sum(x_i^2) / 2.
  double beta_n = beta + 0.5 * var * N;
  double t_variance = beta_n / alpha_n;
  std::student_t_distribution<double> d(2.0 * alpha_n);
  return d(*prng) * sqrt(t_variance);
}

// Gibbs step over the (alpha, beta) hyperparameters: score every grid point
// against the current sufficient statistics and sample one proportionally to
// its marginal likelihood.
void ZeroMeanNormal::transition_hyperparameters() {
  // Log-spaced candidate values shared by alpha and beta; a local constant
  // replaces the former ALPHA_GRID/BETA_GRID macros (this was their only use).
  static const std::vector<double> kGrid = {
      1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0, 1000.0, 10000.0};

  std::vector<double> logps;
  std::vector<std::pair<double, double>> hypers;
  for (double a : kGrid) {
    for (double b : kGrid) {
      alpha = a;
      beta = b;
      double lp = logp_score();
      if (!std::isnan(lp)) {
        // Reuse lp; the original recomputed logp_score() a second time here.
        logps.push_back(lp);
        hypers.emplace_back(a, b);
      }
    }
  }

  // NOTE(review): if every grid point scored NaN, hypers is empty and the
  // index below is invalid — presumably that can't happen; confirm.
  int i = sample_from_logps(logps, prng);
  alpha = hypers[i].first;
  beta = hypers[i].second;
}
46 changes: 46 additions & 0 deletions cxx/distributions/zero_mean_normal.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2024
// See LICENSE.txt

#pragma once
#include <random>
#include <tuple>
#include <variant>

#include "base.hh"
#include "util_math.hh"

// A normal distribution that is known to have zero mean.
// This class is not intended to be directly used as a data model, but
// rather to support the Gaussian emissions model.
class ZeroMeanNormal : public Distribution<double> {
 public:
  // Hyperparameters of the Inverse-Gamma conjugate prior on the variance.
  double alpha = 1.0;
  double beta = 1.0;

  // Sufficient statistic of observed data: running mean of x^2 over the N
  // incorporated observations (N is inherited from Distribution).
  double var = 0.0;

  std::mt19937* prng;

  // ZeroMeanNormal does not take ownership of prng.
  ZeroMeanNormal(std::mt19937* prng) { this->prng = prng; }

  // Add / remove an observation from the sufficient statistics.
  void incorporate(const double& x);
  void unincorporate(const double& x);

  // NOTE(review): declared but no definition appears in zero_mean_normal.cc —
  // confirm it is implemented or drop the declaration.
  void posterior_hypers(double* mprime, double* sprime) const;

  // Log posterior-predictive density of x.
  double logp(const double& x) const;

  // Log marginal likelihood of all incorporated data.
  double logp_score() const;

  // Draw from the posterior predictive.
  double sample();

  // Grid-Gibbs resampling of (alpha, beta).
  void transition_hyperparameters();

  // Disable copying.
  ZeroMeanNormal& operator=(const ZeroMeanNormal&) = delete;
  ZeroMeanNormal(const ZeroMeanNormal&) = delete;
};
164 changes: 164 additions & 0 deletions cxx/distributions/zero_mean_normal_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
// Apache License, Version 2.0, refer to LICENSE.txt

#define BOOST_TEST_MODULE test Normal

#include "distributions/zero_mean_normal.hh"

#include <boost/math/distributions/inverse_gamma.hpp>
#include <boost/math/distributions/normal.hpp>
#include <boost/math/quadrature/gauss_kronrod.hpp>
#include <boost/test/included/unit_test.hpp>

#include "util_math.hh"
namespace bm = boost::math;
namespace tt = boost::test_tools;

BOOST_AUTO_TEST_CASE(test_log_prob) {
  std::mt19937 prng;
  ZeroMeanNormal nd(&prng);

  // Prior on the variance is InverseGamma(alpha, beta).
  bm::inverse_gamma_distribution inv_gamma_dist(nd.alpha, nd.beta);
  auto quad = bm::quadrature::gauss_kronrod<double, 100>();

  for (int i = 0; i < 10; ++i) {
    nd.incorporate(i);

    // Because the mean is fixed at zero, the marginal likelihood needs only a
    // single integral over the variance (normal_test.cc must also integrate
    // over the unknown mean; the copied double-integral version additionally
    // built a normal prior with standard deviation sqrt(ig * 0) == 0, which
    // is degenerate).
    auto integrand = [&inv_gamma_dist, &i](double sigma2) {
      bm::normal_distribution normal_dist(0.0, sqrt(sigma2));
      double result = bm::pdf(inv_gamma_dist, sigma2);
      for (int j = 0; j <= i; ++j) {
        result *= bm::pdf(normal_dist, j);
      }
      return result;
    };

    double result =
        quad.integrate(integrand, 0., std::numeric_limits<double>::infinity());

    BOOST_TEST(nd.logp_score() == log(result), tt::tolerance(1e-4));
  }
  BOOST_TEST(nd.N == 10);
}


BOOST_AUTO_TEST_CASE(simple) {
  // Basic bookkeeping: N tracks the number of incorporated observations.
  std::mt19937 prng;
  ZeroMeanNormal zmn(&prng);

  zmn.incorporate(5.0);
  zmn.incorporate(-2.0);
  BOOST_TEST(zmn.N == 2);

  // Swapping one observation for another leaves the count unchanged.
  zmn.unincorporate(5.0);
  zmn.incorporate(7.0);
  BOOST_TEST(zmn.N == 2);

  zmn.unincorporate(-2.0);
  BOOST_TEST(zmn.N == 1);
}

BOOST_AUTO_TEST_CASE(test_expected_vars) {
  // `var` should always equal the running mean of the squared observations.
  std::mt19937 prng;
  ZeroMeanNormal zmn(&prng);
  const std::vector<double> entries{-1., 2., 4., 5., 6., 20.};
  const std::vector<double> expected_vars{1.0, 2.5, 7.0, 11.5, 16.4,
                                          241.0 / 3.0};

  // Check the statistic after each incorporate ...
  for (int i = 0; i < std::ssize(entries); ++i) {
    zmn.incorporate(entries[i]);
    BOOST_TEST(zmn.var == expected_vars[i], tt::tolerance(1e-6));
  }

  // ... and that each unincorporate restores the previous value.
  for (int i = std::ssize(entries) - 1; i > 0; --i) {
    zmn.unincorporate(entries[i]);
    BOOST_TEST(zmn.var == expected_vars[i - 1], tt::tolerance(1e-6));
  }
}

BOOST_AUTO_TEST_CASE(no_nan_after_incorporate_unincorporate) {
  std::mt19937 prng;
  ZeroMeanNormal nd(&prng);

  nd.incorporate(10.0);
  nd.unincorporate(10.0);

  // Returning to N == 0 must reset var rather than divide by zero.
  BOOST_TEST(nd.N == 0);
  BOOST_TEST(!std::isnan(nd.var));
}

BOOST_AUTO_TEST_CASE(logp_before_incorporate) {
  std::mt19937 prng;
  ZeroMeanNormal nd(&prng);

  // With alpha = beta = 1 and no data, the posterior predictive is a t
  // distribution with 2 degrees of freedom and unit scale;
  // log pdf(6) = -5.4563792395895785.
  BOOST_TEST(nd.logp(6.0) == -5.4563792395895785, tt::tolerance(1e-6));
  // The marginal likelihood of an empty data set is exactly 1, so its log
  // must be 0 (the former expectation of -log(2) encoded a bug in
  // logp_score's beta_n term).
  BOOST_TEST(std::abs(nd.logp_score()) < 1e-12);

  nd.incorporate(5.0);
  nd.unincorporate(5.0);

  // Incorporate followed by unincorporate must restore the prior exactly.
  BOOST_TEST(nd.N == 0);
  BOOST_TEST(nd.logp(6.0) == -5.4563792395895785, tt::tolerance(1e-6));
  BOOST_TEST(std::abs(nd.logp_score()) < 1e-12);
}

BOOST_AUTO_TEST_CASE(sample) {
  // After seeing many identical observations, posterior-predictive samples
  // should stay on the same scale as the data.
  std::mt19937 prng;
  ZeroMeanNormal zmn(&prng);

  for (int rep = 0; rep < 1000; ++rep) {
    zmn.incorporate(42.0);
  }

  const double draw = zmn.sample();
  BOOST_TEST(std::abs(draw) < 100.0);
}

BOOST_AUTO_TEST_CASE(incorporate_raises_logp) {
  // Repeatedly observing the same value should make it ever more likely
  // under the posterior predictive.
  std::mt19937 prng;
  ZeroMeanNormal zmn(&prng);

  double prev_lp = zmn.logp(10.0);
  for (int rep = 0; rep < 8; ++rep) {
    zmn.incorporate(10.0);
    const double cur_lp = zmn.logp(10.0);
    BOOST_TEST(cur_lp > prev_lp);
    prev_lp = cur_lp;
  }
}

BOOST_AUTO_TEST_CASE(prior_prefers_origin) {
  // Because the mean is pinned at zero, data near the origin should score a
  // higher marginal likelihood than data far from it.
  std::mt19937 prng;
  ZeroMeanNormal at_origin(&prng);
  ZeroMeanNormal far_away(&prng);

  for (int rep = 0; rep < 100; ++rep) {
    at_origin.incorporate(0.0);
    far_away.incorporate(50.0);
  }

  BOOST_TEST(at_origin.logp_score() > far_away.logp_score());
}

BOOST_AUTO_TEST_CASE(transition_hyperparameters) {
  std::mt19937 prng;
  ZeroMeanNormal zmn(&prng);

  // Must be safe to call before any data is incorporated.
  zmn.transition_hyperparameters();

  for (int rep = 0; rep < 100; ++rep) {
    zmn.incorporate(5.0);
  }

  // With data of scale ~5, the resampled beta should move above its
  // smallest grid values.
  zmn.transition_hyperparameters();
  BOOST_TEST(zmn.beta > 1.0);
}
5 changes: 4 additions & 1 deletion cxx/emissions/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ cc_library(
name = "gaussian",
srcs = ["gaussian.hh"],
visibility = ["//:__subpackages__"],
deps = [":base"],
deps = [
":base",
"//distributions:zero_mean_normal",
],
)

# TODO(thomaswc): Fix and re-enable.
Expand Down
26 changes: 8 additions & 18 deletions cxx/emissions/gaussian.hh
Original file line number Diff line number Diff line change
@@ -1,45 +1,35 @@
#pragma once

#include "emissions/base.hh"
#include "distributions/zero_mean_normal.hh"

class GaussianEmission : public Emission<double> {
public:
// We use an Inverse gamma conjugate prior, so our hyperparameters are
double alpha = 1.0;
double beta = 1.0;

// Sufficient statistics of observed data.
double var = 0.0;
ZeroMeanNormal zmn(nullptr);

GaussianEmission() {}

void incorporate(const std::pair<double, double>& x) {
++N;
double y = x.second - x.first;
var += y * y;
zmn.incorporate(x.second - x.first);
}

void unincorporate(const std::pair<double, double>& x) {
--N;
double y = x.second - x.first;
var -= y * y;
zmn.unincorporate(x.second - x.first);
}

double logp(const std::pair<double, double>& x) const {
double y = x.second - x.first;
// TODO(thomaswc): This.
return 0.0;
return zmn.logp(x.second - x.first);
}

double logp_score() const {
// TODO(thomaswc): This.
return 0.0;
return zmn.logp_score();
}

double sample_corrupted(const double& clean, std::mt19937* prng) {
double smoothed_var = var; // TODO(thomaswc): Fix
std::normal_distribution<double> d(clean, sqrt(smoothed_var));
return d(*prng);
zmn.prng = prng;
return clean + zmn.sample();
}

double propose_clean(const std::vector<double>& corrupted,
Expand Down
Loading