Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of transition_hyperparameters #25

Merged
merged 14 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions cxx/distributions/adapter.hh
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,14 @@ class DistributionAdapter : Distribution<std::string> {

double logp_score() const { return d->logp_score(); }

std::string sample() {
SampleType s = d->sample();
return to_string(s);
}
std::string sample() {
SampleType s = d->sample();
return to_string(s);
}

void transition_hyperparameters() {
d->transition_hyperparameters();
}

~DistributionAdapter() { delete d; }
};
5 changes: 5 additions & 0 deletions cxx/distributions/base.hh
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,10 @@ class Distribution {
// PRNG parameter.
virtual SampleType sample() = 0;

// Transition the hyperparameters. The probability of transitioning to
// a particular set of hyperparameters should be proportional to
// e^logp_score() under those hyperparameters.
virtual void transition_hyperparameters() = 0;

virtual ~Distribution() = default;
};
100 changes: 63 additions & 37 deletions cxx/distributions/beta_bernoulli.hh
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,68 @@
#include "util_math.hh"

class BetaBernoulli : public Distribution<double> {
public:
double alpha = 1; // hyperparameter
double beta = 1; // hyperparameter
int s = 0; // sum of observed values
std::mt19937* prng;
public:
double alpha = 1; // hyperparameter
double beta = 1; // hyperparameter
int s = 0; // sum of observed values
std::mt19937* prng;

// BetaBernoulli does not take ownership of prng.
BetaBernoulli(std::mt19937* prng) { this->prng = prng; }
void incorporate(const double& x) {
assert(x == 0 || x == 1);
++N;
s += x;
}
void unincorporate(const double& x) {
assert(x == 0 || x == 1);
--N;
s -= x;
assert(0 <= s);
assert(0 <= N);
}
double logp(const double& x) const {
assert(x == 0 || x == 1);
double log_denom = log(N + alpha + beta);
double log_numer = x ? log(s + alpha) : log(N - s + beta);
return log_numer - log_denom;
}
double logp_score() const {
double v1 = lbeta(s + alpha, N - s + beta);
double v2 = lbeta(alpha, beta);
return v1 - v2;
}
double sample() {
double p = exp(logp(1));
std::vector<int> items{0, 1};
std::vector<double> weights{1 - p, p};
int idx = choice(weights, prng);
return items[idx];
}
std::vector<double> alpha_grid;
std::vector<double> beta_grid;

// BetaBernoulli does not take ownership of prng.
BetaBernoulli(std::mt19937 *prng) {
this->prng = prng;
alpha_grid = log_linspace(1e-4, 1e4, 10, true);
beta_grid = log_linspace(1e-4, 1e4, 10, true);
}
void incorporate(const double& x){
assert(x == 0 || x == 1);
++N;
s += x;
}
void unincorporate(const double& x) {
assert(x == 0 || x ==1);
--N;
s -= x;
assert(0 <= s);
assert(0 <= N);
}
double logp(const double& x) const {
assert(x == 0 || x == 1);
double log_denom = log(N + alpha + beta);
double log_numer = x ? log(s + alpha) : log(N - s + beta);
return log_numer - log_denom;
}
double logp_score() const {
double v1 = lbeta(s + alpha, N - s + beta);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our current declaration / implementation of lbeta in util_math.hh and util_math.cc declare the arguments as ints. My compiler issued warnings that floating-point numbers were being coerced. I think now that we have hyperparameters that can be non-integer values, we'll want to change the argument types of lbeta (I think the implementation should stay the same).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

double v2 = lbeta(alpha, beta);
return v1 - v2;
}
double sample() {
double p = exp(logp(1));
std::vector<int> items {0, 1};
std::vector<double> weights {1-p, p};
int idx = choice(weights, prng);
return items[idx];
}
void transition_hyperparameters() {
std::vector<double> logps;
std::vector<std::pair<double, double>> hypers;
// C++ doesn't yet allow range for-loops over existing variables. Sigh.
for (double alphat : alpha_grid) {
for (double betat : beta_grid) {
alpha = alphat;
beta = betat;
double lp = logp_score();
if (!std::isnan(lp)) {
logps.push_back(logp_score());
hypers.push_back(std::make_pair(alpha, beta));
}
}
}
int i = sample_from_logps(logps, prng);
alpha = hypers[i].first;
beta = hypers[i].second;
}
};
16 changes: 16 additions & 0 deletions cxx/distributions/beta_bernoulli_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,19 @@ BOOST_AUTO_TEST_CASE(test_simple) {
BOOST_TEST(bb.logp(1) == -0.4054651081081645, tt::tolerance(1e-6));
BOOST_TEST(bb.logp_score() == -0.69314718055994529, tt::tolerance(1e-6));
}

BOOST_AUTO_TEST_CASE(test_transition_hyperparameters)
{
std::mt19937 prng;
BetaBernoulli bb(&prng);

bb.transition_hyperparameters();

bb.incorporate(0);
for (int i = 0; i < 100; ++i) {
bb.incorporate(1);
}

bb.transition_hyperparameters();
BOOST_TEST(bb.alpha > bb.beta);
}
16 changes: 16 additions & 0 deletions cxx/distributions/bigram.hh
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,20 @@ class Bigram : public Distribution<std::string> {
trans_dist.alpha = alpha;
}
}

void transition_hyperparameters() {
std::vector<double> logps;
std::vector<double> alphas;
// C++ doesn't yet allow range for-loops over existing variables. Sigh.
for (double alphat : ALPHA_GRID) {
set_alpha(alphat);
double lp = logp_score();
if (!std::isnan(lp)) {
logps.push_back(logp_score());
alphas.push_back(alphat);
}
}
int i = sample_from_logps(logps, prng);
set_alpha(alphas[i]);
}
};
14 changes: 14 additions & 0 deletions cxx/distributions/bigram_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,17 @@ BOOST_AUTO_TEST_CASE(test_set_alpha) {

BOOST_TEST(first_lp != bb.logp_score());
}

BOOST_AUTO_TEST_CASE(transition_hyperparameters) {
std::mt19937 prng;
Bigram bb(&prng);

bb.transition_hyperparameters();
for (int i = 0; i < 100; ++i) {
bb.incorporate("abcdefghijklmnopqrstuvwxyz");
}

bb.transition_hyperparameters();

BOOST_TEST(bb.alpha < 1.0);
}
118 changes: 71 additions & 47 deletions cxx/distributions/dirichlet_categorical.hh
Original file line number Diff line number Diff line change
Expand Up @@ -3,57 +3,81 @@

#pragma once
#include <algorithm>
#include <cassert>
#include <random>

#include "base.hh"
#include "util_math.hh"

#define ALPHA_GRID {1e-4, 1e-3, 1e-2, 1e-1, 1.0, 10.0, 100.0, 1000.0, 10000.0}

class DirichletCategorical : public Distribution<double> {
public:
double alpha = 1; // hyperparameter (applies to all categories)
std::vector<int> counts; // counts of observed categories
int n; // Total number of observations.
std::mt19937* prng;
public:
double alpha = 1; // hyperparameter (applies to all categories)
std::vector<int> counts; // counts of observed categories
int n; // Total number of observations.
std::mt19937* prng;

// DirichletCategorical does not take ownership of prng.
DirichletCategorical(std::mt19937* prng,
int k) { // k is number of categories
this->prng = prng;
counts = std::vector<int>(k, 0);
n = 0;
}
void incorporate(const double& x) {
assert(x >= 0 && x < counts.size());
counts[size_t(x)] += 1;
++n;
}
void unincorporate(const double& x) {
const size_t y = x;
assert(y < counts.size());
counts[y] -= 1;
--n;
assert(0 <= counts[y]);
assert(0 <= n);
}
double logp(const double& x) const {
assert(x >= 0 && x < counts.size());
const double numer = log(alpha + counts[size_t(x)]);
const double denom = log(n + alpha * counts.size());
return numer - denom;
}
double logp_score() const {
const size_t k = counts.size();
const double a = alpha * k;
const double lg = std::transform_reduce(
counts.cbegin(), counts.cend(), 0, std::plus{},
[&](size_t y) -> double { return lgamma(y + alpha); });
return lgamma(a) - lgamma(a + n) + lg - k * lgamma(alpha);
}
double sample() {
std::vector<double> weights(counts.size());
std::transform(counts.begin(), counts.end(), weights.begin(),
[&](size_t y) -> double { return y + alpha; });
int idx = choice(weights, prng);
return double(idx);
}
// DirichletCategorical does not take ownership of prng.
DirichletCategorical(std::mt19937 *prng, int k) { // k is number of categories
this->prng = prng;
counts = std::vector<int>(k, 0);
n = 0;
}
void incorporate(const double& x) {
assert(x >= 0 && x < counts.size());
counts[size_t(x)] += 1;
++n;
}
void unincorporate(const double& x) {
const size_t y = x;
assert(y < counts.size());
counts[y] -= 1;
--n;
assert(0 <= counts[y]);
assert(0 <= n);
}
double logp(const double& x) const {
assert(x >= 0 && x < counts.size());
const double numer = log(alpha + counts[size_t(x)]);
const double denom = log(n + alpha * counts.size());
return numer - denom;
}
double logp_score() const {
const size_t k = counts.size();
const double a = alpha * k;
const double lg = std::transform_reduce(
counts.cbegin(),
counts.cend(),
0,
std::plus{},
[&](size_t y) -> double {return lgamma(y + alpha); }
);
return lgamma(a) - lgamma(a + n) + lg - k * lgamma(alpha);
}
double sample() {
std::vector<double> weights(counts.size());
std::transform(
counts.begin(),
counts.end(),
weights.begin(),
[&](size_t y) -> double { return y + alpha; }
);
int idx = choice(weights, prng);
return double(idx);
}
void transition_hyperparameters() {
std::vector<double> logps;
std::vector<double> alphas;
// C++ doesn't yet allow range for-loops over existing variables. Sigh.
for (double alphat : ALPHA_GRID) {
alpha = alphat;
double lp = logp_score();
if (!std::isnan(lp)) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a sense of when nans are showing up?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None in dirichlet_categorical, at least with the current unit test, but I did see them previously in BetaBernoulli's transition_hyperparamters.

Huh. When I rerun that now, I no longer see any Nan's. I wonder if the above lbeta change fixed them.

logps.push_back(logp_score());
alphas.push_back(alpha);
}
}
int i = sample_from_logps(logps, prng);
alpha = alphas[i];
}
};
39 changes: 39 additions & 0 deletions cxx/distributions/dirichlet_categorical_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Apache License, Version 2.0, refer to LICENSE.txt

#define BOOST_TEST_MODULE test DirichletCategorical

#include <boost/test/included/unit_test.hpp>
#include "distributions/dirichlet_categorical.hh"
namespace tt = boost::test_tools;

BOOST_AUTO_TEST_CASE(test_simple)
{
std::mt19937 prng;
DirichletCategorical dc(&prng, 10);

for (int i = 0; i < 10; ++i) {
dc.incorporate(i);
}
for (int i = 0; i < 10; i += 2) {
dc.unincorporate(i);
}

BOOST_TEST(dc.logp(1) == -2.0149030205422647, tt::tolerance(1e-6));
BOOST_TEST(dc.logp_score() == -12.389393702657209, tt::tolerance(1e-6));
}

BOOST_AUTO_TEST_CASE(test_transition_hyperparameters)
{
std::mt19937 prng;
DirichletCategorical dc(&prng, 10);

dc.transition_hyperparameters();

for (int i = 0; i < 100; ++i) {
dc.incorporate(i % 10);
}

dc.transition_hyperparameters();
BOOST_TEST(dc.alpha > 1.0);
}

Loading
Loading