handley-lab · williamjameshandley · Nov 16, 2023 · Nov 13, 2023 · Nov 13, 2023 · Nov 13, 2023
diff --git a/README.rst b/README.rst
@@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference
 =======================================
 :lsbi: Linear Simulation Based Inference
 :Author: Will Handley & David Yallup
-:Version: 0.7.0
+:Version: 0.8.0
 :Homepage: https://github.com/handley-lab/lsbi
 :Documentation: http://lsbi.readthedocs.io/
 

diff --git a/docs/source/lsbi.rst b/docs/source/lsbi.rst
@@ -32,3 +32,12 @@ lsbi.stats module
    :undoc-members:
 
 
+lsbi.utils module
+-----------------
+
+.. automodule:: lsbi.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
diff --git a/lsbi/_version.py b/lsbi/_version.py
@@ -1 +1 @@
-__version__ = '0.7.0'
+__version__ = '0.8.0'
diff --git a/lsbi/model.py b/lsbi/model.py
@@ -2,12 +2,8 @@
 import numpy as np
 from lsbi.stats import (mixture_multivariate_normal,
                         multivariate_normal)
-from numpy.linalg import solve, inv, slogdet
-
-
-def logdet(A):
-    """log(abs(det(A)))."""
-    return slogdet(A)[1]
+from numpy.linalg import solve, inv
+from lsbi.utils import logdet
 
 
 class LinearModel(object):

diff --git a/lsbi/stats.py b/lsbi/stats.py
@@ -2,7 +2,9 @@
 import numpy as np
 import scipy.stats
 from scipy.stats._multivariate import multivariate_normal_frozen
+from scipy.special import logsumexp, erf
 from numpy.linalg import inv
+from lsbi.utils import bisect
 
 
 class multivariate_normal(multivariate_normal_frozen):  # noqa: D101
@@ -43,6 +45,33 @@ def _bar(self, indices):
         k[indices] = False
         return k
 
+    def bijector(self, x, inverse=False):
+        """Map between the unit hypercube and this Gaussian.
+
+        Forward (inverse=False): points x in [0, 1]^d are pushed through the
+        standard normal quantile function, then coloured by the Cholesky
+        factor of the covariance and shifted by the mean. Inverse: the
+        steps are undone in reverse order, taking theta in R^d back to x.
+
+        Parameters
+        ----------
+        x : array_like, shape (..., d)
+            Hypercube points if not inverse, otherwise physical points.
+        inverse : bool, optional, default=False
+            If True, transform from physical to hypercube space.
+
+        Returns
+        -------
+        ndarray, shape (..., d)
+        """
+        chol = np.linalg.cholesky(self.cov)
+        if not inverse:
+            z = scipy.stats.norm.ppf(x)
+            return self.mean + np.einsum('ij,...j->...i', chol, z)
+        # Whiten with the inverse Cholesky factor, then apply the normal CDF.
+        z = np.einsum('ij,...j->...i', inv(chol), x-self.mean)
+        return scipy.stats.norm.cdf(z)
+
 
 class mixture_multivariate_normal(object):
     """Mixture of multivariate normal distributions.
@@ -136,3 +165,60 @@ def _bar(self, indices):
         k = np.ones(self.means.shape[-1], dtype=bool)
         k[indices] = False
         return k
+
+    def bijector(self, x, inverse=False):
+        """Bijector between U([0, 1])^d and the distribution.
+
+        Coordinate i is pushed through the CDF of a 1d Gaussian mixture whose
+        means and weights depend on the preceding coordinates 0..i-1; the
+        forward direction inverts that CDF numerically with bisect.
+
+        Parameters
+        ----------
+        x : array_like, shape (..., d)
+            Points in [0, 1]^d, or physical points theta in R^d if inverse.
+        inverse : bool, optional, default=False
+            If True, map from physical to hypercube space.
+
+        Returns
+        -------
+        ndarray, shape (..., d)
+        """
+        x = np.asarray(x)  # lists are accepted, as array_like promises
+        theta = np.empty_like(x)
+        if inverse:
+            theta[:] = x  # the input holds theta; x becomes the output
+            x = np.empty_like(x)
+        for i in range(x.shape[-1]):  # one coordinate at a time
+            m = self.means[..., :, i] + np.einsum('ia,iab,...ib->...i',
+                                                  self.covs[:, i, :i],
+                                                  inv(self.covs[:, :i, :i]),
+                                                  theta[..., None, :i]
+                                                  - self.means[:, :i])
+            c = self.covs[:, i, i] - np.einsum('ia,iab,ib->i',
+                                               self.covs[:, i, :i],
+                                               inv(self.covs[:, :i, :i]),
+                                               self.covs[:, i, :i])
+            dist = mixture_multivariate_normal(self.means[:, :i],
+                                               self.covs[:, :i, :i],
+                                               self.logA)
+            logA = (self.logA + dist.logpdf(theta[..., :i], reduce=False)
+                    - dist.logpdf(theta[..., :i])[..., None])
+            A = np.exp(logA - logsumexp(logA, axis=-1)[..., None])
+
+            def f(t):
+                return (A * 0.5 * (1 + erf((t[..., None] - m)/np.sqrt(2 * c)))
+                        ).sum(axis=-1) - y
+
+            if inverse:
+                y = 0  # f(theta_i) is then the mixture CDF value itself
+                x[..., i] = f(theta[..., i])
+            else:
+                y = x[..., i]  # root of f is where the CDF equals x_i
+                a = (m - 10 * np.sqrt(c)).min(axis=-1)  # 10-sigma bracket
+                b = (m + 10 * np.sqrt(c)).max(axis=-1)
+                theta[..., i] = bisect(f, a, b)
+        if inverse:
+            return x
+        else:
+            return theta
diff --git a/lsbi/utils.py b/lsbi/utils.py
@@ -0,0 +1,57 @@
+"""Utility functions for lsbi."""
+import numpy as np
+
+
+def logdet(A):
+    """Log of the absolute determinant of A (the sign is discarded)."""
+    return np.linalg.slogdet(A)[-1]
+
+
+def quantise(f, x, tol=1e-8, args=()):
+    """Quantise f(x, *args) to zero within tolerance tol."""
+    y = np.atleast_1d(f(x, *args))
+    return np.where(np.abs(y) < tol, 0, y)
+
+
+def bisect(f, a, b, args=(), tol=1e-8):
+    """Vectorised simple bisection search.
+
+    Parameters
+    ----------
+    f : callable
+        Function to find the root of, called as f(x, *args).
+    a : array_like
+        Lower bound of the search interval.
+    b : array_like
+        Upper bound of the search interval.
+    args : tuple, optional
+        Extra arguments to `f`.
+    tol : float, optional
+        Absolute tolerance, on both the bracket width and |f| at a root.
+
+    Returns
+    -------
+    x : ndarray
+        Solution to f(x) = 0, broadcast over a, b and the output of f.
+
+    Raises
+    ------
+    ValueError
+        If f(a) and f(b) share a sign, so no root is bracketed.
+    """
+    a = np.array(a)
+    b = np.array(b)
+    while np.abs(a-b).max() > tol:
+        fa = quantise(f, a, tol, args)
+        fb = quantise(f, b, tol, args)
+        # An endpoint that is already a root collapses the interval onto it
+        a = np.where(fb == 0, b, a)
+        b = np.where(fa == 0, a, b)
+        if np.any(fa*fb > 0):
+            raise ValueError("f(a) and f(b) must have opposite signs")
+        # Halve the bracket, keeping the side on which f changes sign
+        q = (a+b)/2
+        fq = quantise(f, q, tol, args)
+        a = np.where((fq == 0) | (fa * fq > 0), q, a)
+        b = np.where((fq == 0) | (fb * fq > 0), q, b)
+    return (a+b)/2
diff --git a/tests/test_stats.py b/tests/test_stats.py
@@ -50,6 +50,72 @@ def test_mixture_multivariate_normal(k, d):
         assert mvns[0].logpdf(x).shape == mixture.logpdf(x).shape
 
 
+def test_mixture_multivariate_normal_bijector():
+    """Check the Gaussian mixture bijector.
+
+    Covers round-tripping, agreement with rvs, and shape preservation.
+    """
+    k = 4
+    d = 10
+    covs = scipy.stats.wishart.rvs(d, np.eye(d), size=k)
+    means = np.random.randn(k, d)
+    logA = np.log(scipy.stats.dirichlet.rvs(np.ones(k))[0])
+    model = mixture_multivariate_normal(means, covs, logA)
+
+    # The inverse map must undo the forward map
+    x = np.random.rand(1000, d)
+    theta = model.bijector(x)
+    assert_allclose(model.bijector(theta, inverse=True), x, atol=1e-6)
+
+    # Mapped uniforms should be distributed like direct draws, both
+    # marginally in every dimension and in their log-density
+    samples = model.rvs(1000)
+    for mapped, drawn in zip(theta.T, samples.T):
+        assert scipy.stats.kstest(mapped, drawn).pvalue > 1e-5
+
+    logps = model.logpdf(samples), model.logpdf(theta)
+    assert scipy.stats.kstest(*logps).pvalue > 1e-5
+
+    # Shapes survive for both unbatched and multiply-batched input
+    for shape in [(d,), (3, 4, d)]:
+        x = np.random.rand(*shape)
+        theta = model.bijector(x)
+        assert theta.shape == x.shape
+        assert model.bijector(theta, inverse=True).shape == x.shape
+
+
+def test_multivariate_normal_bijector():
+    """Check the Gaussian bijector.
+
+    Covers round-tripping, agreement with rvs, and shape preservation.
+    """
+    d = 10
+    cov = scipy.stats.wishart.rvs(d, np.eye(d))
+    mean = np.random.randn(d)
+    model = multivariate_normal(mean, cov)
+
+    # The inverse map must undo the forward map
+    x = np.random.rand(1000, d)
+    theta = model.bijector(x)
+    assert_allclose(model.bijector(theta, inverse=True), x, atol=1e-6)
+
+    # Mapped uniforms should be distributed like direct draws, both
+    # marginally in every dimension and in their log-density
+    samples = model.rvs(1000)
+    for mapped, drawn in zip(theta.T, samples.T):
+        assert scipy.stats.kstest(mapped, drawn).pvalue > 1e-5
+
+    logps = model.logpdf(samples), model.logpdf(theta)
+    assert scipy.stats.kstest(*logps).pvalue > 1e-5
+
+    # Shapes survive for both unbatched and multiply-batched input
+    for shape in [(d,), (3, 4, d)]:
+        x = np.random.rand(*shape)
+        theta = model.bijector(x)
+        assert theta.shape == x.shape
+        assert model.bijector(theta, inverse=True).shape == x.shape
+
+
 def test_marginalise_condition_multivariate_normal():
     d = 5
     mean = np.random.randn(d)

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -0,0 +1,17 @@
+from lsbi.utils import bisect
+from numpy.testing import assert_allclose
+import pytest
+
+
+def test_bisect():
+    """Scalar root, a bracket without a root, and a vector of roots."""
+    def shifted(x):
+        return x-5
+    assert bisect(shifted, 0, 10) == 5
+    with pytest.raises(ValueError):
+        bisect(shifted, 0, 4)
+
+    def two_roots(x):
+        return x - [1, 2]
+
+    assert_allclose(bisect(two_roots, 0, 10), [1, 2])