gaussian_example.py
"""
A Python script that repackages PTMCMCSampler's "simple" example to run from the command using
mpirun. https://github.com/jellis18/PTMCMCSampler/blob/master/examples/simple.ipynb
------------------
Runtime environment:
This script requires a Python runtime environment with installed dependencies, and also a working
MPI installation. This is handled for you when running commands below within the Docker
container. For example, from your checkout directory:
# Build the Docker container, tagging the resulting image as "ptmcmc-example"
docker build --pull -t ptmcmc-example .
# Run a Bash shell within the container, running as user "mpiuser"
# to avoid errors for running "mpirun" as root. Mount ./results as a volume where we can store
# output from running the example. (CRTL-D to exit back to host OS)
docker run -it --rm --user mpiuser -v $(pwd)/results:/code/results ptmcmc-example
------------------
Sample commands:
Commands below demonstrate running the script from within the Docker container.
Show help for the script:
mpi4py -m gaussian_example -h
Testing the script (one chain):
python -m gaussian_example [output_dir]
Basic use (multiple chains):
mpirun -np [n_chains] python -m gaussian_example [output_dir]
Running 2 chains with repeatable results:
mpirun -np 2 python -m gaussian_example --seeds 10 11 /code/results/2-chains
Result files are written to the configured output directory, overwriting any previous results
with the same names. Clients are responsible to clean up result files generated by the script
(from the PTMCMCSampler library).
See also supporting library PTMCMCSampler https://github.com/jellis18/PTMCMCSampler
"""
import argparse
import logging
import random
import time

import numpy as np
from mpi4py import MPI
from PTMCMCSampler import PTMCMCSampler

logger = logging.getLogger(__name__)

class GaussianLikelihood:
    """Multivariate Gaussian log-likelihood with a uniform prior on [pmin, pmax] per dimension."""

    def __init__(self, ndim=2, pmin=-10, pmax=10):
        self.a = np.ones(ndim) * pmin
        self.b = np.ones(ndim) * pmax

        # get means
        self.mu = np.random.uniform(pmin, pmax, ndim)

        # ... and a positive definite, non-trivial covariance matrix.
        cov = 0.5 - np.random.rand(ndim ** 2).reshape((ndim, ndim))
        cov = np.triu(cov)
        cov += cov.T - np.diag(cov.diagonal())
        self.cov = np.dot(cov, cov)

        # Pre-compute the inverse covariance matrix used by lnlikefn.
        self.icov = np.linalg.inv(self.cov)

    def lnlikefn(self, x):
        diff = x - self.mu
        return -np.dot(diff, np.dot(self.icov, diff)) / 2.0

    def lnpriorfn(self, x):
        if np.all(self.a <= x) and np.all(self.b >= x):
            return 0.0
        else:
            return -np.inf
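
# Minimal sanity-check sketch for the two callbacks above (illustrative addition; it is not
# called anywhere in this script, and the helper name _check_gaussian_likelihood is not part of
# the original example). At x == mu the quadratic form vanishes, so lnlikefn returns 0.0, and
# lnpriorfn is flat (0.0) inside [pmin, pmax] and -inf outside.
def _check_gaussian_likelihood():
    glo = GaussianLikelihood(ndim=2, pmin=-10, pmax=10)
    assert glo.lnlikefn(glo.mu) == 0.0
    assert glo.lnpriorfn(glo.mu) == 0.0
    assert glo.lnpriorfn(np.full(2, 100.0)) == -np.inf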

class UniformJump:
    def __init__(self, pmin, pmax):
        """Draw random parameters from pmin, pmax"""
        self.pmin = pmin
        self.pmax = pmax

    def jump(self, x, it, beta):
        """
        Function prototype must read in parameter vector x,
        sampler iteration number it, and inverse temperature beta
        """
        # log of forward-backward jump probability
        lqxy = 0

        # uniformly drawn parameters
        q = np.random.uniform(self.pmin, self.pmax, len(x))
        return q, lqxy

def parallel_tempering_opt(output_dir, verbose=False):
    """
    Runs the parallel tempering example based on code from PTMCMCSampler's simple.ipynb.

    Note this function purposefully omits a random seed parameter. For repeatable results when
    calling it directly, call np.random.seed() in client code (and only once per process,
    since seeding more than once may cause random numbers to be repeated).

    :param output_dir: the output directory for results
    """
    start = time.time()

    # Get info about the MPI runtime environment
    comm = MPI.COMM_WORLD
    n_chains = comm.Get_size()  # total number of chains / MPI processes
    process_rank = comm.Get_rank()  # ID of *this* chain / MPI process (zero-indexed)

    # Set up the Gaussian model
    ndim = 20
    pmin, pmax = 0.0, 10
    glo = GaussianLikelihood(ndim=ndim, pmin=pmin, pmax=pmax)

    # Set up the sampler: choose a start position and an initial proposal covariance
    p0 = np.random.uniform(pmin, pmax, ndim)
    cov = np.eye(ndim) * 0.1 ** 2
    sampler = PTMCMCSampler.PTSampler(
        ndim, glo.lnlikefn, glo.lnpriorfn, np.copy(cov), outDir=output_dir
    )

    # Add custom jump
    ujump = UniformJump(pmin, pmax)
    sampler.addProposalToCycle(ujump.jump, 5)

    # Run the sampler for 100,000 steps
    print(f"Chain {process_rank + 1}: Starting sampler.sample()")
    sampler.sample(
        p0,
        100000,
        burn=500,
        thin=1,
        covUpdate=500,
        SCAMweight=20,
        AMweight=20,
        DEweight=20,
    )

    # Print a completion message
    elapsed = time.time() - start
    print(f"Chain {process_rank + 1} of {n_chains}: done in {elapsed:.2f} s")
if __name__ == "__main__":
"""
Command line program to run a single parallel tempering chain.
"""
# Get info about the MPI runtime environment
comm = MPI.COMM_WORLD
n_chains = comm.Get_size() # total number of chains / MPI processes
process_rank = comm.Get_rank() # ID of *this* chain / MPI process (zero-indexed)
# Configure program arguments.
parser = argparse.ArgumentParser(
description="A wrapper script to run PTMCMCSampler's parallel tempering example using "
"mpirun"
)
parser.add_argument(
"output_dir",
type=str,
default=None,
help="Path of the output directory for run artifacts",
)
parser.add_argument(
"-s",
"--seeds",
type=int,
default=None,
# If any random seeds are provided, require a seed for each chain.
nargs=n_chains,
help="A list of integer random seeds to pass to each temperature chain, "
"respectively. Values must be convertible to 32-bit unsigned "
"integers for compatability with NumPy's random.seed()",
)
parser.add_argument("-v", "--verbose", action="store_true", default=False)
# Parse the input.
args = parser.parse_args()
# Seed random state for this process/chain.
seeds = args.seeds
if seeds:
# Since PTMCMCSampler uses np.random*() functions to get random numbers, and we're
# running each chain in a separate Python process, we need to seed each process to get
# predictable results.
# Note we only set random state here since any Python client importing this module directly
# should retain control of random state, and only seed it once per process.
# extract & set the seed for this process
seed = seeds[process_rank]
np.random.seed(seed)
random.seed(seed)
# Do the work!
print(f"Running chain {process_rank + 1} of {n_chains}")
parallel_tempering_opt(args.output_dir, args.verbose)