bayesian_calibration.py
"""
This module contains the Bayesian calibration class.
"""
from typing import Type, Dict, Callable
import os
from glob import glob
from numpy import argmax
from grainlearning.dynamic_systems import DynamicSystem, IODynamicSystem
from grainlearning.iterative_bayesian_filter import IterativeBayesianFilter
from grainlearning.tools import plot_param_stats, plot_posterior, plot_param_data, plot_obs_and_sim, plot_pdf, \
close_plots
class BayesianCalibration:
"""This is the Bayesian calibration class.
A Bayesian calibration requires the following input:
1. The :class:`.DynamicSystem` that encapsulates the observation data and simulation data,
2. The inference method, for example, :class:`.IterativeBayesianFilter`,
3. The number of iterations,
4. The current iteration number, in case the user only wants to process existing data with GrainLearning for one iteration,
5. The flag for skipping (-1), showing (0), or saving (1) the figures,
6. and the tolerances for stopping the calibration, based on the mean absolute percentage error and on the ensemble percentage error.
There are two ways of initializing a calibration toolbox class.
Method 1 - dictionary style (recommended)
.. highlight:: python
.. code-block:: python
bayesian_calibration = BayesianCalibration.from_dict(
{
"num_iter": 8,
"callback": run_sim,
"system": {
"system_type": DynamicSystem,
"model_name": "test",
"param_names": ["a", "b"],
"param_min": [0, 0],
"param_max": [1, 10],
"num_samples": 10,
"obs_data": [2,4,8,16],
"ctrl_data": [1,2,3,4],
},
"inference": {
"Bayes_filter": {"ess_target": 0.3},
"sampling": {"max_num_components": 1},
},
"save_fig": -1,
}
)
or
Method 2 - class style
.. highlight:: python
.. code-block:: python
bayesian_calibration = BayesianCalibration(
num_iter = 8,
system = DynamicSystem(...),
inference = IterativeBayesianFilter(...),
save_fig = -1
)
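After construction with either method, the calibration loop can be run and the most probable parameter set retrieved, for example:
.. highlight:: python
.. code-block:: python
bayesian_calibration.run()
most_prob_params = bayesian_calibration.get_most_prob_params()
print(f"Most probable parameter values: {most_prob_params}")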
:param system: A `dynamic system <https://en.wikipedia.org/wiki/Particle_filter#Approximate_Bayesian_computation_models>`_ whose observables and hidden states evolve dynamically over "time"
:param inference: An inference method to determine unknown parameters in state-parameter space. Currently, only the iterative Bayesian Filter is available.
:param num_iter: Number of iteration steps
:param curr_iter: Current iteration step
:param error_tol: Tolerance for the smallest mean absolute percentage error among the samples
:param gl_error_tol: Tolerance for the GrainLearning ensemble percentage error
:param save_fig: Flag for skipping (-1), showing (0), or saving (1) the figures
:param threads: Number of threads passed to the dynamic system when setting up simulation directories and writing parameter tables
:param callback: A callback function that runs the external software and passes the parameter sample to generate outputs
"""
def __init__(
self,
system: Type["DynamicSystem"],
inference: Type["IterativeBayesianFilter"],
num_iter: int = 1,
curr_iter: int = 0,
error_tol: float = None,
gl_error_tol: float = None,
save_fig: int = -1,
threads: int = 4,
callback: Callable = None,
):
"""Initialize the Bayesian calibration class"""
self.num_iter = num_iter
self.save_fig = save_fig
self.threads = threads
self.system = system
self.system.curr_iter = curr_iter
self.curr_iter = curr_iter
self.error_tol = error_tol
self.gl_error_tol = gl_error_tol
self.inference = inference
self.callback = callback
self.error_array = None
self.gl_errors = []
def run(self):
""" This is the main calibration loop which does the following steps
1. First iteration of Bayesian calibration starts with a Halton sequence
2. Iterations continue by resampling the parameter space until certain criteria are met.
"""
# Move existing simulation data to the backup folder
self.system.backup_sim_data()
# Bayesian calibration continues until curr_iter = num_iter or sigma_max < tolerance
stopping_criteria_met = False
for _ in range(self.num_iter):
print(f"Bayesian calibration iter No. {self.curr_iter}")
stopping_criteria_met = self.run_one_iteration()
if stopping_criteria_met:
break
# Print the errors after all iterations are done
if not stopping_criteria_met:
error_most_probable = min(self.error_array)
gl_error = self.gl_errors[-1]
print(f"\n"
f"Stopping criteria NOT met: \n"
f"sigma = {self.system.sigma_max},\n"
f"Smallest mean absolute percentage error = {error_most_probable: .3e},\n"
f"GrainLearning ensemble percentage error = {gl_error: .3e}\n\n"
f"Ending Bayesian calibration.")
def run_one_iteration(self, index: int = -1):
"""Run Bayesian calibration for one iteration.
:param index: index of the parameter table in the stored list, defaults to -1 (the most recent one)
"""
# Initialize the samples if it is the first iteration
if self.curr_iter == 0:
self.inference.initialize(self.system)
# Fetch the parameter values from a stored list
self.system.param_data = self.inference.param_data_list[index]
self.system.num_samples = self.system.param_data.shape[0]
# Run the model realizations
self.run_callback()
# Load model data from disk
self.load_system()
# Estimate model parameters as a distribution
self.inference.solve(self.system)
self.inference.sigma_list.append(self.system.sigma_max)
# Generate some plots
self.plot_uq_in_time()
self.compute_errors()
# Retrieve the error measures used by the stopping criteria
error_most_probable = min(self.error_array)
gl_error = self.gl_errors[-1]
print(f"\n"
f"sigma = {self.system.sigma_max},\n"
f"Smallest mean absolute percentage error = {error_most_probable: .3e},\n"
f"GrainLearning ensemble percentage error = {gl_error: .3e}\n")
# Check whether any stopping criterion is met
normalized_sigma_met = self.system.sigma_max < self.system.sigma_tol
most_probable_error_met = error_most_probable < self.error_tol if self.error_tol is not None else False
ensemble_error_met = gl_error < self.gl_error_tol if self.gl_error_tol is not None else False
if normalized_sigma_met or most_probable_error_met or ensemble_error_met:
print(f"\nStopping criteria are met. Ending Bayesian calibration!\n")
self.num_iter = self.curr_iter + 1
return True
else:
self.increase_curr_iter()
return False
def run_callback(self):
"""
Run the callback function
"""
if self.callback is not None:
if isinstance(self.system, IODynamicSystem):
self.system.set_up_sim_dir(self.threads)
self.callback(self)
self.system.move_data_to_sim_dir()
else:
self.callback(self)
else:
raise ValueError("No callback function defined")
def load_system(self):
"""Load existing simulation data from disk into the dynamic system
"""
if isinstance(self.system, IODynamicSystem):
self.system.load_param_data()
self.system.get_sim_data_files()
self.system.load_sim_data()
else:
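# For an in-memory system, the parameter and simulation data must already be attached by the user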
if self.system.param_data is None or self.system.sim_data is None:
raise RuntimeError("The parameter and simulation data are not set up correctly.")
def load_and_run_one_iteration(self):
"""Load existing simulation data and run Bayesian calibration for one iteration
Note that the maximum uncertainty sigma_max is solved for such that a target effective sample size ess_target is reached,
rather than being assumed as an input as in `load_and_process(...)`
"""
self.load_system()
self.inference.add_curr_param_data_to_list(self.system.param_data)
self.inference.solve(self.system)
self.system.write_params_to_table(self.threads)
self.inference.sigma_list.append(self.system.sigma_max)
self.plot_uq_in_time()
def load_and_process(self, sigma: float = 0.1):
"""Load existing simulation data and compute the posterior distribution using an assumed sigma
:param sigma: assumed uncertainty coefficient, defaults to 0.1
"""
self.load_system()
self.inference.add_curr_param_data_to_list(self.system.param_data)
self.inference.load_proposal_from_file(self.system)
self.inference.Bayes_filter.data_assimilation_loop(sigma, self.system)
self.system.compute_estimated_params(self.inference.Bayes_filter.posteriors)
def load_all(self):
"""Simply load all previous iterations of Bayesian calibration
"""
self.load_system()
self.inference.add_curr_param_data_to_list(self.system.param_data)
self.increase_curr_iter()
while self.curr_iter < self.num_iter:
print(f"Bayesian calibration iter No. {self.curr_iter}")
self.load_system()
self.inference.add_curr_param_data_to_list(self.system.param_data)
self.inference.run_inference(self.system)
self.inference.sigma_list.append(self.system.sigma_max)
self.plot_uq_in_time()
self.increase_curr_iter()
def resample(self):
"""Learn and resample from a proposal distribution
todo this should be refactored
:return: Combinations of resampled parameter values
"""
self.inference.posterior = self.inference.Bayes_filter.get_posterior_at_time()
self.inference.run_sampling(self.system)
resampled_param_data = self.inference.param_data_list[-1]
self.system.write_params_to_table(self.threads)
return resampled_param_data
def plot_uq_in_time(self, verbose: bool = False):
"""Plot the evolution of uncertainty moments and distribution over time
:param verbose: also plot the detailed statistics, defaults to False
"""
if self.save_fig < 0:
return
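# Figures are saved under the simulation data directory for IO systems,
# otherwise under a local folder named after the simulation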
path = f'{self.system.sim_data_dir}/iter{self.curr_iter}' \
if isinstance(self.system, IODynamicSystem) \
else f'./{self.system.sim_name}/iter{self.curr_iter}'
if not os.path.exists(path):
os.makedirs(path)
fig_name = f'{path}/{self.system.sim_name}'
if verbose:
plot_param_stats(
fig_name, self.system.param_names,
self.system.estimated_params,
self.system.estimated_params_cv,
self.save_fig
)
plot_posterior(
fig_name,
self.system.param_names,
self.system.param_data,
self.inference.Bayes_filter.posteriors,
self.save_fig
)
plot_param_data(
fig_name,
self.system.param_names,
self.inference.param_data_list,
self.save_fig
)
plot_obs_and_sim(
fig_name,
self.system.ctrl_name,
self.system.obs_names,
self.system.ctrl_data,
self.system.obs_data,
self.system.sim_data,
self.inference.Bayes_filter.posteriors,
self.save_fig
)
plot_pdf(
fig_name,
self.system.param_names,
self.inference.param_data_list,
self.save_fig,
)
close_plots(self.save_fig)
def get_most_prob_params_id(self):
Return the index of the most probable parameter sample
:return: Index of the most probable parameter sample
"""
return argmax(self.inference.posterior)
def get_most_prob_params(self):
"""Return the most probable set of parameters
:return: Estimated parameter values
"""
most_prob = argmax(self.inference.posterior)
return self.system.param_data[most_prob]
def increase_curr_iter(self):
"""Increase the current iteration step by one
"""
self.system.curr_iter += 1
self.curr_iter += 1
def compute_errors(self):
"""Compute the mean absolute percentage error per sample and the ensemble error
"""
from sklearn.metrics import mean_absolute_error
import numpy as np
# compute mean absolute percentage error
sim_data = self.system.sim_data
num_samples = self.system.num_samples
# Allocate the per-sample error array
self.error_array = np.zeros(num_samples)
# Normalize by the observation range: a single scalar range if there is only one
# control step, otherwise a per-observable range along the time axis
if self.system.num_steps == 1:
obs_maxs = np.max(self.system.obs_data)
obs_mins = np.min(self.system.obs_data)
else:
obs_maxs = np.max(self.system.obs_data, axis=1)
obs_mins = np.min(self.system.obs_data, axis=1)
obs_range = obs_maxs - obs_mins
# Mean absolute error of the range-normalized data, per sample
for i in range(num_samples):
self.error_array[i] = mean_absolute_error(self.system.obs_data.T / obs_range, sim_data[i, :, :].T / obs_range)
# compute the ensemble error
self.gl_errors.append(np.dot(self.error_array, self.inference.posterior))
@classmethod
def from_dict(
cls: Type["BayesianCalibration"],
obj: Dict
):
"""An alternative constructor to allow choosing a system type (e.g., dynamic system or IO dynamic system)
:param obj: a dictionary containing the keys and values to construct a BayesianCalibration object
:return: a BayesianCalibration object
"""
# Get the system class, defaults to `DynamicSystem`
system_obj = obj["system"]
system_type = system_obj.get("system_type", DynamicSystem)
# if the dictionary has the key "system_type", then delete it to avoid passing it to the constructor
system_obj.pop("system_type", None)
# Create a system object
system = system_type.from_dict(obj["system"])
# Create an inference object
inference = IterativeBayesianFilter.from_dict(obj["inference"])
return cls(
system=system,
inference=inference,
num_iter=obj["num_iter"],
curr_iter=obj.get("curr_iter", 0),
error_tol=obj.get("error_tol", None),
gl_error_tol=obj.get("gl_error_tol", None),
save_fig=obj.get("save_fig", -1),
threads=obj.get("threads", 4),
callback=obj.get("callback", None)
)