From 237940ae3030922b629a76efe41aea3b5acce548 Mon Sep 17 00:00:00 2001 From: Tijs Alleman Date: Thu, 25 Jul 2024 10:43:05 -0400 Subject: [PATCH] Draw functions with flexible number of parameters (#75) --- docs/enzyme_kinetics.md | 4 +- docs/influenza_1718.md | 16 +- docs/installation.md | 2 +- docs/introduction.md | 6 +- docs/models.md | 164 ++++++++++++------ docs/quickstart.md | 22 ++- docs/references.md | 6 +- docs/workflow.md | 95 +++++----- src/pySODM/models/base.py | 65 +++---- src/pySODM/models/validation.py | 53 +++--- src/tests/test_JumpProcess.py | 89 ++++++++-- src/tests/test_ODE.py | 83 +++++++-- tutorials/SIR/workflow_tutorial.py | 17 +- tutorials/SIR_SI/calibration.py | 4 +- .../calibrate_intrinsic_kinetics.py | 4 +- tutorials/enzyme_kinetics/simulate_1D_PFR.py | 4 +- tutorials/influenza_1718/calibration.py | 5 +- 17 files changed, 413 insertions(+), 226 deletions(-) diff --git a/docs/enzyme_kinetics.md b/docs/enzyme_kinetics.md index b0a082c..9939810 100644 --- a/docs/enzyme_kinetics.md +++ b/docs/enzyme_kinetics.md @@ -1,6 +1,6 @@ # A model for the enzymatic esterification of D-glucose and Lauric acid in a continuous-flow reactor -This tutorial is based on: Tijs W. Alleman. (2019). Model-Based Analysis of Enzymatic Reactions in Continuous Flow Reactors (master dissertation). Ghent University, Ghent, BE. +This tutorial is based on: Tijs W. Alleman. (2019). Model-Based Analysis of Enzymatic Reactions in Continuous Flow Reactors (master dissertation). Ghent University, Ghent, BE, and is showcased in our [software paper](https://www.sciencedirect.com/science/article/pii/S1877750323002089). ## Introduction @@ -265,7 +265,7 @@ if __name__ == '__main__': # Update initial condition model.initial_states.update(initial_states[i]) # Simulate model - out = model.sim(1600, N=n, draw_function=draw_fcn, samples=samples_dict) + out = model.sim(1600, N=n, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict}) # Add 4% observational noise out = add_gaussian_noise(out, 0.04, relative=True) # Visualize diff --git a/docs/influenza_1718.md b/docs/influenza_1718.md index 1d1d299..ef52a17 100644 --- a/docs/influenza_1718.md +++ b/docs/influenza_1718.md @@ -1,11 +1,13 @@ # An stochastic jump process model for the 2017-2018 Influenza Season in Belgium -In this tutorial, we'll set up a stochastic age-stratified model for seasonal Influenza in Belgium. First, we'll expand the dynamics of the [simple SIR model](workflow.md) to account for additional charateristics of seasonal Influenza and we'll use the concept of *dimensions* to include four age groups in our model. Opposed to the simple SIR tutorial, where changes in the number of social contacts were not considered, we'll use a *time-dependent parameter function* to include the effects of school closures during school holidays in our Influenza model. Finally, we'll calibrate two of the model's parameters to incrementally larger sets of incidence data and asses how the goodness-of-fit evolves with the amount of knowledge at our disposal. One of the calibrated model parameters is a 1D vector, pySODM allows users to easily calibrate n-dimensional model parameters. +This tutorial is showcased in our [software paper](https://www.sciencedirect.com/science/article/pii/S1877750323002089). + +We'll set up a simple stochastic model for Influenza in Belgium. First, we'll expand the dynamics of the [simple SIR model](workflow.md) to account for additional disease charateristics of Influenza, and we'll use the concept of *dimensions* to include four age groups in our model (called 'age strata'). Opposed to the simple SIR tutorial, where changes in the number of social contacts were not considered, we'll demonstrate how to use a *time-dependent parameter function* to include the effects of school closures during school holidays in our Influenza model. Finally, we'll calibrate two of the model's parameters to incrementally larger sets of incidence data and asses how the goodness-of-fit evolves with the amount of knowledge at our disposal. One of the calibrated model parameters will be a 1D vector, pySODM allows users to easily calibrate n-dimensional model parameters. This tutorial introduces the following concepts, -- Building a stochastic model and simulate it with Gillespie's Tau-Leaping method (which is a form of jump process) -- Adding age groups to a dynamic transmission model of disease -- Calibrating n-D parameters to n-D datasets +1. Building a stochastic model and simulate it with Gillespie's Tau-Leaping method +2. Extending an SIR model with age strata and using a social contact matrix +3. Calibrating an n-D model parameter to an n-D dataset This tutorial can be replicated by running ```bash @@ -301,11 +303,11 @@ if __name__ == '__main__': data=[df_influenza[start_date:end_calibration], ] states = ["Im_inc",] log_likelihood_fnc = [ll_negative_binomial,] - log_likelihood_fnc_args = [5*[0.03,],] + log_likelihood_fnc_args = [4*[0.03,],] # Calibated parameters and bounds - pars = ['beta', 'f_a'] - labels = ['$\\beta$', '$f_a$'] + pars = ['beta', 'f_ud'] + labels = ['$\\beta$', '$f_{ud}$'] bounds = [(1e-6,0.06), (0,0.99)] # Setup objective function (no priors --> uniform priors based on bounds) diff --git a/docs/installation.md b/docs/installation.md index 57aaf44..98ce2f0 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,6 +1,6 @@ ## Installation -### Quick and dirty installation +### Fast & Furious ``` pip install pySODM diff --git a/docs/introduction.md b/docs/introduction.md index 551ecff..69bda48 100644 --- a/docs/introduction.md +++ b/docs/introduction.md @@ -11,11 +11,11 @@ pip install pySODM ### Resources -Documentation: https://twallema.github.io/pySODM +1. [Documentation website](https://twallema.github.io/pySODM) -Manuscript: https://www.sciencedirect.com/science/article/pii/S1877750323002089 +2. [Manuscript](https://www.sciencedirect.com/science/article/pii/S1877750323002089) -pyPI: https://pypi.org/project/pySODM/ +3. [pyPI](https://pypi.org/project/pySODM/) ### Aim & Scope diff --git a/docs/models.md b/docs/models.md index 29813af..8283f9c 100644 --- a/docs/models.md +++ b/docs/models.md @@ -1,99 +1,161 @@ # Models -Standard usage of pySODM involves building an ODE or stochastic jump process model. For practical examples of initializing models, we refer to the [tutorials](quickstart.md). +pySODM contains a class to build ordinary differential equation models (`ODE`) and a class to build stochastic jump process models (`JumpProcess`), both live in `models/base.py`. To learn more about initialising and simulating these models, see the [quickstart tutorial](quickstart.md). ## base.py ### *class* ODE -The ODE class inherits several attributes from the model class defined by the user. +**Inherits from the user's model class:** -**Inherits from model declaration:** +* **states** (list) - Names of the model's states. +* **parameters** (list) - Names of the model's parameters. Parameters are not subject to input checks and can have any type. +* **integrate** (function) - Function computing the differentials of every model state. The integrate function must have the timestep `t` as its first input, followed by the names of the model's states, parameters and stratified parameters (order not important). The integrate function must return a differential for every model state of the correct shape and **in the same order as the names `states`**. The integrate function must be a static method (include the decorator `@staticmethod`). +* **dimensions** (list) - optional - Names of the model's dimensions. Number of dimensions determines the dimensionality of the model's states. + * No dimensions: states are 0-D (scalar) + * 1 dimension: states are 1-D (np.ndarray) + * 2 dimensions: states are 2-D (np.ndarray) + * etc. +* **stratified_parameters** (list) - optional - Names of the *stratified* parameters. Stratified parameters must be of type list/1D np.array and their length must be equal to the number of coordinates of the dimension axis it corresponds to. Their use is optional and mainly serves as a way for the user to structure his code. + * 0-D model: not possible to have *stratified* parameters + * 1-D model: list containing strings - `['stratpar_1', 'stratpar_2']` + * 2-D+ dimensions: List contains *n* sublists, where *n* is the number of model dimensions. Each sublist contains names of stratified parameters associated with the dimension in the corresponding position in `dimensions` - example for a 3-D model: `[['stratpar_1', 'stratpar_2'],[],['stratpar_3']]`, first dimension in `dimensions` has two stratified parameters `stratpar_1` and `stratpar_2`, second dimension has no stratified parameters, third dimensions has one stratified parameter `stratpar_3`. +* **dimensions_per_state** (list) - optional - Specify the dimensions of each model states. Allows users to define models with states of different sizes. If `dimensions_per_state` is not provided, all model states will have the same number of dimensions, equal to the number of model dimensions specified using `dimensions`. If specified, `dimensions_per_state` must contain *n* sublists, where *n* is the number of model states (`n = len(states)`). If a model state has no dimensions (i.e. it is a float), specify an empty list. -* **states** (list) - Names of the model's states. Internally converted to variable `states_names` upon model initialisation. -* **parameters** (list) - Names of the model's parameters. Parameters are not subject to input checks and can thus be of any datatype/size. Internally converted to variable `parameters_names` upon model initialisation. -* **integrate** (function) - Function computing the differentials of every model state. The integrate function must have the timestep `t` as its first input, followed by the names `states`, `parameters` and `stratified_parameters` (their order is not important). The integrate function must return a differential for every model state of the correct size and **in the same order as the names `states`**. The integrate function must be a static method (include `@staticmethod`). -* **dimensions** (list) - optional - Names of the possible model dimensions. The coordinates of the dimensions are specified during initialization of the model. Internally converted to variable `dimensions_names` upon model initialisation. -* **stratified_parameters** (list) - optional - Names of the *stratified* parameters. Stratified parameters must be of type list/1D np.array and their length, which must be equal to the number of coordinates of the dimension axis it corresponds to, is verified during model initialization. Internally converted to variable `stratified_parameters_names` upon model initialisation. Their use is optional and mainly serves as a way for the user to structure his code. - * If the model has one dimension: list contains strings - `['stratpar_1', 'stratpar_2']` - * If the model has multiple dimensions: list contains *n* sublists, where *n* is the number of model dimensions (length of `dimensions`). Each sublist contains names of stratified parameters associated with the dimension in the corresponding position in `dimensions` - example: `[['stratpar_1', 'stratpar_2'],[],['stratpar_3']]` -* **dimensions_per_state** (list) - optional - Specify, for each model state in `states`, its dimensions. Allows users to define models with states of different sizes. If `dimensions_per_state` is not provided, all model states will have the same size, depending on the model's dimensions specified in `dimensions`. If specified, `dimensions_per_state` must contain *n* sublists, where *n* is the number of model states (length of `states`). If some model states have no dimensions (i.e. it is a float), specify an empty list. +Below is a minimal example of a user-defined model class inheriting `ODE`. -Upon intialization of the model class, the following arguments must be provided. +```python +# Import the ODE class +from pySODM.models.base import ODE + +# Define the model equations +class MY_MODEL(ODE): + + states = ['S1', 'S2'] + parameters = ['alpha'] + + @staticmethod + def integrate(t, S1, S2, alpha): + return -alpha*S1, alpha*S2 +``` + +To intialize the user-defined model class, the following arguments must be provided. **Parameters:** -* **states** (dict) - Keys: names of states. Values: values of states. The dictionary does not have to contain a key,value pair for all state names listed in `states` of the model declaration. States that lack a key,value pair are filled with zeros upon initialization. -* **parameters** (dict) - Keys: names of parameters. Values: values of parameters. A key,value pair must be provided for all parameter names listed in `parameters` and `stratified_parameters` of the model declaration. If time dependent parameter functions with additional parameters (aside from the obligatory `t`, `states`, `params`) are used, these parameters must be included as well. -* **coordinates** (dict) - optional - Keys: names of dimensions (`dimensions`). Values: desired coordinates for the dimension axis. Values provided here become the dimension's coordinates in the `xarray` Dataset. -* **time_dependent_parameters** (dict) - optional - Keys: name of the model parameter the time-dependent parameter function should be applied to. Must be a valid model parameter. Values: time-dependent parameter function. Time-dependent parameter functions must have `t` (simulation timestep), `states` (model states at timestep `t`) and `params` (model parameters dictionary) as the first three arguments. +* **states** (dict) - Initial states. Keys: names of model states. Values: initial values of model states. The dictionary does not have to contain a key,value pair for all states, missing states are filled with zeros upon initialization. +* **parameters** (dict) - Model parameters. Keys: names of parameters. Values: values of parameters. A key,value pair must be provided for all parameter names listed in `parameters` and `stratified_parameters` of the model declaration. If *time dependent parameter functions* with additional parameters are used, these parameters must be included as well. +* **coordinates** (dict) - optional - Keys: names of dimensions (`dimensions`). Values: coordinates of the dimension. +* **time_dependent_parameters** (dict) - optional - Keys: name of the model parameter the time-dependent parameter function should be applied to. Must be a valid model parameter. Values: time-dependent parameter function (callable function). Time-dependent parameter functions must have `t` (timestep/data), `states` (dictionary of model states at time `t`) and `params` (model parameters dictionary) as the first three arguments. -**Methods:** +To initalize our user-defined model class, -* **sim(time, N=1, draw_function=None, samples=None, processes=None, method='RK23', rtol=1e-3, tau=None, output_timestep=1, warmup=0)** +```python +model = MY_MODEL(states={'S1': 1000, 'S2': 0}, parameters={'alpha': 1}) +``` - Simulate a model over a given time period using `scipy.integrate.solve_ivp()`. Can optionally perform `N` repeated simulations. Can change the values of model parameters at every repeated simulation by drawing parameter samples from a dictionary `samples` using a function `draw_function`. +**Class methods:** + +* **sim(time, N=1, draw_function=None, draw_function_kwargs={}, processes=None, method='RK23', rtol=1e-3, tau=None, output_timestep=1, warmup=0)** + + Integrate a pySODM model using `scipy.integrate.solve_ivp()`. Can optionally perform `N` repeated simulations. Can change the values of model parameters at every consecutive simulation by manipulating the dictionary of model parameters `parameters` using a `draw_function`. **Parameters** - * **time** - (int/float or list) - The start and stop "time" for the simulation run. Three possible inputs: 1) int/float, 2) list of int/float of type `[start_time, stop_time]`, 3) list of pd.Timestamp or str of type `[start_date, stop_date]`. - * **N** - (int) - optional - Number of repeated simulations to perform. - * **samples** - (dict) - optional - Dictionary containing samples of the distribution of model parameters. Obligatory input to a *draw function*. Advanced users: dictionary can contain any arbitrary input to a *draw function*. - * **draw_function** - (function) - optional - A function consisting of two inputs: the model parameters dictionary `parameters`, the previously documented samples dictionary `samples`. Function must return the model parameters dictionary `parameters` with its keys unaltered. Function can be used to update a model parameter's value during every repeated simulation. Usefull to propagate MCMC samples of model parameters or perform sensitivity analyses. + * **time** - (int/float or list) - The start and stop "time" or "date" of the integration. Three possible inputs: 1) an int/float denoting the end of the integration, 2) a list of int/float `[start_time, stop_time]`, 3) a list of dates (type: `datetime` or a 'YYYY-MM-DD' string representation thereof) `[start_date, stop_date]`. + * **N** - (int) - optional - Number of consecutive simulations to perform. + * **draw_function** - (function) - optional - A function altering the model parameters dictionary `parameters` between consecutive simulations. Function must have `parameters` as its first input, followed by an arbitrary number of additional parameters. Function must return the model parameters dictionary `parameters` with its keys unaltered, meaning no parameters should be added or removed by a *draw function*. + * **draw_function_kwargs** - (dict) - optional - Dictionary containing the parameters of the *draw function*, excluding `parameters`. Keys: names of additional parameters *draw function*, values: values of additional parameters *draw function*. Not subject to input checks regarding data type. * **processes** - (int) - optional - Number of cores to use when {math}`N > 1`. - * **method** - (str) - optional - Integration methods of `scipy.integrate.solve_ivp()` (read the [docs](https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.solve_ivp.html)). - * **rtol** - (int/float) - optional - Relative tolerance of `scipy.integrate.solve_ivp()` (read the [docs](https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.solve_ivp.html)). + * **method** - (str) - optional - Integration methods of `scipy.integrate.solve_ivp()`, [click to consult scipy's documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.solve_ivp.html). + * **rtol** - (int/float) - optional - Relative tolerance of `scipy.integrate.solve_ivp()`, [click to consult scipy's documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.integrate.solve_ivp.html). * **tau** - (int/float) - optional - If `tau != None`, the model is simulated using a simple discrete timestepper with fixed timestep `tau`. Overrides the `method` and `rtol` arguments. - * **output_timestep** - (int/float) - optional - Interpolate model output to every `output_timestep` timesteps. For datetimes: expressed in days. - * **warmup** - (float) - optional - Number of days to simulate prior to the simulation start. Usefull in epidemiological contexts when the time between the appearance of "patient zero" and the collection of data is unkown. + * **output_timestep** - (int/float) - optional - Interpolate model output to every `output_timestep` timesteps. + * **warmup** - (float) - optional - Number of days to simulate prior to the simulation start. Usefull in epidemiological contexts when the time between the appearance of "patient zero" and the collection of data is be estimated. **Returns** - * **out** - (xarray.Dataset) - Simulation output. Consult the xarray documentation [here](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html#xarray.Dataset). `xarray.Dataset.data_vars` are the model states. `xarray.Dataset.dimensions` are the time dimension plus the model's dimensions. The time dimension is named `time` if timesteps were numbers, the time dimension is named `date` if timesteps were dates. When {math}`N > 1` an additional dimension `draws` is added to the output accomodate the repeated simulations. + * **out** - ([xarray.Dataset](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html#xarray.Dataset)) - Simulation output. `xarray.Dataset.data_vars` return the model's states. `xarray.Dataset.dimensions` returns the temporal dimension plus the model's dimensions. The time dimension is named `time` if timesteps were numbers, the time dimension is named `date` if timesteps were dates. When {math}`N > 1` an additional dimension `draws` is added to accomodate the consecutive simulations. ### *class* JumpProcess -The JumpProcess class inherits several attributes from the model class defined by the user. +**Inherits from the user's model class:** -**Inherits from model declaration:** -* **states** (list) - Names of the model's states. Internally converted to variable `states_names` upon model initialisation. -* **parameters** (list) - Names of the model's parameters. Parameters are not subject to input checks and can thus be of any datatype/size. Internally converted to variable `parameters_names` upon model initialisation. +* **states** (list) - Names of the model's states. +* **parameters** (list) - Names of the model's parameters. Parameters are not subject to input checks and can have any type. * **compute_rates** (function) - Function returning the rates of transitioning between the model states. `compute_rates()` must have the timestep `t` as its first input, followed by the names `states`, `parameters` and `stratified_parameters` (their order is not important). `compute_rates()` must be a static method (include `@staticmethod`). The output of `compute_rates()` must be a dictionary. Its keys must be valid model states, a rate is only needed for the states undergoing a transitioning. The corresponding values must be a list containing the rates of the possible transitionings of the state. In this way, a model state can have multiple transitionings. * **apply_transitionings** (function) - Function to update the states with the number of drawn transitionings. `apply_transitionings()` must have the timestep `t` as its first input, followed by the solver timestep `tau`, follwed by the dictionary containing the transitionings `transitionings`, then followed by the model states and parameters similarily to `compute_rates()`. -* **dimensions** (list) - optional - Names of the possible model dimensions. The coordinates of the dimensions are specified during initialization of the model. Internally converted to variable `dimensions_names` upon model initialisation. -* **stratified_parameters** (list) - optional - Names of the *stratified* parameters. Stratified parameters must be of type list/1D np.array and their length, which must be equal to the number of coordinates of the dimension axis it corresponds to, is verified during model initialization. Internally converted to variable `stratified_parameters_names` upon model initialisation. Their use is optional and mainly serves as a way for the user to structure his code. - * If the model has one dimension: list contains strings - `['stratpar_1', 'stratpar_2']` - * If the model has multiple dimensions: list contains *n* sublists, where *n* is the number of model dimensions (length of `dimensions`). Each sublist contains names of stratified parameters associated with the dimension in the corresponding position in `dimensions` - example: `[['stratpar_1', 'stratpar_2'],[],['stratpar_3']]` -* **dimensions_per_state** (list) - optional - Specify, for each model state name in `states`, its dimensions. Allows users to define models with states of different sizes. If `dimensions_per_state` is not provided, all model states will have the same size, depending on the model's dimensions specified in `dimensions`. If specified, `dimensions_per_state` must contain *n* sublists, where *n* is the number of model states (length of `states`). If some model states have no dimensions (i.e. it is a float), specify an empty list. +* **dimensions** (list) - optional - Names of the model's dimensions. Number of dimensions determines the dimensionality of the model's states. + * No dimensions: states are 0-D (scalar) + * 1 dimension: states are 1-D (np.ndarray) + * 2 dimensions: states are 2-D (np.ndarray) + * etc. +* **stratified_parameters** (list) - optional - Names of the *stratified* parameters. Stratified parameters must be of type list/1D np.array and their length must be equal to the number of coordinates of the dimension axis it corresponds to. Their use is optional and mainly serves as a way for the user to structure his code. + * 0-D model: not possible to have *stratified* parameters + * 1-D model: list containing strings - `['stratpar_1', 'stratpar_2']` + * 2-D+ dimensions: List contains *n* sublists, where *n* is the number of model dimensions. Each sublist contains names of stratified parameters associated with the dimension in the corresponding position in `dimensions` - example for a 3-D model: `[['stratpar_1', 'stratpar_2'],[],['stratpar_3']]`, first dimension in `dimensions` has two stratified parameters `stratpar_1` and `stratpar_2`, second dimension has no stratified parameters, third dimensions has one stratified parameter `stratpar_3`. +* **dimensions_per_state** (list) - optional - Specify the dimensions of each model states. Allows users to define models with states of different sizes. If `dimensions_per_state` is not provided, all model states will have the same number of dimensions, equal to the number of model dimensions specified using `dimensions`. If specified, `dimensions_per_state` must contain *n* sublists, where *n* is the number of model states (`n = len(states)`). If a model state has no dimensions (i.e. it is a float), specify an empty list. + +Below is a minimal example of a user-defined model class inheriting `JumpProcesses`. -Upon intialization of the model class, the following arguments must be provided. +```python +# Import the JumpProcesses class +from pySODM.models.base import JumpProcesses + +class MY_MODEL(JumpProcesses): + + states = ['S1', 'S2'] + parameters = ['alpha'] + + # define the rates of the system's transitionings + @staticmethod + def compute_rates(t, S1, S2, alpha): + return {'S1': [alpha, ]} + + # apply the sampled number of transitionings + @staticmethod + def apply_transitionings(t, tau, transitionings, S1, S2, alpha): + + S1_new = S1 - transitionings['S1'][0] + S2_new = S2 + transitionings['S2'][0] + + return S1_new, S2_new +``` + +To intialize the user-defined model class, the following arguments must be provided. **Parameters:** -* **states** (dict) - Keys: names of states. Values: values of states. The dictionary does not have to contain a key,value pair for all model state names listed in `states` of the model declaration. States that lack a key,value pair are filled with zeros upon initialization. -* **parameters** (dict) - Keys: names of parameters. Values: values of parameters. A key,value pair must be provided for all parameter names listed in `parameters` and `stratified_parameters` of the model declaration. If time dependent parameter functions with additional parameters (aside from the obligatory `t`, `states`, `params`) are used, these parameters must be included as well. -* **coordinates** (dict) - optional - Keys: names of dimensions (`dimensions`). Values: desired coordinates for the dimension axis. Values provided here become the dimension's coordinates in the `xarray` Dataset. -* **time_dependent_parameters** (dict) - optional - Keys: name of the model parameter the time-dependent parameter function should be applied to. Must be a valid model parameter. Values: time-dependent parameter function. Time-dependent parameter functions must have `t` (simulation timestep), `states` (model states at timestep `t`) and `params` (model parameters dictionary) as the first three arguments. +* **states** (dict) - Initial states. Keys: names of model states. Values: initial values of model states. The dictionary does not have to contain a key,value pair for all states, missing states are filled with zeros upon initialization. +* **parameters** (dict) - Model parameters. Keys: names of parameters. Values: values of parameters. A key,value pair must be provided for all parameter names listed in `parameters` and `stratified_parameters` of the model declaration. If *time dependent parameter functions* with additional parameters are used, these parameters must be included as well. +* **coordinates** (dict) - optional - Keys: names of dimensions (`dimensions`). Values: coordinates of the dimension. +* **time_dependent_parameters** (dict) - optional - Keys: name of the model parameter the time-dependent parameter function should be applied to. Must be a valid model parameter. Values: time-dependent parameter function (callable function). Time-dependent parameter functions must have `t` (timestep/data), `states` (dictionary of model states at time `t`) and `params` (model parameters dictionary) as the first three arguments. + +To initalize our user-defined model class, + +```python +model = MY_MODEL(states={'S1': 1000, 'S2': 0}, parameters={'alpha': 1}) +``` **Methods:** -* **sim(time, N=1, draw_function=None, samples=None, processes=None, method='tau_leap', tau=0.5, output_timestep=1, warmup=0)** +* **sim(time, N=1, draw_function=None, draw_function_kwargs={}, processes=None, method='tau_leap', tau=0.5, output_timestep=1, warmup=0)** - Simulate a model over a given time period stochastically using Gillespie's Stochastic Simulation Algorithm (SSA) or the approximate Tau-leaping method. Can optionally perform `N` repeated simulations. Can change the values of model parameters at every repeated simulation by drawing samples from a dictionary `samples` using a function `draw_function`. + Integrate a model stochastically using Gillespie's Stochastic Simulation Algorithm (SSA) or the approximate Tau-leaping method. Can optionally perform `N` repeated simulations. Can change the values of model parameters at every consecutive simulation by manipulating the dictionary of model parameters `parameters` using a `draw_function`. **Parameters** - * **time** - (int/float or list) - The start and stop "time" for the simulation run. Three possible inputs: 1) int/float, 2) list of int/float of type `[start_time, stop_time]`, 3) list of pd.Timestamp or str of type `[start_date, stop_date]`. - * **N** - (int) - optional - Number of repeated simulations to perform. - * **samples** - (dict) - optional - Dictionary containing samples of the distribution of model parameters. Obligatory input to a *draw function*. Advanced users: dictionary can contain any arbitrary input to a *draw function*. - * **draw_function** - (function) - optional - A function consisting of two inputs: the model parameters dictionary `parameters`, the previously documented samples dictionary `samples`. Function must return the model parameters dictionary `parameters`. Function can be used to update a model parameter's value during every repeated simulation. Usefull to propagate MCMC samples of model parameters or perform sensitivity analyses. + * **time** - (int/float or list) - The start and stop "time" or "date" of the integration. Three possible inputs: 1) an int/float denoting the end of the integration, 2) a list of int/float `[start_time, stop_time]`, 3) a list of dates (type: `datetime` or a 'YYYY-MM-DD' string representation thereof) `[start_date, stop_date]`. + * **N** - (int) - optional - Number of consecutive simulations to perform. + * **draw_function** - (function) - optional - A function altering the model parameters dictionary `parameters` between consecutive simulations. Function must have `parameters` as its first input, followed by an arbitrary number of additional parameters. Function must return the model parameters dictionary `parameters` with its keys unaltered, meaning no parameters should be added or removed by a *draw function*. + * **draw_function_kwargs** - (dict) - optional - Dictionary containing the parameters of the *draw function*, excluding `parameters`. Keys: names of additional parameters *draw function*, values: values of additional parameters *draw function*. Not subject to input checks regarding data type. * **processes** - (int) - optional - Number of cores to use when {math}`N > 1`. - * **method** - (str) - optional - 'SSA': Stochastic Simulation Algorithm. 'tau-leap': Tau-leaping algorithm. Consult the following [blog](https://lewiscoleblog.com/gillespie-algorithm) for more background information. + * **method** - (str) - optional - 'SSA': Stochastic Simulation Algorithm. 'tau-leap': Tau-leaping algorithm. Consult the [following blog](https://lewiscoleblog.com/gillespie-algorithm) for more background information. #TODO: add an adaptive tau-leap algorithm. * **tau** - (int/float) - optional - Leap value of the tau-leaping method. Consult the following [blog](https://lewiscoleblog.com/gillespie-algorithm) for more background information. * **output_timestep** - (int/float) - optional - Interpolate model output to every `output_timestep` timesteps. For datetimes: expressed in days. - * **warmup** - (float) - optional - Number of days to simulate prior to the simulation start. Usefull in epidemiological contexts when the time between the appearance of "patient zero" and the collection of data is unkown. + * **warmup** - (float) - optional - Number of days to simulate prior to the simulation start. Usefull in epidemiological contexts when the time between the appearance of "patient zero" and the collection of data is be estimated. **Returns** - * **out** - (xarray.Dataset) - Simulation output. Consult the xarray documentation [here](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html#xarray.Dataset). `xarray.Dataset.data_vars` are the model states. `xarray.Dataset.dimensions` are the time dimension plus the model's dimensions. The time dimension is named `time` if timesteps were numbers, the time dimension is named `date` if timesteps were dates. When {math}`N > 1` an additional dimension `draws` is added to the output accomodate the repeated simulations. + * **out** - ([xarray.Dataset](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html#xarray.Dataset)) - Simulation output. `xarray.Dataset.data_vars` return the model's states. `xarray.Dataset.dimensions` returns the temporal dimension plus the model's dimensions. The time dimension is named `time` if timesteps were numbers, the time dimension is named `date` if timesteps were dates. When {math}`N > 1` an additional dimension `draws` is added to accomodate the consecutive simulations. diff --git a/docs/quickstart.md b/docs/quickstart.md index 0927ea4..c19edff 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -238,24 +238,24 @@ class SIR(JumpProcess): ### Draw functions -The `sim()` method of the `ODE` and `JumpProcess` classes can be used to perform {math}`N` repeated simulations by using the optional argument `N`. A *draw function* can be used to alter the value of a model parameters during consecutive model runs by drawing them rrandomly from a list of samples, thereby offering a powerful tool for sensitivity analysis and uncertainty propagation. +The simulation functions of the `ODE` and `JumpProcess` classes (`sim()`) can be used to perform {math}`N` repeated simulations by using the optional argument `N`. A *draw function* can be used to instruct the model on how to alter the value of a model parameters during consecutive model runs, thereby offering a powerful tool for sensitivity analysis and uncertainty propagation. -Draw functions **always** take two dictionaries as input: 1) The dictionary of model parameters, 2) A dictionary containing samples of a model parameter. Assuming we have a list containing 100 samples of the parameter `gamma`, drawn randomly from 0 to 5, +Draw functions **always** take the dictionary of model parameters, `parameters` as their first argument, input checks are used to ensure you provide the correct name and type. This can be followed an arbitrary number of user-defined parameters, which are supplied to the `sim()` function by using its `draw_function_kwargs` argument. The output of a draw function is **always** the dictionary of model parameters, without alterations to the dictionaries keys (no new parameters introduced or parameters deleted). In the example below, we attempt to draw a model parameter `gamma` randomly from 0 to 5, ```python -# make an example of a dictionary containing parameter samples -samples_dict = {'gamma': np.random.uniform(low=0, high=5, n=100)} +# make an example of a dictionary containing samples of a parameter `gamma` +samples = np.random.uniform(low=0, high=5, n=100) # define a 'draw function' def draw_function(parameters, samples): """ randomly selects a sample of 'gamma' from the provided dictionary of samples and assigns it to the dictionary of model parameters """ - parameters['gamma'] = np.random.choice(samples['gamma']) + parameters['gamma'] = np.random.choice(samples) return parameters # simulate 10 trajectories, exploit 10 cores to speed up the computation -out = model.sim(121, N=10, draw_function=draw_function, samples=samples_dict, processes=10) +out = model.sim(121, N=10, draw_function=draw_function, draw_function_kwargs={'samples': samples}, processes=10) print(out) ``` @@ -274,20 +274,18 @@ Data variables: R (draws, age_groups, time) float64 0.0 0.3439 ... 684.0 684.0 ``` -This example is more easily coded up by drawing the random values within the *draw function*, +This example can also be coded up by drawing the random values within the *draw function*, ```python # define a 'draw function' -def draw_function(parameters, samples): +def draw_function(parameters): parameters['gamma'] = np.random.uniform(low=0, high=5) return parameters # simulate the model -out = model.sim(121, N=10, draw_function=draw_function, samples={}) +out = model.sim(121, N=10, draw_function=draw_function) ``` -**_NOTE_** Internally, a call to `draw_function` is made within the `sim()` function, where it is given the dictionary of model parameters and the input argument `samples` of the `sim()` function. - -**_NOTE:_** Technically, you can supply any input you'd like to a *draw function* by exploiting its `samples` dictionary input. Aside from its name and type, `samples` is not subject to any input checks. +**_NOTE_** Internally, a call to `draw_function` is made within the `sim()` function, where it is given the dictionary of model parameters `parameters`, followed by the `draw_function_kwargs`. ### Time-dependent parameter functions diff --git a/docs/references.md b/docs/references.md index 405cf19..ddfac48 100644 --- a/docs/references.md +++ b/docs/references.md @@ -6,6 +6,8 @@ In addition to the tutorials listed on the documentation website, we've used pyS - Alleman T.W., Vergeynst J., De Visscher L., Rollier M., Torfs E., Nopens I., Baetens J.M. (2021). Assessing the effects of non-pharmaceutical interventions on SARS-CoV-2 transmission in Belgium by means of an extended SEIQRD model and public mobility data. *Epidemics*, 37(9). https://doi.org/10.1016/j.epidem.2021.100505 -- Alleman T.W., Rollier M., Vergeynst J., Baetens J.M. (2022). A Mobility-Driven Spatially Explicit SEIQRD COVID-19 Model with VOCs, vaccines and seasonality. *Accepted for publication in Applied Mathematical Modeling*. https://doi.org/10.48550/ARXIV.2207.03717 +- Alleman T.W., Rollier M., Vergeynst J., Baetens J.M. (2023). A Mobility-Driven Spatially Explicit SEIQRD COVID-19 Model with VOCs, vaccines and seasonality. *Applied Mathematical Modeling*, 123, 507:525. https://doi.org/10.1016/j.apm.2023.06.027 -- Alleman T.W., Schoors K., Baetens J.M. (2023). Validating a dynamic input-output model for the propagation of supply and demand shocks during the COVID-19 pandemic in Belgium. *arXiv*. https://arxiv.org/abs/2305.16377 \ No newline at end of file +- Alleman T.W., Schoors K., Baetens J.M. (2023). Validating a dynamic input-output model for the propagation of supply and demand shocks during the COVID-19 pandemic in Belgium. *arXiv*. https://arxiv.org/abs/2305.16377 + +- Alleman T.W., Baetens J.M. (2024). Assessing the impact of forced and voluntary behavioral changes on economic-epidemiological co-dynamics: A comparative case study between Belgium and Sweden during the 2020 COVID-19 pandemic. *arXiv*. https://arxiv.org/abs/2401.08442 \ No newline at end of file diff --git a/docs/workflow.md b/docs/workflow.md index 33303b0..be8dc45 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -4,13 +4,13 @@ In this tutorial, we'll use the simple SIR disease model without dimensions from 1. Import dependencies 2. Load the dataset -3. Load/Define a model +3. Define a pySODM model 4. Initialize the model 5. Calibrate the model (PSO/NM + MCMC) 6. Visualize the goodness-of-fit 7. Perform a scenario analysis -By using a simple model, we can focus on the general workflow and on the most important functions of pySODM, which will be similar in the more research-driven [enzyme kinetics](enzyme_kinetics.md) and [Influenza](influenza_1718.md) case studies available on this documentation website. This tutorial can be reproduced using `~/tutorials/SIR/workflow_tutorial.py` +By using a simple model, we can focus on the general workflow and on the most important functions of pySODM, which will be similar in the more advanced [enzyme kinetics](enzyme_kinetics.md) and [Influenza](influenza_1718.md) case studies available on this documentation website. This tutorial can be reproduced by running `~/tutorials/SIR/workflow_tutorial.py` ### Import dependencies @@ -24,13 +24,13 @@ import matplotlib.pyplot as plt ### Load the dataset -For the purpose of this tutorial, we'll generate a sythetic dataset of disease cases. We'll accomplish this by assuming the disease is generating cases exponentially with a doubling time of 10 days. Alternatively, the output of the SIR model could be used to generate a dataset of disease cases. Mathematically, +For the purpose of this tutorial, we'll generate a sythetic dataset containing disease incidence. We assume the disease is generating cases exponentially with a doubling time of 10 days. Mathematically, ```{math} n_{cases}(t) = \exp \Big( t * \dfrac{\log 2}{t_d} \Big) ``` -We'll assume the first case was detected on December 1st, 2022 and data was collected on every weekday until December 21st, 2022. Then, we'll add observational noise to the synthetic data. For count based data, observational noise is typically the result of a poisson or negative binomial proces, depending on the occurence of overdispersion. For a poisson proces, the variance in the data is equal to the mean: {math}`\sigma^2 = \mu`, while for a negative binomial proces the mean-variance relationship is quadratic: {math}`\sigma^2 = \mu + \alpha \mu^2`. For this example we'll use the negative binomial distribution with a dispersion factor of `alpha=0.03`, which was representative for the data used during the COVID-19 pandemic in Belgium. Note that for {math}`\alpha=0`, the variance of the negative binomial distribution is equal to the variance of the poisson distribution. +We'll assume the first case was detected on December 1st, 2022 and data was collected on weekdays only until December 21st, 2022. Then, we'll add observational noise to the synthetic data. For count based data, observational noise is typically the result of a poisson or negative binomial proces, depending on the occurence of overdispersion. In case the observations result from a poisson proces, the variance in the data is equal to the mean: {math}`\sigma^2 = \mu`, while for a negative binomial proces the mean-variance relationship is quadratic: {math}`\sigma^2 = \mu + \alpha \mu^2`. For this example we'll use the negative binomial distribution with a dispersion factor of `alpha=0.03`, which was representative for the data used during the COVID-19 pandemic in Belgium. Note that for {math}`\alpha=0`, the variance of the negative binomial distribution is equal to the variance of the poisson distribution. ```python # Parameters @@ -44,7 +44,7 @@ y = np.random.negative_binomial(1/alpha, (1/alpha)/(np.exp(t*np.log(2)/t_d) + (1 d = pd.Series(index=dates, data=y, name='CASES') # Index name must be date for calibration to work d.index.name = 'date' -# Data collection only on weekdays +# Only retain weekdays d = d[d.index.dayofweek < 5] ``` @@ -86,16 +86,16 @@ N &=& S + I + R, \\ \end{eqnarray} ``` -The model has three states: 1) The number of individuals susceptible to the disease (S), 2) the number of infectious individuals (I), and 3) the number of removed individuals (R). The model has two parameters: 1) `beta`, the rate of transmission and, 2) `gamma`, the duration of infectiousness. Building a model is based on the class inheritance, the user must first load the `ODE` class from `~/src/models/base.by`. Then, the user must define his/her own class which must contain (minimally), -- A list containing the state names `states`, -- A list containing the parameter names `parameter_names`, -- An `integrate()` function where the differentials of the model are computed, +The model has three states: 1) The number of individuals susceptible to the disease (S), 2) the number of infectious individuals (I), and 3) the number of removed individuals (R). The model has two parameters: 1) `beta`, the rate of transmission and, 2) `gamma`, the duration of infectiousness. Building a pySODM model is based on class inheritance, you load the `ODE` class from `~/src/models/base.by`. Then, you define your own model class which must contain (minimally), +- A list containing the state names, named `states`, +- A list containing the parameter names, named `parameter_names`, +- A function named `integrate()` where you compute your model's differentials. -and take the `ODE` class as its input. Checkout the documentation of the ODE class [here](models.md). There are some important formatting requirements to the integrate function, which are verified when the model is initialized, +This class takes pySODM's `ODE` class as its input. The `ODE` class is recognizes the components of your custom model class which will aid in performing input checks. Checkout the documentation of the [ODE class](models.md) for an overview of the class methods. There are some important formatting requirements to the integrate function, which are verified when the model is initialized, 1. The integrate function must have the timestep `t` as its first input -2. The model states and parameters must also be given as input, their order does not make a difference +2. Followed by all model states and parameters, their order does not matter 3. The integrate function must return a differential for every model state, arranged in the same order as the state names defined in `states` -4. The integrate function must be a static method (include `@staticmethod`) +4. The integrate function must be a static method (done by decorating with `@staticmethod`) ```python # Import the ODE class @@ -104,7 +104,7 @@ from pySODM.models.base import ODE # Define the model equations class ODE_SIR(ODE): """ - Simple SIR model without dimensions + An SIR model """ states = ['S','I','R'] @@ -123,7 +123,7 @@ class ODE_SIR(ODE): return dS, dI, dR ``` -After defining our model, we'll initialize it by supplying a dictionary of initial states and a dictionary of model parameters. In our example, we'll assume the disease spreads in a relatively small population of 1000 individuals. At the start of the simulation we'll assume there is one "patient zero". We don't have to define the number of recovered individuals as undefined states are automatically set to zero by pySODM. +After defining our model, we'll initialize it by supplying a dictionary of initial states and a dictionary of model parameters. In this example, we'll assume the disease spreads in a relatively small population of 1000 individuals. At the start of the simulation we'll assume there is one "patient zero". There's no need to define the number of recovered individuals as undefined states are automatically set to zero by pySODM. ```python model = ODE_SIR(states={'S': 1000, 'I': 1}, parameters={'beta': 0.35, 'gamma': 5}) @@ -131,19 +131,21 @@ model = ODE_SIR(states={'S': 1000, 'I': 1}, parameters={'beta': 0.35, 'gamma': 5 ### Calibrating the model -#### The posterior probability +#### The posterior probability function -Before we can have our computer find a set of model parameters that aligns the model with the data, we must instruct it what deviations between the data and model prediction are tolerated. Such function is often referred to as an *objective function* or *likelihood function*. In all tutorials we will set up and attempt to maximize the *posterior probability* of our model's parameters in light of the data {math}`p(\theta | y_{\text{data}})`. It contrasts with the *likelihood function* {math}`p(y_{\text{data}} | \theta)`, which is the probability of the data given a fixed set of the model's parameter values. The two are related as follows by Bayes' theorem, +Before we can have our computer find a set of model parameters that aligns the model with the data, we must instruct it what deviations between the data and model prediction are tolerated. Such function is often referred to as an *objective function* or *likelihood function*. In all tutorials we will set up and attempt to maximize the *posterior probability* of our model's simulations in light of the data {math}`p(\tilde{x}(\theta) | x)`, which is mathematically defined as, +$$ p (\tilde{x}(\theta) | x) = \frac{p( x | \tilde{x}(\theta)) p(\theta)}{p(x)}, $$ +where {math}`x` represents the data, {math}`\theta` the model's parameters, and {math}`\tilde{x}(\theta)` a model simulation made using parameters {math}`\theta`. {math}`p(x | \tilde{x}(\theta))` is the *likelihood function*, {math}`p(\theta)` is the *prior probability* of the model parameters and contains any prior beliefs about the probability density distribution of the parameters {math}`\theta`. Finally, {math}`p(x)` is the probability of the data, which is used for normalization and can be ignored for all practical purposes. What is therefore important to remember is that the *posterior probability* is proportional to the product of the *likelihood* and the parameter *prior probability*. We'll maximize the logarithm of the *posterior probability*, which can be computed as the sum of the logarithm of the *prior probability* and the logarithm of the *likelihood*. Therefore, -$$ p (\theta | y_{\text{data}}) = \frac{p(y_{\text{data}} | \theta) p(\theta)}{p(y_{\text{data}})}. $$ +$$\log p (\tilde{x}(\theta) | x) \propto \log p( x | \tilde{x}(\theta)) + \log p(\theta)$$ -Here, {math}`p(y_{\text{data}})` is used for normalization and can be ignored for all practical purposes. {math}`p(\theta)` is referred to as the *prior probability* of the model parameters and contains any prior beliefs about the probability density distribution of the parameters {math}`\theta`. What is really important to remember is that the *posterior probability* {math}`p(\theta | y_{\text{data}})` is proportional to the product of the *likelihood* {math}`p(y_{\text{data}} | \theta)` and the parameter *prior probability* {math}`p(\theta)`. We'll maximize the logarithm of the *posterior probability*, computed as the sum of the logarithm of the *prior probability* and the logarithm of the *likelihood*. For an introduction to Bayesian inference, I recommend reading the following [article](https://towardsdatascience.com/a-gentle-introduction-to-bayesian-inference-6a7552e313cb). I also recommend going through the following tutorial of [emcee](https://emcee.readthedocs.io/en/stable/tutorials/line/). +Just remember: LOG POSTERIOR = LOG PRIOR + LOG LIKELIHOOD -Remember: LOG POSTERIOR = LOG PRIOR + LOG LIKELIHOOD +For an introduction to Bayesian inference, I recommend reading the following [article](https://towardsdatascience.com/a-gentle-introduction-to-bayesian-inference-6a7552e313cb). I also recommend going through the following tutorial of [emcee](https://emcee.readthedocs.io/en/stable/tutorials/line/). #### Choosing an appropriate prior function -For every calibrated model parameter we will need to provide a probability function expressing our prior believes with regard to the probability distribution of that parameter. pySODM includes uniform, normal, triangular, gamma and weibull priors, which can be imported as follows (reside in `~/src/pySODM/optimization/objective_functions.py`), +For every model parameter we'll try to calibrate, we'll need to provide a probability function expressing our prior believes with regard to the probability distribution of that parameter. pySODM includes uniform, normal, triangular, gamma and weibull priors, which can be imported as follows (reside in `~/src/pySODM/optimization/objective_functions.py`), ``` from pySODM.optimization.objective_functions import log_prior_uniform, log_prior_normal, log_prior_triangle, log_prior_gamma, log_prior_weibull, log_prior_custom @@ -153,15 +155,15 @@ For most problems, uniform prior probabilities, which simply constraint the para #### Choosing an appropriate likelihood function -The next step is to choose an appropriate log likelihood function {math}` \log [ p(y_{\text{data}} | \theta) ]`. The log likelihood function is a function that describes the magnitude of the error when model prediction and data deviate. The bread and butter log likelihood function is the sum of squared errors (SSE), +The next step is to choose an appropriate log likelihood function. The log likelihood function is a function that describes the magnitude of the error when model prediction and data deviate. The bread and butter log likelihood function is the sum of squared errors (SSE), ```{math} SSE = \sum_i (y_{data,i} - y_{model,i})^2, ``` -this is actually a simplified case of the following Guassian log likelihood function, +which is actually a simplified case of the following Guassian log likelihood function, ```{math} \log \big[ p(y_{data} | y_{model}, \sigma) \big] = - \frac{1}{2} \sum_i \Bigg[ \frac{(y_{data,i} - y_{model,i})^2}{\sigma_i^2} + \log (2 \pi \sigma_i^2) \Bigg]. ``` -from the above equations we can deduce that the SSEs's use is only appropriate when the error on all datapoints are the same (i.e. {math}`\sigma_i^2 = 1`). If the errors ({math}`\sigma_i`) of all datapoints ({math}`y_{data,i}`) are known, then the Gaussian log likelihood function is the most appropriate objective function. When the error of the datapoints are unknown, we must analyze the mean-variance relationship in our dataset to choose the appropriate likelihood function or make an assumption. For epidemiological case data, dispersion tends to grow with the magnitude of the data and only one datapoint is available per day (so no error is readily available). In that case, pySODM's [`variance_analysis()` function](optimization.md) includes the necessary tools to approximate the mean-variance relationship in a dataset of counts. By dividing the dataset in discrete windows and comparing an exponential moving average to the data, mean-variance couples can be approximated. Then, the appropriate likelihood function can be found by fitting the following candidate models, +from the above equations we can deduce that the SSEs's use is only appropriate when the error on all datapoints are the same (i.e. {math}`\sigma_i^2 = 1`). If the errors ({math}`\sigma_i`) of all datapoints ({math}`y_{data,i}`) are known, then the Gaussian log likelihood function is the most appropriate objective function. When the error of the datapoints are unknown, we can analyze the mean-variance relationship in our dataset to choose the appropriate likelihood function or make an assumption. For epidemiological case data, dispersion tends to grow with the magnitude of the data and only one datapoint is available per day (so no error is readily available). In that case, pySODM's [`variance_analysis()` function](optimization.md) includes the necessary tools to approximate the mean-variance relationship in a dataset of counts. By dividing the dataset in discrete windows and comparing an exponential moving average to the data, mean-variance couples can be approximated. Then, the appropriate likelihood function can be found by fitting the following candidate models, | Mean-Variance model | Relationship | |------------------------------|--------------------------------------------| @@ -343,28 +345,30 @@ plt.close() #### Goodness-of-fit: draw functions -Next, let's visualize how well our simple SIR model fits the data. To this end, we'll simulate the model a number of times, and at each time we'll update the value of `beta` with a sample drawn by the MCMC. Then, assuming our model is an adequate representation of the modeled proces, we'll add observational noise to each model trajectory using `add_negative_binomial()`. Finally, we'll visualize the individual model trajectories and the data to asses the goodness-of-fit. +Next, let's visualize how well our simple SIR model fits the data. To this end, we'll simulate the model a number of times, and we'll update the value of `beta` with a sample from its posterior probability distribution obtained using the MCMC. Then, we'll add the noise introduced by observing the epidemiological process to each simulated model trajectory using `add_negative_binomial()`. Finally, we'll visualize the individual model trajectories and the data to asses the goodness-of-fit. -To repeatedly simulate a model and update a parameter value in each consecutive run, we'll use what is called a *draw function*. This function always takes two input arguments, the model parameters dictionary `parameters` and a dictionary containing samples `samples`, and must always return the model parameters dictionary `parameters` (unaltered dictionary keys). The draw function defines how the samples dictionary can be used to update the model parameters dictionary. In our case, we'll use `np.random.choice()` to sample a random value of `beta` from the dictionary of samples and assign it to the model parameteres dictionary, as follows, +To repeatedly simulate a model and update a parameter value in each consecutive run, you can use pySODM's *draw function*. These functions always takes `parameters` as its first input argument, representing the dictionary of model parameters. This can then be followed by an arbitrary number of user-defined parameters to aid in the draw function. A draw function must always return the model parameters dictionary `parameters`, without alterations to the dictionaries keys. The draw function defines how values of parameters can change during consecutive simulations of the model, i.e. it updates parameter values in the model parameters dictionary. This feature is useful to perform sensitivty analysis. + +In this example, we'll use a draw function to replace `beta` with a random value obtained from its posterior probability distribution obtained during calibration. We accomplish this by defining a draw function with one additional argument, `samples`, which is a list containing the samples of the posterior probability distribution of `beta`. `np.random.choice()` is used to sample a random value of `beta` and assign it to the model parameteres dictionary, ```python # Define draw function def draw_fcn(parameters, samples): - parameters['beta'] = np.random.choice(np.array(samples['beta'])) + parameters['beta'] = np.random.choice(np.array(samples)) return parameters ``` -Then, we'll provide four additional arguments to `sim()`, +To use this draw function, you provide four additional arguments to the `sim()` function, 1. `N`: the number of repeated simulations, -2. `samples`: the dictionary containing the samples of `beta` generated with the MCMC, -3. `draw_fcn`: our defined draw function, +2. `draw_function`: the draw function, +2. `draw_function_kwargs`: a dictionary containing all parameters of the draw function not equal to `parameters` (passed internally by the `sim()` function). 4. `processes`: the number of cores to divide the `N` simulations over. As demonstrated in the quickstart example, the `xarray` containing the model output will now contain an additional dimension to accomodate the repeated simulations: `draws`. ```python # Simulate model -out = model.sim([d.index.min(), d.index.max()+pd.Timedelta(days=28)], N=50, samples=samples_dict, draw_fcn=draw_fcn, processes=processes) +out = model.sim([d.index.min(), d.index.max()+pd.Timedelta(days=28)], N=50, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict['beta']}, processes=processes) # Add negative binomial observation noise out = add_negative_binomial_noise(out, dispersion) # Visualize result @@ -378,7 +382,7 @@ plt.show() plt.close() ``` -Our simple SIR model seems to fit the data very well. From the projection we can deduce that the peak number of infectees will fall around February 7th, 2023 and there will be between 30-50 infectees. +Our simple SIR model fits the data well. From the projection we can deduce that the peak number of infectees will fall around February 7th, 2023 and there will be between 30-50 infectees. ![MCMC](/_static/figs/workflow/MCMC_fit.png) @@ -397,9 +401,9 @@ def lower_infectivity(t, states, param, start_measures): return 0.50*param ``` -This simple function reduces the parameter `param` with a factor two if the simulation date is larger than `start_measures`. A time-dependent model parameter function must always have the arguments `t` (simulation timestep), `states` (dictionary of model states) and `param` (value of the parameter the function is applied to) as inputs. In addition, the user can supply additional arguments to the function, which need to be added to the dictionary of model parameters. +This simple function reduces the parameter `param` with a factor two if the simulation date is larger than `start_measures`. A time-dependent model parameter function must always have the arguments `t` (simulation timestep), `states` (dictionary of model states) and `param` (original value of the parameter the function is applied to) as inputs. In addition, the user can supply additional arguments to the function, which need to be added to the dictionary of model parameters when the model is initialised. -We will apply this function to the infectivity parameter `beta` by using the `time_dependent_parameters` argument of `sim()`. +We will apply this function to the infectivity parameter `beta`, and declare this using the `time_dependent_parameters` argument when initialising the model. ```python # Attach the additional arguments of the time-depenent function to the parameter dictionary model.parameters.update({'start_measures': '2023-01-21'}) @@ -413,7 +417,7 @@ Next, we compare the simulations with and without the use of our time-dependent ```python # Simulate the model -out_with = model_with.sim([d.index.min(), d.index.max()+pd.Timedelta(days=2*28)], N=50, samples=samples_dict, draw_fcn=draw_fcn, processes=processes) +out_with = model_with.sim([d.index.min(), d.index.max()+pd.Timedelta(days=2*28)], N=50, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict['beta']} processes=processes) # Add negative binomial observation noise out_with = add_negative_binomial_noise(out_with, alpha) @@ -437,30 +441,31 @@ As we could reasonably expect, if we could implement some form of preventive mea ![scenario](/_static/figs/workflow/scenario.png) However, it is unlikely that people would adopt these preventive measures instantly. Adoptation of measures is more gradual in the real world. We could tackle this problem in two ways using pySODM, -- Implementing a ramp function to lower the infectivity in our time-dependent model parameter function, +- Implementing a ramp function to gradually lower the infectivity in our time-dependent model parameter function, - Using *draw functions* to sample `start_measures` stochastically in every simulation. We'll demonstrate this option. -To simulate ramp-like adoptation of measures, we can add the number of additional days it takes beyond January 21st, 2023 to adopt the measures by sampling from a triangular distribution with a minimum and mode of zero days, and a maximum adoptation time of 21 days. All we have to do is add one line to the draw function, +To simulate ramp-like adoptation of measures, we can add the number of additional days it takes beyond January 21st, 2023 to adopt the measures by sampling from a triangular distribution with a minimum and mode of zero days, and a maximum adoptation time of `ramp_length` days. All we have to do is add one line to the draw function, ```python # Define draw function -def draw_fcn(parameters, samples): - parameters['beta'] = np.random.choice(samples['beta']) - parameters['start_measures'] += pd.Timedelta(days=np.random.triangular(left=0,mode=0,right=21)) +def draw_fcn(parameters, samples, ramp_length): + parameters['beta'] = np.random.choice(samples) + parameters['start_measures'] += pd.Timedelta(days=np.random.triangular(left=0,mode=0,right=ramp_length)) return parameters ``` -Gradually adopting the preventive measures results in a more realistic simulation, +Don't forget to add the new parameter `ramp_length` to the dictionary of `draw_function_kwargs` when simulating the model! Gradually adopting the preventive measures results in a more realistic simulation, ![scenario_gradual](/_static/figs/workflow/scenario_gradual.png) ### Conclusion -I hope this tutorial has demonstrated the workflow pySODM can speedup. However, both our model and dataset had no labeled dimensions so this example was very rudimentary. pySODM allows to tackle much more convoluted problems across different fields. However, the basic syntax of our workflow remains practically unchanged. I illustrate this with the following tutorials, +I hope this tutorial has demonstrated the workflow pySODM can speedup. However, both our model and dataset had no labeled dimensions (0-dimensional) so this example was very rudimentary. However, pySODM allows to tackle higher-dimensional problems using the same basic syntax. This allows to tackle complex problems in different scientific disciplines, I illustrate this with the following tutorials, | Case study | Features | |------------------------------------------------|---------------------------------------------------------------------------------------------------| -| Enzyme kinetics: intrinsic kinetics | Dimensionless ODE model. Calibration to multiple datasets with changing initial conditions. | -| Enzyme kinetics: 1D Plug-Flow Reactor | Use the method of lines to discretise a PDE model into an ODE model with one dimension. | -| Influenza 2017-2018 | stochastic jump process model with one dimension. Calibration of a 1D model parameters on a 1D dataset. | -| SIR-SI Model (see `~/tutorials/SIR_SI/`) | ODE model where states have different dimensions and thus different shapes. | \ No newline at end of file +| Enzyme kinetics: intrinsic kinetics | Calibration of a model to multiple datasets with changing initial conditions. | +| Enzyme kinetics: 1D Plug-Flow Reactor | Use the method of lines to discretise a PDE model into an ODE model and simulate it using pySODM. | +| Influenza 2017-2018 | Stochastic jump process epidemiological model with age groups (1D model). Calibration of a 1D model parameter to a 1D dataset. | +| SIR-SI Model | ODE model where states have different dimensions and thus different shapes. | +| Spatial SIR model | (Coming soon) An epidemiological model with age and space stratification (2D model). | \ No newline at end of file diff --git a/src/pySODM/models/base.py b/src/pySODM/models/base.py index 2891634..9fc8f07 100644 --- a/src/pySODM/models/base.py +++ b/src/pySODM/models/base.py @@ -378,11 +378,11 @@ def _mp_sim_single(self, drawn_parameters, time, actual_start_date, method, tau, self.parameters.update(drawn_parameters) return self._sim_single(time, actual_start_date, method, tau, output_timestep) - def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=None, method='tau_leap', tau=0.5, output_timestep=1): + def sim(self, time, warmup=0, N=1, draw_function=None, draw_function_kwargs={}, processes=None, method='tau_leap', tau=0.5, output_timestep=1): """ - Run a model simulation for the given time period. Can optionally perform N repeated simulations of time days. - Can change the values of model parameters at every repeated simulation by drawing samples from a dictionary `samples` using a function `draw_function` + Simulate a model during a given time period. + Can optionally perform `N` repeated simulations with sampling of model parameters using a function `draw_function`. Parameters ---------- @@ -400,12 +400,11 @@ def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=N Number of repeated simulations (default: 1) draw_function : function - A function with as obligatory inputs the dictionary of model parameters 'parameters' and a dictionary of parameter samples 'samples' (can actually contain whatever you'd like as long as its a dict) - The function can alter parameters in the model parameters dictionary between repeated simulations `N`. - Usefull to propagate uncertainty, perform sensitivity analysis. + A function altering parameters in the model parameters dictionary between consecutive simulations, usefull to propagate uncertainty, perform sensitivity analysis + Has the dictionary of model parameters ('parameters') as its first obligatory input, followed by a variable number of additional inputs - samples : dictionary - Sample dictionary used by `draw_function`. + draw_function_kwargs : dictionary + A dictionary containing the input arguments of the draw function, excluding its obligatory input 'parameters' processes: int Number of cores to distribute the `N` draws over @@ -436,13 +435,17 @@ def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=N raise TypeError( "discrete timestep 'tau' must be of type int or float" ) - # Input checks on supplied simulation time time, actual_start_date = validate_simulation_time(time, warmup) - - # Input check on draw function + # Input checks related to draw functions if draw_function: - validate_draw_function(draw_function, self.parameters, samples) + # validate function + validate_draw_function(draw_function, draw_function_kwargs, self.parameters) + # function provided but N=1 --> user likely forgot 'N' + if N == 1: + raise ValueError( + "you specified a draw function but N=1, have you forgotten 'N'?" + ) # Copy parameter dictionary --> dict is global cp = copy.deepcopy(self.parameters) @@ -452,7 +455,7 @@ def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=N cp_draws=copy.deepcopy(self.parameters) if draw_function: out={} # Need because of global dictionaries and voodoo magic - out.update(draw_function(self.parameters,samples)) + out.update(draw_function(self.parameters,**draw_function_kwargs)) drawn_dictionaries.append(out) else: drawn_dictionaries.append({}) @@ -681,9 +684,11 @@ def _mp_sim_single(self, drawn_parameters, time, actual_start_date, method, rtol out = self._sim_single(time, actual_start_date, method, rtol, output_timestep, tau) return out - def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=None, method='RK23', rtol=1e-3, tau=None, output_timestep=1): + def sim(self, time, warmup=0, N=1, draw_function=None, draw_function_kwargs={}, processes=None, method='RK23', rtol=1e-3, tau=None, output_timestep=1): """ - Run a model simulation for a given time period. Can optionally perform `N` repeated simulations with sampling of model parameters from a dictionary `samples` using a function `draw_function`. Can perform discrete timestepping if `tau != None`. + Simulate a model during a given time period. + Can optionally perform `N` repeated simulations with sampling of model parameters using a function `draw_function`. + Can perform discrete timestepping by specifying `tau != None`. Parameters ---------- @@ -701,12 +706,11 @@ def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=N Number of repeated simulations (default: 1) draw_function : function - A function with as obligatory inputs the dictionary of model parameters and a dictionary of samples - The function can alter parameters in the model parameters dictionary between repeated simulations `N`. - Usefull to propagate uncertainty, perform sensitivity analysis. + A function altering parameters in the model parameters dictionary between consecutive simulations, usefull to propagate uncertainty, perform sensitivity analysis + Has the dictionary of model parameters ('parameters') as its first obligatory input, followed by a variable number of additional inputs - samples : dictionary - Sample dictionary used by `draw_function`. + draw_function_kwargs : dictionary + A dictionary containing the input arguments of the draw function, excluding its obligatory input 'parameters' processes: int Number of cores to distribute the `N` draws over. @@ -745,27 +749,30 @@ def sim(self, time, warmup=0, N=1, draw_function=None, samples=None, processes=N raise TypeError( "discrete timestep 'tau' must be of type int or float" ) - # Input checks on supplied simulation time time, actual_start_date = validate_simulation_time(time, warmup) - - # Input check on draw function + # Input checks related to draw functions if draw_function: - validate_draw_function(draw_function, self.parameters, samples) - - # Provinding 'N' but no draw function: wastefull + # validate function + validate_draw_function(draw_function, draw_function_kwargs, self.parameters) + # function provided but N=1 --> user likely forgot 'N' + if N == 1: + raise ValueError( + "you specified a draw function but N=1, have you forgotten 'N'?" + ) + # provinding 'N' but no draw function: wasteful of resources if ((N != 1) & (draw_function==None)): - raise ValueError('performing N={0} repeated simulations without using a `draw_function` is wastefull of computational resources'.format(N)) + raise ValueError('attempting to perform N={0} repeated simulations without using a draw function'.format(N)) + # Construct list of drawn dictionaries # Copy parameter dictionary --> dict is global cp = copy.deepcopy(self.parameters) - # Construct list of drawn dictionaries drawn_dictionaries=[] for n in range(N): cp_draws=copy.deepcopy(self.parameters) if draw_function: out={} # Need because of global dictionaries and voodoo magic - out.update(draw_function(self.parameters,samples)) + out.update(draw_function(self.parameters,**draw_function_kwargs)) drawn_dictionaries.append(out) else: drawn_dictionaries.append({}) diff --git a/src/pySODM/models/validation.py b/src/pySODM/models/validation.py index bc21341..ecb487b 100644 --- a/src/pySODM/models/validation.py +++ b/src/pySODM/models/validation.py @@ -27,7 +27,7 @@ def validate_simulation_time(time, warmup): if all([isinstance(item, (int,float,np.int32,np.float32,np.int64,np.float64)) for item in time]): time = [round(item) for item in time] time[0] -= warmup - # If they are all timestamps + # If they are all datetime elif all([isinstance(item, datetime) for item in time]): actual_start_date = time[0] - timedelta(days=warmup) time = [0, date_to_diff(actual_start_date, time[1])] @@ -60,22 +60,22 @@ def validate_simulation_time(time, warmup): ) return time, actual_start_date -def validate_draw_function(draw_function, parameters, samples): +def validate_draw_function(draw_function, draw_function_kwargs, parameters): """Validates the draw function's input and output. For use in the sim() functions of the ODE and JumpProcess classes (base.py). input ----- draw_function: function - a function used to alter the values of model parameters between consecutive simulations + a function altering parameters in the model parameters dictionary between consecutive simulations + has the dictionary of model parameters ('parameters') as its first obligatory input, followed by a variable number of inputs + + draw_function_kwargs: dict + a dictionary containing the aforementioned additional inputs to the 'draw_function' parameters: dict the dictionary of model parameters - samples: dict - a dictionary containing samples of model parameters; dictionary can technically contain anything you'd like - # TODO: perhaps change to more generic 'draw_function_kwargs' - output ------ @@ -83,35 +83,46 @@ def validate_draw_function(draw_function, parameters, samples): an updated dictionary of model parameters """ - # check if it is a function + # check that the draw_function is a function if not callable(draw_function): raise TypeError( f"a 'draw function' must be callable (a function)" ) - # verify that names of draw function input args are 'parameters' and 'samples - sig = inspect.signature(draw_function) - keywords = list(sig.parameters.keys()) - if len(keywords) != 2: + # check that it's first argument is named 'parameters' + args = list(inspect.signature(draw_function).parameters.keys()) + if args[0] != "parameters": raise ValueError( - f"Your draw function '{draw_function.__name__}' must have 'parameters' and 'samples' as the names of its inputs. Its current inputs are named: {keywords}" + f"your draw function '{draw_function.__name__}' must have 'parameters' as its first input. Its current inputs are: {args}" + ) + # check that draw_function_kwargs is a 'dict' + if not isinstance(draw_function_kwargs, dict): + raise TypeError( + f"your draw function kwargs must be of type 'dict' but are of type {type(draw_function_kwargs)}" ) - else: - if (keywords[0] != "parameters") | (keywords[1] != "samples"): - raise ValueError( - f"Your draw function '{draw_function.__name__}' must have 'parameters' and 'samples' as the names of its inputs. Its current inputs are named: {keywords}" + # if draw_functions_kwargs is an empty dict and draw_function has additional kwargs the user has most likely forgotten to pass draw_function_kwargs to the sim() function + if ((len(args[1:]) > 0) & (len(list(draw_function_kwargs.keys())) == 0)): + raise ValueError( + f"the draw function '{draw_function.__name__}' has {len(args[1:])} arguments in addition to the mandatory 'parameters' argument\n" + f"have you forgotten to pass 'draw_function_kwargs' to the sim() function?" + ) + # check that it's keys have the same name as the inputs of draw_function that follow 'parameters' + if set(args[1:]) != set(list(draw_function_kwargs.keys())): + raise ValueError( + f"incorrect arguments passed to draw function '{draw_function.__name__}'\n" + "keys missing in 'draw_function_kwargs': {0}. redundant keys: {1}".format(set(args[1:]).difference(list(draw_function_kwargs.keys())), set(list(draw_function_kwargs.keys())).difference(set(args[1:]))) ) # call draw function and check its outputs cp_draws=copy.deepcopy(parameters) - d = draw_function(parameters, samples) + d = draw_function(parameters, **draw_function_kwargs) parameters = cp_draws if not isinstance(d, dict): raise TypeError( - f"A draw function must return a dictionary. Found type {type(d)}" + f"a draw function must return a dictionary. found type {type(d)}" ) if set(d.keys()) != set(parameters.keys()): raise ValueError( - f"Keys in output dictionary of draw function '{draw_function.__name__}' must match the keys in input dictionary 'parameters'.\n" - "Missing keys in output dict: {0}. Redundant keys in output dict: {1}".format(set(parameters.keys()).difference(set(d.keys())), set(d.keys()).difference(set(parameters.keys()))) + f"keys in output dictionary of draw function '{draw_function.__name__}' must match the keys in input dictionary 'parameters'.\n" + "keys missing in draw function output: {0}. redundant keys: {1}".format(set(parameters.keys()).difference(set(d.keys())), set(d.keys()).difference(set(parameters.keys()))) ) def fill_initial_state_with_zero(state_names, initial_states): diff --git a/src/tests/test_JumpProcess.py b/src/tests/test_JumpProcess.py index 00748d3..2ab405b 100644 --- a/src/tests/test_JumpProcess.py +++ b/src/tests/test_JumpProcess.py @@ -503,19 +503,33 @@ def compliance_func(t, states, param): #################### def test_draw_function(): + """ + identical to the ODE variant + except for the check on having no draw_function when N>1 --> not wastefull of resources in a stochastic model + """ # states, parameters, coordinates parameters = {"gamma": 0.2, "beta": np.array([0.8, 0.9])} initial_states = {"S": [600_000 - 20, 400_000 - 10], "I": [20, 10]} coordinates = {'age_groups': ['0-20','20-120']} - # correct draw function - def draw_function(parameters, samples): + # correct draw function without additional arguments + def draw_function(parameters): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - output = model.sim(time, draw_function=draw_function, samples={}, N=5) + output = model.sim(time, draw_function=draw_function, N=5) + # assert dimension 'draws' is present in output + assert 'draws' in list(output.sizes.keys()) + + # correct draw function with additional arguments + def draw_function(parameters, par_1, par_2): + return parameters + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + output = model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0, 'par_2': 0}, N=5) # assert dimension 'draws' is present in output assert 'draws' in list(output.sizes.keys()) @@ -523,41 +537,78 @@ def draw_function(parameters, samples): time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) with pytest.raises(TypeError, match="a 'draw function' must be callable"): - model.sim(time, draw_function='bliblablu', samples={}, N=5) + model.sim(time, draw_function='bliblablu', N=5) + + # correct draw function but too few arguments provided in draw_function_kwargs + def draw_function(parameters, par_1, par_2): + return parameters + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + with pytest.raises(ValueError, match="incorrect arguments passed to draw function"): + model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0}, N=5) - # wrong draw function: not enough input arguments + # correct draw function but too much arguments in draw_function_kwargs def draw_function(parameters): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Your draw function 'draw_function' must have 'parameters' and 'samples' as the names of its inputs."): - model.sim(time, draw_function=draw_function, samples={}, N=5) + with pytest.raises(ValueError, match="incorrect arguments passed to draw function"): + model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0}, N=5) - # wrong draw function: input arguments switched - def draw_function(samples, parameters): + # correct draw function with extra args but user forgets to provide draw_function_kwargs to sim() + def draw_function(parameters, par_1, par_2): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Your draw function 'draw_function' must have 'parameters' and 'samples' as the names of its inputs."): - model.sim(time, draw_function=draw_function, samples={}, N=5) - - # wrong draw function: too many input arguments - def draw_function(parameters, samples, blublabliblu): + with pytest.raises(ValueError, match="the draw function 'draw_function' has 2 arguments in addition to the mandatory 'parameters' argument"): + model.sim(time, draw_function=draw_function, N=5) + + # wrong draw function: first input argument is not 'parameters' + def draw_function(par_1, parameters): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Your draw function 'draw_function' must have 'parameters' and 'samples' as the names of its inputs."): - model.sim(time, draw_function=draw_function, samples={}, N=5) + with pytest.raises(ValueError, match="must have 'parameters' as its first input. Its current inputs are"): + model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0}, N=5) + + # wrong draw function: return a scalar + def draw_function(parameters): + return 5 + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + with pytest.raises(TypeError, match="a draw function must return a dictionary. found type"): + model.sim(time, draw_function=draw_function, N=5) # wrong draw function: put a new keys in model parameters dictionary that doesn't represent a model parameter - def draw_function(parameters, samples): + def draw_function(parameters): parameters['bliblublo'] = 5 return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Keys in output dictionary of draw function 'draw_function' must match the keys in input dictionary 'parameters'."): - model.sim(time, draw_function=draw_function, samples={}, N=5) \ No newline at end of file + with pytest.raises(ValueError, match="keys in output dictionary of draw function 'draw_function' must match the keys in input dictionary 'parameters'."): + model.sim(time, draw_function=draw_function, N=5) + + # correct draw function but user does not provide N + def draw_function(parameters): + return parameters + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + with pytest.raises(ValueError, match="you specified a draw function but N=1, have you forgotten 'N'"): + model.sim(time, draw_function=draw_function) + + # user provides N but no draw function (differs from ODE) + output = model.sim(time, N=100) + # assert dimension 'draws' is present in output + assert 'draws' in list(output.sizes.keys()) + + # or + output = model.sim(time, draw_function_kwargs={'arg_1': 0}, N=100) + # assert dimension 'draws' is present in output + assert 'draws' in list(output.sizes.keys()) \ No newline at end of file diff --git a/src/tests/test_ODE.py b/src/tests/test_ODE.py index aa3e69c..eb27ef9 100644 --- a/src/tests/test_ODE.py +++ b/src/tests/test_ODE.py @@ -490,13 +490,23 @@ def test_draw_function(): initial_states = {"S": [600_000 - 20, 400_000 - 10], "I": [20, 10]} coordinates = {'age_groups': ['0-20','20-120']} - # correct draw function - def draw_function(parameters, samples): + # correct draw function without additional arguments + def draw_function(parameters): + return parameters + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + output = model.sim(time, draw_function=draw_function, N=5) + # assert dimension 'draws' is present in output + assert 'draws' in list(output.sizes.keys()) + + # correct draw function with additional arguments + def draw_function(parameters, par_1, par_2): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - output = model.sim(time, draw_function=draw_function, samples={}, N=5) + output = model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0, 'par_2': 0}, N=5) # assert dimension 'draws' is present in output assert 'draws' in list(output.sizes.keys()) @@ -504,41 +514,76 @@ def draw_function(parameters, samples): time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) with pytest.raises(TypeError, match="a 'draw function' must be callable"): - model.sim(time, draw_function='bliblablu', samples={}, N=5) + model.sim(time, draw_function='bliblablu', N=5) - # wrong draw function: not enough input arguments + # correct draw function but too few arguments provided in draw_function_kwargs + def draw_function(parameters, par_1, par_2): + return parameters + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + with pytest.raises(ValueError, match="incorrect arguments passed to draw function"): + model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0}, N=5) + + # correct draw function but too much arguments in draw_function_kwargs def draw_function(parameters): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Your draw function 'draw_function' must have 'parameters' and 'samples' as the names of its inputs."): - model.sim(time, draw_function=draw_function, samples={}, N=5) + with pytest.raises(ValueError, match="incorrect arguments passed to draw function"): + model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0}, N=5) - # wrong draw function: input arguments switched - def draw_function(samples, parameters): + # correct draw function with extra args but user forgets to provide draw_function_kwargs to sim() + def draw_function(parameters, par_1, par_2): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Your draw function 'draw_function' must have 'parameters' and 'samples' as the names of its inputs."): - model.sim(time, draw_function=draw_function, samples={}, N=5) - - # wrong draw function: too many input arguments - def draw_function(parameters, samples, blublabliblu): + with pytest.raises(ValueError, match="the draw function 'draw_function' has 2 arguments in addition to the mandatory 'parameters' argument"): + model.sim(time, draw_function=draw_function, N=5) + + # wrong draw function: first input argument is not 'parameters' + def draw_function(par_1, parameters): return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Your draw function 'draw_function' must have 'parameters' and 'samples' as the names of its inputs."): - model.sim(time, draw_function=draw_function, samples={}, N=5) + with pytest.raises(ValueError, match="must have 'parameters' as its first input. Its current inputs are"): + model.sim(time, draw_function=draw_function, draw_function_kwargs={'par_1': 0}, N=5) + + # wrong draw function: return a scalar + def draw_function(parameters): + return 5 + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + with pytest.raises(TypeError, match="a draw function must return a dictionary. found type"): + model.sim(time, draw_function=draw_function, N=5) # wrong draw function: put a new keys in model parameters dictionary that doesn't represent a model parameter - def draw_function(parameters, samples): + def draw_function(parameters): parameters['bliblublo'] = 5 return parameters # simulate model time = [0, 10] model = SIRstratified(initial_states, parameters, coordinates=coordinates) - with pytest.raises(ValueError, match="Keys in output dictionary of draw function 'draw_function' must match the keys in input dictionary 'parameters'."): - model.sim(time, draw_function=draw_function, samples={}, N=5) + with pytest.raises(ValueError, match="keys in output dictionary of draw function 'draw_function' must match the keys in input dictionary 'parameters'."): + model.sim(time, draw_function=draw_function, N=5) + + # correct draw function but user does not provide N + def draw_function(parameters): + return parameters + # simulate model + time = [0, 10] + model = SIRstratified(initial_states, parameters, coordinates=coordinates) + with pytest.raises(ValueError, match="you specified a draw function but N=1, have you forgotten 'N'"): + model.sim(time, draw_function=draw_function) + + # user provides N but no draw function + with pytest.raises(ValueError, match="attempting to perform N=100 repeated simulations without using a draw function"): + model.sim(time, N=100) + + # or + with pytest.raises(ValueError, match="attempting to perform N=100 repeated simulations without using a draw function"): + model.sim(time, draw_function_kwargs={'arg_1': 0}, N=100) \ No newline at end of file diff --git a/tutorials/SIR/workflow_tutorial.py b/tutorials/SIR/workflow_tutorial.py index 2113b1a..047f0f4 100644 --- a/tutorials/SIR/workflow_tutorial.py +++ b/tutorials/SIR/workflow_tutorial.py @@ -48,7 +48,7 @@ # Define the model equations class ODE_SIR(ODE): """ - Simple SIR model without dimensions + An SIR model """ states = ['S','I','R'] @@ -172,10 +172,12 @@ def integrate(t, S, I, R, beta, gamma): # Define draw function def draw_fcn(parameters, samples): - parameters['beta'] = np.random.choice(samples['beta']) + parameters['beta'] = np.random.choice(samples) return parameters + # Simulate model - out = model.sim([start_date, end_date+pd.Timedelta(days=2*28)], N=100, samples=samples_dict, draw_function=draw_fcn, processes=processes) + out = model.sim([start_date, end_date+pd.Timedelta(days=2*28)], N=100, draw_function=draw_fcn, + draw_function_kwargs={'samples': samples_dict['beta']}, processes=processes) # Add negative binomial observation noise out = add_negative_binomial_noise(out, alpha) # Visualize result @@ -205,9 +207,9 @@ def lower_infectivity(t, states, param, start_measures): return param/2 # Define draw function - def draw_fcn(parameters, samples): - parameters['beta'] = np.random.choice(samples['beta']) - parameters['start_measures'] += pd.Timedelta(days=np.random.triangular(left=0,mode=0,right=21)) + def draw_fcn(parameters, samples, ramp_length): + parameters['beta'] = np.random.choice(samples) + parameters['start_measures'] += pd.Timedelta(days=np.random.triangular(left=0,mode=0,right=ramp_length)) return parameters # Attach its arguments to the parameter dictionary @@ -217,7 +219,8 @@ def draw_fcn(parameters, samples): model_with = ODE_SIR(states=model.initial_states, parameters=model.parameters, time_dependent_parameters={'beta': lower_infectivity}) # Simulate the model - out_with = model_with.sim([start_date, end_date+pd.Timedelta(days=2*28)], N=100, samples=samples_dict, draw_function=draw_fcn, processes=processes) + out_with = model_with.sim([start_date, end_date+pd.Timedelta(days=2*28)], N=100, draw_function=draw_fcn, + draw_function_kwargs={'samples': samples_dict['beta'], 'ramp_length': 21}, processes=processes) # Add negative binomial observation noise out_with = add_negative_binomial_noise(out_with, alpha) diff --git a/tutorials/SIR_SI/calibration.py b/tutorials/SIR_SI/calibration.py index fbfacda..8df63b5 100644 --- a/tutorials/SIR_SI/calibration.py +++ b/tutorials/SIR_SI/calibration.py @@ -147,8 +147,8 @@ def draw_fcn(parameters, samples): parameters['alpha'] = np.array([slice[idx] for slice in samples['alpha']]) return parameters # Simulate model - N = 50 - out = model.sim([start_date, end_date+60], N=N, samples=samples_dict, draw_function=draw_fcn, processes=processes) + N = 100 + out = model.sim([start_date, end_date+60], N=N, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict}, processes=processes) # Add negative binomial observation noise out = add_negative_binomial_noise(out, alpha) # Visualize result diff --git a/tutorials/enzyme_kinetics/calibrate_intrinsic_kinetics.py b/tutorials/enzyme_kinetics/calibrate_intrinsic_kinetics.py index 614da7d..9e3d654 100644 --- a/tutorials/enzyme_kinetics/calibrate_intrinsic_kinetics.py +++ b/tutorials/enzyme_kinetics/calibrate_intrinsic_kinetics.py @@ -56,7 +56,7 @@ # Define an initial condition init_states = {'S': 46, 'A': 61, 'W': 37, 'Es': 0} # Initialize model -model = PPBB_model(init_states,params) +model = PPBB_model(states=init_states, parameters=params) ############### ## Load data ## @@ -153,7 +153,7 @@ def draw_fcn(parameters, samples): # Update initial condition model.initial_states.update(initial_states[i]) # Simulate model - out = model.sim(1600, N=n, draw_function=draw_fcn, samples=samples_dict) + out = model.sim(1600, N=n, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict}) # Add 4% observational noise out = add_gaussian_noise(out, 0.04, relative=True) # Visualize diff --git a/tutorials/enzyme_kinetics/simulate_1D_PFR.py b/tutorials/enzyme_kinetics/simulate_1D_PFR.py index 8d99c9d..c53ddb0 100644 --- a/tutorials/enzyme_kinetics/simulate_1D_PFR.py +++ b/tutorials/enzyme_kinetics/simulate_1D_PFR.py @@ -122,7 +122,7 @@ def draw_fcn(parameters, samples): ## Concentration profile ## ########################### -out = model.sim(end_sim, N=n, draw_function=draw_fcn, samples=samples_dict, processes=processes) +out = model.sim(end_sim, N=n, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict}, processes=processes) # Add 4% observational noise out = add_gaussian_noise(out, 0.04, relative=True) # Visualize @@ -178,7 +178,7 @@ def draw_fcn(parameters, samples): # Update model parameters model.parameters.update({'kL_a': kL*a, 'D_ax': D_ax, 'delta_x': l/nx, 'u': u}) # Simulate - out_tmp = model.sim(end_sim, N=n, draw_function=draw_fcn, samples=samples_dict, processes=processes) + out_tmp = model.sim(end_sim, N=n, draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict}, processes=processes) # Add 4% observational noise and store out.append(add_gaussian_noise(out_tmp, 0.04, relative=True)) diff --git a/tutorials/influenza_1718/calibration.py b/tutorials/influenza_1718/calibration.py index 81cd98f..6c5ca15 100644 --- a/tutorials/influenza_1718/calibration.py +++ b/tutorials/influenza_1718/calibration.py @@ -160,7 +160,7 @@ # Assign results to model model.parameters = assign_theta(model.parameters, pars, theta) # Simulate model - out = model.sim([start_calibration, end_visualisation], samples={}, N=n, tau=tau, output_timestep=1) + out = model.sim([start_calibration, end_visualisation], N=n, tau=tau) # Add poisson obervational noise out = add_negative_binomial_noise(out, alpha) # Visualize @@ -223,7 +223,8 @@ def draw_fcn(parameters, samples): parameters['f_ud'] = np.array([slice[idx] for slice in samples['f_ud']]) return parameters # Simulate model - out = model.sim([start_visualisation, end_visualisation], N=n, tau=tau, output_timestep=1, samples=samples_dict, draw_function=draw_fcn, processes=processes) + out = model.sim([start_visualisation, end_visualisation], N=n, tau=tau, output_timestep=1, + draw_function=draw_fcn, draw_function_kwargs={'samples': samples_dict}, processes=processes) # Add negative binomial observation noise out_noise = add_negative_binomial_noise(out, alpha)