From f617cc3328c0d1a86a1a2ab7a9755d18231bfbbf Mon Sep 17 00:00:00 2001 From: Ven Popov Date: Wed, 27 Mar 2024 00:25:53 +0100 Subject: [PATCH] update notes for v0.5.0 --- _book/add-new-model.html | 297 +++++++++-------- _book/bmm-architecture.html | 173 +++++----- _book/example-model.html | 619 ++++++++++++++++++++++-------------- _book/index.html | 4 +- _book/search.json | 23 +- _book/setup.html | 5 +- add-new-model.qmd | 133 ++++---- bmm-architecture.qmd | 124 +++++--- example-model.qmd | 573 +++++++++++++++++++++------------ index.qmd | 6 +- setup.qmd | 3 +- 11 files changed, 1178 insertions(+), 782 deletions(-) diff --git a/_book/add-new-model.html b/_book/add-new-model.html index dd205a8..9e80b74 100644 --- a/_book/add-new-model.html +++ b/_book/add-new-model.html @@ -260,8 +260,8 @@

Usage

Arguments

--++ @@ -302,110 +302,110 @@

#############################################################################!
 # MODELS                                                                 ####
 #############################################################################!
 # see file 'R/bmm_model_mixture3p.R' for an example
 
-.model_gcm <- function(resp_var1, required_arg1, required_arg2, ...) {
-   out <- list(
-      resp_vars = nlist(resp_var1),
-      other_vars = nlist(required_arg1, required_arg2),
-      info = list(
-         domain = '',
-         task = '',
-         name = '',
-         citation = '',
-         version = '',
-         requirements = '',
-         parameters = list(),
-         fixed_parameters = list()
-      ),
-      void_mu = FALSE
-   )
-   class(out) <- c('bmmmodel', 'gcm')
-   out
-}
-# user facing alias
-# information in the title and details sections will be filled in
-# automatically based on the information in the .model_gcm()$info
- 
-#' @title `r .model_gcm()$info$name`
-#' @name Model Name#' @details `r model_info(gcm(NA,NA))`
-#' @param resp_var1 A description of the response variable
-#' @param required_arg1 A description of the required argument
-#' @param required_arg2 A description of the required argument
-#' @param ... used internally for testing, ignore it
-#' @return An object of class `bmmmodel`
-#' @export
-#' @examples
-#' \dontrun{
-#' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example)
-#' }
-gcm <- .model_gcm
-
-
-#############################################################################!
-# CHECK_DATA S3 methods                                                  ####
-#############################################################################!
-# A check_data.* function should be defined for each class of the model.
-# If a model shares methods with other models, the shared methods should be
-# defined in data-helpers.R. Put here only the methods that are specific to
-# the model. See ?check_data for details
-
-#' @export
-check_data.gcm <- function(model, data, formula) {
-   # retrieve required arguments
-   required_arg1 <- model$other_vars$required_arg1
-   required_arg2 <- model$other_vars$required_arg2
+.model_gcm <- function(resp_var1 = NULL, required_arg1 = NULL, required_arg2 = NULL, 
+                       links = NULL, version = NULL, call = NULL, ...) {
+   out <- structure(
+     list(
+       resp_vars = nlist(resp_var1),
+       other_vars = nlist(required_arg1, required_arg2),
+       domain = '',
+       task = '',
+       name = '',
+       citation = '',
+       version = version,
+       requirements = '',
+       parameters = list(),
+       links = list(),
+       fixed_parameters = list(),
+       default_priors = list(par1 = list(), par2 = list()),
+       void_mu = FALSE
+     ),
+     class = c('bmmodel', 'gcm'),
+     call = call
+   )
+   if(!is.null(version)) class(out) <- c(class(out), paste0("gcm_",version))
+   out$links[names(links)] <- links
+   out
+}
+# user facing alias
+# information in the title and details sections will be filled in
+# automatically based on the information in the .model_gcm()$info
+ 
+#' @title `r .model_gcm()$name`
+#' @name Model Name#' @details `r model_info(.model_gcm())`
+#' @param resp_var1 A description of the response variable
+#' @param required_arg1 A description of the required argument
+#' @param required_arg2 A description of the required argument
+#' @param links A list of links for the parameters.
+#' @param version A character label for the version of the model. Can be empty or NULL if there is only one version. 
+#' @param ... used internally for testing, ignore it
+#' @return An object of class `bmmodel`
+#' @export
+#' @examples
+#' \dontrun{
+#' # put a full example here (see 'R/model_mixture3p.R' for an example)
+#' }
+gcm <- function(resp_var1, required_arg1, required_arg2, links = NULL, version = NULL, ...) {
+   call <- match.call()
+   stop_missing_args()
+   .model_gcm(resp_var1 = resp_var1, required_arg1 = required_arg1, 
+              required_arg2 = required_arg2, links = links, version = version, 
+              call = call, ...)
+}
+
 
-   # check the data (required)
-
-
-   # compute any necessary transformations (optional)
-
-   # save some variables as attributes of the data for later use (optional)
-
-   data = NextMethod('check_data')
+#############################################################################!
+# CHECK_DATA S3 methods                                                  ####
+#############################################################################!
+# A check_data.* function should be defined for each class of the model.
+# If a model shares methods with other models, the shared methods should be
+# defined in helpers-data.R. Put here only the methods that are specific to
+# the model. See ?check_data for details.
+# (YOU CAN DELETE THIS SECTION IF YOU DO NOT REQUIRE ADDITIONAL DATA CHECKS)
 
-   return(data)
-}
-
-
-#############################################################################!
-# Convert bmmformula to brmsformla methods                               ####
-#############################################################################!
-# A bmf2bf.* function should be defined if the default method for consructing
-# the brmsformula from the bmmformula does not apply
-# The shared method for all `bmmmodels` is defined in helpers-formula.R.
-# See ?bmf2bf for details.
+#' @export
+check_data.gcm <- function(model, data, formula) {
+   # retrieve required arguments
+   required_arg1 <- model$other_vars$required_arg1
+   required_arg2 <- model$other_vars$required_arg2
+
+   # check the data (required)
+
+   # compute any necessary transformations (optional)
+
+   # save some variables as attributes of the data for later use (optional)
 
-#' @export
-bmf2bf.gcm <- function(model, formula) {
-   # retrieve required response arguments
-   resp_var1 <- model$resp_vars$resp_var1
-   resp_var2 <- model$resp_vars$resp_arg2
-
-   # set the base brmsformula based 
-   brms_formula <- brms::bf(paste0(resp_var1," | ", vreal(resp_var2), " ~ 1" ),)
-
-   # add bmmformula to the brms_formula
-   # check if parameters are used as non-linear predictors in other formulas
-   # and use the brms::lf() or brms::nlf() accordingly.
-   dpars <- names(formula)
-   for (dpar in dpars) {
-     pform <- formula[[dpar]]
-     predictors <- rhs_vars(pform)
-     if (any(predictors %in% dpars)) {
-       brms_formula <- brms_formula + brms::nlf(pform)
-     } else {
-       brms_formula <- brms_formula + brms::lf(pform)
-     }
-   }
-
-   return(brms_formula)
+   NextMethod('check_data')
+}
+
+
+#############################################################################!
+# Convert bmmformula to brmsformla methods                               ####
+#############################################################################!
+# A bmf2bf.* function should be defined if the default method for consructing
+# the brmsformula from the bmmformula does not apply (e.g if aterms are required).
+# The shared method for all `bmmodels` is defined in bmmformula.R.
+# See ?bmf2bf for details.
+# (YOU CAN DELETE THIS SECTION IF YOUR MODEL USES A STANDARD FORMULA WITH 1 RESPONSE VARIABLE)
+
+#' @export
+bmf2bf.gcm <- function(model, formula) {
+   # retrieve required response arguments
+   resp_var1 <- model$resp_vars$resp_var1
+   resp_var2 <- model$resp_vars$resp_arg2
+
+   # set the base brmsformula based 
+   brms_formula <- brms::bf(paste0(resp_var1," | ", vreal(resp_var2), " ~ 1" ),)
+
+   # return the brms_formula to add the remaining bmmformulas to it.
+   brms_formula
 }
 
 
@@ -425,66 +425,58 @@ 

my_precomputed_var <- attr(data, 'my_precomputed_var') # construct brms formula from the bmm formula - bmm_formula <- formula - formula <- bmf2bf(model, bmm_formula) - - # construct the family - gcm_family <- brms::custom_family( - 'gcm', - dpars = c(), - links = c(), - lb = c(), # upper bounds for parameters - ub = c(), # lower bounds for parameters - type = '', # real for continous dv, int for discrete dv - loop = TRUE, # is the likelihood vectorized - ) - family <- gcm_family - - # prepare initial stanvars to pass to brms, model formula and priors - sc_path <- system.file('stan_chunks', package='bmm') - stan_likelihood <- readChar(paste0(sc_path, '/gcm_likelihood.stan'), - file.info(paste0(sc_path, '/gcm_likelihood.stan'))$size) - stan_functions <- readChar(paste0(sc_path, '/gcm_functions.stan'), - file.info(paste0(sc_path, '/gcm_functions.stan'))$size) + formula <- bmf2bf(model, formula) + + # construct the family & add to formula object + gcm_family <- brms::custom_family( + 'gcm', + dpars = c(), + links = c(), + lb = c(), # upper bounds for parameters + ub = c(), # lower bounds for parameters + type = '', # real for continous dv, int for discrete dv + loop = TRUE, # is the likelihood vectorized + ) + formula$family <- gcm_family + + # prepare initial stanvars to pass to brms, model formula and priors + sc_path <- system.file('stan_chunks', package='bmm') + stan_likelihood <- read_lines2(paste0(sc_path, '/gcm_likelihood.stan')) + stan_functions <- read_lines2(paste0(sc_path, '/gcm_functions.stan')) + + stanvars <- stanvar(scode = stan_likelihood, block = 'likelihood') + + stanvar(scode = stan_functions, block = 'functions') - stanvars <- stanvar(scode = stan_likelihood, block = 'likelihood') + - stanvar(scode = stan_functions, block = 'functions') - - # construct the default prior - prior <- NULL - - # return the list - out <- nlist(formula, data, family, prior, stanvars) - return(out) -} + # return the list + nlist(formula, data, stanvars) +} + + +#############################################################################! +# POSTPROCESS METHODS #### +#############################################################################! +# A postprocess_brm.* function should be defined for the model class. See +# ?postprocess_brm for details - -#############################################################################! -# POSTPROCESS METHODS #### -#############################################################################! -# A postprocess_brm.* function should be defined for the model class. See -# ?postprocess_brm for details - -#' @export -postprocess_brm.gcm <- function(model, fit) { - # any required postprocessing (if none, delete this section) - - return(fit) -}

+#' @export +postprocess_brm.gcm <- function(model, fit) { + # any required postprocessing (if none, delete this section) + fit +}

Now you have to:

    -
  1. Fill the .model_gcm function with the appropriate code. This function should return a list with the variables that the model needs and a list with information about the model. The class of the list should be c('bmmmodel', 'gcm'). The information list should contain the following elements: domain, task, name, citation, version, requirements, and parameters. Rename the response arguements and the other required arguments, or delete the other arguments if you do not have any. You can see an example in the bmm_model_sdmSimple.R file.

  2. +
  3. Fill the .model_gcm function with the appropriate code. This function should return a list with all the variables specified above. The class of the list should be c('bmmmodel', 'gcm'). Rename the response arguments and the other required arguments, or delete the other arguments if you do not have any. You can see an example in the model_sdm.R file. Specify the parameters of the model, the link functions, what if any parameters are fixed (and to what value). It’s crucial that you set default priors for every parameter of the model, which should be informed by knowledge in the field.

  4. Adjust the user-facing alias. Here you should only rename the required arguments and fill in the @examples section with a full example. Everything else will be filled in automatically based on the information in the .model_gcm function.

  5. Fill the check_data.gcm function with the appropriate code. This function should check the data and return the data. You may or may not need to compute any transformations or save some variables as attributes of the data.

  6. -
  7. If necessary define the bmf2bf.gcm method to convert the bmmformula to a brmsformula. The first step for this is always to specify the response variable and additional response information. Keep in mind that brms automatically interprets this formula as the linear model formula for the mu parameter of your custom family. Currently, brms requires all custom families to have a mu parameter. However, we recommend to code this parameter as a void_mu, and fix the intercept of this parameter to zero using constant priors. This way, the bmmformula can be used to only specify the linear or non-linear model for the parameters of a bmmmodel.

  8. -
  9. Fill the configure_model.gcm function with the appropriate code. This function should construct the formula, the family, the stanvars, and the prior. You can also retrieve any arguments you saved from the data check. Depending on your model, some of these parts might not be necessary. For example, for the mixture models (e.g. mixture3p), we construct a new formula, because we want to rename the arguments to make it easier for the user. For the sdmSimple model, we define the family ourselves, so we don’t need to change the formula.

    +
  10. If necessary define the bmf2bf.gcm method to convert the bmmformula to a brmsformula. The first step for this is always to specify the response variable and additional response information. Keep in mind that brms automatically interprets this formula as the linear model formula for the mu parameter of your custom family. Currently, brms requires all custom families to have a mu parameter. However, we recommend to code this parameter as a void_mu, and fix the intercept of this parameter to zero using constant priors. This way, the bmmformula can be used to only specify the linear or non-linear model for the parameters of a bmmmodel. if your model has a single response variable, you can delete this section.

  11. +
  12. Fill the configure_model.gcm function with the appropriate code. This function should construct the formula, the family, the stanvars. You can also retrieve any arguments you saved from the data check. Depending on your model, some of these parts might not be necessary. For example, for the mixture models (e.g. mixture3p), we construct a new formula, because we want to rename the arguments to make it easier for the user. For the sdmSimple model, we define the family ourselves, so we don’t need to change the formula.

    You need to fill information about your custom family, and then fill the STAN files with your STAN code. Conveniently, you don’t have to edit lines 137-145: loading the STAN files and setting up the stanvars is set up automatically when calling the use_model_template function. Should you need to add more STAN files after you created the template, you can add the files in init/stan_chunks/ manually and edit those lines to additionally load the manually added files.

  13. Fill the postprocess_brm.gcm function with the appropriate code. By postprocessing, we mean changes to the fitted brms model - like renaming parameters, etc. If you don’t need any postprocessing, you can delete this section.

5.2 Testing

-

Unit testing is extremely important. You should test your model with the testthat package. You can use the use_test() function to create a test file for your model. See file tests/testthat/test-fit_model.R for an example of how we test the existing models. BRMS models take a long time to fit, so we don’t test the actual fitting process. fit_model() provides an argument backend="mock", which will return a mock object instead of fitting the model. This ensures that the entire pipeline works without errors. For example, here’s a test of the IMMfull model:

+

Unit testing is extremely important. You should test your model with the testthat package. You can use the use_test() function to create a test file for your model. See file tests/testthat/test-bmm.R for an example of how we test the existing models. BRMS models take a long time to fit, so we don’t test the actual fitting process. fit_model() provides an argument backend="mock", which will return a mock object instead of fitting the model. This ensures that the entire pipeline works without errors. For example, here’s a test of the IMMfull model:

test_that('Available mock models run without errors',{
   withr::local_options('bmm.silent'=2)
   skip_on_cran()
@@ -498,11 +490,16 @@ 

# two-parameter model mock fit f <- bmmformula(kappa ~ 1, c ~ 1, a ~ 1, s ~ 1) - mock_fit <- fit_model(f, dat, IMMfull(resp_err = "resp_err", setsize = 3, nt_features = paste0('Item',2:3,'_rel'), nt_distance=paste0('spaD',2:3)), backend = "mock", mock_fit = 1, rename=FALSE) - expect_equal(mock_fit$fit, 1) - expect_type(mock_fit$fit_args, "list") - expect_equal(names(mock_fit$fit_args[1:4]), c("formula", "data", "family", "prior")) -})

+ mock_fit <- bmm(f, dat, + imm(resp_err = "resp_err", + setsize = 3, + nt_features = paste0('Item',2:3,'_rel'), + nt_distance=paste0('spaD',2:3)), + backend = "mock", mock_fit = 1, rename=FALSE) + expect_equal(mock_fit$fit, 1) + expect_type(mock_fit$bmm$fit_args, "list") + expect_equal(names(mock_fit$fit_args[1:4]), c("formula", "data")) +})

The tests based on the testthat package are run every time you call the check() command. Before you submit your changes, make sure that all tests pass.

diff --git a/_book/bmm-architecture.html b/_book/bmm-architecture.html index 770d5df..8074dba 100644 --- a/_book/bmm-architecture.html +++ b/_book/bmm-architecture.html @@ -209,16 +209,17 @@

Table of contents

@@ -245,43 +246,51 @@

-

3.1 The main workhorse - fit_model()

-

The main function for fitting models is fit_model(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package:

-
fit_model <- function(formula, data, model, parallel = FALSE, 
-                      chains = 4, prior = NULL, ...) {
-  # enable parallel sampling if parallel equals TRUE
-  opts <- configure_options(nlist(parallel, chains, silent))
-
-  # check model, formula and data, and transform data if necessary
-  model <- check_model(model)
-  formula <- check_formula(model, formula)
-  data <- check_data(model, data, formula)
-
-  # generate the model specification to pass to brms later
-  config_args <- configure_model(model, data, formula)
-
-  # combine the default prior plus user given prior
-  config_args$prior <- combine_prior(config_args$prior, prior)
-
-  # estimate the model
-  dots <- list(...)
-  fit_args <- combine_args(nlist(config_args, opts, dots))
-  fit <- call_brm(fit_args)
-
-  # model postprocessing
-  fit <- postprocess_brm(model, fit)
-
-  return(fit)
-}
+
+

3.1 The main workhorse - bmm()

+

The main function for fitting models is bmm(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package.

+
bmm <- function(formula, data, model,
+                prior = NULL,
+                sort_data = getOption('bmm.sort_data', "check"),
+                silent = getOption('bmm.silent', 1),
+                backend = getOption('brms.backend', NULL), ...) {
+  deprecated_args(...)
+  dots <- list(...)
+
+  # set temporary global options and return modified arguments for brms
+  configure_opts <- nlist(sort_data, silent, backend, parallel = dots$parallel,
+                          cores = dots$cores)
+  opts <- configure_options(configure_opts)
+  dots$parallel <- NULL
+
+  # check model, formula and data, and transform data if necessary
+  user_formula <- formula
+  model <- check_model(model, data, formula)
+  data <- check_data(model, data, formula)
+  formula <- check_formula(model, data, formula)
+
+  # generate the model specification to pass to brms later
+  config_args <- configure_model(model, data, formula)
+
+  # configure the default prior and combine with user-specified prior
+  prior <- configure_prior(model, data, config_args$formula, prior)
+
+  # estimate the model
+  fit_args <- combine_args(nlist(config_args, opts, dots, prior))
+  fit <- call_brm(fit_args)
+
+  # model post-processing
+  postprocess_brm(model, fit, fit_args = fit_args, user_formula = user_formula,
+                  configure_opts = configure_opts)
+}

It calls several subroutines, implemented as generic S3 methods, to:

  • configure_options() - to configure local options for fitting, such as parallel sampling,
  • check_model() - check if the model exists
  • -
  • check_formula() - check if the formula is specified correctly
  • +
  • check_formula() - check if the formula is specified correctly and transform it to a brmsformula
  • check_data() - check whether the data contains all necessary information
  • configure_model() - configures the model called for fitting
  • -
  • combine_priors() - combines the user specified priors with default priors provided for each model
  • +
  • configure_prior() - sets the default priors for the model and combines them with the user prior
  • call_brm() - fit the model using the brm() function from the brms package
  • postprocess_brm() - to post-process the fitted model
@@ -289,31 +298,38 @@

3.2 Models

All models in the package are defined as S3 classes and follow a strict template. This allows us to implement general methods for handling model fitting, data checking, and post-processing. Each model has an internal function that defines the model and its parameters, and a user-facing alias. For a complete example model file and an explanation, see Section 4. The general model template looks like this:

-
.model_myNewModel <- function(resp_var1, required_arg1, required_arg2, ...) {
-   out <- list(
-      resp_vars = nlist(resp_var1),
-      other_vars = nlist(required_arg1, required_arg2),
-      info = list(
-         domain = '',
-         task = '',
-         name = '',
-         citation = '',
-         version = '',
-         requirements = '',
-         parameters = list(),
-         fixed_parameters = list()
-      ),
-      void_mu = FALSE
-   )
-   class(out) <- c('bmmmodel', 'myNewModel')
-   out
-}
+
.model_my_new_model <- function(resp_var1 = NULL, required_args1 = NULL, 
+                                required_arg2 = NULL, links = NULL, version = NULL,
+                                call = NULL, ...) {
+  out <- structure(
+    list(
+      resp_vars = nlist(resp_error),
+      other_vars = nlist(),
+      domain = "",
+      task = "",
+      name = "",
+      version = "",
+      citation = "",
+      requirements = "",
+      parameters = list(),
+      links = list(),
+      fixed_parameters = list(),
+      default_priors = list(),
+      version = version,
+      void_mu = FALSE
+    ),
+    class = c("bmmodel", "my_new_model"),
+    call = call
+  )
+  out$links[names(links)] <- links
+  out
+}

Each model is accompanied by a user-facing alias, the documentation of which is generated automatically based on the info list in the model definition.

# user facing alias
 # information in the title and details sections will be filled in
 # automatically based on the information in the .model_modelname()$info
-#' @title `r .model_myNewModel()$info$name`
-#' @name Model Name#' @details `r model_info(myNewModel(NA,NA))`
+#' @title `r .model_my_new_model()name`
+#' @name Model Name#' @details `r model_info(.model_my_new_model())`
 #' @param resp_var1 A description of the response variable
 #' @param required_arg1 A description of the required argument
 #' @param required_arg2 A description of the required argument
@@ -324,11 +340,18 @@ 

#' \dontrun{ #' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example) #' } -myNewModel <- .model_myNewModel

+my_new_model <- function(resp_var1, required_arg1, required_arg2, + links = NULL, version = NULL, ...) { + call <- match.call() + stop_missing_args() + .model_my_new_model(resp_var1 = resp_var1, required_arg1 = required_arg1, + required_arg2 = required_arg2, links = links, version = version, + call = call, ...) +}

Then users can fit the model using the fit_model() function, and the model will be automatically recognized and handled by the package:

-
fit <- fit_model(formula, 
-                 data = mydata, 
-                 model = modelname(resp_var1, required_arg1, required_arg2))
+
fit <- bmm(formula = my_bmmformula, 
+           data = my_data, 
+           model = my_new_model(resp_var1, required_arg1, required_arg2))

3.3 S3 methods

@@ -341,26 +364,30 @@

3.4 File organization

The bmm package is organized into several files. The main files are:

-
-

R/fit_model.R

-

It contains the main function for fitting models, fit_model(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package.

+
+

R/bmm.R

+

It contains the main function for fitting models, bmm(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package.

To add new models, you do not have to edit this file. The functions above are generic S3 methods, and they will automatically recognize new models if you add appropriate methods for them (see section Adding new models).

R/helpers-*.R

-

R/helpers-data.R, R/helpers-postprocess.R, R/helpers-model.R, R/helpers-formula.R and R/helpers-prior.R

-

These files define the main generic S3 methods for checking data, postprocessing the fitted model, configuring the model, checking the model formula, and combining priors. They contain the default methods for these functions, which are called by fit_model() if no specific method is defined for a model. If you want to add a new model, you will need to add specific methods for these functions for your model. You do not need to edit these files to add a new model.

+

R/helpers-data.R, R/helpers-parameters.R, R/helpers-postprocess.R, R/helpers-model.R, and R/helpers-prior.R

+

These files define the main generic S3 methods for checking data, postprocessing the fitted model, configuring the model, checking the model formula, and combining priors. They contain the default methods for these functions, which are called by bmm() if no specific method is defined for a model. If you want to add a new model, you will need to add specific methods for these functions for your model. You do not need to edit these files to add a new model.

-
-

R/bmm_model_*.R

-

Each model and it’s methods is defined in a separate file. For example, the 3-parameter mixture model is defined in bmm_model_mixture3p.R. This file contains the internal function that defines the model and its parameters, and the specific methods for the generic S3 functions. Your new model will exist in a file like this. The name of the file should be bmm_model_name_of_your_model.R. You don’t have to add this file manually - see section Adding new models.

+
+

R/bmmformula.R

+

This file contains the definition of the bmmformula class, which is used to represent the formula for the model. It contains the bmmformula() function and its alias bmf(), which is used to create a new formula object.

-
-

R/bmm_distributions.R

+
+

R/model_*.R

+

Each model and it’s methods is defined in a separate file. For example, the 3-parameter mixture model is defined in model_mixture3p.R. This file contains the internal function that defines the model and its parameters, and the specific methods for the generic S3 functions. Your new model will exist in a file like this. The name of the file should be model_name_of_your_model.R. You don’t have to add this file manually - see section Adding new models.

+
+
+

R/distributions.R

This file contains the definition of the custom distributions that are used in the package. It specifies the density, random number generation, and probability functions for the custom distributions. If your model requires a custom distribution, you will need to add it to this file. These are not used during model fitting, but can be used to generate data from the model, and to plot the model fit.

-
-

R/utils.R, R/brms-misc.R

+
+

R/utils.R, R/brms-misc.R, R/restructure.R, R/summary.R, R/update.R

Various utility functions.

diff --git a/_book/example-model.html b/_book/example-model.html index 8c5ca38..702866e 100644 --- a/_book/example-model.html +++ b/_book/example-model.html @@ -221,6 +221,7 @@

Table of contents

  • 4.2.1 Model definition
  • 4.2.2 check_data() methods
  • 4.2.3 configure_model() methods
  • +
  • 4.2.4 Postprocessing methods
  • @@ -248,189 +249,291 @@

    All models in the package are defined as S3 classes and follow a strict template. This allows us to implement general methods for handling model fitting, data checking, and post-processing. Each model has an internal function that defines the model and its parameters, and a user-facing alias. Let’s look at how two models are implemented - the IMM model, which uses both general class and specific model methods, but no custom stan code, and the SDM model, which depends heavily on custom stan code. If you use the use_model_template() function, all sections bellows will be automatically generated for your model.

    4.1 The Interference Measurement Model (IMM)

    -

    The model is defined in the file R/bmm_model_IMM.R. Let’s go through the different parts.

    +

    The model is defined in the file R/model_imm.R. Let’s go through the different parts.

    4.1.1 Model definition

    The full IMM model is defined in the following internal model class:

    -
    .model_IMMfull <- function(resp_err,  nt_features, nt_distance, setsize, ...) {
    -  out <- list(
    -    resp_vars = nlist(resp_err),
    -    other_vars = nlist(nt_features, nt_distance, setsize),
    -    info = list(
    -      domain = "Visual working memory",
    -      task = "Continuous reproduction",
    -      name = "Interference measurement model by Oberauer and Lin (2017).",
    -      version = "full",
    -      citation = paste0("Oberauer, K., & Lin, H.Y. (2017). An interference model ",
    -                        "of visual working memory. Psychological Review, 124(1), 21-59"),
    -      requirements = paste0('- The response vairable should be in radians and ',
    -                            'represent the angular error relative to the target\n  ',
    -                            '- The non-target features should be in radians and be ',
    -                            'centered relative to the target'),
    -      parameters = list(
    -        mu1 = paste0("Location parameter of the von Mises distribution for memory responses",
    -                     "(in radians). Fixed internally to 0 by default."),
    -        kappa = "Concentration parameter of the von Mises distribution (log scale)",
    -        a = "General activation of memory items",
    -        c = "Context activation",
    -        s = "Spatial similarity gradient"
    -      ),
    -      fixed_parameters = list(
    -        mu1 = 0
    -      )),
    -    void_mu = FALSE
    -  )
    -  class(out) <- c("bmmmodel","vwm","nontargets","IMMspatial","IMMfull")
    -  out
    -}
    +
    .model_imm <-
    +  function(resp_error = NULL, nt_features = NULL, nt_distances = NULL,
    +           set_size = NULL, regex = FALSE, links = NULL, version = "full",
    +           call = NULL, ...) {
    +    out <- structure(
    +      list(
    +        resp_vars = nlist(resp_error),
    +        other_vars = nlist(nt_features, nt_distances, set_size),
    +        domain = "Visual working memory",
    +        task = "Continuous reproduction",
    +        name = "Interference measurement model by Oberauer and Lin (2017).",
    +        version = version,
    +        citation = glue(
    +          "Oberauer, K., & Lin, H.Y. (2017). An interference model \\
    +          of visual working memory. Psychological Review, 124(1), 21-59"
    +        ),
    +        requirements = glue(
    +          '- The response vairable should be in radians and \\
    +          represent the angular error relative to the target
    +          - The non-target features should be in radians and be \\
    +          centered relative to the target'
    +        ),
    +        parameters = list(
    +          mu1 = glue(
    +            "Location parameter of the von Mises distribution for memory \\
    +            responses (in radians). Fixed internally to 0 by default."
    +          ),
    +          kappa = "Concentration parameter of the von Mises distribution",
    +          a = "General activation of memory items",
    +          c = "Context activation",
    +          s = "Spatial similarity gradient"
    +        ),
    +        links = list(
    +          mu1 = "tan_half",
    +          kappa = "log",
    +          a = "identity",
    +          c = "identity",
    +          s = "log"
    +        ),
    +        fixed_parameters = list(mu1 = 0, mu2 = 0, kappa2 = -100),
    +        default_priors = list(
    +          mu1 = list(main = "student_t(1, 0, 1)"),
    +          kappa = list(main = "normal(2, 1)", effects = "normal(0, 1)"),
    +          a = list(main = "normal(0, 1)", effects = "normal(0, 1)"),
    +          c = list(main = "normal(0, 1)", effects = "normal(0, 1)"),
    +          s = list(main = "normal(0, 1)", effects = "normal(0, 1)")
    +        ),
    +        void_mu = FALSE
    +      ),
    +      # attributes
    +      regex = regex,
    +      regex_vars = c('nt_features', 'nt_distances'),
    +      class = c("bmmodel", "vwm", "non_targets", "imm", paste0('imm_',version)),
    +      call = call
    +    )
    +
    +    # add version specific information
    +    if (version == "abc") {
    +      out$parameters$s <- NULL
    +      out$links$s <- NULL
    +      out$default_priors$s <- NULL
    +      attributes(out)$regex_vars <- c('nt_features')
    +    } else if (version == "bsc") {
    +      out$parameters$a <- NULL
    +      out$links$a <- NULL
    +      out$default_priors$a <- NULL
    +    }
    +
    +    out$links[names(links)] <- links
    +    out
    +  }

    Here is a brief explanation of the different components of the model definition:

    -

    resp_vars: a list of response variables that the model will be fitted to. These variables will be used to construct the brmsformula passed to brms together with the bmmformula and the parameters of the model. The user has to provide these variables in the data frame that is passed to the fit_model function

    -

    other_vars: a list of additional variables that are required for the model. This is used to check if the data contains all necessary information for fitting the model. In the example above, the IMM model requires the names of the variables specifying the non-target features relative to the target, the variables specifying the distance of the non-targets to the target, and the setsize. The user has to provide these variables in the data frame that is passed to the fit_model function

    -

    info: contains information about the model, such as the domain, task, name, citation, version, requirements, and parameters. This information is used to check if the bmmformula contains linear model formulas for all model parameters, and also specify defaults for fixed_parameters. In addition, the info is used to generate the documentation for the model.

    +

    resp_vars: a list of response variables that the model will be fitted to. These variables will be used to construct the brmsformula passed to brms together with the bmmformula and the parameters of the model. The user has to provide these variables in the data frame that is passed to the bmm() function

    +

    other_vars: a list of additional variables that are required for the model. This is used to check if the data contains all necessary information for fitting the model. In the example above, the IMM model requires the names of the variables specifying the non-target features relative to the target, the variables specifying the distance of the non-targets to the target, and the set_size. The user has to provide these variables in the data frame that is passed to the bmm() function

    +

    domain, task, name, citation, requirements: contains information about the model, such as the domain, task, name, citation, requirements. This information is used for generating help pages

    +

    version: if the model has multiple versions, this argument is specified by the user. Then it is used to dynamically adjust some information in the model object. In the case of the imm model, we have three versions - full, bsc and abc. As you can see at the end of the script, some parameters are deleted depending on the model version.

    +

    parameters: contains a named list of all parameters in the model that can be estimated by the user and their description. This information is used internally to check if the bmmformula contains linear model formulas for all model parameters, and to decide what information to include in the summary of bmmfit objects.

    +

    links: a named list providing the link function for each parameter. For example, kappa in the imm models has to be positive, so it is sampled on the log scale. This information is used in defining the model family and for the summary methods. If you want the user to be able to specify custom link functions, the next to last line of the script replaces the links with those provided by the user

    +

    fixed_parameters in the imm several parameters are fixed to constant values internally to identify the model. Only one of them, mu1 is also part of the parameters block - this is the only fixed parameters that users can choose to estimate instead of leaving it fixed. mu2 and kappa2 cannot be freely estimated.

    +

    default_priors a list of lists for each parameter in the model. Each prior has two components: main, the prior that will be put on the Intercept or on each level of a factor if the intercept is suppressed; effects, the prior to put on the regression coefficients relative to the intercept. The priors are described as in the set_prior function from brms. This information is used by the configure_prior() S3 method to automatically set the default priors for the model. The priors that you put here will be used by bmm() unless the users chooses to overwrite them.

    void_mu: For models using a custom family that do not contain a location or mu parameter, for example the diffusion model, we recommend setting up a void_mu parameter. This avoids arbitrarily using one of the model parameters as the mu parameter.

    -

    class: is the most important part. It contains the class of the model. This is used by generic S3 methods to perform data checks and model configuration. The classes should be ordered from most general to most specific. A general class exists when the same operations can be performed on multiple models. For example, the ‘3p’, ‘IMMabc’, ‘IMMbsc’ and ‘IMMfull’ models all have non-targets and setsize arguments, so the same data checks can be performed on all of them, represented by the class nontargets. The first class should always be bmmmodel, which is the main class for all models. The last class should be the specific model name, in this case IMMfull.

    +

    regex: For the imm models, the nt_features and nt_distances variables can be specified with regular expressions, if the user sets regex = TRUE

    +

    call: this automatically records how the model was called so that the call can be printed in the summary after fitting. Leave it as is.

    +

    class: is the most important part. It contains the class of the model. This is used by generic S3 methods to perform data checks and model configuration. The classes should be ordered from most general to most specific. A general class exists when the same operations can be performed on multiple models. For example, the ‘3p’, ‘imm_abc’, ‘imm_bsc’ and ‘imm_full’ models all have non-targets and set_size arguments, so the same data checks can be performed on all of them, represented by the class non_targets. The first class should always be bmmodel, which is the main class for all models. The last class should be the specific model name, in this case imm_full, imm_abc or imm_bsc, which is automatically constructed if a version argument is provided. Otherwise the last class will be just the name of the model.

    4.1.2 Model alias

    The model alias is a user-facing function that calls the internal model function. It is defined as follows:

    -
    # user facing alias
    -
    -#' @title `r .model_IMMfull(NA, NA, NA, NA)$info$name`
    -#' @name IMM
    -#' @details `r model_info(IMMfull(NA, NA, NA, NA), components =c('domain', 'task', 'name', 'citation'))`
    -#' #### Version: `IMMfull`
    -#' `r model_info(IMMfull(NA, NA, NA, NA), components =c('requirements', 'parameters'))`
    -#' #### Version: `IMMbsc`
    -#' `r model_info(IMMbsc(NA, NA, NA, NA), components =c('requirements', 'parameters'))`
    -#' #### Version: `IMMabc`
    -#' `r model_info(IMMabc(NA, NA, NA), components =c('requirements', 'parameters'))`
    -#'
    -#' Additionally, all IMM models have an internal parameter that is fixed to 0 to
    -#' allow the model to be identifiable. This parameter is not estimated and is not
    -#' included in the model formula. The parameter is:
    -#'
    -#'   - b = "Background activation (internally fixed to 0)"
    +
    #' @title `r .model_imm()$name`
    +#' @description Three versions of the `r .model_imm()$name` - the full, bsc, and abc.
    +#' `IMMfull()`, `IMMbsc()`, and `IMMabc()` are deprecated and will be removed in the future.
    +#' Please use `imm(version = 'full')`, `imm(version = 'bsc')`, or `imm(version = 'abc')` instead.
    +#'
    +#' @name imm
    +#' @details `r model_info(.model_imm(), components =c('domain', 'task', 'name', 'citation'))`
    +#' #### Version: `full`
    +#' `r model_info(.model_imm(version = "full"), components = c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))`
    +#' #### Version: `bsc`
    +#' `r model_info(.model_imm(version = "bsc"), components = c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))`
    +#' #### Version: `abc`
    +#' `r model_info(.model_imm(version = "abc"), components =c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))`
    +#'
    +#' Additionally, all imm models have an internal parameter that is fixed to 0 to
    +#' allow the model to be identifiable. This parameter is not estimated and is not
    +#' included in the model formula. The parameter is:
     #'
    -#' @param resp_err The name of the variable in the provided dataset containing the
    -#'   response error. The response Error should code the response relative to the to-be-recalled
    -#'   target in radians. You can transform the response error in degrees to radian using the `deg2rad` function.
    -#' @param nt_features A character vector with the names of the non-target variables.
    -#'   The non_target variables should be in radians and be centered relative to the
    -#'   target.
    -#' @param nt_distance A vector of names of the columns containing the distances of
    -#'   non-target items to the target item. Only necessary for the `IMMbsc` and `IMMfull` models
    -#' @param setsize Name of the column containing the set size variable (if
    -#'   setsize varies) or a numeric value for the setsize, if the setsize is
    -#'   fixed.
    -#' @param ... used internally for testing, ignore it
    -#' @return An object of class `bmmmodel`
    -#' @keywords bmmmodel
    -#' @export
    -IMMfull <- .model_IMMfull
    -
    -#' @rdname IMM
    -#' @keywords bmmmodel
    -#' @export
    -IMMbsc <- .model_IMMbsc
    -
    -#' @rdname IMM
    -#' @keywords bmmmodel
    -#' @export
    -IMMabc <- .model_IMMabc
    -

    The details will be filled out automatically from the model definition. The example for the IMM model also includes the aliases for the other versions of the IMM model, which are IMMbsc and IMMabc, and does some fancy formatting to include documentation about all versions of the model in the same help file.

    +#' - b = "Background activation (internally fixed to 0)" +#' +#' @param resp_error The name of the variable in the provided dataset containing +#' the response error. The response Error should code the response relative to +#' the to-be-recalled target in radians. You can transform the response error +#' in degrees to radian using the `deg2rad` function. +#' @param nt_features A character vector with the names of the non-target +#' variables. The non_target variables should be in radians and be centered +#' relative to the target. Alternatively, if regex=TRUE, a regular +#' expression can be used to match the non-target feature columns in the +#' dataset. +#' @param nt_distances A vector of names of the columns containing the distances +#' of non-target items to the target item. Alternatively, if regex=TRUE, a regular +#' expression can be used to match the non-target distances columns in the +#' dataset. Only necessary for the `bsc` and `full` versions. +#' @param set_size Name of the column containing the set size variable (if +#' set_size varies) or a numeric value for the set_size, if the set_size is +#' fixed. +#' @param regex Logical. If TRUE, the `nt_features` and `nt_distances` arguments +#' are interpreted as a regular expression to match the non-target feature +#' columns in the dataset. +#' @param links A list of links for the parameters. *Currently does not affect +#' the model fits, but it will in the future.* +#' @param version Character. The version of the IMM model to use. Can be one of +#' `full`, `bsc`, or `abc`. The default is `full`. +#' @param ... used internally for testing, ignore it +#' @return An object of class `bmmodel` +#' @keywords bmmodel +#' @examples +#' \dontrun{ +#' # load data +#' data <- oberauer_lin_2017 +#' +#' # define formula +#' ff <- bmmformula( +#' kappa ~ 0 + set_size, +#' c ~ 0 + set_size, +#' a ~ 0 + set_size, +#' s ~ 0 + set_size +#' ) +#' +#' # specify the full IMM model with explicit column names for non-target features and distances +#' # by default this fits the full version of the model +#' model1 <- imm(resp_error = "dev_rad", +#' nt_features = paste0('col_nt', 1:7), +#' nt_distances = paste0('dist_nt', 1:7), +#' set_size = 'set_size') +#' +#' # fit the model +#' fit <- bmm(formula = ff, +#' data = data, +#' model = model1, +#' cores = 4, +#' backend = 'cmdstanr') +#' +#' # alternatively specify the IMM model with a regular expression to match non-target features +#' # this is equivalent to the previous call, but more concise +#' model2 <- imm(resp_error = "dev_rad", +#' nt_features = 'col_nt', +#' nt_distances = 'dist_nt', +#' set_size = 'set_size', +#' regex = TRUE) +#' +#' # fit the model +#' fit <- bmm(formula = ff, +#' data = data, +#' model = model2, +#' cores = 4, +#' backend = 'cmdstanr') +#' +#' # you can also specify the `bsc` or `abc` versions of the model to fit a reduced version +#' model3 <- imm(resp_error = "dev_rad", +#' nt_features = 'col_nt', +#' set_size = 'set_size', +#' regex = TRUE, +#' version = 'abc') +#' fit <- bmm(formula = ff, +#' data = data, +#' model = model3, +#' cores = 4, +#' backend = 'cmdstanr') +#'} +#' @export +imm <- function(resp_error, nt_features, nt_distances, set_size, regex = FALSE, + links = NULL, version = "full", ...) { + call <- match.call() + dots <- list(...) + if (version == "abc") { + nt_distances <- NULL + } + stop_missing_args() + .model_imm(resp_error = resp_error, nt_features = nt_features, + nt_distances = nt_distances, set_size = set_size, regex = regex, + links = links, version = version, call = call, ...) +}
    +

    The details will be filled out automatically from the model definition. This does some fancy formatting to include documentation about all versions of the model in the same help file.

    4.1.3 check_data() methods

    -

    Each model should have a check_data.modelname() method that checks if the data contains all necessary information for fitting the model. For the IMM, all three versions of the model share the same data requirements, so check_data is defined for the more general class, IMMspatial. The method is defined as follows:

    -
    check_data.IMMspatial <- function(model, data, formula) {
    -  nt_distance <- model$other_vars$nt_distance
    -  max_setsize <- attr(data, 'max_setsize')
    -
    -  if (length(nt_distance) < max_setsize - 1) {
    -    stop(paste0("The number of columns for spatial positions in the argument ",
    -                "'nt_distance' is less than max(setsize)-1"))
    -  } else if (length(nt_distance) > max_setsize - 1) {
    -    stop(paste0("The number of columns for spatial positions in the argument ",
    -                "'nt_distance' is more than max(setsize)-1"))
    -  }
    +

    Each model should have a check_data.modelname() method that checks if the data contains all necessary information for fitting the model. For the IMM, the bsc and full versions require a special check for the nt_distances variables:

    +
    #' @export
    +check_data.imm_bsc <- function(model, data, formula) {
    +  data <- .check_data_imm_dist(model, data, formula)
    +  NextMethod("check_data")
    +}
    +
    +#' @export
    +check_data.imm_full <- function(model, data, formula) {
    +  data <- .check_data_imm_dist(model, data, formula)
    +  NextMethod("check_data")
    +}
     
    -  if (any(data[,nt_distance] < 0)) {
    -    stop('Somve values of the spatial distance variables in the data are negative.\n
    -         All spatial distances to the target need to be postive.')
    -  }
    -
    -  data = NextMethod("check_data")
    -
    -  return(data)
    -}
    -

    The IMM models share methods with the mixture3p model, all of which are of class nontargets so the check_data.nontargets method is defined in the general file R/helpers-data.R. If you are adding a new model, you should check if the data requirements are similar to any existing model and define the check_data method only for the methods that are unique to your model.

    +.check_data_imm_dist <- function(model, data, formula) { + nt_distances <- model$other_vars$nt_distances + max_set_size <- attr(data, 'max_set_size') + + stopif(!isTRUE(all.equal(length(nt_distances), max_set_size - 1)), + "The number of columns for non-target distances in the argument \\ + 'nt_distances' should equal max(set_size)-1})") + + # replace nt_distances + data[,nt_distances][is.na(data[,nt_distances])] <- 999 + + stopif(any(data[,nt_distances] < 0), + "All non-target distances to the target need to be postive.") + data +}
    +

    The IMM models share methods with the mixture3p model, all of which are of class non_targets so the check_data.non_targets method is defined in the general file R/helpers-data.R. If you are adding a new model, you should check if the data requirements are similar to any existing model and define the check_data method only for the methods that are unique to your model.

    The check_data.mymodel() function should always take the arguments model, data, and formula and return the data with the necessary transformations. It should also call data = NextMethod("check_data") to call the check_data method of the more general class.

    4.1.4 configure_model() methods

    -

    The configure_model.mymodel() method is where you specify the model formula, the family, any custom code, and the priors. The method is defined as follows for the IMM model:

    +

    The configure_model.mymodel() method is where you specify the model formula, the family, any custom code. The method is defined as follows for the IMM model:

    (we show only the IMMfull version)

    -
    configure_model.IMMfull <- function(model, data, formula) {
    -  # retrieve arguments from the data check
    -  max_setsize <- attr(data, 'max_setsize')
    -  lure_idx_vars <- attr(data, "lure_idx_vars")
    -  nt_features <- model$other_vars$nt_features
    -  setsize_var <- model$other_vars$setsize
    -  nt_distance <- model$other_vars$nt_distance
    -
    -  # construct main brms formula from the bmm formula
    -  bmm_formula <- formula
    -  formula <- bmf2bf(model, bmm_formula)
    -
    -  # additional internal terms for the mixture model formula
    -  kappa_nts <- paste0('kappa', 2:max_setsize)
    -  kappa_unif <- paste0('kappa',max_setsize + 1)
    -  theta_nts <- paste0('theta',2:max_setsize)
    -  mu_nts <- paste0('mu', 2:max_setsize)
    -  mu_unif <- paste0('mu', max_setsize + 1)
    -
    -  formula <- formula +
    -    glue_lf(kappa_unif,' ~ 1') +
    -    glue_lf(mu_unif, ' ~ 1') +
    -    brms::nlf(theta1 ~ c + a) +
    -    brms::nlf(kappa1 ~ kappa) +
    -    brms::nlf(expS ~ exp(s))
    -
    -  for (i in 1:(max_setsize - 1)) {
    -    formula <- formula +
    -      glue_nlf(kappa_nts[i], ' ~ kappa') +
    -      glue_nlf(theta_nts[i], ' ~ ', lure_idx_vars[i], '*(exp(-expS*',nt_distance[i],')*c + a) + ',
    -               '(1-', lure_idx_vars[i], ')*(-100)') +
    -      glue_nlf(mu_nts[i], ' ~ ', nt_features[i])
    -  }
    -
    -  # define mixture family
    -  vm_list = lapply(1:(max_setsize + 1), function(x) brms::von_mises(link = "identity"))
    -  vm_list$order = "none"
    -  family <- brms::do_call(brms::mixture, vm_list)
    -
    -  # define prior
    -  additional_constants <- list()
    -  additional_constants[[kappa_unif]] <- -100
    -  additional_constants[[mu_unif]] <- 0
    -  prior <- fixed_pars_priors(model, additional_constants) +
    -    brms::prior_("normal(2, 1)", class = "b", nlpar = "kappa") +
    -    brms::prior_("normal(0, 1)", class = "b", nlpar = "c") +
    -    brms::prior_("normal(0, 1)", class = "b", nlpar = "a") +
    -    brms::prior_("normal(0, 1)", class = "b", nlpar = "s")
    -
    -  # if there is setsize 1 in the data, set constant prior over thetant for setsize1
    -  if ((1 %in% data$ss_numeric) && !is.numeric(data[[setsize_var]])) {
    -    prior <- prior +
    -      brms::prior_("constant(0)", class = "b", coef = paste0(setsize_var, 1), nlpar = "a") +
    -      brms::prior_("constant(0)", class = "b", coef = paste0(setsize_var, 1), nlpar = "s")
    -  }
    -
    -  out <- nlist(formula, data, family, prior)
    -  return(out)
    -}
    -

    The configure_model method should always take the arguments model, data, and formula (as a bmmformula) and return a named list with the formula (as a brmsformula), the data, the family, and the priors.

    -

    Inside the configure_model method the brmsformula is generated using the bmf2bf function. This function converts the bmmformula passed to fit_model function into a brmsformula based on the information for the response variables provided in the bmmmodel object. There is a general method in R/helpers-formula.R to construct the formula for all models with a single response variable.

    +
    #' @export
    +configure_model.imm_full <- function(model, data, formula) {
    +  # retrieve arguments from the data check
    +  max_set_size <- attr(data, 'max_set_size')
    +  lure_idx <- attr(data, "lure_idx_vars")
    +  nt_features <- model$other_vars$nt_features
    +  set_size_var <- model$other_vars$set_size
    +  nt_distances <- model$other_vars$nt_distances
    +
    +  # construct main brms formula from the bmm formula
    +  formula <- bmf2bf(model, formula) +
    +    brms::lf(kappa2 ~ 1) +
    +    brms::lf(mu2 ~ 1) +
    +    brms::nlf(theta1 ~ c + a) +
    +    brms::nlf(kappa1 ~ kappa) +
    +    brms::nlf(expS ~ exp(s))
    +
    +  # additional internal terms for the mixture model formula
    +  kappa_nts <- paste0("kappa", 3:(max_set_size + 1))
    +  theta_nts <- paste0("theta", 3:(max_set_size + 1))
    +  mu_nts <- paste0("mu", 3:(max_set_size + 1))
    +
    +  for (i in 1:(max_set_size - 1)) {
    +    formula <- formula +
    +      glue_nlf("{kappa_nts[i]} ~ kappa") +
    +      glue_nlf("{theta_nts[i]} ~ {lure_idx[i]} * (exp(-expS*{nt_distances[i]})",
    +               " * c + a) + (1 - {lure_idx[i]}) * (-100)") +
    +      glue_nlf("{mu_nts[i]} ~ {nt_features[i]}")
    +  }
    +
    +
    +  # define mixture family
    +  formula$family <- brms::mixture(brms::von_mises("tan_half"),
    +                                  brms::von_mises("identity"),
    +                                  nmix = c(1, max_set_size),
    +                                  order = "none")
    +
    +  nlist(formula, data)
    +}
    +

    The configure_model method should always take the arguments model, data, and formula (as a bmmformula) and return a named list with the formula (as a brmsformula) and the data. The brmsfamily should be stored within the formula.

    +

    Inside the configure_model method the brmsformula is generated using the bmf2bf function. This function converts the bmmformula passed to bmm() function into a brmsformula based on the information for the response variables provided in the bmmmodel object. There is a general method in R/helpers-formula.R to construct the formula for all models with a single response variable.

    # default method for all bmmmodels with 1 response variable
     #' @export
     bmf2bf.bmmmodel <- function(model, formula) {
    @@ -464,82 +567,138 @@ 

    4.2 The Signal Discrimination Model (SDM)

    -

    The SDM model is defined in the file R/bmm_model_SDM.R. The SDM model differs in the configuration compared to the IMM model, as it requires custom STAN code. Let’s go through the different parts. As before, we start with the model definition.

    +

    The SDM model is defined in the file R/model_sdm.R. The SDM model differs in the configuration compared to the IMM model, as it requires custom STAN code. Let’s go through the different parts. As before, we start with the model definition.

    4.2.1 Model definition

    -
    .model_sdmSimple <- function(resp_err, ...) {
    -   out <- list(
    -      resp_vars = nlist(resp_err),
    -      other_vars = nlist(),
    -      info = list(
    -         domain = 'Visual working memory',
    -         task = 'Continuous reproduction',
    -         name = 'Signal Discrimination Model (SDM) by Oberauer (2023)',
    -         citation = paste0('Oberauer, K. (2023). Measurement models for visual working memory - ',
    -                           'A factorial model comparison. Psychological Review, 130(3), 841-852'),
    -         version = 'Simple (no non-targets)',
    -         requirements = '- The response variable should be in radians and represent the angular error relative to the target',
    -         parameters = list(
    -            mu = 'Location parameter of the SDM distribution (in radians; by default fixed internally to 0)',
    -            c = 'Memory strength parameter of the SDM distribution',
    -            kappa = 'Precision parameter of the SDM distribution (log scale)'
    -         ),
    -         fixed_parameters = list(
    -            mu = 0
    -         )),
    -      void_mu = FALSE
    -   )
    -   class(out) <- c('bmmmodel', 'vwm', 'sdmSimple')
    -   out
    -}
    +
    .model_sdm <- function(resp_error = NULL, links = NULL, 
    +                       version = "simple", call = NULL, ...) {
    +  out <- structure(
    +    list(
    +      resp_vars = nlist(resp_error),
    +      other_vars = nlist(),
    +      domain = 'Visual working memory',
    +      task = 'Continuous reproduction',
    +      name = 'Signal Discrimination Model (SDM) by Oberauer (2023)',
    +      citation = glue(
    +        'Oberauer, K. (2023). Measurement models for visual working memory - \\
    +        A factorial model comparison. Psychological Review, 130(3), 841-852'
    +      ),
    +      version = version,
    +      requirements = glue(
    +        '- The response variable should be in radians and represent the angular \\
    +        error relative to the target'
    +      ),
    +      parameters = list(
    +        mu = glue('Location parameter of the SDM distribution (in radians; \\
    +                  by default fixed internally to 0)'),
    +        c = 'Memory strength parameter of the SDM distribution',
    +        kappa = 'Precision parameter of the SDM distribution'
    +      ),
    +      links = list(
    +        mu = 'tan_half',
    +        c = 'log',
    +        kappa = 'log'
    +      ),
    +      fixed_parameters = list(mu = 0),
    +      default_priors = list(
    +        mu = list(main = "student_t(1, 0, 1)"),
    +        kappa = list(main = "student_t(5, 1.75, 0.75)", effects = "normal(0, 1)"),
    +        c = list(main = "student_t(5, 2, 0.75)", effects = "normal(0, 1)")
    +      ),
    +      void_mu = FALSE
    +    ),
    +    class = c('bmmodel', 'vwm', 'sdm', paste0("sdm_", version)),
    +    call = call
    +  )
    +  out$links[names(links)] <- links
    +  out
    +}

    The model definition is similar to the IMM model, but the SDM model only requires the user to specify the response error, but not additional variables such as non-target variables. The class is also different, as the SDM model is not a subclass of the IMM model. We’ll skip the alias for the SDM model, as it is similar for every model.

    4.2.2 check_data() methods

    -

    The SDM does not require special data checks beyond it’s shared class with other vwm models, so we don’t need to define a check_data.sdmSimple method. The check_data.vwm method is defined in the general file R/helpers-data.R.

    +

    The SDM shares a class with other vwm models, so we most of the data checks are performed by check_data.vwm method, defined in the general file R/helpers-data.R. The sdm however, samples much more quickly in Stan, if the data is sorted by the predictor variables, so we have the following custom data check method for the sdm:

    +
    #' @export
    +check_data.sdm <- function(model, data, formula) {
    +  # data sorted by predictors is necessary for speedy computation of normalizing constant
    +  data <- order_data_query(model, data, formula)
    +  NextMethod("check_data")
    +}

    4.2.3 configure_model() methods

    The configure_model method for the SDM model is different compared to the IMM model, as it requires custom STAN code. The method is defined as follows:

    -
    #' @export
    -configure_model.sdmSimple <- function(model, data, formula) {
    -    # construct the family
    -    # note - c has a log link, but I've coded it manually for computational efficiency
    -    sdm_simple <- brms::custom_family(
    -      "sdm_simple", dpars = c("mu", "c","kappa"),
    -      links = c("identity","identity", "log"), lb = c(NA, NA, NA),
    -      type = "real", loop=FALSE,
    -    )
    -    family <- sdm_simple
    -
    -    # prepare initial stanvars to pass to brms, model formula and priors
    -    sc_path <- system.file("stan_chunks", package="bmm")
    -    stan_funs <- read_lines2(paste0(sc_path, '/sdmSimple_funs.stan'))
    -    stan_tdata <- read_lines2(paste0(sc_path, '/sdmSimple_tdata.stan'))
    -    stan_likelihood <- read_lines2(paste0(sc_path, '/sdmSimple_likelihood.stan'))
    -    stanvars <- brms::stanvar(scode = stan_funs, block = "functions") +
    -      brms::stanvar(scode = stan_tdata, block = 'tdata') +
    -      brms::stanvar(scode = stan_likelihood, block = 'likelihood', position ="end")
    -
    -    # construct main brms formula from the bmm formula
    -    bmm_formula <- formula
    -    formula <- bmf2bf(model, bmm_formula)
    -
    -    # construct the default prior
    -    # TODO: for now it just fixes mu to 0, I have to add proper priors
    -    prior <- fixed_pars_priors(model)
    -
    -    # set initial values to be sampled between [-1,1] to avoid extreme SDs that
    -    # can cause the sampler to fail
    -    init = 1
    -
    -    # return the list
    -    out <- nlist(formula, data, family, prior, stanvars, init)
    -    return(out)
    -}
    -

    Lines 5-10 use the brms::custom_family function to define a custom family for the SDM model. The dpars argument specifies the parameters of the model, and the links argument specifies the link functions for the parameters. For more information, see here

    -

    Lines 13-22 read the custom stan code from the inst/stan_chunks directory. This has to be specified with the system.file() command to ensure that the code is found when the package is installed. The stanvars object is used to pass custom stan code to the brms package. The stanvars object is a list of brms::stanvar objects, each of which contains the stan code for a specific part of the model. There is a separate .stan file for each part of the stan code, and each file is read into a separate brms::stanvar object.

    -

    Converting the bmmformula to a brmsformula, specifying priors for fixed parameters, and collecting all arguements is the same as for the IMM model.

    +
    #' @export
    +configure_model.sdm <- function(model, data, formula) {
    +  # construct the family
    +  # note - c has a log link, but I've coded it manually for computational efficiency
    +  sdm_simple <- brms::custom_family(
    +    "sdm_simple",
    +    dpars = c("mu", "c", "kappa"),
    +    links = c("tan_half", "identity", "log"),
    +    lb = c(NA, NA, NA),
    +    ub = c(NA, NA, NA),
    +    type = "real", loop = FALSE,
    +    log_lik = log_lik_sdm_simple,
    +    posterior_predict = posterior_predict_sdm_simple
    +  )
    +
    +  # prepare initial stanvars to pass to brms, model formula and priors
    +  sc_path <- system.file("stan_chunks", package = "bmm")
    +  stan_funs <- read_lines2(paste0(sc_path, "/sdm_simple_funs.stan"))
    +  stan_tdata <- read_lines2(paste0(sc_path, "/sdm_simple_tdata.stan"))
    +  stan_likelihood <- read_lines2(paste0(sc_path, "/sdm_simple_likelihood.stan"))
    +  stanvars <- brms::stanvar(scode = stan_funs, block = "functions") +
    +    brms::stanvar(scode = stan_tdata, block = "tdata") +
    +    brms::stanvar(scode = stan_likelihood, block = "likelihood", position = "end")
    +
    +  # construct main brms formula from the bmm formula
    +  formula <- bmf2bf(model, formula)
    +  formula$family <- sdm_simple
    +
    +  # set initial values to be sampled between [-1,1] to avoid extreme SDs that
    +  # can cause the sampler to fail
    +  init <- 1
    +
    +  # return the list
    +  nlist(formula, data, stanvars, init)
    +}
    +

    Lines 5-14 use the brms::custom_family function to define a custom family for the SDM model. The dpars argument specifies the parameters of the model, and the links argument specifies the link functions for the parameters. For more information, see here

    +

    Lines 17-23 read the custom STAN code from the inst/stan_chunks directory. This has to be specified with the system.file() command to ensure that the code is found when the package is installed. The stanvars object is used to pass custom STAN code to the brms package. The stanvars object is a list of brms::stanvar objects, each of which contains the STAN code for a specific part of the model. There is a separate .stan file for each part of the STAN code, and each file is read into a separate brms::stanvar object.

    +

    Converting the bmmformula to a brmsformula and collecting all arguements is the same as for the IMM model.

    +
    +
    +

    4.2.4 Postprocessing methods

    +

    Unlike the imm model, the sdm model requires some special postprocessing because of the way the link functions are coded. These methods are applied after the brmsfit object is returned, at the very end of the bmm() pipeline:

    +
    #' @export
    +postprocess_brm.sdm <- function(model, fit, ...) {
    +  # manually set link_c to "log" since I coded it manually
    +  fit$family$link_c <- "log"
    +  fit$formula$family$link_c <- "log"
    +  fit
    +}
    +
    +#' @export
    +revert_postprocess_brm.sdm <- function(model, fit, ...) {
    +  fit$family$link_c <- "identity"
    +  fit$formula$family$link_c <- "identity"
    +  fit
    +}
    +

    we also have a couple of special functions for custom families in brms, which allow other typical tools such posterior_predict of bridgesampling to work:

    +
    log_lik_sdm_simple <- function(i, prep) {
    +  mu <- brms::get_dpar(prep, "mu", i = i)
    +  c <- brms::get_dpar(prep, "c", i = i)
    +  kappa <- brms::get_dpar(prep, "kappa", i = i)
    +  y <- prep$data$Y[i]
    +  dsdm(y, mu, c, kappa, log = T)
    +}
    +
    +posterior_predict_sdm_simple <- function(i, prep, ...) {
    +  mu <- brms::get_dpar(prep, "mu", i = i)
    +  c <- brms::get_dpar(prep, "c", i = i)
    +  kappa <- brms::get_dpar(prep, "kappa", i = i)
    +  rsdm(length(mu), mu, c, kappa)
    +}

    We will now look at how to construct all these parts for a new model. Hint: you don’t have to do it manually, you can use the use_model_template() function to generate templates for your model.

    diff --git a/_book/index.html b/_book/index.html index 2f355d2..5954a33 100644 --- a/_book/index.html +++ b/_book/index.html @@ -200,7 +200,9 @@

    BMM Developer Notes

    Overview

    -

    This article aims to help developers contribute new models to bmm. It is a work in progress and will be updated as the package evolves. It explains how to set-up your system for package development, the structure of the package, and the workflow for contributing new models to the package.

    +

    Last update: 26.03.2024

    +

    This guide aims to help developers contribute new models to bmm. It is a work in progress and will be updated as the package evolves. It explains how to set-up your system for package development, the structure of the package, and the workflow for contributing new models to the package.

    +

    The current guide is up to date with bmm v0.5.0 and it might not yet reflect changes implemented afterwards. If you run into problems, don’’t hesitate to open an issue on github.

    We follow a github flow workflow. The repository contains two main branches:

    • Master (contains the latest released stable version of the bmm package)

    • diff --git a/_book/search.json b/_book/search.json index 3ec9fde..3f9bd95 100644 --- a/_book/search.json +++ b/_book/search.json @@ -4,7 +4,7 @@ "href": "index.html", "title": "BMM Developer Notes", "section": "", - "text": "Overview\nThis article aims to help developers contribute new models to bmm. It is a work in progress and will be updated as the package evolves. It explains how to set-up your system for package development, the structure of the package, and the workflow for contributing new models to the package.\nWe follow a github flow workflow. The repository contains two main branches:\n\nMaster (contains the latest released stable version of the bmm package)\nDevelop (contains the latest stable development branch)\n\nAll new feature development should occur on an independent branch from Develop. If you want to contribute a new model to the bmm package, you need to fork the repository, create a new branch for your model, extensively test the model, and eventually submit a pull request for your changes to be merged into the Develop branch of the main repository. Your changes will be reviewed by someone from the core team. Once your changes are merged into the Develop branch, they will be included in the next release of the package." + "text": "Overview\nLast update: 26.03.2024\nThis guide aims to help developers contribute new models to bmm. It is a work in progress and will be updated as the package evolves. It explains how to set-up your system for package development, the structure of the package, and the workflow for contributing new models to the package.\nThe current guide is up to date with bmm v0.5.0 and it might not yet reflect changes implemented afterwards. If you run into problems, don’’t hesitate to open an issue on github.\nWe follow a github flow workflow. The repository contains two main branches:\n\nMaster (contains the latest released stable version of the bmm package)\nDevelop (contains the latest stable development branch)\n\nAll new feature development should occur on an independent branch from Develop. If you want to contribute a new model to the bmm package, you need to fork the repository, create a new branch for your model, extensively test the model, and eventually submit a pull request for your changes to be merged into the Develop branch of the main repository. Your changes will be reviewed by someone from the core team. Once your changes are merged into the Develop branch, they will be included in the next release of the package." }, { "objectID": "setup.html", @@ -28,7 +28,7 @@ "href": "setup.html#package-development-via-rstudio-and-devtools", "title": "1  System setup", "section": "1.2 Package development via RStudio and devtools", - "text": "1.2 Package development via RStudio and devtools\nThe bmm package is setup as an RStudio project. Opening the bmm.Rproj file will open a new RStudio instance, which facilitates package development with a few commands from the devtools package. A great tutorial on package development can be found here. Below is a summary of the most important steps\n\nMake sure you have the devtools package and a few others installed and loaded\ninstall.packages(c(\"devtools\", \"roxygen2\", \"testthat\", \"knitr\"))\nlibrary(devtools)\nTo avoid having to load the package every time, you can add the following code to your .Rprofile file\nif (interactive()) {\n suppressMessages(require(devtools))\n}\nAs noted here, you can create and open an .Rprofile file, if you don’t already have one with\nuse_devtools()\nLoad the current version of the bmm package based on your local files\nload_all() # or ctrl+shift+L\nyou can use this command whenever you make changes to the package code to see the changes in action. You should not call library(bmm) or source the files manually, as this will load the installed version of the package, not the one you are developing.\nMake any changes to the package code that you need to make (elaborated in the next section)\nUse check() to check the package for errors and warnings\ncheck()\nyou should always ensure that check() produces no errors before submitting a pull request\nUse document() to update the documentation\ndocument()" + "text": "1.2 Package development via RStudio and devtools\nThe bmm package is setup as an RStudio project. Opening the bmm.Rproj file will open a new RStudio instance, which facilitates package development with a few commands from the devtools package. A great tutorial on package development can be found here. Below is a summary of the most important steps\n\nMake sure you have the devtools package and a few others installed and loaded\ninstall.packages(c(\"devtools\", \"roxygen2\", \"testthat\", \"knitr\"))\nlibrary(devtools)\ninstall_dev_deps()\nTo avoid having to load the devtools package every time, you can add the following code to your .Rprofile file\nif (interactive()) {\n suppressMessages(require(devtools))\n}\nAs noted here, you can create and open an .Rprofile file, if you don’t already have one with\nuse_devtools()\nLoad the current version of the bmm package based on your local files\nload_all() # or ctrl+shift+L\nyou can use this command whenever you make changes to the package code to see the changes in action. You should not call library(bmm) or source the files manually, as this will load the installed version of the package, not the one you are developing.\nMake any changes to the package code that you need to make (elaborated in the next section)\nUse check() to check the package for errors and warnings\ncheck()\nyou should always ensure that check() produces no errors before submitting a pull request\nUse document() to update the documentation\ndocument()" }, { "objectID": "git-workflow.html", @@ -76,7 +76,7 @@ "href": "bmm-architecture.html#models", "title": "3  BMM code structure", "section": "3.2 Models", - "text": "3.2 Models\nAll models in the package are defined as S3 classes and follow a strict template. This allows us to implement general methods for handling model fitting, data checking, and post-processing. Each model has an internal function that defines the model and its parameters, and a user-facing alias. For a complete example model file and an explanation, see Section 4. The general model template looks like this:\n.model_myNewModel <- function(resp_var1, required_arg1, required_arg2, ...) {\n out <- list(\n resp_vars = nlist(resp_var1),\n other_vars = nlist(required_arg1, required_arg2),\n info = list(\n domain = '',\n task = '',\n name = '',\n citation = '',\n version = '',\n requirements = '',\n parameters = list(),\n fixed_parameters = list()\n ),\n void_mu = FALSE\n )\n class(out) <- c('bmmmodel', 'myNewModel')\n out\n}\nEach model is accompanied by a user-facing alias, the documentation of which is generated automatically based on the info list in the model definition.\n# user facing alias\n# information in the title and details sections will be filled in\n# automatically based on the information in the .model_modelname()$info\n#' @title `r .model_myNewModel()$info$name`\n#' @name Model Name#' @details `r model_info(myNewModel(NA,NA))`\n#' @param resp_var1 A description of the response variable\n#' @param required_arg1 A description of the required argument\n#' @param required_arg2 A description of the required argument\n#' @param ... used internally for testing, ignore it\n#' @return An object of class `bmmmodel`\n#' @export\n#' @examples\n#' \\dontrun{\n#' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example)\n#' }\nmyNewModel <- .model_myNewModel\nThen users can fit the model using the fit_model() function, and the model will be automatically recognized and handled by the package:\nfit <- fit_model(formula, \n data = mydata, \n model = modelname(resp_var1, required_arg1, required_arg2))" + "text": "3.2 Models\nAll models in the package are defined as S3 classes and follow a strict template. This allows us to implement general methods for handling model fitting, data checking, and post-processing. Each model has an internal function that defines the model and its parameters, and a user-facing alias. For a complete example model file and an explanation, see Section 4. The general model template looks like this:\n.model_my_new_model <- function(resp_var1 = NULL, required_args1 = NULL, \n required_arg2 = NULL, links = NULL, version = NULL,\n call = NULL, ...) {\n out <- structure(\n list(\n resp_vars = nlist(resp_error),\n other_vars = nlist(),\n domain = \"\",\n task = \"\",\n name = \"\",\n version = \"\",\n citation = \"\",\n requirements = \"\",\n parameters = list(),\n links = list(),\n fixed_parameters = list(),\n default_priors = list(),\n version = version,\n void_mu = FALSE\n ),\n class = c(\"bmmodel\", \"my_new_model\"),\n call = call\n )\n out$links[names(links)] <- links\n out\n}\nEach model is accompanied by a user-facing alias, the documentation of which is generated automatically based on the info list in the model definition.\n# user facing alias\n# information in the title and details sections will be filled in\n# automatically based on the information in the .model_modelname()$info\n#' @title `r .model_my_new_model()name`\n#' @name Model Name#' @details `r model_info(.model_my_new_model())`\n#' @param resp_var1 A description of the response variable\n#' @param required_arg1 A description of the required argument\n#' @param required_arg2 A description of the required argument\n#' @param ... used internally for testing, ignore it\n#' @return An object of class `bmmmodel`\n#' @export\n#' @examples\n#' \\dontrun{\n#' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example)\n#' }\nmy_new_model <- function(resp_var1, required_arg1, required_arg2, \n links = NULL, version = NULL, ...) {\n call <- match.call()\n stop_missing_args()\n .model_my_new_model(resp_var1 = resp_var1, required_arg1 = required_arg1,\n required_arg2 = required_arg2, links = links, version = version,\n call = call, ...)\n}\nThen users can fit the model using the fit_model() function, and the model will be automatically recognized and handled by the package:\nfit <- bmm(formula = my_bmmformula, \n data = my_data, \n model = my_new_model(resp_var1, required_arg1, required_arg2))" }, { "objectID": "bmm-architecture.html#s3-methods", @@ -90,7 +90,7 @@ "href": "bmm-architecture.html#file-organization", "title": "3  BMM code structure", "section": "3.4 File organization", - "text": "3.4 File organization\nThe bmm package is organized into several files. The main files are:\n\nR/fit_model.R\nIt contains the main function for fitting models, fit_model(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package.\nTo add new models, you do not have to edit this file. The functions above are generic S3 methods, and they will automatically recognize new models if you add appropriate methods for them (see section Adding new models).\n\n\nR/helpers-*.R\nR/helpers-data.R, R/helpers-postprocess.R, R/helpers-model.R, R/helpers-formula.R and R/helpers-prior.R\nThese files define the main generic S3 methods for checking data, postprocessing the fitted model, configuring the model, checking the model formula, and combining priors. They contain the default methods for these functions, which are called by fit_model() if no specific method is defined for a model. If you want to add a new model, you will need to add specific methods for these functions for your model. You do not need to edit these files to add a new model.\n\n\nR/bmm_model_*.R\nEach model and it’s methods is defined in a separate file. For example, the 3-parameter mixture model is defined in bmm_model_mixture3p.R. This file contains the internal function that defines the model and its parameters, and the specific methods for the generic S3 functions. Your new model will exist in a file like this. The name of the file should be bmm_model_name_of_your_model.R. You don’t have to add this file manually - see section Adding new models.\n\n\nR/bmm_distributions.R\nThis file contains the definition of the custom distributions that are used in the package. It specifies the density, random number generation, and probability functions for the custom distributions. If your model requires a custom distribution, you will need to add it to this file. These are not used during model fitting, but can be used to generate data from the model, and to plot the model fit.\n\n\nR/utils.R, R/brms-misc.R\nVarious utility functions.\n\n\ninst/stan_chunks/\nThis directory contains the Stan chunks that are passed to the brms::stanvar() function. These are used to define the custom distributions that are used in the package. If you add a new custom distribution, you will need to add a new Stan chunk to this directory. Each model has several files, one for each corresponding stanvar block." + "text": "3.4 File organization\nThe bmm package is organized into several files. The main files are:\n\nR/bmm.R\nIt contains the main function for fitting models, bmm(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package.\nTo add new models, you do not have to edit this file. The functions above are generic S3 methods, and they will automatically recognize new models if you add appropriate methods for them (see section Adding new models).\n\n\nR/helpers-*.R\nR/helpers-data.R, R/helpers-parameters.R, R/helpers-postprocess.R, R/helpers-model.R, and R/helpers-prior.R\nThese files define the main generic S3 methods for checking data, postprocessing the fitted model, configuring the model, checking the model formula, and combining priors. They contain the default methods for these functions, which are called by bmm() if no specific method is defined for a model. If you want to add a new model, you will need to add specific methods for these functions for your model. You do not need to edit these files to add a new model.\n\n\nR/bmmformula.R\nThis file contains the definition of the bmmformula class, which is used to represent the formula for the model. It contains the bmmformula() function and its alias bmf(), which is used to create a new formula object.\n\n\nR/model_*.R\nEach model and it’s methods is defined in a separate file. For example, the 3-parameter mixture model is defined in model_mixture3p.R. This file contains the internal function that defines the model and its parameters, and the specific methods for the generic S3 functions. Your new model will exist in a file like this. The name of the file should be model_name_of_your_model.R. You don’t have to add this file manually - see section Adding new models.\n\n\nR/distributions.R\nThis file contains the definition of the custom distributions that are used in the package. It specifies the density, random number generation, and probability functions for the custom distributions. If your model requires a custom distribution, you will need to add it to this file. These are not used during model fitting, but can be used to generate data from the model, and to plot the model fit.\n\n\nR/utils.R, R/brms-misc.R, R/restructure.R, R/summary.R, R/update.R\nVarious utility functions.\n\n\ninst/stan_chunks/\nThis directory contains the Stan chunks that are passed to the brms::stanvar() function. These are used to define the custom distributions that are used in the package. If you add a new custom distribution, you will need to add a new Stan chunk to this directory. Each model has several files, one for each corresponding stanvar block." }, { "objectID": "example-model.html", @@ -107,14 +107,14 @@ "href": "example-model.html#the-interference-measurement-model-imm", "title": "4  Example model file", "section": "4.1 The Interference Measurement Model (IMM)", - "text": "4.1 The Interference Measurement Model (IMM)\nThe model is defined in the file R/bmm_model_IMM.R. Let’s go through the different parts.\n\n4.1.1 Model definition\nThe full IMM model is defined in the following internal model class:\n.model_IMMfull <- function(resp_err, nt_features, nt_distance, setsize, ...) {\n out <- list(\n resp_vars = nlist(resp_err),\n other_vars = nlist(nt_features, nt_distance, setsize),\n info = list(\n domain = \"Visual working memory\",\n task = \"Continuous reproduction\",\n name = \"Interference measurement model by Oberauer and Lin (2017).\",\n version = \"full\",\n citation = paste0(\"Oberauer, K., & Lin, H.Y. (2017). An interference model \",\n \"of visual working memory. Psychological Review, 124(1), 21-59\"),\n requirements = paste0('- The response vairable should be in radians and ',\n 'represent the angular error relative to the target\\n ',\n '- The non-target features should be in radians and be ',\n 'centered relative to the target'),\n parameters = list(\n mu1 = paste0(\"Location parameter of the von Mises distribution for memory responses\",\n \"(in radians). Fixed internally to 0 by default.\"),\n kappa = \"Concentration parameter of the von Mises distribution (log scale)\",\n a = \"General activation of memory items\",\n c = \"Context activation\",\n s = \"Spatial similarity gradient\"\n ),\n fixed_parameters = list(\n mu1 = 0\n )),\n void_mu = FALSE\n )\n class(out) <- c(\"bmmmodel\",\"vwm\",\"nontargets\",\"IMMspatial\",\"IMMfull\")\n out\n}\nHere is a brief explanation of the different components of the model definition:\nresp_vars: a list of response variables that the model will be fitted to. These variables will be used to construct the brmsformula passed to brms together with the bmmformula and the parameters of the model. The user has to provide these variables in the data frame that is passed to the fit_model function\nother_vars: a list of additional variables that are required for the model. This is used to check if the data contains all necessary information for fitting the model. In the example above, the IMM model requires the names of the variables specifying the non-target features relative to the target, the variables specifying the distance of the non-targets to the target, and the setsize. The user has to provide these variables in the data frame that is passed to the fit_model function\ninfo: contains information about the model, such as the domain, task, name, citation, version, requirements, and parameters. This information is used to check if the bmmformula contains linear model formulas for all model parameters, and also specify defaults for fixed_parameters. In addition, the info is used to generate the documentation for the model.\nvoid_mu: For models using a custom family that do not contain a location or mu parameter, for example the diffusion model, we recommend setting up a void_mu parameter. This avoids arbitrarily using one of the model parameters as the mu parameter.\nclass: is the most important part. It contains the class of the model. This is used by generic S3 methods to perform data checks and model configuration. The classes should be ordered from most general to most specific. A general class exists when the same operations can be performed on multiple models. For example, the ‘3p’, ‘IMMabc’, ‘IMMbsc’ and ‘IMMfull’ models all have non-targets and setsize arguments, so the same data checks can be performed on all of them, represented by the class nontargets. The first class should always be bmmmodel, which is the main class for all models. The last class should be the specific model name, in this case IMMfull.\n\n\n4.1.2 Model alias\nThe model alias is a user-facing function that calls the internal model function. It is defined as follows:\n# user facing alias\n\n#' @title `r .model_IMMfull(NA, NA, NA, NA)$info$name`\n#' @name IMM\n#' @details `r model_info(IMMfull(NA, NA, NA, NA), components =c('domain', 'task', 'name', 'citation'))`\n#' #### Version: `IMMfull`\n#' `r model_info(IMMfull(NA, NA, NA, NA), components =c('requirements', 'parameters'))`\n#' #### Version: `IMMbsc`\n#' `r model_info(IMMbsc(NA, NA, NA, NA), components =c('requirements', 'parameters'))`\n#' #### Version: `IMMabc`\n#' `r model_info(IMMabc(NA, NA, NA), components =c('requirements', 'parameters'))`\n#'\n#' Additionally, all IMM models have an internal parameter that is fixed to 0 to\n#' allow the model to be identifiable. This parameter is not estimated and is not\n#' included in the model formula. The parameter is:\n#'\n#' - b = \"Background activation (internally fixed to 0)\"\n#'\n#' @param resp_err The name of the variable in the provided dataset containing the\n#' response error. The response Error should code the response relative to the to-be-recalled\n#' target in radians. You can transform the response error in degrees to radian using the `deg2rad` function.\n#' @param nt_features A character vector with the names of the non-target variables.\n#' The non_target variables should be in radians and be centered relative to the\n#' target.\n#' @param nt_distance A vector of names of the columns containing the distances of\n#' non-target items to the target item. Only necessary for the `IMMbsc` and `IMMfull` models\n#' @param setsize Name of the column containing the set size variable (if\n#' setsize varies) or a numeric value for the setsize, if the setsize is\n#' fixed.\n#' @param ... used internally for testing, ignore it\n#' @return An object of class `bmmmodel`\n#' @keywords bmmmodel\n#' @export\nIMMfull <- .model_IMMfull\n\n#' @rdname IMM\n#' @keywords bmmmodel\n#' @export\nIMMbsc <- .model_IMMbsc\n\n#' @rdname IMM\n#' @keywords bmmmodel\n#' @export\nIMMabc <- .model_IMMabc\nThe details will be filled out automatically from the model definition. The example for the IMM model also includes the aliases for the other versions of the IMM model, which are IMMbsc and IMMabc, and does some fancy formatting to include documentation about all versions of the model in the same help file.\n\n\n4.1.3 check_data() methods\nEach model should have a check_data.modelname() method that checks if the data contains all necessary information for fitting the model. For the IMM, all three versions of the model share the same data requirements, so check_data is defined for the more general class, IMMspatial. The method is defined as follows:\ncheck_data.IMMspatial <- function(model, data, formula) {\n nt_distance <- model$other_vars$nt_distance\n max_setsize <- attr(data, 'max_setsize')\n\n if (length(nt_distance) < max_setsize - 1) {\n stop(paste0(\"The number of columns for spatial positions in the argument \",\n \"'nt_distance' is less than max(setsize)-1\"))\n } else if (length(nt_distance) > max_setsize - 1) {\n stop(paste0(\"The number of columns for spatial positions in the argument \",\n \"'nt_distance' is more than max(setsize)-1\"))\n }\n\n if (any(data[,nt_distance] < 0)) {\n stop('Somve values of the spatial distance variables in the data are negative.\\n\n All spatial distances to the target need to be postive.')\n }\n\n data = NextMethod(\"check_data\")\n\n return(data)\n}\nThe IMM models share methods with the mixture3p model, all of which are of class nontargets so the check_data.nontargets method is defined in the general file R/helpers-data.R. If you are adding a new model, you should check if the data requirements are similar to any existing model and define the check_data method only for the methods that are unique to your model.\nThe check_data.mymodel() function should always take the arguments model, data, and formula and return the data with the necessary transformations. It should also call data = NextMethod(\"check_data\") to call the check_data method of the more general class.\n\n\n4.1.4 configure_model() methods\nThe configure_model.mymodel() method is where you specify the model formula, the family, any custom code, and the priors. The method is defined as follows for the IMM model:\n(we show only the IMMfull version)\nconfigure_model.IMMfull <- function(model, data, formula) {\n # retrieve arguments from the data check\n max_setsize <- attr(data, 'max_setsize')\n lure_idx_vars <- attr(data, \"lure_idx_vars\")\n nt_features <- model$other_vars$nt_features\n setsize_var <- model$other_vars$setsize\n nt_distance <- model$other_vars$nt_distance\n\n # construct main brms formula from the bmm formula\n bmm_formula <- formula\n formula <- bmf2bf(model, bmm_formula)\n\n # additional internal terms for the mixture model formula\n kappa_nts <- paste0('kappa', 2:max_setsize)\n kappa_unif <- paste0('kappa',max_setsize + 1)\n theta_nts <- paste0('theta',2:max_setsize)\n mu_nts <- paste0('mu', 2:max_setsize)\n mu_unif <- paste0('mu', max_setsize + 1)\n\n formula <- formula +\n glue_lf(kappa_unif,' ~ 1') +\n glue_lf(mu_unif, ' ~ 1') +\n brms::nlf(theta1 ~ c + a) +\n brms::nlf(kappa1 ~ kappa) +\n brms::nlf(expS ~ exp(s))\n\n for (i in 1:(max_setsize - 1)) {\n formula <- formula +\n glue_nlf(kappa_nts[i], ' ~ kappa') +\n glue_nlf(theta_nts[i], ' ~ ', lure_idx_vars[i], '*(exp(-expS*',nt_distance[i],')*c + a) + ',\n '(1-', lure_idx_vars[i], ')*(-100)') +\n glue_nlf(mu_nts[i], ' ~ ', nt_features[i])\n }\n\n # define mixture family\n vm_list = lapply(1:(max_setsize + 1), function(x) brms::von_mises(link = \"identity\"))\n vm_list$order = \"none\"\n family <- brms::do_call(brms::mixture, vm_list)\n\n # define prior\n additional_constants <- list()\n additional_constants[[kappa_unif]] <- -100\n additional_constants[[mu_unif]] <- 0\n prior <- fixed_pars_priors(model, additional_constants) +\n brms::prior_(\"normal(2, 1)\", class = \"b\", nlpar = \"kappa\") +\n brms::prior_(\"normal(0, 1)\", class = \"b\", nlpar = \"c\") +\n brms::prior_(\"normal(0, 1)\", class = \"b\", nlpar = \"a\") +\n brms::prior_(\"normal(0, 1)\", class = \"b\", nlpar = \"s\")\n\n # if there is setsize 1 in the data, set constant prior over thetant for setsize1\n if ((1 %in% data$ss_numeric) && !is.numeric(data[[setsize_var]])) {\n prior <- prior +\n brms::prior_(\"constant(0)\", class = \"b\", coef = paste0(setsize_var, 1), nlpar = \"a\") +\n brms::prior_(\"constant(0)\", class = \"b\", coef = paste0(setsize_var, 1), nlpar = \"s\")\n }\n\n out <- nlist(formula, data, family, prior)\n return(out)\n}\nThe configure_model method should always take the arguments model, data, and formula (as a bmmformula) and return a named list with the formula (as a brmsformula), the data, the family, and the priors.\nInside the configure_model method the brmsformula is generated using the bmf2bf function. This function converts the bmmformula passed to fit_model function into a brmsformula based on the information for the response variables provided in the bmmmodel object. There is a general method in R/helpers-formula.R to construct the formula for all models with a single response variable.\n# default method for all bmmmodels with 1 response variable\n#' @export\nbmf2bf.bmmmodel <- function(model, formula) {\n # check if the model has only one response variable and extract if TRUE\n resp <- model$resp_vars\n if (length(resp) > 1) {\n formula <- NextMethod(\"bmf2bf\")\n return(formula)\n }\n resp <- resp[[1]]\n\n # set base brms formula based on response\n brms_formula <- brms::bf(paste0(resp, \"~ 1\"))\n\n # for each dependent parameter, check if it is used as a non-linear predictor of\n # another parameter and add the corresponding brms function\n dpars <- names(formula)\n for (dpar in dpars) {\n pform <- formula[[dpar]]\n predictors <- rhs_vars(pform)\n if (any(predictors %in% dpars)) {\n brms_formula <- brms_formula + brms::nlf(pform)\n } else {\n brms_formula <- brms_formula + brms::lf(pform)\n }\n }\n brms_formula\n}\nFor models with more than one response variable, you will have to provide a model specific method of bmf2bf.myModel to convert the bmmformula into the brmsformula . This is done to avoid users having to specify complicated and long formulas specifying all additional response information in the brmsformula themselves. For more detailed information on the use of additional response information in a brmsformula please see the brmsformula documentation." + "text": "4.1 The Interference Measurement Model (IMM)\nThe model is defined in the file R/model_imm.R. Let’s go through the different parts.\n\n4.1.1 Model definition\nThe full IMM model is defined in the following internal model class:\n.model_imm <-\n function(resp_error = NULL, nt_features = NULL, nt_distances = NULL,\n set_size = NULL, regex = FALSE, links = NULL, version = \"full\",\n call = NULL, ...) {\n out <- structure(\n list(\n resp_vars = nlist(resp_error),\n other_vars = nlist(nt_features, nt_distances, set_size),\n domain = \"Visual working memory\",\n task = \"Continuous reproduction\",\n name = \"Interference measurement model by Oberauer and Lin (2017).\",\n version = version,\n citation = glue(\n \"Oberauer, K., & Lin, H.Y. (2017). An interference model \\\\\n of visual working memory. Psychological Review, 124(1), 21-59\"\n ),\n requirements = glue(\n '- The response vairable should be in radians and \\\\\n represent the angular error relative to the target\n - The non-target features should be in radians and be \\\\\n centered relative to the target'\n ),\n parameters = list(\n mu1 = glue(\n \"Location parameter of the von Mises distribution for memory \\\\\n responses (in radians). Fixed internally to 0 by default.\"\n ),\n kappa = \"Concentration parameter of the von Mises distribution\",\n a = \"General activation of memory items\",\n c = \"Context activation\",\n s = \"Spatial similarity gradient\"\n ),\n links = list(\n mu1 = \"tan_half\",\n kappa = \"log\",\n a = \"identity\",\n c = \"identity\",\n s = \"log\"\n ),\n fixed_parameters = list(mu1 = 0, mu2 = 0, kappa2 = -100),\n default_priors = list(\n mu1 = list(main = \"student_t(1, 0, 1)\"),\n kappa = list(main = \"normal(2, 1)\", effects = \"normal(0, 1)\"),\n a = list(main = \"normal(0, 1)\", effects = \"normal(0, 1)\"),\n c = list(main = \"normal(0, 1)\", effects = \"normal(0, 1)\"),\n s = list(main = \"normal(0, 1)\", effects = \"normal(0, 1)\")\n ),\n void_mu = FALSE\n ),\n # attributes\n regex = regex,\n regex_vars = c('nt_features', 'nt_distances'),\n class = c(\"bmmodel\", \"vwm\", \"non_targets\", \"imm\", paste0('imm_',version)),\n call = call\n )\n\n # add version specific information\n if (version == \"abc\") {\n out$parameters$s <- NULL\n out$links$s <- NULL\n out$default_priors$s <- NULL\n attributes(out)$regex_vars <- c('nt_features')\n } else if (version == \"bsc\") {\n out$parameters$a <- NULL\n out$links$a <- NULL\n out$default_priors$a <- NULL\n }\n\n out$links[names(links)] <- links\n out\n }\nHere is a brief explanation of the different components of the model definition:\nresp_vars: a list of response variables that the model will be fitted to. These variables will be used to construct the brmsformula passed to brms together with the bmmformula and the parameters of the model. The user has to provide these variables in the data frame that is passed to the bmm() function\nother_vars: a list of additional variables that are required for the model. This is used to check if the data contains all necessary information for fitting the model. In the example above, the IMM model requires the names of the variables specifying the non-target features relative to the target, the variables specifying the distance of the non-targets to the target, and the set_size. The user has to provide these variables in the data frame that is passed to the bmm() function\ndomain, task, name, citation, requirements: contains information about the model, such as the domain, task, name, citation, requirements. This information is used for generating help pages\nversion: if the model has multiple versions, this argument is specified by the user. Then it is used to dynamically adjust some information in the model object. In the case of the imm model, we have three versions - full, bsc and abc. As you can see at the end of the script, some parameters are deleted depending on the model version.\nparameters: contains a named list of all parameters in the model that can be estimated by the user and their description. This information is used internally to check if the bmmformula contains linear model formulas for all model parameters, and to decide what information to include in the summary of bmmfit objects.\nlinks: a named list providing the link function for each parameter. For example, kappa in the imm models has to be positive, so it is sampled on the log scale. This information is used in defining the model family and for the summary methods. If you want the user to be able to specify custom link functions, the next to last line of the script replaces the links with those provided by the user\nfixed_parameters in the imm several parameters are fixed to constant values internally to identify the model. Only one of them, mu1 is also part of the parameters block - this is the only fixed parameters that users can choose to estimate instead of leaving it fixed. mu2 and kappa2 cannot be freely estimated.\ndefault_priors a list of lists for each parameter in the model. Each prior has two components: main, the prior that will be put on the Intercept or on each level of a factor if the intercept is suppressed; effects, the prior to put on the regression coefficients relative to the intercept. The priors are described as in the set_prior function from brms. This information is used by the configure_prior() S3 method to automatically set the default priors for the model. The priors that you put here will be used by bmm() unless the users chooses to overwrite them.\nvoid_mu: For models using a custom family that do not contain a location or mu parameter, for example the diffusion model, we recommend setting up a void_mu parameter. This avoids arbitrarily using one of the model parameters as the mu parameter.\nregex: For the imm models, the nt_features and nt_distances variables can be specified with regular expressions, if the user sets regex = TRUE\ncall: this automatically records how the model was called so that the call can be printed in the summary after fitting. Leave it as is.\nclass: is the most important part. It contains the class of the model. This is used by generic S3 methods to perform data checks and model configuration. The classes should be ordered from most general to most specific. A general class exists when the same operations can be performed on multiple models. For example, the ‘3p’, ‘imm_abc’, ‘imm_bsc’ and ‘imm_full’ models all have non-targets and set_size arguments, so the same data checks can be performed on all of them, represented by the class non_targets. The first class should always be bmmodel, which is the main class for all models. The last class should be the specific model name, in this case imm_full, imm_abc or imm_bsc, which is automatically constructed if a version argument is provided. Otherwise the last class will be just the name of the model.\n\n\n4.1.2 Model alias\nThe model alias is a user-facing function that calls the internal model function. It is defined as follows:\n#' @title `r .model_imm()$name`\n#' @description Three versions of the `r .model_imm()$name` - the full, bsc, and abc.\n#' `IMMfull()`, `IMMbsc()`, and `IMMabc()` are deprecated and will be removed in the future.\n#' Please use `imm(version = 'full')`, `imm(version = 'bsc')`, or `imm(version = 'abc')` instead.\n#'\n#' @name imm\n#' @details `r model_info(.model_imm(), components =c('domain', 'task', 'name', 'citation'))`\n#' #### Version: `full`\n#' `r model_info(.model_imm(version = \"full\"), components = c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))`\n#' #### Version: `bsc`\n#' `r model_info(.model_imm(version = \"bsc\"), components = c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))`\n#' #### Version: `abc`\n#' `r model_info(.model_imm(version = \"abc\"), components =c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))`\n#'\n#' Additionally, all imm models have an internal parameter that is fixed to 0 to\n#' allow the model to be identifiable. This parameter is not estimated and is not\n#' included in the model formula. The parameter is:\n#'\n#' - b = \"Background activation (internally fixed to 0)\"\n#'\n#' @param resp_error The name of the variable in the provided dataset containing\n#' the response error. The response Error should code the response relative to\n#' the to-be-recalled target in radians. You can transform the response error\n#' in degrees to radian using the `deg2rad` function.\n#' @param nt_features A character vector with the names of the non-target\n#' variables. The non_target variables should be in radians and be centered\n#' relative to the target. Alternatively, if regex=TRUE, a regular\n#' expression can be used to match the non-target feature columns in the\n#' dataset.\n#' @param nt_distances A vector of names of the columns containing the distances\n#' of non-target items to the target item. Alternatively, if regex=TRUE, a regular\n#' expression can be used to match the non-target distances columns in the\n#' dataset. Only necessary for the `bsc` and `full` versions.\n#' @param set_size Name of the column containing the set size variable (if\n#' set_size varies) or a numeric value for the set_size, if the set_size is\n#' fixed.\n#' @param regex Logical. If TRUE, the `nt_features` and `nt_distances` arguments\n#' are interpreted as a regular expression to match the non-target feature\n#' columns in the dataset.\n#' @param links A list of links for the parameters. *Currently does not affect\n#' the model fits, but it will in the future.*\n#' @param version Character. The version of the IMM model to use. Can be one of\n#' `full`, `bsc`, or `abc`. The default is `full`.\n#' @param ... used internally for testing, ignore it\n#' @return An object of class `bmmodel`\n#' @keywords bmmodel\n#' @examples\n#' \\dontrun{\n#' # load data\n#' data <- oberauer_lin_2017\n#'\n#' # define formula\n#' ff <- bmmformula(\n#' kappa ~ 0 + set_size,\n#' c ~ 0 + set_size,\n#' a ~ 0 + set_size,\n#' s ~ 0 + set_size\n#' )\n#'\n#' # specify the full IMM model with explicit column names for non-target features and distances\n#' # by default this fits the full version of the model\n#' model1 <- imm(resp_error = \"dev_rad\",\n#' nt_features = paste0('col_nt', 1:7),\n#' nt_distances = paste0('dist_nt', 1:7),\n#' set_size = 'set_size')\n#'\n#' # fit the model\n#' fit <- bmm(formula = ff,\n#' data = data,\n#' model = model1,\n#' cores = 4,\n#' backend = 'cmdstanr')\n#'\n#' # alternatively specify the IMM model with a regular expression to match non-target features\n#' # this is equivalent to the previous call, but more concise\n#' model2 <- imm(resp_error = \"dev_rad\",\n#' nt_features = 'col_nt',\n#' nt_distances = 'dist_nt',\n#' set_size = 'set_size',\n#' regex = TRUE)\n#'\n#' # fit the model\n#' fit <- bmm(formula = ff,\n#' data = data,\n#' model = model2,\n#' cores = 4,\n#' backend = 'cmdstanr')\n#'\n#' # you can also specify the `bsc` or `abc` versions of the model to fit a reduced version\n#' model3 <- imm(resp_error = \"dev_rad\",\n#' nt_features = 'col_nt',\n#' set_size = 'set_size',\n#' regex = TRUE,\n#' version = 'abc')\n#' fit <- bmm(formula = ff,\n#' data = data,\n#' model = model3,\n#' cores = 4,\n#' backend = 'cmdstanr')\n#'}\n#' @export\nimm <- function(resp_error, nt_features, nt_distances, set_size, regex = FALSE,\n links = NULL, version = \"full\", ...) {\n call <- match.call()\n dots <- list(...)\n if (version == \"abc\") {\n nt_distances <- NULL\n }\n stop_missing_args()\n .model_imm(resp_error = resp_error, nt_features = nt_features,\n nt_distances = nt_distances, set_size = set_size, regex = regex,\n links = links, version = version, call = call, ...)\n}\nThe details will be filled out automatically from the model definition. This does some fancy formatting to include documentation about all versions of the model in the same help file.\n\n\n4.1.3 check_data() methods\nEach model should have a check_data.modelname() method that checks if the data contains all necessary information for fitting the model. For the IMM, the bsc and full versions require a special check for the nt_distances variables:\n#' @export\ncheck_data.imm_bsc <- function(model, data, formula) {\n data <- .check_data_imm_dist(model, data, formula)\n NextMethod(\"check_data\")\n}\n\n#' @export\ncheck_data.imm_full <- function(model, data, formula) {\n data <- .check_data_imm_dist(model, data, formula)\n NextMethod(\"check_data\")\n}\n\n.check_data_imm_dist <- function(model, data, formula) {\n nt_distances <- model$other_vars$nt_distances\n max_set_size <- attr(data, 'max_set_size')\n\n stopif(!isTRUE(all.equal(length(nt_distances), max_set_size - 1)),\n \"The number of columns for non-target distances in the argument \\\\\n 'nt_distances' should equal max(set_size)-1})\")\n\n # replace nt_distances\n data[,nt_distances][is.na(data[,nt_distances])] <- 999\n\n stopif(any(data[,nt_distances] < 0),\n \"All non-target distances to the target need to be postive.\")\n data\n}\nThe IMM models share methods with the mixture3p model, all of which are of class non_targets so the check_data.non_targets method is defined in the general file R/helpers-data.R. If you are adding a new model, you should check if the data requirements are similar to any existing model and define the check_data method only for the methods that are unique to your model.\nThe check_data.mymodel() function should always take the arguments model, data, and formula and return the data with the necessary transformations. It should also call data = NextMethod(\"check_data\") to call the check_data method of the more general class.\n\n\n4.1.4 configure_model() methods\nThe configure_model.mymodel() method is where you specify the model formula, the family, any custom code. The method is defined as follows for the IMM model:\n(we show only the IMMfull version)\n#' @export\nconfigure_model.imm_full <- function(model, data, formula) {\n # retrieve arguments from the data check\n max_set_size <- attr(data, 'max_set_size')\n lure_idx <- attr(data, \"lure_idx_vars\")\n nt_features <- model$other_vars$nt_features\n set_size_var <- model$other_vars$set_size\n nt_distances <- model$other_vars$nt_distances\n\n # construct main brms formula from the bmm formula\n formula <- bmf2bf(model, formula) +\n brms::lf(kappa2 ~ 1) +\n brms::lf(mu2 ~ 1) +\n brms::nlf(theta1 ~ c + a) +\n brms::nlf(kappa1 ~ kappa) +\n brms::nlf(expS ~ exp(s))\n\n # additional internal terms for the mixture model formula\n kappa_nts <- paste0(\"kappa\", 3:(max_set_size + 1))\n theta_nts <- paste0(\"theta\", 3:(max_set_size + 1))\n mu_nts <- paste0(\"mu\", 3:(max_set_size + 1))\n\n for (i in 1:(max_set_size - 1)) {\n formula <- formula +\n glue_nlf(\"{kappa_nts[i]} ~ kappa\") +\n glue_nlf(\"{theta_nts[i]} ~ {lure_idx[i]} * (exp(-expS*{nt_distances[i]})\",\n \" * c + a) + (1 - {lure_idx[i]}) * (-100)\") +\n glue_nlf(\"{mu_nts[i]} ~ {nt_features[i]}\")\n }\n\n\n # define mixture family\n formula$family <- brms::mixture(brms::von_mises(\"tan_half\"),\n brms::von_mises(\"identity\"),\n nmix = c(1, max_set_size),\n order = \"none\")\n\n nlist(formula, data)\n}\nThe configure_model method should always take the arguments model, data, and formula (as a bmmformula) and return a named list with the formula (as a brmsformula) and the data. The brmsfamily should be stored within the formula.\nInside the configure_model method the brmsformula is generated using the bmf2bf function. This function converts the bmmformula passed to bmm() function into a brmsformula based on the information for the response variables provided in the bmmmodel object. There is a general method in R/helpers-formula.R to construct the formula for all models with a single response variable.\n# default method for all bmmmodels with 1 response variable\n#' @export\nbmf2bf.bmmmodel <- function(model, formula) {\n # check if the model has only one response variable and extract if TRUE\n resp <- model$resp_vars\n if (length(resp) > 1) {\n formula <- NextMethod(\"bmf2bf\")\n return(formula)\n }\n resp <- resp[[1]]\n\n # set base brms formula based on response\n brms_formula <- brms::bf(paste0(resp, \"~ 1\"))\n\n # for each dependent parameter, check if it is used as a non-linear predictor of\n # another parameter and add the corresponding brms function\n dpars <- names(formula)\n for (dpar in dpars) {\n pform <- formula[[dpar]]\n predictors <- rhs_vars(pform)\n if (any(predictors %in% dpars)) {\n brms_formula <- brms_formula + brms::nlf(pform)\n } else {\n brms_formula <- brms_formula + brms::lf(pform)\n }\n }\n brms_formula\n}\nFor models with more than one response variable, you will have to provide a model specific method of bmf2bf.myModel to convert the bmmformula into the brmsformula . This is done to avoid users having to specify complicated and long formulas specifying all additional response information in the brmsformula themselves. For more detailed information on the use of additional response information in a brmsformula please see the brmsformula documentation." }, { "objectID": "example-model.html#the-signal-discrimination-model-sdm", "href": "example-model.html#the-signal-discrimination-model-sdm", "title": "4  Example model file", "section": "4.2 The Signal Discrimination Model (SDM)", - "text": "4.2 The Signal Discrimination Model (SDM)\nThe SDM model is defined in the file R/bmm_model_SDM.R. The SDM model differs in the configuration compared to the IMM model, as it requires custom STAN code. Let’s go through the different parts. As before, we start with the model definition.\n\n4.2.1 Model definition\n.model_sdmSimple <- function(resp_err, ...) {\n out <- list(\n resp_vars = nlist(resp_err),\n other_vars = nlist(),\n info = list(\n domain = 'Visual working memory',\n task = 'Continuous reproduction',\n name = 'Signal Discrimination Model (SDM) by Oberauer (2023)',\n citation = paste0('Oberauer, K. (2023). Measurement models for visual working memory - ',\n 'A factorial model comparison. Psychological Review, 130(3), 841-852'),\n version = 'Simple (no non-targets)',\n requirements = '- The response variable should be in radians and represent the angular error relative to the target',\n parameters = list(\n mu = 'Location parameter of the SDM distribution (in radians; by default fixed internally to 0)',\n c = 'Memory strength parameter of the SDM distribution',\n kappa = 'Precision parameter of the SDM distribution (log scale)'\n ),\n fixed_parameters = list(\n mu = 0\n )),\n void_mu = FALSE\n )\n class(out) <- c('bmmmodel', 'vwm', 'sdmSimple')\n out\n}\nThe model definition is similar to the IMM model, but the SDM model only requires the user to specify the response error, but not additional variables such as non-target variables. The class is also different, as the SDM model is not a subclass of the IMM model. We’ll skip the alias for the SDM model, as it is similar for every model.\n\n\n4.2.2 check_data() methods\nThe SDM does not require special data checks beyond it’s shared class with other vwm models, so we don’t need to define a check_data.sdmSimple method. The check_data.vwm method is defined in the general file R/helpers-data.R.\n\n\n4.2.3 configure_model() methods\nThe configure_model method for the SDM model is different compared to the IMM model, as it requires custom STAN code. The method is defined as follows:\n#' @export\nconfigure_model.sdmSimple <- function(model, data, formula) {\n # construct the family\n # note - c has a log link, but I've coded it manually for computational efficiency\n sdm_simple <- brms::custom_family(\n \"sdm_simple\", dpars = c(\"mu\", \"c\",\"kappa\"),\n links = c(\"identity\",\"identity\", \"log\"), lb = c(NA, NA, NA),\n type = \"real\", loop=FALSE,\n )\n family <- sdm_simple\n\n # prepare initial stanvars to pass to brms, model formula and priors\n sc_path <- system.file(\"stan_chunks\", package=\"bmm\")\n stan_funs <- read_lines2(paste0(sc_path, '/sdmSimple_funs.stan'))\n stan_tdata <- read_lines2(paste0(sc_path, '/sdmSimple_tdata.stan'))\n stan_likelihood <- read_lines2(paste0(sc_path, '/sdmSimple_likelihood.stan'))\n stanvars <- brms::stanvar(scode = stan_funs, block = \"functions\") +\n brms::stanvar(scode = stan_tdata, block = 'tdata') +\n brms::stanvar(scode = stan_likelihood, block = 'likelihood', position =\"end\")\n\n # construct main brms formula from the bmm formula\n bmm_formula <- formula\n formula <- bmf2bf(model, bmm_formula)\n\n # construct the default prior\n # TODO: for now it just fixes mu to 0, I have to add proper priors\n prior <- fixed_pars_priors(model)\n\n # set initial values to be sampled between [-1,1] to avoid extreme SDs that\n # can cause the sampler to fail\n init = 1\n\n # return the list\n out <- nlist(formula, data, family, prior, stanvars, init)\n return(out)\n}\nLines 5-10 use the brms::custom_family function to define a custom family for the SDM model. The dpars argument specifies the parameters of the model, and the links argument specifies the link functions for the parameters. For more information, see here\nLines 13-22 read the custom stan code from the inst/stan_chunks directory. This has to be specified with the system.file() command to ensure that the code is found when the package is installed. The stanvars object is used to pass custom stan code to the brms package. The stanvars object is a list of brms::stanvar objects, each of which contains the stan code for a specific part of the model. There is a separate .stan file for each part of the stan code, and each file is read into a separate brms::stanvar object.\nConverting the bmmformula to a brmsformula, specifying priors for fixed parameters, and collecting all arguements is the same as for the IMM model.\nWe will now look at how to construct all these parts for a new model. Hint: you don’t have to do it manually, you can use the use_model_template() function to generate templates for your model." + "text": "4.2 The Signal Discrimination Model (SDM)\nThe SDM model is defined in the file R/model_sdm.R. The SDM model differs in the configuration compared to the IMM model, as it requires custom STAN code. Let’s go through the different parts. As before, we start with the model definition.\n\n4.2.1 Model definition\n.model_sdm <- function(resp_error = NULL, links = NULL, \n version = \"simple\", call = NULL, ...) {\n out <- structure(\n list(\n resp_vars = nlist(resp_error),\n other_vars = nlist(),\n domain = 'Visual working memory',\n task = 'Continuous reproduction',\n name = 'Signal Discrimination Model (SDM) by Oberauer (2023)',\n citation = glue(\n 'Oberauer, K. (2023). Measurement models for visual working memory - \\\\\n A factorial model comparison. Psychological Review, 130(3), 841-852'\n ),\n version = version,\n requirements = glue(\n '- The response variable should be in radians and represent the angular \\\\\n error relative to the target'\n ),\n parameters = list(\n mu = glue('Location parameter of the SDM distribution (in radians; \\\\\n by default fixed internally to 0)'),\n c = 'Memory strength parameter of the SDM distribution',\n kappa = 'Precision parameter of the SDM distribution'\n ),\n links = list(\n mu = 'tan_half',\n c = 'log',\n kappa = 'log'\n ),\n fixed_parameters = list(mu = 0),\n default_priors = list(\n mu = list(main = \"student_t(1, 0, 1)\"),\n kappa = list(main = \"student_t(5, 1.75, 0.75)\", effects = \"normal(0, 1)\"),\n c = list(main = \"student_t(5, 2, 0.75)\", effects = \"normal(0, 1)\")\n ),\n void_mu = FALSE\n ),\n class = c('bmmodel', 'vwm', 'sdm', paste0(\"sdm_\", version)),\n call = call\n )\n out$links[names(links)] <- links\n out\n}\nThe model definition is similar to the IMM model, but the SDM model only requires the user to specify the response error, but not additional variables such as non-target variables. The class is also different, as the SDM model is not a subclass of the IMM model. We’ll skip the alias for the SDM model, as it is similar for every model.\n\n\n4.2.2 check_data() methods\nThe SDM shares a class with other vwm models, so we most of the data checks are performed by check_data.vwm method, defined in the general file R/helpers-data.R. The sdm however, samples much more quickly in Stan, if the data is sorted by the predictor variables, so we have the following custom data check method for the sdm:\n#' @export\ncheck_data.sdm <- function(model, data, formula) {\n # data sorted by predictors is necessary for speedy computation of normalizing constant\n data <- order_data_query(model, data, formula)\n NextMethod(\"check_data\")\n}\n\n\n4.2.3 configure_model() methods\nThe configure_model method for the SDM model is different compared to the IMM model, as it requires custom STAN code. The method is defined as follows:\n#' @export\nconfigure_model.sdm <- function(model, data, formula) {\n # construct the family\n # note - c has a log link, but I've coded it manually for computational efficiency\n sdm_simple <- brms::custom_family(\n \"sdm_simple\",\n dpars = c(\"mu\", \"c\", \"kappa\"),\n links = c(\"tan_half\", \"identity\", \"log\"),\n lb = c(NA, NA, NA),\n ub = c(NA, NA, NA),\n type = \"real\", loop = FALSE,\n log_lik = log_lik_sdm_simple,\n posterior_predict = posterior_predict_sdm_simple\n )\n\n # prepare initial stanvars to pass to brms, model formula and priors\n sc_path <- system.file(\"stan_chunks\", package = \"bmm\")\n stan_funs <- read_lines2(paste0(sc_path, \"/sdm_simple_funs.stan\"))\n stan_tdata <- read_lines2(paste0(sc_path, \"/sdm_simple_tdata.stan\"))\n stan_likelihood <- read_lines2(paste0(sc_path, \"/sdm_simple_likelihood.stan\"))\n stanvars <- brms::stanvar(scode = stan_funs, block = \"functions\") +\n brms::stanvar(scode = stan_tdata, block = \"tdata\") +\n brms::stanvar(scode = stan_likelihood, block = \"likelihood\", position = \"end\")\n\n # construct main brms formula from the bmm formula\n formula <- bmf2bf(model, formula)\n formula$family <- sdm_simple\n\n # set initial values to be sampled between [-1,1] to avoid extreme SDs that\n # can cause the sampler to fail\n init <- 1\n\n # return the list\n nlist(formula, data, stanvars, init)\n}\nLines 5-14 use the brms::custom_family function to define a custom family for the SDM model. The dpars argument specifies the parameters of the model, and the links argument specifies the link functions for the parameters. For more information, see here\nLines 17-23 read the custom STAN code from the inst/stan_chunks directory. This has to be specified with the system.file() command to ensure that the code is found when the package is installed. The stanvars object is used to pass custom STAN code to the brms package. The stanvars object is a list of brms::stanvar objects, each of which contains the STAN code for a specific part of the model. There is a separate .stan file for each part of the STAN code, and each file is read into a separate brms::stanvar object.\nConverting the bmmformula to a brmsformula and collecting all arguements is the same as for the IMM model.\n\n\n4.2.4 Postprocessing methods\nUnlike the imm model, the sdm model requires some special postprocessing because of the way the link functions are coded. These methods are applied after the brmsfit object is returned, at the very end of the bmm() pipeline:\n#' @export\npostprocess_brm.sdm <- function(model, fit, ...) {\n # manually set link_c to \"log\" since I coded it manually\n fit$family$link_c <- \"log\"\n fit$formula$family$link_c <- \"log\"\n fit\n}\n\n#' @export\nrevert_postprocess_brm.sdm <- function(model, fit, ...) {\n fit$family$link_c <- \"identity\"\n fit$formula$family$link_c <- \"identity\"\n fit\n}\nwe also have a couple of special functions for custom families in brms, which allow other typical tools such posterior_predict of bridgesampling to work:\nlog_lik_sdm_simple <- function(i, prep) {\n mu <- brms::get_dpar(prep, \"mu\", i = i)\n c <- brms::get_dpar(prep, \"c\", i = i)\n kappa <- brms::get_dpar(prep, \"kappa\", i = i)\n y <- prep$data$Y[i]\n dsdm(y, mu, c, kappa, log = T)\n}\n\nposterior_predict_sdm_simple <- function(i, prep, ...) {\n mu <- brms::get_dpar(prep, \"mu\", i = i)\n c <- brms::get_dpar(prep, \"c\", i = i)\n kappa <- brms::get_dpar(prep, \"kappa\", i = i)\n rsdm(length(mu), mu, c, kappa)\n}\nWe will now look at how to construct all these parts for a new model. Hint: you don’t have to do it manually, you can use the use_model_template() function to generate templates for your model." }, { "objectID": "add-new-model.html", @@ -131,14 +131,14 @@ "href": "add-new-model.html#example", "title": "5  Adding a new model", "section": "5.1 Example", - "text": "5.1 Example\nLet’s add a new model called gcm. Let’s assume that you have tested the model in Stan and you have the Stan code ready. We want to define a custom family for the gcm model, and we want to define the following blocks: likelihood, functions (see ?brms::stanvar for an explanation of the blocks).\nFirst you set up your system and git environment as described in Section 1. Then you can run the following code from RStudio in the root directory of the bmm package:\nuse_model_template(\"gcm\", custom_family = TRUE, stanvar_blocks = c(\"likelihood\", \"functions\"))\nThis will create the file bmm_model_gcm.R in the R/ directory and the files gcm_likelihood.stan and gcm_functions.stan in the inst/stan_chunks/ directory. The function will also open the files in RStudio. You will see the following output in the console:\n• Modify 'inst/stan_chunks/gcm_likelihood.stan'\n• Modify 'inst/stan_chunks/gcm_functions.stan'\n• Modify 'R/bmm_model_gcm.R'\nNow you can fill the files with the appropriate code. The stan files will be empty, but the R file will have the following structure:\n#############################################################################!\n# MODELS ####\n#############################################################################!\n# see file 'R/bmm_model_mixture3p.R' for an example\n\n.model_gcm <- function(resp_var1, required_arg1, required_arg2, ...) {\n out <- list(\n resp_vars = nlist(resp_var1),\n other_vars = nlist(required_arg1, required_arg2),\n info = list(\n domain = '',\n task = '',\n name = '',\n citation = '',\n version = '',\n requirements = '',\n parameters = list(),\n fixed_parameters = list()\n ),\n void_mu = FALSE\n )\n class(out) <- c('bmmmodel', 'gcm')\n out\n}\n# user facing alias\n# information in the title and details sections will be filled in\n# automatically based on the information in the .model_gcm()$info\n \n#' @title `r .model_gcm()$info$name`\n#' @name Model Name#' @details `r model_info(gcm(NA,NA))`\n#' @param resp_var1 A description of the response variable\n#' @param required_arg1 A description of the required argument\n#' @param required_arg2 A description of the required argument\n#' @param ... used internally for testing, ignore it\n#' @return An object of class `bmmmodel`\n#' @export\n#' @examples\n#' \\dontrun{\n#' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example)\n#' }\ngcm <- .model_gcm\n\n\n#############################################################################!\n# CHECK_DATA S3 methods ####\n#############################################################################!\n# A check_data.* function should be defined for each class of the model.\n# If a model shares methods with other models, the shared methods should be\n# defined in data-helpers.R. Put here only the methods that are specific to\n# the model. See ?check_data for details\n\n#' @export\ncheck_data.gcm <- function(model, data, formula) {\n # retrieve required arguments\n required_arg1 <- model$other_vars$required_arg1\n required_arg2 <- model$other_vars$required_arg2\n\n # check the data (required)\n\n\n # compute any necessary transformations (optional)\n\n # save some variables as attributes of the data for later use (optional)\n\n data = NextMethod('check_data')\n\n return(data)\n}\n\n\n#############################################################################!\n# Convert bmmformula to brmsformla methods ####\n#############################################################################!\n# A bmf2bf.* function should be defined if the default method for consructing\n# the brmsformula from the bmmformula does not apply\n# The shared method for all `bmmmodels` is defined in helpers-formula.R.\n# See ?bmf2bf for details.\n\n#' @export\nbmf2bf.gcm <- function(model, formula) {\n # retrieve required response arguments\n resp_var1 <- model$resp_vars$resp_var1\n resp_var2 <- model$resp_vars$resp_arg2\n\n # set the base brmsformula based \n brms_formula <- brms::bf(paste0(resp_var1,\" | \", vreal(resp_var2), \" ~ 1\" ),)\n\n # add bmmformula to the brms_formula\n # check if parameters are used as non-linear predictors in other formulas\n # and use the brms::lf() or brms::nlf() accordingly.\n dpars <- names(formula)\n for (dpar in dpars) {\n pform <- formula[[dpar]]\n predictors <- rhs_vars(pform)\n if (any(predictors %in% dpars)) {\n brms_formula <- brms_formula + brms::nlf(pform)\n } else {\n brms_formula <- brms_formula + brms::lf(pform)\n }\n }\n\n return(brms_formula)\n}\n\n\n#############################################################################!\n# CONFIGURE_MODEL S3 METHODS ####\n#############################################################################!\n# Each model should have a corresponding configure_model.* function. See\n# ?configure_model for more information.\n\n#' @export\nconfigure_model.gcm <- function(model, data, formula) {\n # retrieve required arguments\n required_arg1 <- model$other_vars$required_arg1\n required_arg2 <- model$other_vars$required_arg2\n\n # retrieve arguments from the data check\n my_precomputed_var <- attr(data, 'my_precomputed_var')\n\n # construct brms formula from the bmm formula\n bmm_formula <- formula\n formula <- bmf2bf(model, bmm_formula)\n\n # construct the family\n gcm_family <- brms::custom_family(\n 'gcm',\n dpars = c(),\n links = c(),\n lb = c(), # upper bounds for parameters\n ub = c(), # lower bounds for parameters\n type = '', # real for continous dv, int for discrete dv\n loop = TRUE, # is the likelihood vectorized\n )\n family <- gcm_family\n\n # prepare initial stanvars to pass to brms, model formula and priors\n sc_path <- system.file('stan_chunks', package='bmm')\n stan_likelihood <- readChar(paste0(sc_path, '/gcm_likelihood.stan'),\n file.info(paste0(sc_path, '/gcm_likelihood.stan'))$size)\n stan_functions <- readChar(paste0(sc_path, '/gcm_functions.stan'),\n file.info(paste0(sc_path, '/gcm_functions.stan'))$size)\n\n stanvars <- stanvar(scode = stan_likelihood, block = 'likelihood') +\n stanvar(scode = stan_functions, block = 'functions')\n\n # construct the default prior\n prior <- NULL\n\n # return the list\n out <- nlist(formula, data, family, prior, stanvars)\n return(out)\n}\n\n\n#############################################################################!\n# POSTPROCESS METHODS ####\n#############################################################################!\n# A postprocess_brm.* function should be defined for the model class. See \n# ?postprocess_brm for details\n\n#' @export\npostprocess_brm.gcm <- function(model, fit) {\n # any required postprocessing (if none, delete this section)\n\n return(fit)\n}\nNow you have to:\n\nFill the .model_gcm function with the appropriate code. This function should return a list with the variables that the model needs and a list with information about the model. The class of the list should be c('bmmmodel', 'gcm'). The information list should contain the following elements: domain, task, name, citation, version, requirements, and parameters. Rename the response arguements and the other required arguments, or delete the other arguments if you do not have any. You can see an example in the bmm_model_sdmSimple.R file.\nAdjust the user-facing alias. Here you should only rename the required arguments and fill in the @examples section with a full example. Everything else will be filled in automatically based on the information in the .model_gcm function.\nFill the check_data.gcm function with the appropriate code. This function should check the data and return the data. You may or may not need to compute any transformations or save some variables as attributes of the data.\nIf necessary define the bmf2bf.gcm method to convert the bmmformula to a brmsformula. The first step for this is always to specify the response variable and additional response information. Keep in mind that brms automatically interprets this formula as the linear model formula for the mu parameter of your custom family. Currently, brms requires all custom families to have a mu parameter. However, we recommend to code this parameter as a void_mu, and fix the intercept of this parameter to zero using constant priors. This way, the bmmformula can be used to only specify the linear or non-linear model for the parameters of a bmmmodel.\nFill the configure_model.gcm function with the appropriate code. This function should construct the formula, the family, the stanvars, and the prior. You can also retrieve any arguments you saved from the data check. Depending on your model, some of these parts might not be necessary. For example, for the mixture models (e.g. mixture3p), we construct a new formula, because we want to rename the arguments to make it easier for the user. For the sdmSimple model, we define the family ourselves, so we don’t need to change the formula.\nYou need to fill information about your custom family, and then fill the STAN files with your STAN code. Conveniently, you don’t have to edit lines 137-145: loading the STAN files and setting up the stanvars is set up automatically when calling the use_model_template function. Should you need to add more STAN files after you created the template, you can add the files in init/stan_chunks/ manually and edit those lines to additionally load the manually added files.\nFill the postprocess_brm.gcm function with the appropriate code. By postprocessing, we mean changes to the fitted brms model - like renaming parameters, etc. If you don’t need any postprocessing, you can delete this section." + "text": "5.1 Example\nLet’s add a new model called gcm. Let’s assume that you have tested the model in Stan and you have the Stan code ready. We want to define a custom family for the gcm model, and we want to define the following blocks: likelihood, functions (see ?brms::stanvar for an explanation of the blocks).\nFirst you set up your system and git environment as described in Section 1. Then you can run the following code from RStudio in the root directory of the bmm package:\nuse_model_template(\"gcm\", custom_family = TRUE, stanvar_blocks = c(\"likelihood\", \"functions\"))\nThis will create the file bmm_model_gcm.R in the R/ directory and the files gcm_likelihood.stan and gcm_functions.stan in the inst/stan_chunks/ directory. The function will also open the files in RStudio. You will see the following output in the console:\n• Modify 'inst/stan_chunks/gcm_likelihood.stan'\n• Modify 'inst/stan_chunks/gcm_functions.stan'\n• Modify 'R/model_gcm.R'\nNow you can fill the files with the appropriate code. The stan files will be empty, but the R file will have the following structure:\n#############################################################################!\n# MODELS ####\n#############################################################################!\n# see file 'R/bmm_model_mixture3p.R' for an example\n\n.model_gcm <- function(resp_var1 = NULL, required_arg1 = NULL, required_arg2 = NULL, \n links = NULL, version = NULL, call = NULL, ...) {\n out <- structure(\n list(\n resp_vars = nlist(resp_var1),\n other_vars = nlist(required_arg1, required_arg2),\n domain = '',\n task = '',\n name = '',\n citation = '',\n version = version,\n requirements = '',\n parameters = list(),\n links = list(),\n fixed_parameters = list(),\n default_priors = list(par1 = list(), par2 = list()),\n void_mu = FALSE\n ),\n class = c('bmmodel', 'gcm'),\n call = call\n )\n if(!is.null(version)) class(out) <- c(class(out), paste0(\"gcm_\",version))\n out$links[names(links)] <- links\n out\n}\n# user facing alias\n# information in the title and details sections will be filled in\n# automatically based on the information in the .model_gcm()$info\n \n#' @title `r .model_gcm()$name`\n#' @name Model Name#' @details `r model_info(.model_gcm())`\n#' @param resp_var1 A description of the response variable\n#' @param required_arg1 A description of the required argument\n#' @param required_arg2 A description of the required argument\n#' @param links A list of links for the parameters.\n#' @param version A character label for the version of the model. Can be empty or NULL if there is only one version. \n#' @param ... used internally for testing, ignore it\n#' @return An object of class `bmmodel`\n#' @export\n#' @examples\n#' \\dontrun{\n#' # put a full example here (see 'R/model_mixture3p.R' for an example)\n#' }\ngcm <- function(resp_var1, required_arg1, required_arg2, links = NULL, version = NULL, ...) {\n call <- match.call()\n stop_missing_args()\n .model_gcm(resp_var1 = resp_var1, required_arg1 = required_arg1, \n required_arg2 = required_arg2, links = links, version = version, \n call = call, ...)\n}\n\n\n#############################################################################!\n# CHECK_DATA S3 methods ####\n#############################################################################!\n# A check_data.* function should be defined for each class of the model.\n# If a model shares methods with other models, the shared methods should be\n# defined in helpers-data.R. Put here only the methods that are specific to\n# the model. See ?check_data for details.\n# (YOU CAN DELETE THIS SECTION IF YOU DO NOT REQUIRE ADDITIONAL DATA CHECKS)\n\n#' @export\ncheck_data.gcm <- function(model, data, formula) {\n # retrieve required arguments\n required_arg1 <- model$other_vars$required_arg1\n required_arg2 <- model$other_vars$required_arg2\n\n # check the data (required)\n\n # compute any necessary transformations (optional)\n\n # save some variables as attributes of the data for later use (optional)\n\n NextMethod('check_data')\n}\n\n\n#############################################################################!\n# Convert bmmformula to brmsformla methods ####\n#############################################################################!\n# A bmf2bf.* function should be defined if the default method for consructing\n# the brmsformula from the bmmformula does not apply (e.g if aterms are required).\n# The shared method for all `bmmodels` is defined in bmmformula.R.\n# See ?bmf2bf for details.\n# (YOU CAN DELETE THIS SECTION IF YOUR MODEL USES A STANDARD FORMULA WITH 1 RESPONSE VARIABLE)\n\n#' @export\nbmf2bf.gcm <- function(model, formula) {\n # retrieve required response arguments\n resp_var1 <- model$resp_vars$resp_var1\n resp_var2 <- model$resp_vars$resp_arg2\n\n # set the base brmsformula based \n brms_formula <- brms::bf(paste0(resp_var1,\" | \", vreal(resp_var2), \" ~ 1\" ),)\n\n # return the brms_formula to add the remaining bmmformulas to it.\n brms_formula\n}\n\n\n#############################################################################!\n# CONFIGURE_MODEL S3 METHODS ####\n#############################################################################!\n# Each model should have a corresponding configure_model.* function. See\n# ?configure_model for more information.\n\n#' @export\nconfigure_model.gcm <- function(model, data, formula) {\n # retrieve required arguments\n required_arg1 <- model$other_vars$required_arg1\n required_arg2 <- model$other_vars$required_arg2\n\n # retrieve arguments from the data check\n my_precomputed_var <- attr(data, 'my_precomputed_var')\n\n # construct brms formula from the bmm formula\n formula <- bmf2bf(model, formula)\n\n # construct the family & add to formula object \n gcm_family <- brms::custom_family(\n 'gcm',\n dpars = c(),\n links = c(),\n lb = c(), # upper bounds for parameters\n ub = c(), # lower bounds for parameters\n type = '', # real for continous dv, int for discrete dv\n loop = TRUE, # is the likelihood vectorized\n )\n formula$family <- gcm_family\n\n # prepare initial stanvars to pass to brms, model formula and priors\n sc_path <- system.file('stan_chunks', package='bmm')\n stan_likelihood <- read_lines2(paste0(sc_path, '/gcm_likelihood.stan'))\n stan_functions <- read_lines2(paste0(sc_path, '/gcm_functions.stan'))\n\n stanvars <- stanvar(scode = stan_likelihood, block = 'likelihood') +\n stanvar(scode = stan_functions, block = 'functions')\n\n # return the list\n nlist(formula, data, stanvars)\n}\n\n\n#############################################################################!\n# POSTPROCESS METHODS ####\n#############################################################################!\n# A postprocess_brm.* function should be defined for the model class. See \n# ?postprocess_brm for details\n\n#' @export\npostprocess_brm.gcm <- function(model, fit) {\n # any required postprocessing (if none, delete this section)\n fit\n}\nNow you have to:\n\nFill the .model_gcm function with the appropriate code. This function should return a list with all the variables specified above. The class of the list should be c('bmmmodel', 'gcm'). Rename the response arguments and the other required arguments, or delete the other arguments if you do not have any. You can see an example in the model_sdm.R file. Specify the parameters of the model, the link functions, what if any parameters are fixed (and to what value). It’s crucial that you set default priors for every parameter of the model, which should be informed by knowledge in the field.\nAdjust the user-facing alias. Here you should only rename the required arguments and fill in the @examples section with a full example. Everything else will be filled in automatically based on the information in the .model_gcm function.\nFill the check_data.gcm function with the appropriate code. This function should check the data and return the data. You may or may not need to compute any transformations or save some variables as attributes of the data.\nIf necessary define the bmf2bf.gcm method to convert the bmmformula to a brmsformula. The first step for this is always to specify the response variable and additional response information. Keep in mind that brms automatically interprets this formula as the linear model formula for the mu parameter of your custom family. Currently, brms requires all custom families to have a mu parameter. However, we recommend to code this parameter as a void_mu, and fix the intercept of this parameter to zero using constant priors. This way, the bmmformula can be used to only specify the linear or non-linear model for the parameters of a bmmmodel. if your model has a single response variable, you can delete this section.\nFill the configure_model.gcm function with the appropriate code. This function should construct the formula, the family, the stanvars. You can also retrieve any arguments you saved from the data check. Depending on your model, some of these parts might not be necessary. For example, for the mixture models (e.g. mixture3p), we construct a new formula, because we want to rename the arguments to make it easier for the user. For the sdmSimple model, we define the family ourselves, so we don’t need to change the formula.\nYou need to fill information about your custom family, and then fill the STAN files with your STAN code. Conveniently, you don’t have to edit lines 137-145: loading the STAN files and setting up the stanvars is set up automatically when calling the use_model_template function. Should you need to add more STAN files after you created the template, you can add the files in init/stan_chunks/ manually and edit those lines to additionally load the manually added files.\nFill the postprocess_brm.gcm function with the appropriate code. By postprocessing, we mean changes to the fitted brms model - like renaming parameters, etc. If you don’t need any postprocessing, you can delete this section." }, { "objectID": "add-new-model.html#testing", "href": "add-new-model.html#testing", "title": "5  Adding a new model", "section": "5.2 Testing", - "text": "5.2 Testing\nUnit testing is extremely important. You should test your model with the testthat package. You can use the use_test() function to create a test file for your model. See file tests/testthat/test-fit_model.R for an example of how we test the existing models. BRMS models take a long time to fit, so we don’t test the actual fitting process. fit_model() provides an argument backend=\"mock\", which will return a mock object instead of fitting the model. This ensures that the entire pipeline works without errors. For example, here’s a test of the IMMfull model:\ntest_that('Available mock models run without errors',{\n withr::local_options('bmm.silent'=2)\n skip_on_cran()\n dat <- data.frame(\n resp_err = rIMM(n = 5),\n Item2_rel = 2,\n Item3_rel = -1.5,\n spaD2 = 0.5,\n spaD3 = 2\n )\n\n # two-parameter model mock fit\n f <- bmmformula(kappa ~ 1, c ~ 1, a ~ 1, s ~ 1)\n mock_fit <- fit_model(f, dat, IMMfull(resp_err = \"resp_err\", setsize = 3, nt_features = paste0('Item',2:3,'_rel'), nt_distance=paste0('spaD',2:3)), backend = \"mock\", mock_fit = 1, rename=FALSE)\n expect_equal(mock_fit$fit, 1)\n expect_type(mock_fit$fit_args, \"list\")\n expect_equal(names(mock_fit$fit_args[1:4]), c(\"formula\", \"data\", \"family\", \"prior\"))\n})\nThe tests based on the testthat package are run every time you call the check() command. Before you submit your changes, make sure that all tests pass.\n\n\n\n\n\n\nImportant\n\n\n\nAdditionally, you should perform a full test of the model by running it in a separate script and ensuring it gives meaningful results. At the very least, you should perform basic parameter recovery simulations for hyper-parameters (i.e. means and standard deviations) as well as subject-level parameters to give users an idea of how much data they need to adequately estimate the model.\nWe are in the process of establishing guidelines for that." + "text": "5.2 Testing\nUnit testing is extremely important. You should test your model with the testthat package. You can use the use_test() function to create a test file for your model. See file tests/testthat/test-bmm.R for an example of how we test the existing models. BRMS models take a long time to fit, so we don’t test the actual fitting process. fit_model() provides an argument backend=\"mock\", which will return a mock object instead of fitting the model. This ensures that the entire pipeline works without errors. For example, here’s a test of the IMMfull model:\ntest_that('Available mock models run without errors',{\n withr::local_options('bmm.silent'=2)\n skip_on_cran()\n dat <- data.frame(\n resp_err = rIMM(n = 5),\n Item2_rel = 2,\n Item3_rel = -1.5,\n spaD2 = 0.5,\n spaD3 = 2\n )\n\n # two-parameter model mock fit\n f <- bmmformula(kappa ~ 1, c ~ 1, a ~ 1, s ~ 1)\n mock_fit <- bmm(f, dat, \n imm(resp_err = \"resp_err\", \n setsize = 3, \n nt_features = paste0('Item',2:3,'_rel'), \n nt_distance=paste0('spaD',2:3)), \n backend = \"mock\", mock_fit = 1, rename=FALSE)\n expect_equal(mock_fit$fit, 1)\n expect_type(mock_fit$bmm$fit_args, \"list\")\n expect_equal(names(mock_fit$fit_args[1:4]), c(\"formula\", \"data\"))\n})\nThe tests based on the testthat package are run every time you call the check() command. Before you submit your changes, make sure that all tests pass.\n\n\n\n\n\n\nImportant\n\n\n\nAdditionally, you should perform a full test of the model by running it in a separate script and ensuring it gives meaningful results. At the very least, you should perform basic parameter recovery simulations for hyper-parameters (i.e. means and standard deviations) as well as subject-level parameters to give users an idea of how much data they need to adequately estimate the model.\nWe are in the process of establishing guidelines for that." }, { "objectID": "add-new-model.html#add-an-example-dataset", @@ -153,5 +153,12 @@ "title": "5  Adding a new model", "section": "5.4 Add a vignette", "text": "5.4 Add a vignette\nAll new models should come with a vignette that explains some basic information about the model and how to estimate it with bmm. You can use the use_vignette() function to create a new vignette. See here for more information. The vignettes will be published automatically on the package website under “Articles” when the pull request is approved. You can browse the source code for the existing vignettes in the vignettes/ directory. You can see the published version of the existing vignettes here.\nAnd that’s it! You have added a new model to the bmm package. You can now submit your changes to the bmm package repository." + }, + { + "objectID": "bmm-architecture.html#the-main-workhorse---bmm", + "href": "bmm-architecture.html#the-main-workhorse---bmm", + "title": "3  BMM code structure", + "section": "3.1 The main workhorse - bmm()", + "text": "3.1 The main workhorse - bmm()\nThe main function for fitting models is bmm(). This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package.\nbmm <- function(formula, data, model,\n prior = NULL,\n sort_data = getOption('bmm.sort_data', \"check\"),\n silent = getOption('bmm.silent', 1),\n backend = getOption('brms.backend', NULL), ...) {\n deprecated_args(...)\n dots <- list(...)\n\n # set temporary global options and return modified arguments for brms\n configure_opts <- nlist(sort_data, silent, backend, parallel = dots$parallel,\n cores = dots$cores)\n opts <- configure_options(configure_opts)\n dots$parallel <- NULL\n\n # check model, formula and data, and transform data if necessary\n user_formula <- formula\n model <- check_model(model, data, formula)\n data <- check_data(model, data, formula)\n formula <- check_formula(model, data, formula)\n\n # generate the model specification to pass to brms later\n config_args <- configure_model(model, data, formula)\n\n # configure the default prior and combine with user-specified prior\n prior <- configure_prior(model, data, config_args$formula, prior)\n\n # estimate the model\n fit_args <- combine_args(nlist(config_args, opts, dots, prior))\n fit <- call_brm(fit_args)\n\n # model post-processing\n postprocess_brm(model, fit, fit_args = fit_args, user_formula = user_formula,\n configure_opts = configure_opts)\n}\nIt calls several subroutines, implemented as generic S3 methods, to:\n\nconfigure_options() - to configure local options for fitting, such as parallel sampling,\ncheck_model() - check if the model exists\ncheck_formula() - check if the formula is specified correctly and transform it to a brmsformula\ncheck_data() - check whether the data contains all necessary information\nconfigure_model() - configures the model called for fitting\nconfigure_prior() - sets the default priors for the model and combines them with the user prior\ncombine_priors() - combines the user specified priors with default priors provided for each model\ncall_brm() - fit the model using the brm() function from the brms package\npostprocess_brm() - to post-process the fitted model" } ] \ No newline at end of file diff --git a/_book/setup.html b/_book/setup.html index 45cdf99..a80a56a 100644 --- a/_book/setup.html +++ b/_book/setup.html @@ -249,8 +249,9 @@

    • Make sure you have the devtools package and a few others installed and loaded

      install.packages(c("devtools", "roxygen2", "testthat", "knitr"))
      -library(devtools)
      -

      To avoid having to load the package every time, you can add the following code to your .Rprofile file

      +library(devtools) +install_dev_deps()
    +

    To avoid having to load the devtools package every time, you can add the following code to your .Rprofile file

    if (interactive()) {
       suppressMessages(require(devtools))
     }
    diff --git a/add-new-model.qmd b/add-new-model.qmd index 23938fa..a54dfef 100644 --- a/add-new-model.qmd +++ b/add-new-model.qmd @@ -27,7 +27,7 @@ use_model_template( #### Arguments {.unnumbered} | Argument | Description | -|-------------------|-----------------------------------------------------| +|---------------------|---------------------------------------------------| | `model_name` | A string with the name of the model. The file will be named bmm_model_model_name.R and all necessary functions will be created with the appropriate names and structure. The file will be saved in the ⁠R/⁠ directory | | `custom_family` | Logical; Do you plan to define a brms::custom_family()? If TRUE the function will add a section for the custom family, placeholders for the stan_vars and corresponding empty .stan files in ⁠inst/stan_chunks/⁠, that you can fill For an example, see the sdmSimple model in ⁠/R/bmm_model_sdmSimple.R⁠. If FALSE (default) the function will not add the custom family section nor stan files. | | `stanvar_blocks` | A character vector with the names of the blocks that will be added to the custom family section. See brms::stanvar() for more details. The default lists all the possible blocks, but it is unlikely that you will need all of them. You can specify a vector of only those that you need. The function will add a section for each block in the list | @@ -49,7 +49,7 @@ This will create the file `bmm_model_gcm.R` in the `R/` directory and the files ``` • Modify 'inst/stan_chunks/gcm_likelihood.stan' • Modify 'inst/stan_chunks/gcm_functions.stan' -• Modify 'R/bmm_model_gcm.R' +• Modify 'R/model_gcm.R' ``` Now you can fill the files with the appropriate code. The stan files will be empty, but the R file will have the following structure: @@ -60,42 +60,56 @@ Now you can fill the files with the appropriate code. The stan files will be emp #############################################################################! # see file 'R/bmm_model_mixture3p.R' for an example -.model_gcm <- function(resp_var1, required_arg1, required_arg2, ...) { - out <- list( - resp_vars = nlist(resp_var1), - other_vars = nlist(required_arg1, required_arg2), - info = list( - domain = '', - task = '', - name = '', - citation = '', - version = '', - requirements = '', - parameters = list(), - fixed_parameters = list() - ), - void_mu = FALSE +.model_gcm <- function(resp_var1 = NULL, required_arg1 = NULL, required_arg2 = NULL, + links = NULL, version = NULL, call = NULL, ...) { + out <- structure( + list( + resp_vars = nlist(resp_var1), + other_vars = nlist(required_arg1, required_arg2), + domain = '', + task = '', + name = '', + citation = '', + version = version, + requirements = '', + parameters = list(), + links = list(), + fixed_parameters = list(), + default_priors = list(par1 = list(), par2 = list()), + void_mu = FALSE + ), + class = c('bmmodel', 'gcm'), + call = call ) - class(out) <- c('bmmmodel', 'gcm') + if(!is.null(version)) class(out) <- c(class(out), paste0("gcm_",version)) + out$links[names(links)] <- links out } # user facing alias # information in the title and details sections will be filled in # automatically based on the information in the .model_gcm()$info -#' @title `r .model_gcm()$info$name` -#' @name Model Name#' @details `r model_info(gcm(NA,NA))` +#' @title `r .model_gcm()$name` +#' @name Model Name#' @details `r model_info(.model_gcm())` #' @param resp_var1 A description of the response variable #' @param required_arg1 A description of the required argument #' @param required_arg2 A description of the required argument +#' @param links A list of links for the parameters. +#' @param version A character label for the version of the model. Can be empty or NULL if there is only one version. #' @param ... used internally for testing, ignore it -#' @return An object of class `bmmmodel` +#' @return An object of class `bmmodel` #' @export #' @examples #' \dontrun{ -#' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example) +#' # put a full example here (see 'R/model_mixture3p.R' for an example) #' } -gcm <- .model_gcm +gcm <- function(resp_var1, required_arg1, required_arg2, links = NULL, version = NULL, ...) { + call <- match.call() + stop_missing_args() + .model_gcm(resp_var1 = resp_var1, required_arg1 = required_arg1, + required_arg2 = required_arg2, links = links, version = version, + call = call, ...) +} #############################################################################! @@ -103,8 +117,9 @@ gcm <- .model_gcm #############################################################################! # A check_data.* function should be defined for each class of the model. # If a model shares methods with other models, the shared methods should be -# defined in data-helpers.R. Put here only the methods that are specific to -# the model. See ?check_data for details +# defined in helpers-data.R. Put here only the methods that are specific to +# the model. See ?check_data for details. +# (YOU CAN DELETE THIS SECTION IF YOU DO NOT REQUIRE ADDITIONAL DATA CHECKS) #' @export check_data.gcm <- function(model, data, formula) { @@ -114,14 +129,11 @@ check_data.gcm <- function(model, data, formula) { # check the data (required) - # compute any necessary transformations (optional) # save some variables as attributes of the data for later use (optional) - data = NextMethod('check_data') - - return(data) + NextMethod('check_data') } @@ -129,9 +141,10 @@ check_data.gcm <- function(model, data, formula) { # Convert bmmformula to brmsformla methods #### #############################################################################! # A bmf2bf.* function should be defined if the default method for consructing -# the brmsformula from the bmmformula does not apply -# The shared method for all `bmmmodels` is defined in helpers-formula.R. +# the brmsformula from the bmmformula does not apply (e.g if aterms are required). +# The shared method for all `bmmodels` is defined in bmmformula.R. # See ?bmf2bf for details. +# (YOU CAN DELETE THIS SECTION IF YOUR MODEL USES A STANDARD FORMULA WITH 1 RESPONSE VARIABLE) #' @export bmf2bf.gcm <- function(model, formula) { @@ -142,21 +155,8 @@ bmf2bf.gcm <- function(model, formula) { # set the base brmsformula based brms_formula <- brms::bf(paste0(resp_var1," | ", vreal(resp_var2), " ~ 1" ),) - # add bmmformula to the brms_formula - # check if parameters are used as non-linear predictors in other formulas - # and use the brms::lf() or brms::nlf() accordingly. - dpars <- names(formula) - for (dpar in dpars) { - pform <- formula[[dpar]] - predictors <- rhs_vars(pform) - if (any(predictors %in% dpars)) { - brms_formula <- brms_formula + brms::nlf(pform) - } else { - brms_formula <- brms_formula + brms::lf(pform) - } - } - - return(brms_formula) + # return the brms_formula to add the remaining bmmformulas to it. + brms_formula } @@ -176,10 +176,9 @@ configure_model.gcm <- function(model, data, formula) { my_precomputed_var <- attr(data, 'my_precomputed_var') # construct brms formula from the bmm formula - bmm_formula <- formula - formula <- bmf2bf(model, bmm_formula) + formula <- bmf2bf(model, formula) - # construct the family + # construct the family & add to formula object gcm_family <- brms::custom_family( 'gcm', dpars = c(), @@ -189,24 +188,18 @@ configure_model.gcm <- function(model, data, formula) { type = '', # real for continous dv, int for discrete dv loop = TRUE, # is the likelihood vectorized ) - family <- gcm_family + formula$family <- gcm_family # prepare initial stanvars to pass to brms, model formula and priors sc_path <- system.file('stan_chunks', package='bmm') - stan_likelihood <- readChar(paste0(sc_path, '/gcm_likelihood.stan'), - file.info(paste0(sc_path, '/gcm_likelihood.stan'))$size) - stan_functions <- readChar(paste0(sc_path, '/gcm_functions.stan'), - file.info(paste0(sc_path, '/gcm_functions.stan'))$size) + stan_likelihood <- read_lines2(paste0(sc_path, '/gcm_likelihood.stan')) + stan_functions <- read_lines2(paste0(sc_path, '/gcm_functions.stan')) stanvars <- stanvar(scode = stan_likelihood, block = 'likelihood') + stanvar(scode = stan_functions, block = 'functions') - # construct the default prior - prior <- NULL - # return the list - out <- nlist(formula, data, family, prior, stanvars) - return(out) + nlist(formula, data, stanvars) } @@ -219,22 +212,21 @@ configure_model.gcm <- function(model, data, formula) { #' @export postprocess_brm.gcm <- function(model, fit) { # any required postprocessing (if none, delete this section) - - return(fit) + fit } ``` Now you have to: -1. Fill the `.model_gcm` function with the appropriate code. This function should return a list with the variables that the model needs and a list with information about the model. The class of the list should be `c('bmmmodel', 'gcm')`. The information list should contain the following elements: `domain`, `task`, `name`, `citation`, `version`, `requirements`, and `parameters`. Rename the response arguements and the other required arguments, or delete the other arguments if you do not have any. You can see an example in the `bmm_model_sdmSimple.R` file. +1. Fill the `.model_gcm` function with the appropriate code. This function should return a list with all the variables specified above. The class of the list should be `c('bmmmodel', 'gcm')`. Rename the response arguments and the other required arguments, or delete the other arguments if you do not have any. You can see an example in the `model_sdm.R` file. Specify the parameters of the model, the link functions, what if any parameters are fixed (and to what value). It's crucial that you set default priors for every parameter of the model, which should be informed by knowledge in the field. 2. Adjust the user-facing alias. Here you should only rename the required arguments and fill in the `@examples` section with a full example. Everything else will be filled in automatically based on the information in the `.model_gcm` function. 3. Fill the `check_data.gcm` function with the appropriate code. This function should check the data and return the data. You may or may not need to compute any transformations or save some variables as attributes of the data. -4. If necessary define the `bmf2bf.gcm` method to convert the `bmmformula` to a `brmsformula`. The first step for this is always to specify the response variable and additional response information. Keep in mind that `brms` automatically interprets this formula as the linear model formula for the `mu` parameter of your custom family. Currently, `brms` requires all custom families to have a `mu` parameter. However, we recommend to code this parameter as a `void_mu`, and fix the intercept of this parameter to zero using constant priors. This way, the `bmmformula` can be used to only specify the linear or non-linear model for the parameters of a `bmmmodel.` +4. If necessary define the `bmf2bf.gcm` method to convert the `bmmformula` to a `brmsformula`. The first step for this is always to specify the response variable and additional response information. Keep in mind that `brms` automatically interprets this formula as the linear model formula for the `mu` parameter of your custom family. Currently, `brms` requires all custom families to have a `mu` parameter. However, we recommend to code this parameter as a `void_mu`, and fix the intercept of this parameter to zero using constant priors. This way, the `bmmformula` can be used to only specify the linear or non-linear model for the parameters of a `bmmmodel`. *if your model has a single response variable, you can delete this section.* -5. Fill the `configure_model.gcm` function with the appropriate code. This function should construct the formula, the family, the stanvars, and the prior. You can also retrieve any arguments you saved from the data check. Depending on your model, some of these parts might not be necessary. For example, for the mixture models (e.g. `mixture3p`), we construct a new formula, because we want to rename the arguments to make it easier for the user. For the `sdmSimple` model, we define the family ourselves, so we don't need to change the formula. +5. Fill the `configure_model.gcm` function with the appropriate code. This function should construct the formula, the family, the stanvars. You can also retrieve any arguments you saved from the data check. Depending on your model, some of these parts might not be necessary. For example, for the mixture models (e.g. `mixture3p`), we construct a new formula, because we want to rename the arguments to make it easier for the user. For the `sdmSimple` model, we define the family ourselves, so we don't need to change the formula. You need to fill information about your custom family, and then fill the STAN files with your STAN code. Conveniently, you don't have to edit lines 137-145: loading the STAN files and setting up the stanvars is set up automatically when calling the `use_model_template` function. Should you need to add more STAN files after you created the template, you can add the files in `init/stan_chunks/` manually and edit those lines to additionally load the manually added files. @@ -242,7 +234,7 @@ Now you have to: ## Testing -Unit testing is extremely important. You should test your model with the `testthat` package. You can use the `use_test()` function to create a test file for your model. See file `tests/testthat/test-fit_model.R` for an example of how we test the existing models. BRMS models take a long time to fit, so we don't test the actual fitting process. `fit_model()` provides an argument `backend="mock"`, which will return a mock object instead of fitting the model. This ensures that the entire pipeline works without errors. For example, here's a test of the `IMMfull` model: +Unit testing is extremely important. You should test your model with the `testthat` package. You can use the `use_test()` function to create a test file for your model. See file `tests/testthat/test-bmm.R` for an example of how we test the existing models. BRMS models take a long time to fit, so we don't test the actual fitting process. `fit_model()` provides an argument `backend="mock"`, which will return a mock object instead of fitting the model. This ensures that the entire pipeline works without errors. For example, here's a test of the `IMMfull` model: ``` r test_that('Available mock models run without errors',{ @@ -258,10 +250,15 @@ test_that('Available mock models run without errors',{ # two-parameter model mock fit f <- bmmformula(kappa ~ 1, c ~ 1, a ~ 1, s ~ 1) - mock_fit <- fit_model(f, dat, IMMfull(resp_err = "resp_err", setsize = 3, nt_features = paste0('Item',2:3,'_rel'), nt_distance=paste0('spaD',2:3)), backend = "mock", mock_fit = 1, rename=FALSE) + mock_fit <- bmm(f, dat, + imm(resp_err = "resp_err", + setsize = 3, + nt_features = paste0('Item',2:3,'_rel'), + nt_distance=paste0('spaD',2:3)), + backend = "mock", mock_fit = 1, rename=FALSE) expect_equal(mock_fit$fit, 1) - expect_type(mock_fit$fit_args, "list") - expect_equal(names(mock_fit$fit_args[1:4]), c("formula", "data", "family", "prior")) + expect_type(mock_fit$bmm$fit_args, "list") + expect_equal(names(mock_fit$fit_args[1:4]), c("formula", "data")) }) ``` diff --git a/bmm-architecture.qmd b/bmm-architecture.qmd index 6a14e15..0e614e8 100644 --- a/bmm-architecture.qmd +++ b/bmm-architecture.qmd @@ -2,36 +2,44 @@ Adding a new model is straightforward using the `use_model_template()` function, which will be described in the next section. You do not have to edit any of the files below, but it will be helpful to understand the structure of the package. -## The main workhorse - `fit_model()` +## The main workhorse - `bmm()` -The main function for fitting models is `fit_model()`. This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package: +The main function for fitting models is `bmm()`. This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package. ``` r -fit_model <- function(formula, data, model, parallel = FALSE, - chains = 4, prior = NULL, ...) { - # enable parallel sampling if parallel equals TRUE - opts <- configure_options(nlist(parallel, chains, silent)) +bmm <- function(formula, data, model, + prior = NULL, + sort_data = getOption('bmm.sort_data', "check"), + silent = getOption('bmm.silent', 1), + backend = getOption('brms.backend', NULL), ...) { + deprecated_args(...) + dots <- list(...) + + # set temporary global options and return modified arguments for brms + configure_opts <- nlist(sort_data, silent, backend, parallel = dots$parallel, + cores = dots$cores) + opts <- configure_options(configure_opts) + dots$parallel <- NULL # check model, formula and data, and transform data if necessary - model <- check_model(model) - formula <- check_formula(model, formula) + user_formula <- formula + model <- check_model(model, data, formula) data <- check_data(model, data, formula) + formula <- check_formula(model, data, formula) # generate the model specification to pass to brms later config_args <- configure_model(model, data, formula) - # combine the default prior plus user given prior - config_args$prior <- combine_prior(config_args$prior, prior) + # configure the default prior and combine with user-specified prior + prior <- configure_prior(model, data, config_args$formula, prior) # estimate the model - dots <- list(...) - fit_args <- combine_args(nlist(config_args, opts, dots)) + fit_args <- combine_args(nlist(config_args, opts, dots, prior)) fit <- call_brm(fit_args) - # model postprocessing - fit <- postprocess_brm(model, fit) - - return(fit) + # model post-processing + postprocess_brm(model, fit, fit_args = fit_args, user_formula = user_formula, + configure_opts = configure_opts) } ``` @@ -39,10 +47,10 @@ It calls several subroutines, implemented as generic S3 methods, to: - `configure_options()` - to configure local options for fitting, such as parallel sampling, - `check_model()` - check if the model exists -- `check_formula()` - check if the formula is specified correctly +- `check_formula()` - check if the formula is specified correctly and transform it to a brmsformula - `check_data()` - check whether the data contains all necessary information - `configure_model()` - configures the model called for fitting -- `combine_priors()` - combines the user specified priors with default priors provided for each model +- `configure_prior()` - sets the default priors for the model and combines them with the user prior - `call_brm()` - fit the model using the `brm()` function from the `brms` package - `postprocess_brm()` - to post-process the fitted model @@ -51,24 +59,31 @@ It calls several subroutines, implemented as generic S3 methods, to: All models in the package are defined as S3 classes and follow a strict template. This allows us to implement general methods for handling model fitting, data checking, and post-processing. Each model has an internal function that defines the model and its parameters, and a user-facing alias. For a complete example model file and an explanation, see [Section @sec-example-model]. The general model template looks like this: ``` r -.model_myNewModel <- function(resp_var1, required_arg1, required_arg2, ...) { - out <- list( - resp_vars = nlist(resp_var1), - other_vars = nlist(required_arg1, required_arg2), - info = list( - domain = '', - task = '', - name = '', - citation = '', - version = '', - requirements = '', - parameters = list(), - fixed_parameters = list() - ), +.model_my_new_model <- function(resp_var1 = NULL, required_args1 = NULL, + required_arg2 = NULL, links = NULL, version = NULL, + call = NULL, ...) { + out <- structure( + list( + resp_vars = nlist(resp_error), + other_vars = nlist(), + domain = "", + task = "", + name = "", + version = "", + citation = "", + requirements = "", + parameters = list(), + links = list(), + fixed_parameters = list(), + default_priors = list(), + version = version, void_mu = FALSE - ) - class(out) <- c('bmmmodel', 'myNewModel') - out + ), + class = c("bmmodel", "my_new_model"), + call = call + ) + out$links[names(links)] <- links + out } ``` @@ -78,8 +93,8 @@ Each model is accompanied by a user-facing alias, the documentation of which is # user facing alias # information in the title and details sections will be filled in # automatically based on the information in the .model_modelname()$info -#' @title `r .model_myNewModel()$info$name` -#' @name Model Name#' @details `r model_info(myNewModel(NA,NA))` +#' @title `r .model_my_new_model()name` +#' @name Model Name#' @details `r model_info(.model_my_new_model())` #' @param resp_var1 A description of the response variable #' @param required_arg1 A description of the required argument #' @param required_arg2 A description of the required argument @@ -90,15 +105,22 @@ Each model is accompanied by a user-facing alias, the documentation of which is #' \dontrun{ #' # put a full example here (see 'R/bmm_model_mixture3p.R' for an example) #' } -myNewModel <- .model_myNewModel +my_new_model <- function(resp_var1, required_arg1, required_arg2, + links = NULL, version = NULL, ...) { + call <- match.call() + stop_missing_args() + .model_my_new_model(resp_var1 = resp_var1, required_arg1 = required_arg1, + required_arg2 = required_arg2, links = links, version = version, + call = call, ...) +} ``` Then users can fit the model using the `fit_model()` function, and the model will be automatically recognized and handled by the package: ``` r -fit <- fit_model(formula, - data = mydata, - model = modelname(resp_var1, required_arg1, required_arg2)) +fit <- bmm(formula = my_bmmformula, + data = my_data, + model = my_new_model(resp_var1, required_arg1, required_arg2)) ``` ## S3 methods @@ -117,27 +139,31 @@ and it will call a function `configure_model.modelname()` that is specified for The `bmm` package is organized into several files. The main files are: -### `R/fit_model.R` {.unnumbered} +### `R/bmm.R` {.unnumbered} -It contains the main function for fitting models, `fit_model()`. This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package. +It contains the main function for fitting models, `bmm()`. This function is the main entry point for users to fit models. It is set-up to be independent of the specific models that are implemented in the package. To add new models, you do not have to edit this file. The functions above are generic S3 methods, and they will automatically recognize new models if you add appropriate methods for them (see section [Adding new models](#adding-new-models-to-bmm)). ### `R/helpers-*.R` {.unnumbered} -`R/helpers-data.R`, `R/helpers-postprocess.R`, `R/helpers-model.R`, `R/helpers-formula.R` and `R/helpers-prior.R` +`R/helpers-data.R`, `R/helpers-parameters.R`, `R/helpers-postprocess.R`, `R/helpers-model.R`, and `R/helpers-prior.R` + +These files define the main generic S3 methods for checking data, postprocessing the fitted model, configuring the model, checking the model formula, and combining priors. They contain the default methods for these functions, which are called by `bmm()` if no specific method is defined for a model. If you want to add a new model, you will need to add specific methods for these functions for your model. *You do not need to edit these files to add a new model.* + +### `R/bmmformula.R` {.unnumbered} -These files define the main generic S3 methods for checking data, postprocessing the fitted model, configuring the model, checking the model formula, and combining priors. They contain the default methods for these functions, which are called by `fit_model()` if no specific method is defined for a model. If you want to add a new model, you will need to add specific methods for these functions for your model. *You do not need to edit these files to add a new model.* +This file contains the definition of the `bmmformula` class, which is used to represent the formula for the model. It contains the `bmmformula()` function and its alias `bmf()`, which is used to create a new formula object. -### `R/bmm_model_*.R` {.unnumbered} +### `R/model_*.R` {.unnumbered} -Each model and it's methods is defined in a separate file. For example, the 3-parameter mixture model is defined in `bmm_model_mixture3p.R`. This file contains the internal function that defines the model and its parameters, and the specific methods for the generic S3 functions. Your new model will exist in a file like this. The name of the file should be `bmm_model_name_of_your_model.R`. You don't have to add this file manually - see section [Adding new models](#adding-new-models-to-bmm). +Each model and it's methods is defined in a separate file. For example, the 3-parameter mixture model is defined in `model_mixture3p.R`. This file contains the internal function that defines the model and its parameters, and the specific methods for the generic S3 functions. Your new model will exist in a file like this. The name of the file should be `model_name_of_your_model.R`. You don't have to add this file manually - see section [Adding new models](#adding-new-models-to-bmm). -### `R/bmm_distributions.R` {.unnumbered} +### `R/distributions.R` {.unnumbered} This file contains the definition of the custom distributions that are used in the package. It specifies the density, random number generation, and probability functions for the custom distributions. If your model requires a custom distribution, you will need to add it to this file. These are not used during model fitting, but can be used to generate data from the model, and to plot the model fit. -### `R/utils.R`, `R/brms-misc.R` {.unnumbered} +### `R/utils.R`, `R/brms-misc.R`, `R/restructure.R`, `R/summary.R`, `R/update.R` {.unnumbered} Various utility functions. diff --git a/example-model.qmd b/example-model.qmd index 64a74d3..32986c6 100644 --- a/example-model.qmd +++ b/example-model.qmd @@ -4,214 +4,323 @@ All models in the package are defined as S3 classes and follow a strict template ## The Interference Measurement Model (IMM) -The model is defined in the file `R/bmm_model_IMM.R.` Let's go through the different parts. +The model is defined in the file `R/model_imm.R.` Let's go through the different parts. ### Model definition The full IMM model is defined in the following internal model class: ``` r -.model_IMMfull <- function(resp_err, nt_features, nt_distance, setsize, ...) { - out <- list( - resp_vars = nlist(resp_err), - other_vars = nlist(nt_features, nt_distance, setsize), - info = list( - domain = "Visual working memory", - task = "Continuous reproduction", - name = "Interference measurement model by Oberauer and Lin (2017).", - version = "full", - citation = paste0("Oberauer, K., & Lin, H.Y. (2017). An interference model ", - "of visual working memory. Psychological Review, 124(1), 21-59"), - requirements = paste0('- The response vairable should be in radians and ', - 'represent the angular error relative to the target\n ', - '- The non-target features should be in radians and be ', - 'centered relative to the target'), - parameters = list( - mu1 = paste0("Location parameter of the von Mises distribution for memory responses", - "(in radians). Fixed internally to 0 by default."), - kappa = "Concentration parameter of the von Mises distribution (log scale)", - a = "General activation of memory items", - c = "Context activation", - s = "Spatial similarity gradient" +.model_imm <- + function(resp_error = NULL, nt_features = NULL, nt_distances = NULL, + set_size = NULL, regex = FALSE, links = NULL, version = "full", + call = NULL, ...) { + out <- structure( + list( + resp_vars = nlist(resp_error), + other_vars = nlist(nt_features, nt_distances, set_size), + domain = "Visual working memory", + task = "Continuous reproduction", + name = "Interference measurement model by Oberauer and Lin (2017).", + version = version, + citation = glue( + "Oberauer, K., & Lin, H.Y. (2017). An interference model \\ + of visual working memory. Psychological Review, 124(1), 21-59" + ), + requirements = glue( + '- The response vairable should be in radians and \\ + represent the angular error relative to the target + - The non-target features should be in radians and be \\ + centered relative to the target' + ), + parameters = list( + mu1 = glue( + "Location parameter of the von Mises distribution for memory \\ + responses (in radians). Fixed internally to 0 by default." + ), + kappa = "Concentration parameter of the von Mises distribution", + a = "General activation of memory items", + c = "Context activation", + s = "Spatial similarity gradient" + ), + links = list( + mu1 = "tan_half", + kappa = "log", + a = "identity", + c = "identity", + s = "log" + ), + fixed_parameters = list(mu1 = 0, mu2 = 0, kappa2 = -100), + default_priors = list( + mu1 = list(main = "student_t(1, 0, 1)"), + kappa = list(main = "normal(2, 1)", effects = "normal(0, 1)"), + a = list(main = "normal(0, 1)", effects = "normal(0, 1)"), + c = list(main = "normal(0, 1)", effects = "normal(0, 1)"), + s = list(main = "normal(0, 1)", effects = "normal(0, 1)") + ), + void_mu = FALSE ), - fixed_parameters = list( - mu1 = 0 - )), - void_mu = FALSE - ) - class(out) <- c("bmmmodel","vwm","nontargets","IMMspatial","IMMfull") - out -} + # attributes + regex = regex, + regex_vars = c('nt_features', 'nt_distances'), + class = c("bmmodel", "vwm", "non_targets", "imm", paste0('imm_',version)), + call = call + ) + + # add version specific information + if (version == "abc") { + out$parameters$s <- NULL + out$links$s <- NULL + out$default_priors$s <- NULL + attributes(out)$regex_vars <- c('nt_features') + } else if (version == "bsc") { + out$parameters$a <- NULL + out$links$a <- NULL + out$default_priors$a <- NULL + } + + out$links[names(links)] <- links + out + } ``` Here is a brief explanation of the different components of the model definition: -`resp_vars`: a list of response variables that the model will be fitted to. These variables will be used to construct the `brmsformula` passed to `brms` together with the `bmmformula` and the `parameters` of the model. The user has to provide these variables in the data frame that is passed to the `fit_model` function +`resp_vars`: a list of response variables that the model will be fitted to. These variables will be used to construct the `brmsformula` passed to `brms` together with the `bmmformula` and the `parameters` of the model. The user has to provide these variables in the data frame that is passed to the `bmm()` function + +`other_vars:` a list of additional variables that are required for the model. This is used to check if the data contains all necessary information for fitting the model. In the example above, the IMM model requires the names of the variables specifying the non-target features relative to the target, the variables specifying the distance of the non-targets to the target, and the set_size. The user has to provide these variables in the data frame that is passed to the `bmm()` function + +`domain, task, name, citation, requirements`: contains information about the model, such as the domain, task, name, citation, requirements. This information is used for generating help pages -`other_vars:` a list of additional variables that are required for the model. This is used to check if the data contains all necessary information for fitting the model. In the example above, the IMM model requires the names of the variables specifying the non-target features relative to the target, the variables specifying the distance of the non-targets to the target, and the setsize. The user has to provide these variables in the data frame that is passed to the `fit_model` function +`version:` if the model has multiple versions, this argument is specified by the user. Then it is used to dynamically adjust some information in the model object. In the case of the `imm` model, we have three versions - `full`, `bsc` and `abc`. As you can see at the end of the script, some parameters are deleted depending on the model version. -`info`: contains information about the model, such as the domain, task, name, citation, version, requirements, and parameters. This information is used to check if the `bmmformula` contains linear model formulas for all model parameters, and also specify defaults for `fixed_parameters`. In addition, the info is used to generate the documentation for the model. +`parameters:` contains a named list of all parameters in the model that can be estimated by the user and their description. This information is used internally to check if the `bmmformula` contains linear model formulas for all model parameters, and to decide what information to include in the summary of `bmmfit` objects. + +`links:` a named list providing the link function for each parameter. For example, `kappa` in the `imm` models has to be positive, so it is sampled on the log scale. This information is used in defining the model family and for the summary methods. If you want the user to be able to specify custom link functions, the next to last line of the script replaces the links with those provided by the user + +`fixed_parameters` in the `imm` several parameters are fixed to constant values internally to identify the model. Only one of them, `mu1` is also part of the `parameters` block - this is the only fixed parameters that users can choose to estimate instead of leaving it fixed. `mu2` and `kappa2` cannot be freely estimated. + +`default_priors` a list of lists for each parameter in the model. Each prior has two components: `main`, the prior that will be put on the Intercept or on each level of a factor if the intercept is suppressed; `effects`, the prior to put on the regression coefficients relative to the intercept. The priors are described as in the `set_prior` function from `brms`. This information is used by the `configure_prior()` S3 method to automatically set the default priors for the model. The priors that you put here will be used by `bmm()` unless the users chooses to overwrite them. `void_mu:` For models using a custom family that do not contain a `location` or `mu` parameter, for example the diffusion model, we recommend setting up a `void_mu` parameter. This avoids arbitrarily using one of the model parameters as the `mu` parameter. -`class`: is the most important part. It contains the class of the model. This is used by generic S3 methods to perform data checks and model configuration. The classes should be ordered from most general to most specific. A general class exists when the same operations can be performed on multiple models. For example, the '3p', 'IMMabc', 'IMMbsc' and 'IMMfull' models all have non-targets and setsize arguments, so the same data checks can be performed on all of them, represented by the class `nontargets`. The first class should always be `bmmmodel`, which is the main class for all models. The last class should be the specific model name, in this case `IMMfull`. +`regex:` For the `imm` models, the `nt_features` and `nt_distances` variables can be specified with regular expressions, if the user sets `regex = TRUE` + +`call`: this automatically records how the model was called so that the call can be printed in the summary after fitting. Leave it as is. + +`class`: is the most important part. It contains the class of the model. This is used by generic S3 methods to perform data checks and model configuration. The classes should be ordered from most general to most specific. A general class exists when the same operations can be performed on multiple models. For example, the '3p', 'imm_abc', 'imm_bsc' and 'imm_full' models all have non-targets and set_size arguments, so the same data checks can be performed on all of them, represented by the class `non_targets`. The first class should always be `bmmodel`, which is the main class for all models. The last class should be the specific model name, in this case `imm_full`, `imm_abc` or `imm_bsc`, which is automatically constructed if a `version` argument is provided. Otherwise the last class will be just the name of the model. ### Model alias The model alias is a user-facing function that calls the internal model function. It is defined as follows: ``` r -# user facing alias - -#' @title `r .model_IMMfull(NA, NA, NA, NA)$info$name` -#' @name IMM -#' @details `r model_info(IMMfull(NA, NA, NA, NA), components =c('domain', 'task', 'name', 'citation'))` -#' #### Version: `IMMfull` -#' `r model_info(IMMfull(NA, NA, NA, NA), components =c('requirements', 'parameters'))` -#' #### Version: `IMMbsc` -#' `r model_info(IMMbsc(NA, NA, NA, NA), components =c('requirements', 'parameters'))` -#' #### Version: `IMMabc` -#' `r model_info(IMMabc(NA, NA, NA), components =c('requirements', 'parameters'))` +#' @title `r .model_imm()$name` +#' @description Three versions of the `r .model_imm()$name` - the full, bsc, and abc. +#' `IMMfull()`, `IMMbsc()`, and `IMMabc()` are deprecated and will be removed in the future. +#' Please use `imm(version = 'full')`, `imm(version = 'bsc')`, or `imm(version = 'abc')` instead. #' -#' Additionally, all IMM models have an internal parameter that is fixed to 0 to +#' @name imm +#' @details `r model_info(.model_imm(), components =c('domain', 'task', 'name', 'citation'))` +#' #### Version: `full` +#' `r model_info(.model_imm(version = "full"), components = c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))` +#' #### Version: `bsc` +#' `r model_info(.model_imm(version = "bsc"), components = c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))` +#' #### Version: `abc` +#' `r model_info(.model_imm(version = "abc"), components =c('requirements', 'parameters', 'fixed_parameters', 'links', 'prior'))` +#' +#' Additionally, all imm models have an internal parameter that is fixed to 0 to #' allow the model to be identifiable. This parameter is not estimated and is not #' included in the model formula. The parameter is: #' #' - b = "Background activation (internally fixed to 0)" #' -#' @param resp_err The name of the variable in the provided dataset containing the -#' response error. The response Error should code the response relative to the to-be-recalled -#' target in radians. You can transform the response error in degrees to radian using the `deg2rad` function. -#' @param nt_features A character vector with the names of the non-target variables. -#' The non_target variables should be in radians and be centered relative to the -#' target. -#' @param nt_distance A vector of names of the columns containing the distances of -#' non-target items to the target item. Only necessary for the `IMMbsc` and `IMMfull` models -#' @param setsize Name of the column containing the set size variable (if -#' setsize varies) or a numeric value for the setsize, if the setsize is +#' @param resp_error The name of the variable in the provided dataset containing +#' the response error. The response Error should code the response relative to +#' the to-be-recalled target in radians. You can transform the response error +#' in degrees to radian using the `deg2rad` function. +#' @param nt_features A character vector with the names of the non-target +#' variables. The non_target variables should be in radians and be centered +#' relative to the target. Alternatively, if regex=TRUE, a regular +#' expression can be used to match the non-target feature columns in the +#' dataset. +#' @param nt_distances A vector of names of the columns containing the distances +#' of non-target items to the target item. Alternatively, if regex=TRUE, a regular +#' expression can be used to match the non-target distances columns in the +#' dataset. Only necessary for the `bsc` and `full` versions. +#' @param set_size Name of the column containing the set size variable (if +#' set_size varies) or a numeric value for the set_size, if the set_size is #' fixed. +#' @param regex Logical. If TRUE, the `nt_features` and `nt_distances` arguments +#' are interpreted as a regular expression to match the non-target feature +#' columns in the dataset. +#' @param links A list of links for the parameters. *Currently does not affect +#' the model fits, but it will in the future.* +#' @param version Character. The version of the IMM model to use. Can be one of +#' `full`, `bsc`, or `abc`. The default is `full`. #' @param ... used internally for testing, ignore it -#' @return An object of class `bmmmodel` -#' @keywords bmmmodel -#' @export -IMMfull <- .model_IMMfull - -#' @rdname IMM -#' @keywords bmmmodel -#' @export -IMMbsc <- .model_IMMbsc - -#' @rdname IMM -#' @keywords bmmmodel +#' @return An object of class `bmmodel` +#' @keywords bmmodel +#' @examples +#' \dontrun{ +#' # load data +#' data <- oberauer_lin_2017 +#' +#' # define formula +#' ff <- bmmformula( +#' kappa ~ 0 + set_size, +#' c ~ 0 + set_size, +#' a ~ 0 + set_size, +#' s ~ 0 + set_size +#' ) +#' +#' # specify the full IMM model with explicit column names for non-target features and distances +#' # by default this fits the full version of the model +#' model1 <- imm(resp_error = "dev_rad", +#' nt_features = paste0('col_nt', 1:7), +#' nt_distances = paste0('dist_nt', 1:7), +#' set_size = 'set_size') +#' +#' # fit the model +#' fit <- bmm(formula = ff, +#' data = data, +#' model = model1, +#' cores = 4, +#' backend = 'cmdstanr') +#' +#' # alternatively specify the IMM model with a regular expression to match non-target features +#' # this is equivalent to the previous call, but more concise +#' model2 <- imm(resp_error = "dev_rad", +#' nt_features = 'col_nt', +#' nt_distances = 'dist_nt', +#' set_size = 'set_size', +#' regex = TRUE) +#' +#' # fit the model +#' fit <- bmm(formula = ff, +#' data = data, +#' model = model2, +#' cores = 4, +#' backend = 'cmdstanr') +#' +#' # you can also specify the `bsc` or `abc` versions of the model to fit a reduced version +#' model3 <- imm(resp_error = "dev_rad", +#' nt_features = 'col_nt', +#' set_size = 'set_size', +#' regex = TRUE, +#' version = 'abc') +#' fit <- bmm(formula = ff, +#' data = data, +#' model = model3, +#' cores = 4, +#' backend = 'cmdstanr') +#'} #' @export -IMMabc <- .model_IMMabc +imm <- function(resp_error, nt_features, nt_distances, set_size, regex = FALSE, + links = NULL, version = "full", ...) { + call <- match.call() + dots <- list(...) + if (version == "abc") { + nt_distances <- NULL + } + stop_missing_args() + .model_imm(resp_error = resp_error, nt_features = nt_features, + nt_distances = nt_distances, set_size = set_size, regex = regex, + links = links, version = version, call = call, ...) +} ``` -The details will be filled out automatically from the model definition. The example for the IMM model also includes the aliases for the other versions of the IMM model, which are `IMMbsc` and `IMMabc`, and does some fancy formatting to include documentation about all versions of the model in the same help file. +The details will be filled out automatically from the model definition. This does some fancy formatting to include documentation about all versions of the model in the same help file. ### check_data() methods -Each model should have a `check_data.modelname()` method that checks if the data contains all necessary information for fitting the model. For the IMM, all three versions of the model share the same data requirements, so check_data is defined for the more general class, `IMMspatial`. The method is defined as follows: +Each model should have a `check_data.modelname()` method that checks if the data contains all necessary information for fitting the model. For the IMM, the `bsc` and `full` versions require a special check for the `nt_distances` variables: ``` r -check_data.IMMspatial <- function(model, data, formula) { - nt_distance <- model$other_vars$nt_distance - max_setsize <- attr(data, 'max_setsize') - - if (length(nt_distance) < max_setsize - 1) { - stop(paste0("The number of columns for spatial positions in the argument ", - "'nt_distance' is less than max(setsize)-1")) - } else if (length(nt_distance) > max_setsize - 1) { - stop(paste0("The number of columns for spatial positions in the argument ", - "'nt_distance' is more than max(setsize)-1")) - } +#' @export +check_data.imm_bsc <- function(model, data, formula) { + data <- .check_data_imm_dist(model, data, formula) + NextMethod("check_data") +} - if (any(data[,nt_distance] < 0)) { - stop('Somve values of the spatial distance variables in the data are negative.\n - All spatial distances to the target need to be postive.') - } +#' @export +check_data.imm_full <- function(model, data, formula) { + data <- .check_data_imm_dist(model, data, formula) + NextMethod("check_data") +} + +.check_data_imm_dist <- function(model, data, formula) { + nt_distances <- model$other_vars$nt_distances + max_set_size <- attr(data, 'max_set_size') - data = NextMethod("check_data") + stopif(!isTRUE(all.equal(length(nt_distances), max_set_size - 1)), + "The number of columns for non-target distances in the argument \\ + 'nt_distances' should equal max(set_size)-1})") - return(data) + # replace nt_distances + data[,nt_distances][is.na(data[,nt_distances])] <- 999 + + stopif(any(data[,nt_distances] < 0), + "All non-target distances to the target need to be postive.") + data } ``` -The IMM models share methods with the `mixture3p` model, all of which are of class `nontargets` so the `check_data.nontargets` method is defined in the general file `R/helpers-data.R`. If you are adding a new model, you should check if the data requirements are similar to any existing model and define the `check_data` method only for the methods that are unique to your model. +The IMM models share methods with the `mixture3p` model, all of which are of class `non_targets` so the `check_data.non_targets` method is defined in the general file `R/helpers-data.R`. If you are adding a new model, you should check if the data requirements are similar to any existing model and define the `check_data` method only for the methods that are unique to your model. The `check_data.mymodel()` function should always take the arguments `model`, `data`, and `formula` and return the data with the necessary transformations. It should also call `data = NextMethod("check_data")` to call the check_data method of the more general class. ### configure_model() methods -The configure_model.mymodel() method is where you specify the model formula, the family, any custom code, and the priors. The method is defined as follows for the IMM model: +The configure_model.mymodel() method is where you specify the model formula, the family, any custom code. The method is defined as follows for the IMM model: (we show only the `IMMfull` version) ``` r -configure_model.IMMfull <- function(model, data, formula) { +#' @export +configure_model.imm_full <- function(model, data, formula) { # retrieve arguments from the data check - max_setsize <- attr(data, 'max_setsize') - lure_idx_vars <- attr(data, "lure_idx_vars") + max_set_size <- attr(data, 'max_set_size') + lure_idx <- attr(data, "lure_idx_vars") nt_features <- model$other_vars$nt_features - setsize_var <- model$other_vars$setsize - nt_distance <- model$other_vars$nt_distance + set_size_var <- model$other_vars$set_size + nt_distances <- model$other_vars$nt_distances # construct main brms formula from the bmm formula - bmm_formula <- formula - formula <- bmf2bf(model, bmm_formula) - - # additional internal terms for the mixture model formula - kappa_nts <- paste0('kappa', 2:max_setsize) - kappa_unif <- paste0('kappa',max_setsize + 1) - theta_nts <- paste0('theta',2:max_setsize) - mu_nts <- paste0('mu', 2:max_setsize) - mu_unif <- paste0('mu', max_setsize + 1) - - formula <- formula + - glue_lf(kappa_unif,' ~ 1') + - glue_lf(mu_unif, ' ~ 1') + + formula <- bmf2bf(model, formula) + + brms::lf(kappa2 ~ 1) + + brms::lf(mu2 ~ 1) + brms::nlf(theta1 ~ c + a) + brms::nlf(kappa1 ~ kappa) + brms::nlf(expS ~ exp(s)) - for (i in 1:(max_setsize - 1)) { + # additional internal terms for the mixture model formula + kappa_nts <- paste0("kappa", 3:(max_set_size + 1)) + theta_nts <- paste0("theta", 3:(max_set_size + 1)) + mu_nts <- paste0("mu", 3:(max_set_size + 1)) + + for (i in 1:(max_set_size - 1)) { formula <- formula + - glue_nlf(kappa_nts[i], ' ~ kappa') + - glue_nlf(theta_nts[i], ' ~ ', lure_idx_vars[i], '*(exp(-expS*',nt_distance[i],')*c + a) + ', - '(1-', lure_idx_vars[i], ')*(-100)') + - glue_nlf(mu_nts[i], ' ~ ', nt_features[i]) + glue_nlf("{kappa_nts[i]} ~ kappa") + + glue_nlf("{theta_nts[i]} ~ {lure_idx[i]} * (exp(-expS*{nt_distances[i]})", + " * c + a) + (1 - {lure_idx[i]}) * (-100)") + + glue_nlf("{mu_nts[i]} ~ {nt_features[i]}") } + # define mixture family - vm_list = lapply(1:(max_setsize + 1), function(x) brms::von_mises(link = "identity")) - vm_list$order = "none" - family <- brms::do_call(brms::mixture, vm_list) - - # define prior - additional_constants <- list() - additional_constants[[kappa_unif]] <- -100 - additional_constants[[mu_unif]] <- 0 - prior <- fixed_pars_priors(model, additional_constants) + - brms::prior_("normal(2, 1)", class = "b", nlpar = "kappa") + - brms::prior_("normal(0, 1)", class = "b", nlpar = "c") + - brms::prior_("normal(0, 1)", class = "b", nlpar = "a") + - brms::prior_("normal(0, 1)", class = "b", nlpar = "s") - - # if there is setsize 1 in the data, set constant prior over thetant for setsize1 - if ((1 %in% data$ss_numeric) && !is.numeric(data[[setsize_var]])) { - prior <- prior + - brms::prior_("constant(0)", class = "b", coef = paste0(setsize_var, 1), nlpar = "a") + - brms::prior_("constant(0)", class = "b", coef = paste0(setsize_var, 1), nlpar = "s") - } + formula$family <- brms::mixture(brms::von_mises("tan_half"), + brms::von_mises("identity"), + nmix = c(1, max_set_size), + order = "none") - out <- nlist(formula, data, family, prior) - return(out) + nlist(formula, data) } ``` -The configure_model method should always take the arguments `model`, `data`, and `formula` (as a `bmmformula)` and return a named list with the formula (as a `brmsformula`), the data, the family, and the priors. +The configure_model method should always take the arguments `model`, `data`, and `formula` (as a `bmmformula)` and return a named list with the formula (as a `brmsformula`) and the data. The `brmsfamily` should be stored within the formula. -Inside the configure_model method the `brmsformula` is generated using the `bmf2bf` function. This function converts the `bmmformula` passed to `fit_model` function into a `brmsformula` based on the information for the response variables provided in the `bmmmodel` object. There is a general method in `R/helpers-formula.R` to construct the formula for all models with a single response variable. +Inside the configure_model method the `brmsformula` is generated using the `bmf2bf` function. This function converts the `bmmformula` passed to `bmm()` function into a `brmsformula` based on the information for the response variables provided in the `bmmmodel` object. There is a general method in `R/helpers-formula.R` to construct the formula for all models with a single response variable. ``` r # default method for all bmmmodels with 1 response variable @@ -248,35 +357,53 @@ For models with more than one response variable, you will have to provide a mode ## The Signal Discrimination Model (SDM) -The SDM model is defined in the file `R/bmm_model_SDM.R`. The SDM model differs in the configuration compared to the IMM model, as it requires custom STAN code. Let's go through the different parts. As before, we start with the model definition. +The SDM model is defined in the file `R/model_sdm.R`. The SDM model differs in the configuration compared to the IMM model, as it requires custom STAN code. Let's go through the different parts. As before, we start with the model definition. ### Model definition ``` r -.model_sdmSimple <- function(resp_err, ...) { - out <- list( - resp_vars = nlist(resp_err), +.model_sdm <- function(resp_error = NULL, links = NULL, + version = "simple", call = NULL, ...) { + out <- structure( + list( + resp_vars = nlist(resp_error), other_vars = nlist(), - info = list( - domain = 'Visual working memory', - task = 'Continuous reproduction', - name = 'Signal Discrimination Model (SDM) by Oberauer (2023)', - citation = paste0('Oberauer, K. (2023). Measurement models for visual working memory - ', - 'A factorial model comparison. Psychological Review, 130(3), 841-852'), - version = 'Simple (no non-targets)', - requirements = '- The response variable should be in radians and represent the angular error relative to the target', - parameters = list( - mu = 'Location parameter of the SDM distribution (in radians; by default fixed internally to 0)', - c = 'Memory strength parameter of the SDM distribution', - kappa = 'Precision parameter of the SDM distribution (log scale)' - ), - fixed_parameters = list( - mu = 0 - )), + domain = 'Visual working memory', + task = 'Continuous reproduction', + name = 'Signal Discrimination Model (SDM) by Oberauer (2023)', + citation = glue( + 'Oberauer, K. (2023). Measurement models for visual working memory - \\ + A factorial model comparison. Psychological Review, 130(3), 841-852' + ), + version = version, + requirements = glue( + '- The response variable should be in radians and represent the angular \\ + error relative to the target' + ), + parameters = list( + mu = glue('Location parameter of the SDM distribution (in radians; \\ + by default fixed internally to 0)'), + c = 'Memory strength parameter of the SDM distribution', + kappa = 'Precision parameter of the SDM distribution' + ), + links = list( + mu = 'tan_half', + c = 'log', + kappa = 'log' + ), + fixed_parameters = list(mu = 0), + default_priors = list( + mu = list(main = "student_t(1, 0, 1)"), + kappa = list(main = "student_t(5, 1.75, 0.75)", effects = "normal(0, 1)"), + c = list(main = "student_t(5, 2, 0.75)", effects = "normal(0, 1)") + ), void_mu = FALSE - ) - class(out) <- c('bmmmodel', 'vwm', 'sdmSimple') - out + ), + class = c('bmmodel', 'vwm', 'sdm', paste0("sdm_", version)), + call = call + ) + out$links[names(links)] <- links + out } ``` @@ -284,7 +411,16 @@ The model definition is similar to the IMM model, but the SDM model only require ### check_data() methods -The SDM does not require special data checks beyond it's shared class with other `vwm` models, so we don't need to define a `check_data.sdmSimple` method. The `check_data.vwm` method is defined in the general file `R/helpers-data.R`. +The SDM shares a class with other `vwm` models, so we most of the data checks are performed by `check_data.vwm` method, defined in the general file `R/helpers-data.R`. The `sdm` however, samples much more quickly in Stan, if the data is sorted by the predictor variables, so we have the following custom data check method for the sdm: + +``` r +#' @export +check_data.sdm <- function(model, data, formula) { + # data sorted by predictors is necessary for speedy computation of normalizing constant + data <- order_data_query(model, data, formula) + NextMethod("check_data") +} +``` ### configure_model() methods @@ -292,47 +428,86 @@ The configure_model method for the SDM model is different compared to the IMM mo ``` r #' @export -configure_model.sdmSimple <- function(model, data, formula) { - # construct the family - # note - c has a log link, but I've coded it manually for computational efficiency - sdm_simple <- brms::custom_family( - "sdm_simple", dpars = c("mu", "c","kappa"), - links = c("identity","identity", "log"), lb = c(NA, NA, NA), - type = "real", loop=FALSE, - ) - family <- sdm_simple - - # prepare initial stanvars to pass to brms, model formula and priors - sc_path <- system.file("stan_chunks", package="bmm") - stan_funs <- read_lines2(paste0(sc_path, '/sdmSimple_funs.stan')) - stan_tdata <- read_lines2(paste0(sc_path, '/sdmSimple_tdata.stan')) - stan_likelihood <- read_lines2(paste0(sc_path, '/sdmSimple_likelihood.stan')) - stanvars <- brms::stanvar(scode = stan_funs, block = "functions") + - brms::stanvar(scode = stan_tdata, block = 'tdata') + - brms::stanvar(scode = stan_likelihood, block = 'likelihood', position ="end") - - # construct main brms formula from the bmm formula - bmm_formula <- formula - formula <- bmf2bf(model, bmm_formula) - - # construct the default prior - # TODO: for now it just fixes mu to 0, I have to add proper priors - prior <- fixed_pars_priors(model) - - # set initial values to be sampled between [-1,1] to avoid extreme SDs that - # can cause the sampler to fail - init = 1 - - # return the list - out <- nlist(formula, data, family, prior, stanvars, init) - return(out) +configure_model.sdm <- function(model, data, formula) { + # construct the family + # note - c has a log link, but I've coded it manually for computational efficiency + sdm_simple <- brms::custom_family( + "sdm_simple", + dpars = c("mu", "c", "kappa"), + links = c("tan_half", "identity", "log"), + lb = c(NA, NA, NA), + ub = c(NA, NA, NA), + type = "real", loop = FALSE, + log_lik = log_lik_sdm_simple, + posterior_predict = posterior_predict_sdm_simple + ) + + # prepare initial stanvars to pass to brms, model formula and priors + sc_path <- system.file("stan_chunks", package = "bmm") + stan_funs <- read_lines2(paste0(sc_path, "/sdm_simple_funs.stan")) + stan_tdata <- read_lines2(paste0(sc_path, "/sdm_simple_tdata.stan")) + stan_likelihood <- read_lines2(paste0(sc_path, "/sdm_simple_likelihood.stan")) + stanvars <- brms::stanvar(scode = stan_funs, block = "functions") + + brms::stanvar(scode = stan_tdata, block = "tdata") + + brms::stanvar(scode = stan_likelihood, block = "likelihood", position = "end") + + # construct main brms formula from the bmm formula + formula <- bmf2bf(model, formula) + formula$family <- sdm_simple + + # set initial values to be sampled between [-1,1] to avoid extreme SDs that + # can cause the sampler to fail + init <- 1 + + # return the list + nlist(formula, data, stanvars, init) +} +``` + +**Lines 5-14** use the `brms::custom_family` function to define a custom family for the SDM model. The `dpars` argument specifies the parameters of the model, and the `links` argument specifies the link functions for the parameters. For more information, see [here](https://cran.r-project.org/web/packages/brms/vignettes/brms_customfamilies.html) + +**Lines 17-23** read the custom STAN code from the `inst/stan_chunks` directory. This has to be specified with the system.file() command to ensure that the code is found when the package is installed. The `stanvars` object is used to pass custom STAN code to the `brms` package. The `stanvars` object is a list of `brms::stanvar` objects, each of which contains the STAN code for a specific part of the model. There is a separate `.stan` file for each part of the STAN code, and each file is read into a separate `brms::stanvar` object. + +Converting the `bmmformula` to a `brmsformula` and collecting all arguements is the same as for the IMM model. + +### Postprocessing methods + +Unlike the `imm` model, the `sdm` model requires some special postprocessing because of the way the link functions are coded. These methods are applied *after* the brmsfit object is returned, at the very end of the *bmm()* pipeline: + +``` r +#' @export +postprocess_brm.sdm <- function(model, fit, ...) { + # manually set link_c to "log" since I coded it manually + fit$family$link_c <- "log" + fit$formula$family$link_c <- "log" + fit +} + +#' @export +revert_postprocess_brm.sdm <- function(model, fit, ...) { + fit$family$link_c <- "identity" + fit$formula$family$link_c <- "identity" + fit } ``` -**Lines 5-10** use the `brms::custom_family` function to define a custom family for the SDM model. The `dpars` argument specifies the parameters of the model, and the `links` argument specifies the link functions for the parameters. For more information, see [here](https://cran.r-project.org/web/packages/brms/vignettes/brms_customfamilies.html) +we also have a couple of special functions for custom families in brms, which allow other typical tools such posterior_predict of bridgesampling to work: -**Lines 13-19** read the custom STAN code from the `inst/stan_chunks` directory. This has to be specified with the system.file() command to ensure that the code is found when the package is installed. The `stanvars` object is used to pass custom STAN code to the `brms` package. The `stanvars` object is a list of `brms::stanvar` objects, each of which contains the STAN code for a specific part of the model. There is a separate `.stan` file for each part of the STAN code, and each file is read into a separate `brms::stanvar` object. +``` r +log_lik_sdm_simple <- function(i, prep) { + mu <- brms::get_dpar(prep, "mu", i = i) + c <- brms::get_dpar(prep, "c", i = i) + kappa <- brms::get_dpar(prep, "kappa", i = i) + y <- prep$data$Y[i] + dsdm(y, mu, c, kappa, log = T) +} -Converting the `bmmformula` to a `brmsformula`, specifying priors for fixed parameters, and collecting all arguements is the same as for the IMM model. +posterior_predict_sdm_simple <- function(i, prep, ...) { + mu <- brms::get_dpar(prep, "mu", i = i) + c <- brms::get_dpar(prep, "c", i = i) + kappa <- brms::get_dpar(prep, "kappa", i = i) + rsdm(length(mu), mu, c, kappa) +} +``` We will now look at how to construct all these parts for a new model. **Hint**: you don't have to do it manually, you can use the `use_model_template()` function to generate templates for your model. diff --git a/index.qmd b/index.qmd index 5dcfd5a..5ab099b 100644 --- a/index.qmd +++ b/index.qmd @@ -1,6 +1,10 @@ # Overview {.unnumbered} -This article aims to help developers contribute new models to bmm. It is a work in progress and will be updated as the package evolves. It explains how to set-up your system for package development, the structure of the package, and the workflow for contributing new models to the package. +*Last update: 26.03.2024* + +This guide aims to help developers contribute new models to bmm. It is a work in progress and will be updated as the package evolves. It explains how to set-up your system for package development, the structure of the package, and the workflow for contributing new models to the package. + +The current guide is up to date with **bmm v0.5.0** and it might not yet reflect changes implemented afterwards. If you run into problems, don''t hesitate to [open an issue on github](https://github.com/venpopov/bmm/issues). We follow a [github flow workflow](https://jeffkreeftmeijer.com/git-flow/). The repository contains two main branches: diff --git a/setup.qmd b/setup.qmd index a645e4c..0a94a22 100644 --- a/setup.qmd +++ b/setup.qmd @@ -19,9 +19,10 @@ The bmm package is setup as an RStudio project. Opening the `bmm.Rproj` file wil ``` r install.packages(c("devtools", "roxygen2", "testthat", "knitr")) library(devtools) + install_dev_deps() ``` - To avoid having to load the package every time, you can add the following code to your `.Rprofile` file + To avoid having to load the **devtools** package every time, you can add the following code to your `.Rprofile` file ``` r if (interactive()) {