Skip to content

Commit

Permalink
More efficient sampling, for large EFIM.S
Browse files Browse the repository at this point in the history
  • Loading branch information
Kss2k committed Dec 9, 2024
1 parent 781b015 commit fd695e4
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 31 deletions.
73 changes: 49 additions & 24 deletions R/calc_se_da.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ calcFIM_da <- function(model,
NA__ = -999,
EFIM.S = 3e4,
epsilon = 1e-8,
R.max = 1e6,
verbose = FALSE) {
if (!calc.se) return(list(FIM = NULL, vcov = NULL, vcov.sub = NULL, type = "none",
raw.labels = names(theta), n.additions = 0))
Expand All @@ -24,15 +25,17 @@ calcFIM_da <- function(model,
epsilon = epsilon, hessian = hessian),
expected = calcEFIM_LMS(model, finalModel = finalModel, theta = theta,
data = data, epsilon = epsilon, S = EFIM.S,
parametric = EFIM.parametric, verbose = verbose),
parametric = EFIM.parametric, verbose = verbose,
R.max = R.max),
stop2("FIM must be either expected or observed")),
qml =
switch(FIM,
observed = calcOFIM_QML(model, theta = theta, data = data,
hessian = hessian, epsilon = epsilon),
expected = calcEFIM_QML(model, finalModel = finalModel, theta = theta,
data = data, epsilon = epsilon, S = EFIM.S,
parametric = EFIM.parametric, verbose = verbose),
parametric = EFIM.parametric, verbose = verbose,
R.max = R.max),
stop2("FIM must be either expected or observed")),
stop2("Unrecognized method: ", method)
)
Expand Down Expand Up @@ -138,42 +141,51 @@ calcOFIM_LMS <- function(model, theta, data, hessian = FALSE,


calcEFIM_LMS <- function(model, finalModel = NULL, theta, data, S = 3e4,
parametric = TRUE, epsilon = 1e-6, verbose = FALSE) {
N <- nrow(data)
parametric = TRUE, epsilon = 1e-6, verbose = FALSE,
R.max = 1e6) {
N <- nrow(model$data)
R.ceil <- N * S > R.max
R <- ifelse(R.ceil, yes = R.max, no = N * S)

if (R < N) {
warning2("Population size is smaller than sample size, please increase it using the `R.max` argument!")
R <- N
}

if (parametric) {
if (is.null(finalModel)) stop2("finalModel must be included in calcEFIM_LMS")
parTable <- modelToParTable(finalModel, method = "lms")
population <- tryCatch(
simulateDataParTable(parTable, N = S * N, colsOVs = colnames(data))$oV,
simulateDataParTable(parTable, N = R, colsOVs = colnames(data))$oV,
error = function(e) {
warning2("Unable to simulate data for EFIM, using stochastic sampling instead")
calcEFIM_LMS(model = model, theta = theta, data = data, S = S,
parametric = FALSE, epsilon = epsilon)
})

} else {
population <- data[sample(N * S, N * S, replace = TRUE), ]
}
} else population <- data[sample(R, N, replace = TRUE), ]

I <- matrix(0, nrow = length(theta), ncol = length(theta))

P <- estepLms(model = model, theta=theta, data = population)
for (i in seq_len(S)) {
if (verbose) printf("\rMonte-Carlo: Iteration = %d/%d", i, S)

n1 <- (i - 1) * N + 1
nn <- n1 + N - 1
if (!R.ceil) {
n1 <- (i - 1) * N + 1
nn <- n1 + N - 1
sub <- n1:nn
} else sub <- sample(R, N)

J <- gradientLogLikLms(theta = theta, model = model, data = population[n1:nn, ],
P = P[n1:nn, ], sign = 1, epsilon = epsilon)
J <- gradientLogLikLms(theta = theta, model = model, data = population[sub, ],
P = P[sub, ], sign = 1, epsilon = epsilon)

I <- I + J %*% t(J)
}

if (verbose) {
cat("\n")
if (S <= 100) cat("Consider increasing the iterations, using the `EFIM.S` argument!\n")
if (S <= 100) message("Consider increasing the Monte-Carlo Monte-Carloiterations, using the `EFIM.S` argument!")
}

I / S
Expand All @@ -199,37 +211,50 @@ calcOFIM_QML <- function(model, theta, data, hessian = FALSE,


calcEFIM_QML <- function(model, finalModel = NULL, theta, data, S = 100,
parametric = TRUE, epsilon = 1e-8, verbose = FALSE) {
N <- nrow(model$data)
parametric = TRUE, epsilon = 1e-8, verbose = FALSE,
R.max = 1e6) {
N <- nrow(model$data)
R.ceil <- N * S > R.max
R <- ifelse(R.ceil, yes = R.max, no = N * S)

if (R < N) {
warning2("Population size is smaller than sample size, please increase it using the `R.max` argument!")
R <- N
}

if (parametric) {
if (is.null(finalModel)) stop2("finalModel must be included in calcEFIM_QML")
parTable <- modelToParTable(finalModel, method = "qml")
population <- tryCatch(
simulateDataParTable(parTable, N = N * S, colsOVs = colnames(data))$oV,
simulateDataParTable(parTable, N = R, colsOVs = colnames(data))$oV,
error = function(e) {
warning2("Unable to simulate data for EFIM, using stochastic sampling instead")
calcEFIM_QML(model = model, theta = theta, data = data, S = S,
parametric = FALSE, epsilon = epsilon)
})
} else {
population <- data[sample(N * S, N * S, replace = TRUE), ]
}
}
)

} else population <- data[sample(R, N, replace = TRUE), ]


I <- matrix(0, nrow = length(theta), ncol = length(theta))
for (i in seq_len(S)) {
if (verbose) printf("\rMonte-Carlo: Iteration = %d/%d", i, S)
n1 <- (i - 1) * N + 1
nn <- n1 + N - 1

if (!R.ceil) {
n1 <- (i - 1) * N + 1
nn <- n1 + N - 1
sub <- n1:nn
} else sub <- sample(R, N)

J <- gradientLogLikQml(theta = theta, model = model, sign = 1, epsilon = epsilon,
data = population[n1:nn, ])
data = population[sub, ])
I <- I + J %*% t(J)
}

if (verbose) {
cat("\n")
if (S <= 100) cat("Consider increasing the iterations, using the `EFIM.S` argument!\n")
if (S <= 100) message("Consider increasing the Monte-Carlo iterations, using the `EFIM.S` argument!")
}

I / S
Expand Down
3 changes: 2 additions & 1 deletion R/est_lms.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ emLms <- function(model,
epsilon = 1e-6,
optimizer = "nlminb",
fix.estep = TRUE,
R.max = 1e6,
...) {
data <- model$data
stopif(anyNA(data), "Remove or replace missing values from data")
Expand Down Expand Up @@ -106,7 +107,7 @@ emLms <- function(model,
hessian = OFIM.hessian, calc.se = calc.se,
EFIM.parametric = EFIM.parametric, verbose = verbose,
FIM = FIM, robust.se = robust.se, epsilon = epsilon,
NA__ = -999)
R.max = R.max, NA__ = -999)
SE <- calcSE_da(calc.se = calc.se, FIM$vcov, rawLabels = FIM$raw.labels,
NA__ = -999)
modelSE <- getSE_Model(model, se = SE, method = "lms",
Expand Down
3 changes: 2 additions & 1 deletion R/est_qml.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ estQml <- function(model,
robust.se = FALSE,
epsilon = 1e-6,
optimizer = "nlminb",
R.max = 1e6,
...) {
startTheta <- model$theta
final <- mstepQml(model = model, theta = startTheta, max.iter = max.iter,
Expand All @@ -36,7 +37,7 @@ estQml <- function(model,
hessian = OFIM.hessian, calc.se = calc.se,
EFIM.parametric = EFIM.parametric, verbose = verbose,
FIM = FIM, robust.se = robust.se, NA__ = -999,
epsilon = epsilon)
epsilon = epsilon, R.max = R.max)
SE <- calcSE_da(calc.se = calc.se, FIM$vcov, rawLabels = FIM$raw.labels,
NA__ = -999)
modelSE <- getSE_Model(model, se = SE, method = "qml",
Expand Down
2 changes: 2 additions & 0 deletions R/method_settings_da.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ getMethodSettingsDA <- function(method, args = NULL) {
EFIM.S = 100,
EFIM.parametric = TRUE,
robust.se = FALSE,
R.max = 1e6,
max.iter = 500,
max.step = 1,
fix.estep = TRUE,
Expand All @@ -40,6 +41,7 @@ getMethodSettingsDA <- function(method, args = NULL) {
EFIM.S = 100,
EFIM.parametric = TRUE,
robust.se = FALSE,
R.max = 1e6,
max.iter = 500,
max.step = NULL,
fix.estep = NULL,
Expand Down
13 changes: 9 additions & 4 deletions R/modsem_da.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
#' from the model), or non-parametrically (stochastically sampled)? If you believe that
#' normality assumptions are violated, \code{EFIM.parametric = FALSE} might be the better option.
#'
#' @param R.max Maximum population size (not sample size) used in the calculated of the expected
#' fischer information matrix.
#'
#' @param robust.se should robust standard errors be computed? Meant to be used for QML,
#' can be unreliable with the LMS approach.
#'
Expand Down Expand Up @@ -178,6 +181,7 @@ modsem_da <- function(model.syntax = NULL,
OFIM.hessian = NULL,
EFIM.parametric = NULL,
robust.se = NULL,
R.max = NULL,
max.iter = NULL,
max.step = NULL,
fix.estep = NULL,
Expand Down Expand Up @@ -221,6 +225,7 @@ modsem_da <- function(model.syntax = NULL,
OFIM.hessian = OFIM.hessian,
EFIM.parametric = EFIM.parametric,
robust.se = robust.se,
R.max = R.max,
max.iter = max.iter,
max.step = max.step,
fix.estep = fix.estep,
Expand All @@ -230,12 +235,10 @@ modsem_da <- function(model.syntax = NULL,
)
)

if (!method %in% c("lms", "qml")) {
stop2("Method must be either 'lms' or 'qml'")
}
stopif(!method %in% c("lms", "qml"), "Method must be either 'lms' or 'qml'")

if (args$center.data) {
data <- lapplyDf(data, FUN = function(x) x - mean(x))
data <- lapplyDf(data, FUN = function(x) x - mean(x, na.rm = TRUE))
}
if (args$standardize.data) {
data <- lapplyDf(data, FUN = scaleIfNumeric, scaleFactor = FALSE)
Expand Down Expand Up @@ -280,6 +283,7 @@ modsem_da <- function(model.syntax = NULL,
max.iter = args$max.iter,
epsilon = args$epsilon,
optimizer = args$optimizer,
R.max = args$R.max,
...
),
"lms" = emLms(model,
Expand All @@ -296,6 +300,7 @@ modsem_da <- function(model.syntax = NULL,
epsilon = args$epsilon,
optimizer = args$optimizer,
fix.estep = args$fix.estep,
R.max = args$R.max,
...
)
)
Expand Down
4 changes: 4 additions & 0 deletions man/modsem_da.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_lms.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ m1 <- "
"

est1 <- modsem(m1, oneInt, method = "lms", optimize = TRUE, verbose = TRUE,
convergence = 1e-2)
convergence = 1e-2, R.max = 50000)
plot_interaction("X", "Z", "Y", "X:Z", -3:3, c(-0.5, 0.5), est1)
print(summary(est1, adjusted.stat = TRUE))

Expand Down

0 comments on commit fd695e4

Please sign in to comment.