diff --git a/DESCRIPTION b/DESCRIPTION index b3cb99a..b13bd49 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: Rwtdttt Title: Parametric Waiting Time Distribution estimation -Version: 0.1.0 +Version: 0.2.0 Author: Sabrina Giometto, Malcolm Gillies, Olga Paoletti, Henrik Støvring Maintainer: Henrik Støvring Description: Estimation of prescription durations and treatment @@ -15,17 +15,14 @@ License: GPL (>= 3) Roxygen: list(markdown = TRUE) Imports: bbmle, - class, data.table (>= 1.15.0), - dplyr, - haven, methods, graphics, rlang, - stats, - numDeriv + stats Suggests: - testthat (>= 3.0.0) + testthat (>= 3.0.0), + haven Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true diff --git a/NAMESPACE b/NAMESPACE index cf8c4ed..822b756 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,7 +22,6 @@ importFrom(graphics,curve) importFrom(graphics,hist) importFrom(methods,as) importFrom(methods,is) -importFrom(numDeriv,grad) importFrom(rlang,enquo) importFrom(rlang,eval_tidy) importFrom(stats,as.formula) @@ -30,6 +29,7 @@ importFrom(stats,formula) importFrom(stats,model.frame) importFrom(stats,model.matrix) importFrom(stats,na.pass) +importFrom(stats,numericDeriv) importFrom(stats,pexp) importFrom(stats,pnorm) importFrom(stats,pweibull) @@ -38,3 +38,4 @@ importFrom(stats,qnorm) importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,terms) +importMethodsFrom(bbmle,summary) diff --git a/R/dfunctions.R b/R/dfunctions.R index 9bda063..f100220 100644 --- a/R/dfunctions.R +++ b/R/dfunctions.R @@ -7,13 +7,9 @@ #' @param delta width of interval with positive support (x in (0; delta)) #' @param log logical; if TRUE, density values are returned on log-scale. #' -#' @return +#' @return Density. The length of the result is the maximum of the lengths of the numerical arguments. #' @export #' @importFrom stats pnorm -#' -#' @examples -#' -#' dlnorm <- function(x, logitp, mu, lnsigma, delta = 1, log = FALSE) { @@ -44,10 +40,8 @@ dlnorm <- function(x, logitp, mu, lnsigma, delta = 1, log = FALSE) { #' @param delta width of interval with positive support (x in (0; delta)) #' @param log logical; if TRUE, probabilities p are given as log(p) #' -#' @return +#' @return Density. The length of the result is the maximum of the lengths of the numerical arguments. #' @export -#' -#' @examples dweib <- function(x, logitp, lnalpha, lnbeta, delta = 1, log = FALSE) { prob <- exp(logitp) / (1 + exp(logitp)) @@ -73,10 +67,8 @@ dweib <- function(x, logitp, lnalpha, lnbeta, delta = 1, log = FALSE) { #' @param delta width of interval with positive support (x in (0; delta)) #' @param log logical; if TRUE, probabilities p are given as log(p) #' -#' @return +#' @return Density. The length of the result is the maximum of the lengths of the numerical arguments. #' @export -#' -#' @examples dexp <- function(x, logitp, lnbeta, delta = 1, log = FALSE) { prob <- exp(logitp) / (1 + exp(logitp)) diff --git a/R/sandwich.R b/R/sandwich.R index eebaa3b..c000ce3 100644 --- a/R/sandwich.R +++ b/R/sandwich.R @@ -5,17 +5,13 @@ #' @param fit an object of class "wtd" returned by `ranwtdttt()` #' #' @return sand_vcov returns a matrix -#' @importFrom stats model.matrix -#' @importFrom numDeriv grad -#' -#' @examples +#' @importFrom stats model.matrix numericDeriv sand_vcov <- function(fit) { # parse model formula components parm_form <- unlist(strsplit(gsub(" ", "", unlist(strsplit(fit@formula, ":", fixed=T))[2]), ",", fixed=T)) ##### # based on code from bbmle::calc_mle2_function -# FIXME: painfully slow # FIXME: redo without special case code for each density type # FIXME: redo to handle non-standard parameter naming @@ -40,80 +36,70 @@ sand_vcov <- function(fit) { vpos <- list() for (i in seq(along=parm_form)) { vname <- vars[i] ## name of variable - vpos[[vname]] <- which(parnames==vname) + vpos[[vname]] <- i } - score_mat <- t(sapply(1:length(fit@data[[1]]), - - function(i) { - - logl <- function(x) { - - if(fit@dist=="lnorm") { - - # FIXME this is very ugly - # XXXX can we use fit@minuslogl somehow instead? - - - mm1 <- model.matrix(formula(models[vpos[["logitp"]]]), data=as.data.frame(fit@data))[i,,drop=F] - mm2 <- model.matrix(formula(models[vpos[["mu"]]]), data=as.data.frame(fit@data), drop=F)[i,,drop=F] - mm3 <- model.matrix(formula(models[vpos[["lnsigma"]]]), data=as.data.frame(fit@data), drop=F)[i,,drop=F] - - dlnorm(as.numeric(getElement(fit@data, fit@depvar)[i]), - - logitp=mm1 %*% matrix(x[1:ncol(mm1)]), - - mu=mm2 %*% matrix(x[(ncol(mm1)+1):(ncol(mm1)+ncol(mm2))]), - lnsigma=mm3 %*% matrix(x[(ncol(mm1)+ncol(mm2)+1):(ncol(mm1)+ncol(mm2)+ncol(mm3))]), + if(fit@dist=="lnorm") { - delta=fit@delta, + mm1 <- model.matrix(formula(models[vpos[["logitp"]]]), data=as.data.frame(fit@data), drop=F) + mm2 <- model.matrix(formula(models[vpos[["mu"]]]), data=as.data.frame(fit@data), drop=F) + mm3 <- model.matrix(formula(models[vpos[["lnsigma"]]]), data=as.data.frame(fit@data), drop=F) - log=T) + myenv <- new.env() + myenv$logitp <- mm1 %*% fit@coef[1:ncol(mm1)] + myenv$mu <- mm2 %*% fit@coef[(ncol(mm1)+1):(ncol(mm1)+ncol(mm2))] + myenv$lnsigma <- mm3%*% fit@coef[(ncol(mm1)+ncol(mm2)+1):(ncol(mm1)+ncol(mm2)+ncol(mm3))] + myenv$x <- as.numeric(getElement(fit@data, fit@depvar)) - } else if(fit@dist=="weib") { + sc1 <- diag(attr(numericDeriv( + quote(dlnorm(x, logitp, mu, lnsigma, delta = fit@delta, log=T)), c("logitp"), myenv), "gradient")) + sc2 <- diag(attr(numericDeriv( + quote(dlnorm(x, logitp, mu, lnsigma, delta = fit@delta, log=T)), c("mu"), myenv), "gradient")) + sc3 <- diag(attr(numericDeriv( + quote(dlnorm(x, logitp, mu, lnsigma, delta = fit@delta, log=T)), c("lnsigma"), myenv), "gradient")) - mm1 <- model.matrix(formula(models[vpos[["logitp"]]]), data=as.data.frame(fit@data))[i,,drop=F] - mm2 <- model.matrix(formula(models[vpos[["lnalpha"]]]), data=as.data.frame(fit@data))[i,,drop=F] - mm3 <- model.matrix(formula(models[vpos[["lnbeta"]]]), data=as.data.frame(fit@data))[i,,drop=F] + score_mat <- cbind(mm1 * sc1, mm2 * sc2, mm3 * sc3) - dweib(as.numeric(getElement(fit@data, fit@depvar)[i]), + } else if(fit@dist=="weib") { - logitp=mm1 %*% matrix(x[1:ncol(mm1)]), + mm1 <- model.matrix(formula(models[vpos[["logitp"]]]), data=as.data.frame(fit@data), drop=F) + mm2 <- model.matrix(formula(models[vpos[["lnalpha"]]]), data=as.data.frame(fit@data), drop=F) + mm3 <- model.matrix(formula(models[vpos[["lnbeta"]]]), data=as.data.frame(fit@data), drop=F) - lnalpha=mm2 %*% matrix(x[(ncol(mm1)+1):(ncol(mm1)+ncol(mm2))]), + myenv <- new.env() + myenv$logitp <- mm1 %*% fit@coef[1:ncol(mm1)] + myenv$lnalpha <- mm2 %*% fit@coef[(ncol(mm1)+1):(ncol(mm1)+ncol(mm2))] + myenv$lnbeta <- mm3%*% fit@coef[(ncol(mm1)+ncol(mm2)+1):(ncol(mm1)+ncol(mm2)+ncol(mm3))] + myenv$x <- as.numeric(getElement(fit@data, fit@depvar)) - lnbeta=mm3 %*% matrix(x[(ncol(mm1)+ncol(mm2)+1):(ncol(mm1)+ncol(mm2)+ncol(mm3))]), + sc1 <- diag(attr(numericDeriv( + quote(dweib(x, logitp, lnalpha, lnbeta, delta = fit@delta, log=T)), c("logitp"), myenv), "gradient")) + sc2 <- diag(attr(numericDeriv( + quote(dweib(x, logitp, lnalpha, lnbeta, delta = fit@delta, log=T)), c("lnalpha"), myenv), "gradient")) + sc3 <- diag(attr(numericDeriv( + quote(dweib(x, logitp, lnalpha, lnbeta, delta = fit@delta, log=T)), c("lnbeta"), myenv), "gradient")) - delta=fit@delta, + score_mat <- cbind(mm1 * sc1, mm2 * sc2, mm3 * sc3) - log=T) + } else if(fit@dist=="exp") { - } else if(fit@dist=="exp") { + mm1 <- model.matrix(formula(models[vpos[["logitp"]]]), data=as.data.frame(fit@data), drop=F) + mm2 <- model.matrix(formula(models[vpos[["lnbeta"]]]), data=as.data.frame(fit@data), drop=F) - mm1 <- model.matrix(formula(models[vpos[["logitp"]]]), data=as.data.frame(fit@data))[i,,drop=F] - mm2 <- model.matrix(formula(models[vpos[["lnbeta"]]]), data=as.data.frame(fit@data))[i,,drop=F] + myenv <- new.env() + myenv$logitp <- mm1 %*% fit@coef[1:ncol(mm1)] + myenv$lnbeta <- mm2 %*% fit@coef[(ncol(mm1)+1):(ncol(mm1)+ncol(mm2))] + myenv$x <- as.numeric(getElement(fit@data, fit@depvar)) - dexp(as.numeric(getElement(fit@data, fit@depvar)[i]), + sc1 <- diag(attr(numericDeriv( + quote(dexp(x, logitp, lnbeta, delta = fit@delta, log=T)), c("logitp"), myenv), "gradient")) + sc2 <- diag(attr(numericDeriv( + quote(dexp(x, logitp, lnbeta, delta = fit@delta, log=T)), c("lnbeta"), myenv), "gradient")) - logitp=mm1 %*% matrix(x[1:ncol(mm1)]), + score_mat <- cbind(mm1 * sc1, mm2 * sc2) - lnbeta=mm2 %*% matrix(x[(ncol(mm1)+1):(ncol(mm1)+ncol(mm2))]), - - delta=fit@delta, - - log=T) - - } - } - - - - grad(logl, fit@coef) - - } - - )) + } colnames(score_mat) <- names(fit@coef) diff --git a/R/wtd-class.R b/R/wtd-class.R index 952c0af..e4431bb 100644 --- a/R/wtd-class.R +++ b/R/wtd-class.R @@ -17,6 +17,7 @@ #' #' @exportClass wtd #' @importClassesFrom bbmle mle2 +#' @importMethodsFrom bbmle summary #' setClass("wtd", contains="mle2", slots=c(delta="numeric", dist="character", depvar="character", idvar="character", diff --git a/Rwtdttt.pdf b/Rwtdttt.pdf index 0fcd8cc..a79f66f 100644 Binary files a/Rwtdttt.pdf and b/Rwtdttt.pdf differ diff --git a/man/dexp.Rd b/man/dexp.Rd index 08893d1..b09ddb4 100644 --- a/man/dexp.Rd +++ b/man/dexp.Rd @@ -17,6 +17,9 @@ dexp(x, logitp, lnbeta, delta = 1, log = FALSE) \item{log}{logical; if TRUE, probabilities p are given as log(p)} } +\value{ +Density. The length of the result is the maximum of the lengths of the numerical arguments. +} \description{ The Exponential distribution } diff --git a/man/dlnorm.Rd b/man/dlnorm.Rd index 900e200..7fe82a0 100644 --- a/man/dlnorm.Rd +++ b/man/dlnorm.Rd @@ -19,6 +19,9 @@ dlnorm(x, logitp, mu, lnsigma, delta = 1, log = FALSE) \item{log}{logical; if TRUE, density values are returned on log-scale.} } +\value{ +Density. The length of the result is the maximum of the lengths of the numerical arguments. +} \description{ The Lognormal distribution } diff --git a/man/dweib.Rd b/man/dweib.Rd index 026c065..49beb48 100644 --- a/man/dweib.Rd +++ b/man/dweib.Rd @@ -19,6 +19,9 @@ dweib(x, logitp, lnalpha, lnbeta, delta = 1, log = FALSE) \item{log}{logical; if TRUE, probabilities p are given as log(p)} } +\value{ +Density. The length of the result is the maximum of the lengths of the numerical arguments. +} \description{ The Weibull distribution } diff --git a/sandpit/examples.R b/sandpit/examples.R index b78aeab..c5a274c 100644 --- a/sandpit/examples.R +++ b/sandpit/examples.R @@ -358,3 +358,19 @@ summary(fit4) plot(fit4) predict(fit4) + +# Extract a small subset and test with covariates +df <- haven::read_dta(system.file("extdata", "drugpakud.dta", package="Rwtdttt")) +set.seed(1) +ids <- sample(unique(df$id), size=1000) +df2 <- df[df$id %in% ids,] + +set.seed(1) +fit1 <- ranwtdttt(data = df2, + rxdate ~ dlnorm(logitp, mu, lnsigma), + id = "id", + start = as.Date('2014-01-01'), end = as.Date('2014-12-31'), + subset = atc=='A10AC01'&!is.na(ddd), reverse=T, nsamp=5, + parameters=list(mu~log(ddd)) +) +summary(fit1) \ No newline at end of file