Merge pull request #81 from fasrc/release_v0.2.4

Release v0.2.4
NSAPH-Software · Jul 11, 2021 · 3b42660 · 3b42660
2 parents d7ea9f3 + d230b0f
commit 3b42660
Show file tree

Hide file tree

Showing 255 changed files with 11,417 additions and 1,321 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -64,10 +64,17 @@ jobs:
             eval sudo $cmd
           done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
 
+      - name: Install omp
+        if: runner.os == 'macOS'
+        run: |
+              brew install libomp
+
       - name: Install dependencies
         run: |
           remotes::install_deps(dependencies = TRUE)
           remotes::install_cran("rcmdcheck")
+          remotes::install_cran("devtools")
+          devtools::install()
         shell: Rscript {0}
 
       - name: Check

diff --git a/CausalGPS.Rproj b/CausalGPS.Rproj
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: CausalGPS
 Type: Package
 Title: Matching on generalized propensity scores with continuous exposures
-Version: 0.2.3
+Version: 0.2.4
 Authors@R: c(
     person("Naeem", "Khoshnevis", email = "[email protected]",
            role=c("aut","cre"), 
@@ -23,7 +23,7 @@ Description: An R package for implementing matching on generalized propensity
 License: GPL-3
 URL: https://github.com/fasrc/CausalGPS
 BugReports: https://github.com/fasrc/CausalGPS/issues
-Copyright: See COPYRIGHTS for details.
+Copyright: Harvard University
 Imports: 
     parallel,
     data.table,
@@ -42,7 +42,8 @@ Imports:
     logger,
     glue,
     Rcpp,
-    gnm
+    gnm,
+    tidyr
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.1

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,17 +8,16 @@ S3method(print,gpsm_erf)
 S3method(print,gpsm_pspop)
 S3method(summary,gpsm_erf)
 S3method(summary,gpsm_pspop)
-export(check_args)
+export(absolute_corr_fun)
+export(absolute_weighted_corr_fun)
 export(check_covar_balance)
-export(compute_outer)
-export(create_matching)
-export(estimate_erf)
+export(compile_pseudo_pop)
 export(estimate_gps)
-export(estimate_hr)
-export(estimate_semi_erf)
-export(gen_pseudo_pop)
-export(gen_syn_data)
-export(matching_l1)
+export(estimate_npmetric_erf)
+export(estimate_pmetric_erf)
+export(estimate_semipmetric_erf)
+export(generate_pseudo_pop)
+export(generate_syn_data)
 export(set_logger)
 import(KernSmooth)
 import(SuperLearner)
@@ -32,10 +31,4 @@ import(xgboost)
 importFrom(Rcpp,sourceCpp)
 importFrom(ggplot2,autoplot)
 importFrom(rlang,.data)
-importFrom(stats,approx)
-importFrom(stats,cor)
-importFrom(stats,density)
-importFrom(stats,rnorm)
-importFrom(stats,rt)
-importFrom(stats,runif)
 useDynLib(CausalGPS, .registration = TRUE)
diff --git a/NEWS.md b/NEWS.md
@@ -1,22 +1,67 @@
-## CausalGPS 0.2.3 (2021-05-12)
+
+## CausalGPS (devloping version)
 
 ### Added
+
 ### Changed
+
 ### Fixed
 
-*  Fixed documentations
+### Removed
+
+## CausalGPS 0.2.4 (2021-07-11)
+
+### Added
+
+* OpenMP for Rcpp code
+* optimized_compile
+* log_system_info()
+* Frequently asked questions
+* logo 
+
+### Changed
+
+* estimate_gps.Rmd
+* estimate_semi_erf -> estimate_semipmetric_erf
+* estimate_erf -> estimate_npmetric_erf
+* estimate_hr -> estimate_pmetric_erf
+* gen_pseudo_pop -> generate_pseudo_pop
+* gen_syn_data -> generate_syn_data
+* estimate_erf accepts counter as an input
+* estimate_erf can use multiple cores
+* generating_pseudo_population.Rmd
+* estimate_erf function description
+* estimate_hr function description
+* estimate_semi_erf function description
+* compute_risk function description and return value
+* outcome_models.Rmd
+* generate_synthetic_data.Rmd
+
+
+### Fixed
+
+* RCpp parLapply worker processors arguments
 
 ### Removed
 
+* running_appr
+
+
+## CausalGPS 0.2.3 (2021-05-12)
+
+### Fixed
 
+*  Fixed documentations
 
 ## CausalGPS 0.2.2 (2021-05-12)
 
 ### Added
+
 * estimate_semi_erf
 * estimate_hr
 
 ### Changed
+
 * Package name: GPSmatching --> CausalGPS 
 
 ### Fixed

diff --git a/R/absolute_corr_fun.R b/R/absolute_corr_fun.R
@@ -1,59 +1,58 @@
 #' @title
-#' Check covariate balance
+#' Check Covariate Balance Using Absolute Approach
 #'
 #' @description
 #' Checks covariate balance based on absolute correlations for given data sets.
 #'
 #' @param w A vector of observed continuous exposure variable.
-#' @param c A data frame or matrix of observed covariates variable.
-#' @param  nthread Number of available threads to use.
+#' @param c A data table of observed covariates variable.
 #' @return
-#' The function returns a list saved the measure related to covariate balance
-#' \code{absolute_corr}: the absolute correlations for each pre-exposure
+#' The function returns a list including:
+#' - \code{absolute_corr}: the absolute correlations for each pre-exposure
 #'  covairates;
-#' \code{mean_absolute_corr}: the average absolute correlations for all
+#' - \code{mean_absolute_corr}: the average absolute correlations for all
 #'  pre-exposure covairates.
-#' @importFrom stats cor
 #'
-#' @keywords internal
-
-absolute_corr_fun <- function(w,
-                              c,
-                              nthread){
-
-  # w type should be numeric (polyserial requirments)
-  if (!is.numeric(w)) {
-     w <- unlist(w)
-     if (!is.numeric(w)) {
-       stop('w type should be numeric.')
-    }
-  }
+#' @export
+#' @examples
+#' set.seed(291)
+#' n <- 100
+#' mydata <- generate_syn_data(sample_size=100)
+#' year <- sample(x=c("2001","2002","2003","2004","2005"),size = n, replace = TRUE)
+#' region <- sample(x=c("North", "South", "East", "West"),size = n, replace = TRUE)
+#' mydata$year <- as.factor(year)
+#' mydata$region <- as.factor(region)
+#' mydata$cf5 <- as.factor(mydata$cf5)
+#' data.table::setDT(mydata)
+#' cor_val <- absolute_corr_fun(mydata[,2], mydata[, 3:length(mydata)])
+#' print(cor_val$mean_absolute_corr)
+#'
+absolute_corr_fun <- function(w, c){
+
+  if (class(w)[1] != "data.table"){stop("w should be a data.table.")}
+  if (class(c)[1] != "data.table"){stop("c should be a data.table.")}
 
-  # convert c to datatable
-  data.table::setDT(c)
 
   # detect numeric columns
   col_n <- colnames(c)[unlist(lapply(c, is.numeric))]
 
-  # detect factorial columns
+  # detect factor columns
   col_f <- colnames(c)[unlist(lapply(c, is.factor))]
 
   absolute_corr_n <- absolute_corr_f <- NULL
 
-  platform_os <- .Platform$OS.type
-
   if (length(col_n) > 0){
       absolute_corr_n<- lapply(col_n,function(i){
-        abs(cor(w,c[[i]],method = c("spearman")))})
+        abs(stats::cor(w,c[[i]],method = c("spearman")))})
   }
 
   if (length(col_f) > 0) {
+      w_numeric <- as.list(w[,1])[[colnames(w[,1])[1]]]
       absolute_corr_f<- lapply(col_f,function(i){
-        abs(polycor::polyserial(w,c[[i]]))})
+        abs(polycor::polyserial(w_numeric,c[[i]]))})
   }
 
   absolute_corr <- c(unlist(absolute_corr_f), unlist(absolute_corr_n))
-
   return(list(absolute_corr = absolute_corr,
               mean_absolute_corr = mean(absolute_corr)))
 }
diff --git a/R/absolute_weighted_corr_fun.R b/R/absolute_weighted_corr_fun.R
@@ -1,32 +1,45 @@
 #' @title
-#' Check covariate balance (weighted correlation)
+#' Check  Weighted Covariate Balance Using Absolute Approach
 #'
 #' @description
 #' Checks covariate balance based on absolute weighted correlations for
 #' given data sets.
 #'
 #' @param w A vector of observed continuous exposure variable.
-#' @param ipw A vector of weights.
-#' @param c A data frame or matrix of observed covariates variable.
+#' @param vw A vector of weights.
+#' @param c A data.table of observed covariates variable.
 #' @return
 #' The function returns a list saved the measure related to covariate balance
 #' \code{absolute_corr}: the absolute correlations for each pre-exposure
 #'  covairates;
 #' \code{mean_absolute_corr}: the average absolute correlations for all
 #'  pre-exposure covairates.
 #'
-#' @keywords internal
+#' @export
+#'
+#' @examples
+#' set.seed(639)
+#' n <- 100
+#' mydata <- generate_syn_data(sample_size=100)
+#' year <- sample(x=c("2001","2002","2003","2004","2005"),size = n, replace = TRUE)
+#' region <- sample(x=c("North", "South", "East", "West"),size = n, replace = TRUE)
+#' mydata$year <- as.factor(year)
+#' mydata$region <- as.factor(region)
+#' mydata$cf5 <- as.factor(mydata$cf5)
+#' data.table::setDT(mydata)
+#' cor_val <- absolute_weighted_corr_fun(mydata[,2],
+#'                                       data.table::data.table(runif(n)),
+#'                                       mydata[, 3:length(mydata)])
+#' print(cor_val$mean_absolute_corr)
+#'
 absolute_weighted_corr_fun <- function(w,
-                                       ipw,
+                                       vw,
                                        c){
 
-  # w type should be numeric (polyserial requirments)
-  if (!is.numeric(w)) {
-    w <- unlist(w)
-    if (!is.numeric(w)) {
-      stop('w type should be numeric.')
-    }
-  }
+
+  if (class(w)[1] != "data.table"){stop("w should be a data.table.")}
+  if (class(vw)[1] != "data.table"){stop("vw should be a data.table.")}
+  if (class(c)[1] != "data.table"){stop("c should be a data.table.")}
 
   # detect numeric columns
   col_n <- colnames(c)[unlist(lapply(c, is.numeric))]
@@ -38,18 +51,22 @@ absolute_weighted_corr_fun <- function(w,
 
   if (length(col_n) > 0) {
     absolute_corr_n<- sapply(col_n,function(i){
-      abs(wCorr::weightedCorr(w, c[[i]], weights = ipw,
+      abs(wCorr::weightedCorr(as.list(w)[[colnames(w)[1]]],
+                              c[[i]],
+                              weights = as.list(vw)[[colnames(vw)[1]]],
                               method = c("spearman")))})
   }
 
   if (length(col_f) > 0) {
     absolute_corr_f<- sapply(col_f,function(i){
-      abs(wCorr::weightedCorr(w,c[[i]], weights = ipw,
+      abs(wCorr::weightedCorr(as.list(w)[[colnames(w)[1]]],
+                              c[[i]],
+                              weights = as.list(vw)[[colnames(vw)[1]]],
                               method = c("Polyserial")))})
   }
 
   absolute_corr <- c(absolute_corr_f, absolute_corr_n)
 
   return(list(absolute_corr = absolute_corr,
               mean_absolute_corr = mean(absolute_corr)))
-}
+}
diff --git a/R/causalgps_package.R b/R/causalgps_package.R
@@ -2,8 +2,8 @@
 #' The 'CausalGPS' package.
 #'
 #' @description
-#' An R package for implementing matching on generalized propensity scores with
-#' continuous exposures.
+#' An R package for implementing matching and weighting on generalized
+#' propensity scores with continuous exposures.
 #'
 #' @details
 #' We developed an innovative approach for estimating causal effects using

diff --git a/R/causalgps_smooth.R b/R/causalgps_smooth.R