From 12ee173d03f429792f0dd9cf5027169eee59ea5f Mon Sep 17 00:00:00 2001 From: Joe Feldman Date: Wed, 4 Jan 2023 15:48:34 -0500 Subject: [PATCH] fixed bug in posterior predictive simulation function --- .Rhistory | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ R/Helpers.R | 1 + R/Software.R | 6 ++-- 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/.Rhistory b/.Rhistory index 33e0852..3c7a0db 100644 --- a/.Rhistory +++ b/.Rhistory @@ -204,3 +204,80 @@ pred<- get_predictive_Y(mcmc, # GMC mcmc object nobs = dim(X)[1], # number of observations in predictive data set; we use n = dim(X)[1] nsets = dim(X)[1] + 1, # number of predictive data sets to create seed = 10) +#' Title +#' +#' @param Y +#' @param factor.cols +#' +#' @return +#' +#' @examples +format_data<-function(Y,factor.cols){ +cat<- NULL +cat_col = 1 +cat_col_names = NULL +bin_col_names = NULL +for(i in factor.cols){ +if(nlevels(Y[,i]) >2){ +one.hot.cat<- model.matrix(~. -1,data =model.frame(~.,data = Y[,i], na.action = na.pass)) +cat<- cbind(cat,one.hot.cat) +cat_col_names = c(cat_col_names, rep(names(Y)[i], nlevels(Y[,i]))) +} +else{ +one.hot.bin<- model.matrix(~. , data = model.frame(~.,data = Y[,i], na.action = na.pass))[,2] +cat<-cbind(cat,one.hot.bin) +bin_col_names = c(bin_col_names, rep(names(Y)[i],1)) +} +} +one.hot.cols<- ncol(cat) # number of one hot encoded categories +Y_mod<- cbind(cat,Y[,-factor.cols]) +col_mem = c(cat_col_names,bin_col_names,colnames(Y_mod)[(one.hot.cols+1):ncol(Y_mod)])# data frame used for sampler +return(list( cat_col_names = cat_col_names, +bin_col_names = bin_col_names, col_mem = col_mem)) +} +library(GMCImpute) +GMCImpute::format_data +library(GMCImpute) +?GMC.mcmc +library(GMCImpute) +?GMC.mcmc +library(GMCImpute) +knitr::opts_chunk$set(echo = TRUE) +library(ggplot2) +library(gridExtra) +library(purrr) +library(devtools) +remotes::install_github('jfeldman396/GMCImpute', force = T) +library(GMCImpute) +set.seed(47) +num= 500 +X1<-rnorm(num) +X2<- rpois(num,5*abs(X1)) +X3<- as.factor(rbernoulli(num,pnorm(-.5+scale(X2)))) +X<- data.frame(X1,X2,X3) +R = t(sapply(1:num, function(x)rbernoulli(2, p = pnorm(-.5 + .5*abs(X1[x]))))) # missingness mechanism +X_noMis = X +X[which(R[,1] == T),2] = NA +X[which(R[,2] == T),3] = NA +hyperparams = list(delta = 10, +k.star = 2, +plugin.threshold = 100, +a_alpha = 1, +b_alpha = 3, +nu_mix = 4, +kappa_0 = .001, +nu = 3, +a1 = 2, +a2 = 3, +a.sigma = 1, +b.sigma = .3, +D_0 = diag(1,2)) +mcmc<-GMC.mcmc(Data = X, nImp = 5,hyperparams = hyperparams, burn = 1500,nsamp = 2000, seed = 47) +library(GMCImpute) +knitr::opts_chunk$set(echo = TRUE) +library(ggplot2) +library(gridExtra) +library(purrr) +mcmc<-GMC.mcmc(Data = X, nImp = 5,hyperparams = hyperparams, burn = 1500,nsamp = 2000, seed = 47) +?GMC.mcmc +setwd("~/Desktop/Research/MI Paper") diff --git a/R/Helpers.R b/R/Helpers.R index 280afd8..fb3cd1f 100644 --- a/R/Helpers.R +++ b/R/Helpers.R @@ -42,6 +42,7 @@ format_data<-function(Y,factor.cols){ Y_mod<- cbind(cat,Y[,-factor.cols]) col_mem = c(cat_col_names,bin_col_names,colnames(Y_mod)[(one.hot.cols+1):ncol(Y_mod)])# data frame used for sampler + #Y_mod_comp = Y_mod %>% tidyr::drop_na() #complete observations return(list( cat_col_names = cat_col_names, bin_col_names = bin_col_names, col_mem = col_mem)) } diff --git a/R/Software.R b/R/Software.R index 919fdf1..0b98e15 100644 --- a/R/Software.R +++ b/R/Software.R @@ -744,9 +744,11 @@ get_predictive_Y<-function(mcmc, if(length(unique(z))>1){ cs<- sample(unique_z, n, replace = T, prob = pi_h[unique_z]) etas = NULL - etas = do.call(rbind, t(sapply(unique(cs),function(x) return(rbind(etas, + #need to fix this + etas = do.call(rbind, data.frame(t(sapply(unique(cs),function(x) return(rbind(etas, mvrnorm(sum(cs == x),mu[[x]], - Delta[[x]])))))) + Delta[[x]]))))))) + }else{ etas = mvrnorm(n,mu[[unique_z]], Delta[[unique_z]]) }