Skip to content

Commit

Permalink
fixed bug in posterior predictive simulation function
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe Feldman authored and Joe Feldman committed Jan 4, 2023
1 parent 478c6da commit 12ee173
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 2 deletions.
77 changes: 77 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,80 @@ pred<- get_predictive_Y(mcmc, # GMC mcmc object
nobs = dim(X)[1], # number of observations in predictive data set; we use n = dim(X)[1]
nsets = dim(X)[1] + 1, # number of predictive data sets to create
seed = 10)
#' Title
#'
#' @param Y
#' @param factor.cols
#'
#' @return
#'
#' @examples
format_data<-function(Y,factor.cols){
cat<- NULL
cat_col = 1
cat_col_names = NULL
bin_col_names = NULL
for(i in factor.cols){
if(nlevels(Y[,i]) >2){
one.hot.cat<- model.matrix(~. -1,data =model.frame(~.,data = Y[,i], na.action = na.pass))
cat<- cbind(cat,one.hot.cat)
cat_col_names = c(cat_col_names, rep(names(Y)[i], nlevels(Y[,i])))
}
else{
one.hot.bin<- model.matrix(~. , data = model.frame(~.,data = Y[,i], na.action = na.pass))[,2]
cat<-cbind(cat,one.hot.bin)
bin_col_names = c(bin_col_names, rep(names(Y)[i],1))
}
}
one.hot.cols<- ncol(cat) # number of one hot encoded categories
Y_mod<- cbind(cat,Y[,-factor.cols])
col_mem = c(cat_col_names,bin_col_names,colnames(Y_mod)[(one.hot.cols+1):ncol(Y_mod)])# data frame used for sampler
return(list( cat_col_names = cat_col_names,
bin_col_names = bin_col_names, col_mem = col_mem))
}
library(GMCImpute)
GMCImpute::format_data
library(GMCImpute)
?GMC.mcmc
library(GMCImpute)
?GMC.mcmc
library(GMCImpute)
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(gridExtra)
library(purrr)
library(devtools)
remotes::install_github('jfeldman396/GMCImpute', force = T)
library(GMCImpute)
set.seed(47)
num= 500
X1<-rnorm(num)
X2<- rpois(num,5*abs(X1))
X3<- as.factor(rbernoulli(num,pnorm(-.5+scale(X2))))
X<- data.frame(X1,X2,X3)
R = t(sapply(1:num, function(x)rbernoulli(2, p = pnorm(-.5 + .5*abs(X1[x]))))) # missingness mechanism
X_noMis = X
X[which(R[,1] == T),2] = NA
X[which(R[,2] == T),3] = NA
hyperparams = list(delta = 10,
k.star = 2,
plugin.threshold = 100,
a_alpha = 1,
b_alpha = 3,
nu_mix = 4,
kappa_0 = .001,
nu = 3,
a1 = 2,
a2 = 3,
a.sigma = 1,
b.sigma = .3,
D_0 = diag(1,2))
mcmc<-GMC.mcmc(Data = X, nImp = 5,hyperparams = hyperparams, burn = 1500,nsamp = 2000, seed = 47)
library(GMCImpute)
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(gridExtra)
library(purrr)
mcmc<-GMC.mcmc(Data = X, nImp = 5,hyperparams = hyperparams, burn = 1500,nsamp = 2000, seed = 47)
?GMC.mcmc
setwd("~/Desktop/Research/MI Paper")
1 change: 1 addition & 0 deletions R/Helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ format_data<-function(Y,factor.cols){

Y_mod<- cbind(cat,Y[,-factor.cols])
col_mem = c(cat_col_names,bin_col_names,colnames(Y_mod)[(one.hot.cols+1):ncol(Y_mod)])# data frame used for sampler
#Y_mod_comp = Y_mod %>% tidyr::drop_na() #complete observations
return(list( cat_col_names = cat_col_names,
bin_col_names = bin_col_names, col_mem = col_mem))
}
Expand Down
6 changes: 4 additions & 2 deletions R/Software.R
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,11 @@ get_predictive_Y<-function(mcmc,
if(length(unique(z))>1){
cs<- sample(unique_z, n, replace = T, prob = pi_h[unique_z])
etas = NULL
etas = do.call(rbind, t(sapply(unique(cs),function(x) return(rbind(etas,
#need to fix this
etas = do.call(rbind, data.frame(t(sapply(unique(cs),function(x) return(rbind(etas,
mvrnorm(sum(cs == x),mu[[x]],
Delta[[x]]))))))
Delta[[x]])))))))

}else{
etas = mvrnorm(n,mu[[unique_z]], Delta[[unique_z]])
}
Expand Down

0 comments on commit 12ee173

Please sign in to comment.