Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plot missing data indicator matrix #123

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Imports:
mice,
purrr,
rlang,
scales,
stats,
stringr,
tidyr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(densityplot)
export(ggmice)
export(plot_corr)
export(plot_flux)
export(plot_miss)
export(plot_pattern)
export(plot_pred)
export(plot_trace)
Expand Down
125 changes: 125 additions & 0 deletions R/plot_miss.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#' Plot missingness in a dataset
#'
#' @param data An incomplete dataset of class `data.frame` or `matrix`.
#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all".
#' @param grid Logical indicating whether borders should be present between tiles.
#' @param ordered Logical indicating whether rows should be ordered according to their pattern.
#' @param square Logical indicating whether the plot tiles should be squares, defaults to squares.
#'
#' @return An object of class [ggplot2::ggplot].
#'
#' @examples
#' plot_miss(mice::nhanes)
#' @export

plot_miss <-
function(data,
vrb = "all",
grid = FALSE,
square = FALSE,
ordered = FALSE) {
# input processing
if (is.matrix(data) && ncol(data) > 1) {
data <- as.data.frame(data)
}
verify_data(data, df = TRUE)
vrb <- substitute(vrb)
if (vrb[1] == "all") {
vrb <- names(data)
} else {
vrb <- names(dplyr::select(as.data.frame(data), {{vrb}}))
}
if (".x" %in% vrb || ".y" %in% vrb) {
cli::cli_abort(
c(
"The variable names '.x' and '.y' are used internally to produce the missing data pattern plot.",
"i" = "Please exclude or rename your variable(s)."
)
)
}
if (ordered) {
# extract md.pattern matrix
mdpat <- utils::head(mice::md.pattern(data, plot = FALSE), -1)
# save frequency of patterns
freq.pat <- rownames(mdpat) %>%

Check warning on line 44 in R/plot_miss.R

View workflow job for this annotation

GitHub Actions / lint

file=R/plot_miss.R,line=44,col=7,[object_name_linter] Variable and function name style should match snake_case or symbols.
as.numeric()

na.mat <- mdpat %>%

Check warning on line 47 in R/plot_miss.R

View workflow job for this annotation

GitHub Actions / lint

file=R/plot_miss.R,line=47,col=7,[object_name_linter] Variable and function name style should match snake_case or symbols.
as.data.frame() %>%
dplyr::select(-ncol(.data)) %>%
dplyr::mutate(nmis = freq.pat) %>%
tidyr::uncount(nmis)
} else {
# Create missingness indicator matrix
na.mat <-

Check warning on line 54 in R/plot_miss.R

View workflow job for this annotation

GitHub Actions / lint

file=R/plot_miss.R,line=54,col=7,[object_name_linter] Variable and function name style should match snake_case or symbols.
purrr::map_df(data[, vrb], function(y)
as.numeric(!is.na(y)))
}
# extract pattern info
vrb <- colnames(na.mat)
rws <- nrow(na.mat)
cls <- ncol(na.mat)

# transform to long format
long <-
as.data.frame(cbind(.y = 1:rws, na.mat)) %>%
tidyr::pivot_longer(
cols = tidyselect::all_of(vrb),
names_to = "x",
values_to = ".where"
) %>%
dplyr::mutate(.x = as.numeric(factor(
.data$x,
levels = vrb, ordered = TRUE
)),
.where = factor(
.data$.where,
levels = c(0, 1),
labels = c("missing", "observed")
))
gg <-
ggplot2::ggplot(long,
ggplot2::aes(.data$.x,
as.numeric(.data$.y),
fill = .data$.where)) +
ggplot2::scale_fill_manual(values = c(
"observed" = "#006CC2B3",
"missing" = "#B61A51B3"
)) +
ggplot2::scale_alpha_continuous(limits = c(0, 1), guide = "none") +
ggplot2::scale_x_continuous(breaks = 1:cls,
labels = vrb) +
ggplot2::scale_y_reverse(breaks = \(y) {
eb = scales::extended_breaks()(y)
eb[1] = min(long$.y)
eb[length(eb)] = max(long$.y)
eb
}) +
ggplot2::labs(
x = "Column name",
y = "Row number",
fill = "",
alpha = ""
) +
theme_minimice()
# additional arguments
if (grid) {
gg <- gg + ggplot2::geom_tile(color = "black")
} else{
gg <- gg + ggplot2::geom_tile()
}
if (square) {
gg <- gg + ggplot2::coord_fixed(expand = FALSE)
} else {
gg <- gg + ggplot2::coord_cartesian(expand = FALSE)
}
if (ordered) {
gg <- gg +
ggplot2::theme(
axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank()
)
}
return(gg)
}

5 changes: 5 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
#' @return The result of calling `rhs(lhs)`.
NULL

# suppress undefined global functions or variables note
utils::globalVariables(c(".id", ".imp", ".where", ".id", "where", "name", "value", "nmis"))

# Alias a function with `foo <- function(...) pkgB::blah(...)`

#' Utils function to validate data argument inputs
#'
#' @param data The input supplied to the 'data' argument.
Expand Down
28 changes: 28 additions & 0 deletions man/plot_miss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/test-plot_miss.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# create test objects
dat <- mice::nhanes

# tests
test_that("plot_miss produces plot", {
expect_s3_class(plot_miss(dat), "ggplot")
expect_s3_class(plot_miss(dat, grid = TRUE, ordered = TRUE, square = TRUE), "ggplot")
expect_s3_class(plot_miss(cbind(dat, "testvar" = NA)), "ggplot")
})

test_that("plot_miss works with different inputs", {
expect_s3_class(plot_miss(dat, c("age", "bmi")), "ggplot")
expect_s3_class(plot_miss(dat, c(age, bmi)), "ggplot")
expect_s3_class(plot_miss(data.frame(age = dat$age, testvar = NA)), "ggplot")
expect_s3_class(plot_miss(cbind(dat, "with space" = NA)), "ggplot")
})


test_that("plot_miss with incorrect argument(s)", {
expect_s3_class(plot_miss(na.omit(dat)), "ggplot")
expect_error(plot_miss("test"))
expect_error(plot_miss(dat, vrb = "test"))
expect_error(plot_miss(cbind(dat, .x = NA)))
})
Loading