Skip to content

Commit

Permalink
Add missing values replacement (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
nfrerebeau committed Nov 17, 2023
1 parent eb6efe2 commit a2ddd85
Show file tree
Hide file tree
Showing 9 changed files with 168 additions and 34 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ exportMethods(pca)
exportMethods(perturbation)
exportMethods(plot)
exportMethods(powering)
exportMethods(replace_NA)
exportMethods(replace_zero)
exportMethods(scalar)
exportMethods(transform_alr)
Expand Down
35 changes: 28 additions & 7 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -935,15 +935,15 @@ NULL
# Missing Values ===============================================================
#' Zero-Replacement
#'
#' Multiplicative replacement of zeros in compositional data.
#' @param x An \eqn{m \times p}{m x p} [`CompositionMatrix-class`] object.
#' @param value A length-\eqn{p} [`numeric`] vector giving the detection limits
#' of each part (in \eqn{(0,1)}).
#' Multiplicative replacement of zeros.
#' @param x A [`CompositionMatrix-class`] object.
#' @param value A [`numeric`] vector giving the detection limits of each part
#' (in \eqn{(0,1)}).
#' @param delta A [`numeric`] vector specifying the fraction of the detection
#' limit to be used in replacement.
#' @return
#' An \eqn{m \times p}{m x p} [`CompositionMatrix-class`] object, where all
#' zero values have been replaced.
#' A [`CompositionMatrix-class`] object, where all zero values have been
#' replaced.
#' @references
#' Aitchison, J. (1986). *The Statistical Analysis of Compositional Data*.
#' London: Chapman and Hall. \doi{10.1007/978-94-009-4109-0}.
Expand All @@ -957,7 +957,28 @@ NULL
#' @docType methods
#' @family imputation methods
#' @name zero
#' @rdname zero
#' @rdname replace_zero
NULL

#' Missing Values Replacement
#'
#' Multiplicative replacement of missing values.
#' @param x A [`CompositionMatrix-class`] object.
#' @param value A [`numeric`] vector giving the replacement values.
#' @return
#' A [`CompositionMatrix-class`] object, where all missing values have been
#' replaced.
#' @references
#' Martín-Fernández, J. A., Barceló-Vidal, C. & Pawlowsky-Glahn, V. (2003).
#' Dealing with Zeros and Missing Values in Compositional Data Sets Using
#' Nonparametric Imputation. *Mathematical Geology*, 35(3): 253-278.
#' \doi{10.1023/A:1023866030544}.
#' @example inst/examples/ex-missing.R
#' @author N. Frerebeau
#' @docType methods
#' @family imputation methods
#' @name missing
#' @rdname replace_NA
NULL

# Outliers =====================================================================
Expand Down
44 changes: 34 additions & 10 deletions R/replace.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
# REPLACE ZEROS
# REPLACE
#' @include AllGenerics.R
NULL

# Zeros ========================================================================
#' @export
#' @rdname zero
#' @rdname replace_zero
#' @aliases replace_zero,CompositionMatrix-method
setMethod(
f = "replace_zero",
signature = c(x = "CompositionMatrix"),
definition = function(x, value = NULL, delta = 2/3) {
definition = function(x, value, delta = 2/3) {
## Validation
D <- ncol(x)
if (is.null(value)) return(x)
if (length(value) == 1) rep(value, D)
if (length(value) == 1) value <- rep(value, D)
if (length(value) > 1) arkhe::assert_length(value, D)
if (length(delta) > 1) arkhe::assert_length(delta, D)

sigma <- value * delta
repl <- apply(X = x, MARGIN = 1, FUN = zero_multiplicative, sigma = sigma)
r <- apply(X = x, MARGIN = 1, FUN = zero_multiplicative, sigma = sigma)

methods::initialize(x, t(repl))
methods::initialize(x, t(r))
}
)

Expand All @@ -35,12 +36,35 @@ zero_additive <- function(x, sigma) {
x
}
# Multiplicative zero replacement for a single composition.
#
# x:     numeric vector of parts; entries exactly equal to zero are imputed.
# sigma: numeric vector of replacement values, indexed like `x`.
#
# Returns `x` where each zero is replaced by the matching element of `sigma`
# and every other entry is scaled by one minus the total imputed amount.
# Missing values are not imputed here: they fall in the "non-zero" group and
# remain NA after scaling.
zero_multiplicative <- function(x, sigma) {
  # `NA == 0` evaluates to NA, so exclude missing values explicitly to keep
  # the mask strictly TRUE/FALSE.
  is_zero <- x == 0 & !is.na(x)
  x[is_zero] <- sigma[is_zero]
  # NOTE(review): the divisor 1 is presumably the closure constant (parts
  # summing to one) -- confirm before changing it.
  x[!is_zero] <- x[!is_zero] * (1 - sum(sigma[is_zero]) / 1)
  x
}

is_zero <- x == 0
# Missing values ===============================================================
#' @export
#' @rdname replace_NA
#' @aliases replace_NA,CompositionMatrix-method
setMethod(
f = "replace_NA",
signature = c(x = "CompositionMatrix"),
definition = function(x, value) {
## Validation
D <- ncol(x)
if (is.null(value)) return(x)
if (length(value) == 1) value <- rep(value, D)
if (length(value) > 1) arkhe::assert_length(value, D)

x[is_zero] <- sigma[is_zero]
x[!is_zero] <- x[!is_zero] * (1 - (sum(sigma[is_zero])) / 1)
r <- apply(X = x, MARGIN = 1, FUN = missing_multiplicative, sigma = value)

methods::initialize(x, t(r))
}
)

# Multiplicative replacement of missing values for a single composition.
#
# x:     numeric vector of parts, possibly containing NA.
# sigma: numeric vector of replacement values, indexed like `x`.
#
# Returns `x` where each NA is replaced by the matching element of `sigma`
# and the observed entries are rescaled by (1 - total imputed amount) divided
# by the sum of the observed entries.
missing_multiplicative <- function(x, sigma) {
  na_mask <- is.na(x)
  fill <- sigma[na_mask]

  # Impute first, then read the observed parts back from the updated vector.
  x[na_mask] <- fill
  observed <- x[!na_mask]

  x[!na_mask] <- observed * (1 - sum(fill)) / sum(observed)
  x
}
15 changes: 15 additions & 0 deletions inst/examples/ex-missing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
## Data from Martín-Fernández et al. 2003
X <- data.frame(
X1 = c(0.0000, 0.1304, 0.1963),
X2 = c(0.1250, 0.3151, NA),
X3 = c(0.1237, NA, NA),
X4 = c(0.7253, 0.2002, 0.0819),
X5 = c(0.0260, 0.3543, 0.0114)
)

## Coerce to a compositional matrix
Y <- as_composition(X)

## Replace missing values
Z <- replace_NA(Y, value = 0.2)
Z
12 changes: 7 additions & 5 deletions inst/examples/ex-zero.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
## Create a data.frame
## Data from Martín-Fernández et al. 2003
X <- data.frame(
Ca = c(7.72, 0, 3.11, 7.19, 7.41, 5, 0, 1, 4.51),
Fe = c(6.12, 5.88, 5.12, 0, 6.02, 0, 0, 5.28, 5.72),
Na = c(0.97, 1.59, 0, 0.86, 0.76, 0.51, 0.75, 0.52, 0.56)
X1 = c(0.0000, 0.1304, 0.1963),
X2 = c(0.1250, 0.3151, NA),
X3 = c(0.1237, NA, NA),
X4 = c(0.7253, 0.2002, 0.0819),
X5 = c(0.0260, 0.3543, 0.0114)
)

## Coerce to a compositional matrix
Y <- as_composition(X)

## Replace zeros
Z <- replace_zero(Y, value = c(0.02, 0.1, 0.01), delta = 2/3)
Z <- replace_zero(Y, value = 0.02, delta = 2/3)
Z
Binary file added inst/tinytest/_snaps/missing_multiplicative.rds
Binary file not shown.
12 changes: 12 additions & 0 deletions inst/tinytest/test_zero.R → inst/tinytest/test_replace.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,15 @@ Y <- as_composition(X)
## Multiplicative replacement
Z <- replace_zero(Y, value = c(0.02, 0.1, 0.01), delta = 2/3)
expect_equal_to_reference(Z, file = "_snaps/zero_multiplicative.rds")

# Replace missing ==============================================================
X <- data.frame(
Ca = c(7.72, NA, 3.11, 7.19, 7.41, 5, NA, 1, 4.51),
Fe = c(6.12, 5.88, 5.12, NA, 6.02, NA, NA, 5.28, 5.72),
Na = c(0.97, 1.59, NA, 0.86, 0.76, 0.51, 0.75, 0.52, 0.56)
)
Y <- as_composition(X)

## Multiplicative replacement
Z <- replace_NA(Y, value = 0.02)
expect_equal_to_reference(Z, file = "_snaps/missing_multiplicative.rds")
53 changes: 53 additions & 0 deletions man/replace_NA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 18 additions & 12 deletions man/zero.Rd → man/replace_zero.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a2ddd85

Please sign in to comment.