From f0ec0db7055c4138b6b237cd4892684194821bc4 Mon Sep 17 00:00:00 2001 From: nfrerebeau Date: Wed, 22 Nov 2023 12:11:24 +0100 Subject: [PATCH] Add scaling and centering --- NAMESPACE | 2 ++ R/AllGenerics.R | 23 +++++++++++++ R/simplex.R | 5 +++ R/statistics.R | 30 +++++++++++++++++ inst/examples/ex-scale.R | 8 +++++ inst/tinytest/_snaps/mean.rds | Bin 0 -> 130 bytes inst/tinytest/_snaps/scale.rds | Bin 0 -> 1514 bytes inst/tinytest/test_statistics.R | 8 +++++ man/aggregate.Rd | 1 + man/covariance.Rd | 1 + man/dist.Rd | 1 + man/mahalanobis.Rd | 1 + man/margin.Rd | 1 + man/mean.Rd | 1 + man/metric_var.Rd | 1 + man/scale.Rd | 58 ++++++++++++++++++++++++++++++++ man/variation.Rd | 3 +- 17 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 inst/examples/ex-scale.R create mode 100644 inst/tinytest/_snaps/mean.rds create mode 100644 inst/tinytest/_snaps/scale.rds create mode 100644 man/scale.Rd diff --git a/NAMESPACE b/NAMESPACE index 7f439b1..6072b43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ S3method(mean,CompositionMatrix) S3method(plot,CompositionMatrix) S3method(plot,LogRatio) S3method(plot,OutlierIndex) +S3method(scale,CompositionMatrix) export(pca) export(remove_NA) export(remove_zero) @@ -56,6 +57,7 @@ exportMethods(powering) exportMethods(replace_NA) exportMethods(replace_zero) exportMethods(scalar) +exportMethods(scale) exportMethods(transform_alr) exportMethods(transform_clr) exportMethods(transform_ilr) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 184e865..3e08b1e 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -774,6 +774,29 @@ setGeneric( # valueClass = "matrix" # ) +#' Scaling and Centering of Compositional Data +#' +#' @param x A [`CompositionMatrix-class`] object. +#' @param center A [`logical`] scalar or a [`numeric`] vector giving the center +#' to be subtracted. +#' @param scale A [`logical`] scalar or a length-one [`numeric`] vector giving a +#' scaling factor for multiplication. 
+#' @return A [`CompositionMatrix-class`] object. +#' @references +#' Aitchison, J. (1986). *The Statistical Analysis of Compositional Data*. +#' London: Chapman and Hall, p. 64-91. \doi{10.1007/978-94-009-4109-0}. +#' +#' Boogaart, K. G. van den & Tolosana-Delgado, R. (2013). *Analyzing +#' Compositional Data with R*. Berlin Heidelberg: Springer-Verlag. +#' \doi{10.1007/978-3-642-36809-7}. +#' @example inst/examples/ex-scale.R +#' @author N. Frerebeau +#' @docType methods +#' @family statistics +#' @name scale +#' @rdname scale +NULL + # Distances ==================================================================== #' Distances #' diff --git a/R/simplex.R b/R/simplex.R index fd75de9..2725a6d 100644 --- a/R/simplex.R +++ b/R/simplex.R @@ -34,12 +34,16 @@ setMethod( signature = c(x = "CompositionMatrix", y = "CompositionMatrix"), definition = function(x, y) { arkhe::assert_dimensions(y, dim(x)) + if (all(x <= 0)) x <- 1 / x if (all(y <= 0)) y <- 1 / y + z <- x * y z <- as_composition(z) + set_samples(z) <- get_samples(x) set_groups(z) <- get_groups(x) + z } ) @@ -63,6 +67,7 @@ setMethod( f = "perturbation", signature = c(x = "CompositionMatrix", y = "numeric"), definition = function(x, y) { + y <- matrix(data = y, nrow = nrow(x), ncol = length(y), byrow = TRUE) x %perturbe% as_composition(y) } ) diff --git a/R/statistics.R b/R/statistics.R index be9bf65..c5b1732 100644 --- a/R/statistics.R +++ b/R/statistics.R @@ -96,6 +96,36 @@ gmean <- function(x, trim = 0, na.rm = FALSE) { exp(mean(log(unclass(x)[index]), trim = trim, na.rm = na.rm)) } +# Scale ======================================================================== +#' @export +#' @method scale CompositionMatrix +scale.CompositionMatrix <- function(x, center = TRUE, scale = TRUE) { + if (isFALSE(center) && isFALSE(scale)) return(x) + + y <- x + if (!isFALSE(center)) { + if (isTRUE(center)) center <- mean(x) + arkhe::assert_type(center, "numeric") + arkhe::assert_length(center, NCOL(x)) + + y <- perturbation(y, 1 
/ center) + } + + if (!isFALSE(scale)) { + if (isTRUE(scale)) scale <- sqrt(mean(diag(covariance(x, center = TRUE)))) + arkhe::assert_type(scale, "numeric") + + y <- powering(y, 1 / scale) + } + + y +} + +#' @export +#' @rdname scale +#' @aliases scale,CompositionMatrix-method +setMethod("scale", "CompositionMatrix", scale.CompositionMatrix) + # Metric variance ============================================================== #' @export #' @rdname metric_var diff --git a/inst/examples/ex-scale.R b/inst/examples/ex-scale.R new file mode 100644 index 0000000..2c4768c --- /dev/null +++ b/inst/examples/ex-scale.R @@ -0,0 +1,8 @@ +## Coerce to compositional data +data("hongite") +coda <- as_composition(hongite) + +## Center and scale +scaled <- scale(coda, center = TRUE, scale = TRUE) +mean(scaled) +head(scaled) diff --git a/inst/tinytest/_snaps/mean.rds b/inst/tinytest/_snaps/mean.rds new file mode 100644 index 0000000000000000000000000000000000000000..686b9f72e616c1486623c9d213e49244216919cd GIT binary patch literal 130 zcmb2|=3oE==I#ec2?+^l35jV*32CfGk`d0%cS>|6Bxd|@koo=F@$YV-OBQd=pSbrz zV`qNO{>#7LtQVEfc~h=icJ{{SH+_G%t+Vg`qNi$Lu)%=ws_U8!=fI4m>D`sLN-qYS`7dX**91K literal 0 HcmV?d00001 diff --git a/inst/tinytest/_snaps/scale.rds b/inst/tinytest/_snaps/scale.rds new file mode 100644 index 0000000000000000000000000000000000000000..8a8f14a89010f6107fbd450f1c08a580f3c0b771 GIT binary patch literal 1514 zcmV3<>)*!;j zxKnJKTFJ59+eX_(BatMOOJo=`ZX;6KL`yg9xNNfQZf)(@;@x#RXaC-Fde3>!^Lc*f zIp=qt&v|~|P2&lIpc2wl3PF{TAR*(j!A{SNkfJI{HhWN$J#(#W3k6lwP_MSa3}j?J zYFAC-KvZ)yWc{vE2%Hbl8spSZZ&+cQQ0@nTdBNhR=guNU)!Np6J`H(Z%L8z~80BRT zRBbK4L)8Vh`03ZTKv0uN8&bRNM$& zsI^Nbbp-@dv(=LFccPi5+rQs`Iz$l%6IJSaaV+zlAKRCSyF-0p{%u^euTsw4ob?=y zN4NH0aZEw8O(o4@KnY>C$){qctU*fDUR~+bd3f3U@Us3fHu#1*i8P~SXtSa@hBa9s zalhE=rnL^PQ2yMfRnUf_8yge$t?5QwZn$W2Q4d~hzYy)2EJllMj1)ZZ#6sXAH){g&LuN+3>Aj3P&cnH9Y}~*-@R(=S z^#T=79NizJo1!C4Q~|GQl+M=`M!81e`=&IFWcvi9C!N0PEK`oF((7Z*-lsq;Q&oOC 
znS*CfWMvdvBC(?{P1{nh0e5el5j*8(;$&FF!O$<<(fQ)MQDj9o+Vz!*%9>RPq8-D|PK?R$xrzO9Xh@T|M5RnBu$WZd*$4su5SQ7niPA~S zE*8JDLCN~1#GZvkDADv5I&hkC=f{A@=JRAxsdQFKopKNrw9IA$Y7dI~T;=$#j&=Mrp~2cY6bU5)DYC zlW0hy5sAhmnviHpq8W)~H#*skPIjY{-RNXDI@yhGELq--Rpr5AepdAM$5iW6;F{(&sEPp}Uwz=;tYEK{fXI>ljQkGl1(N)g!<>#H#X6RjmUf~Ni7|LWg}oa0?S8W Q#jxtX0M^Rn$3hDL0KuX1qyPW_ literal 0 HcmV?d00001 diff --git a/inst/tinytest/test_statistics.R b/inst/tinytest/test_statistics.R index b0e45f3..fb4b74d 100644 --- a/inst/tinytest/test_statistics.R +++ b/inst/tinytest/test_statistics.R @@ -1,6 +1,14 @@ data("hongite") coda <- as_composition(hongite) +# Mean ========================================================================= +expect_equal_to_reference(mean(coda), file = "_snaps/mean.rds") + +# Scale ======================================================================== +z <- scale(coda, center = TRUE, scale = TRUE) +expect_equal(mean(z), c(A = 0.2, B = 0.2, C = 0.2, D = 0.2, E = 0.2)) +expect_equal_to_reference(z, file = "_snaps/scale.rds") + # Margin ======================================================================= expect_equal_to_reference(margin(coda, parts = c("B", "D")), file = "_snaps/margin.rds") diff --git a/man/aggregate.Rd b/man/aggregate.Rd index 0913829..29d64bc 100644 --- a/man/aggregate.Rd +++ b/man/aggregate.Rd @@ -52,6 +52,7 @@ Other statistics: \code{\link{margin}()}, \code{\link{mean}()}, \code{\link{metric_var}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/covariance.Rd b/man/covariance.Rd index 1695e60..d28b7e5 100644 --- a/man/covariance.Rd +++ b/man/covariance.Rd @@ -71,6 +71,7 @@ Other statistics: \code{\link{margin}()}, \code{\link{mean}()}, \code{\link{metric_var}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/dist.Rd b/man/dist.Rd index 2ddca8f..6b5e8cb 100644 --- a/man/dist.Rd +++ b/man/dist.Rd @@ -61,6 +61,7 @@ Other statistics: \code{\link{margin}()}, \code{\link{mean}()}, 
\code{\link{metric_var}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/mahalanobis.Rd b/man/mahalanobis.Rd index 3c3b5bc..af0dbbe 100644 --- a/man/mahalanobis.Rd +++ b/man/mahalanobis.Rd @@ -54,6 +54,7 @@ Other statistics: \code{\link{margin}()}, \code{\link{mean}()}, \code{\link{metric_var}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/margin.Rd b/man/margin.Rd index d580e97..672eeab 100644 --- a/man/margin.Rd +++ b/man/margin.Rd @@ -45,6 +45,7 @@ Other statistics: \code{\link{mahalanobis}()}, \code{\link{mean}()}, \code{\link{metric_var}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/mean.Rd b/man/mean.Rd index d348a7c..83af29a 100644 --- a/man/mean.Rd +++ b/man/mean.Rd @@ -50,6 +50,7 @@ Other statistics: \code{\link{mahalanobis}()}, \code{\link{margin}()}, \code{\link{metric_var}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/metric_var.Rd b/man/metric_var.Rd index 7743400..7d531b2 100644 --- a/man/metric_var.Rd +++ b/man/metric_var.Rd @@ -71,6 +71,7 @@ Other statistics: \code{\link{mahalanobis}()}, \code{\link{margin}()}, \code{\link{mean}()}, +\code{\link{scale}()}, \code{\link{variation}()} } \author{ diff --git a/man/scale.Rd b/man/scale.Rd new file mode 100644 index 0000000..7eeee3c --- /dev/null +++ b/man/scale.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/statistics.R +\docType{methods} +\name{scale} +\alias{scale} +\alias{scale,CompositionMatrix-method} +\title{Scaling and Centering of Compositional Data} +\usage{ +\S4method{scale}{CompositionMatrix}(x, center = TRUE, scale = TRUE) +} +\arguments{ +\item{x}{A \code{\linkS4class{CompositionMatrix}} object.} + +\item{center}{A \code{\link{logical}} scalar or a \code{\link{numeric}} vector giving the center +to be subtracted.} + +\item{scale}{A \code{\link{logical}} scalar or a length-one 
\code{\link{numeric}} vector giving a +scaling factor for multiplication.} +} +\value{ +A \code{\linkS4class{CompositionMatrix}} object. +} +\description{ +Scaling and Centering of Compositional Data +} +\examples{ +## Coerce to compositional data +data("hongite") +coda <- as_composition(hongite) + +## Center and scale +scaled <- scale(coda, center = TRUE, scale = TRUE) +mean(scaled) +head(scaled) +} +\references{ +Aitchison, J. (1986). \emph{The Statistical Analysis of Compositional Data}. +London: Chapman and Hall, p. 64-91. \doi{10.1007/978-94-009-4109-0}. + +Boogaart, K. G. van den & Tolosana-Delgado, R. (2013). \emph{Analyzing +Compositional Data with R}. Berlin Heidelberg: Springer-Verlag. +\doi{10.1007/978-3-642-36809-7}. +} +\seealso{ +Other statistics: +\code{\link{aggregate}()}, +\code{\link{covariance}()}, +\code{\link{dist}()}, +\code{\link{mahalanobis}()}, +\code{\link{margin}()}, +\code{\link{mean}()}, +\code{\link{metric_var}()}, +\code{\link{variation}()} +} +\author{ +N. Frerebeau +} +\concept{statistics} diff --git a/man/variation.Rd b/man/variation.Rd index 0dd0bc6..edee0b7 100644 --- a/man/variation.Rd +++ b/man/variation.Rd @@ -51,7 +51,8 @@ Other statistics: \code{\link{mahalanobis}()}, \code{\link{margin}()}, \code{\link{mean}()}, -\code{\link{metric_var}()} +\code{\link{metric_var}()}, +\code{\link{scale}()} } \author{ N. Frerebeau