From c52ccfc7df8cdac692b8925f83b7e0315ca1d280 Mon Sep 17 00:00:00 2001 From: Achim Zeileis Date: Mon, 27 Jan 2025 03:10:44 +0100 Subject: [PATCH] joint.bw = "mean" instead of "owm" --- R/type_density.R | 14 +++++++------- R/type_ridge.R | 12 ++++++------ inst/tinytest/test-density.R | 2 +- inst/tinytest/test-ridge.R | 2 +- man/type_density.Rd | 10 +++++----- man/type_ridge.Rd | 6 +++--- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/R/type_density.R b/R/type_density.R index 91ed130c..0fff327d 100644 --- a/R/type_density.R +++ b/R/type_density.R @@ -15,8 +15,8 @@ #' @param joint.bw character string indicating whether (and how) the smoothing #' bandwidth should be computed from the joint data distribution, in case #' there are multiple subgroups (from `by` or `facet`). The default of -#' `"owm"` will compute the joint bandwidth as the observation-weighted mean -#' of the individual subgroup bandwidths. Choosing `"full"` will result in a +#' `"mean"` will compute the joint bandwidth as the mean of the individual +#' subgroup bandwidths (weighted by their number of observations). Choosing `"full"` will result in a #' joint bandwidth computed from the full distribution (merging all subgroups). #' For `"none"` the individual bandwidth will be computed independently for #' each subgroup. When no grouped or faceted densities are visualized, the @@ -38,7 +38,7 @@ #' bandwidth across all subgroups. The following strategies are available via the #' `joint.bw` argument. #' -#' The default `joint.bw = "owm"` first computes the individual bandwidths for +#' The default `joint.bw = "mean"` first computes the individual bandwidths for #' each group but then computes their mean, weighted by the number of observations #' in each group. This will work well when all groups have similar amounts of #' scatter (similar variances), even when they have potentially rather different @@ -77,7 +77,7 @@ #' # individual subgroup bandwidths (weighted by group size) as the #' # joint bandwidth. Alternatively, the bandwidth from the "full" #' # data or separate individual bandwidths ("none") can be used. -#' tinyplot(~Sepal.Length | Species, data = iris, type = "density") # owm (default) +#' tinyplot(~Sepal.Length | Species, data = iris, type = "density") # mean (default) #' tinyplot_add(joint.bw = "full", lty = 2) # full data #' tinyplot_add(joint.bw = "none", lty = 3) # none (individual) #' legend("topright", c("Mean", "Full", "None"), lty = 1:3, bty = "n", title = "Joint BW") @@ -91,11 +91,11 @@ type_density = function( kernel = c("gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine"), n = 512, # more args from density here? - joint.bw = c("owm", "full", "none"), + joint.bw = c("mean", "full", "none"), alpha = NULL ) { kernel = match.arg(kernel, c("gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine")) - joint.bw = match.arg(joint.bw, c("owm", "full", "none")) + joint.bw = match.arg(joint.bw, c("mean", "full", "none")) out = list( data = data_density(bw = bw, adjust = adjust, kernel = kernel, n = n, joint.bw = joint.bw, alpha = alpha), @@ -136,7 +136,7 @@ data_density = function(bw = "nrd0", adjust = 1, kernel = "gaussian", n = 512, } if (joint.bw == "full") { dens_bw = bw_fun(kernel = bw, unlist(sapply(datapoints, `[[`, "x"))) - } else if (joint.bw == "owm") { + } else if (joint.bw == "mean") { bws = sapply(datapoints, function(dat) bw_fun(kernel = bw, dat$x)) ws = sapply(datapoints, nrow) dens_bw = weighted.mean(bws, ws) diff --git a/R/type_ridge.R b/R/type_ridge.R index f0c2e3d5..a5be9a5b 100644 --- a/R/type_ridge.R +++ b/R/type_ridge.R @@ -36,8 +36,8 @@ #' the version used by S. #' @param joint.bw character string indicating whether (and how) the smoothing #' bandwidth should be computed from the joint data distribution. The default of -#' `"owm"` will compute the joint bandwidth as the observation-weighted mean -#' of the individual subgroup bandwidths. Choosing `"full"` will result in a +#' `"mean"` will compute the joint bandwidth as the mean of the individual +#' subgroup bandwidths (weighted by their number of observations). Choosing `"full"` will result in a #' joint bandwidth computed from the full distribution (merging all subgroups). #' For `"none"` the individual bandwidth will be computed independently for #' each subgroup. See \code{\link{type_density}} for some discussion of @@ -168,7 +168,7 @@ type_ridge = function( probs = NULL, ylevels = NULL, bw = "nrd0", - joint.bw = c("owm", "full", "none"), + joint.bw = c("mean", "full", "none"), adjust = 1, kernel = c("gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine"), n = 512, @@ -180,7 +180,7 @@ type_ridge = function( ) { kernel = match.arg(kernel, c("gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine")) - joint.bw = match.arg(joint.bw, c("owm", "full", "none")) + joint.bw = match.arg(joint.bw, c("mean", "full", "none")) out = list( draw = draw_ridge(), @@ -206,7 +206,7 @@ type_ridge = function( # ## Underlying data_ridge function data_ridge = function(bw = "nrd0", adjust = 1, kernel = "gaussian", n = 512, - joint.bw = "owm", + joint.bw = "mean", scale = 1.5, global.max = TRUE, gradient = FALSE, @@ -264,7 +264,7 @@ data_ridge = function(bw = "nrd0", adjust = 1, kernel = "gaussian", n = 512, } if (joint.bw == "full") { dens_bw = bw_fun(kernel = bw, unlist(sapply(datapoints, `[[`, "x"))) - } else if (joint.bw == "owm") { + } else if (joint.bw == "mean") { bws = sapply(datapoints, function(dat) bw_fun(kernel = bw, dat$x)) ws = sapply(datapoints, nrow) dens_bw = weighted.mean(bws, ws) diff --git a/inst/tinytest/test-density.R b/inst/tinytest/test-density.R index 408e82f3..7376656d 100644 --- a/inst/tinytest/test-density.R +++ b/inst/tinytest/test-density.R @@ -64,7 +64,7 @@ expect_snapshot_plot(f1, label = "density_type_bw_sj") f1 = function() { tinyplot(~ Sepal.Width | Species, iris, type = type_density(joint.bw = "none")) tinyplot_add(type = type_density(joint.bw = "full"), lty = 2) - tinyplot_add(type = type_density(joint.bw = "owm"), lty = 3) + tinyplot_add(type = type_density(joint.bw = "mean"), lty = 3) legend("topright", c("None", "Full", "OWM"), lty = 1:3, title = "Joint BW") } expect_snapshot_plot(f1, label = "density_type_joint_bw") diff --git a/inst/tinytest/test-ridge.R b/inst/tinytest/test-ridge.R index 64ff8748..e3495e70 100644 --- a/inst/tinytest/test-ridge.R +++ b/inst/tinytest/test-ridge.R @@ -18,7 +18,7 @@ f = function() { tinyplot( Species ~ Sepal.Width, data = iris, main = 'joint.bw = "owm"', - type = type_ridge(joint.bw = "owm") + type = type_ridge(joint.bw = "mean") ) } expect_snapshot_plot(f, label = "ridge_joint_owm") diff --git a/man/type_density.Rd b/man/type_density.Rd index c8b39668..5e098c7a 100644 --- a/man/type_density.Rd +++ b/man/type_density.Rd @@ -10,7 +10,7 @@ type_density( kernel = c("gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine"), n = 512, - joint.bw = c("owm", "full", "none"), + joint.bw = c("mean", "full", "none"), alpha = NULL ) } @@ -53,8 +53,8 @@ the version used by S.} \item{joint.bw}{character string indicating whether (and how) the smoothing bandwidth should be computed from the joint data distribution, in case there are multiple subgroups (from \code{by} or \code{facet}). The default of -\code{"owm"} will compute the joint bandwidth as the observation-weighted mean -of the individual subgroup bandwidths. Choosing \code{"full"} will result in a +\code{"mean"} will compute the joint bandwidth as the mean of the individual +subgroup bandwidths (weighted by their number of observations). Choosing \code{"full"} will result in a joint bandwidth computed from the full distribution (merging all subgroups). For \code{"none"} the individual bandwidth will be computed independently for each subgroup. When no grouped or faceted densities are visualized, the @@ -110,7 +110,7 @@ random variations too much. Hence, it is often useful to employ the same joint bandwidth across all subgroups. The following strategies are available via the \code{joint.bw} argument. -The default \code{joint.bw = "owm"} first computes the individual bandwidths for +The default \code{joint.bw = "mean"} first computes the individual bandwidths for each group but then computes their mean, weighted by the number of observations in each group. This will work well when all groups have similar amounts of scatter (similar variances), even when they have potentially rather different @@ -151,7 +151,7 @@ tinyplot( # individual subgroup bandwidths (weighted by group size) as the # joint bandwidth. Alternatively, the bandwidth from the "full" # data or separate individual bandwidths ("none") can be used. -tinyplot(~Sepal.Length | Species, data = iris, type = "density") # owm (default) +tinyplot(~Sepal.Length | Species, data = iris, type = "density") # mean (default) tinyplot_add(joint.bw = "full", lty = 2) # full data tinyplot_add(joint.bw = "none", lty = 3) # none (individual) legend("topright", c("Mean", "Full", "None"), lty = 1:3, bty = "n", title = "Joint BW") diff --git a/man/type_ridge.Rd b/man/type_ridge.Rd index 68816828..65eedec4 100644 --- a/man/type_ridge.Rd +++ b/man/type_ridge.Rd @@ -11,7 +11,7 @@ type_ridge( probs = NULL, ylevels = NULL, bw = "nrd0", - joint.bw = c("owm", "full", "none"), + joint.bw = c("mean", "full", "none"), adjust = 1, kernel = c("gaussian", "epanechnikov", "rectangular", "triangular", "biweight", "cosine", "optcosine"), @@ -62,8 +62,8 @@ the levels of the y-variable should be plotted.} \item{joint.bw}{character string indicating whether (and how) the smoothing bandwidth should be computed from the joint data distribution. The default of -\code{"owm"} will compute the joint bandwidth as the observation-weighted mean -of the individual subgroup bandwidths. Choosing \code{"full"} will result in a +\code{"mean"} will compute the joint bandwidth as the mean of the individual +subgroup bandwidths (weighted by their number of observations). Choosing \code{"full"} will result in a joint bandwidth computed from the full distribution (merging all subgroups). For \code{"none"} the individual bandwidth will be computed independently for each subgroup. See \code{\link{type_density}} for some discussion of