Merge pull request #65 from larmarange/likert_cutoff

`gglikert()` improvements
larmarange · May 20, 2024 · 3039f39 · 3039f39
2 parents 6aa80bc + c7cb096
commit 3039f39
Show file tree

Hide file tree

Showing 25 changed files with 1,579 additions and 110 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -24,9 +24,14 @@ export(ggsurvey)
 export(hex_bw)
 export(label_number_abs)
 export(label_percent_abs)
+export(likert_pal)
+export(pal_extender)
 export(position_likert)
 export(position_likert_count)
 export(round_any)
+export(scale_colour_extended)
+export(scale_fill_extended)
+export(scale_fill_likert)
 export(signif_stars)
 export(stat_cross)
 export(stat_prop)

diff --git a/NEWS.md b/NEWS.md
@@ -10,6 +10,12 @@
   `gglikert_stacked()` (using `hex_bw()`) (#57)
 * new argument `data_fun` for `gglikert()`, `gglikert_data()` and
   `gglikert_stacked()` (#60)
+* new scale `scale_fill_likert()`
+* new argument `cutoff` for `gglikert()`, `position_likert()` and
+  `scale_fill_likert()` (#64)
+* new helper `pal_extender()` and corresponding `scale_fill_extender()` and
+  `scale_colour_extender()`
+* new sorting option `"prop_lower"` for `gglikert()` (#62)
 
 # ggstats 0.5.1
 

diff --git a/R/gglikert.R b/R/gglikert.R
@@ -26,7 +26,8 @@
 #' to the answers (see `sort_method`)? One of "none" (default), "ascending" or
 #' "descending"
 #' @param sort_method method used to sort the variables: `"prop"` sort according
-#' to the proportion of answers higher than the centered level, `"mean"`
+#' to the proportion of answers higher than the centered level, `"prop_lower"`
+#' according to the proportion lower than the centered level,  `"mean"`
 #' considers answer as a score and sort according to the mean score, `"median"`
 #' used the median and the majority judgment rule for tie-breaking.
 #' @param sort_prop_include_center when sorting with `"prop"` and if the number
@@ -39,6 +40,13 @@
 #' @param exclude_fill_values Vector of values that should not be displayed
 #' (but still taken into account for computing proportions),
 #' see [position_likert()]
+#' @param cutoff number of modalities to be displayed negatively (i.e. on the
+#' left of the x axis or the bottom of the y axis), could be a decimal value:
+#' `2` to display negatively the two first modalities, `2.5` to display
+#' negatively the two first modalities and half of the third, `2.2` to display
+#' negatively the two first modalities and a fifth of the third (see examples).
+#' By default (`NULL`), it will be equal to the number of modalities divided
+#' by 2, i.e. it will be centered.
 #' @param data_fun for advanced usage, custom function to be applied to the
 #' generated dataset at the end of `gglikert_data()`
 #' @param add_labels should percentage labels be added to the plot?
@@ -98,7 +106,8 @@
 #'
 #' gglikert(df)
 #'
-#' gglikert(df, include = q1:3)
+#' gglikert(df, include = q1:3) +
+#'   scale_fill_likert(pal = scales::brewer_pal(palette = "PRGn"))
 #'
 #' gglikert(df, sort = "ascending")
 #'
@@ -163,10 +172,11 @@ gglikert <- function(data,
                      y = ".question",
                      variable_labels = NULL,
                      sort = c("none", "ascending", "descending"),
-                     sort_method = c("prop", "mean", "median"),
+                     sort_method = c("prop", "prop_lower", "mean", "median"),
                      sort_prop_include_center = totals_include_center,
                      factor_to_sort = ".question",
                      exclude_fill_values = NULL,
+                     cutoff = NULL,
                      data_fun = NULL,
                      add_labels = TRUE,
                      labels_size = 3.5,
@@ -198,6 +208,7 @@ gglikert <- function(data,
       sort_prop_include_center = sort_prop_include_center,
       factor_to_sort = {{ factor_to_sort }},
       exclude_fill_values = exclude_fill_values,
+      cutoff = cutoff,
       data_fun = data_fun
     )
 
@@ -226,7 +237,8 @@ gglikert <- function(data,
     geom_bar(
       position = position_likert(
         reverse = reverse_likert,
-        exclude_fill_values = exclude_fill_values
+        exclude_fill_values = exclude_fill_values,
+        cutoff = cutoff
       ),
       stat = StatProp,
       complete = "fill",
@@ -248,7 +260,8 @@ gglikert <- function(data,
         position = position_likert(
           vjust = .5,
           reverse = reverse_likert,
-          exclude_fill_values = exclude_fill_values
+          exclude_fill_values = exclude_fill_values,
+          cutoff = cutoff
         ),
         size = labels_size
       )
@@ -268,7 +281,8 @@ gglikert <- function(data,
         position = position_likert(
           vjust = .5,
           reverse = reverse_likert,
-          exclude_fill_values = exclude_fill_values
+          exclude_fill_values = exclude_fill_values,
+          cutoff = cutoff
         ),
         size = labels_size,
         color = labels_color
@@ -283,25 +297,29 @@ gglikert <- function(data,
           .data$.answer,
           .data$.weights,
           include_center = TRUE,
-          exclude_fill_values = exclude_fill_values
+          exclude_fill_values = exclude_fill_values,
+          cutoff = cutoff
         ),
         prop_higher = .prop_higher(
           .data$.answer,
           .data$.weights,
           include_center = TRUE,
-          exclude_fill_values = exclude_fill_values
+          exclude_fill_values = exclude_fill_values,
+          cutoff = cutoff
         ),
         label_lower = .prop_lower(
           .data$.answer,
           .data$.weights,
           include_center = totals_include_center,
-          exclude_fill_values = exclude_fill_values
+          exclude_fill_values = exclude_fill_values,
+          cutoff = cutoff
         ),
         label_higher = .prop_higher(
           .data$.answer,
           .data$.weights,
           include_center = totals_include_center,
-          exclude_fill_values = exclude_fill_values
+          exclude_fill_values = exclude_fill_values,
+          cutoff = cutoff
         )
       ) %>%
       dplyr::ungroup() %>%
@@ -352,11 +370,8 @@ gglikert <- function(data,
     theme(
       legend.position = "bottom",
       panel.grid.major.y = element_blank()
-    )
-
-  if (length(levels(data$.answer)) <= 11) {
-    p <- p + scale_fill_brewer(palette = "BrBG")
-  }
+    ) +
+    scale_fill_likert(cutoff = cutoff)
 
   p + facet_grid(
     rows = facet_rows, cols = facet_cols,
@@ -371,10 +386,13 @@ gglikert_data <- function(data,
                           weights = NULL,
                           variable_labels = NULL,
                           sort = c("none", "ascending", "descending"),
-                          sort_method = c("prop", "mean", "median"),
+                          sort_method = c(
+                            "prop", "prop_lower", "mean", "median"
+                          ),
                           sort_prop_include_center = TRUE,
                           factor_to_sort = ".question",
                           exclude_fill_values = NULL,
+                          cutoff = NULL,
                           data_fun = NULL) {
   rlang::check_installed("broom.helpers")
   rlang::check_installed("labelled")
@@ -447,6 +465,7 @@ gglikert_data <- function(data,
         .fun = .prop_higher,
         include_center = sort_prop_include_center,
         exclude_fill_values = exclude_fill_values,
+        cutoff = cutoff,
         .na_rm = FALSE,
         .desc = FALSE
       )
@@ -459,6 +478,33 @@ gglikert_data <- function(data,
         .fun = .prop_higher,
         include_center = sort_prop_include_center,
         exclude_fill_values = exclude_fill_values,
+        cutoff = cutoff,
+        .na_rm = FALSE,
+        .desc = TRUE
+      )
+  }
+  if (sort == "ascending" && sort_method == "prop_lower") {
+    data[[factor_to_sort]] <- data[[factor_to_sort]] %>%
+      forcats::fct_reorder2(
+        data$.answer,
+        data$.weights,
+        .fun = .prop_lower,
+        include_center = sort_prop_include_center,
+        exclude_fill_values = exclude_fill_values,
+        cutoff = cutoff,
+        .na_rm = FALSE,
+        .desc = FALSE
+      )
+  }
+  if (sort == "descending" && sort_method == "prop_lower") {
+    data[[factor_to_sort]] <- data[[factor_to_sort]] %>%
+      forcats::fct_reorder2(
+        data$.answer,
+        data$.weights,
+        .fun = .prop_lower,
+        include_center = sort_prop_include_center,
+        exclude_fill_values = exclude_fill_values,
+        cutoff = cutoff,
         .na_rm = FALSE,
         .desc = TRUE
       )
@@ -520,35 +566,45 @@ gglikert_data <- function(data,
 # Compute the proportion being higher than the center
 # Option to include the centre (if yes, only half taken into account)
 .prop_higher <- function(x, w, include_center = TRUE,
-                         exclude_fill_values = NULL) {
+                         exclude_fill_values = NULL,
+                         cutoff = NULL) {
   N <- sum(as.integer(!is.na(x)) * w)
   if (!is.factor(x)) x <- factor(x)
   if (!is.null(exclude_fill_values)) {
     l <- levels(x)
     l <- l[!l %in% exclude_fill_values]
     x <- factor(x, levels = l)
   }
-  m <- length(levels(x)) / 2 + 1 / 2
+  if (is.null(cutoff)) cutoff <- length(levels(x)) / 2
   x <- as.numeric(x)
-  ic <- ifelse(include_center, 1 / 2, 0)
-  sum(w * as.integer(x > m), w * ic * as.integer(x == m), na.rm = TRUE) / N
+  m <- ceiling(cutoff)
+  sum(
+    w * as.integer(x >= cutoff + 1),
+    include_center * w * (x == m) * (m - cutoff),
+    na.rm = TRUE
+  ) / N
 }
 
 # Compute the proportion being higher than the center
 # Option to include the centre (if yes, only half taken into account)
 .prop_lower <- function(x, w, include_center = TRUE,
-                        exclude_fill_values = NULL) {
+                        exclude_fill_values = NULL,
+                        cutoff = NULL) {
   N <- sum(as.integer(!is.na(x)) * w)
   if (!is.factor(x)) x <- factor(x)
   if (!is.null(exclude_fill_values)) {
     l <- levels(x)
     l <- l[!l %in% exclude_fill_values]
     x <- factor(x, levels = l)
   }
-  m <- length(levels(x)) / 2 + 1 / 2
+  if (is.null(cutoff)) cutoff <- length(levels(x)) / 2
   x <- as.numeric(x)
-  ic <- ifelse(include_center, 1 / 2, 0)
-  sum(w * as.integer(x < m), ic * w * as.integer(x == m), na.rm = TRUE) / N
+  m <- ceiling(cutoff)
+  sum(
+    w * as.integer(x <= cutoff),
+    include_center * w * (x == m) * (cutoff %% 1),
+    na.rm = TRUE
+  ) / N
 }
 
 #' @importFrom stats weighted.mean
@@ -597,7 +653,9 @@ gglikert_stacked <- function(data,
                              y = ".question",
                              variable_labels = NULL,
                              sort = c("none", "ascending", "descending"),
-                             sort_method = c("prop", "mean", "median"),
+                             sort_method = c(
+                               "prop", "prop_lower", "mean", "median"
+                             ),
                              sort_prop_include_center = FALSE,
                              factor_to_sort = ".question",
                              data_fun = NULL,
@@ -707,11 +765,8 @@ gglikert_stacked <- function(data,
     theme(
       legend.position = "bottom",
       panel.grid.major.y = element_blank()
-    )
-
-  if (length(levels(data$.answer)) <= 11) {
-    p <- p + scale_fill_brewer(palette = "BrBG")
-  }
+    ) +
+    scale_fill_extended()
 
   p
 }
diff --git a/R/pal_extender.R b/R/pal_extender.R
@@ -0,0 +1,60 @@
+#' Extend a discrete colour palette
+#'
+#' If the palette returns less colours than requested, the list of colours
+#' will be expanded using [scales::pal_gradient_n()]. To be used with a
+#' sequential or diverging palette. Not relevant for qualitative palettes.
+#'
+#' @param pal A palette function, such as returned by [scales::brewer_pal],
+#' taking a number of colours as entry and returning a list of colours.
+#' @return A palette function.
+#' @export
+#' @examples
+#' pal <- scales::pal_brewer(palette = "PiYG")
+#' scales::show_col(pal(16))
+#' scales::show_col(pal_extender(pal)(16))
+pal_extender <- function(pal = scales::brewer_pal(palette = "BrBG")) {
+  function(n) {
+    cols <- suppressWarnings(
+      stats::na.omit(pal(n))
+    )
+    if (length(cols) <= n) {
+      cols <- scales::pal_gradient_n(cols)(seq(0, 1, length.out = n))
+    }
+    cols
+  }
+}
+
+#' @rdname pal_extender
+#' @param name The name of the scale. Used as the axis or legend title.
+#' If `waiver()`, the default, the name of the scale is taken from the first
+#' mapping used for that aesthetic. If `NULL`, the legend title will be omitted.
+#' @param ... Other arguments passed on to `discrete_scale()` to control name,
+#' limits, breaks, labels and so forth.
+#' @param aesthetics Character string or vector of character strings listing
+#' the name(s) of the aesthetic(s) that this scale works with. This can be
+#' useful, for example, to apply colour settings to the colour and fill
+#' aesthetics at the same time, via `aesthetics = c("colour", "fill")`.
+#' @export
+scale_fill_extended <- function(name = waiver(), ...,
+                                pal = scales::brewer_pal(palette = "BrBG"),
+                                aesthetics = "fill") {
+  ggplot2::discrete_scale(
+    aesthetics,
+    name = name,
+    palette = pal_extender(pal = pal),
+    ...
+  )
+}
+
+#' @rdname pal_extender
+#' @export
+scale_colour_extended <- function(name = waiver(), ...,
+                                  pal = scales::brewer_pal(palette = "BrBG"),
+                                  aesthetics = "colour") {
+  ggplot2::discrete_scale(
+    aesthetics,
+    name = name,
+    palette = pal_extender(pal = pal),
+    ...
+  )
+}