From b9a6272167588ac0b94a2bab23bf83bf3a0487fd Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 09:11:38 -0400 Subject: [PATCH 01/17] Add link for deprecated functions --- R/janitor_deprecated.R | 41 ++++++++++++++++++++------------------- man/add_totals_col.Rd | 2 +- man/add_totals_row.Rd | 2 +- man/adorn_crosstab.Rd | 2 +- man/convert_to_NA.Rd | 6 ++---- man/crosstab.Rd | 2 +- man/janitor_deprecated.Rd | 16 +++++++-------- man/remove_empty_cols.Rd | 6 +----- man/remove_empty_rows.Rd | 2 +- man/use_first_valid_of.Rd | 10 +++++----- 10 files changed, 42 insertions(+), 47 deletions(-) diff --git a/R/janitor_deprecated.R b/R/janitor_deprecated.R index bdd78b6b..884c6363 100644 --- a/R/janitor_deprecated.R +++ b/R/janitor_deprecated.R @@ -2,14 +2,14 @@ #' #' These functions have already become defunct or may be defunct as soon as the next release. #' -#' * [adorn_crosstab()] -#' * [crosstab()] -#' * [use_first_valid_of()] -#' * [convert_to_NA()] -#' * [add_totals_col()] -#' * [add_totals_row()] -#' * [remove_empty_rows()] -#' * [remove_empty_cols()] +#' * [adorn_crosstab()] -> `adorn_` +#' * [crosstab()] -> [tabyl()] +#' * [use_first_valid_of()] -> [dplyr::coalesce()] +#' * [convert_to_NA()] -> [dplyr::na_if()] +#' * [add_totals_col()] -> [`adorn_totals(where = "col")`][adorn_totals()] +#' * [add_totals_row()] -> [adorn_totals()] +#' * [remove_empty_rows()] -> [`remove_empty("rows")`][remove_empty()] +#' * [remove_empty_cols()] -> [`remove_empty("cols")`][remove_empty()] #' #' @name janitor_deprecated # EXCLUDE COVERAGE START @@ -22,7 +22,7 @@ NULL #' @param ... arguments #' @keywords internal #' @description -#' This function is deprecated, use `tabyl(dat, var1, var2)` instead. +#' This function is deprecated, use [`tabyl(dat, var1, var2)`][tabyl()] instead. #' @export crosstab <- function(...) { @@ -36,7 +36,7 @@ crosstab <- function(...) { #' @title Add presentation formatting to a crosstabulation table. 
#' @description -#' This function is deprecated, use the `adorn_` family of functions instead. +#' This function is deprecated, use [tabyl()] with the `adorn_` family of functions instead. #' @param dat a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to `crosstab` that included the argument `percent = "none"`. #' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all". #' @param show_n should counts be displayed alongside the percentages? @@ -59,7 +59,7 @@ adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_t #' @title Append a totals row to a data.frame. #' #' @description -#' This function is deprecated, use `adorn_totals` instead. +#' This function is deprecated, use [adorn_totals()] instead. #' #' @param dat an input data.frame with at least one numeric column. #' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? @@ -79,7 +79,7 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE) { #' @title Append a totals column to a data.frame. #' #' @description -#' This function is deprecated, use `adorn_totals` instead. +#' This function is deprecated, use [`adorn_totals(where = "col")`][adorn_totals()] instead. #' #' @param dat an input data.frame with at least one numeric column. #' @param na.rm should missing values (including NaN) be omitted from the calculations? @@ -100,9 +100,12 @@ add_totals_col <- function(dat, na.rm = TRUE) { #' @title Returns first non-NA value from a set of vectors. #' #' @description -#' At each position of the input vectors, iterates through in order and returns the first non-NA value. This is a robust replacement of the common `ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))`. It's more readable and handles problems like `ifelse`'s inability to work with dates in this way. +#' Warning: Deprecated, do not use in new code. 
Use [dplyr::coalesce()] instead. +#' +#' At each position of the input vectors, iterates through in order and returns the first non-NA value. +#' This is a robust replacement of the common `ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))`. +#' It's more readable and handles problems like [ifelse()]'s inability to work with dates in this way. #' -##' @section Warning: Deprecated, do not use in new code. Use `dplyr::coalesce()` instead. #' @param ... the input vectors. Order matters: these are searched and prioritized in the order they are supplied. #' @param if_all_NA what value should be used when all of the vectors return `NA` for a certain index? Default is NA. #' @return Returns a single vector with the selected values. @@ -120,9 +123,10 @@ use_first_valid_of <- function(..., if_all_NA = NA) { #' @title Convert string values to true `NA` values. #' #' @description +#' Warning: Deprecated, do not use in new code. Use [dplyr::na_if()] instead. +#' #' Converts instances of user-specified strings into `NA`. Can operate on either a single vector or an entire data.frame. #' -#' @section Warning: Deprecated, do not use in new code. Use `dplyr::na_if()` instead. #' @param dat vector or data.frame to operate on. #' @param strings character vector of strings to convert. #' @return Returns a cleaned object. Can be a vector, data.frame, or `tibble::tbl_df` depending on the provided input. @@ -144,7 +148,7 @@ convert_to_NA <- function(dat, strings) { #' @title Removes empty rows from a data.frame. #' #' @description -#' This function is deprecated, use `remove_empty("rows")` instead. +#' This function is deprecated, use [`remove_empty("rows")`][remove_empty()] instead. #' #' @param dat the input data.frame. #' @return Returns the data.frame with no empty rows. @@ -165,13 +169,10 @@ remove_empty_rows <- function(dat) { #' @title Removes empty columns from a data.frame. #' #' @description -#' This function is deprecated, use `remove_empty("cols")` instead. 
+#' This function is deprecated, use [`remove_empty("cols")`][remove_empty()] instead. #' #' @param dat the input data.frame. #' @return Returns the data.frame with no empty columns. -#' @examples -#' # not run: -#' # dat %>% remove_empty_cols #' @export #' @keywords internal diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd index 69fe98b6..c0c0c38c 100644 --- a/man/add_totals_col.Rd +++ b/man/add_totals_col.Rd @@ -15,6 +15,6 @@ add_totals_col(dat, na.rm = TRUE) Returns a data.frame with a totals column containing row-wise sums. } \description{ -This function is deprecated, use \code{adorn_totals} instead. +This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals(where = "col")}} instead. } \keyword{internal} diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd index e33098b0..d6dc8883 100644 --- a/man/add_totals_row.Rd +++ b/man/add_totals_row.Rd @@ -17,6 +17,6 @@ add_totals_row(dat, fill = "-", na.rm = TRUE) Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. } \description{ -This function is deprecated, use \code{adorn_totals} instead. +This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals()}} instead. } \keyword{internal} diff --git a/man/adorn_crosstab.Rd b/man/adorn_crosstab.Rd index 6e66a698..f3a94e42 100644 --- a/man/adorn_crosstab.Rd +++ b/man/adorn_crosstab.Rd @@ -30,6 +30,6 @@ adorn_crosstab( Returns a data.frame. } \description{ -This function is deprecated, use the \code{adorn_} family of functions instead. +This function is deprecated, use \code{\link[=tabyl]{tabyl()}} with the \code{adorn_} family of functions instead. } \keyword{internal} diff --git a/man/convert_to_NA.Rd b/man/convert_to_NA.Rd index cc2aa74e..9cf87886 100644 --- a/man/convert_to_NA.Rd +++ b/man/convert_to_NA.Rd @@ -15,12 +15,10 @@ convert_to_NA(dat, strings) Returns a cleaned object. Can be a vector, data.frame, or \code{tibble::tbl_df} depending on the provided input. 
} \description{ +Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:na_if]{dplyr::na_if()}} instead. + Converts instances of user-specified strings into \code{NA}. Can operate on either a single vector or an entire data.frame. } -\section{Warning}{ - Deprecated, do not use in new code. Use \code{dplyr::na_if()} instead. -} - \seealso{ janitor_deprecated } diff --git a/man/crosstab.Rd b/man/crosstab.Rd index f8249006..32f44354 100644 --- a/man/crosstab.Rd +++ b/man/crosstab.Rd @@ -10,6 +10,6 @@ crosstab(...) \item{...}{arguments} } \description{ -This function is deprecated, use \code{tabyl(dat, var1, var2)} instead. +This function is deprecated, use \code{\link[=tabyl]{tabyl(dat, var1, var2)}} instead. } \keyword{internal} diff --git a/man/janitor_deprecated.Rd b/man/janitor_deprecated.Rd index d1591a6d..8c5c452c 100644 --- a/man/janitor_deprecated.Rd +++ b/man/janitor_deprecated.Rd @@ -8,13 +8,13 @@ These functions have already become defunct or may be defunct as soon as the nex } \details{ \itemize{ -\item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -\item \code{\link[=crosstab]{crosstab()}} -\item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -\item \code{\link[=convert_to_NA]{convert_to_NA()}} -\item \code{\link[=add_totals_col]{add_totals_col()}} -\item \code{\link[=add_totals_row]{add_totals_row()}} -\item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -\item \code{\link[=remove_empty_cols]{remove_empty_cols()}} +\item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -> \code{adorn_} +\item \code{\link[=crosstab]{crosstab()}} -> \code{\link[=tabyl]{tabyl()}} +\item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -> \code{\link[dplyr:coalesce]{dplyr::coalesce()}} +\item \code{\link[=convert_to_NA]{convert_to_NA()}} -> \code{\link[dplyr:na_if]{dplyr::na_if()}} +\item \code{\link[=add_totals_col]{add_totals_col()}} -> \code{\link[=adorn_totals]{adorn_totals(where = "col")}} +\item 
\code{\link[=add_totals_row]{add_totals_row()}} -> \code{\link[=adorn_totals]{adorn_totals()}} +\item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -> \code{\link[=remove_empty]{remove_empty("rows")}} +\item \code{\link[=remove_empty_cols]{remove_empty_cols()}} -> \code{\link[=remove_empty]{remove_empty("cols")}} } } diff --git a/man/remove_empty_cols.Rd b/man/remove_empty_cols.Rd index 35778141..5c996472 100644 --- a/man/remove_empty_cols.Rd +++ b/man/remove_empty_cols.Rd @@ -13,10 +13,6 @@ remove_empty_cols(dat) Returns the data.frame with no empty columns. } \description{ -This function is deprecated, use \code{remove_empty("cols")} instead. -} -\examples{ -# not run: -# dat \%>\% remove_empty_cols +This function is deprecated, use \code{\link[=remove_empty]{remove_empty("cols")}} instead. } \keyword{internal} diff --git a/man/remove_empty_rows.Rd b/man/remove_empty_rows.Rd index ebaf2192..4d987c7d 100644 --- a/man/remove_empty_rows.Rd +++ b/man/remove_empty_rows.Rd @@ -13,7 +13,7 @@ remove_empty_rows(dat) Returns the data.frame with no empty rows. } \description{ -This function is deprecated, use \code{remove_empty("rows")} instead. +This function is deprecated, use \code{\link[=remove_empty]{remove_empty("rows")}} instead. } \examples{ # not run: diff --git a/man/use_first_valid_of.Rd b/man/use_first_valid_of.Rd index 2466f43e..901477ee 100644 --- a/man/use_first_valid_of.Rd +++ b/man/use_first_valid_of.Rd @@ -15,12 +15,12 @@ use_first_valid_of(..., if_all_NA = NA) Returns a single vector with the selected values. } \description{ -At each position of the input vectors, iterates through in order and returns the first non-NA value. This is a robust replacement of the common \code{ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))}. It's more readable and handles problems like \code{ifelse}'s inability to work with dates in this way. -} -\section{Warning}{ - Deprecated, do not use in new code. Use \code{dplyr::coalesce()} instead. 
-} +Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:coalesce]{dplyr::coalesce()}} instead. +At each position of the input vectors, iterates through in order and returns the first non-NA value. +This is a robust replacement of the common \code{ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))}. +It's more readable and handles problems like \code{\link[=ifelse]{ifelse()}}'s inability to work with dates in this way. +} \seealso{ janitor_deprecated } From dde3fde8eaeb07559419864119332048c8572dd8 Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 09:21:48 -0400 Subject: [PATCH 02/17] WS --- R/janitor_deprecated.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/janitor_deprecated.R b/R/janitor_deprecated.R index 884c6363..90586db6 100644 --- a/R/janitor_deprecated.R +++ b/R/janitor_deprecated.R @@ -101,7 +101,7 @@ add_totals_col <- function(dat, na.rm = TRUE) { #' #' @description #' Warning: Deprecated, do not use in new code. Use [dplyr::coalesce()] instead. -#' +#' #' At each position of the input vectors, iterates through in order and returns the first non-NA value. #' This is a robust replacement of the common `ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))`. #' It's more readable and handles problems like [ifelse()]'s inability to work with dates in this way. @@ -124,7 +124,7 @@ use_first_valid_of <- function(..., if_all_NA = NA) { #' #' @description #' Warning: Deprecated, do not use in new code. Use [dplyr::na_if()] instead. -#' +#' #' Converts instances of user-specified strings into `NA`. Can operate on either a single vector or an entire data.frame. #' #' @param dat vector or data.frame to operate on. From 42a9990888597f1d82c5295b9145d6196dd8965b Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 09:33:50 -0400 Subject: [PATCH 03/17] cleanup: janitor requires dplyr 1.0.0 (no need for the old test).
--- tests/testthat/test-tabyl.R | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index 5bb73e43..ae034a51 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -315,32 +315,17 @@ test_that("NA levels get moved to the last column in the data.frame, are suppres dplyr::filter(species == "Human") %>% tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 2) - # The starwars data set changed in dplyr v 1.0.0 so have two blocks of tests: - if (packageVersion("dplyr") > package_version("0.8.5")) { - # If there is NA, it does appear in split list - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 3) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 3) - - # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = TRUE)), 3) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = FALSE)), 2) - } else { - # If there is NA, it does appear in split list - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 5) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 5) - - # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. 
- expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = TRUE)), 5) - expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = FALSE)), 4) - } + # If there is NA, it does appear in split list + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 5) + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 5) + + # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_na = TRUE)), 5) + expect_equal(length(dplyr::starwars %>% + tabyl(eye_color, skin_color, gender, show_na = FALSE)), 4) }) test_that("zero-row and fully-NA inputs are handled", { From a0ca1b790f04e6d534761abef857800c1d2c0d44 Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 09:37:50 -0400 Subject: [PATCH 04/17] Update tabyl's doc --- R/janitor.R | 2 +- R/tabyl.R | 34 +++++++++++++++++++++++----------- man/janitor-package.Rd | 2 +- man/tabyl.Rd | 34 +++++++++++++++++++++++----------- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/R/janitor.R b/R/janitor.R index eeccc8bc..1d3adf33 100644 --- a/R/janitor.R +++ b/R/janitor.R @@ -12,7 +12,7 @@ #' #' @section Package context: #' This package follows the principles of the "tidyverse" and works -#' well with the pipe function `\%>\%`. +#' well with the pipe function `%>%`. #' #' janitor was built with beginning-to-intermediate R users in mind #' and is optimized for user-friendliness. Advanced users can do most diff --git a/R/tabyl.R b/R/tabyl.R index 168420a5..5a719e97 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -1,20 +1,32 @@ #' Generate a frequency table (1-, 2-, or 3-way). #' #' @description -#' A fully-featured alternative to `table()`. 
Results are data.frames and can be formatted and enhanced with janitor's family of `adorn_` functions. +#' A fully-featured alternative to `table()`. Results are data.frames and can be +#' formatted and enhanced with janitor's family of `adorn_` functions. #' -#' Specify a data.frame and the one, two, or three unquoted column names you want to tabulate. Three variables generates a list of 2-way tabyls, split by the third variable. +#' Specify a `data.frame` and the one, two, or three unquoted column names you +#' want to tabulate. Three variables generates a list of 2-way tabyls, +#' split by the third variable. #' -#' Alternatively, you can tabulate a single variable that isn't in a data.frame by calling `tabyl` on a vector, e.g., `tabyl(mtcars$gear)`. +#' Alternatively, you can tabulate a single variable that isn't in a `data.frame` +#' by calling `tabyl()` on a vector, e.g., `tabyl(mtcars$gear)`. #' -#' @param dat a `data.frame` containing the variables you wish to count. Or, a vector you want to tabulate. -#' @param var1 the column name of the first variable. -#' @param var2 (optional) the column name of the second variable (the rows in a 2-way tabulation). -#' @param var3 (optional) the column name of the third variable (the list in a 3-way tabulation). -#' @param show_na should counts of `NA` values be displayed? In a one-way tabyl, the presence of `NA` values triggers an additional column showing valid percentages(calculated excluding `NA` values). -#' @param show_missing_levels should counts of missing levels of factors be displayed? These will be rows and/or columns of zeroes. Useful for keeping consistent output dimensions even when certain factor levels may not be present in the data. -#' @param ... the arguments to tabyl (here just for the sake of documentation compliance, as all arguments are listed with the vector- and data.frame-specific methods) -#' @return A data.frame with frequencies and percentages of the tabulated variable(s). 
A 3-way tabulation returns a list of data.frames. +#' @param dat A `data.frame` containing the variables you wish to count. +#' Or, a vector you want to tabulate. +#' @param var1 The column name of the first variable. +#' @param var2 (optional) the column name of the second variable +#' (the rows in a 2-way tabulation). +#' @param var3 (optional) the column name of the third variable +#' (the list in a 3-way tabulation). +#' @param show_na Should counts of `NA` values be displayed? In a one-way tabyl, +#' the presence of `NA` values triggers an additional column showing valid percentages +#' (calculated excluding `NA` values). +#' @param show_missing_levels Should counts of missing levels of factors be displayed? +#' These will be rows and/or columns of zeroes. Useful for keeping consistent +#' output dimensions even when certain factor levels may not be present in the data. +#' @param ... Additional arguments passed to methods. +#' @return A `data.frame` with frequencies and percentages of the tabulated variable(s). +#' A 3-way tabulation returns a list of data frames. #' @export #' @examples #' diff --git a/man/janitor-package.Rd b/man/janitor-package.Rd index abc82e51..01b24b6c 100644 --- a/man/janitor-package.Rd +++ b/man/janitor-package.Rd @@ -21,7 +21,7 @@ Microsoft Excel. \section{Package context}{ This package follows the principles of the "tidyverse" and works -well with the pipe function \verb{\\\%>\\\%}. +well with the pipe function \verb{\%>\%}. janitor was built with beginning-to-intermediate R users in mind and is optimized for user-friendliness. Advanced users can do most diff --git a/man/tabyl.Rd b/man/tabyl.Rd index 461ab9d5..f883dd98 100644 --- a/man/tabyl.Rd +++ b/man/tabyl.Rd @@ -13,29 +13,41 @@ tabyl(dat, ...) \method{tabyl}{data.frame}(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE, ...) } \arguments{ -\item{dat}{a \code{data.frame} containing the variables you wish to count. 
Or, a vector you want to tabulate.} +\item{dat}{A \code{data.frame} containing the variables you wish to count. +Or, a vector you want to tabulate.} -\item{...}{the arguments to tabyl (here just for the sake of documentation compliance, as all arguments are listed with the vector- and data.frame-specific methods)} +\item{...}{Additional arguments passed to methods.} -\item{show_na}{should counts of \code{NA} values be displayed? In a one-way tabyl, the presence of \code{NA} values triggers an additional column showing valid percentages(calculated excluding \code{NA} values).} +\item{show_na}{Should counts of \code{NA} values be displayed? In a one-way tabyl, +the presence of \code{NA} values triggers an additional column showing valid percentages +(calculated excluding \code{NA} values).} -\item{show_missing_levels}{should counts of missing levels of factors be displayed? These will be rows and/or columns of zeroes. Useful for keeping consistent output dimensions even when certain factor levels may not be present in the data.} +\item{show_missing_levels}{Should counts of missing levels of factors be displayed? +These will be rows and/or columns of zeroes. Useful for keeping consistent +output dimensions even when certain factor levels may not be present in the data.} -\item{var1}{the column name of the first variable.} +\item{var1}{The column name of the first variable.} -\item{var2}{(optional) the column name of the second variable (the rows in a 2-way tabulation).} +\item{var2}{(optional) the column name of the second variable +(the rows in a 2-way tabulation).} -\item{var3}{(optional) the column name of the third variable (the list in a 3-way tabulation).} +\item{var3}{(optional) the column name of the third variable +(the list in a 3-way tabulation).} } \value{ -A data.frame with frequencies and percentages of the tabulated variable(s). A 3-way tabulation returns a list of data.frames. 
+A \code{data.frame} with frequencies and percentages of the tabulated variable(s). +A 3-way tabulation returns a list of data frames. } \description{ -A fully-featured alternative to \code{table()}. Results are data.frames and can be formatted and enhanced with janitor's family of \code{adorn_} functions. +A fully-featured alternative to \code{table()}. Results are data.frames and can be +formatted and enhanced with janitor's family of \code{adorn_} functions. -Specify a data.frame and the one, two, or three unquoted column names you want to tabulate. Three variables generates a list of 2-way tabyls, split by the third variable. +Specify a \code{data.frame} and the one, two, or three unquoted column names you +want to tabulate. Three variables generates a list of 2-way tabyls, +split by the third variable. -Alternatively, you can tabulate a single variable that isn't in a data.frame by calling \code{tabyl} on a vector, e.g., \code{tabyl(mtcars$gear)}. +Alternatively, you can tabulate a single variable that isn't in a \code{data.frame} +by calling \code{tabyl()} on a vector, e.g., \code{tabyl(mtcars$gear)}. } \examples{ From 703786be71bbba3c7aa936ceee6b99acaccab09d Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 10:35:27 -0400 Subject: [PATCH 05/17] `usethis::use_pipe()` + add example from janitor --- R/utils-pipe.R | 18 ++++++++++++++++++ R/utils.R | 17 ----------------- man/pipe.Rd | 12 ++++++++++-- 3 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 R/utils-pipe.R delete mode 100644 R/utils.R diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 00000000..e7fa4dff --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,18 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. 
+#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +#' @examples +#' mtcars %>% +#' tabyl(carb, cyl) %>% +#' adorn_totals() +NULL diff --git a/R/utils.R b/R/utils.R deleted file mode 100644 index ea8af53f..00000000 --- a/R/utils.R +++ /dev/null @@ -1,17 +0,0 @@ -# Copied from tidyr/R/utils.R, to export the magrittr pipe - -#' Pipe operator -#' -#' @description Exported from the magrittr package. To learn more, run `?magrittr::`\%>\%``. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @examples -#' mtcars %>% -#' tabyl(carb, cyl) %>% -#' adorn_totals() -NULL diff --git a/man/pipe.Rd b/man/pipe.Rd index ee8964de..4f928936 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -1,13 +1,21 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/utils-pipe.R \name{\%>\%} \alias{\%>\%} \title{Pipe operator} \usage{ lhs \%>\% rhs } +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling \code{rhs(lhs)}. +} \description{ -Exported from the magrittr package. To learn more, run \verb{?magrittr::}\\%>\\%``. +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
} \examples{ mtcars \%>\% From 725b0dff9119de8573796d251579e752ba174dde Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 10:39:19 -0400 Subject: [PATCH 06/17] Take the actual dplyr 1.0.0 version --- tests/testthat/test-tabyl.R | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index ae034a51..aa53ada3 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -310,22 +310,17 @@ test_that("NA levels get moved to the last column in the data.frame, are suppres data.frame(c = 10, `1` = 1, `2` = 0, NA_ = 1, check.names = FALSE) ) - # If no NA in 3rd variable, it doesn't appear in split list - expect_equal(length(dplyr::starwars %>% - dplyr::filter(species == "Human") %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 2) - # If there is NA, it does appear in split list expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 5) + tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 3) expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 5) + tabyl(eye_color, skin_color, gender, show_missing_levels = FALSE)), 3) # NA level in the list gets suppressed if show_na = FALSE. Should have one less level if NA is suppressed. expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = TRUE)), 5) + tabyl(eye_color, skin_color, gender, show_na = TRUE)), 3) expect_equal(length(dplyr::starwars %>% - tabyl(eye_color, skin_color, gender, show_na = FALSE)), 4) + tabyl(eye_color, skin_color, gender, show_na = FALSE)), 2) }) test_that("zero-row and fully-NA inputs are handled", { From 28792dfa5a99dd05fb489881b510b5f2967ed68a Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 10:40:38 -0400 Subject: [PATCH 07/17] Remove space. 
--- tests/testthat/test-adorn-totals.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-adorn-totals.R b/tests/testthat/test-adorn-totals.R index b7598ebc..045460be 100644 --- a/tests/testthat/test-adorn-totals.R +++ b/tests/testthat/test-adorn-totals.R @@ -168,12 +168,12 @@ test_that("error thrown if no columns past first are numeric", { ) expect_error( adorn_totals(df2, "col"), - "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." + "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." ) expect_error( mixed %>% adorn_totals("row", "-", TRUE, "Totals", d), - "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." + "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." ) # Add a test where only the first column is numeric @@ -183,7 +183,7 @@ test_that("error thrown if no columns past first are numeric", { ) expect_error( adorn_totals(df3), - "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." + "at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions." ) }) From 01a4ff54acdacb2063df68f888655015454dd912 Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 10:40:59 -0400 Subject: [PATCH 08/17] Wrap long lines + tweak docs. 
--- R/adorn_ns.R | 25 +++++++++++----- R/adorn_percentages.R | 18 ++++++++---- R/adorn_rounding.R | 31 ++++++++++++++------ R/adorn_title.R | 53 ++++++++++++++++++++++++---------- R/adorn_totals.R | 56 +++++++++++++++++++++++++----------- R/as_and_untabyl.R | 14 +++++---- R/compare_df_cols.R | 26 +++++++++-------- R/convert_to_date.R | 22 +++++++------- R/excel_dates.R | 2 +- R/sas_dates.R | 2 +- R/statistical_tests.R | 3 +- R/top_levels.R | 13 +++++---- man/adorn_ns.Rd | 25 +++++++++++----- man/adorn_percentages.Rd | 18 ++++++++---- man/adorn_rounding.Rd | 27 ++++++++++++----- man/adorn_title.Rd | 31 ++++++++++++++++---- man/adorn_totals.Rd | 35 ++++++++++++++++------ man/as_tabyl.Rd | 11 +++---- man/chisq.test.Rd | 3 +- man/compare_df_cols.Rd | 7 ++--- man/compare_df_cols_same.Rd | 11 ++++--- man/convert_to_date.Rd | 20 +++++-------- man/describe_class.Rd | 6 ++-- man/excel_numeric_to_date.Rd | 4 +-- man/sas_numeric_to_date.Rd | 4 +-- man/top_levels.Rd | 8 +++--- 26 files changed, 314 insertions(+), 161 deletions(-) diff --git a/R/adorn_ns.R b/R/adorn_ns.R index 97bac414..3dbc7284 100644 --- a/R/adorn_ns.R +++ b/R/adorn_ns.R @@ -1,14 +1,25 @@ #' Add underlying Ns to a tabyl displaying percentages. #' -#' This function adds back the underlying Ns to a `tabyl` whose percentages were calculated using `adorn_percentages()`, to display the Ns and percentages together. You can also call it on a non-tabyl data.frame to which you wish to append Ns. +#' This function adds back the underlying Ns to a `tabyl` whose percentages were +#' calculated using [adorn_percentages()], to display the Ns and percentages together. +#' You can also call it on a non-tabyl data.frame to which you wish to append Ns. #' -#' @param dat a data.frame of class `tabyl` that has had `adorn_percentages` and/or `adorn_pct_formatting` called on it. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). 
-#' @param position should the N go in the front, or in the rear, of the percentage? -#' @param ns the Ns to append. The default is the "core" attribute of the input tabyl `dat`, where the original Ns of a two-way `tabyl` are stored. However, if your Ns are stored somewhere else, or you need to customize them beyond what can be done with `format_func`, you can supply them here. -#' @param format_func a formatting function to run on the Ns. Consider defining with [base::format()]. -#' @param ... columns to adorn. This takes a tidyselect specification. By default, all columns are adorned except for the first column and columns not of class `numeric`, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to `tabyl`. +#' @param dat A data.frame of class `tabyl` that has had `adorn_percentages` and/or +#' `adorn_pct_formatting` called on it. If given a list of data.frames, +#' this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). +#' @param position Should the N go in the front, or in the rear, of the percentage? +#' @param ns The Ns to append. The default is the "core" attribute of the input tabyl +#' `dat`, where the original Ns of a two-way `tabyl` are stored. However, if your Ns +#' are stored somewhere else, or you need to customize them beyond what can be done +#' with `format_func`, you can supply them here. +#' @param format_func A formatting function to run on the Ns. Consider defining +#' with [base::format()]. +#' @param ... Columns to adorn. This takes a tidyselect specification. By default, +#' all columns are adorned except for the first column and columns not of class +#' `numeric`, but this allows you to manually specify which columns should be adorned, +#' for use on a data.frame that does not result from a call to `tabyl`. 
#' -#' @return a data.frame with Ns appended +#' @return A `data.frame` with Ns appended #' @export #' @examples #' mtcars %>% diff --git a/R/adorn_percentages.R b/R/adorn_percentages.R index 2d7a436a..3151d5e1 100644 --- a/R/adorn_percentages.R +++ b/R/adorn_percentages.R @@ -1,13 +1,21 @@ #' Convert a data.frame of counts to percentages. #' -#' This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to adorn in the `...` argument. +#' This function defaults to excluding the first column of the input data.frame, +#' assuming that it contains a descriptive variable, but this can be overridden +#' by specifying the columns to adorn in the `...` argument. #' -#' @param dat a `tabyl` or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). -#' @param denominator the direction to use for calculating percentages. One of "row", "col", or "all". +#' @param dat A `tabyl` or other data.frame with a tabyl-like layout. +#' If given a list of data.frames, this function will apply itself to each +#' `data.frame` in the list (designed for 3-way `tabyl` lists). +#' @param denominator The direction to use for calculating percentages. +#' One of "row", "col", or "all". #' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @param ... columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to `tabyl`. +#' @param ... columns to adorn. This takes a <[`tidy-select`][dplyr::dplyr_tidy_select]> +#' specification. 
By default, all numeric columns (besides the initial column, if numeric) +#' are adorned, but this allows you to manually specify which columns should +#' be adorned, for use on a `data.frame` that does not result from a call to [tabyl()]. #' -#' @return Returns a data.frame of percentages, expressed as numeric values between 0 and 1. +#' @return A `data.frame` of percentages, expressed as numeric values between 0 and 1. #' @export #' @examples #' diff --git a/R/adorn_rounding.R b/R/adorn_rounding.R index 3e3909bb..60059ed5 100644 --- a/R/adorn_rounding.R +++ b/R/adorn_rounding.R @@ -1,16 +1,29 @@ #' Round the numeric columns in a data.frame. #' #' @description -#' Can run on any data.frame with at least one numeric column. This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to round in the `...` argument. +#' Can run on any `data.frame` with at least one numeric column. +#' This function defaults to excluding the first column of the input data.frame, +#' assuming that it contains a descriptive variable, but this can be overridden by +#' specifying the columns to round in the `...` argument. #' -#' If you're formatting percentages, e.g., the result of `adorn_percentages()`, use `adorn_pct_formatting()` instead. This is a more flexible variant for ad-hoc usage. Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the numbers with spaces for alignment in the results data.frame. This function retains the class of numeric input columns. +#' If you're formatting percentages, e.g., the result of [adorn_percentages()], +#' use [adorn_pct_formatting()] instead. This is a more flexible variant for ad-hoc usage. +#' Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the +#' numbers with spaces for alignment in the results `data.frame`. +#' This function retains the class of numeric input columns. 
#' -#' @param dat a `tabyl` or other data.frame with similar layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). -#' @param digits how many digits should be displayed after the decimal point? -#' @param rounding method to use for rounding - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15. -#' @param ... columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to `tabyl`. +#' @param dat A `tabyl` or other `data.frame` with similar layout. +#' If given a list of data.frames, this function will apply itself to each +#' `data.frame` in the list (designed for 3-way `tabyl` lists). +#' @param digits How many digits should be displayed after the decimal point? +#' @param rounding Method to use for rounding - either "half to even" +#' (the base R default method), or "half up", where 14.5 rounds up to 15. +#' @param ... Columns to adorn. This takes a tidyselect specification. +#' By default, all numeric columns (besides the initial column, if numeric) +#' are adorned, but this allows you to manually specify which columns should +#' be adorned, for use on a data.frame that does not result from a call to `tabyl`. #' -#' @return Returns the data.frame with rounded numeric columns. +#' @return The `data.frame` with rounded numeric columns. #' @export #' @examples #' @@ -54,7 +67,9 @@ adorn_rounding <- function(dat, digits = 1, rounding = "half to even", ...) { } numeric_cols <- which(vapply(dat, is.numeric, logical(1))) non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) - numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. 
Moved up to this line so that if only 1st col is numeric, the function errors + # assume 1st column should not be included so remove it from numeric_cols. + # Moved up to this line so that if only 1st col is numeric, the function errors + numeric_cols <- setdiff(numeric_cols, 1) if (rlang::dots_n(...) == 0) { cols_to_round <- numeric_cols diff --git a/R/adorn_title.R b/R/adorn_title.R index 15783d44..bac28c3e 100644 --- a/R/adorn_title.R +++ b/R/adorn_title.R @@ -1,13 +1,30 @@ -#' @title Add column name to the top of a two-way tabyl. +#' Add column name to the top of a two-way tabyl. #' -#' @description -#' This function adds the column variable name to the top of a `tabyl` for a complete display of information. This makes the tabyl prettier, but renders the data.frame less useful for further manipulation. -#' -#' @param dat a data.frame of class `tabyl` or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). -#' @param placement whether the column name should be added to the top of the tabyl in an otherwise-empty row `"top"` or appended to the already-present row name variable (`"combined"`). The formatting in the `"top"` option has the look of base R's `table()`; it also wipes out the other column names, making it hard to further use the data.frame besides formatting it for reporting. The `"combined"` option is more conservative in this regard. -#' @param row_name (optional) default behavior is to pull the row name from the attributes of the input `tabyl` object. If you wish to override that text, or if your input is not a `tabyl`, supply a string here. -#' @param col_name (optional) default behavior is to pull the column_name from the attributes of the input `tabyl` object. If you wish to override that text, or if your input is not a `tabyl`, supply a string here. -#' @return the input tabyl, augmented with the column title. 
Non-tabyl inputs that are of class `tbl_df` are downgraded to basic data.frames so that the title row prints correctly. +#' This function adds the column variable name to the top of a `tabyl` for a +#' complete display of information. This makes the tabyl prettier, but renders +#' the `data.frame` less useful for further manipulation. +#' +#' The `placement` argument indicates whether the column name should be added to +#' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the +#' already-present row name variable (`"combined"`). The formatting in the `"top"` +#' option has the look of base R's `table()`; it also wipes out the other column +#' names, making it hard to further use the `data.frame` besides formatting it for reporting. +#' The `"combined"` option is more conservative in this regard. +#' +#' @param dat A `data.frame` of class `tabyl` or other `data.frame` with a tabyl-like layout. +#' If given a list of data.frames, this function will apply itself to each `data.frame` +#' in the list (designed for 3-way `tabyl` lists). +#' @param placement The title placement, one of `"top"`, or `"combined"`. +#' See **Details** for more information. +#' @param row_name (optional) default behavior is to pull the row name from the +#' attributes of the input `tabyl` object. If you wish to override that text, +#' or if your input is not a `tabyl`, supply a string here. +#' @param col_name (optional) default behavior is to pull the column_name from +#' the attributes of the input `tabyl` object. If you wish to override that text, +#' or if your input is not a `tabyl`, supply a string here. +#' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs +#' that are of class `tbl_df` are downgraded to basic data.frames so that the +#' title row prints correctly. 
#' #' @export #' @examples @@ -38,12 +55,14 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { } if ("tabyl" %in% class(dat)) { if (attr(dat, "tabyl_type") == "one_way") { - warning("adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result") + warning(c("adorn_title is meant for two-way tabyls, ", + "calling it on a one-way tabyl may not yield a meaningful result")) } } if (missing(col_name)) { if (!"tabyl" %in% class(dat)) { - stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument") + stop(c("When input is not a data.frame of class tabyl, ", + "a value must be specified for the col_name argument")) } col_var <- attr(dat, "var_names")$col } else { @@ -63,13 +82,15 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { if ("tabyl" %in% class(dat)) { row_var <- attr(dat, "var_names")$row } else { - row_var <- names(dat)[1] # for non-tabyl input, if no row_name supplied, use first existing name + # for non-tabyl input, if no row_name supplied, use first existing name + row_var <- names(dat)[1] } } if (placement == "top") { - dat[, ] <- lapply(dat[, ], as.character) # to handle factors, problematic in first column and at bind_rows. + # to handle factors, problematic in first column and at bind_rows. 
+ dat[, ] <- lapply(dat[, ], as.character) # Can't use mutate_all b/c it strips attributes top <- dat[1, ] @@ -82,8 +103,10 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { out <- dat names(out)[1] <- paste(row_var, col_var, sep = "/") } - if ("tbl_df" %in% class(out)) { # "top" text doesn't print if input (and thus the output) is a tibble - out <- as.data.frame(out) # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls + # "top" text doesn't print if input (and thus the output) is a tibble + if ("tbl_df" %in% class(out)) { + # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls + out <- as.data.frame(out) } out } diff --git a/R/adorn_totals.R b/R/adorn_totals.R index f3aa4e43..8055ae91 100644 --- a/R/adorn_totals.R +++ b/R/adorn_totals.R @@ -1,15 +1,31 @@ -#' @title Append a totals row and/or column to a data.frame. +#' Append a totals row and/or column to a data.frame #' -#' @description -#' This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to be totaled in the `...` argument. Non-numeric columns are converted to character class and have a user-specified fill character inserted in the totals row. +#' This function defaults to excluding the first column of the input data.frame, +#' assuming that it contains a descriptive variable, but this can be overridden +#' by specifying the columns to be totaled in the `...` argument. Non-numeric +#' columns are converted to character class and have a user-specified fill character +#' inserted in the totals row. #' -#' @param dat an input data.frame with at least one numeric column. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). 
-#' @param where one of "row", "col", or `c("row", "col")` -#' @param fill if there are non-numeric columns, what should fill the bottom row of those columns? If a string, relevant columns will be coerced to character. If `NA` then column types are preserved. -#' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @param name name of the totals row and/or column. If both are created, and `name` is a single string, that name is applied to both. If both are created and `name` is a vector of length 2, the first element of the vector will be used as the row name (in column 1), and the second element will be used as the totals column name. Defaults to "Total". -#' @param ... columns to total. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are included in the totals, but this allows you to manually specify which columns should be included, for use on a data.frame that does not result from a call to `tabyl`. -#' @return a data.frame augmented with a totals row, column, or both. The data.frame is now also of class `tabyl` and stores information about the attached totals and underlying data in the tabyl attributes. +#' @param dat An input `data.frame` with at least one numeric column. If given a +#' list of data.frames, this function will apply itself to each `data.frame` +#' in the list (designed for 3-way `tabyl` lists). +#' @param where One of "row", "col", or `c("row", "col")` +#' @param fill If there are non-numeric columns, what should fill the bottom row +#' of those columns? If a string, relevant columns will be coerced to character. +#' If `NA` then column types are preserved. +#' @param na.rm Should missing values (including `NaN`) be omitted from the calculations? +#' @param name Name of the totals row and/or column. If both are created, and +#' `name` is a single string, that name is applied to both. 
If both are created +#' and `name` is a vector of length 2, the first element of the vector will be +#' used as the row name (in column 1), and the second element will be used as the +#' totals column name. Defaults to "Total". +#' @param ... Columns to total. This takes a tidyselect specification. By default, +#' all numeric columns (besides the initial column, if numeric) are included in +#' the totals, but this allows you to manually specify which columns should be +#' included, for use on a data.frame that does not result from a call to `tabyl`. +#' @return A `data.frame` augmented with a totals row, column, or both. +#' The `data.frame` is now also of class `tabyl` and stores information about +#' the attached totals and underlying data in the tabyl attributes. #' @export #' @examples #' mtcars %>% @@ -31,7 +47,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) if (rlang::dots_n(...) == 0) { - numeric_cols <- setdiff(numeric_cols, 1) # by default 1st column is not totaled so remove it from numeric_cols and add to non_numeric_cols + # by default 1st column is not totaled so remove it from numeric_cols and add to non_numeric_cols + numeric_cols <- setdiff(numeric_cols, 1) non_numeric_cols <- unique(c(1, non_numeric_cols)) cols_to_total <- numeric_cols } else { @@ -43,7 +60,9 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } if (length(cols_to_total) == 0) { - stop("at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions.") + stop("at least one targeted column must be of class numeric. ", + "Control target variables with the ... argument. 
", + "adorn_totals should be called before other adorn_ functions.") } if (sum(where %in% c("row", "col")) != length(where)) { @@ -63,7 +82,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T # set totals attribute if (sum(where %in% attr(dat, "totals")) > 0) { # if either of the values of "where" are already in totals attribute stop("trying to re-add a totals dimension that is already been added") - } else if (length(attr(dat, "totals")) == 1) { # if totals row OR col has already been adorned, append new axis to the current attribute + } else if (length(attr(dat, "totals")) == 1) { + # if totals row OR col has already been adorned, append new axis to the current attribute attr(dat, "totals") <- c(attr(dat, "totals"), where) } else { attr(dat, "totals") <- where @@ -77,7 +97,9 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } # creates the totals row to be appended col_sum <- function(a_col, na_rm = na.rm) { - if (is.numeric(a_col)) { # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached + # can't do this with if_else because it doesn't like the sum() of a character vector, + # even if that clause is not reached + if (is.numeric(a_col)) { sum(a_col, na.rm = na_rm) } else { if (!is.character(fill)) { # if fill isn't a character string, use NA consistent with data types @@ -119,7 +141,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } }) - if (!is.character(dat[[1]]) && !1 %in% cols_to_total) { # convert first col to character so that name can be appended + if (!is.character(dat[[1]]) && !1 %in% cols_to_total) { + # convert first col to character so that name can be appended dat[[1]] <- as.character(dat[[1]]) col_totals[[1]] <- as.character(col_totals[[1]]) } @@ -128,7 +151,8 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T if (!1 %in% cols_to_total) { # give 
users the option to total the first column?? Up to them I guess col_totals[1, 1] <- name[1] # replace first column value with name argument } else { - message("Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row") + message("Because the first column was specified to be totaled, ", + "it does not contain the label 'Total' (or user-specified name) in the totals row") } dat[(nrow(dat) + 1), ] <- col_totals[1, ] # insert totals_col as last row in dat if (factor_input) { # restore factor/ordered info, #494 diff --git a/R/as_and_untabyl.R b/R/as_and_untabyl.R index 5a14b525..bdb60a99 100644 --- a/R/as_and_untabyl.R +++ b/R/as_and_untabyl.R @@ -1,7 +1,7 @@ -#' Add `tabyl` attributes to a data.frame. +#' Add `tabyl` attributes to a data.frame #' #' @description -#' A `tabyl` is a data.frame containing counts of a variable or +#' A `tabyl` is a `data.frame` containing counts of a variable or #' co-occurrences of two variables (a.k.a., a contingency table or crosstab). #' This specialized kind of data.frame has attributes that enable `adorn_` #' functions to be called for precise formatting and presentation of results. @@ -15,12 +15,13 @@ #' variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values #' in columns 2:n are counts of the co-occurrences of the two variables.* #' -#' * = this is the ideal form of a tabyl, but janitor's `adorn_` functions tolerate and ignore non-numeric columns in positions 2:n. +#' * = this is the ideal form of a `tabyl`, but janitor's `adorn_` functions tolerate +#' and ignore non-numeric columns in positions 2:n. #' -#' For instance, the result of [dplyr::count()] followed by [tidyr::spread()] +#' For instance, the result of [dplyr::count()] followed by [tidyr::pivot_wider()] #' can be treated as a `tabyl`. 
#' -#' The result of calling `tabyl()` on a single variable is a special class of +#' The result of calling [tabyl()] on a single variable is a special class of #' one-way tabyl; this function only pertains to the two-way tabyl. #' #' @param dat a data.frame with variable values in the first column and numeric @@ -53,7 +54,8 @@ as_tabyl <- function(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) { # assign core attribute and classes if ("tabyl" %in% class(dat)) { - # if already a tabyl, may have totals row. Safest play is to simply reorder the core rows to match the dat rows + # if already a tabyl, may have totals row. + # Safest play is to simply reorder the core rows to match the dat rows attr(dat, "core") <- attr(dat, "core")[order(match( attr(dat, "core")[, 1], dat[, 1] diff --git a/R/compare_df_cols.R b/R/compare_df_cols.R index 6456a62a..4d1dab06 100644 --- a/R/compare_df_cols.R +++ b/R/compare_df_cols.R @@ -1,3 +1,5 @@ +#' Compare data frames columns before merging +#' #' Generate a comparison of data.frames (or similar objects) that indicates if #' they will successfully bind together by rows. #' @@ -39,7 +41,7 @@ #' compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2)) #' # a combination of list and data.frame input #' compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3)) -#' @family Data frame type comparison +#' @family data frame type comparison #' @export compare_df_cols <- function(..., return = c("all", "match", "mismatch"), bind_method = c("bind_rows", "rbind"), strict_description = FALSE) { # Input checking @@ -215,13 +217,13 @@ compare_df_cols_df_maker.list <- function(x, class_colname = "class", strict_des #' Do the the data.frames have the same columns & types? #' -#' @description Check whether a set of data.frames are row-bindable. Calls -#' `compare_df_cols()`and returns TRUE if there are no mis-matching rows. ` +#' Check whether a set of data.frames are row-bindable. 
Calls `compare_df_cols()` +#' and returns `TRUE` if there are no mis-matching rows. +#' #' @inheritParams compare_df_cols #' @param verbose Print the mismatching columns if binding will fail. -#' @return `TRUE` if row binding will succeed or `FALSE` if it will -#' fail. -#' @family Data frame type comparison +#' @return `TRUE` if row binding will succeed or `FALSE` if it will fail. +#' @family data frame type comparison #' @examples #' compare_df_cols_same(data.frame(A = 1), data.frame(A = 2)) #' compare_df_cols_same(data.frame(A = 1), data.frame(B = 2)) @@ -241,18 +243,18 @@ compare_df_cols_same <- function(..., bind_method = c("bind_rows", "rbind"), ver #' #' @details For package developers, an S3 generic method can be written for #' `describe_class()` for custom classes that may need more definition -#' than the default method. This function is called by `compare_df_cols`. +#' than the default method. This function is called by [compare_df_cols()]. #' #' @param x The object to describe #' @param strict_description Should differing factor levels be treated -#' as differences for the purposes of identifying mismatches? -#' `strict_description = TRUE` is stricter and factors with different -#' levels will be treated as different classes. `FALSE` is more -#' lenient: for class comparison purposes, the variable is just a "factor". +#' as differences for the purposes of identifying mismatches? +#' `strict_description = TRUE` is stricter and factors with different +#' levels will be treated as different classes. `FALSE` is more +#' lenient: for class comparison purposes, the variable is just a "factor". #' @return A character scalar describing the class(es) of an object where if the #' scalar will match, columns in a data.frame (or similar object) should bind #' together without issue. 
-#' @family Data frame type comparison +#' @family data frame type comparison #' @examples #' describe_class(1) #' describe_class(factor("A")) diff --git a/R/convert_to_date.R b/R/convert_to_date.R index e124b446..fa2c7daf 100644 --- a/R/convert_to_date.R +++ b/R/convert_to_date.R @@ -1,11 +1,13 @@ -#' Convert many date and datetime formats as may be received from Microsoft -#' Excel -#' -#' @details Character conversion checks if it matches something that looks like -#' a Microsoft Excel numeric date, converts those to numeric, and then runs -#' convert_to_datetime_helper() on those numbers. Then, character to Date or -#' POSIXct conversion occurs via `character_fun(x, ...)` or -#' `character_fun(x, tz=tz, ...)`, respectively. +#' Parse dates from many formats +#' +#' Convert many date and date-time (POSIXct) formats as may be received +#' from Microsoft Excel. +#' @details +#' Character conversion checks if it matches something that looks like a +#' Microsoft Excel numeric date, converts those to numeric, and then runs +#' convert_to_datetime_helper() on those numbers. Then, character to Date or +#' POSIXct conversion occurs via `character_fun(x, ...)` or +#' `character_fun(x, tz=tz, ...)`, respectively. #' #' @param x The object to convert #' @param tz The timezone for POSIXct output, unless an object is POSIXt @@ -26,7 +28,7 @@ #' # Mixed date source data can be provided. 
#' convert_to_date(c("2020-02-29", "40000.1")) #' @export -#' @family Date-time cleaning +#' @family date-time cleaning #' @importFrom lubridate ymd convert_to_date <- function(x, ..., character_fun = lubridate::ymd, string_conversion_failure = c("error", "warning")) { string_conversion_failure <- match.arg(string_conversion_failure) @@ -38,7 +40,7 @@ convert_to_date <- function(x, ..., character_fun = lubridate::ymd, string_conve ) } -#' @describeIn convert_to_date Convert to a date-time (POSIXct) +#' @name convert_to_date #' @examples #' convert_to_datetime( #' c("2009-07-06", "40000.1", "40000", NA), diff --git a/R/excel_dates.R b/R/excel_dates.R index a570d5e6..c950c8fe 100644 --- a/R/excel_dates.R +++ b/R/excel_dates.R @@ -42,7 +42,7 @@ #' include_time = TRUE, #' round_seconds = FALSE #' ) # Time with fractional seconds is included -#' @family Date-time cleaning +#' @family date-time cleaning #' @importFrom lubridate as_date as_datetime force_tz hour minute second excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = Sys.timezone()) { if (all(is.na(date_num))) { diff --git a/R/sas_dates.R b/R/sas_dates.R index b0f4e01e..da4c6370 100644 --- a/R/sas_dates.R +++ b/R/sas_dates.R @@ -14,7 +14,7 @@ #' sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC") # 1998-07-26T14:45:32Z #' sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC") # 2002-10-26T01:00:00Z #' sas_numeric_to_date(time_num = 3600) # 01:00:00 -#' @family Date-time cleaning +#' @family date-time cleaning #' @export sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "") { # Confirm that a usable set of input arguments is given diff --git a/R/statistical_tests.R b/R/statistical_tests.R index 239879d9..20efa27d 100644 --- a/R/statistical_tests.R +++ b/R/statistical_tests.R @@ -55,7 +55,8 @@ chisq.test.default <- function(x, y = NULL, ...) 
{ #' @rdname chisq.test #' @method chisq.test tabyl -#' @param tabyl_results if TRUE and x is a tabyl object, also return `observed`, `expected`, `residuals` and `stdres` as tabyl +#' @param tabyl_results If `TRUE` and `x` is a tabyl object, +#' also return `observed`, `expected`, `residuals` and `stdres` as tabyl. #' @export chisq.test.tabyl <- function(x, tabyl_results = TRUE, ...) { diff --git a/R/top_levels.R b/R/top_levels.R index 4deae4de..a40a8e00 100644 --- a/R/top_levels.R +++ b/R/top_levels.R @@ -3,10 +3,10 @@ #' #' Get a frequency table of a factor variable, grouped into categories by level. #' -#' @param input_vec the factor variable to tabulate. -#' @param n number of levels to include in top and bottom groups -#' @param show_na should cases where the variable is NA be shown? -#' @return a data.frame (actually a `tbl_df`) with the frequencies of the +#' @param input_vec The factor variable to tabulate. +#' @param n Number of levels to include in top and bottom groups +#' @param show_na Should cases where the variable is NA be shown? +#' @return A `data.frame` (actually a `tbl_df`) with the frequencies of the #' grouped, tabulated variable. Includes counts and percentages, and valid #' percentages (calculated omitting `NA` values, if present in the vector and #' `show_na = TRUE`.) 
@@ -26,7 +26,10 @@ top_levels <- function(input_vec, n = 2, show_na = FALSE) { stop("input factor variable must have at least 3 levels") } if (num_levels_in_var < 2 * n) { - stop(paste0("there are ", num_levels_in_var, " levels in the variable and ", n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, " is greater than ", num_levels_in_var, ", there would be overlap in the top and bottom groups and some records will be double-counted.")) + stop(paste0("there are ", num_levels_in_var, " levels in the variable and ", + n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, + " is greater than ", num_levels_in_var, ", + there would be overlap in the top and bottom groups and some records will be double-counted.")) } if (n < 1 || n %% 1 != 0) { stop("n must be a whole number at least 1") diff --git a/man/adorn_ns.Rd b/man/adorn_ns.Rd index 4e7ba233..aeaa0b43 100644 --- a/man/adorn_ns.Rd +++ b/man/adorn_ns.Rd @@ -15,21 +15,32 @@ adorn_ns( ) } \arguments{ -\item{dat}{a data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or \code{adorn_pct_formatting} called on it. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or +\code{adorn_pct_formatting} called on it. If given a list of data.frames, +this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} -\item{position}{should the N go in the front, or in the rear, of the percentage?} +\item{position}{Should the N go in the front, or in the rear, of the percentage?} -\item{ns}{the Ns to append. The default is the "core" attribute of the input tabyl \code{dat}, where the original Ns of a two-way \code{tabyl} are stored. 
However, if your Ns are stored somewhere else, or you need to customize them beyond what can be done with \code{format_func}, you can supply them here.} +\item{ns}{The Ns to append. The default is the "core" attribute of the input tabyl +\code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns +are stored somewhere else, or you need to customize them beyond what can be done +with \code{format_func}, you can supply them here.} -\item{format_func}{a formatting function to run on the Ns. Consider defining with \code{\link[base:format]{base::format()}}.} +\item{format_func}{A formatting function to run on the Ns. Consider defining +with \code{\link[base:format]{base::format()}}.} -\item{...}{columns to adorn. This takes a tidyselect specification. By default, all columns are adorned except for the first column and columns not of class \code{numeric}, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{Columns to adorn. This takes a tidyselect specification. By default, +all columns are adorned except for the first column and columns not of class +\code{numeric}, but this allows you to manually specify which columns should be adorned, +for use on a data.frame that does not result from a call to \code{tabyl}.} } \value{ -a data.frame with Ns appended +A \code{data.frame} with Ns appended } \description{ -This function adds back the underlying Ns to a \code{tabyl} whose percentages were calculated using \code{adorn_percentages()}, to display the Ns and percentages together. You can also call it on a non-tabyl data.frame to which you wish to append Ns. +This function adds back the underlying Ns to a \code{tabyl} whose percentages were +calculated using \code{\link[=adorn_percentages]{adorn_percentages()}}, to display the Ns and percentages together. +You can also call it on a non-tabyl data.frame to which you wish to append Ns. 
} \examples{ mtcars \%>\% diff --git a/man/adorn_percentages.Rd b/man/adorn_percentages.Rd index 02677b3c..e04d5afc 100644 --- a/man/adorn_percentages.Rd +++ b/man/adorn_percentages.Rd @@ -7,19 +7,27 @@ adorn_percentages(dat, denominator = "row", na.rm = TRUE, ...) } \arguments{ -\item{dat}{a \code{tabyl} or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A \code{tabyl} or other data.frame with a tabyl-like layout. +If given a list of data.frames, this function will apply itself to each +\code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} -\item{denominator}{the direction to use for calculating percentages. One of "row", "col", or "all".} +\item{denominator}{The direction to use for calculating percentages. +One of "row", "col", or "all".} \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} -\item{...}{columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{columns to adorn. This takes a <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> +specification. By default, all numeric columns (besides the initial column, if numeric) +are adorned, but this allows you to manually specify which columns should +be adorned, for use on a \code{data.frame} that does not result from a call to \code{\link[=tabyl]{tabyl()}}.} } \value{ -Returns a data.frame of percentages, expressed as numeric values between 0 and 1. +A \code{data.frame} of percentages, expressed as numeric values between 0 and 1. 
} \description{ -This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to adorn in the \code{...} argument. +This function defaults to excluding the first column of the input data.frame, +assuming that it contains a descriptive variable, but this can be overridden +by specifying the columns to adorn in the \code{...} argument. } \examples{ diff --git a/man/adorn_rounding.Rd b/man/adorn_rounding.Rd index 5da5df7b..409ce000 100644 --- a/man/adorn_rounding.Rd +++ b/man/adorn_rounding.Rd @@ -7,21 +7,34 @@ adorn_rounding(dat, digits = 1, rounding = "half to even", ...) } \arguments{ -\item{dat}{a \code{tabyl} or other data.frame with similar layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A \code{tabyl} or other \code{data.frame} with similar layout. +If given a list of data.frames, this function will apply itself to each +\code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} -\item{digits}{how many digits should be displayed after the decimal point?} +\item{digits}{How many digits should be displayed after the decimal point?} -\item{rounding}{method to use for rounding - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15.} +\item{rounding}{Method to use for rounding - either "half to even" +(the base R default method), or "half up", where 14.5 rounds up to 15.} -\item{...}{columns to adorn. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are adorned, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{Columns to adorn. This takes a tidyselect specification. 
+By default, all numeric columns (besides the initial column, if numeric) +are adorned, but this allows you to manually specify which columns should +be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} } \value{ -Returns the data.frame with rounded numeric columns. +The \code{data.frame} with rounded numeric columns. } \description{ -Can run on any data.frame with at least one numeric column. This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to round in the \code{...} argument. +Can run on any \code{data.frame} with at least one numeric column. +This function defaults to excluding the first column of the input data.frame, +assuming that it contains a descriptive variable, but this can be overridden by +specifying the columns to round in the \code{...} argument. -If you're formatting percentages, e.g., the result of \code{adorn_percentages()}, use \code{adorn_pct_formatting()} instead. This is a more flexible variant for ad-hoc usage. Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the numbers with spaces for alignment in the results data.frame. This function retains the class of numeric input columns. +If you're formatting percentages, e.g., the result of \code{\link[=adorn_percentages]{adorn_percentages()}}, +use \code{\link[=adorn_pct_formatting]{adorn_pct_formatting()}} instead. This is a more flexible variant for ad-hoc usage. +Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the +numbers with spaces for alignment in the results \code{data.frame}. +This function retains the class of numeric input columns. 
} \examples{ diff --git a/man/adorn_title.Rd b/man/adorn_title.Rd index 81c17b14..0bdd54f3 100644 --- a/man/adorn_title.Rd +++ b/man/adorn_title.Rd @@ -7,19 +7,38 @@ adorn_title(dat, placement = "top", row_name, col_name) } \arguments{ -\item{dat}{a data.frame of class \code{tabyl} or other data.frame with a tabyl-like layout. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{A \code{data.frame} of class \code{tabyl} or other \code{data.frame} with a tabyl-like layout. +If given a list of data.frames, this function will apply itself to each \code{data.frame} +in the list (designed for 3-way \code{tabyl} lists).} -\item{placement}{whether the column name should be added to the top of the tabyl in an otherwise-empty row \code{"top"} or appended to the already-present row name variable (\code{"combined"}). The formatting in the \code{"top"} option has the look of base R's \code{table()}; it also wipes out the other column names, making it hard to further use the data.frame besides formatting it for reporting. The \code{"combined"} option is more conservative in this regard.} +\item{placement}{The title placement, one of \code{"top"}, or \code{"combined"}. +See \strong{Details} for more information.} -\item{row_name}{(optional) default behavior is to pull the row name from the attributes of the input \code{tabyl} object. If you wish to override that text, or if your input is not a \code{tabyl}, supply a string here.} +\item{row_name}{(optional) default behavior is to pull the row name from the +attributes of the input \code{tabyl} object. If you wish to override that text, +or if your input is not a \code{tabyl}, supply a string here.} -\item{col_name}{(optional) default behavior is to pull the column_name from the attributes of the input \code{tabyl} object. 
If you wish to override that text, or if your input is not a \code{tabyl}, supply a string here.} +\item{col_name}{(optional) default behavior is to pull the column_name from +the attributes of the input \code{tabyl} object. If you wish to override that text, +or if your input is not a \code{tabyl}, supply a string here.} } \value{ -the input tabyl, augmented with the column title. Non-tabyl inputs that are of class \code{tbl_df} are downgraded to basic data.frames so that the title row prints correctly. +The input \code{tabyl}, augmented with the column title. Non-tabyl inputs +that are of class \code{tbl_df} are downgraded to basic data.frames so that the +title row prints correctly. } \description{ -This function adds the column variable name to the top of a \code{tabyl} for a complete display of information. This makes the tabyl prettier, but renders the data.frame less useful for further manipulation. +This function adds the column variable name to the top of a \code{tabyl} for a +complete display of information. This makes the tabyl prettier, but renders +the \code{data.frame} less useful for further manipulation. +} +\details{ +The \code{placement} argument indicates whether the column name should be added to +the \code{top} of the tabyl in an otherwise-empty row \code{"top"} or appended to the +already-present row name variable (\code{"combined"}). The formatting in the \code{"top"} +option has the look of base R's \code{table()}; it also wipes out the other column +names, making it hard to further use the \code{data.frame} besides formatting it for reporting. +The \code{"combined"} option is more conservative in this regard. 
} \examples{ diff --git a/man/adorn_totals.Rd b/man/adorn_totals.Rd index ce16ccfd..2f6a55ef 100644 --- a/man/adorn_totals.Rd +++ b/man/adorn_totals.Rd @@ -2,28 +2,45 @@ % Please edit documentation in R/adorn_totals.R \name{adorn_totals} \alias{adorn_totals} -\title{Append a totals row and/or column to a data.frame.} +\title{Append a totals row and/or column to a data.frame} \usage{ adorn_totals(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...) } \arguments{ -\item{dat}{an input data.frame with at least one numeric column. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} +\item{dat}{An input \code{data.frame} with at least one numeric column. If given a +list of data.frames, this function will apply itself to each \code{data.frame} +in the list (designed for 3-way \code{tabyl} lists).} -\item{where}{one of "row", "col", or \code{c("row", "col")}} +\item{where}{One of "row", "col", or \code{c("row", "col")}} -\item{fill}{if there are non-numeric columns, what should fill the bottom row of those columns? If a string, relevant columns will be coerced to character. If \code{NA} then column types are preserved.} +\item{fill}{If there are non-numeric columns, what should fill the bottom row +of those columns? If a string, relevant columns will be coerced to character. +If \code{NA} then column types are preserved.} -\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +\item{na.rm}{Should missing values (including \code{NaN}) be omitted from the calculations?} -\item{name}{name of the totals row and/or column. If both are created, and \code{name} is a single string, that name is applied to both. If both are created and \code{name} is a vector of length 2, the first element of the vector will be used as the row name (in column 1), and the second element will be used as the totals column name. 
Defaults to "Total".} +\item{name}{Name of the totals row and/or column. If both are created, and +\code{name} is a single string, that name is applied to both. If both are created +and \code{name} is a vector of length 2, the first element of the vector will be +used as the row name (in column 1), and the second element will be used as the +totals column name. Defaults to "Total".} -\item{...}{columns to total. This takes a tidyselect specification. By default, all numeric columns (besides the initial column, if numeric) are included in the totals, but this allows you to manually specify which columns should be included, for use on a data.frame that does not result from a call to \code{tabyl}.} +\item{...}{Columns to total. This takes a tidyselect specification. By default, +all numeric columns (besides the initial column, if numeric) are included in +the totals, but this allows you to manually specify which columns should be +included, for use on a data.frame that does not result from a call to \code{tabyl}.} } \value{ -a data.frame augmented with a totals row, column, or both. The data.frame is now also of class \code{tabyl} and stores information about the attached totals and underlying data in the tabyl attributes. +A \code{data.frame} augmented with a totals row, column, or both. +The \code{data.frame} is now also of class \code{tabyl} and stores information about +the attached totals and underlying data in the tabyl attributes. } \description{ -This function defaults to excluding the first column of the input data.frame, assuming that it contains a descriptive variable, but this can be overridden by specifying the columns to be totaled in the \code{...} argument. Non-numeric columns are converted to character class and have a user-specified fill character inserted in the totals row. 
+This function defaults to excluding the first column of the input data.frame, +assuming that it contains a descriptive variable, but this can be overridden +by specifying the columns to be totaled in the \code{...} argument. Non-numeric +columns are converted to character class and have a user-specified fill character +inserted in the totals row. } \examples{ mtcars \%>\% diff --git a/man/as_tabyl.Rd b/man/as_tabyl.Rd index fc5480c0..635898d1 100644 --- a/man/as_tabyl.Rd +++ b/man/as_tabyl.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/as_and_untabyl.R \name{as_tabyl} \alias{as_tabyl} -\title{Add \code{tabyl} attributes to a data.frame.} +\title{Add \code{tabyl} attributes to a data.frame} \usage{ as_tabyl(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) } @@ -25,7 +25,7 @@ Returns the same data.frame, but with the additional class of "tabyl" and the attribute "core". } \description{ -A \code{tabyl} is a data.frame containing counts of a variable or +A \code{tabyl} is a \code{data.frame} containing counts of a variable or co-occurrences of two variables (a.k.a., a contingency table or crosstab). This specialized kind of data.frame has attributes that enable \code{adorn_} functions to be called for precise formatting and presentation of results. @@ -39,13 +39,14 @@ meets the requirements of a two-way tabyl: 1) First column contains values of variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values in columns 2:n are counts of the co-occurrences of the two variables.* \itemize{ -\item = this is the ideal form of a tabyl, but janitor's \code{adorn_} functions tolerate and ignore non-numeric columns in positions 2:n. +\item = this is the ideal form of a \code{tabyl}, but janitor's \code{adorn_} functions tolerate +and ignore non-numeric columns in positions 2:n. 
} -For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:spread]{tidyr::spread()}} +For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}} can be treated as a \code{tabyl}. -The result of calling \code{tabyl()} on a single variable is a special class of +The result of calling \code{\link[=tabyl]{tabyl()}} on a single variable is a special class of one-way tabyl; this function only pertains to the two-way tabyl. } \examples{ diff --git a/man/chisq.test.Rd b/man/chisq.test.Rd index baac01a5..675d948b 100644 --- a/man/chisq.test.Rd +++ b/man/chisq.test.Rd @@ -19,7 +19,8 @@ chisq.test(x, ...) \item{y}{if x is a vector, must be another vector or factor of the same length} -\item{tabyl_results}{if TRUE and x is a tabyl object, also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl} +\item{tabyl_results}{If \code{TRUE} and \code{x} is a tabyl object, +also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl.} } \value{ The result is the same as the one of \code{stats::chisq.test()}. 
diff --git a/man/compare_df_cols.Rd b/man/compare_df_cols.Rd index 231a4859..05333348 100644 --- a/man/compare_df_cols.Rd +++ b/man/compare_df_cols.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/compare_df_cols.R \name{compare_df_cols} \alias{compare_df_cols} -\title{Generate a comparison of data.frames (or similar objects) that indicates if -they will successfully bind together by rows.} +\title{Compare data frames columns before merging} \usage{ compare_df_cols( ..., @@ -64,8 +63,8 @@ compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2)) compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3)) } \seealso{ -Other Data frame type comparison: +Other data frame type comparison: \code{\link{compare_df_cols_same}()}, \code{\link{describe_class}()} } -\concept{Data frame type comparison} +\concept{data frame type comparison} diff --git a/man/compare_df_cols_same.Rd b/man/compare_df_cols_same.Rd index 5bbb9d0b..157bbcb2 100644 --- a/man/compare_df_cols_same.Rd +++ b/man/compare_df_cols_same.Rd @@ -25,12 +25,11 @@ missing from a data.frame would be considered a mismatch (as in \item{verbose}{Print the mismatching columns if binding will fail.} } \value{ -\code{TRUE} if row binding will succeed or \code{FALSE} if it will -fail. +\code{TRUE} if row binding will succeed or \code{FALSE} if it will fail. } \description{ -Check whether a set of data.frames are row-bindable. Calls -\code{compare_df_cols()}and returns TRUE if there are no mis-matching rows. ` +Check whether a set of data.frames are row-bindable. Calls \code{compare_df_cols()} +and returns \code{TRUE} if there are no mis-matching rows. 
} \examples{ compare_df_cols_same(data.frame(A = 1), data.frame(A = 2)) @@ -39,8 +38,8 @@ compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), verbose = FALSE) compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), bind_method = "rbind") } \seealso{ -Other Data frame type comparison: +Other data frame type comparison: \code{\link{compare_df_cols}()}, \code{\link{describe_class}()} } -\concept{Data frame type comparison} +\concept{data frame type comparison} diff --git a/man/convert_to_date.Rd b/man/convert_to_date.Rd index 4765d08a..61b4c64f 100644 --- a/man/convert_to_date.Rd +++ b/man/convert_to_date.Rd @@ -3,8 +3,7 @@ \name{convert_to_date} \alias{convert_to_date} \alias{convert_to_datetime} -\title{Convert many date and datetime formats as may be received from Microsoft -Excel} +\title{Parse dates from many formats} \usage{ convert_to_date( x, @@ -42,21 +41,16 @@ POSIXct objects for \code{convert_to_datetime()} or Date objects for \code{convert_to_date()}. } \description{ -Convert many date and datetime formats as may be received from Microsoft -Excel +Convert many date and date-time (POSIXct) formats as may be received +from Microsoft Excel. } \details{ -Character conversion checks if it matches something that looks like -a Microsoft Excel numeric date, converts those to numeric, and then runs +Character conversion checks if it matches something that looks like a +Microsoft Excel numeric date, converts those to numeric, and then runs convert_to_datetime_helper() on those numbers. Then, character to Date or POSIXct conversion occurs via \code{character_fun(x, ...)} or \code{character_fun(x, tz=tz, ...)}, respectively. 
} -\section{Functions}{ -\itemize{ -\item \code{convert_to_datetime()}: Convert to a date-time (POSIXct) - -}} \examples{ convert_to_date("2009-07-06") convert_to_date(40000) @@ -69,8 +63,8 @@ convert_to_datetime( ) } \seealso{ -Other Date-time cleaning: +Other date-time cleaning: \code{\link{excel_numeric_to_date}()}, \code{\link{sas_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/describe_class.Rd b/man/describe_class.Rd index 2717e693..1c895342 100644 --- a/man/describe_class.Rd +++ b/man/describe_class.Rd @@ -32,7 +32,7 @@ Describe the class(es) of an object \details{ For package developers, an S3 generic method can be written for \code{describe_class()} for custom classes that may need more definition -than the default method. This function is called by \code{compare_df_cols}. +than the default method. This function is called by \code{\link[=compare_df_cols]{compare_df_cols()}}. } \section{Methods (by class)}{ \itemize{ @@ -49,8 +49,8 @@ describe_class(ordered(c("A", "B"))) describe_class(ordered(c("A", "B")), strict_description = FALSE) } \seealso{ -Other Data frame type comparison: +Other data frame type comparison: \code{\link{compare_df_cols_same}()}, \code{\link{compare_df_cols}()} } -\concept{Data frame type comparison} +\concept{data frame type comparison} diff --git a/man/excel_numeric_to_date.Rd b/man/excel_numeric_to_date.Rd index 22b49409..454ac00e 100644 --- a/man/excel_numeric_to_date.Rd +++ b/man/excel_numeric_to_date.Rd @@ -64,8 +64,8 @@ excel_numeric_to_date(40000.521, ) # Time with fractional seconds is included } \seealso{ -Other Date-time cleaning: +Other date-time cleaning: \code{\link{convert_to_date}()}, \code{\link{sas_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/sas_numeric_to_date.Rd b/man/sas_numeric_to_date.Rd index 3dffc7af..57d18ce6 100644 --- a/man/sas_numeric_to_date.Rd +++ b/man/sas_numeric_to_date.Rd @@ -36,8 +36,8 @@ SAS 
Date, Time, and Datetime Values reference (retrieved on 2022-03-08): https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm } \seealso{ -Other Date-time cleaning: +Other date-time cleaning: \code{\link{convert_to_date}()}, \code{\link{excel_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} diff --git a/man/top_levels.Rd b/man/top_levels.Rd index 0484d7b2..80d9571b 100644 --- a/man/top_levels.Rd +++ b/man/top_levels.Rd @@ -8,14 +8,14 @@ other levels.} top_levels(input_vec, n = 2, show_na = FALSE) } \arguments{ -\item{input_vec}{the factor variable to tabulate.} +\item{input_vec}{The factor variable to tabulate.} -\item{n}{number of levels to include in top and bottom groups} +\item{n}{Number of levels to include in top and bottom groups} -\item{show_na}{should cases where the variable is NA be shown?} +\item{show_na}{Should cases where the variable is NA be shown?} } \value{ -a data.frame (actually a \code{tbl_df}) with the frequencies of the +A \code{data.frame} (actually a \code{tbl_df}) with the frequencies of the grouped, tabulated variable. Includes counts and percentages, and valid percentages (calculated omitting \code{NA} values, if present in the vector and \code{show_na = TRUE}.) From a113c8ba0bc7ba2dabc95d33973f2e05c41f1fd3 Mon Sep 17 00:00:00 2001 From: olivroy Date: Sun, 20 Aug 2023 10:45:35 -0400 Subject: [PATCH 09/17] Revert unintended change + style --- R/adorn_ns.R | 6 +++--- R/adorn_percentages.R | 4 ++-- R/adorn_title.R | 36 ++++++++++++++++++++---------------- R/adorn_totals.R | 24 ++++++++++++++---------- R/compare_df_cols.R | 4 ++-- R/convert_to_date.R | 8 ++++---- R/statistical_tests.R | 2 +- R/top_levels.R | 10 ++++++---- tests/testthat/test-tabyl.R | 4 ++++ 9 files changed, 56 insertions(+), 42 deletions(-) diff --git a/R/adorn_ns.R b/R/adorn_ns.R index 3dbc7284..3cb01bcc 100644 --- a/R/adorn_ns.R +++ b/R/adorn_ns.R @@ -5,18 +5,18 @@ #' You can also call it on a non-tabyl data.frame to which you wish to append Ns. 
#' #' @param dat A data.frame of class `tabyl` that has had `adorn_percentages` and/or -#' `adorn_pct_formatting` called on it. If given a list of data.frames, +#' `adorn_pct_formatting` called on it. If given a list of data.frames, #' this function will apply itself to each data.frame in the list (designed for 3-way `tabyl` lists). #' @param position Should the N go in the front, or in the rear, of the percentage? #' @param ns The Ns to append. The default is the "core" attribute of the input tabyl #' `dat`, where the original Ns of a two-way `tabyl` are stored. However, if your Ns -#' are stored somewhere else, or you need to customize them beyond what can be done +#' are stored somewhere else, or you need to customize them beyond what can be done #' with `format_func`, you can supply them here. #' @param format_func A formatting function to run on the Ns. Consider defining #' with [base::format()]. #' @param ... Columns to adorn. This takes a tidyselect specification. By default, #' all columns are adorned except for the first column and columns not of class -#' `numeric`, but this allows you to manually specify which columns should be adorned, +#' `numeric`, but this allows you to manually specify which columns should be adorned, #' for use on a data.frame that does not result from a call to `tabyl`. #' #' @return A `data.frame` with Ns appended diff --git a/R/adorn_percentages.R b/R/adorn_percentages.R index 3151d5e1..5f88607e 100644 --- a/R/adorn_percentages.R +++ b/R/adorn_percentages.R @@ -1,10 +1,10 @@ #' Convert a data.frame of counts to percentages. #' #' This function defaults to excluding the first column of the input data.frame, -#' assuming that it contains a descriptive variable, but this can be overridden +#' assuming that it contains a descriptive variable, but this can be overridden #' by specifying the columns to adorn in the `...` argument. #' -#' @param dat A `tabyl` or other data.frame with a tabyl-like layout. 
+#' @param dat A `tabyl` or other data.frame with a tabyl-like layout. #' If given a list of data.frames, this function will apply itself to each #' `data.frame` in the list (designed for 3-way `tabyl` lists). #' @param denominator The direction to use for calculating percentages. diff --git a/R/adorn_title.R b/R/adorn_title.R index bac28c3e..5bf3cdbe 100644 --- a/R/adorn_title.R +++ b/R/adorn_title.R @@ -1,28 +1,28 @@ #' Add column name to the top of a two-way tabyl. #' #' This function adds the column variable name to the top of a `tabyl` for a -#' complete display of information. This makes the tabyl prettier, but renders +#' complete display of information. This makes the tabyl prettier, but renders #' the `data.frame` less useful for further manipulation. -#' +#' #' The `placement` argument indicates whether the column name should be added to -#' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the +#' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the #' already-present row name variable (`"combined"`). The formatting in the `"top"` -#' option has the look of base R's `table()`; it also wipes out the other column -#' names, making it hard to further use the `data.frame` besides formatting it for reporting. +#' option has the look of base R's `table()`; it also wipes out the other column +#' names, making it hard to further use the `data.frame` besides formatting it for reporting. #' The `"combined"` option is more conservative in this regard. -#' +#' #' @param dat A `data.frame` of class `tabyl` or other `data.frame` with a tabyl-like layout. #' If given a list of data.frames, this function will apply itself to each `data.frame` #' in the list (designed for 3-way `tabyl` lists). #' @param placement The title placement, one of `"top"`, or `"combined"`. #' See **Details** for more information. 
#' @param row_name (optional) default behavior is to pull the row name from the -#' attributes of the input `tabyl` object. If you wish to override that text, +#' attributes of the input `tabyl` object. If you wish to override that text, #' or if your input is not a `tabyl`, supply a string here. -#' @param col_name (optional) default behavior is to pull the column_name from +#' @param col_name (optional) default behavior is to pull the column_name from #' the attributes of the input `tabyl` object. If you wish to override that text, #' or if your input is not a `tabyl`, supply a string here. -#' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs +#' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs #' that are of class `tbl_df` are downgraded to basic data.frames so that the #' title row prints correctly. #' @@ -55,14 +55,18 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { } if ("tabyl" %in% class(dat)) { if (attr(dat, "tabyl_type") == "one_way") { - warning(c("adorn_title is meant for two-way tabyls, ", - "calling it on a one-way tabyl may not yield a meaningful result")) + warning(c( + "adorn_title is meant for two-way tabyls, ", + "calling it on a one-way tabyl may not yield a meaningful result" + )) } } if (missing(col_name)) { if (!"tabyl" %in% class(dat)) { - stop(c("When input is not a data.frame of class tabyl, ", - "a value must be specified for the col_name argument")) + stop(c( + "When input is not a data.frame of class tabyl, ", + "a value must be specified for the col_name argument" + )) } col_var <- attr(dat, "var_names")$col } else { @@ -83,14 +87,14 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { row_var <- attr(dat, "var_names")$row } else { # for non-tabyl input, if no row_name supplied, use first existing name - row_var <- names(dat)[1] + row_var <- names(dat)[1] } } if (placement == "top") { # to handle factors, problematic in first column and 
at bind_rows. - dat[, ] <- lapply(dat[, ], as.character) + dat[, ] <- lapply(dat[, ], as.character) # Can't use mutate_all b/c it strips attributes top <- dat[1, ] @@ -106,7 +110,7 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { # "top" text doesn't print if input (and thus the output) is a tibble if ("tbl_df" %in% class(out)) { # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls - out <- as.data.frame(out) + out <- as.data.frame(out) } out } diff --git a/R/adorn_totals.R b/R/adorn_totals.R index 8055ae91..8d86184a 100644 --- a/R/adorn_totals.R +++ b/R/adorn_totals.R @@ -1,6 +1,6 @@ #' Append a totals row and/or column to a data.frame #' -#' This function defaults to excluding the first column of the input data.frame, +#' This function defaults to excluding the first column of the input data.frame, #' assuming that it contains a descriptive variable, but this can be overridden #' by specifying the columns to be totaled in the `...` argument. Non-numeric #' columns are converted to character class and have a user-specified fill character @@ -14,8 +14,8 @@ #' of those columns? If a string, relevant columns will be coerced to character. #' If `NA` then column types are preserved. #' @param na.rm Should missing values (including `NaN`) be omitted from the calculations? -#' @param name Name of the totals row and/or column. If both are created, and -#' `name` is a single string, that name is applied to both. If both are created +#' @param name Name of the totals row and/or column. If both are created, and +#' `name` is a single string, that name is applied to both. If both are created #' and `name` is a vector of length 2, the first element of the vector will be #' used as the row name (in column 1), and the second element will be used as the #' totals column name. Defaults to "Total". 
@@ -23,7 +23,7 @@ #' all numeric columns (besides the initial column, if numeric) are included in #' the totals, but this allows you to manually specify which columns should be #' included, for use on a data.frame that does not result from a call to `tabyl`. -#' @return A `data.frame` augmented with a totals row, column, or both. +#' @return A `data.frame` augmented with a totals row, column, or both. #' The `data.frame` is now also of class `tabyl` and stores information about #' the attached totals and underlying data in the tabyl attributes. #' @export @@ -60,9 +60,11 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } if (length(cols_to_total) == 0) { - stop("at least one targeted column must be of class numeric. ", - "Control target variables with the ... argument. ", - "adorn_totals should be called before other adorn_ functions.") + stop( + "at least one targeted column must be of class numeric. ", + "Control target variables with the ... argument. ", + "adorn_totals should be called before other adorn_ functions." + ) } if (sum(where %in% c("row", "col")) != length(where)) { @@ -97,7 +99,7 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } # creates the totals row to be appended col_sum <- function(a_col, na_rm = na.rm) { - # can't do this with if_else because it doesn't like the sum() of a character vector, + # can't do this with if_else because it doesn't like the sum() of a character vector, # even if that clause is not reached if (is.numeric(a_col)) { sum(a_col, na.rm = na_rm) @@ -151,8 +153,10 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T if (!1 %in% cols_to_total) { # give users the option to total the first column?? 
Up to them I guess col_totals[1, 1] <- name[1] # replace first column value with name argument } else { - message("Because the first column was specified to be totaled, ", - "it does not contain the label 'Total' (or user-specified name) in the totals row") + message( + "Because the first column was specified to be totaled, ", + "it does not contain the label 'Total' (or user-specified name) in the totals row" + ) } dat[(nrow(dat) + 1), ] <- col_totals[1, ] # insert totals_col as last row in dat if (factor_input) { # restore factor/ordered info, #494 diff --git a/R/compare_df_cols.R b/R/compare_df_cols.R index 4d1dab06..c478b91c 100644 --- a/R/compare_df_cols.R +++ b/R/compare_df_cols.R @@ -1,5 +1,5 @@ #' Compare data frames columns before merging -#' +#' #' Generate a comparison of data.frames (or similar objects) that indicates if #' they will successfully bind together by rows. #' @@ -219,7 +219,7 @@ compare_df_cols_df_maker.list <- function(x, class_colname = "class", strict_des #' #' Check whether a set of data.frames are row-bindable. Calls `compare_df_cols()` #' and returns `TRUE` if there are no mis-matching rows. -#' +#' #' @inheritParams compare_df_cols #' @param verbose Print the mismatching columns if binding will fail. #' @return `TRUE` if row binding will succeed or `FALSE` if it will fail. diff --git a/R/convert_to_date.R b/R/convert_to_date.R index fa2c7daf..b96b3f9e 100644 --- a/R/convert_to_date.R +++ b/R/convert_to_date.R @@ -1,6 +1,6 @@ -#' Parse dates from many formats -#' -#' Convert many date and date-time (POSIXct) formats as may be received +#' Parse dates from many formats +#' +#' Convert many date and date-time (POSIXct) formats as may be received #' from Microsoft Excel. 
#' @details #' Character conversion checks if it matches something that looks like a @@ -40,7 +40,7 @@ convert_to_date <- function(x, ..., character_fun = lubridate::ymd, string_conve ) } -#' @name convert_to_date +#' @name convert_to_date #' @examples #' convert_to_datetime( #' c("2009-07-06", "40000.1", "40000", NA), diff --git a/R/statistical_tests.R b/R/statistical_tests.R index 20efa27d..b0fe5b51 100644 --- a/R/statistical_tests.R +++ b/R/statistical_tests.R @@ -55,7 +55,7 @@ chisq.test.default <- function(x, y = NULL, ...) { #' @rdname chisq.test #' @method chisq.test tabyl -#' @param tabyl_results If `TRUE` and `x` is a tabyl object, +#' @param tabyl_results If `TRUE` and `x` is a tabyl object, #' also return `observed`, `expected`, `residuals` and `stdres` as tabyl. #' @export diff --git a/R/top_levels.R b/R/top_levels.R index a40a8e00..67e98d71 100644 --- a/R/top_levels.R +++ b/R/top_levels.R @@ -26,10 +26,12 @@ top_levels <- function(input_vec, n = 2, show_na = FALSE) { stop("input factor variable must have at least 3 levels") } if (num_levels_in_var < 2 * n) { - stop(paste0("there are ", num_levels_in_var, " levels in the variable and ", - n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, - " is greater than ", num_levels_in_var, ", - there would be overlap in the top and bottom groups and some records will be double-counted.")) + stop(paste0( + "there are ", num_levels_in_var, " levels in the variable and ", + n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, + " is greater than ", num_levels_in_var, ", + there would be overlap in the top and bottom groups and some records will be double-counted." 
+ )) } if (n < 1 || n %% 1 != 0) { stop("n must be a whole number at least 1") diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index aa53ada3..bc46fc25 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -309,6 +309,10 @@ test_that("NA levels get moved to the last column in the data.frame, are suppres y_with_missing[["NA_"]] %>% untabyl(), # column c remains numeric data.frame(c = 10, `1` = 1, `2` = 0, NA_ = 1, check.names = FALSE) ) + # If no NA in 3rd variable, it doesn't appear in split list + expect_equal(length(dplyr::starwars %>% + dplyr::filter(species == "Human") %>% + tabyl(eye_color, skin_color, gender, show_missing_levels = TRUE)), 2) # If there is NA, it does appear in split list expect_equal(length(dplyr::starwars %>% From 44d25b4919f52ac0b4e073be18ac51a8c52fa47b Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Thu, 7 Dec 2023 08:54:07 -0500 Subject: [PATCH 10/17] Update excel_time_to_numeric.R --- R/excel_time_to_numeric.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/excel_time_to_numeric.R b/R/excel_time_to_numeric.R index d6366bde..a25ce6d9 100644 --- a/R/excel_time_to_numeric.R +++ b/R/excel_time_to_numeric.R @@ -19,7 +19,7 @@ #' @param round_seconds Should the output number of seconds be rounded to an #' integer? #' @return A vector of numbers >= 0 and <86400 -#' @family Date-time cleaning +#' @family date-time cleaning #' @seealso `\link{excel_numeric_to_date}` #' @export excel_time_to_numeric <- function(time_value, round_seconds = TRUE) { From ab58dc76d3059dff4f7e2183c193d1963f8efe8a Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 7 Dec 2023 09:22:23 -0500 Subject: [PATCH 11/17] Review links. 
--- R/excel_dates.R | 2 +- R/excel_time_to_numeric.R | 2 +- man/excel_numeric_to_date.Rd | 2 +- man/excel_time_to_numeric.Rd | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/excel_dates.R b/R/excel_dates.R index 1b8266a5..7dd17492 100644 --- a/R/excel_dates.R +++ b/R/excel_dates.R @@ -33,7 +33,7 @@ #' https://support.microsoft.com/en-us/help/2722715/support-for-the-leap-second). #' #' @export -#' @seealso \code{\link{excel_time_to_numeric}} +#' @seealso [excel_time_to_numeric()] #' @examples #' excel_numeric_to_date(40000) #' excel_numeric_to_date(40000.5) # No time is included diff --git a/R/excel_time_to_numeric.R b/R/excel_time_to_numeric.R index a25ce6d9..8517a31c 100644 --- a/R/excel_time_to_numeric.R +++ b/R/excel_time_to_numeric.R @@ -20,7 +20,7 @@ #' integer? #' @return A vector of numbers >= 0 and <86400 #' @family date-time cleaning -#' @seealso `\link{excel_numeric_to_date}` +#' @seealso [excel_numeric_to_date()] #' @export excel_time_to_numeric <- function(time_value, round_seconds = TRUE) { UseMethod("excel_time_to_numeric") diff --git a/man/excel_numeric_to_date.Rd b/man/excel_numeric_to_date.Rd index 99445f4e..23fc1ba1 100644 --- a/man/excel_numeric_to_date.Rd +++ b/man/excel_numeric_to_date.Rd @@ -64,7 +64,7 @@ excel_numeric_to_date(40000.521, ) # Time with fractional seconds is included } \seealso{ -\code{\link{excel_time_to_numeric}} +\code{\link[=excel_time_to_numeric]{excel_time_to_numeric()}} Other date-time cleaning: \code{\link{convert_to_date}()}, diff --git a/man/excel_time_to_numeric.Rd b/man/excel_time_to_numeric.Rd index 59e9b033..ebba75d4 100644 --- a/man/excel_time_to_numeric.Rd +++ b/man/excel_time_to_numeric.Rd @@ -34,11 +34,11 @@ Microsoft Excel to a numeric number of seconds between 0 and 86400. 
} } \seealso{ -\verb{\link{excel_numeric_to_date}} +\code{\link[=excel_numeric_to_date]{excel_numeric_to_date()}} -Other Date-time cleaning: +Other date-time cleaning: \code{\link{convert_to_date}()}, \code{\link{excel_numeric_to_date}()}, \code{\link{sas_numeric_to_date}()} } -\concept{Date-time cleaning} +\concept{date-time cleaning} From fe2eee3ae0eebf1ad4e0f555ffcaea0276de1fe7 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 7 Dec 2023 09:24:01 -0500 Subject: [PATCH 12/17] Inherit package doc from DESCRIPTION + use `@keywords internal` (as it is the norm in tidyverse packages --- R/janitor.R | 23 +++-------------------- man/janitor-package.Rd | 24 +++++------------------- 2 files changed, 8 insertions(+), 39 deletions(-) diff --git a/R/janitor.R b/R/janitor.R index 1d3adf33..f8b7e0aa 100644 --- a/R/janitor.R +++ b/R/janitor.R @@ -1,24 +1,7 @@ -#' janitor -#' -#' janitor has simple little tools for examining and cleaning dirty data. -#' -#' @section Main functions: -#' The main janitor functions can: perfectly format data.frame -#' column names; provide quick counts of variable combinations (i.e., -#' frequency tables and crosstabs); and explore duplicate records. Other -#' janitor functions nicely format the tabulation results. These -#' tabulate-and-report functions approximate popular features of SPSS and -#' Microsoft Excel. -#' #' @section Package context: -#' This package follows the principles of the "tidyverse" and works -#' well with the pipe function `%>%`. -#' -#' janitor was built with beginning-to-intermediate R users in mind -#' and is optimized for user-friendliness. Advanced users can do most -#' things covered here, but they can do it faster with janitor and save -#' their thinking for more fun tasks. -#' +#' Advanced users can do most things covered here, but they can do it +#' faster with janitor and save their thinking for more fun tasks. 
+#' @keywords internal "_PACKAGE" ## quiets concerns of R CMD check re: the .'s that appear in pipelines ## and the "n" that is produced by dplyr::count() in a pipeline diff --git a/man/janitor-package.Rd b/man/janitor-package.Rd index 8c6bf262..f397d9b1 100644 --- a/man/janitor-package.Rd +++ b/man/janitor-package.Rd @@ -4,29 +4,14 @@ \name{janitor-package} \alias{janitor} \alias{janitor-package} -\title{janitor} +\title{janitor: Simple Tools for Examining and Cleaning Dirty Data} \description{ -janitor has simple little tools for examining and cleaning dirty data. +The main janitor functions can: perfectly format data.frame column names; provide quick counts of variable combinations (i.e., frequency tables and crosstabs); and explore duplicate records. Other janitor functions nicely format the tabulation results. These tabulate-and-report functions approximate popular features of SPSS and Microsoft Excel. This package follows the principles of the "tidyverse" and works well with the pipe function %>%. janitor was built with beginning-to-intermediate R users in mind and is optimized for user-friendliness. } -\section{Main functions}{ - -The main janitor functions can: perfectly format data.frame -column names; provide quick counts of variable combinations (i.e., -frequency tables and crosstabs); and explore duplicate records. Other -janitor functions nicely format the tabulation results. These -tabulate-and-report functions approximate popular features of SPSS and -Microsoft Excel. -} - \section{Package context}{ -This package follows the principles of the "tidyverse" and works -well with the pipe function \verb{\%>\%}. - -janitor was built with beginning-to-intermediate R users in mind -and is optimized for user-friendliness. Advanced users can do most -things covered here, but they can do it faster with janitor and save -their thinking for more fun tasks. 
+Advanced users can do most things covered here, but they can do it +faster with janitor and save their thinking for more fun tasks. } \seealso{ @@ -52,3 +37,4 @@ Other contributors: } } +\keyword{internal} From 686579ef285f9708f02669bfa162b5bcf7605c78 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 7 Dec 2023 09:33:26 -0500 Subject: [PATCH 13/17] fix cran note --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3e387eb8..5d715ce2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,11 +22,11 @@ These are all minor breaking changes resulting from enhancements and are not exp * `get_one_to_one()` no longer errors with near-equal values that become identical factor levels (fix #543, thanks to @olivroy for reporting) -# Refactoring +## Refactoring * Remove dplyr verbs superseded in dplyr 1.0.0 (#547, @olivroy) -* Restyle the package and vignettes according to the [tidyverse style guide](style.tidyverse.org) (#548, olivroy) +* Restyle the package and vignettes according to the [tidyverse style guide](https://style.tidyverse.org) (#548, olivroy) # janitor 2.2.0 (2023-02-02) From c74f662e6471abe1d919d72950a0c123a9b121c6 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 7 Dec 2023 11:41:59 -0500 Subject: [PATCH 14/17] Address comments --- R/adorn_percentages.R | 2 +- R/adorn_title.R | 12 ++++-------- R/adorn_totals.R | 11 ++--------- R/convert_to_date.R | 2 +- R/janitor_deprecated.R | 4 ++-- R/make_clean_names.R | 4 ++-- R/row_to_names.R | 2 +- man/adorn_percentages.Rd | 2 +- man/convert_to_date.Rd | 2 +- man/make_clean_names.Rd | 4 ++-- man/row_to_names.Rd | 2 +- man/use_first_valid_of.Rd | 4 ++-- 12 files changed, 20 insertions(+), 31 deletions(-) diff --git a/R/adorn_percentages.R b/R/adorn_percentages.R index 2fb93952..6536325f 100644 --- a/R/adorn_percentages.R +++ b/R/adorn_percentages.R @@ -9,7 +9,7 @@ #' `data.frame` in the list (designed for 3-way `tabyl` lists). 
#' @param denominator The direction to use for calculating percentages. #' One of "row", "col", or "all". -#' @param na.rm should missing values (including NaN) be omitted from the calculations? +#' @param na.rm should missing values (including `NaN`) be omitted from the calculations? #' @param ... columns to adorn. This takes a <[`tidy-select`][dplyr::dplyr_tidy_select]> #' specification. By default, all numeric columns (besides the initial column, if numeric) #' are adorned, but this allows you to manually specify which columns should diff --git a/R/adorn_title.R b/R/adorn_title.R index 642a288e..fa5a5e8a 100644 --- a/R/adorn_title.R +++ b/R/adorn_title.R @@ -55,18 +55,14 @@ adorn_title <- function(dat, placement = "top", row_name, col_name) { if (inherits(dat, "tabyl")) { if (attr(dat, "tabyl_type") == "one_way") { - warning(c( - "adorn_title is meant for two-way tabyls, ", - "calling it on a one-way tabyl may not yield a meaningful result" - )) + warning( + "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result" + ) } } if (missing(col_name)) { if (!inherits(dat, "tabyl")) { - stop(c( - "When input is not a data.frame of class tabyl, ", - "a value must be specified for the col_name argument" - )) + stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument.") } col_var <- attr(dat, "var_names")$col } else { diff --git a/R/adorn_totals.R b/R/adorn_totals.R index b51a4020..b4db52f3 100644 --- a/R/adorn_totals.R +++ b/R/adorn_totals.R @@ -60,11 +60,7 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T } if (length(cols_to_total) == 0) { - stop( - "at least one targeted column must be of class numeric. ", - "Control target variables with the ... argument. ", - "adorn_totals should be called before other adorn_ functions." - ) + stop("at least one targeted column must be of class numeric. Control target variables with the ... argument. 
adorn_totals should be called before other adorn_ functions.") } if (sum(where %in% c("row", "col")) != length(where)) { @@ -153,10 +149,7 @@ adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "T if (!1 %in% cols_to_total) { # give users the option to total the first column?? Up to them I guess col_totals[1, 1] <- name[1] # replace first column value with name argument } else { - message( - "Because the first column was specified to be totaled, ", - "it does not contain the label 'Total' (or user-specified name) in the totals row" - ) + message("Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row") } dat[(nrow(dat) + 1), ] <- col_totals[1, ] # insert totals_col as last row in dat if (factor_input) { # restore factor/ordered info, #494 diff --git a/R/convert_to_date.R b/R/convert_to_date.R index b96b3f9e..40177867 100644 --- a/R/convert_to_date.R +++ b/R/convert_to_date.R @@ -14,7 +14,7 @@ #' already. Ignored for Date output. #' @param ... Passed to further methods. Eventually may be passed to #' `excel_numeric_to_date()`, `base::as.POSIXct()`, or `base::as.Date()`. -#' @param character_fun A function to convert non-numeric-looking, non-NA values +#' @param character_fun A function to convert non-numeric-looking, non-`NA` values #' in `x` to POSIXct objects. #' @param string_conversion_failure If a character value fails to parse into the #' desired class and instead returns `NA`, should the function return the diff --git a/R/janitor_deprecated.R b/R/janitor_deprecated.R index 90586db6..2be26e7c 100644 --- a/R/janitor_deprecated.R +++ b/R/janitor_deprecated.R @@ -97,7 +97,7 @@ add_totals_col <- function(dat, na.rm = TRUE) { } -#' @title Returns first non-NA value from a set of vectors. +#' @title Returns first non-`NA` value from a set of vectors. #' #' @description #' Warning: Deprecated, do not use in new code. Use [dplyr::coalesce()] instead. 
@@ -107,7 +107,7 @@ add_totals_col <- function(dat, na.rm = TRUE) { #' It's more readable and handles problems like [ifelse()]'s inability to work with dates in this way. #' #' @param ... the input vectors. Order matters: these are searched and prioritized in the order they are supplied. -#' @param if_all_NA what value should be used when all of the vectors return `NA` for a certain index? Default is NA. +#' @param if_all_NA what value should be used when all of the vectors return `NA` for a certain index? Default is `NA`. #' @return Returns a single vector with the selected values. #' @seealso janitor_deprecated #' @export diff --git a/R/make_clean_names.R b/R/make_clean_names.R index 284f0dcf..472c3b0e 100644 --- a/R/make_clean_names.R +++ b/R/make_clean_names.R @@ -15,10 +15,10 @@ #' #' The order of operations is: make replacements, (optional) ASCII conversion, #' remove initial spaces and punctuation, apply `base::make.names()`, -#' apply `snakecase::to_any_case`, and add numeric suffixes +#' apply `snakecase::to_any_case()`, and add numeric suffixes #' to resolve any duplicated names. #' -#' This function relies on `snakecase::to_any_case` and can take advantage of +#' This function relies on `snakecase::to_any_case()` and can take advantage of #' its versatility. For instance, an abbreviation like "ID" can have its #' capitalization preserved by passing the argument `abbreviations = "ID"`. #' See the documentation for [snakecase::to_any_case()] diff --git a/R/row_to_names.R b/R/row_to_names.R index f24b180f..0d9c4d0f 100644 --- a/R/row_to_names.R +++ b/R/row_to_names.R @@ -4,7 +4,7 @@ #' @param row_number The row(s) of `dat` containing the variable names or the #' string `"find_header"` to use `find_header(dat=dat, ...)` to find #' the row_number. Allows for multiple rows input as a numeric vector. NA's are -#' ignored, and if a column contains only NA value it will be named `"NA"`.
+#' ignored, and if a column contains only `NA` value it will be named `"NA"`. #' @param ... Sent to `find_header()`, if #' `row_number = "find_header"`. Otherwise, ignored. #' @param remove_row Should the row `row_number` be removed from the diff --git a/man/adorn_percentages.Rd b/man/adorn_percentages.Rd index e04d5afc..0d6b8714 100644 --- a/man/adorn_percentages.Rd +++ b/man/adorn_percentages.Rd @@ -14,7 +14,7 @@ If given a list of data.frames, this function will apply itself to each \item{denominator}{The direction to use for calculating percentages. One of "row", "col", or "all".} -\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +\item{na.rm}{should missing values (including \code{NaN}) be omitted from the calculations?} \item{...}{columns to adorn. This takes a <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> specification. By default, all numeric columns (besides the initial column, if numeric) diff --git a/man/convert_to_date.Rd b/man/convert_to_date.Rd index e623b01a..441fffd8 100644 --- a/man/convert_to_date.Rd +++ b/man/convert_to_date.Rd @@ -26,7 +26,7 @@ convert_to_datetime( \item{...}{Passed to further methods. Eventually may be passed to \code{excel_numeric_to_date()}, \code{base::as.POSIXct()}, or \code{base::as.Date()}.} -\item{character_fun}{A function to convert non-numeric-looking, non-NA values +\item{character_fun}{A function to convert non-numeric-looking, non-\code{NA} values in \code{x} to POSIXct objects.} \item{string_conversion_failure}{If a character value fails to parse into the diff --git a/man/make_clean_names.Rd b/man/make_clean_names.Rd index 2486da61..d17559fa 100644 --- a/man/make_clean_names.Rd +++ b/man/make_clean_names.Rd @@ -107,10 +107,10 @@ the Spanish character "enye" becomes "n".
The order of operations is: make replacements, (optional) ASCII conversion, remove initial spaces and punctuation, apply \code{base::make.names()}, -apply \code{snakecase::to_any_case}, and add numeric suffixes +apply \code{snakecase::to_any_case()}, and add numeric suffixes to resolve any duplicated names. -This function relies on \code{snakecase::to_any_case} and can take advantage of +This function relies on \code{snakecase::to_any_case()} and can take advantage of its versatility. For instance, an abbreviation like "ID" can have its capitalization preserved by passing the argument \code{abbreviations = "ID"}. See the documentation for \code{\link[snakecase:to_any_case]{snakecase::to_any_case()}} diff --git a/man/row_to_names.Rd b/man/row_to_names.Rd index 34e4e48a..e1096399 100644 --- a/man/row_to_names.Rd +++ b/man/row_to_names.Rd @@ -19,7 +19,7 @@ row_to_names( \item{row_number}{The row(s) of \code{dat} containing the variable names or the string \code{"find_header"} to use \code{find_header(dat=dat, ...)} to find the row_number. Allows for multiple rows input as a numeric vector. NA's are -ignored, and if a column contains only NA value it will be named \code{"NA"}.} +ignored, and if a column contains only \code{NA} value it will be named \code{"NA"}.} \item{...}{Sent to \code{find_header()}, if \code{row_number = "find_header"}. Otherwise, ignored.} diff --git a/man/use_first_valid_of.Rd b/man/use_first_valid_of.Rd index 901477ee..e735e1dc 100644 --- a/man/use_first_valid_of.Rd +++ b/man/use_first_valid_of.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/janitor_deprecated.R \name{use_first_valid_of} \alias{use_first_valid_of} -\title{Returns first non-NA value from a set of vectors.} +\title{Returns first non-\code{NA} value from a set of vectors.} \usage{ use_first_valid_of(..., if_all_NA = NA) } \arguments{ \item{...}{the input vectors.
Order matters: these are searched and prioritized in the order they are supplied.} -\item{if_all_NA}{what value should be used when all of the vectors return \code{NA} for a certain index? Default is NA.} +\item{if_all_NA}{what value should be used when all of the vectors return \code{NA} for a certain index? Default is \code{NA}.} } \value{ Returns a single vector with the selected values. From ee425db354e6e4214fdcec2953b042deb41bdc11 Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 7 Dec 2023 11:46:49 -0500 Subject: [PATCH 15/17] oops --- R/top_levels.R | 2 +- man/top_levels.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/top_levels.R b/R/top_levels.R index 67e98d71..87320c11 100644 --- a/R/top_levels.R +++ b/R/top_levels.R @@ -5,7 +5,7 @@ #' #' @param input_vec The factor variable to tabulate. #' @param n Number of levels to include in top and bottom groups -#' @param show_na Should cases where the variable is NA be shown? +#' @param show_na Should cases where the variable is `NA` be shown? #' @return A `data.frame` (actually a `tbl_df`) with the frequencies of the #' grouped, tabulated variable. 
Includes counts and percentages, and valid #' percentages (calculated omitting `NA` values, if present in the vector and diff --git a/man/top_levels.Rd b/man/top_levels.Rd index 80d9571b..821c02c9 100644 --- a/man/top_levels.Rd +++ b/man/top_levels.Rd @@ -12,7 +12,7 @@ top_levels(input_vec, n = 2, show_na = FALSE) \item{n}{Number of levels to include in top and bottom groups} -\item{show_na}{Should cases where the variable is NA be shown?} +\item{show_na}{Should cases where the variable is \code{NA} be shown?} } \value{ A \code{data.frame} (actually a \code{tbl_df}) with the frequencies of the From f36c85a2bbde532733cc940cb8b10aad7607c14e Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 18 Jan 2024 11:16:36 -0500 Subject: [PATCH 16/17] re-document with roxygen2 7.3.0 --- DESCRIPTION | 2 +- R/compare_df_cols.R | 2 ++ R/convert_to_date.R | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a5de4660..7607c8ef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,4 +52,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 diff --git a/R/compare_df_cols.R b/R/compare_df_cols.R index c478b91c..5d7c80fc 100644 --- a/R/compare_df_cols.R +++ b/R/compare_df_cols.R @@ -174,6 +174,7 @@ compare_df_cols_df_maker <- function(x, class_colname = "class", strict_descript UseMethod("compare_df_cols_df_maker") } +#' @exportS3Method NULL compare_df_cols_df_maker.data.frame <- function(x, class_colname = "class", strict_description) { if (class_colname == "column_name") { stop('`class_colname` cannot be "column_name"') @@ -193,6 +194,7 @@ compare_df_cols_df_maker.data.frame <- function(x, class_colname = "class", stri ret } +#' @exportS3Method NULL compare_df_cols_df_maker.list <- function(x, class_colname = "class", strict_description = strict_description) { if (length(class_colname) != length(x)) { stop("`x` and `class_colname` must be the same length.") diff 
--git a/R/convert_to_date.R b/R/convert_to_date.R index 40177867..b239d29e 100644 --- a/R/convert_to_date.R +++ b/R/convert_to_date.R @@ -68,6 +68,7 @@ convert_to_datetime_helper <- function(x, ..., out_class = c("POSIXct", "Date")) UseMethod("convert_to_datetime_helper") } +#' @exportS3Method NULL convert_to_datetime_helper.numeric <- function(x, ..., date_system = "modern", include_time = NULL, @@ -87,10 +88,12 @@ convert_to_datetime_helper.numeric <- function(x, ..., ) } +#' @exportS3Method NULL convert_to_datetime_helper.factor <- function(x, ..., out_class = c("POSIXct", "Date")) { convert_to_datetime_helper.character(as.character(x), ..., out_class = out_class) } +#' @exportS3Method NULL convert_to_datetime_helper.POSIXt <- function(x, ..., out_class = c("POSIXct", "Date")) { out_class <- match.arg(out_class) if (out_class %in% "POSIXct") { @@ -101,6 +104,7 @@ convert_to_datetime_helper.POSIXt <- function(x, ..., out_class = c("POSIXct", " } } +#' @exportS3Method NULL convert_to_datetime_helper.Date <- function(x, ..., tz = "UTC", out_class = c("POSIXct", "Date")) { out_class <- match.arg(out_class) if (out_class %in% "POSIXct") { @@ -113,6 +117,7 @@ convert_to_datetime_helper.Date <- function(x, ..., tz = "UTC", out_class = c("P ret } +#' @exportS3Method NULL convert_to_datetime_helper.character <- function(x, ..., tz = "UTC", character_fun = lubridate::ymd_hms, string_conversion_failure = c("error", "warning"), out_class = c("POSIXct", "Date")) { string_conversion_failure <- match.arg(string_conversion_failure) out_class <- match.arg(out_class) From b1ef486af5746d0293d2f05b666c23538ac101dc Mon Sep 17 00:00:00 2001 From: olivroy Date: Thu, 18 Jan 2024 11:21:22 -0500 Subject: [PATCH 17/17] Add pkgdown function index --- R/janitor_deprecated.R | 1 + _pkgdown.yml | 60 +++++++++++++++++++++++++++++++++++++++ man/janitor_deprecated.Rd | 1 + 3 files changed, 62 insertions(+) diff --git a/R/janitor_deprecated.R b/R/janitor_deprecated.R index 2be26e7c..946a2924 
100644 --- a/R/janitor_deprecated.R +++ b/R/janitor_deprecated.R @@ -12,6 +12,7 @@ #' * [remove_empty_cols()] -> [`remove_empty("cols")`][remove_empty()] #' #' @name janitor_deprecated +#' @keywords internal # EXCLUDE COVERAGE START NULL diff --git a/_pkgdown.yml b/_pkgdown.yml index fcbb6757..965f4647 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,63 @@ url: https://sfirke.github.io/janitor/ template: bootstrap: 5 + + +reference: +- title: Cleaning data + +- subtitle: Cleaning variable names + contents: + - contains("clean_names") + +- title: Exploring data + desc: > + tabyls are an enhanced version of tables. See `vignette("tabyls")` + for more details. + contents: + - tabyl + - starts_with("adorn") + - contains("tabyl") + - -contains('.test') + +- subtitle: Change order + contents: + - row_to_names + - find_header + +- title: Comparison + desc: > + Compare data frames columns + contents: + - starts_with("compare_df_cols") + +- title: Removing unnecessary columns / rows + contents: + - starts_with("remove_") + - get_dupes + - get_one_to_one + - top_levels + - single_value + +- title: Rounding / dates helpers + desc: > + Help to mimic some behaviour from Excel or SAS. + These should be used on vector. + contents: + - round_half_up + - signif_half_up + - round_to_fraction + - excel_numeric_to_date + - sas_numeric_to_date + - excel_time_to_numeric + - starts_with("convert_to_date") + +- title: Misc / helpers + desc: > + These functions can help perform less frequent operations. 
+ contents: + - describe_class + - paste_skip_na + - chisq.test + - fisher.test + - mu_to_u diff --git a/man/janitor_deprecated.Rd b/man/janitor_deprecated.Rd index 8c5c452c..6b9cb5c2 100644 --- a/man/janitor_deprecated.Rd +++ b/man/janitor_deprecated.Rd @@ -18,3 +18,4 @@ These functions have already become defunct or may be defunct as soon as the nex \item \code{\link[=remove_empty_cols]{remove_empty_cols()}} -> \code{\link[=remove_empty]{remove_empty("cols")}} } } +\keyword{internal}